lmnr 0.4.64__py3-none-any.whl → 0.4.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/openllmetry_sdk/instruments.py +1 -0
- lmnr/openllmetry_sdk/tracing/tracing.py +50 -15
- lmnr/sdk/browser/__init__.py +0 -9
- lmnr/sdk/browser/browser_use_otel.py +118 -0
- lmnr/sdk/browser/playwright_otel.py +310 -0
- lmnr/sdk/browser/utils.py +104 -0
- lmnr/sdk/client.py +313 -0
- lmnr/sdk/datasets.py +2 -2
- lmnr/sdk/evaluations.py +32 -10
- lmnr/sdk/laminar.py +72 -194
- lmnr/sdk/types.py +29 -4
- lmnr/version.py +1 -1
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/METADATA +51 -51
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/RECORD +17 -14
- lmnr/sdk/browser/playwright_patch.py +0 -377
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/LICENSE +0 -0
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/WHEEL +0 -0
- {lmnr-0.4.64.dist-info → lmnr-0.4.65.dist-info}/entry_points.txt +0 -0
lmnr/sdk/client.py
ADDED
@@ -0,0 +1,313 @@
|
|
1
|
+
"""
|
2
|
+
Laminar HTTP client. Used to send data to/from the Laminar API.
|
3
|
+
Initialized in `Laminar` singleton, but can be imported
|
4
|
+
in other classes.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
import json
|
9
|
+
import aiohttp
|
10
|
+
import gzip
|
11
|
+
from opentelemetry import trace
|
12
|
+
from pydantic.alias_generators import to_snake
|
13
|
+
import requests
|
14
|
+
from typing import Awaitable, Optional, Union
|
15
|
+
import urllib.parse
|
16
|
+
import uuid
|
17
|
+
|
18
|
+
from lmnr.sdk.types import (
|
19
|
+
EvaluationResultDatapoint,
|
20
|
+
GetDatapointsResponse,
|
21
|
+
InitEvaluationResponse,
|
22
|
+
NodeInput,
|
23
|
+
PipelineRunError,
|
24
|
+
PipelineRunRequest,
|
25
|
+
PipelineRunResponse,
|
26
|
+
SemanticSearchRequest,
|
27
|
+
SemanticSearchResponse,
|
28
|
+
)
|
29
|
+
from lmnr.version import SDK_VERSION
|
30
|
+
|
31
|
+
|
32
|
+
class LaminarClient:
    """Laminar HTTP client. Used to send data to/from the Laminar API.

    Implemented as a class-level singleton: `initialize` is called once (by
    the `Laminar` singleton) before any other method. Holds one
    `requests.Session` for synchronous calls and one lazily created
    `aiohttp.ClientSession` for asynchronous calls.
    """

    __base_url: Optional[str] = None
    __project_api_key: Optional[str] = None
    # The aiohttp session is bound to the event loop it was created on; it is
    # created lazily in __get_session when no loop was running at
    # initialize() time.
    __session: Optional[aiohttp.ClientSession] = None
    __sync_session: Optional[requests.Session] = None

    @classmethod
    def initialize(cls, base_url: str, project_api_key: str):
        """Configure the client.

        Args:
            base_url: base URL of the Laminar API (no trailing slash).
            project_api_key: project API key, sent as a Bearer token.
        """
        cls.__base_url = base_url
        cls.__project_api_key = project_api_key
        cls.__sync_session = requests.Session()
        # asyncio.get_event_loop() is deprecated when no loop is running, so
        # probe for a running loop explicitly. Without one, the async session
        # is created lazily in __get_session.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            pass
        else:
            cls.__session = aiohttp.ClientSession()

    @classmethod
    def shutdown(cls):
        """Close both sessions. Safe to call with or without a running loop."""
        if cls.__sync_session is not None:
            cls.__sync_session.close()
        if cls.__session is None:
            return
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: drive the close coroutine to completion here.
            asyncio.run(cls.__session.close())
        else:
            # Bugfix: the coroutine returned by ClientSession.close() was
            # previously never awaited when a loop was running, so the session
            # was never actually closed. Schedule it on the running loop.
            loop.create_task(cls.__session.close())
        cls.__session = None

    @classmethod
    async def shutdown_async(cls):
        """Close the async session from within a running event loop."""
        if cls.__session is not None:
            await cls.__session.close()
            cls.__session = None

    @classmethod
    def run_pipeline(
        cls,
        pipeline: str,
        inputs: dict[str, NodeInput],
        env: Optional[dict[str, str]] = None,
        metadata: Optional[dict[str, str]] = None,
        parent_span_id: Optional[uuid.UUID] = None,
        trace_id: Optional[uuid.UUID] = None,
    ) -> Union[PipelineRunResponse, Awaitable[PipelineRunResponse]]:
        """Run a Laminar pipeline.

        When called while an event loop is running, returns an awaitable the
        caller must await; otherwise runs to completion and returns the
        response directly.

        Args:
            pipeline: pipeline name.
            inputs: pipeline inputs, keyed by input node name.
            env: environment variables passed to the pipeline run.
            metadata: metadata attached to the pipeline run.
            parent_span_id: parent span; defaults to the current OTel span.
            trace_id: trace to attach to; defaults to the current OTel trace.

        Raises:
            ValueError: if the client is not initialized or the request
                cannot be constructed.
            PipelineRunError: if the API returns a non-200 response.
        """
        if cls.__project_api_key is None:
            raise ValueError(
                "Please initialize the Laminar object with your project "
                "API key or set the LMNR_PROJECT_API_KEY environment variable"
            )
        try:
            current_span = trace.get_current_span()
            if current_span != trace.INVALID_SPAN:
                # Attach to the ambient OTel context unless explicitly given.
                parent_span_id = parent_span_id or uuid.UUID(
                    int=current_span.get_span_context().span_id
                )
                trace_id = trace_id or uuid.UUID(
                    int=current_span.get_span_context().trace_id
                )
            request = PipelineRunRequest(
                inputs=inputs,
                pipeline=pipeline,
                env=env or {},
                metadata=metadata or {},
                parent_span_id=parent_span_id,
                trace_id=trace_id,
            )
        except Exception as e:
            raise ValueError(f"Invalid request: {e}") from e
        # Bugfix: loop.run_in_executor(None, cls.__run, request) produced a
        # Future resolving to a coroutine *object*, never the response.
        # Return the coroutine itself when a loop is running, so the caller
        # awaits it on that loop (keeping the session loop-affine).
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(cls.__run(request))
        return cls.__run(request)

    @classmethod
    def semantic_search(
        cls,
        query: str,
        dataset_id: uuid.UUID,
        limit: Optional[int] = None,
        threshold: Optional[float] = None,
    ) -> Union[SemanticSearchResponse, Awaitable[SemanticSearchResponse]]:
        """Semantic search over a dataset.

        Same sync/async dispatch as `run_pipeline`: returns an awaitable when
        called inside a running event loop, the response otherwise.

        Args:
            query: free-text query.
            dataset_id: id of the dataset to search.
            limit: maximum number of results to return.
            threshold: minimum similarity score for a result.
        """
        request = SemanticSearchRequest(
            query=query,
            dataset_id=dataset_id,
            limit=limit,
            threshold=threshold,
        )
        # Bugfix: same run_in_executor-on-a-coroutine misuse as run_pipeline.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(cls.__semantic_search(request))
        return cls.__semantic_search(request)

    @classmethod
    async def init_eval(
        cls, name: Optional[str] = None, group_name: Optional[str] = None
    ) -> InitEvaluationResponse:
        """Create a new evaluation on the server and return its metadata."""
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + "/v1/evals",
            json={
                "name": name,
                "groupName": group_name,
            },
            headers=cls._headers(),
        ) as response:
            resp_json = await response.json()
            return InitEvaluationResponse.model_validate(resp_json)

    @classmethod
    async def save_eval_datapoints(
        cls,
        eval_id: uuid.UUID,
        datapoints: list[EvaluationResultDatapoint],
        groupName: Optional[str] = None,
    ):
        """Upload evaluation datapoints for an existing evaluation.

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + f"/v1/evals/{eval_id}/datapoints",
            json={
                "points": [datapoint.to_dict() for datapoint in datapoints],
                "groupName": groupName,
            },
            headers=cls._headers(),
        ) as response:
            if response.status != 200:
                raise ValueError(
                    f"Error saving evaluation datapoints: {await response.text()}"
                )

    @classmethod
    async def send_browser_events(
        cls,
        session_id: str,
        trace_id: str,
        events: list[dict],
        source: str,
    ):
        """Send browser session events to the API (async, gzip-compressed).

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        session = await cls.__get_session()
        payload = {
            "sessionId": session_id,
            "traceId": trace_id,
            "events": events,
            "source": source,
            "sdkVersion": SDK_VERSION,
        }
        # Event batches can be large; compress the JSON body.
        compressed_payload = gzip.compress(json.dumps(payload).encode("utf-8"))

        async with session.post(
            cls.__base_url + "/v1/browser-sessions/events",
            data=compressed_payload,
            headers={
                **cls._headers(),
                "Content-Encoding": "gzip",
            },
        ) as response:
            if response.status != 200:
                raise ValueError(
                    f"Failed to send events: [{response.status}] {await response.text()}"
                )

    @classmethod
    def send_browser_events_sync(
        cls,
        session_id: str,
        trace_id: str,
        events: list[dict],
        source: str,
    ):
        """Synchronous counterpart of `send_browser_events`.

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        url = cls.__base_url + "/v1/browser-sessions/events"
        payload = {
            "sessionId": session_id,
            "traceId": trace_id,
            "events": events,
            "source": source,
            "sdkVersion": SDK_VERSION,
        }
        compressed_payload = gzip.compress(json.dumps(payload).encode("utf-8"))
        response = cls.__sync_session.post(
            url,
            data=compressed_payload,
            headers={
                **cls._headers(),
                "Content-Encoding": "gzip",
            },
        )
        if response.status_code != 200:
            raise ValueError(
                f"Failed to send events: [{response.status_code}] {response.text}"
            )

    @classmethod
    def get_datapoints(
        cls,
        dataset_name: str,
        offset: int,
        limit: int,
    ) -> GetDatapointsResponse:
        """Fetch a page of datapoints from a named dataset (sync).

        Raises:
            ValueError: if the API returns a non-200 response.
        """
        # TODO: Use aiohttp. Currently, this function is called from within
        # `LaminarDataset.__len__`, which is sync, but can be called from
        # both sync and async (primarily async). Python does not make it easy
        # to mix things this way, so we should probably refactor `LaminarDataset`.
        params = {"name": dataset_name, "offset": offset, "limit": limit}
        url = (
            cls.__base_url + "/v1/datasets/datapoints?" + urllib.parse.urlencode(params)
        )
        response = cls.__sync_session.get(url, headers=cls._headers())
        if response.status_code != 200:
            try:
                resp_json = response.json()
                raise ValueError(
                    f"Error fetching datapoints: [{response.status_code}] {json.dumps(resp_json)}"
                )
            except requests.exceptions.RequestException:
                # Body was not valid JSON; fall back to the raw text.
                raise ValueError(
                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
                )
        return GetDatapointsResponse.model_validate(response.json())

    @classmethod
    async def __run(
        cls,
        request: PipelineRunRequest,
    ) -> PipelineRunResponse:
        """POST a pipeline run request and parse the response.

        Raises:
            PipelineRunError: on a non-200 status or an unparseable body.
        """
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + "/v1/pipeline/run",
            data=json.dumps(request.to_dict()),
            headers=cls._headers(),
        ) as response:
            if response.status != 200:
                raise PipelineRunError(response)
            try:
                resp_json = await response.json()
                # API returns camelCase keys; the response model uses
                # snake_case fields.
                resp_json = {to_snake(key): value for key, value in resp_json.items()}
                return PipelineRunResponse(**resp_json)
            except Exception:
                raise PipelineRunError(response)

    @classmethod
    async def __semantic_search(
        cls,
        request: SemanticSearchRequest,
    ) -> SemanticSearchResponse:
        """POST a semantic search request and parse the response.

        Raises:
            ValueError: on a non-200 status or an unparseable body.
        """
        session = await cls.__get_session()
        async with session.post(
            cls.__base_url + "/v1/semantic-search",
            data=json.dumps(request.to_dict()),
            headers=cls._headers(),
        ) as response:
            if response.status != 200:
                raise ValueError(
                    f"Error performing semantic search: [{response.status}] {await response.text()}"
                )
            try:
                resp_json = await response.json()
                for result in resp_json["results"]:
                    # API returns the dataset id as a camelCase string.
                    result["dataset_id"] = uuid.UUID(result["datasetId"])
                return SemanticSearchResponse(**resp_json)
            except Exception as e:
                raise ValueError(
                    f"Error parsing semantic search response: status={response.status} error={e}"
                ) from e

    @classmethod
    def _headers(cls):
        """Default auth + JSON headers. Requires a prior `initialize` call."""
        # Explicit raise rather than assert: asserts are stripped under -O.
        if cls.__project_api_key is None:
            raise ValueError("Project API key is not set")
        return {
            "Authorization": "Bearer " + cls.__project_api_key,
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    @classmethod
    async def __get_session(cls):
        """Return the shared aiohttp session, creating it on first use."""
        if cls.__session is None:
            cls.__session = aiohttp.ClientSession()
        return cls.__session
|
lmnr/sdk/datasets.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
|
3
|
+
from .client import LaminarClient
|
3
4
|
from .log import get_default_logger
|
4
|
-
from .laminar import Laminar as L
|
5
5
|
from .types import Datapoint
|
6
6
|
|
7
7
|
DEFAULT_FETCH_SIZE = 25
|
@@ -38,7 +38,7 @@ class LaminarDataset(EvaluationDataset):
|
|
38
38
|
f"dataset {self.name}. Fetching batch from {self._offset} to "
|
39
39
|
+ f"{self._offset + self._fetch_size}"
|
40
40
|
)
|
41
|
-
resp =
|
41
|
+
resp = LaminarClient.get_datapoints(self.name, self._offset, self._fetch_size)
|
42
42
|
self._fetched_items += resp.items
|
43
43
|
self._offset = len(self._fetched_items)
|
44
44
|
if self._len is None:
|
lmnr/sdk/evaluations.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Any, Awaitable, Optional, Set, Union
|
|
8
8
|
from ..openllmetry_sdk.instruments import Instruments
|
9
9
|
from ..openllmetry_sdk.tracing.attributes import SPAN_TYPE
|
10
10
|
|
11
|
+
from .client import LaminarClient
|
11
12
|
from .datasets import EvaluationDataset
|
12
13
|
from .eval_control import EVALUATION_INSTANCE, PREPARE_ONLY
|
13
14
|
from .laminar import Laminar as L
|
@@ -20,6 +21,7 @@ from .types import (
|
|
20
21
|
HumanEvaluator,
|
21
22
|
Numeric,
|
22
23
|
NumericTypes,
|
24
|
+
PartialEvaluationDatapoint,
|
23
25
|
SpanType,
|
24
26
|
TraceType,
|
25
27
|
)
|
@@ -209,7 +211,9 @@ class Evaluation:
|
|
209
211
|
async def _run(self) -> None:
|
210
212
|
self.reporter.start(len(self.data))
|
211
213
|
try:
|
212
|
-
evaluation = await
|
214
|
+
evaluation = await LaminarClient.init_eval(
|
215
|
+
name=self.name, group_name=self.group_name
|
216
|
+
)
|
213
217
|
result_datapoints = await self._evaluate_in_batches(evaluation.id)
|
214
218
|
|
215
219
|
# Wait for all background upload tasks to complete
|
@@ -227,6 +231,7 @@ class Evaluation:
|
|
227
231
|
average_scores = get_average_scores(result_datapoints)
|
228
232
|
self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
|
229
233
|
self.is_finished = True
|
234
|
+
await LaminarClient.shutdown_async()
|
230
235
|
|
231
236
|
async def _evaluate_in_batches(
|
232
237
|
self, eval_id: uuid.UUID
|
@@ -260,12 +265,29 @@ class Evaluation:
|
|
260
265
|
async def _evaluate_datapoint(
|
261
266
|
self, eval_id: uuid.UUID, datapoint: Datapoint, index: int
|
262
267
|
) -> EvaluationResultDatapoint:
|
268
|
+
evaluation_id = uuid.uuid4()
|
263
269
|
with L.start_as_current_span("evaluation") as evaluation_span:
|
264
270
|
L._set_trace_type(trace_type=TraceType.EVALUATION)
|
265
271
|
evaluation_span.set_attribute(SPAN_TYPE, SpanType.EVALUATION.value)
|
266
272
|
with L.start_as_current_span(
|
267
273
|
"executor", input={"data": datapoint.data}
|
268
274
|
) as executor_span:
|
275
|
+
executor_span_id = uuid.UUID(
|
276
|
+
int=executor_span.get_span_context().span_id
|
277
|
+
)
|
278
|
+
trace_id = uuid.UUID(int=executor_span.get_span_context().trace_id)
|
279
|
+
partial_datapoint = PartialEvaluationDatapoint(
|
280
|
+
id=evaluation_id,
|
281
|
+
data=datapoint.data,
|
282
|
+
target=datapoint.target,
|
283
|
+
index=index,
|
284
|
+
trace_id=trace_id,
|
285
|
+
executor_span_id=executor_span_id,
|
286
|
+
)
|
287
|
+
# First, create datapoint with trace_id so that we can show the dp in the UI
|
288
|
+
await LaminarClient.save_eval_datapoints(
|
289
|
+
eval_id, [partial_datapoint], self.group_name
|
290
|
+
)
|
269
291
|
executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
|
270
292
|
# Run synchronous executors in a thread pool to avoid blocking
|
271
293
|
if not is_async(self.executor):
|
@@ -277,9 +299,6 @@ class Evaluation:
|
|
277
299
|
output = await self.executor(datapoint.data)
|
278
300
|
|
279
301
|
L.set_span_output(output)
|
280
|
-
executor_span_id = uuid.UUID(
|
281
|
-
int=executor_span.get_span_context().span_id
|
282
|
-
)
|
283
302
|
target = datapoint.target
|
284
303
|
|
285
304
|
# Iterate over evaluators
|
@@ -289,11 +308,13 @@ class Evaluation:
|
|
289
308
|
evaluator_name, input={"output": output, "target": target}
|
290
309
|
) as evaluator_span:
|
291
310
|
evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
|
292
|
-
|
293
|
-
await evaluator(output, target)
|
294
|
-
|
295
|
-
|
296
|
-
|
311
|
+
if is_async(evaluator):
|
312
|
+
value = await evaluator(output, target)
|
313
|
+
else:
|
314
|
+
loop = asyncio.get_event_loop()
|
315
|
+
value = await loop.run_in_executor(
|
316
|
+
None, evaluator, output, target
|
317
|
+
)
|
297
318
|
L.set_span_output(value)
|
298
319
|
|
299
320
|
# If evaluator returns a single number, use evaluator name as key
|
@@ -305,6 +326,7 @@ class Evaluation:
|
|
305
326
|
trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
|
306
327
|
|
307
328
|
datapoint = EvaluationResultDatapoint(
|
329
|
+
id=evaluation_id,
|
308
330
|
data=datapoint.data,
|
309
331
|
target=target,
|
310
332
|
executor_output=output,
|
@@ -320,7 +342,7 @@ class Evaluation:
|
|
320
342
|
|
321
343
|
# Create background upload task without awaiting it
|
322
344
|
upload_task = asyncio.create_task(
|
323
|
-
|
345
|
+
LaminarClient.save_eval_datapoints(eval_id, [datapoint], self.group_name)
|
324
346
|
)
|
325
347
|
self.upload_tasks.append(upload_task)
|
326
348
|
|