lmnr 0.4.55__py3-none-any.whl → 0.4.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lmnr/openllmetry_sdk/__init__.py CHANGED
@@ -32,6 +32,9 @@ class Traceloop:
         should_enrich_metrics: bool = False,
         resource_attributes: dict = {},
         instruments: Optional[Set[Instruments]] = None,
+        base_http_url: Optional[str] = None,
+        project_api_key: Optional[str] = None,
+        max_export_batch_size: Optional[int] = None,
     ) -> None:
         if not is_tracing_enabled():
             return
@@ -69,4 +72,7 @@ class Traceloop:
             exporter=exporter,
             should_enrich_metrics=should_enrich_metrics,
             instruments=instruments,
+            base_http_url=base_http_url,
+            project_api_key=project_api_key,
+            max_export_batch_size=max_export_batch_size,
         )
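The three new keyword arguments let callers configure the HTTP endpoint and API key used by browser instrumentation, plus the span export batch size. A minimal sketch of how they surface through the public entry point (`Laminar.initialize`, shown later in this diff); the key value is a placeholder:

```python
from lmnr import Laminar

# project_api_key falls back to the LMNR_PROJECT_API_KEY env var;
# max_export_batch_size is forwarded to the BatchSpanProcessor created
# in TracerWrapper below.
Laminar.initialize(
    project_api_key="<YOUR_PROJECT_API_KEY>",  # placeholder
    max_export_batch_size=64,
)
```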
lmnr/openllmetry_sdk/instruments.py CHANGED
@@ -21,6 +21,7 @@ class Instruments(Enum):
     OLLAMA = "ollama"
     OPENAI = "openai"
     PINECONE = "pinecone"
+    PLAYWRIGHT = "playwright"
     QDRANT = "qdrant"
     REPLICATE = "replicate"
    SAGEMAKER = "sagemaker"
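The new enum member makes browser-session recording selectable like any other instrument. A sketch of opting in explicitly (passing `instruments` restricts instrumentation to the listed set):

```python
from lmnr import Laminar
from lmnr.openllmetry_sdk.instruments import Instruments

# Only instrument Playwright; other libraries stay untouched.
Laminar.initialize(
    project_api_key="<YOUR_PROJECT_API_KEY>",  # placeholder
    instruments={Instruments.PLAYWRIGHT},
)
```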
lmnr/openllmetry_sdk/tracing/tracing.py CHANGED
@@ -6,6 +6,7 @@ import logging
 from contextvars import Context
 from lmnr.sdk.log import VerboseColorfulFormatter
 from lmnr.openllmetry_sdk.instruments import Instruments
+from lmnr.sdk.browser import init_browser_tracing
 from lmnr.openllmetry_sdk.tracing.attributes import (
     ASSOCIATION_PROPERTIES,
     SPAN_INSTRUMENTATION_SOURCE,
@@ -80,6 +81,9 @@ class TracerWrapper(object):
         exporter: Optional[SpanExporter] = None,
         should_enrich_metrics: bool = False,
         instruments: Optional[Set[Instruments]] = None,
+        base_http_url: Optional[str] = None,
+        project_api_key: Optional[str] = None,
+        max_export_batch_size: Optional[int] = None,
     ) -> "TracerWrapper":
         cls._initialize_logger(cls)
         if not hasattr(cls, "instance"):
@@ -106,7 +110,8 @@ class TracerWrapper(object):
             )
         else:
             obj.__spans_processor: SpanProcessor = BatchSpanProcessor(
-                obj.__spans_exporter
+                obj.__spans_exporter,
+                max_export_batch_size=max_export_batch_size,
             )
             obj.__spans_processor_original_on_start = None

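`max_export_batch_size` maps straight onto the OpenTelemetry `BatchSpanProcessor` parameter of the same name; passing `None` falls back to the SDK default (512, or `OTEL_BSP_MAX_EXPORT_BATCH_SIZE`). A standalone illustration of the knob:

```python
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

# Export at most 64 spans per batch instead of the default 512; smaller
# batches trade throughput for lower memory use and faster first export.
processor = BatchSpanProcessor(
    ConsoleSpanExporter(),
    max_export_batch_size=64,
)
```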
@@ -122,6 +127,8 @@ class TracerWrapper(object):
         instrument_set = init_instrumentations(
             should_enrich_metrics,
             instruments,
+            base_http_url=base_http_url,
+            project_api_key=project_api_key,
         )

         if not instrument_set:
@@ -286,6 +293,8 @@ def init_instrumentations(
     should_enrich_metrics: bool,
     instruments: Optional[Set[Instruments]] = None,
     block_instruments: Optional[Set[Instruments]] = None,
+    base_http_url: Optional[str] = None,
+    project_api_key: Optional[str] = None,
 ):
     block_instruments = block_instruments or set()
     # These libraries are not instrumented by default,
@@ -397,6 +406,9 @@ def init_instrumentations(
     elif instrument == Instruments.WEAVIATE:
         if init_weaviate_instrumentor():
             instrument_set = True
+    elif instrument == Instruments.PLAYWRIGHT:
+        if init_browser_tracing(base_http_url, project_api_key):
+            instrument_set = True
     else:
         module_logger.warning(
             f"Warning: {instrument} instrumentation does not exist."
lmnr/sdk/browser/__init__.py ADDED
@@ -0,0 +1,9 @@
+from lmnr.openllmetry_sdk.utils.package_check import is_package_installed
+
+
+def init_browser_tracing(http_url: str, project_api_key: str):
+    if is_package_installed("playwright"):
+        from .playwright_patch import init_playwright_tracing
+
+        init_playwright_tracing(http_url, project_api_key)
+    # Other browsers can be added here
lmnr/sdk/browser/playwright_patch.py ADDED
@@ -0,0 +1,249 @@
+import opentelemetry
+import uuid
+import asyncio
+
+try:
+    from playwright.async_api import BrowserContext, Page
+    from playwright.sync_api import (
+        BrowserContext as SyncBrowserContext,
+        Page as SyncPage,
+    )
+except ImportError as e:
+    raise ImportError(
+        f"Attempted to import {__file__}, but it is designed "
+        "to patch Playwright, which is not installed. Use `pip install playwright` "
+        "to install Playwright or remove this import."
+    ) from e
+
+_original_new_page = None
+_original_new_page_async = None
+
+INJECT_PLACEHOLDER = """
+([baseUrl, projectApiKey]) => {
+    const serverUrl = `${baseUrl}/v1/browser-sessions/events`;
+    const FLUSH_INTERVAL = 1000;
+    const HEARTBEAT_INTERVAL = 1000;
+
+    window.rrwebEventsBatch = [];
+
+    window.sendBatch = async () => {
+        if (window.rrwebEventsBatch.length === 0) return;
+
+        const eventsPayload = {
+            sessionId: window.rrwebSessionId,
+            traceId: window.traceId,
+            events: window.rrwebEventsBatch
+        };
+
+        try {
+            const jsonString = JSON.stringify(eventsPayload);
+            const uint8Array = new TextEncoder().encode(jsonString);
+
+            const cs = new CompressionStream('gzip');
+            const compressedStream = await new Response(
+                new Response(uint8Array).body.pipeThrough(cs)
+            ).arrayBuffer();
+
+            const compressedArray = new Uint8Array(compressedStream);
+
+            const blob = new Blob([compressedArray], { type: 'application/octet-stream' });
+
+            const response = await fetch(serverUrl, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'Content-Encoding': 'gzip',
+                    'Authorization': `Bearer ${projectApiKey}`
+                },
+                body: blob,
+                compress: false,
+                credentials: 'omit',
+                mode: 'cors',
+                cache: 'no-cache',
+            });
+
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+
+            window.rrwebEventsBatch = [];
+        } catch (error) {
+            console.error('Failed to send events:', error);
+        }
+    };
+
+    setInterval(() => window.sendBatch(), FLUSH_INTERVAL);
+
+    setInterval(() => {
+        window.rrwebEventsBatch.push({
+            type: 6,
+            data: { source: 'heartbeat' },
+            timestamp: Date.now()
+        });
+    }, HEARTBEAT_INTERVAL);
+
+    window.rrweb.record({
+        emit(event) {
+            window.rrwebEventsBatch.push(event);
+        }
+    });
+
+    window.addEventListener('beforeunload', () => {
+        window.sendBatch();
+    });
+}
+"""
+
+
+def init_playwright_tracing(http_url: str, project_api_key: str):
+
+    def inject_rrweb(page: SyncPage):
+        # Get current trace ID from active span
+        current_span = opentelemetry.trace.get_current_span()
+        current_span.set_attribute("lmnr.internal.has_browser_session", True)
+        trace_id = format(current_span.get_span_context().trace_id, "032x")
+        session_id = str(uuid.uuid4().hex)
+
+        # Generate UUID session ID and set trace ID
+        page.evaluate(
+            """([traceId, sessionId]) => {
+                window.rrwebSessionId = sessionId;
+                window.traceId = traceId;
+            }""",
+            [trace_id, session_id],
+        )
+
+        # Load rrweb from CDN
+        page.add_script_tag(
+            url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
+        )
+
+        # Update the recording setup to include trace ID
+        page.evaluate(
+            INJECT_PLACEHOLDER,
+            [http_url, project_api_key],
+        )
+
+    async def inject_rrweb_async(page: Page):
+        try:
+            # Wait for the page to be in a ready state first
+            await page.wait_for_load_state("domcontentloaded")
+
+            # Get current trace ID from active span
+            current_span = opentelemetry.trace.get_current_span()
+            current_span.set_attribute("lmnr.internal.has_browser_session", True)
+            trace_id = format(current_span.get_span_context().trace_id, "032x")
+            session_id = str(uuid.uuid4().hex)
+
+            # Generate UUID session ID and set trace ID
+            await page.evaluate(
+                """([traceId, sessionId]) => {
+                    window.rrwebSessionId = sessionId;
+                    window.traceId = traceId;
+                }""",
+                [trace_id, session_id],
+            )
+
+            # Load rrweb from CDN
+            await page.add_script_tag(
+                url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
+            )
+
+            await page.wait_for_function(
+                """(() => window.rrweb || 'rrweb' in window)"""
+            )
+
+            # Update the recording setup to include trace ID
+            await page.evaluate(
+                INJECT_PLACEHOLDER,
+                [http_url, project_api_key],
+            )
+        except Exception as e:
+            print(f"Error injecting rrweb: {e}")
+
+    def handle_navigation(page: SyncPage):
+        def on_load():
+            inject_rrweb(page)
+
+        page.on("load", on_load)
+        inject_rrweb(page)
+
+    async def handle_navigation_async(page: Page):
+        async def on_load():
+            await inject_rrweb_async(page)
+
+        page.on("load", lambda: asyncio.create_task(on_load()))
+        await inject_rrweb_async(page)
+
+    async def patched_new_page_async(self: BrowserContext, *args, **kwargs):
+        # Modify CSP to allow required domains
+        async def handle_route(route):
+            try:
+                response = await route.fetch()
+                headers = dict(response.headers)
+
+                # Find and modify CSP header
+                for header_name in headers:
+                    if header_name.lower() == "content-security-policy":
+                        csp = headers[header_name]
+                        parts = csp.split(";")
+                        for i, part in enumerate(parts):
+                            if "script-src" in part:
+                                parts[i] = f"{part.strip()} cdn.jsdelivr.net"
+                            elif "connect-src" in part:
+                                parts[i] = f"{part.strip()} " + http_url
+                        if not any("connect-src" in part for part in parts):
+                            parts.append(" connect-src 'self' " + http_url)
+                        headers[header_name] = ";".join(parts)
+
+                await route.fulfill(response=response, headers=headers)
+            except Exception:
+                await route.continue_()
+
+        await self.route("**/*", handle_route)
+        page = await _original_new_page_async(self, *args, **kwargs)
+        await handle_navigation_async(page)
+        return page
+
+    def patched_new_page(self: SyncBrowserContext, *args, **kwargs):
+        # Modify CSP to allow required domains
+        def handle_route(route):
+            try:
+                response = route.fetch()
+                headers = dict(response.headers)
+
+                # Find and modify CSP header
+                for header_name in headers:
+                    if header_name.lower() == "content-security-policy":
+                        csp = headers[header_name]
+                        parts = csp.split(";")
+                        for i, part in enumerate(parts):
+                            if "script-src" in part:
+                                parts[i] = f"{part.strip()} cdn.jsdelivr.net"
+                            elif "connect-src" in part:
+                                parts[i] = f"{part.strip()} " + http_url
+                        if not any("connect-src" in part for part in parts):
+                            parts.append(" connect-src 'self' " + http_url)
+                        headers[header_name] = ";".join(parts)
+
+                route.fulfill(response=response, headers=headers)
+            except Exception:
+                # Continue with the original request without modification
+                route.continue_()
+
+        self.route("**/*", handle_route)
+        page = _original_new_page(self, *args, **kwargs)
+        handle_navigation(page)
+        return page
+
+    def patch_browser():
+        global _original_new_page, _original_new_page_async
+        if _original_new_page_async is None:
+            _original_new_page_async = BrowserContext.new_page
+            BrowserContext.new_page = patched_new_page_async
+
+        if _original_new_page is None:
+            _original_new_page = SyncBrowserContext.new_page
+            SyncBrowserContext.new_page = patched_new_page
+
+    patch_browser()
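Once `init_playwright_tracing` has run (triggered via `init_browser_tracing` during instrumentation setup), `BrowserContext.new_page` is monkey-patched, so any page created afterwards gets the CSP rewrite route and the rrweb recorder without code changes. A sketch of what user code looks like, assuming Laminar has been initialized with a project API key and playwright is installed:

```python
from lmnr import Laminar
from playwright.sync_api import sync_playwright

Laminar.initialize(project_api_key="<YOUR_PROJECT_API_KEY>")  # placeholder key

with sync_playwright() as p:
    browser = p.chromium.launch()
    context = browser.new_context()
    page = context.new_page()  # patched: installs the route and injects rrweb
    page.goto("https://example.com")
    browser.close()
```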
lmnr/sdk/evaluations.py CHANGED
@@ -2,7 +2,6 @@ import asyncio
 import re
 import sys
 import uuid
-
 from tqdm import tqdm
 from typing import Any, Awaitable, Optional, Set, Union

@@ -27,9 +26,12 @@ from .types import (
 from .utils import is_async

 DEFAULT_BATCH_SIZE = 5
+MAX_EXPORT_BATCH_SIZE = 64


-def get_evaluation_url(project_id: str, evaluation_id: str, base_url: Optional[str] = None):
+def get_evaluation_url(
+    project_id: str, evaluation_id: str, base_url: Optional[str] = None
+):
     if not base_url:
         base_url = "https://www.lmnr.ai"

@@ -39,7 +41,7 @@ def get_evaluation_url(project_id: str, evaluation_id: str, base_url: Optional[s
     if url.endswith("localhost") or url.endswith("127.0.0.1"):
         # We best effort assume that the frontend is running on port 3000
         # TODO: expose the frontend port?
-        url = url + ":3000"
+        url = url + ":5667"
     return f"{url}/project/{project_id}/evaluations/{evaluation_id}"


@@ -97,13 +99,14 @@ class Evaluation:
         evaluators: dict[str, EvaluatorFunction],
         human_evaluators: list[HumanEvaluator] = [],
         name: Optional[str] = None,
-        group_id: Optional[str] = None,
-        batch_size: int = DEFAULT_BATCH_SIZE,
+        group_name: Optional[str] = None,
+        concurrency_limit: int = DEFAULT_BATCH_SIZE,
         project_api_key: Optional[str] = None,
         base_url: Optional[str] = None,
         http_port: Optional[int] = None,
         grpc_port: Optional[int] = None,
         instruments: Optional[Set[Instruments]] = None,
+        max_export_batch_size: Optional[int] = MAX_EXPORT_BATCH_SIZE,
     ):
         """
         Initializes an instance of the Evaluations class.
@@ -131,12 +134,12 @@ class Evaluation:
            Used to identify the evaluation in the group.\
            If not provided, a random name will be generated.
            Defaults to None.
-        group_id (Optional[str], optional): an identifier to group\
-           evaluations. Only evaluations within the same group_id can be\
+        group_name (Optional[str], optional): an identifier to group\
+           evaluations. Only evaluations within the same group_name can be\
            visually compared. If not provided, "default" is assigned.
            Defaults to None
-        batch_size (int, optional): The batch size for evaluation. This many\
-           data points will be evaluated in parallel.
+        concurrency_limit (int, optional): The concurrency limit for evaluation. This many\
+           data points will be evaluated in parallel with a pool of workers.
            Defaults to DEFAULT_BATCH_SIZE.
         project_api_key (Optional[str], optional): The project API key.\
            If not provided, LMNR_PROJECT_API_KEY environment variable is\
@@ -180,17 +183,20 @@ class Evaluation:
         self.data = data
         self.executor = executor
         self.evaluators = evaluators
-        self.group_id = group_id
+        self.group_name = group_name
         self.name = name
-        self.batch_size = batch_size
+        self.concurrency_limit = concurrency_limit
+        self.batch_size = concurrency_limit
         self._logger = get_default_logger(self.__class__.__name__)
         self.human_evaluators = human_evaluators
+        self.upload_tasks = []  # track background upload tasks
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
             http_port=http_port,
             grpc_port=grpc_port,
             instruments=instruments,
+            max_export_batch_size=max_export_batch_size,
         )

    async def run(self) -> Awaitable[None]:
@@ -200,49 +206,60 @@
 
     async def _run(self) -> None:
         self.reporter.start(len(self.data))
-
         try:
-            result_datapoints = await self._evaluate_in_batches()
+            evaluation = await L.init_eval(name=self.name, group_name=self.group_name)
+            result_datapoints = await self._evaluate_in_batches(evaluation.id)
+
+            # Wait for all background upload tasks to complete
+            if self.upload_tasks:
+                self._logger.debug(
+                    f"Waiting for {len(self.upload_tasks)} upload tasks to complete"
+                )
+                await asyncio.gather(*self.upload_tasks)
+                self._logger.debug("All upload tasks completed")
         except Exception as e:
             self.reporter.stopWithError(e)
             self.is_finished = True
             return

-        # For now add all human evaluators to all result datapoints
-        # In the future, we will add ways to specify which human evaluators
-        # to add to which result datapoints, e.g. sample some randomly
         for result_datapoint in result_datapoints:
             result_datapoint.human_evaluators = self.human_evaluators or {}

-        evaluation = await L.create_evaluation(
-            data=result_datapoints, group_id=self.group_id, name=self.name
-        )
         average_scores = get_average_scores(result_datapoints)
         self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
         self.is_finished = True

-    async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
-        result_datapoints = []
-        for i in range(0, len(self.data), self.batch_size):
-            batch = (
-                self.data[i : i + self.batch_size]
-                if isinstance(self.data, list)
-                else self.data.slice(i, i + self.batch_size)
-            )
-            batch_datapoints = await self._evaluate_batch(batch)
-            result_datapoints.extend(batch_datapoints)
-            self.reporter.update(len(batch))
-        return result_datapoints
-
-    async def _evaluate_batch(
-        self, batch: list[Datapoint]
+    async def _evaluate_in_batches(
+        self, eval_id: uuid.UUID
     ) -> list[EvaluationResultDatapoint]:
-        batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
-        results = await asyncio.gather(*batch_promises)
-        return results
+
+        semaphore = asyncio.Semaphore(self.concurrency_limit)
+        tasks = []
+        data_iter = self.data if isinstance(self.data, list) else range(len(self.data))
+
+        async def evaluate_task(datapoint, index):
+            try:
+                result = await self._evaluate_datapoint(eval_id, datapoint, index)
+                self.reporter.update(1)
+                return index, result
+            finally:
+                semaphore.release()
+
+        # Create tasks only after acquiring semaphore
+        for idx, item in enumerate(data_iter):
+            await semaphore.acquire()
+            datapoint = item if isinstance(self.data, list) else self.data[item]
+            task = asyncio.create_task(evaluate_task(datapoint, idx))
+            tasks.append(task)
+
+        # Wait for all tasks to complete and preserve order
+        results = await asyncio.gather(*tasks)
+        ordered_results = [result for _, result in sorted(results, key=lambda x: x[0])]
+
+        return ordered_results

     async def _evaluate_datapoint(
-        self, datapoint: Datapoint
+        self, eval_id: uuid.UUID, datapoint: Datapoint, index: int
     ) -> EvaluationResultDatapoint:
         with L.start_as_current_span("evaluation") as evaluation_span:
             L._set_trace_type(trace_type=TraceType.EVALUATION)
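The fixed-size batching is replaced by a worker-pool pattern: the semaphore is acquired before each task is created and released inside the task, so at most `concurrency_limit` datapoints run at once while the indices restore the original order at the end. The same pattern in isolation:

```python
import asyncio

async def bounded_map(func, items, limit):
    """Run `func` over `items` with at most `limit` concurrent tasks."""
    semaphore = asyncio.Semaphore(limit)

    async def run(index, item):
        try:
            return index, await func(item)
        finally:
            semaphore.release()  # free a worker slot, even on error

    tasks = []
    for index, item in enumerate(items):
        await semaphore.acquire()  # block until a slot is available
        tasks.append(asyncio.create_task(run(index, item)))

    results = await asyncio.gather(*tasks)
    # Restore input order regardless of completion order
    return [value for _, value in sorted(results, key=lambda pair: pair[0])]
```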
@@ -251,11 +268,15 @@
                 "executor", input={"data": datapoint.data}
             ) as executor_span:
                 executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
-                output = (
-                    await self.executor(datapoint.data)
-                    if is_async(self.executor)
-                    else self.executor(datapoint.data)
-                )
+                # Run synchronous executors in a thread pool to avoid blocking
+                if not is_async(self.executor):
+                    loop = asyncio.get_event_loop()
+                    output = await loop.run_in_executor(
+                        None, self.executor, datapoint.data
+                    )
+                else:
+                    output = await self.executor(datapoint.data)
+
                 L.set_span_output(output)
                 executor_span_id = uuid.UUID(
                     int=executor_span.get_span_context().span_id
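Previously a synchronous executor ran directly on the event loop, stalling every other evaluation task for its whole duration. Offloading it with `run_in_executor` keeps the loop responsive; a self-contained sketch (the SDK calls `get_event_loop()`, which resolves to the same running loop here):

```python
import asyncio
import time

def slow_sync_executor(data):
    time.sleep(1)  # stands in for a blocking model call
    return data

async def main():
    loop = asyncio.get_running_loop()
    # The default (None) executor is a shared thread pool; other coroutines
    # keep running while the blocking call executes in a worker thread.
    output = await loop.run_in_executor(None, slow_sync_executor, {"x": 1})
    print(output)

asyncio.run(main())
```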
@@ -283,14 +304,28 @@
                     scores.update(value)

             trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
-            return EvaluationResultDatapoint(
-                data=datapoint.data,
-                target=target,
-                executor_output=output,
-                scores=scores,
-                trace_id=trace_id,
-                executor_span_id=executor_span_id,
-            )
+
+            datapoint = EvaluationResultDatapoint(
+                data=datapoint.data,
+                target=target,
+                executor_output=output,
+                scores=scores,
+                trace_id=trace_id,
+                # For now add all human evaluators to all result datapoints
+                # In the future, we will add ways to specify which human evaluators
+                # to add to which result datapoints, e.g. sample some randomly
+                human_evaluators=self.human_evaluators,
+                executor_span_id=executor_span_id,
+                index=index,
+            )
+
+            # Create background upload task without awaiting it
+            upload_task = asyncio.create_task(
+                L.save_eval_datapoints(eval_id, [datapoint], self.group_name)
+            )
+            self.upload_tasks.append(upload_task)
+
+            return datapoint


 def evaluate(
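Each finished datapoint is now uploaded in the background instead of in one request at the end; `_run` gathers `self.upload_tasks` afterwards so nothing is dropped. The fire-and-forget shape in isolation:

```python
import asyncio

async def save(points):
    await asyncio.sleep(0.1)  # stands in for L.save_eval_datapoints

async def main():
    upload_tasks = []
    for batch in ([1], [2], [3]):
        # Schedule the upload without awaiting it, so evaluation keeps moving
        upload_tasks.append(asyncio.create_task(save(batch)))
    # Later, await them all so no upload is lost at shutdown
    await asyncio.gather(*upload_tasks)

asyncio.run(main())
```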
@@ -299,8 +334,9 @@ def evaluate(
     evaluators: dict[str, EvaluatorFunction],
     human_evaluators: list[HumanEvaluator] = [],
     name: Optional[str] = None,
-    group_id: Optional[str] = None,
-    batch_size: int = DEFAULT_BATCH_SIZE,
+    group_id: Optional[str] = None,  # Deprecated
+    group_name: Optional[str] = None,
+    concurrency_limit: int = DEFAULT_BATCH_SIZE,
     project_api_key: Optional[str] = None,
     base_url: Optional[str] = None,
     http_port: Optional[int] = None,
@@ -318,12 +354,12 @@ def evaluate(

     Parameters:
         data (Union[list[EvaluationDatapoint|dict]], EvaluationDataset]):\
-        List of data points to evaluate or an evaluation dataset.
-        `data` is the input to the executor function,
-        `target` is the input to the evaluator function.
+            List of data points to evaluate or an evaluation dataset.
+            `data` is the input to the executor function,
+            `target` is the input to the evaluator function.
         executor (Callable[..., Any]): The executor function.\
-        Takes the data point + any additional arguments\
-        and returns the output to evaluate.
+            Takes the data point + any additional arguments\
+            and returns the output to evaluate.
         evaluators (List[Callable[..., Any]]):
         evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
             names. Each evaluator function takes the output of the executor\
@@ -337,14 +373,19 @@ def evaluate(
            evaluator only holds the queue name.
            Defaults to an empty list.
        name (Optional[str], optional): Optional name of the evaluation.\
-           Used to identify the evaluation in the group.\
-           If not provided, a random name will be generated.
-           Defaults to None.
-       group_id (Optional[str], optional): an identifier to group evaluations.\
+           Used to identify the evaluation in the group. If not provided, a\
+           random name will be generated.
+           Defaults to None.
+       group_id (Optional[str], optional): [DEPRECATED] Use group_name instead.
+           An identifier to group evaluations.\
            Only evaluations within the same group_id can be\
            visually compared. If not provided, set to "default".
            Defaults to None
-       batch_size (int, optional): The batch size for evaluation.
+       group_name (Optional[str], optional): An identifier to group evaluations.\
+           Only evaluations within the same group_name can be visually compared.\
+           If not provided, set to "default".
+           Defaults to None
+       concurrency_limit (int, optional): The concurrency limit for evaluation.
            Defaults to DEFAULT_BATCH_SIZE.
        project_api_key (Optional[str], optional): The project API key.
            Defaults to None.
@@ -363,15 +404,19 @@ def evaluate(
        will be used.
        Defaults to None.
    """
+    if group_id:
+        raise DeprecationWarning("group_id is deprecated. Use group_name instead.")
+
+    group_name = group_name or group_id

     evaluation = Evaluation(
         data=data,
         executor=executor,
         evaluators=evaluators,
-        group_id=group_id,
+        group_name=group_name,
         human_evaluators=human_evaluators,
         name=name,
-        batch_size=batch_size,
+        concurrency_limit=concurrency_limit,
         project_api_key=project_api_key,
         base_url=base_url,
         http_port=http_port,
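Since passing `group_id` now raises, existing callers migrate by renaming the argument. A sketch, assuming the documented dict shape for datapoints and the usual `(output, target)` evaluator signature:

```python
from lmnr import evaluate

evaluate(
    data=[{"data": {"question": "What is 2 + 2?"}, "target": "4"}],
    executor=lambda data: "4",  # stand-in for a real model call
    evaluators={"exact_match": lambda output, target: int(output == target)},
    group_name="arithmetic",  # was: group_id="arithmetic"
)
```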
lmnr/sdk/laminar.py CHANGED
@@ -47,7 +47,8 @@ from lmnr.openllmetry_sdk.tracing.tracing import (
 from .log import VerboseColorfulFormatter

 from .types import (
-    CreateEvaluationResponse,
+    HumanEvaluator,
+    InitEvaluationResponse,
     EvaluationResultDatapoint,
     GetDatapointsResponse,
     PipelineRunError,
@@ -78,6 +79,7 @@ class Laminar:
         grpc_port: Optional[int] = None,
         instruments: Optional[Set[Instruments]] = None,
         disable_batch: bool = False,
+        max_export_batch_size: Optional[int] = None,
     ):
         """Initialize Laminar context across the application.
         This method must be called before using any other Laminar methods or
@@ -142,12 +144,15 @@
         cls._initialize_logger()

         Traceloop.init(
+            base_http_url=cls.__base_http_url,
+            project_api_key=cls.__project_api_key,
             exporter=OTLPSpanExporter(
                 endpoint=cls.__base_grpc_url,
                 headers={"authorization": f"Bearer {cls.__project_api_key}"},
             ),
             instruments=instruments,
             disable_batch=disable_batch,
+            max_export_batch_size=max_export_batch_size,
         )

     @classmethod
@@ -687,33 +692,44 @@
         set_association_properties(props)

     @classmethod
-    async def create_evaluation(
-        cls,
-        data: list[EvaluationResultDatapoint],
-        group_id: Optional[str] = None,
-        name: Optional[str] = None,
-    ) -> CreateEvaluationResponse:
+    async def init_eval(
+        cls, name: Optional[str] = None, group_name: Optional[str] = None
+    ) -> InitEvaluationResponse:
         async with aiohttp.ClientSession() as session:
             async with session.post(
-                cls.__base_http_url + "/v1/evaluations",
+                cls.__base_http_url + "/v1/evals",
                 json={
-                    "groupId": group_id,
                     "name": name,
-                    "points": [datapoint.to_dict() for datapoint in data],
+                    "groupName": group_name,
                 },
                 headers=cls._headers(),
             ) as response:
-                if response.status != 200:
-                    try:
-                        resp_json = await response.json()
-                        raise ValueError(
-                            f"Error creating evaluation {json.dumps(resp_json)}"
-                        )
-                    except aiohttp.ClientError:
-                        text = await response.text()
-                        raise ValueError(f"Error creating evaluation {text}")
                 resp_json = await response.json()
-                return CreateEvaluationResponse.model_validate(resp_json)
+                return InitEvaluationResponse.model_validate(resp_json)
+
+    @classmethod
+    async def save_eval_datapoints(
+        cls,
+        eval_id: uuid.UUID,
+        datapoints: list[EvaluationResultDatapoint],
+        groupName: Optional[str] = None,
+        human_evaluators: Optional[list[HumanEvaluator]] = None,
+    ):
+        async with aiohttp.ClientSession() as session:
+
+            async with session.post(
+                cls.__base_http_url + f"/v1/evals/{eval_id}/datapoints",
+                json={
+                    "points": [datapoint.to_dict() for datapoint in datapoints],
+                    "groupName": groupName,
+                    "humanEvaluators": human_evaluators,
+                },
+                headers=cls._headers(),
+            ) as response:
+                if response.status != 200:
+                    raise ValueError(
+                        f"Error saving evaluation datapoints: {response.text}"
+                    )

     @classmethod
     def get_datapoints(
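`create_evaluation` (one shot, all datapoints in the request body) is replaced by a two-step flow: `init_eval` registers the evaluation, then `save_eval_datapoints` streams results to it as they finish. A sketch, assuming `Laminar.initialize` has already been called:

```python
import asyncio
from lmnr import Laminar

async def report(result_datapoints):
    # Step 1: create the evaluation shell and get its id.
    evaluation = await Laminar.init_eval(name="my-eval", group_name="default")
    # Step 2: upload datapoints incrementally (here, all at once).
    await Laminar.save_eval_datapoints(evaluation.id, result_datapoints)

# asyncio.run(report([...]))  # expects list[EvaluationResultDatapoint]
```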
lmnr/sdk/types.py CHANGED
@@ -141,11 +141,8 @@ EvaluatorFunction = Callable[
 class HumanEvaluator(pydantic.BaseModel):
     queueName: str

-    def __init__(self, queue_name: str):
-        super().__init__(queueName=queue_name)

-
-class CreateEvaluationResponse(pydantic.BaseModel):
+class InitEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime
     groupId: str
@@ -161,6 +158,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
     human_evaluators: list[HumanEvaluator] = pydantic.Field(default_factory=list)
     trace_id: uuid.UUID
     executor_span_id: uuid.UUID
+    index: int

     # uuid is not serializable by default, so we need to convert it to a string
     def to_dict(self):
@@ -180,6 +178,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
                     for v in self.human_evaluators
                 ],
                 "executorSpanId": str(self.executor_span_id),
+                "index": self.index,
             }
         except Exception as e:
             raise ValueError(f"Error serializing EvaluationResultDatapoint: {e}")
lmnr-0.4.57.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lmnr
-Version: 0.4.55
+Version: 0.4.57
 Summary: Python SDK for Laminar
 License: Apache-2.0
 Author: lmnr.ai
@@ -41,7 +41,6 @@ Provides-Extra: watsonx
 Provides-Extra: weaviate
 Requires-Dist: aiohttp (>=3.0)
 Requires-Dist: argparse (>=1.0)
-Requires-Dist: deprecated (>=1.0)
 Requires-Dist: grpcio (<1.68.0)
 Requires-Dist: opentelemetry-api (>=1.28.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.28.0)
lmnr-0.4.57.dist-info/RECORD CHANGED
@@ -2,32 +2,34 @@ lmnr/__init__.py,sha256=Bqxs-8Mh4h69pOHURgBCgo9EW1GwChebxP6wUX2-bsU,452
 lmnr/cli.py,sha256=4J2RZQhHM3jJcjFvBC4PChQTS-ukxykVvI0X6lTkK-o,2918
 lmnr/openllmetry_sdk/.flake8,sha256=bCxuDlGx3YQ55QHKPiGJkncHanh9qGjQJUujcFa3lAU,150
 lmnr/openllmetry_sdk/.python-version,sha256=9OLQBQVbD4zE4cJsPePhnAfV_snrPSoqEQw-PXgPMOs,6
-lmnr/openllmetry_sdk/__init__.py,sha256=zPxPkkC43MX8SOK0LbItBnzBQBCr_t1zy9QxhaBMLX8,2355
+lmnr/openllmetry_sdk/__init__.py,sha256=TpFNPrRosz-BUpWdfT9ROiZPTGA_JshNwqOfiXlR0MU,2643
 lmnr/openllmetry_sdk/config/__init__.py,sha256=DliMGp2NjYAqRFLKpWQPUKjGMHRO8QsVfazBA1qENQ8,248
 lmnr/openllmetry_sdk/decorators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lmnr/openllmetry_sdk/decorators/base.py,sha256=BhfTJHjGnKXZRyug41wnmvjbg2UDq2p7eLEak7RsCXI,5779
-lmnr/openllmetry_sdk/instruments.py,sha256=CGGUEELldrXkQwAzAkDeAtDq07_pjhz7i14a92P7C_E,1036
+lmnr/openllmetry_sdk/instruments.py,sha256=X1S3XbFF_RDlbxxbNxqKKJ9JNUStdTI6gLYCPWnoOTs,1066
 lmnr/openllmetry_sdk/tracing/__init__.py,sha256=xT73L1t2si2CM6QmMiTZ7zn-dKKYBLNrpBBWq6WfVBw,68
 lmnr/openllmetry_sdk/tracing/attributes.py,sha256=B_4KVYWAUu-6DQmsm2eCJQcTxm8pG1EByCBK3uOPkuI,1293
 lmnr/openllmetry_sdk/tracing/content_allow_list.py,sha256=3feztm6PBWNelc8pAZUcQyEGyeSpNiVKjOaDk65l2ps,846
 lmnr/openllmetry_sdk/tracing/context_manager.py,sha256=rdSus-p-TaevQ8hIAhfbnZr5dTqRvACDkzXGDpflncY,306
-lmnr/openllmetry_sdk/tracing/tracing.py,sha256=N5EeK4yoSa11CPh4J0OvHnDy8uxx018lzJ0FQLVapQ4,32360
+lmnr/openllmetry_sdk/tracing/tracing.py,sha256=APh1kik8iYegVrqLe09Y4NuHvTg3r6ifAAHrAMcXpYE,32961
 lmnr/openllmetry_sdk/utils/__init__.py,sha256=pNhf0G3vTd5ccoc03i1MXDbricSaiqCbi1DLWhSekK8,604
 lmnr/openllmetry_sdk/utils/in_memory_span_exporter.py,sha256=H_4TRaThMO1H6vUQ0OpQvzJk_fZH0OOsRAM1iZQXsR8,2112
 lmnr/openllmetry_sdk/utils/json_encoder.py,sha256=dK6b_axr70IYL7Vv-bu4wntvDDuyntoqsHaddqX7P58,463
 lmnr/openllmetry_sdk/utils/package_check.py,sha256=Da4WoTX6J9naODs99DnY9BA-2MxH2pWLmbbVkbQ7VUQ,236
 lmnr/openllmetry_sdk/version.py,sha256=OlatFEFA4ttqSSIiV8jdE-sq3KG5zu2hnC4B4mzWF3s,23
 lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+lmnr/sdk/browser/__init__.py,sha256=NSP5sB-dm-f0FP70_GMvVrNFwc5rHf7SW0_Oisyo3cE,343
+lmnr/sdk/browser/playwright_patch.py,sha256=AGK2C7VMrocpe-0GvM23SxF-IY4NBjLnB9m_xT-htJ8,8871
 lmnr/sdk/datasets.py,sha256=hJcQcwTJbtA4COoVG3god4xll9TBSDMfvrhKmMfanjg,1567
 lmnr/sdk/decorators.py,sha256=ja2EUWUWvFOp28ER0k78PRuxNahwCVyH0TdM3U-xY7U,1856
 lmnr/sdk/eval_control.py,sha256=G6Fg3Xx_KWv72iBaWlNMdyRTF2bZFQnwJ68sJNSpIcY,177
-lmnr/sdk/evaluations.py,sha256=dUIMEmKUzkOmHZ3nxlddk9kKm518C6xvElpgtNsql10,16344
-lmnr/sdk/laminar.py,sha256=611MLSJwGxVHd7LgW0kPCtwPB2rLlHE_BtVyVrIKFz0,31447
+lmnr/sdk/evaluations.py,sha256=vjX5Q860X2fSJo_hANzNSowrUGqFNyzwbTQM35hd1Wo,18300
+lmnr/sdk/laminar.py,sha256=-hLbLNtTZ9UZlHglP0UwvVIikWogHO3EjhNdNcnK0Gc,31968
 lmnr/sdk/log.py,sha256=nt_YMmPw1IRbGy0b7q4rTtP4Yo3pQfNxqJPXK3nDSNQ,2213
-lmnr/sdk/types.py,sha256=FCNoFoa0ingOvpXGfbiETVsakYyq9Zpoc56MXJ1YDzQ,6390
+lmnr/sdk/types.py,sha256=U-e09GlX5cPBNPw84PtPq-17kiqxyoznYlbo5ybAI4o,6351
 lmnr/sdk/utils.py,sha256=Uk8y15x-sd5tP2ERONahElLDJVEy_3dA_1_5g9A6auY,3358
-lmnr-0.4.55.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
-lmnr-0.4.55.dist-info/METADATA,sha256=cpdTOdLDIxozdW4A4qwVS2PiAHdeH3th_EZU4Qh_inQ,13861
-lmnr-0.4.55.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-lmnr-0.4.55.dist-info/entry_points.txt,sha256=K1jE20ww4jzHNZLnsfWBvU3YKDGBgbOiYG5Y7ivQcq4,37
-lmnr-0.4.55.dist-info/RECORD,,
+lmnr-0.4.57.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
+lmnr-0.4.57.dist-info/METADATA,sha256=vrJgbCUrZN7SqYOQDHNyEQhb5t6vMf4PX7jhJgu7Qeg,13827
+lmnr-0.4.57.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+lmnr-0.4.57.dist-info/entry_points.txt,sha256=K1jE20ww4jzHNZLnsfWBvU3YKDGBgbOiYG5Y7ivQcq4,37
+lmnr-0.4.57.dist-info/RECORD,,