lmnr 0.4.56__tar.gz → 0.4.57__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {lmnr-0.4.56 → lmnr-0.4.57}/PKG-INFO +1 -2
  2. {lmnr-0.4.56 → lmnr-0.4.57}/pyproject.toml +1 -2
  3. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/__init__.py +2 -0
  4. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/tracing.py +3 -1
  5. lmnr-0.4.57/src/lmnr/sdk/browser/playwright_patch.py +249 -0
  6. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/evaluations.py +110 -65
  7. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/laminar.py +34 -20
  8. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/types.py +3 -4
  9. lmnr-0.4.56/src/lmnr/sdk/browser/playwright_patch.py +0 -192
  10. {lmnr-0.4.56 → lmnr-0.4.57}/LICENSE +0 -0
  11. {lmnr-0.4.56 → lmnr-0.4.57}/README.md +0 -0
  12. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/__init__.py +0 -0
  13. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/cli.py +0 -0
  14. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/.flake8 +0 -0
  15. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/.python-version +0 -0
  16. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/config/__init__.py +0 -0
  17. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/decorators/__init__.py +0 -0
  18. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/decorators/base.py +0 -0
  19. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/instruments.py +0 -0
  20. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/__init__.py +0 -0
  21. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/attributes.py +0 -0
  22. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/content_allow_list.py +0 -0
  23. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/context_manager.py +0 -0
  24. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/__init__.py +0 -0
  25. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/in_memory_span_exporter.py +0 -0
  26. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/json_encoder.py +0 -0
  27. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/package_check.py +0 -0
  28. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/version.py +0 -0
  29. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/__init__.py +0 -0
  30. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/browser/__init__.py +0 -0
  31. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/datasets.py +0 -0
  32. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/decorators.py +0 -0
  33. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/eval_control.py +0 -0
  34. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/log.py +0 -0
  35. {lmnr-0.4.56 → lmnr-0.4.57}/src/lmnr/sdk/utils.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: lmnr
- Version: 0.4.56
+ Version: 0.4.57
  Summary: Python SDK for Laminar
  License: Apache-2.0
  Author: lmnr.ai
@@ -41,7 +41,6 @@ Provides-Extra: watsonx
  Provides-Extra: weaviate
  Requires-Dist: aiohttp (>=3.0)
  Requires-Dist: argparse (>=1.0)
- Requires-Dist: deprecated (>=1.0)
  Requires-Dist: grpcio (<1.68.0)
  Requires-Dist: opentelemetry-api (>=1.28.0)
  Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.28.0)
@@ -6,7 +6,7 @@

  [project]
  name = "lmnr"
- version = "0.4.56"
+ version = "0.4.57"
  description = "Python SDK for Laminar"
  authors = [
      { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -27,7 +27,6 @@ dependencies = [
      "opentelemetry-instrumentation-urllib3 (>=0.50b0)",
      "opentelemetry-instrumentation-threading (>=0.50b0)",
      "opentelemetry-semantic-conventions-ai (>=0.4.2)",
-     "deprecated (>=1.0)",
      "tqdm (>=4.0)",
      "argparse (>=1.0)",
      "aiohttp (>=3.0)",
@@ -34,6 +34,7 @@ class Traceloop:
          instruments: Optional[Set[Instruments]] = None,
          base_http_url: Optional[str] = None,
          project_api_key: Optional[str] = None,
+         max_export_batch_size: Optional[int] = None,
      ) -> None:
          if not is_tracing_enabled():
              return
@@ -73,4 +74,5 @@ class Traceloop:
              instruments=instruments,
              base_http_url=base_http_url,
              project_api_key=project_api_key,
+             max_export_batch_size=max_export_batch_size,
          )
@@ -83,6 +83,7 @@ class TracerWrapper(object):
          instruments: Optional[Set[Instruments]] = None,
          base_http_url: Optional[str] = None,
          project_api_key: Optional[str] = None,
+         max_export_batch_size: Optional[int] = None,
      ) -> "TracerWrapper":
          cls._initialize_logger(cls)
          if not hasattr(cls, "instance"):
@@ -109,7 +110,8 @@ class TracerWrapper(object):
              )
          else:
              obj.__spans_processor: SpanProcessor = BatchSpanProcessor(
-                 obj.__spans_exporter
+                 obj.__spans_exporter,
+                 max_export_batch_size=max_export_batch_size,
              )
          obj.__spans_processor_original_on_start = None

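Note: the new max_export_batch_size option is threaded from the public init call down to OpenTelemetry's BatchSpanProcessor, as the hunk above shows. A minimal usage sketch through the SDK's public entry point (the API key placeholder and the value 64 are illustrative, not from this diff):

    from lmnr import Laminar

    # max_export_batch_size is forwarded to BatchSpanProcessor and caps
    # how many spans are exported per batch.
    Laminar.initialize(
        project_api_key="<LMNR_PROJECT_API_KEY>",
        max_export_batch_size=64,
    )
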
@@ -0,0 +1,249 @@
+ import opentelemetry
+ import uuid
+ import asyncio
+
+ try:
+     from playwright.async_api import BrowserContext, Page
+     from playwright.sync_api import (
+         BrowserContext as SyncBrowserContext,
+         Page as SyncPage,
+     )
+ except ImportError as e:
+     raise ImportError(
+         f"Attempted to import {__file__}, but it is designed "
+         "to patch Playwright, which is not installed. Use `pip install playwright` "
+         "to install Playwright or remove this import."
+     ) from e
+
+ _original_new_page = None
+ _original_new_page_async = None
+
+ INJECT_PLACEHOLDER = """
+ ([baseUrl, projectApiKey]) => {
+     const serverUrl = `${baseUrl}/v1/browser-sessions/events`;
+     const FLUSH_INTERVAL = 1000;
+     const HEARTBEAT_INTERVAL = 1000;
+
+     window.rrwebEventsBatch = [];
+
+     window.sendBatch = async () => {
+         if (window.rrwebEventsBatch.length === 0) return;
+
+         const eventsPayload = {
+             sessionId: window.rrwebSessionId,
+             traceId: window.traceId,
+             events: window.rrwebEventsBatch
+         };
+
+         try {
+             const jsonString = JSON.stringify(eventsPayload);
+             const uint8Array = new TextEncoder().encode(jsonString);
+
+             const cs = new CompressionStream('gzip');
+             const compressedStream = await new Response(
+                 new Response(uint8Array).body.pipeThrough(cs)
+             ).arrayBuffer();
+
+             const compressedArray = new Uint8Array(compressedStream);
+
+             const blob = new Blob([compressedArray], { type: 'application/octet-stream' });
+
+             const response = await fetch(serverUrl, {
+                 method: 'POST',
+                 headers: {
+                     'Content-Type': 'application/json',
+                     'Content-Encoding': 'gzip',
+                     'Authorization': `Bearer ${projectApiKey}`
+                 },
+                 body: blob,
+                 compress: false,
+                 credentials: 'omit',
+                 mode: 'cors',
+                 cache: 'no-cache',
+             });
+
+             if (!response.ok) {
+                 throw new Error(`HTTP error! status: ${response.status}`);
+             }
+
+             window.rrwebEventsBatch = [];
+         } catch (error) {
+             console.error('Failed to send events:', error);
+         }
+     };
+
+     setInterval(() => window.sendBatch(), FLUSH_INTERVAL);
+
+     setInterval(() => {
+         window.rrwebEventsBatch.push({
+             type: 6,
+             data: { source: 'heartbeat' },
+             timestamp: Date.now()
+         });
+     }, HEARTBEAT_INTERVAL);
+
+     window.rrweb.record({
+         emit(event) {
+             window.rrwebEventsBatch.push(event);
+         }
+     });
+
+     window.addEventListener('beforeunload', () => {
+         window.sendBatch();
+     });
+ }
+ """
+
+
+ def init_playwright_tracing(http_url: str, project_api_key: str):
+
+     def inject_rrweb(page: SyncPage):
+         # Get current trace ID from active span
+         current_span = opentelemetry.trace.get_current_span()
+         current_span.set_attribute("lmnr.internal.has_browser_session", True)
+         trace_id = format(current_span.get_span_context().trace_id, "032x")
+         session_id = str(uuid.uuid4().hex)
+
+         # Generate UUID session ID and set trace ID
+         page.evaluate(
+             """([traceId, sessionId]) => {
+                 window.rrwebSessionId = sessionId;
+                 window.traceId = traceId;
+             }""",
+             [trace_id, session_id],
+         )
+
+         # Load rrweb from CDN
+         page.add_script_tag(
+             url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
+         )
+
+         # Update the recording setup to include trace ID
+         page.evaluate(
+             INJECT_PLACEHOLDER,
+             [http_url, project_api_key],
+         )
+
+     async def inject_rrweb_async(page: Page):
+         try:
+             # Wait for the page to be in a ready state first
+             await page.wait_for_load_state("domcontentloaded")
+
+             # Get current trace ID from active span
+             current_span = opentelemetry.trace.get_current_span()
+             current_span.set_attribute("lmnr.internal.has_browser_session", True)
+             trace_id = format(current_span.get_span_context().trace_id, "032x")
+             session_id = str(uuid.uuid4().hex)
+
+             # Generate UUID session ID and set trace ID
+             await page.evaluate(
+                 """([traceId, sessionId]) => {
+                     window.rrwebSessionId = sessionId;
+                     window.traceId = traceId;
+                 }""",
+                 [trace_id, session_id],
+             )
+
+             # Load rrweb from CDN
+             await page.add_script_tag(
+                 url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
+             )
+
+             await page.wait_for_function(
+                 """(() => window.rrweb || 'rrweb' in window)"""
+             )
+
+             # Update the recording setup to include trace ID
+             await page.evaluate(
+                 INJECT_PLACEHOLDER,
+                 [http_url, project_api_key],
+             )
+         except Exception as e:
+             print(f"Error injecting rrweb: {e}")
+
+     def handle_navigation(page: SyncPage):
+         def on_load():
+             inject_rrweb(page)
+
+         page.on("load", on_load)
+         inject_rrweb(page)
+
+     async def handle_navigation_async(page: Page):
+         async def on_load():
+             await inject_rrweb_async(page)
+
+         page.on("load", lambda: asyncio.create_task(on_load()))
+         await inject_rrweb_async(page)
+
+     async def patched_new_page_async(self: BrowserContext, *args, **kwargs):
+         # Modify CSP to allow required domains
+         async def handle_route(route):
+             try:
+                 response = await route.fetch()
+                 headers = dict(response.headers)
+
+                 # Find and modify CSP header
+                 for header_name in headers:
+                     if header_name.lower() == "content-security-policy":
+                         csp = headers[header_name]
+                         parts = csp.split(";")
+                         for i, part in enumerate(parts):
+                             if "script-src" in part:
+                                 parts[i] = f"{part.strip()} cdn.jsdelivr.net"
+                             elif "connect-src" in part:
+                                 parts[i] = f"{part.strip()} " + http_url
+                         if not any("connect-src" in part for part in parts):
+                             parts.append(" connect-src 'self' " + http_url)
+                         headers[header_name] = ";".join(parts)
+
+                 await route.fulfill(response=response, headers=headers)
+             except Exception:
+                 await route.continue_()
+
+         await self.route("**/*", handle_route)
+         page = await _original_new_page_async(self, *args, **kwargs)
+         await handle_navigation_async(page)
+         return page
+
+     def patched_new_page(self: SyncBrowserContext, *args, **kwargs):
+         # Modify CSP to allow required domains
+         def handle_route(route):
+             try:
+                 response = route.fetch()
+                 headers = dict(response.headers)
+
+                 # Find and modify CSP header
+                 for header_name in headers:
+                     if header_name.lower() == "content-security-policy":
+                         csp = headers[header_name]
+                         parts = csp.split(";")
+                         for i, part in enumerate(parts):
+                             if "script-src" in part:
+                                 parts[i] = f"{part.strip()} cdn.jsdelivr.net"
+                             elif "connect-src" in part:
+                                 parts[i] = f"{part.strip()} " + http_url
+                         if not any("connect-src" in part for part in parts):
+                             parts.append(" connect-src 'self' " + http_url)
+                         headers[header_name] = ";".join(parts)
+
+                 route.fulfill(response=response, headers=headers)
+             except Exception:
+                 # Continue with the original request without modification
+                 route.continue_()
+
+         self.route("**/*", handle_route)
+         page = _original_new_page(self, *args, **kwargs)
+         handle_navigation(page)
+         return page
+
+     def patch_browser():
+         global _original_new_page, _original_new_page_async
+         if _original_new_page_async is None:
+             _original_new_page_async = BrowserContext.new_page
+             BrowserContext.new_page = patched_new_page_async
+
+         if _original_new_page is None:
+             _original_new_page = SyncBrowserContext.new_page
+             SyncBrowserContext.new_page = patched_new_page
+
+     patch_browser()
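Note: the new module above patches BrowserContext.new_page (sync and async) so every page created from a patched context gets a CSP-relaxing route handler plus rrweb injection on each load. A rough sketch of how it is driven, assuming init_playwright_tracing is called with your Laminar endpoint and key (both values below are placeholders):

    from playwright.sync_api import sync_playwright
    from lmnr.sdk.browser.playwright_patch import init_playwright_tracing

    # Installing the new_page patches is a side effect of this call.
    init_playwright_tracing("https://api.lmnr.ai", "<project-api-key>")

    with sync_playwright() as p:
        browser = p.chromium.launch()
        context = browser.new_context()
        page = context.new_page()  # patched: route handler + rrweb injection
        page.goto("https://example.com")
        browser.close()
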
@@ -2,7 +2,6 @@ import asyncio
  import re
  import sys
  import uuid
-
  from tqdm import tqdm
  from typing import Any, Awaitable, Optional, Set, Union

@@ -27,9 +26,12 @@ from .types import (
  from .utils import is_async

  DEFAULT_BATCH_SIZE = 5
+ MAX_EXPORT_BATCH_SIZE = 64


- def get_evaluation_url(project_id: str, evaluation_id: str, base_url: Optional[str] = None):
+ def get_evaluation_url(
+     project_id: str, evaluation_id: str, base_url: Optional[str] = None
+ ):
      if not base_url:
          base_url = "https://www.lmnr.ai"

@@ -39,7 +41,7 @@ def get_evaluation_url(project_id: str, evaluation_id: str, base_url: Optional[s
      if url.endswith("localhost") or url.endswith("127.0.0.1"):
          # We best effort assume that the frontend is running on port 3000
          # TODO: expose the frontend port?
-         url = url + ":3000"
+         url = url + ":5667"
      return f"{url}/project/{project_id}/evaluations/{evaluation_id}"


@@ -97,13 +99,14 @@ class Evaluation:
          evaluators: dict[str, EvaluatorFunction],
          human_evaluators: list[HumanEvaluator] = [],
          name: Optional[str] = None,
-         group_id: Optional[str] = None,
-         batch_size: int = DEFAULT_BATCH_SIZE,
+         group_name: Optional[str] = None,
+         concurrency_limit: int = DEFAULT_BATCH_SIZE,
          project_api_key: Optional[str] = None,
          base_url: Optional[str] = None,
          http_port: Optional[int] = None,
          grpc_port: Optional[int] = None,
          instruments: Optional[Set[Instruments]] = None,
+         max_export_batch_size: Optional[int] = MAX_EXPORT_BATCH_SIZE,
      ):
          """
          Initializes an instance of the Evaluations class.
@@ -131,12 +134,12 @@ class Evaluation:
              Used to identify the evaluation in the group.\
              If not provided, a random name will be generated.
              Defaults to None.
-         group_id (Optional[str], optional): an identifier to group\
-             evaluations. Only evaluations within the same group_id can be\
+         group_name (Optional[str], optional): an identifier to group\
+             evaluations. Only evaluations within the same group_name can be\
              visually compared. If not provided, "default" is assigned.
              Defaults to None
-         batch_size (int, optional): The batch size for evaluation. This many\
-             data points will be evaluated in parallel.
+         concurrency_limit (int, optional): The concurrency limit for evaluation. This many\
+             data points will be evaluated in parallel with a pool of workers.
              Defaults to DEFAULT_BATCH_SIZE.
          project_api_key (Optional[str], optional): The project API key.\
              If not provided, LMNR_PROJECT_API_KEY environment variable is\
@@ -180,17 +183,20 @@ class Evaluation:
          self.data = data
          self.executor = executor
          self.evaluators = evaluators
-         self.group_id = group_id
+         self.group_name = group_name
          self.name = name
-         self.batch_size = batch_size
+         self.concurrency_limit = concurrency_limit
+         self.batch_size = concurrency_limit
          self._logger = get_default_logger(self.__class__.__name__)
          self.human_evaluators = human_evaluators
+         self.upload_tasks = []  # Add this line to track upload tasks
          L.initialize(
              project_api_key=project_api_key,
              base_url=base_url,
              http_port=http_port,
              grpc_port=grpc_port,
              instruments=instruments,
+             max_export_batch_size=max_export_batch_size,
          )

      async def run(self) -> Awaitable[None]:
@@ -200,49 +206,60 @@ class Evaluation:

      async def _run(self) -> None:
          self.reporter.start(len(self.data))
-
          try:
-             result_datapoints = await self._evaluate_in_batches()
+             evaluation = await L.init_eval(name=self.name, group_name=self.group_name)
+             result_datapoints = await self._evaluate_in_batches(evaluation.id)
+
+             # Wait for all background upload tasks to complete
+             if self.upload_tasks:
+                 self._logger.debug(
+                     f"Waiting for {len(self.upload_tasks)} upload tasks to complete"
+                 )
+                 await asyncio.gather(*self.upload_tasks)
+                 self._logger.debug("All upload tasks completed")
          except Exception as e:
              self.reporter.stopWithError(e)
              self.is_finished = True
              return

-         # For now add all human evaluators to all result datapoints
-         # In the future, we will add ways to specify which human evaluators
-         # to add to which result datapoints, e.g. sample some randomly
          for result_datapoint in result_datapoints:
              result_datapoint.human_evaluators = self.human_evaluators or {}

-         evaluation = await L.create_evaluation(
-             data=result_datapoints, group_id=self.group_id, name=self.name
-         )
          average_scores = get_average_scores(result_datapoints)
          self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
          self.is_finished = True

-     async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
-         result_datapoints = []
-         for i in range(0, len(self.data), self.batch_size):
-             batch = (
-                 self.data[i : i + self.batch_size]
-                 if isinstance(self.data, list)
-                 else self.data.slice(i, i + self.batch_size)
-             )
-             batch_datapoints = await self._evaluate_batch(batch)
-             result_datapoints.extend(batch_datapoints)
-             self.reporter.update(len(batch))
-         return result_datapoints
-
-     async def _evaluate_batch(
-         self, batch: list[Datapoint]
+     async def _evaluate_in_batches(
+         self, eval_id: uuid.UUID
      ) -> list[EvaluationResultDatapoint]:
-         batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
-         results = await asyncio.gather(*batch_promises)
-         return results
+
+         semaphore = asyncio.Semaphore(self.concurrency_limit)
+         tasks = []
+         data_iter = self.data if isinstance(self.data, list) else range(len(self.data))
+
+         async def evaluate_task(datapoint, index):
+             try:
+                 result = await self._evaluate_datapoint(eval_id, datapoint, index)
+                 self.reporter.update(1)
+                 return index, result
+             finally:
+                 semaphore.release()
+
+         # Create tasks only after acquiring semaphore
+         for idx, item in enumerate(data_iter):
+             await semaphore.acquire()
+             datapoint = item if isinstance(self.data, list) else self.data[item]
+             task = asyncio.create_task(evaluate_task(datapoint, idx))
+             tasks.append(task)
+
+         # Wait for all tasks to complete and preserve order
+         results = await asyncio.gather(*tasks)
+         ordered_results = [result for _, result in sorted(results, key=lambda x: x[0])]
+
+         return ordered_results

      async def _evaluate_datapoint(
-         self, datapoint: Datapoint
+         self, eval_id: uuid.UUID, datapoint: Datapoint, index: int
      ) -> EvaluationResultDatapoint:
          with L.start_as_current_span("evaluation") as evaluation_span:
              L._set_trace_type(trace_type=TraceType.EVALUATION)
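Note: the hunk above swaps fixed-size batches for a semaphore-bounded worker pool; the semaphore is acquired before each task is created, so at most concurrency_limit datapoints run at once, and results are re-sorted by index afterwards. The same pattern in isolation, as a generic sketch (all names invented):

    import asyncio

    async def bounded_gather(items, worker, limit):
        # Acquire before creating the task so at most `limit` run at once.
        sem = asyncio.Semaphore(limit)

        async def run(i, item):
            try:
                return i, await worker(item)
            finally:
                sem.release()

        tasks = []
        for i, item in enumerate(items):
            await sem.acquire()
            tasks.append(asyncio.create_task(run(i, item)))
        results = await asyncio.gather(*tasks)
        # Tag-and-sort restores input order regardless of completion order.
        return [r for _, r in sorted(results, key=lambda t: t[0])]
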
@@ -251,11 +268,15 @@ class Evaluation:
                  "executor", input={"data": datapoint.data}
              ) as executor_span:
                  executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
-                 output = (
-                     await self.executor(datapoint.data)
-                     if is_async(self.executor)
-                     else self.executor(datapoint.data)
-                 )
+                 # Run synchronous executors in a thread pool to avoid blocking
+                 if not is_async(self.executor):
+                     loop = asyncio.get_event_loop()
+                     output = await loop.run_in_executor(
+                         None, self.executor, datapoint.data
+                     )
+                 else:
+                     output = await self.executor(datapoint.data)
+
                  L.set_span_output(output)
                  executor_span_id = uuid.UUID(
                      int=executor_span.get_span_context().span_id
@@ -283,14 +304,28 @@ class Evaluation:
                  scores.update(value)

              trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
-             return EvaluationResultDatapoint(
-                 data=datapoint.data,
-                 target=target,
-                 executor_output=output,
-                 scores=scores,
-                 trace_id=trace_id,
-                 executor_span_id=executor_span_id,
-             )
+
+             datapoint = EvaluationResultDatapoint(
+                 data=datapoint.data,
+                 target=target,
+                 executor_output=output,
+                 scores=scores,
+                 trace_id=trace_id,
+                 # For now add all human evaluators to all result datapoints
+                 # In the future, we will add ways to specify which human evaluators
+                 # to add to which result datapoints, e.g. sample some randomly
+                 human_evaluators=self.human_evaluators,
+                 executor_span_id=executor_span_id,
+                 index=index,
+             )
+
+             # Create background upload task without awaiting it
+             upload_task = asyncio.create_task(
+                 L.save_eval_datapoints(eval_id, [datapoint], self.group_name)
+             )
+             self.upload_tasks.append(upload_task)
+
+             return datapoint


  def evaluate(
@@ -299,8 +334,9 @@ def evaluate(
      evaluators: dict[str, EvaluatorFunction],
      human_evaluators: list[HumanEvaluator] = [],
      name: Optional[str] = None,
-     group_id: Optional[str] = None,
-     batch_size: int = DEFAULT_BATCH_SIZE,
+     group_id: Optional[str] = None,  # Deprecated
+     group_name: Optional[str] = None,
+     concurrency_limit: int = DEFAULT_BATCH_SIZE,
      project_api_key: Optional[str] = None,
      base_url: Optional[str] = None,
      http_port: Optional[int] = None,
@@ -318,12 +354,12 @@ def evaluate(

      Parameters:
          data (Union[list[EvaluationDatapoint|dict]], EvaluationDataset]):\
-             List of data points to evaluate or an evaluation dataset.
-             `data` is the input to the executor function,
-             `target` is the input to the evaluator function.
+             List of data points to evaluate or an evaluation dataset.
+             `data` is the input to the executor function,
+             `target` is the input to the evaluator function.
          executor (Callable[..., Any]): The executor function.\
-             Takes the data point + any additional arguments\
-             and returns the output to evaluate.
+             Takes the data point + any additional arguments\
+             and returns the output to evaluate.
          evaluators (List[Callable[..., Any]]):
          evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
              names. Each evaluator function takes the output of the executor\
@@ -337,14 +373,19 @@ def evaluate(
              evaluator only holds the queue name.
              Defaults to an empty list.
          name (Optional[str], optional): Optional name of the evaluation.\
-             Used to identify the evaluation in the group.\
-             If not provided, a random name will be generated.
-             Defaults to None.
-         group_id (Optional[str], optional): an identifier to group evaluations.\
+             Used to identify the evaluation in the group. If not provided, a\
+             random name will be generated.
+             Defaults to None.
+         group_id (Optional[str], optional): [DEPRECATED] Use group_name instead.
+             An identifier to group evaluations.\
              Only evaluations within the same group_id can be\
              visually compared. If not provided, set to "default".
              Defaults to None
-         batch_size (int, optional): The batch size for evaluation.
+         group_name (Optional[str], optional): An identifier to group evaluations.\
+             Only evaluations within the same group_name can be visually compared.\
+             If not provided, set to "default".
+             Defaults to None
+         concurrency_limit (int, optional): The concurrency limit for evaluation.
              Defaults to DEFAULT_BATCH_SIZE.
          project_api_key (Optional[str], optional): The project API key.
              Defaults to None.
@@ -363,15 +404,19 @@ def evaluate(
          will be used.
          Defaults to None.
      """
+     if group_id:
+         raise DeprecationWarning("group_id is deprecated. Use group_name instead.")
+
+     group_name = group_name or group_id

      evaluation = Evaluation(
          data=data,
          executor=executor,
          evaluators=evaluators,
-         group_id=group_id,
+         group_name=group_name,
          human_evaluators=human_evaluators,
          name=name,
-         batch_size=batch_size,
+         concurrency_limit=concurrency_limit,
          project_api_key=project_api_key,
          base_url=base_url,
          http_port=http_port,
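Note: taken together, the evaluate() changes rename group_id to group_name and batch_size to concurrency_limit. A call-site sketch under the new names (the data, executor, and evaluator below are invented placeholders):

    from lmnr import evaluate

    evaluate(
        data=[{"data": {"question": "2 + 2?"}, "target": {"answer": "4"}}],
        executor=lambda data: data["question"],  # placeholder executor
        evaluators={
            "exact": lambda output, target: int(output == target["answer"])
        },
        group_name="regression-suite",  # was group_id
        concurrency_limit=5,            # was batch_size
    )
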
@@ -47,7 +47,8 @@ from lmnr.openllmetry_sdk.tracing.tracing import (
  from .log import VerboseColorfulFormatter

  from .types import (
-     CreateEvaluationResponse,
+     HumanEvaluator,
+     InitEvaluationResponse,
      EvaluationResultDatapoint,
      GetDatapointsResponse,
      PipelineRunError,
@@ -78,6 +79,7 @@ class Laminar:
          grpc_port: Optional[int] = None,
          instruments: Optional[Set[Instruments]] = None,
          disable_batch: bool = False,
+         max_export_batch_size: Optional[int] = None,
      ):
          """Initialize Laminar context across the application.
          This method must be called before using any other Laminar methods or
@@ -150,6 +152,7 @@ class Laminar:
              ),
              instruments=instruments,
              disable_batch=disable_batch,
+             max_export_batch_size=max_export_batch_size,
          )

      @classmethod
@@ -689,33 +692,44 @@ class Laminar:
          set_association_properties(props)

      @classmethod
-     async def create_evaluation(
-         cls,
-         data: list[EvaluationResultDatapoint],
-         group_id: Optional[str] = None,
-         name: Optional[str] = None,
-     ) -> CreateEvaluationResponse:
+     async def init_eval(
+         cls, name: Optional[str] = None, group_name: Optional[str] = None
+     ) -> InitEvaluationResponse:
          async with aiohttp.ClientSession() as session:
              async with session.post(
-                 cls.__base_http_url + "/v1/evaluations",
+                 cls.__base_http_url + "/v1/evals",
                  json={
-                     "groupId": group_id,
                      "name": name,
-                     "points": [datapoint.to_dict() for datapoint in data],
+                     "groupName": group_name,
                  },
                  headers=cls._headers(),
              ) as response:
-                 if response.status != 200:
-                     try:
-                         resp_json = await response.json()
-                         raise ValueError(
-                             f"Error creating evaluation {json.dumps(resp_json)}"
-                         )
-                     except aiohttp.ClientError:
-                         text = await response.text()
-                         raise ValueError(f"Error creating evaluation {text}")
                  resp_json = await response.json()
-                 return CreateEvaluationResponse.model_validate(resp_json)
+                 return InitEvaluationResponse.model_validate(resp_json)
+
+     @classmethod
+     async def save_eval_datapoints(
+         cls,
+         eval_id: uuid.UUID,
+         datapoints: list[EvaluationResultDatapoint],
+         groupName: Optional[str] = None,
+         human_evaluators: Optional[list[HumanEvaluator]] = None,
+     ):
+         async with aiohttp.ClientSession() as session:
+
+             async with session.post(
+                 cls.__base_http_url + f"/v1/evals/{eval_id}/datapoints",
+                 json={
+                     "points": [datapoint.to_dict() for datapoint in datapoints],
+                     "groupName": groupName,
+                     "humanEvaluators": human_evaluators,
+                 },
+                 headers=cls._headers(),
+             ) as response:
+                 if response.status != 200:
+                     raise ValueError(
+                         f"Error saving evaluation datapoints: {response.text}"
+                     )

      @classmethod
      def get_datapoints(
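Note: the one-shot create_evaluation call is replaced by a two-step flow: init_eval registers the run and returns its id, then save_eval_datapoints posts points to /v1/evals/{id}/datapoints as they complete. A rough sketch of the sequence (the names and the datapoints list are placeholders):

    import asyncio
    from lmnr import Laminar

    async def main():
        evaluation = await Laminar.init_eval(name="my-eval", group_name="default")
        # `datapoints` would be a list of EvaluationResultDatapoint objects.
        await Laminar.save_eval_datapoints(evaluation.id, datapoints, "default")

    asyncio.run(main())
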
@@ -141,11 +141,8 @@ EvaluatorFunction = Callable[
  class HumanEvaluator(pydantic.BaseModel):
      queueName: str

-     def __init__(self, queue_name: str):
-         super().__init__(queueName=queue_name)

-
- class CreateEvaluationResponse(pydantic.BaseModel):
+ class InitEvaluationResponse(pydantic.BaseModel):
      id: uuid.UUID
      createdAt: datetime.datetime
      groupId: str
@@ -161,6 +158,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
      human_evaluators: list[HumanEvaluator] = pydantic.Field(default_factory=list)
      trace_id: uuid.UUID
      executor_span_id: uuid.UUID
+     index: int

      # uuid is not serializable by default, so we need to convert it to a string
      def to_dict(self):
@@ -180,6 +178,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
                      for v in self.human_evaluators
                  ],
                  "executorSpanId": str(self.executor_span_id),
+                 "index": self.index,
              }
          except Exception as e:
              raise ValueError(f"Error serializing EvaluationResultDatapoint: {e}")
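Note: with the custom __init__ removed, HumanEvaluator is constructed like any other pydantic model, by field name. A one-line sketch of the call-site change:

    # 0.4.56: HumanEvaluator("my_queue")
    # 0.4.57:
    evaluator = HumanEvaluator(queueName="my_queue")
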
@@ -1,192 +0,0 @@
- import opentelemetry
- import uuid
-
- try:
-     from playwright.async_api import BrowserContext, Page
-     from playwright.sync_api import (
-         BrowserContext as SyncBrowserContext,
-         Page as SyncPage,
-     )
- except ImportError as e:
-     raise ImportError(
-         f"Attempated to import {__file__}, but it is designed "
-         "to patch Playwright, which is not installed. Use `pip install playwright` "
-         "to install Playwright or remove this import."
-     ) from e
-
- _original_new_page = None
- _original_goto = None
- _original_new_page_async = None
- _original_goto_async = None
-
- INJECT_PLACEHOLDER = """
- ([baseUrl, projectApiKey]) => {
-     const serverUrl = `${baseUrl}/v1/browser-sessions/events`;
-     const BATCH_SIZE = 16;
-     const FLUSH_INTERVAL = 1000;
-     const HEARTBEAT_INTERVAL = 1000; // 1 second heartbeat
-
-     window.rrwebEventsBatch = [];
-
-     window.sendBatch = async () => {
-         if (window.rrwebEventsBatch.length === 0) return;
-
-         const eventsPayload = {
-             sessionId: window.rrwebSessionId,
-             traceId: window.traceId,
-             events: window.rrwebEventsBatch
-         };
-
-         try {
-             await fetch(serverUrl, {
-                 method: 'POST',
-                 headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${projectApiKey}` },
-                 body: JSON.stringify(eventsPayload),
-             });
-             window.rrwebEventsBatch = [];
-         } catch (error) {
-             console.error('Failed to send events:', error);
-         }
-     };
-
-     setInterval(() => window.sendBatch(), FLUSH_INTERVAL);
-
-     // Add heartbeat event
-     setInterval(() => {
-         window.rrwebEventsBatch.push({
-             type: 6, // Custom event type
-             data: { source: 'heartbeat' },
-             timestamp: Date.now()
-         });
-     }, HEARTBEAT_INTERVAL);
-
-     window.rrweb.record({
-         emit(event) {
-             window.rrwebEventsBatch.push(event);
-
-             if (window.rrwebEventsBatch.length >= BATCH_SIZE) {
-                 window.sendBatch();
-             }
-         }
-     });
-
-     // Simplified beforeunload handler
-     window.addEventListener('beforeunload', () => {
-         window.sendBatch();
-     });
- }
- """
-
-
- def init_playwright_tracing(http_url: str, project_api_key: str):
-     def inject_rrweb(page: SyncPage):
-         # Get current trace ID from active span
-         current_span = opentelemetry.trace.get_current_span()
-         current_span.set_attribute("lmnr.internal.has_browser_session", True)
-         trace_id = format(current_span.get_span_context().trace_id, "032x")
-         session_id = str(uuid.uuid4().hex)
-
-         # Generate UUID session ID and set trace ID
-         page.evaluate(
-             """([traceId, sessionId]) => {
-                 window.rrwebSessionId = sessionId;
-                 window.traceId = traceId;
-             }""",
-             [trace_id, session_id],
-         )
-
-         # Load rrweb and set up recording
-         page.add_script_tag(
-             url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
-         )
-
-         # Update the recording setup to include trace ID
-         page.evaluate(
-             INJECT_PLACEHOLDER,
-             [http_url, project_api_key],
-         )
-
-     async def inject_rrweb_async(page: Page):
-         # Wait for the page to be in a ready state first
-         await page.wait_for_load_state("domcontentloaded")
-
-         # Get current trace ID from active span
-         current_span = opentelemetry.trace.get_current_span()
-         current_span.set_attribute("lmnr.internal.has_browser_session", True)
-         trace_id = format(current_span.get_span_context().trace_id, "032x")
-         session_id = str(uuid.uuid4().hex)
-
-         # Wait for any existing script load to complete
-         await page.wait_for_load_state("networkidle")
-
-         # Generate UUID session ID and set trace ID
-         await page.evaluate(
-             """([traceId, sessionId]) => {
-                 window.rrwebSessionId = sessionId;
-                 window.traceId = traceId;
-             }""",
-             [trace_id, session_id],
-         )
-
-         # Load rrweb and set up recording
-         await page.add_script_tag(
-             url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
-         )
-
-         await page.wait_for_function("""(() => window.rrweb || 'rrweb' in window)""")
-
-         # Update the recording setup to include trace ID
-         await page.evaluate(
-             INJECT_PLACEHOLDER,
-             [http_url, project_api_key],
-         )
-
-     async def patched_new_page_async(self: BrowserContext, *args, **kwargs):
-         # Call the original new_page (returns a Page object)
-         page = await _original_new_page_async(self, *args, **kwargs)
-         # Inject rrweb automatically after the page is created
-         await inject_rrweb_async(page)
-         return page
-
-     async def patched_goto_async(self: Page, *args, **kwargs):
-         # Call the original goto
-         result = await _original_goto_async(self, *args, **kwargs)
-         # Inject rrweb after navigation
-         await inject_rrweb_async(self)
-         return result
-
-     def patched_new_page(self: SyncBrowserContext, *args, **kwargs):
-         # Call the original new_page (returns a Page object)
-         page = _original_new_page(self, *args, **kwargs)
-         # Inject rrweb automatically after the page is created
-         inject_rrweb(page)
-         return page
-
-     def patched_goto(self: SyncPage, *args, **kwargs):
-         # Call the original goto
-         result = _original_goto(self, *args, **kwargs)
-         # Inject rrweb after navigation
-         inject_rrweb(self)
-         return result
-
-     def patch_browser():
-         """
-         Overrides BrowserContext.new_page with a patched async function
-         that injects rrweb into every new page.
-         """
-         global _original_new_page, _original_goto, _original_new_page_async, _original_goto_async
-         if _original_new_page_async is None or _original_goto_async is None:
-             _original_new_page_async = BrowserContext.new_page
-             BrowserContext.new_page = patched_new_page_async
-
-             _original_goto_async = Page.goto
-             Page.goto = patched_goto_async
-
-         if _original_new_page is None or _original_goto is None:
-             _original_new_page = SyncBrowserContext.new_page
-             SyncBrowserContext.new_page = patched_new_page
-
-             _original_goto = SyncPage.goto
-             SyncPage.goto = patched_goto
-
-     patch_browser()