lmnr 0.4.55__tar.gz → 0.4.57__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lmnr-0.4.55 → lmnr-0.4.57}/PKG-INFO +1 -2
- {lmnr-0.4.55 → lmnr-0.4.57}/pyproject.toml +1 -2
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/__init__.py +6 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/instruments.py +1 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/tracing.py +13 -1
- lmnr-0.4.57/src/lmnr/sdk/browser/__init__.py +9 -0
- lmnr-0.4.57/src/lmnr/sdk/browser/playwright_patch.py +249 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/evaluations.py +110 -65
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/laminar.py +36 -20
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/types.py +3 -4
- {lmnr-0.4.55 → lmnr-0.4.57}/LICENSE +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/README.md +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/__init__.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/.flake8 +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/.python-version +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/config/__init__.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/decorators/base.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/attributes.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/openllmetry_sdk/version.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/datasets.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/eval_control.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/log.py +0 -0
- {lmnr-0.4.55 → lmnr-0.4.57}/src/lmnr/sdk/utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lmnr
-Version: 0.4.55
+Version: 0.4.57
 Summary: Python SDK for Laminar
 License: Apache-2.0
 Author: lmnr.ai
@@ -41,7 +41,6 @@ Provides-Extra: watsonx
 Provides-Extra: weaviate
 Requires-Dist: aiohttp (>=3.0)
 Requires-Dist: argparse (>=1.0)
-Requires-Dist: deprecated (>=1.0)
 Requires-Dist: grpcio (<1.68.0)
 Requires-Dist: opentelemetry-api (>=1.28.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.28.0)
pyproject.toml
@@ -6,7 +6,7 @@
 
 [project]
 name = "lmnr"
-version = "0.4.55"
+version = "0.4.57"
 description = "Python SDK for Laminar"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -27,7 +27,6 @@ dependencies = [
     "opentelemetry-instrumentation-urllib3 (>=0.50b0)",
     "opentelemetry-instrumentation-threading (>=0.50b0)",
     "opentelemetry-semantic-conventions-ai (>=0.4.2)",
-    "deprecated (>=1.0)",
     "tqdm (>=4.0)",
     "argparse (>=1.0)",
     "aiohttp (>=3.0)",
src/lmnr/openllmetry_sdk/__init__.py
@@ -32,6 +32,9 @@ class Traceloop:
         should_enrich_metrics: bool = False,
         resource_attributes: dict = {},
         instruments: Optional[Set[Instruments]] = None,
+        base_http_url: Optional[str] = None,
+        project_api_key: Optional[str] = None,
+        max_export_batch_size: Optional[int] = None,
     ) -> None:
         if not is_tracing_enabled():
             return
@@ -69,4 +72,7 @@ class Traceloop:
             exporter=exporter,
             should_enrich_metrics=should_enrich_metrics,
             instruments=instruments,
+            base_http_url=base_http_url,
+            project_api_key=project_api_key,
+            max_export_batch_size=max_export_batch_size,
         )
src/lmnr/openllmetry_sdk/tracing/tracing.py
@@ -6,6 +6,7 @@ import logging
 from contextvars import Context
 from lmnr.sdk.log import VerboseColorfulFormatter
 from lmnr.openllmetry_sdk.instruments import Instruments
+from lmnr.sdk.browser import init_browser_tracing
 from lmnr.openllmetry_sdk.tracing.attributes import (
     ASSOCIATION_PROPERTIES,
     SPAN_INSTRUMENTATION_SOURCE,
@@ -80,6 +81,9 @@ class TracerWrapper(object):
         exporter: Optional[SpanExporter] = None,
         should_enrich_metrics: bool = False,
         instruments: Optional[Set[Instruments]] = None,
+        base_http_url: Optional[str] = None,
+        project_api_key: Optional[str] = None,
+        max_export_batch_size: Optional[int] = None,
     ) -> "TracerWrapper":
         cls._initialize_logger(cls)
         if not hasattr(cls, "instance"):
@@ -106,7 +110,8 @@ class TracerWrapper(object):
             )
         else:
             obj.__spans_processor: SpanProcessor = BatchSpanProcessor(
-                obj.__spans_exporter
+                obj.__spans_exporter,
+                max_export_batch_size=max_export_batch_size,
             )
             obj.__spans_processor_original_on_start = None
 
@@ -122,6 +127,8 @@ class TracerWrapper(object):
             instrument_set = init_instrumentations(
                 should_enrich_metrics,
                 instruments,
+                base_http_url=base_http_url,
+                project_api_key=project_api_key,
             )
 
             if not instrument_set:
@@ -286,6 +293,8 @@ def init_instrumentations(
    should_enrich_metrics: bool,
    instruments: Optional[Set[Instruments]] = None,
    block_instruments: Optional[Set[Instruments]] = None,
+    base_http_url: Optional[str] = None,
+    project_api_key: Optional[str] = None,
 ):
     block_instruments = block_instruments or set()
     # These libraries are not instrumented by default,
@@ -397,6 +406,9 @@ def init_instrumentations(
         elif instrument == Instruments.WEAVIATE:
             if init_weaviate_instrumentor():
                 instrument_set = True
+        elif instrument == Instruments.PLAYWRIGHT:
+            if init_browser_tracing(base_http_url, project_api_key):
+                instrument_set = True
         else:
             module_logger.warning(
                 f"Warning: {instrument} instrumentation does not exist."
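
With this branch in place, browser session recording rides the same opt-in mechanism as every other instrumentation. A minimal sketch of enabling only the Playwright instrument, assuming `Laminar` and `Instruments` are exported from the package root (that export is not shown in this diff):

import os

from lmnr import Instruments, Laminar

# Assumption: passing an explicit set restricts instrumentation to exactly
# these instruments; omit the argument to keep the default set.
Laminar.initialize(
    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
    instruments={Instruments.PLAYWRIGHT},
)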
lmnr-0.4.57/src/lmnr/sdk/browser/__init__.py (new file)
@@ -0,0 +1,9 @@
+from lmnr.openllmetry_sdk.utils.package_check import is_package_installed
+
+
+def init_browser_tracing(http_url: str, project_api_key: str):
+    if is_package_installed("playwright"):
+        from .playwright_patch import init_playwright_tracing
+
+        init_playwright_tracing(http_url, project_api_key)
+    # Other browsers can be added here
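
The guard matters because `playwright_patch` raises `ImportError` at import time when Playwright is absent; `is_package_installed` keeps that module untouched. A rough stdlib equivalent of the same optional-dependency pattern, for reference (the function name here is illustrative, not from the package):

import importlib.util


def init_tracing_if_available(http_url: str, project_api_key: str) -> None:
    # find_spec returns None when "playwright" cannot be imported, so the
    # patch module (which fails hard on import) is only loaded when safe.
    if importlib.util.find_spec("playwright") is not None:
        from lmnr.sdk.browser.playwright_patch import init_playwright_tracing

        init_playwright_tracing(http_url, project_api_key)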
lmnr-0.4.57/src/lmnr/sdk/browser/playwright_patch.py (new file)
@@ -0,0 +1,249 @@
+import opentelemetry
+import uuid
+import asyncio
+
+try:
+    from playwright.async_api import BrowserContext, Page
+    from playwright.sync_api import (
+        BrowserContext as SyncBrowserContext,
+        Page as SyncPage,
+    )
+except ImportError as e:
+    raise ImportError(
+        f"Attempted to import {__file__}, but it is designed "
+        "to patch Playwright, which is not installed. Use `pip install playwright` "
+        "to install Playwright or remove this import."
+    ) from e
+
+_original_new_page = None
+_original_new_page_async = None
+
+INJECT_PLACEHOLDER = """
+([baseUrl, projectApiKey]) => {
+    const serverUrl = `${baseUrl}/v1/browser-sessions/events`;
+    const FLUSH_INTERVAL = 1000;
+    const HEARTBEAT_INTERVAL = 1000;
+
+    window.rrwebEventsBatch = [];
+
+    window.sendBatch = async () => {
+        if (window.rrwebEventsBatch.length === 0) return;
+
+        const eventsPayload = {
+            sessionId: window.rrwebSessionId,
+            traceId: window.traceId,
+            events: window.rrwebEventsBatch
+        };
+
+        try {
+            const jsonString = JSON.stringify(eventsPayload);
+            const uint8Array = new TextEncoder().encode(jsonString);
+
+            const cs = new CompressionStream('gzip');
+            const compressedStream = await new Response(
+                new Response(uint8Array).body.pipeThrough(cs)
+            ).arrayBuffer();
+
+            const compressedArray = new Uint8Array(compressedStream);
+
+            const blob = new Blob([compressedArray], { type: 'application/octet-stream' });
+
+            const response = await fetch(serverUrl, {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'Content-Encoding': 'gzip',
+                    'Authorization': `Bearer ${projectApiKey}`
+                },
+                body: blob,
+                compress: false,
+                credentials: 'omit',
+                mode: 'cors',
+                cache: 'no-cache',
+            });
+
+            if (!response.ok) {
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+
+            window.rrwebEventsBatch = [];
+        } catch (error) {
+            console.error('Failed to send events:', error);
+        }
+    };
+
+    setInterval(() => window.sendBatch(), FLUSH_INTERVAL);
+
+    setInterval(() => {
+        window.rrwebEventsBatch.push({
+            type: 6,
+            data: { source: 'heartbeat' },
+            timestamp: Date.now()
+        });
+    }, HEARTBEAT_INTERVAL);
+
+    window.rrweb.record({
+        emit(event) {
+            window.rrwebEventsBatch.push(event);
+        }
+    });
+
+    window.addEventListener('beforeunload', () => {
+        window.sendBatch();
+    });
+}
+"""
+
+
+def init_playwright_tracing(http_url: str, project_api_key: str):
+
+    def inject_rrweb(page: SyncPage):
+        # Get current trace ID from active span
+        current_span = opentelemetry.trace.get_current_span()
+        current_span.set_attribute("lmnr.internal.has_browser_session", True)
+        trace_id = format(current_span.get_span_context().trace_id, "032x")
+        session_id = str(uuid.uuid4().hex)
+
+        # Generate UUID session ID and set trace ID
+        page.evaluate(
+            """([traceId, sessionId]) => {
+                window.rrwebSessionId = sessionId;
+                window.traceId = traceId;
+            }""",
+            [trace_id, session_id],
+        )
+
+        # Load rrweb from CDN
+        page.add_script_tag(
+            url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
+        )
+
+        # Update the recording setup to include trace ID
+        page.evaluate(
+            INJECT_PLACEHOLDER,
+            [http_url, project_api_key],
+        )
+
+    async def inject_rrweb_async(page: Page):
+        try:
+            # Wait for the page to be in a ready state first
+            await page.wait_for_load_state("domcontentloaded")
+
+            # Get current trace ID from active span
+            current_span = opentelemetry.trace.get_current_span()
+            current_span.set_attribute("lmnr.internal.has_browser_session", True)
+            trace_id = format(current_span.get_span_context().trace_id, "032x")
+            session_id = str(uuid.uuid4().hex)
+
+            # Generate UUID session ID and set trace ID
+            await page.evaluate(
+                """([traceId, sessionId]) => {
+                    window.rrwebSessionId = sessionId;
+                    window.traceId = traceId;
+                }""",
+                [trace_id, session_id],
+            )
+
+            # Load rrweb from CDN
+            await page.add_script_tag(
+                url="https://cdn.jsdelivr.net/npm/rrweb@latest/dist/rrweb.min.js"
+            )
+
+            await page.wait_for_function(
+                """(() => window.rrweb || 'rrweb' in window)"""
+            )
+
+            # Update the recording setup to include trace ID
+            await page.evaluate(
+                INJECT_PLACEHOLDER,
+                [http_url, project_api_key],
+            )
+        except Exception as e:
+            print(f"Error injecting rrweb: {e}")
+
+    def handle_navigation(page: SyncPage):
+        def on_load():
+            inject_rrweb(page)
+
+        page.on("load", on_load)
+        inject_rrweb(page)
+
+    async def handle_navigation_async(page: Page):
+        async def on_load():
+            await inject_rrweb_async(page)
+
+        page.on("load", lambda: asyncio.create_task(on_load()))
+        await inject_rrweb_async(page)
+
+    async def patched_new_page_async(self: BrowserContext, *args, **kwargs):
+        # Modify CSP to allow required domains
+        async def handle_route(route):
+            try:
+                response = await route.fetch()
+                headers = dict(response.headers)
+
+                # Find and modify CSP header
+                for header_name in headers:
+                    if header_name.lower() == "content-security-policy":
+                        csp = headers[header_name]
+                        parts = csp.split(";")
+                        for i, part in enumerate(parts):
+                            if "script-src" in part:
+                                parts[i] = f"{part.strip()} cdn.jsdelivr.net"
+                            elif "connect-src" in part:
+                                parts[i] = f"{part.strip()} " + http_url
+                        if not any("connect-src" in part for part in parts):
+                            parts.append(" connect-src 'self' " + http_url)
+                        headers[header_name] = ";".join(parts)
+
+                await route.fulfill(response=response, headers=headers)
+            except Exception:
+                await route.continue_()
+
+        await self.route("**/*", handle_route)
+        page = await _original_new_page_async(self, *args, **kwargs)
+        await handle_navigation_async(page)
+        return page
+
+    def patched_new_page(self: SyncBrowserContext, *args, **kwargs):
+        # Modify CSP to allow required domains
+        def handle_route(route):
+            try:
+                response = route.fetch()
+                headers = dict(response.headers)
+
+                # Find and modify CSP header
+                for header_name in headers:
+                    if header_name.lower() == "content-security-policy":
+                        csp = headers[header_name]
+                        parts = csp.split(";")
+                        for i, part in enumerate(parts):
+                            if "script-src" in part:
+                                parts[i] = f"{part.strip()} cdn.jsdelivr.net"
+                            elif "connect-src" in part:
+                                parts[i] = f"{part.strip()} " + http_url
+                        if not any("connect-src" in part for part in parts):
+                            parts.append(" connect-src 'self' " + http_url)
+                        headers[header_name] = ";".join(parts)
+
+                route.fulfill(response=response, headers=headers)
+            except Exception:
+                # Continue with the original request without modification
+                route.continue_()
+
+        self.route("**/*", handle_route)
+        page = _original_new_page(self, *args, **kwargs)
+        handle_navigation(page)
+        return page
+
+    def patch_browser():
+        global _original_new_page, _original_new_page_async
+        if _original_new_page_async is None:
+            _original_new_page_async = BrowserContext.new_page
+            BrowserContext.new_page = patched_new_page_async
+
+        if _original_new_page is None:
+            _original_new_page = SyncBrowserContext.new_page
+            SyncBrowserContext.new_page = patched_new_page
+
+    patch_browser()
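
Because `init_playwright_tracing` calls `patch_browser()`, which swaps `BrowserContext.new_page` at the class level, calling code does not change: every page created after initialization is routed through the CSP-relaxing handler and gets rrweb injected on each load. A hedged usage sketch (the Playwright calls are standard; the recording is a side effect of the patch above, assuming the Playwright instrument is active so that init_browser_tracing runs):

from playwright.sync_api import sync_playwright

from lmnr import Laminar

Laminar.initialize(project_api_key="...")  # dispatches init_browser_tracing

with sync_playwright() as p:
    browser = p.chromium.launch()
    context = browser.new_context()
    page = context.new_page()  # patched_new_page: route handler + rrweb injection
    page.goto("https://example.com")
    browser.close()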
src/lmnr/sdk/evaluations.py
@@ -2,7 +2,6 @@ import asyncio
 import re
 import sys
 import uuid
-
 from tqdm import tqdm
 from typing import Any, Awaitable, Optional, Set, Union
 
@@ -27,9 +26,12 @@ from .types import (
 from .utils import is_async
 
 DEFAULT_BATCH_SIZE = 5
+MAX_EXPORT_BATCH_SIZE = 64
 
 
-def get_evaluation_url(project_id: str, evaluation_id: str, base_url: Optional[str] = None):
+def get_evaluation_url(
+    project_id: str, evaluation_id: str, base_url: Optional[str] = None
+):
     if not base_url:
         base_url = "https://www.lmnr.ai"
 
@@ -39,7 +41,7 @@ def get_evaluation_url(
     if url.endswith("localhost") or url.endswith("127.0.0.1"):
         # We best effort assume that the frontend is running on port 3000
         # TODO: expose the frontend port?
-        url = url + ":3000"
+        url = url + ":5667"
     return f"{url}/project/{project_id}/evaluations/{evaluation_id}"
 
 
@@ -97,13 +99,14 @@ class Evaluation:
         evaluators: dict[str, EvaluatorFunction],
         human_evaluators: list[HumanEvaluator] = [],
         name: Optional[str] = None,
-        group_id: Optional[str] = None,
-        batch_size: int = DEFAULT_BATCH_SIZE,
+        group_name: Optional[str] = None,
+        concurrency_limit: int = DEFAULT_BATCH_SIZE,
         project_api_key: Optional[str] = None,
         base_url: Optional[str] = None,
         http_port: Optional[int] = None,
         grpc_port: Optional[int] = None,
         instruments: Optional[Set[Instruments]] = None,
+        max_export_batch_size: Optional[int] = MAX_EXPORT_BATCH_SIZE,
     ):
         """
         Initializes an instance of the Evaluations class.
@@ -131,12 +134,12 @@ class Evaluation:
                 Used to identify the evaluation in the group.\
                 If not provided, a random name will be generated.
                 Defaults to None.
-            group_id (Optional[str], optional): an identifier to group\
-                evaluations. Only evaluations within the same
+            group_name (Optional[str], optional): an identifier to group\
+                evaluations. Only evaluations within the same group_name can be\
                 visually compared. If not provided, "default" is assigned.
                 Defaults to None
-            batch_size (int, optional): The batch size for evaluation. This many\
-                data points will be evaluated in parallel.
+            concurrency_limit (int, optional): The concurrency limit for evaluation. This many\
+                data points will be evaluated in parallel with a pool of workers.
                 Defaults to DEFAULT_BATCH_SIZE.
             project_api_key (Optional[str], optional): The project API key.\
                 If not provided, LMNR_PROJECT_API_KEY environment variable is\
@@ -180,17 +183,20 @@ class Evaluation:
         self.data = data
         self.executor = executor
         self.evaluators = evaluators
-        self.group_id = group_id
+        self.group_name = group_name
         self.name = name
-        self.batch_size = batch_size
+        self.concurrency_limit = concurrency_limit
+        self.batch_size = concurrency_limit
         self._logger = get_default_logger(self.__class__.__name__)
         self.human_evaluators = human_evaluators
+        self.upload_tasks = []  # Add this line to track upload tasks
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
             http_port=http_port,
             grpc_port=grpc_port,
             instruments=instruments,
+            max_export_batch_size=max_export_batch_size,
         )
 
     async def run(self) -> Awaitable[None]:
@@ -200,49 +206,60 @@ class Evaluation:
 
     async def _run(self) -> None:
         self.reporter.start(len(self.data))
-
         try:
-            result_datapoints = await self._evaluate_in_batches()
+            evaluation = await L.init_eval(name=self.name, group_name=self.group_name)
+            result_datapoints = await self._evaluate_in_batches(evaluation.id)
+
+            # Wait for all background upload tasks to complete
+            if self.upload_tasks:
+                self._logger.debug(
+                    f"Waiting for {len(self.upload_tasks)} upload tasks to complete"
+                )
+                await asyncio.gather(*self.upload_tasks)
+                self._logger.debug("All upload tasks completed")
         except Exception as e:
             self.reporter.stopWithError(e)
             self.is_finished = True
             return
 
-        # For now add all human evaluators to all result datapoints
-        # In the future, we will add ways to specify which human evaluators
-        # to add to which result datapoints, e.g. sample some randomly
         for result_datapoint in result_datapoints:
             result_datapoint.human_evaluators = self.human_evaluators or {}
 
-        evaluation = await L.create_evaluation(
-            data=result_datapoints, group_id=self.group_id, name=self.name
-        )
         average_scores = get_average_scores(result_datapoints)
         self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
         self.is_finished = True
 
-    async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
-        result_datapoints = []
-        for i in range(0, len(self.data), self.batch_size):
-            batch = (
-                self.data[i : i + self.batch_size]
-                if isinstance(self.data, list)
-                else self.data.slice(i, i + self.batch_size)
-            )
-            batch_datapoints = await self._evaluate_batch(batch)
-            result_datapoints.extend(batch_datapoints)
-            self.reporter.update(len(batch))
-        return result_datapoints
-
-    async def _evaluate_batch(
-        self, batch: list[Datapoint]
+    async def _evaluate_in_batches(
+        self, eval_id: uuid.UUID
     ) -> list[EvaluationResultDatapoint]:
-
-
-
+
+        semaphore = asyncio.Semaphore(self.concurrency_limit)
+        tasks = []
+        data_iter = self.data if isinstance(self.data, list) else range(len(self.data))
+
+        async def evaluate_task(datapoint, index):
+            try:
+                result = await self._evaluate_datapoint(eval_id, datapoint, index)
+                self.reporter.update(1)
+                return index, result
+            finally:
+                semaphore.release()
+
+        # Create tasks only after acquiring semaphore
+        for idx, item in enumerate(data_iter):
+            await semaphore.acquire()
+            datapoint = item if isinstance(self.data, list) else self.data[item]
+            task = asyncio.create_task(evaluate_task(datapoint, idx))
+            tasks.append(task)
+
+        # Wait for all tasks to complete and preserve order
+        results = await asyncio.gather(*tasks)
+        ordered_results = [result for _, result in sorted(results, key=lambda x: x[0])]
+
+        return ordered_results
 
     async def _evaluate_datapoint(
-        self, datapoint: Datapoint
+        self, eval_id: uuid.UUID, datapoint: Datapoint, index: int
     ) -> EvaluationResultDatapoint:
         with L.start_as_current_span("evaluation") as evaluation_span:
            L._set_trace_type(trace_type=TraceType.EVALUATION)
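
The rewrite replaces fixed-size batches with a semaphore-bounded worker pool: the spawning loop acquires before creating each task, the task releases in `finally`, and results carry their index so input order is restored after `gather`. The same pattern in isolation, as a self-contained sketch:

import asyncio


async def bounded_map(items, worker, limit):
    # At most `limit` workers run at once; acquire in the spawning loop,
    # release inside the task, mirroring _evaluate_in_batches above.
    semaphore = asyncio.Semaphore(limit)
    tasks = []

    async def run(index, item):
        try:
            return index, await worker(item)
        finally:
            semaphore.release()

    for index, item in enumerate(items):
        await semaphore.acquire()
        tasks.append(asyncio.create_task(run(index, item)))

    pairs = await asyncio.gather(*tasks)
    # Completion order is arbitrary; sorting by index restores input order.
    return [value for _, value in sorted(pairs)]


async def main():
    async def square(n):
        await asyncio.sleep(0.01)
        return n * n

    print(await bounded_map(range(10), square, limit=3))


asyncio.run(main())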
@@ -251,11 +268,15 @@ class Evaluation:
                 "executor", input={"data": datapoint.data}
             ) as executor_span:
                 executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
-
-
-
-
-
+                # Run synchronous executors in a thread pool to avoid blocking
+                if not is_async(self.executor):
+                    loop = asyncio.get_event_loop()
+                    output = await loop.run_in_executor(
+                        None, self.executor, datapoint.data
+                    )
+                else:
+                    output = await self.executor(datapoint.data)
+
             L.set_span_output(output)
             executor_span_id = uuid.UUID(
                 int=executor_span.get_span_context().span_id
@@ -283,14 +304,28 @@ class Evaluation:
                     scores.update(value)
 
             trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
-
-
-
-
-
-
-
-
+
+            datapoint = EvaluationResultDatapoint(
+                data=datapoint.data,
+                target=target,
+                executor_output=output,
+                scores=scores,
+                trace_id=trace_id,
+                # For now add all human evaluators to all result datapoints
+                # In the future, we will add ways to specify which human evaluators
+                # to add to which result datapoints, e.g. sample some randomly
+                human_evaluators=self.human_evaluators,
+                executor_span_id=executor_span_id,
+                index=index,
+            )
+
+            # Create background upload task without awaiting it
+            upload_task = asyncio.create_task(
+                L.save_eval_datapoints(eval_id, [datapoint], self.group_name)
+            )
+            self.upload_tasks.append(upload_task)
+
+            return datapoint
 
 
 def evaluate(
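
Uploads are now fire-and-forget per datapoint: `asyncio.create_task` starts the POST immediately, and the reference kept in `self.upload_tasks` both lets `_run` drain them with `gather` and protects the tasks from being garbage collected mid-flight. Reduced to its skeleton:

import asyncio

upload_tasks = []


async def save_datapoints(points):
    await asyncio.sleep(0.05)  # stand-in for the HTTP POST


async def evaluate_one(i):
    # Start the upload without awaiting it; evaluation continues immediately.
    upload_tasks.append(asyncio.create_task(save_datapoints([i])))
    return i


async def main():
    results = [await evaluate_one(i) for i in range(3)]
    await asyncio.gather(*upload_tasks)  # drained before reporting, as in _run
    print(results)


asyncio.run(main())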
@@ -299,8 +334,9 @@ def evaluate(
     evaluators: dict[str, EvaluatorFunction],
     human_evaluators: list[HumanEvaluator] = [],
     name: Optional[str] = None,
-    group_id: Optional[str] = None,
-    batch_size: int = DEFAULT_BATCH_SIZE,
+    group_id: Optional[str] = None,  # Deprecated
+    group_name: Optional[str] = None,
+    concurrency_limit: int = DEFAULT_BATCH_SIZE,
     project_api_key: Optional[str] = None,
     base_url: Optional[str] = None,
     http_port: Optional[int] = None,
@@ -318,12 +354,12 @@ def evaluate(
 
     Parameters:
         data (Union[list[EvaluationDatapoint|dict]], EvaluationDataset]):\
-
-
-
+            List of data points to evaluate or an evaluation dataset.
+            `data` is the input to the executor function,
+            `target` is the input to the evaluator function.
         executor (Callable[..., Any]): The executor function.\
-
-
+            Takes the data point + any additional arguments\
+            and returns the output to evaluate.
         evaluators (List[Callable[..., Any]]):
         evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
             names. Each evaluator function takes the output of the executor\
@@ -337,14 +373,19 @@ def evaluate(
             evaluator only holds the queue name.
             Defaults to an empty list.
         name (Optional[str], optional): Optional name of the evaluation.\
-
-
-
-        group_id (Optional[str], optional):
+            Used to identify the evaluation in the group. If not provided, a\
+            random name will be generated.
+            Defaults to None.
+        group_id (Optional[str], optional): [DEPRECATED] Use group_name instead.
+            An identifier to group evaluations.\
             Only evaluations within the same group_id can be\
             visually compared. If not provided, set to "default".
             Defaults to None
-
+        group_name (Optional[str], optional): An identifier to group evaluations.\
+            Only evaluations within the same group_name can be visually compared.\
+            If not provided, set to "default".
+            Defaults to None
+        concurrency_limit (int, optional): The concurrency limit for evaluation.
            Defaults to DEFAULT_BATCH_SIZE.
        project_api_key (Optional[str], optional): The project API key.
            Defaults to None.
@@ -363,15 +404,19 @@ def evaluate(
             will be used.
             Defaults to None.
     """
+    if group_id:
+        raise DeprecationWarning("group_id is deprecated. Use group_name instead.")
+
+    group_name = group_name or group_id
 
     evaluation = Evaluation(
         data=data,
         executor=executor,
         evaluators=evaluators,
-        group_id=group_id,
+        group_name=group_name,
         human_evaluators=human_evaluators,
         name=name,
-        batch_size=batch_size,
+        concurrency_limit=concurrency_limit,
         project_api_key=project_api_key,
         base_url=base_url,
         http_port=http_port,
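
A current call site after the rename looks like the following; the executor and evaluator are illustrative, not from the package. Note that, as written, a truthy `group_id` raises the `DeprecationWarning` outright, so the `group_name = group_name or group_id` fallback only ever sees a falsy `group_id`.

from lmnr import evaluate  # assumes evaluate is exported at the package root


def executor(data: str) -> str:
    return data.upper()


def exact_match(output: str, target: str) -> int:
    return int(output == target)


evaluate(
    data=[{"data": "hello", "target": "HELLO"}],
    executor=executor,
    evaluators={"exact_match": exact_match},
    group_name="smoke-tests",  # replaces the deprecated group_id
    concurrency_limit=5,       # replaces batch_size
)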
src/lmnr/sdk/laminar.py
@@ -47,7 +47,8 @@ from lmnr.openllmetry_sdk.tracing.tracing import (
 from .log import VerboseColorfulFormatter
 
 from .types import (
-    CreateEvaluationResponse,
+    HumanEvaluator,
+    InitEvaluationResponse,
     EvaluationResultDatapoint,
     GetDatapointsResponse,
     PipelineRunError,
@@ -78,6 +79,7 @@ class Laminar:
         grpc_port: Optional[int] = None,
         instruments: Optional[Set[Instruments]] = None,
         disable_batch: bool = False,
+        max_export_batch_size: Optional[int] = None,
     ):
         """Initialize Laminar context across the application.
         This method must be called before using any other Laminar methods or
@@ -142,12 +144,15 @@ class Laminar:
         cls._initialize_logger()
 
         Traceloop.init(
+            base_http_url=cls.__base_http_url,
+            project_api_key=cls.__project_api_key,
             exporter=OTLPSpanExporter(
                 endpoint=cls.__base_grpc_url,
                 headers={"authorization": f"Bearer {cls.__project_api_key}"},
             ),
             instruments=instruments,
             disable_batch=disable_batch,
+            max_export_batch_size=max_export_batch_size,
         )
 
     @classmethod
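
The new knob simply rides along to `BatchSpanProcessor` (see the tracing.py hunk above), capping how many spans go out per export. A sketch of setting it at startup, assuming `Laminar.initialize` is the public entry point described in the docstring:

import os

from lmnr import Laminar

Laminar.initialize(
    project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
    # Forwarded through Traceloop.init to OpenTelemetry's BatchSpanProcessor;
    # smaller values mean smaller, more frequent export payloads.
    max_export_batch_size=64,
)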
@@ -687,33 +692,44 @@ class Laminar:
         set_association_properties(props)
 
     @classmethod
-    async def create_evaluation(
-        cls,
-        data: list[EvaluationResultDatapoint],
-        group_id: Optional[str] = None,
-        name: Optional[str] = None,
-    ) -> CreateEvaluationResponse:
+    async def init_eval(
+        cls, name: Optional[str] = None, group_name: Optional[str] = None
+    ) -> InitEvaluationResponse:
         async with aiohttp.ClientSession() as session:
             async with session.post(
-                cls.__base_http_url + "/v1/
+                cls.__base_http_url + "/v1/evals",
                 json={
-                    "groupId": group_id,
                     "name": name,
-                    "
+                    "groupName": group_name,
                 },
                 headers=cls._headers(),
             ) as response:
-                if response.status != 200:
-                    try:
-                        resp_json = await response.json()
-                        raise ValueError(
-                            f"Error creating evaluation {json.dumps(resp_json)}"
-                        )
-                    except aiohttp.ClientError:
-                        text = await response.text()
-                        raise ValueError(f"Error creating evaluation {text}")
                 resp_json = await response.json()
-                return CreateEvaluationResponse.model_validate(resp_json)
+                return InitEvaluationResponse.model_validate(resp_json)
+
+    @classmethod
+    async def save_eval_datapoints(
+        cls,
+        eval_id: uuid.UUID,
+        datapoints: list[EvaluationResultDatapoint],
+        groupName: Optional[str] = None,
+        human_evaluators: Optional[list[HumanEvaluator]] = None,
+    ):
+        async with aiohttp.ClientSession() as session:
+
+            async with session.post(
+                cls.__base_http_url + f"/v1/evals/{eval_id}/datapoints",
+                json={
+                    "points": [datapoint.to_dict() for datapoint in datapoints],
+                    "groupName": groupName,
+                    "humanEvaluators": human_evaluators,
+                },
+                headers=cls._headers(),
+            ) as response:
+                if response.status != 200:
+                    raise ValueError(
+                        f"Error saving evaluation datapoints: {response.text}"
+                    )
 
     @classmethod
     def get_datapoints(
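
Evaluation creation is now split across two endpoints: `init_eval` registers the run and returns its id, then datapoints are streamed in with `save_eval_datapoints`. A sketch of driving the pair directly (normally `Evaluation._run` does this; the empty datapoint list stands in for results built elsewhere):

import asyncio

from lmnr import Laminar
from lmnr.sdk.types import EvaluationResultDatapoint


async def main():
    Laminar.initialize(project_api_key="...")
    evaluation = await Laminar.init_eval(name="manual-run", group_name="default")
    datapoints: list[EvaluationResultDatapoint] = []  # built by your own loop
    await Laminar.save_eval_datapoints(evaluation.id, datapoints, "default")


asyncio.run(main())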
src/lmnr/sdk/types.py
@@ -141,11 +141,8 @@ EvaluatorFunction = Callable[
 class HumanEvaluator(pydantic.BaseModel):
     queueName: str
 
-    def __init__(self, queue_name: str):
-        super().__init__(queueName=queue_name)
 
-
-class CreateEvaluationResponse(pydantic.BaseModel):
+class InitEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime
     groupId: str
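
With the hand-written `__init__` gone, `HumanEvaluator` is constructed with plain pydantic keyword syntax; the old `queue_name` argument no longer exists:

from lmnr.sdk.types import HumanEvaluator

# 0.4.55: HumanEvaluator("my_queue") via the removed custom __init__.
# 0.4.57: the pydantic field name is used directly.
evaluator = HumanEvaluator(queueName="my_queue")
print(evaluator.queueName)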
@@ -161,6 +158,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
     human_evaluators: list[HumanEvaluator] = pydantic.Field(default_factory=list)
     trace_id: uuid.UUID
     executor_span_id: uuid.UUID
+    index: int
 
     # uuid is not serializable by default, so we need to convert it to a string
     def to_dict(self):
@@ -180,6 +178,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
                     for v in self.human_evaluators
                 ],
                 "executorSpanId": str(self.executor_span_id),
+                "index": self.index,
             }
         except Exception as e:
             raise ValueError(f"Error serializing EvaluationResultDatapoint: {e}")
The remaining 24 files listed above are unchanged between 0.4.55 and 0.4.57.