lmnr 0.6.16__py3-none-any.whl → 0.7.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/__init__.py +6 -15
- lmnr/cli/__init__.py +270 -0
- lmnr/cli/datasets.py +371 -0
- lmnr/{cli.py → cli/evals.py} +20 -102
- lmnr/cli/rules.py +42 -0
- lmnr/opentelemetry_lib/__init__.py +9 -2
- lmnr/opentelemetry_lib/decorators/__init__.py +274 -168
- lmnr/opentelemetry_lib/litellm/__init__.py +352 -38
- lmnr/opentelemetry_lib/litellm/utils.py +82 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +191 -129
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +126 -41
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +59 -61
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
- lmnr/opentelemetry_lib/tracing/__init__.py +119 -18
- lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +124 -25
- lmnr/opentelemetry_lib/tracing/attributes.py +4 -0
- lmnr/opentelemetry_lib/tracing/context.py +200 -0
- lmnr/opentelemetry_lib/tracing/exporter.py +109 -15
- lmnr/opentelemetry_lib/tracing/instruments.py +22 -5
- lmnr/opentelemetry_lib/tracing/processor.py +128 -30
- lmnr/opentelemetry_lib/tracing/span.py +398 -0
- lmnr/opentelemetry_lib/tracing/tracer.py +40 -1
- lmnr/opentelemetry_lib/tracing/utils.py +62 -0
- lmnr/opentelemetry_lib/utils/package_check.py +9 -0
- lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
- lmnr/sdk/browser/background_send_events.py +158 -0
- lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
- lmnr/sdk/browser/browser_use_otel.py +12 -12
- lmnr/sdk/browser/bubus_otel.py +71 -0
- lmnr/sdk/browser/cdp_utils.py +518 -0
- lmnr/sdk/browser/inject_script.js +514 -0
- lmnr/sdk/browser/patchright_otel.py +18 -44
- lmnr/sdk/browser/playwright_otel.py +104 -187
- lmnr/sdk/browser/pw_utils.py +249 -210
- lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
- lmnr/sdk/browser/utils.py +1 -1
- lmnr/sdk/client/asynchronous/async_client.py +47 -15
- lmnr/sdk/client/asynchronous/resources/__init__.py +2 -7
- lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
- lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/asynchronous/resources/evals.py +122 -18
- lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/asynchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/resources/__init__.py +2 -2
- lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/synchronous/resources/evals.py +83 -17
- lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/synchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/sync_client.py +47 -15
- lmnr/sdk/datasets/__init__.py +94 -0
- lmnr/sdk/datasets/file_utils.py +91 -0
- lmnr/sdk/decorators.py +103 -23
- lmnr/sdk/evaluations.py +122 -33
- lmnr/sdk/laminar.py +816 -333
- lmnr/sdk/log.py +7 -2
- lmnr/sdk/types.py +124 -143
- lmnr/sdk/utils.py +115 -2
- lmnr/version.py +1 -1
- {lmnr-0.6.16.dist-info → lmnr-0.7.26.dist-info}/METADATA +71 -78
- lmnr-0.7.26.dist-info/RECORD +116 -0
- lmnr-0.7.26.dist-info/WHEEL +4 -0
- lmnr-0.7.26.dist-info/entry_points.txt +3 -0
- lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
- lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
- lmnr/sdk/client/asynchronous/resources/agent.py +0 -329
- lmnr/sdk/client/synchronous/resources/agent.py +0 -323
- lmnr/sdk/datasets.py +0 -60
- lmnr-0.6.16.dist-info/LICENSE +0 -75
- lmnr-0.6.16.dist-info/RECORD +0 -61
- lmnr-0.6.16.dist-info/WHEEL +0 -4
- lmnr-0.6.16.dist-info/entry_points.txt +0 -3
lmnr/sdk/client/asynchronous/async_client.py

@@ -8,11 +8,12 @@ from typing import TypeVar
 from types import TracebackType
 
 from lmnr.sdk.client.asynchronous.resources import (
-    AsyncAgent,
     AsyncBrowserEvents,
     AsyncEvals,
     AsyncTags,
+    AsyncEvaluators,
 )
+from lmnr.sdk.client.asynchronous.resources.datasets import AsyncDatasets
 from lmnr.sdk.utils import from_env
 
 _T = TypeVar("_T", bound="AsyncLaminarClient")
@@ -65,28 +66,42 @@ class AsyncLaminarClient:
         self.__client = httpx.AsyncClient(
             headers=self._headers(),
             timeout=timeout,
+            # Context: If the server responds with a 413, the connection becomes
+            # poisoned and freezes on subsequent requests, and there is no way
+            # to recover or recycle such connection.
+            # Setting max_keepalive_connections to 0 will resolve this, but is
+            # less efficient, as it will create a new connection
+            # (not client, so still better) for each request.
+            #
+            # Note: from my experiments with a simple python server, forcing the
+            # server to read/consume the request payload from the socket seems
+            # to resolve this, but I haven't figured out how to do that in our
+            # real actix-web backend server and whether it makes sense to do so.
+            #
+            # TODO: investigate if there are better ways to fix this rather than
+            # setting keepalive_expiry to 0. Other alternative: migrate to
+            # requests + aiohttp.
+            #
+            # limits=httpx.Limits(
+            #     max_keepalive_connections=0,
+            #     keepalive_expiry=0,
+            # ),
         )
 
         # Initialize resource objects
-        self.__agent = AsyncAgent(
+        self.__evals = AsyncEvals(
             self.__client, self.__base_url, self.__project_api_key
         )
-        self.__evals = AsyncEvals(
+        self.__evaluators = AsyncEvaluators(
             self.__client, self.__base_url, self.__project_api_key
         )
         self.__browser_events = AsyncBrowserEvents(
             self.__client, self.__base_url, self.__project_api_key
         )
         self.__tags = AsyncTags(self.__client, self.__base_url, self.__project_api_key)
-
-    @property
-    def agent(self) -> AsyncAgent:
-        """Get the Agent resource.
-
-        Returns:
-            Agent: The Agent resource instance.
-        """
-        return self.__agent
+        self.__datasets = AsyncDatasets(
+            self.__client, self.__base_url, self.__project_api_key
+        )
 
     @property
     def evals(self) -> AsyncEvals:
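For reference, a minimal sketch of what the commented-out `limits` workaround above would look like when enabled. This is illustrative, not part of the released code; `httpx.Limits` and its keyword arguments are standard httpx API:

```python
import httpx

# Disable connection keep-alive so every request gets a fresh connection.
# This avoids reusing a connection poisoned by a 413 response, at the cost
# of a new TCP connection (not a new client) per request.
client = httpx.AsyncClient(
    limits=httpx.Limits(
        max_keepalive_connections=0,  # never pool idle connections
        keepalive_expiry=0,           # expire keep-alive connections immediately
    ),
)
```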
@@ -102,7 +117,7 @@ class AsyncLaminarClient:
         """Get the BrowserEvents resource.
 
         Returns:
-            BrowserEvents: The BrowserEvents resource instance.
+            AsyncBrowserEvents: The BrowserEvents resource instance.
         """
         return self.__browser_events
 
@@ -115,6 +130,25 @@ class AsyncLaminarClient:
         """
         return self.__tags
 
+    @property
+    def evaluators(self) -> AsyncEvaluators:
+        """Get the Evaluators resource.
+
+        Returns:
+            AsyncEvaluators: The Evaluators resource instance.
+        """
+        return self.__evaluators
+
+    @property
+    def datasets(self) -> AsyncDatasets:
+        """Get the Datasets resource.
+
+        Returns:
+            AsyncDatasets: The Datasets resource instance.
+        """
+        return self.__datasets
+
+    @property
     def is_closed(self) -> bool:
         return self.__client.is_closed
 
@@ -144,5 +178,3 @@ class AsyncLaminarClient:
             "Content-Type": "application/json",
             "Accept": "application/json",
         }
-
-
lmnr/sdk/client/asynchronous/resources/__init__.py

@@ -1,11 +1,6 @@
-from lmnr.sdk.client.asynchronous.resources.agent import AsyncAgent
 from lmnr.sdk.client.asynchronous.resources.browser_events import AsyncBrowserEvents
 from lmnr.sdk.client.asynchronous.resources.evals import AsyncEvals
 from lmnr.sdk.client.asynchronous.resources.tags import AsyncTags
+from lmnr.sdk.client.asynchronous.resources.evaluators import AsyncEvaluators
 
-__all__ = [
-    "AsyncAgent",
-    "AsyncEvals",
-    "AsyncBrowserEvents",
-    "AsyncTags",
-]
+__all__ = ["AsyncEvals", "AsyncBrowserEvents", "AsyncTags", "AsyncEvaluators"]
lmnr/sdk/client/asynchronous/resources/datasets.py

@@ -0,0 +1,131 @@
+"""Datasets resource for interacting with Laminar datasets API."""
+
+import math
+import uuid
+
+from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.types import (
+    Datapoint,
+    Dataset,
+    GetDatapointsResponse,
+    PushDatapointsResponse,
+)
+from lmnr.sdk.utils import serialize
+
+logger = get_default_logger(__name__)
+
+DEFAULT_DATASET_PULL_LIMIT = 100
+DEFAULT_DATASET_PUSH_BATCH_SIZE = 100
+
+
+class AsyncDatasets(BaseAsyncResource):
+    """Resource for interacting with Laminar datasets API."""
+
+    async def list_datasets(self) -> list[Dataset]:
+        """List all datasets."""
+        response = await self._client.get(
+            f"{self._base_url}/v1/datasets",
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error listing datasets: [{response.status_code}] {response.text}"
+            )
+        return [Dataset.model_validate(dataset) for dataset in response.json()]
+
+    async def get_dataset_by_name(self, name: str) -> list[Dataset]:
+        """Get a dataset by name."""
+        response = await self._client.get(
+            f"{self._base_url}/v1/datasets",
+            params={"name": name},
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error getting dataset: [{response.status_code}] {response.text}"
+            )
+        return [Dataset.model_validate(dataset) for dataset in response.json()]
+
+    async def push(
+        self,
+        points: list[Datapoint],
+        name: str | None = None,
+        id: uuid.UUID | None = None,
+        batch_size: int = DEFAULT_DATASET_PUSH_BATCH_SIZE,
+        create_dataset: bool = False,
+    ) -> PushDatapointsResponse | None:
+        """Push data to a dataset."""
+
+        if name is None and id is None:
+            raise ValueError("Either name or id must be provided")
+
+        if name is not None and id is not None:
+            raise ValueError("Only one of name or id must be provided")
+
+        if create_dataset and name is None:
+            raise ValueError("Name must be provided when creating a new dataset")
+
+        identifier = {"name": name} if name is not None else {"datasetId": id}
+
+        batch_num = 0
+        total_batches = math.ceil(len(points) / batch_size)
+        response = None
+        for i in range(0, len(points), batch_size):
+            batch_num += 1
+            logger.debug(f"Pushing batch {batch_num} of {total_batches}")
+            batch = points[i : i + batch_size]
+            response = await self._client.post(
+                f"{self._base_url}/v1/datasets/datapoints",
+                json={
+                    **identifier,
+                    "datapoints": [serialize(point) for point in batch],
+                    "createDataset": create_dataset,
+                },
+                headers=self._headers(),
+            )
+
+            # 201 when creating a new dataset
+            if response.status_code not in [200, 201]:
+                raise ValueError(
+                    f"Error pushing data to dataset: [{response.status_code}] {response.text}"
+                )
+
+        response = PushDatapointsResponse.model_validate(response.json())
+        # Currently, the response only contains the dataset ID,
+        # so it's safe to return the last response only.
+        return response
+
+    async def pull(
+        self,
+        name: str | None = None,
+        id: uuid.UUID | None = None,
+        # TODO: move const to one file, import in CLI
+        limit: int = DEFAULT_DATASET_PULL_LIMIT,
+        offset: int = 0,
+    ) -> GetDatapointsResponse:
+        """Pull data from a dataset."""
+
+        if name is None and id is None:
+            raise ValueError("Either name or id must be provided")
+
+        if name is not None and id is not None:
+            raise ValueError("Only one of name or id must be provided")
+
+        identifier = {"name": name} if name is not None else {"datasetId": id}
+
+        params = {
+            **identifier,
+            "offset": offset,
+            "limit": limit,
+        }
+        response = await self._client.get(
+            f"{self._base_url}/v1/datasets/datapoints",
+            params=params,
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error pulling data from dataset: [{response.status_code}] {response.text}"
+            )
+        return GetDatapointsResponse.model_validate(response.json())
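A short usage sketch of the new datasets resource. It assumes `AsyncLaminarClient` is importable from the package root and usable as an async context manager (the `TracebackType` import in async_client.py suggests it is, but treat that as an assumption); the dataset name is hypothetical:

```python
import asyncio

from lmnr import AsyncLaminarClient  # assumed import path for the client


async def main():
    # Assumption: the client supports `async with` for cleanup.
    async with AsyncLaminarClient() as client:
        # Enumerate datasets exposed by the new resource.
        datasets = await client.datasets.list_datasets()
        # Page through datapoints; offset/limit match pull()'s parameters above.
        page = await client.datasets.pull(name="my-dataset", limit=100, offset=0)


asyncio.run(main())
```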
lmnr/sdk/client/asynchronous/resources/evals.py

@@ -1,21 +1,32 @@
 """Evals resource for interacting with Laminar evaluations API."""
 
-from typing import Any
 import uuid
+import warnings
+
+from typing import Any
 
 from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
+from lmnr.sdk.log import get_default_logger
 from lmnr.sdk.types import (
+    GetDatapointsResponse,
     InitEvaluationResponse,
     EvaluationResultDatapoint,
     PartialEvaluationDatapoint,
 )
+from lmnr.sdk.utils import serialize
+
+INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH = 16_000_000  # 16MB
+logger = get_default_logger(__name__)
 
 
 class AsyncEvals(BaseAsyncResource):
     """Resource for interacting with Laminar evaluations API."""
 
     async def init(
-        self,
+        self,
+        name: str | None = None,
+        group_name: str | None = None,
+        metadata: dict[str, Any] | None = None,
     ) -> InitEvaluationResponse:
         """Initialize a new evaluation.
 
@@ -51,7 +62,7 @@ class AsyncEvals(BaseAsyncResource):
     ) -> uuid.UUID:
         """
         Create a new evaluation and return its ID.
-
+
         Parameters:
             name (str | None, optional): Optional name of the evaluation.
             group_name (str | None, optional): An identifier to group evaluations.
@@ -60,7 +71,9 @@ class AsyncEvals(BaseAsyncResource):
         Returns:
             uuid.UUID: The evaluation ID.
         """
-        evaluation = await self.init(
+        evaluation = await self.init(
+            name=name, group_name=group_name, metadata=metadata
+        )
         return evaluation.id
 
     async def create_datapoint(
@@ -74,7 +87,7 @@ class AsyncEvals(BaseAsyncResource):
     ) -> uuid.UUID:
         """
         Create a datapoint for an evaluation.
-
+
         Parameters:
             eval_id (uuid.UUID): The evaluation ID.
             data: The input data for the executor.
@@ -82,13 +95,13 @@ class AsyncEvals(BaseAsyncResource):
             metadata (dict[str, Any] | None, optional): Optional metadata.
             index (int | None, optional): Optional index of the datapoint.
             trace_id (uuid.UUID | None, optional): Optional trace ID.
-
+
         Returns:
             uuid.UUID: The datapoint ID.
         """
-
+
         datapoint_id = uuid.uuid4()
-
+
         # Create a minimal datapoint first
         partial_datapoint = PartialEvaluationDatapoint(
             id=datapoint_id,
@@ -99,7 +112,7 @@ class AsyncEvals(BaseAsyncResource):
             executor_span_id=uuid.uuid4(),  # Will be updated when executor runs
             metadata=metadata,
         )
-
+
         await self.save_datapoints(eval_id, [partial_datapoint])
         return datapoint_id
 
@@ -119,18 +132,67 @@ class AsyncEvals(BaseAsyncResource):
         Raises:
             ValueError: If there's an error saving the datapoints.
         """
+        length = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+        points = [datapoint.to_dict(max_data_length=length) for datapoint in datapoints]
         response = await self._client.post(
             self._base_url + f"/v1/evals/{eval_id}/datapoints",
             json={
-                "points":
+                "points": points,
                 "groupName": group_name,
             },
             headers=self._headers(),
         )
+        if response.status_code == 413:
+            await self._retry_save_datapoints(eval_id, datapoints, group_name)
+            return
+
         if response.status_code != 200:
-            raise ValueError(
-
-
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
+            )
+
+    async def get_datapoints(
+        self,
+        dataset_name: str,
+        offset: int,
+        limit: int,
+    ) -> GetDatapointsResponse:
+        """Get datapoints from a dataset.
+
+        Args:
+            dataset_name (str): The name of the dataset.
+            offset (int): The offset to start from.
+            limit (int): The maximum number of datapoints to return.
+
+        Returns:
+            GetDatapointsResponse: The response containing the datapoints.
+
+        Raises:
+            ValueError: If there's an error fetching the datapoints.
+        """
+        warnings.warn(
+            "Use client.datasets.pull instead",
+            DeprecationWarning,
+        )
+
+        params = {"name": dataset_name, "offset": offset, "limit": limit}
+        response = await self._client.get(
+            self._base_url + "/v1/datasets/datapoints",
+            params=params,
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {resp_json}"
+                )
+            except Exception:
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
+                )
+        return GetDatapointsResponse.model_validate(response.json())
+
     async def update_datapoint(
         self,
         eval_id: uuid.UUID,
@@ -146,17 +208,59 @@ class AsyncEvals(BaseAsyncResource):
             executor_output (Any): The executor output.
             scores (dict[str, float | int] | None, optional): The scores. Defaults to None.
         """
-
+
         response = await self._client.post(
             self._base_url + f"/v1/evals/{eval_id}/datapoints/{datapoint_id}",
             json={
-                "executorOutput":
+                "executorOutput": (
+                    str(serialize(executor_output))[
+                        :INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+                    ]
+                    if executor_output is not None
+                    else None
+                ),
                 "scores": scores,
             },
             headers=self._headers(),
         )
 
         if response.status_code != 200:
-            raise ValueError(
-
-
+            raise ValueError(
+                f"Error updating evaluation datapoint: [{response.status_code}] {response.text}"
+            )
+
+    async def _retry_save_datapoints(
+        self,
+        eval_id: uuid.UUID,
+        datapoints: list[EvaluationResultDatapoint | PartialEvaluationDatapoint],
+        group_name: str | None = None,
+        initial_length: int = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH,
+        max_retries: int = 20,
+    ):
+        retry = 0
+        length = initial_length
+        while retry < max_retries:
+            retry += 1
+            length = length // 2
+            logger.debug(
+                f"Retrying save datapoints: {retry} of {max_retries}, length: {length}"
+            )
+            if length == 0:
+                raise ValueError("Error saving evaluation datapoints")
+            points = [
+                datapoint.to_dict(max_data_length=length) for datapoint in datapoints
+            ]
+            response = await self._client.post(
+                self._base_url + f"/v1/evals/{eval_id}/datapoints",
+                json={
+                    "points": points,
+                    "groupName": group_name,
+                },
+                headers=self._headers(),
+            )
+            if response.status_code != 413:
+                break
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
+            )
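A standalone sketch of the retry strategy `_retry_save_datapoints` implements: on HTTP 413, halve the per-datapoint data cap and resend until the server accepts or the cap reaches zero. The `send` callable here is hypothetical, standing in for the POST in the method above:

```python
def shrink_until_accepted(send, initial_length: int, max_retries: int = 20) -> int:
    """Halve the payload cap on each 413 until the request is accepted.

    `send` is a hypothetical callable taking a length cap and returning
    an HTTP status code; it stands in for the real datapoints POST.
    """
    length = initial_length
    status = 413
    for _ in range(max_retries):
        length //= 2
        if length == 0:
            raise ValueError("payload cannot be shrunk further")
        status = send(length)
        if status != 413:
            break
    return status


# Example: a fake server that accepts payloads capped at 1 MB or less.
assert shrink_until_accepted(lambda n: 200 if n <= 1_000_000 else 413, 16_000_000) == 200
```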
lmnr/sdk/client/asynchronous/resources/evaluators.py

@@ -0,0 +1,85 @@
+"""Evaluators resource for creating evaluator scores."""
+
+import uuid
+from typing import Any
+
+from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
+from lmnr.sdk.utils import format_id
+
+
+class AsyncEvaluators(BaseAsyncResource):
+    """Resource for creating evaluator scores."""
+
+    async def score(
+        self,
+        *,
+        name: str,
+        trace_id: str | int | uuid.UUID | None = None,
+        span_id: str | int | uuid.UUID | None = None,
+        metadata: dict[str, Any] | None = None,
+        score: float,
+    ) -> None:
+        """Create a score for a span.
+
+        Args:
+            name (str): Name of the score
+            trace_id (str | int | uuid.UUID | None, optional): The trace ID to score (will be attached to root span)
+            span_id (str | int | uuid.UUID | None, optional): The span ID to score
+            metadata (dict[str, Any] | None, optional): Additional metadata. Defaults to None.
+            score (float): The score value (float)
+
+        Raises:
+            ValueError: If there's an error creating the score.
+
+        Example:
+            Score by trace ID (will attach to root span):
+
+            >>> await laminar_client.evaluators.score(
+            ...     name="quality",
+            ...     trace_id="trace-id-here",
+            ...     score=0.95,
+            ...     metadata={"model": "gpt-4"}
+            ... )
+
+            Score by span ID:
+
+            >>> await laminar_client.evaluators.score(
+            ...     name="relevance",
+            ...     span_id="span-id-here",
+            ...     score=0.87
+            ... )
+        """
+        if trace_id is not None and span_id is not None:
+            raise ValueError("Cannot provide both trace_id and span_id. Please provide only one.")
+        if trace_id is None and span_id is None:
+            raise ValueError("Either 'trace_id' or 'span_id' must be provided.")
+
+        if trace_id is not None:
+            formatted_trace_id = format_id(trace_id)
+            payload = {
+                "name": name,
+                "traceId": formatted_trace_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+        else:
+            formatted_span_id = format_id(span_id)
+            payload = {
+                "name": name,
+                "spanId": formatted_span_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+
+        response = await self._client.post(
+            self._base_url + "/v1/evaluators/score",
+            json=payload,
+            headers=self._headers(),
+        )
+
+        if response.status_code != 200:
+            if response.status_code == 401:
+                raise ValueError("Unauthorized. Please check your project API key.")
+            raise ValueError(f"Error creating evaluator score: {response.text}")
lmnr/sdk/client/asynchronous/resources/tags.py

@@ -5,6 +5,7 @@ import uuid
 
 from lmnr.sdk.client.asynchronous.resources.base import BaseAsyncResource
 from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.utils import format_id
 
 logger = get_default_logger(__name__)
 
@@ -54,18 +55,11 @@ class AsyncTags(BaseAsyncResource):
         ```
         """
         trace_tags = tags if isinstance(tags, list) else [tags]
-        if isinstance(trace_id, uuid.UUID):
-            trace_id = str(trace_id)
-        elif isinstance(trace_id, int):
-            trace_id = str(uuid.UUID(int=trace_id))
-        elif isinstance(trace_id, str):
-            uuid.UUID(trace_id)  # Will raise ValueError if invalid
-        else:
-            raise ValueError(f"Invalid trace id: {trace_id}")
+        formatted_trace_id = format_id(trace_id)
 
         url = self._base_url + "/v1/tag"
         payload = {
-            "traceId": trace_id,
+            "traceId": formatted_trace_id,
             "names": trace_tags,
         }
         response = await self._client.post(
@@ -78,7 +72,7 @@ class AsyncTags(BaseAsyncResource):
 
         if response.status_code == 404:
             logger.warning(
-                f"Trace {trace_id} not found. The trace may have not been ended yet."
+                f"Trace {formatted_trace_id} not found. The trace may have not been ended yet."
             )
             return []
 
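A sketch of what `format_id` plausibly does, inferred from the inline logic it replaces in the hunk above; the real implementation lives in `lmnr/sdk/utils.py` and may differ:

```python
import uuid

# Inferred behavior, not the package's actual code: normalize a
# UUID / int / str identifier to a canonical UUID string, mirroring the
# removed isinstance chain in tags.py.
def format_id(value: str | int | uuid.UUID) -> str:
    if isinstance(value, uuid.UUID):
        return str(value)
    if isinstance(value, int):
        return str(uuid.UUID(int=value))
    if isinstance(value, str):
        uuid.UUID(value)  # raises ValueError if not a valid UUID
        return value
    raise ValueError(f"Invalid id: {value}")
```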
lmnr/sdk/client/synchronous/resources/__init__.py

@@ -1,6 +1,6 @@
-from lmnr.sdk.client.synchronous.resources.agent import Agent
 from lmnr.sdk.client.synchronous.resources.browser_events import BrowserEvents
 from lmnr.sdk.client.synchronous.resources.evals import Evals
 from lmnr.sdk.client.synchronous.resources.tags import Tags
+from lmnr.sdk.client.synchronous.resources.evaluators import Evaluators
 
-__all__ = ["Agent", "Evals", "BrowserEvents", "Tags"]
+__all__ = ["Evals", "Evaluators", "BrowserEvents", "Tags"]