lmnr 0.6.16__py3-none-any.whl → 0.7.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/__init__.py +6 -15
- lmnr/cli/__init__.py +270 -0
- lmnr/cli/datasets.py +371 -0
- lmnr/{cli.py → cli/evals.py} +20 -102
- lmnr/cli/rules.py +42 -0
- lmnr/opentelemetry_lib/__init__.py +9 -2
- lmnr/opentelemetry_lib/decorators/__init__.py +274 -168
- lmnr/opentelemetry_lib/litellm/__init__.py +352 -38
- lmnr/opentelemetry_lib/litellm/utils.py +82 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +191 -129
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +126 -41
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +16 -16
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +59 -61
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
- lmnr/opentelemetry_lib/tracing/__init__.py +119 -18
- lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +124 -25
- lmnr/opentelemetry_lib/tracing/attributes.py +4 -0
- lmnr/opentelemetry_lib/tracing/context.py +200 -0
- lmnr/opentelemetry_lib/tracing/exporter.py +109 -15
- lmnr/opentelemetry_lib/tracing/instruments.py +22 -5
- lmnr/opentelemetry_lib/tracing/processor.py +128 -30
- lmnr/opentelemetry_lib/tracing/span.py +398 -0
- lmnr/opentelemetry_lib/tracing/tracer.py +40 -1
- lmnr/opentelemetry_lib/tracing/utils.py +62 -0
- lmnr/opentelemetry_lib/utils/package_check.py +9 -0
- lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
- lmnr/sdk/browser/background_send_events.py +158 -0
- lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
- lmnr/sdk/browser/browser_use_otel.py +12 -12
- lmnr/sdk/browser/bubus_otel.py +71 -0
- lmnr/sdk/browser/cdp_utils.py +518 -0
- lmnr/sdk/browser/inject_script.js +514 -0
- lmnr/sdk/browser/patchright_otel.py +18 -44
- lmnr/sdk/browser/playwright_otel.py +104 -187
- lmnr/sdk/browser/pw_utils.py +249 -210
- lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
- lmnr/sdk/browser/utils.py +1 -1
- lmnr/sdk/client/asynchronous/async_client.py +47 -15
- lmnr/sdk/client/asynchronous/resources/__init__.py +2 -7
- lmnr/sdk/client/asynchronous/resources/browser_events.py +1 -0
- lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/asynchronous/resources/evals.py +122 -18
- lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/asynchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/resources/__init__.py +2 -2
- lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/synchronous/resources/evals.py +83 -17
- lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/synchronous/resources/tags.py +4 -10
- lmnr/sdk/client/synchronous/sync_client.py +47 -15
- lmnr/sdk/datasets/__init__.py +94 -0
- lmnr/sdk/datasets/file_utils.py +91 -0
- lmnr/sdk/decorators.py +103 -23
- lmnr/sdk/evaluations.py +122 -33
- lmnr/sdk/laminar.py +816 -333
- lmnr/sdk/log.py +7 -2
- lmnr/sdk/types.py +124 -143
- lmnr/sdk/utils.py +115 -2
- lmnr/version.py +1 -1
- {lmnr-0.6.16.dist-info → lmnr-0.7.26.dist-info}/METADATA +71 -78
- lmnr-0.7.26.dist-info/RECORD +116 -0
- lmnr-0.7.26.dist-info/WHEEL +4 -0
- lmnr-0.7.26.dist-info/entry_points.txt +3 -0
- lmnr/opentelemetry_lib/tracing/context_properties.py +0 -65
- lmnr/sdk/browser/rrweb/rrweb.umd.min.cjs +0 -98
- lmnr/sdk/client/asynchronous/resources/agent.py +0 -329
- lmnr/sdk/client/synchronous/resources/agent.py +0 -323
- lmnr/sdk/datasets.py +0 -60
- lmnr-0.6.16.dist-info/LICENSE +0 -75
- lmnr-0.6.16.dist-info/RECORD +0 -61
- lmnr-0.6.16.dist-info/WHEEL +0 -4
- lmnr-0.6.16.dist-info/entry_points.txt +0 -3
lmnr/sdk/client/synchronous/resources/datasets.py (new file)

```diff
@@ -0,0 +1,131 @@
+"""Datasets resource for interacting with Laminar datasets API."""
+
+import math
+import uuid
+
+from lmnr.sdk.client.synchronous.resources.base import BaseResource
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.types import (
+    Datapoint,
+    Dataset,
+    GetDatapointsResponse,
+    PushDatapointsResponse,
+)
+from lmnr.sdk.utils import serialize
+
+logger = get_default_logger(__name__)
+
+DEFAULT_DATASET_PULL_LIMIT = 100
+DEFAULT_DATASET_PUSH_BATCH_SIZE = 100
+
+
+class Datasets(BaseResource):
+    """Resource for interacting with Laminar datasets API."""
+
+    def list_datasets(self) -> list[Dataset]:
+        """List all datasets."""
+        response = self._client.get(
+            f"{self._base_url}/v1/datasets",
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error listing datasets: [{response.status_code}] {response.text}"
+            )
+        return [Dataset.model_validate(dataset) for dataset in response.json()]
+
+    def get_dataset_by_name(self, name: str) -> list[Dataset]:
+        """Get a dataset by name."""
+        response = self._client.get(
+            f"{self._base_url}/v1/datasets",
+            params={"name": name},
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error getting dataset: [{response.status_code}] {response.text}"
+            )
+        return [Dataset.model_validate(dataset) for dataset in response.json()]
+
+    def push(
+        self,
+        points: list[Datapoint],
+        name: str | None = None,
+        id: uuid.UUID | None = None,
+        batch_size: int = DEFAULT_DATASET_PUSH_BATCH_SIZE,
+        create_dataset: bool = False,
+    ) -> PushDatapointsResponse | None:
+        """Push data to a dataset."""
+
+        if name is None and id is None:
+            raise ValueError("Either name or id must be provided")
+
+        if name is not None and id is not None:
+            raise ValueError("Only one of name or id must be provided")
+
+        if create_dataset and name is None:
+            raise ValueError("Name must be provided when creating a new dataset")
+
+        identifier = {"name": name} if name is not None else {"datasetId": id}
+
+        batch_num = 0
+        total_batches = math.ceil(len(points) / batch_size)
+        response = None
+        for i in range(0, len(points), batch_size):
+            batch_num += 1
+            logger.debug(f"Pushing batch {batch_num} of {total_batches}")
+            batch = points[i : i + batch_size]
+            response = self._client.post(
+                f"{self._base_url}/v1/datasets/datapoints",
+                json={
+                    **identifier,
+                    "datapoints": [serialize(point) for point in batch],
+                    "createDataset": create_dataset,
+                },
+                headers=self._headers(),
+            )
+
+            # 201 when creating a new dataset
+            if response.status_code not in [200, 201]:
+                raise ValueError(
+                    f"Error pushing data to dataset: [{response.status_code}] {response.text}"
+                )
+
+        response = PushDatapointsResponse.model_validate(response.json())
+        # Currently, the response only contains the dataset ID,
+        # so it's safe to return the last response only.
+        return response
+
+    def pull(
+        self,
+        name: str | None = None,
+        id: uuid.UUID | None = None,
+        # TODO: move const to one file, import in CLI
+        limit: int = DEFAULT_DATASET_PULL_LIMIT,
+        offset: int = 0,
+    ) -> GetDatapointsResponse:
+        """Pull data from a dataset."""
+
+        if name is None and id is None:
+            raise ValueError("Either name or id must be provided")
+
+        if name is not None and id is not None:
+            raise ValueError("Only one of name or id must be provided")
+
+        identifier = {"name": name} if name is not None else {"datasetId": id}
+
+        params = {
+            **identifier,
+            "offset": offset,
+            "limit": limit,
+        }
+        response = self._client.get(
+            f"{self._base_url}/v1/datasets/datapoints",
+            params=params,
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error pulling data from dataset: [{response.status_code}] {response.text}"
+            )
+        return GetDatapointsResponse.model_validate(response.json())
```
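Taken together, this hunk adds a full dataset API to the synchronous client. A minimal usage sketch, assuming `LaminarClient` is still exported from the package root and that `Datapoint` accepts `data`/`target` fields (the dataset names and payloads below are placeholders, not confirmed by this diff):

```python
from lmnr import LaminarClient  # assumed top-level export, as in 0.6.x
from lmnr.sdk.types import Datapoint

client = LaminarClient(project_api_key="...")

# Pull one page of datapoints from an existing dataset by name.
page = client.datasets.pull(name="my-dataset", limit=50, offset=0)
print(page.total_count, len(page.items))

# Push datapoints, creating the dataset on the fly if it does not exist yet.
# The Datapoint field names here are assumptions for illustration only.
client.datasets.push(
    [Datapoint(data={"question": "2+2?"}, target="4")],
    name="my-dataset-copy",
    create_dataset=True,
)
```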
lmnr/sdk/client/synchronous/resources/evals.py

```diff
@@ -1,23 +1,32 @@
 """Evals resource for interacting with Laminar evaluations API."""
 
 import uuid
-import
+import warnings
+
 from typing import Any
 
 from lmnr.sdk.client.synchronous.resources.base import BaseResource
+from lmnr.sdk.log import get_default_logger
 from lmnr.sdk.types import (
-
+    GetDatapointsResponse,
     EvaluationResultDatapoint,
+    InitEvaluationResponse,
     PartialEvaluationDatapoint,
-    GetDatapointsResponse,
 )
+from lmnr.sdk.utils import serialize
+
+INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH = 16_000_000  # 16MB
+logger = get_default_logger(__name__)
 
 
 class Evals(BaseResource):
     """Resource for interacting with Laminar evaluations API."""
 
     def init(
-        self,
+        self,
+        name: str | None = None,
+        group_name: str | None = None,
+        metadata: dict[str, Any] | None = None,
     ) -> InitEvaluationResponse:
         """Initialize a new evaluation.
 
@@ -53,7 +62,7 @@ class Evals(BaseResource):
     ) -> uuid.UUID:
         """
         Create a new evaluation and return its ID.
-
+
         Parameters:
             name (str | None, optional): Optional name of the evaluation.
             group_name (str | None, optional): An identifier to group evaluations.
@@ -76,7 +85,7 @@ class Evals(BaseResource):
     ) -> uuid.UUID:
         """
         Create a datapoint for an evaluation.
-
+
         Parameters:
             eval_id (uuid.UUID): The evaluation ID.
             data: The input data for the executor.
@@ -84,13 +93,13 @@ class Evals(BaseResource):
             metadata (dict[str, Any] | None, optional): Optional metadata.
             index (int | None, optional): Optional index of the datapoint.
             trace_id (uuid.UUID | None, optional): Optional trace ID.
-
+
         Returns:
             uuid.UUID: The datapoint ID.
         """
-
+
         datapoint_id = uuid.uuid4()
-
+
         # Create a minimal datapoint first
         partial_datapoint = PartialEvaluationDatapoint(
             id=datapoint_id,
@@ -101,7 +110,7 @@ class Evals(BaseResource):
             executor_span_id=uuid.uuid4(),  # Will be updated when executor runs
             metadata=metadata,
         )
-
+
         self.save_datapoints(eval_id, [partial_datapoint])
         return datapoint_id
 
@@ -121,16 +130,24 @@ class Evals(BaseResource):
         Raises:
             ValueError: If there's an error saving the datapoints.
         """
+        length = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+        points = [datapoint.to_dict(max_data_length=length) for datapoint in datapoints]
         response = self._client.post(
             self._base_url + f"/v1/evals/{eval_id}/datapoints",
             json={
-                "points":
+                "points": points,
                 "groupName": group_name,
             },
             headers=self._headers(),
         )
+        if response.status_code == 413:
+            self._retry_save_datapoints(eval_id, datapoints, group_name)
+            return
+
         if response.status_code != 200:
-            raise ValueError(
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
+            )
 
     def update_datapoint(
         self,
@@ -147,11 +164,17 @@ class Evals(BaseResource):
             executor_output (Any): The executor output.
             scores (dict[str, float | int] | None, optional): The scores. Defaults to None.
         """
-
+
         response = self._client.post(
             self._base_url + f"/v1/evals/{eval_id}/datapoints/{datapoint_id}",
             json={
-                "executorOutput":
+                "executorOutput": (
+                    str(serialize(executor_output))[
+                        :INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+                    ]
+                    if executor_output is not None
+                    else None
+                ),
                 "scores": scores,
             },
             headers=self._headers(),
@@ -179,11 +202,18 @@ class Evals(BaseResource):
         Raises:
             ValueError: If there's an error fetching the datapoints.
         """
+
+        warnings.warn(
+            "Use client.datasets.pull instead",
+            DeprecationWarning,
+        )
+
         params = {"name": dataset_name, "offset": offset, "limit": limit}
-
-        self._base_url + "/v1/datasets/datapoints
+        response = self._client.get(
+            self._base_url + "/v1/datasets/datapoints",
+            params=params,
+            headers=self._headers(),
         )
-        response = self._client.get(url, headers=self._headers())
         if response.status_code != 200:
             try:
                 resp_json = response.json()
@@ -195,3 +225,39 @@ class Evals(BaseResource):
                 f"Error fetching datapoints: [{response.status_code}] {response.text}"
             )
         return GetDatapointsResponse.model_validate(response.json())
+
+    def _retry_save_datapoints(
+        self,
+        eval_id: uuid.UUID,
+        datapoints: list[EvaluationResultDatapoint | PartialEvaluationDatapoint],
+        group_name: str | None = None,
+        initial_length: int = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH,
+        max_retries: int = 20,
+    ):
+        retry = 0
+        length = initial_length
+        while retry < max_retries:
+            retry += 1
+            length = length // 2
+            logger.debug(
+                f"Retrying save datapoints: {retry} of {max_retries}, length: {length}"
+            )
+            if length == 0:
+                raise ValueError("Error saving evaluation datapoints")
+            points = [
+                datapoint.to_dict(max_data_length=length) for datapoint in datapoints
+            ]
+            response = self._client.post(
+                self._base_url + f"/v1/evals/{eval_id}/datapoints",
+                json={
+                    "points": points,
+                    "groupName": group_name,
+                },
+                headers=self._headers(),
+            )
+            if response.status_code != 413:
+                break
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
+            )
```
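The `save_datapoints` change is defensive: if the backend rejects a batch with HTTP 413, `_retry_save_datapoints` re-serializes the datapoints with `to_dict(max_data_length=...)`, halving the limit on each attempt until the payload fits, the length reaches zero, or 20 retries are exhausted. A standalone sketch of that back-off schedule (illustration only, not SDK code):

```python
# Truncation lengths _retry_save_datapoints would attempt, starting from the
# 16_000_000-character initial limit and halving on every 413 response.
INITIAL_LENGTH = 16_000_000
MAX_RETRIES = 20

length = INITIAL_LENGTH
schedule = []
for _ in range(MAX_RETRIES):
    length //= 2
    if length == 0:
        break
    schedule.append(length)

print(schedule[:5])   # [8000000, 4000000, 2000000, 1000000, 500000]
print(schedule[-1])   # 15 -- the smallest limit tried before giving up
```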
lmnr/sdk/client/synchronous/resources/evaluators.py (new file)

```diff
@@ -0,0 +1,85 @@
+"""Evaluators resource for creating evaluator scores."""
+
+import uuid
+from typing import Any
+
+from lmnr.sdk.client.synchronous.resources.base import BaseResource
+from lmnr.sdk.utils import format_id
+
+
+class Evaluators(BaseResource):
+    """Resource for creating evaluator scores."""
+
+    def score(
+        self,
+        *,
+        name: str,
+        trace_id: str | int | uuid.UUID | None = None,
+        span_id: str | int | uuid.UUID | None = None,
+        metadata: dict[str, Any] | None = None,
+        score: float,
+    ) -> None:
+        """Create a score for a span.
+
+        Args:
+            name (str): Name of the score
+            trace_id (str | int | uuid.UUID | None, optional): The trace ID to score (will be attached to root span)
+            span_id (str | int | uuid.UUID | None, optional): The span ID to score
+            metadata (dict[str, Any] | None, optional): Additional metadata. Defaults to None.
+            score (float): The score value (float)
+
+        Raises:
+            ValueError: If there's an error creating the score.
+
+        Example:
+            Score by trace ID (will attach to root span):
+
+            >>> laminar_client.evaluators.score(
+            ...     name="quality",
+            ...     trace_id="trace-id-here",
+            ...     score=0.95,
+            ...     metadata={"model": "gpt-4"}
+            ... )
+
+            Score by span ID:
+
+            >>> laminar_client.evaluators.score(
+            ...     name="relevance",
+            ...     span_id="span-id-here",
+            ...     score=0.87
+            ... )
+        """
+        if trace_id is not None and span_id is not None:
+            raise ValueError("Cannot provide both trace_id and span_id. Please provide only one.")
+        if trace_id is None and span_id is None:
+            raise ValueError("Either 'trace_id' or 'span_id' must be provided.")
+
+        if trace_id is not None:
+            formatted_trace_id = format_id(trace_id)
+            payload = {
+                "name": name,
+                "traceId": formatted_trace_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+        else:
+            formatted_span_id = format_id(span_id)
+            payload = {
+                "name": name,
+                "spanId": formatted_span_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+
+        response = self._client.post(
+            self._base_url + "/v1/evaluators/score",
+            json=payload,
+            headers=self._headers(),
+        )
+
+        if response.status_code != 200:
+            if response.status_code == 401:
+                raise ValueError("Unauthorized. Please check your project API key.")
+            raise ValueError(f"Error creating evaluator score: {response.text}")
```
lmnr/sdk/client/synchronous/resources/tags.py

```diff
@@ -5,6 +5,7 @@ import uuid
 
 from lmnr.sdk.client.synchronous.resources.base import BaseResource
 from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.utils import format_id
 
 logger = get_default_logger(__name__)
 
@@ -54,18 +55,11 @@ class Tags(BaseResource):
         ```
         """
         trace_tags = tags if isinstance(tags, list) else [tags]
-
-            trace_id = str(trace_id)
-        elif isinstance(trace_id, int):
-            trace_id = str(uuid.UUID(int=trace_id))
-        elif isinstance(trace_id, str):
-            uuid.UUID(trace_id)
-        else:
-            raise ValueError(f"Invalid trace id: {trace_id}")
+        formatted_trace_id = format_id(trace_id)
 
         url = self._base_url + "/v1/tag"
         payload = {
-            "traceId":
+            "traceId": formatted_trace_id,
             "names": trace_tags,
         }
         response = self._client.post(
@@ -78,7 +72,7 @@ class Tags(BaseResource):
 
         if response.status_code == 404:
             logger.warning(
-                f"Trace {
+                f"Trace {formatted_trace_id} not found. The trace may have not been ended yet."
             )
             return []
 
```
lmnr/sdk/client/synchronous/sync_client.py

```diff
@@ -8,11 +8,12 @@ from typing import TypeVar
 from types import TracebackType
 
 from lmnr.sdk.client.synchronous.resources import (
-    Agent,
     BrowserEvents,
     Evals,
+    Evaluators,
     Tags,
 )
+from lmnr.sdk.client.synchronous.resources.datasets import Datasets
 from lmnr.sdk.utils import from_env
 
 _T = TypeVar("_T", bound="LaminarClient")
@@ -23,10 +24,9 @@ class LaminarClient:
     __project_api_key: str
     __client: httpx.Client = None
 
-    # Resource properties
-    __agent: Agent | None = None
     __evals: Evals | None = None
     __tags: Tags | None = None
+    __evaluators: Evaluators | None = None
 
     def __init__(
         self,
@@ -69,24 +69,40 @@ class LaminarClient:
         self.__client = httpx.Client(
             headers=self._headers(),
             timeout=timeout,
+            # Context: If the server responds with a 413, the connection becomes
+            # poisoned and freezes on subsequent requests, and there is no way
+            # to recover or recycle such connection.
+            # Setting max_keepalive_connections to 0 will resolve this, but is
+            # less efficient, as it will create a new connection
+            # (not client, so still better) for each request.
+            #
+            # Note: from my experiments with a simple python server, forcing the
+            # server to read/consume the request payload from the socket seems
+            # to resolve this, but I haven't figured out how to do that in our
+            # real actix-web backend server and whether it makes sense to do so.
+            #
+            # TODO: investigate if there are better ways to fix this rather than
+            # setting keepalive_expiry to 0. Other alternative: migrate to
+            # requests + aiohttp.
+            #
+            # limits=httpx.Limits(
+            #     max_keepalive_connections=0,
+            #     keepalive_expiry=0,
+            # ),
         )
 
         # Initialize resource objects
-        self.__agent = Agent(self.__client, self.__base_url, self.__project_api_key)
         self.__evals = Evals(self.__client, self.__base_url, self.__project_api_key)
+        self.__evaluators = Evaluators(
+            self.__client, self.__base_url, self.__project_api_key
+        )
         self.__browser_events = BrowserEvents(
             self.__client, self.__base_url, self.__project_api_key
         )
         self.__tags = Tags(self.__client, self.__base_url, self.__project_api_key)
-
-    @property
-    def agent(self) -> Agent:
-        """Get the Agent resource.
-
-        Returns:
-            Agent: The Agent resource instance.
-        """
-        return self.__agent
+        self.__datasets = Datasets(
+            self.__client, self.__base_url, self.__project_api_key
+        )
 
     @property
     def evals(self) -> Evals:
@@ -115,6 +131,24 @@ class LaminarClient:
         """
         return self.__tags
 
+    @property
+    def evaluators(self) -> Evaluators:
+        """Get the Evaluators resource.
+
+        Returns:
+            Evaluators: The Evaluators resource instance.
+        """
+        return self.__evaluators
+
+    @property
+    def datasets(self) -> Datasets:
+        """Get the Datasets resource.
+
+        Returns:
+            Datasets: The Datasets resource instance.
+        """
+        return self.__datasets
+
     def shutdown(self):
         """Shutdown the client by closing underlying connections."""
         self.__client.close()
@@ -155,5 +189,3 @@ class LaminarClient:
             "Content-Type": "application/json",
             "Accept": "application/json",
         }
-
-
```
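With the client wiring above, the `agent` resource is removed and two new resources are reachable from a single `LaminarClient`. A brief sketch (the package-root import path and the IDs are assumptions/placeholders):

```python
from lmnr import LaminarClient  # assumed top-level export

client = LaminarClient(project_api_key="...")

# New in 0.7.x: evaluator scores and datasets live alongside evals and tags.
client.evaluators.score(name="quality", trace_id="<trace-uuid>", score=0.95)
all_datasets = client.datasets.list_datasets()

client.shutdown()  # closes the underlying httpx.Client
```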
lmnr/sdk/datasets/__init__.py (new file)

```diff
@@ -0,0 +1,94 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+import uuid
+
+from lmnr.sdk.client.synchronous.sync_client import LaminarClient
+from lmnr.sdk.datasets.file_utils import load_from_paths
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.types import Datapoint
+
+DEFAULT_FETCH_SIZE = 25
+LOG = get_default_logger(__name__, verbose=False)
+
+
+class EvaluationDataset(ABC):
+    @abstractmethod
+    def __init__(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def __len__(self) -> int:
+        pass
+
+    @abstractmethod
+    def __getitem__(self, idx) -> Datapoint:
+        pass
+
+    def slice(self, start: int, end: int):
+        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+class LaminarDataset(EvaluationDataset):
+    client: LaminarClient
+    id: uuid.UUID | None = None
+
+    def __init__(
+        self,
+        name: str | None = None,
+        id: uuid.UUID | None = None,
+        fetch_size: int = DEFAULT_FETCH_SIZE,
+    ):
+        self.name = name
+        self.id = id
+        if name is None and id is None:
+            raise ValueError("Either name or id must be provided")
+        if name is not None and id is not None:
+            raise ValueError("Only one of name or id must be provided")
+        self._len = None
+        self._fetched_items = []
+        self._offset = 0
+        self._fetch_size = fetch_size
+        self._logger = get_default_logger(self.__class__.__name__)
+
+    def _fetch_batch(self):
+        self._logger.debug(
+            f"dataset name: {self.name}, id: {self.id}. Fetching batch from {self._offset} to "
+            + f"{self._offset + self._fetch_size}"
+        )
+        identifier = {"id": self.id} if self.id is not None else {"name": self.name}
+        resp = self.client.datasets.pull(
+            **identifier,
+            offset=self._offset,
+            limit=self._fetch_size,
+        )
+        self._fetched_items += resp.items
+        self._offset = len(self._fetched_items)
+        if self._len is None:
+            self._len = resp.total_count
+
+    def __len__(self) -> int:
+        if self._len is None:
+            self._fetch_batch()
+        return self._len
+
+    def __getitem__(self, idx) -> Datapoint:
+        if idx >= len(self._fetched_items):
+            self._fetch_batch()
+        return self._fetched_items[idx]
+
+    def set_client(self, client: LaminarClient):
+        self.client = client
+
+    def push(self, paths: str | list[str], recursive: bool = False):
+        paths = [paths] if isinstance(paths, str) else paths
+        paths = [Path(path) for path in paths]
+        data = load_from_paths(paths, recursive)
+        if len(data) == 0:
+            LOG.warning("No data to push. Skipping")
+            return
+        identifier = {"id": self.id} if self.id is not None else {"name": self.name}
+        self.client.datasets.push(data, **identifier)
+        LOG.info(
+            f"Successfully pushed {len(data)} datapoints to dataset [{identifier}]"
+        )
```
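`LaminarDataset` wraps the pull API with lazy, paged fetching, so an evaluation can iterate a remote dataset without downloading it up front. A usage sketch, assuming the top-level `LaminarClient` export; the dataset name and file path are placeholders, and the file formats accepted by `load_from_paths` are not shown in this diff:

```python
from lmnr import LaminarClient  # assumed top-level export
from lmnr.sdk.datasets import LaminarDataset

client = LaminarClient(project_api_key="...")

ds = LaminarDataset(name="my-dataset", fetch_size=25)
ds.set_client(client)  # the dataset pulls lazily through this client

print(len(ds))            # triggers the first pull and learns total_count
first_three = ds.slice(0, 3)

# Upload local files into the same dataset; uses datasets.push under the hood.
ds.push(["./data/examples.json"], recursive=False)
```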