lmnr 0.4.53.dev0__py3-none-any.whl → 0.7.26__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
- lmnr/__init__.py +32 -11
- lmnr/cli/__init__.py +270 -0
- lmnr/cli/datasets.py +371 -0
- lmnr/cli/evals.py +111 -0
- lmnr/cli/rules.py +42 -0
- lmnr/opentelemetry_lib/__init__.py +70 -0
- lmnr/opentelemetry_lib/decorators/__init__.py +337 -0
- lmnr/opentelemetry_lib/litellm/__init__.py +685 -0
- lmnr/opentelemetry_lib/litellm/utils.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/__init__.py +849 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/config.py +13 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_emitter.py +211 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/span_utils.py +401 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/streaming.py +425 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/utils.py +332 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/anthropic/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/__init__.py +451 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/claude_agent/proxy.py +144 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_agent/__init__.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/__init__.py +476 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/cua_computer/utils.py +12 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/__init__.py +599 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/config.py +9 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/schema_utils.py +26 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/google_genai/utils.py +330 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/__init__.py +488 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/config.py +8 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_emitter.py +143 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/span_utils.py +229 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/utils.py +92 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/groq/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/__init__.py +381 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/kernel/utils.py +36 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/__init__.py +121 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/langgraph/utils.py +60 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/__init__.py +61 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/__init__.py +472 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +1185 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +305 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/config.py +16 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +312 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_emitter.py +100 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/event_models.py +41 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +68 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/utils.py +197 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v0/__init__.py +176 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/__init__.py +368 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/assistant_wrappers.py +325 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/event_handler_wrapper.py +135 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +786 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openai/version.py +1 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/openhands_ai/__init__.py +388 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/opentelemetry/__init__.py +69 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/skyvern/__init__.py +191 -0
- lmnr/opentelemetry_lib/opentelemetry/instrumentation/threading/__init__.py +197 -0
- lmnr/opentelemetry_lib/tracing/__init__.py +263 -0
- lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +516 -0
- lmnr/{openllmetry_sdk → opentelemetry_lib}/tracing/attributes.py +21 -8
- lmnr/opentelemetry_lib/tracing/context.py +200 -0
- lmnr/opentelemetry_lib/tracing/exporter.py +153 -0
- lmnr/opentelemetry_lib/tracing/instruments.py +140 -0
- lmnr/opentelemetry_lib/tracing/processor.py +193 -0
- lmnr/opentelemetry_lib/tracing/span.py +398 -0
- lmnr/opentelemetry_lib/tracing/tracer.py +57 -0
- lmnr/opentelemetry_lib/tracing/utils.py +62 -0
- lmnr/opentelemetry_lib/utils/package_check.py +18 -0
- lmnr/opentelemetry_lib/utils/wrappers.py +11 -0
- lmnr/sdk/browser/__init__.py +0 -0
- lmnr/sdk/browser/background_send_events.py +158 -0
- lmnr/sdk/browser/browser_use_cdp_otel.py +100 -0
- lmnr/sdk/browser/browser_use_otel.py +142 -0
- lmnr/sdk/browser/bubus_otel.py +71 -0
- lmnr/sdk/browser/cdp_utils.py +518 -0
- lmnr/sdk/browser/inject_script.js +514 -0
- lmnr/sdk/browser/patchright_otel.py +151 -0
- lmnr/sdk/browser/playwright_otel.py +322 -0
- lmnr/sdk/browser/pw_utils.py +363 -0
- lmnr/sdk/browser/recorder/record.umd.min.cjs +84 -0
- lmnr/sdk/browser/utils.py +70 -0
- lmnr/sdk/client/asynchronous/async_client.py +180 -0
- lmnr/sdk/client/asynchronous/resources/__init__.py +6 -0
- lmnr/sdk/client/asynchronous/resources/base.py +32 -0
- lmnr/sdk/client/asynchronous/resources/browser_events.py +41 -0
- lmnr/sdk/client/asynchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/asynchronous/resources/evals.py +266 -0
- lmnr/sdk/client/asynchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/asynchronous/resources/tags.py +83 -0
- lmnr/sdk/client/synchronous/resources/__init__.py +6 -0
- lmnr/sdk/client/synchronous/resources/base.py +32 -0
- lmnr/sdk/client/synchronous/resources/browser_events.py +40 -0
- lmnr/sdk/client/synchronous/resources/datasets.py +131 -0
- lmnr/sdk/client/synchronous/resources/evals.py +263 -0
- lmnr/sdk/client/synchronous/resources/evaluators.py +85 -0
- lmnr/sdk/client/synchronous/resources/tags.py +83 -0
- lmnr/sdk/client/synchronous/sync_client.py +191 -0
- lmnr/sdk/datasets/__init__.py +94 -0
- lmnr/sdk/datasets/file_utils.py +91 -0
- lmnr/sdk/decorators.py +163 -26
- lmnr/sdk/eval_control.py +3 -2
- lmnr/sdk/evaluations.py +403 -191
- lmnr/sdk/laminar.py +1080 -549
- lmnr/sdk/log.py +7 -2
- lmnr/sdk/types.py +246 -134
- lmnr/sdk/utils.py +151 -7
- lmnr/version.py +46 -0
- {lmnr-0.4.53.dev0.dist-info → lmnr-0.7.26.dist-info}/METADATA +152 -106
- lmnr-0.7.26.dist-info/RECORD +116 -0
- lmnr-0.7.26.dist-info/WHEEL +4 -0
- lmnr-0.7.26.dist-info/entry_points.txt +3 -0
- lmnr/cli.py +0 -101
- lmnr/openllmetry_sdk/.python-version +0 -1
- lmnr/openllmetry_sdk/__init__.py +0 -72
- lmnr/openllmetry_sdk/config/__init__.py +0 -9
- lmnr/openllmetry_sdk/decorators/base.py +0 -185
- lmnr/openllmetry_sdk/instruments.py +0 -38
- lmnr/openllmetry_sdk/tracing/__init__.py +0 -1
- lmnr/openllmetry_sdk/tracing/content_allow_list.py +0 -24
- lmnr/openllmetry_sdk/tracing/context_manager.py +0 -13
- lmnr/openllmetry_sdk/tracing/tracing.py +0 -884
- lmnr/openllmetry_sdk/utils/in_memory_span_exporter.py +0 -61
- lmnr/openllmetry_sdk/utils/package_check.py +0 -7
- lmnr/openllmetry_sdk/version.py +0 -1
- lmnr/sdk/datasets.py +0 -55
- lmnr-0.4.53.dev0.dist-info/LICENSE +0 -75
- lmnr-0.4.53.dev0.dist-info/RECORD +0 -33
- lmnr-0.4.53.dev0.dist-info/WHEEL +0 -4
- lmnr-0.4.53.dev0.dist-info/entry_points.txt +0 -3
- /lmnr/{openllmetry_sdk → opentelemetry_lib}/.flake8 +0 -0
- /lmnr/{openllmetry_sdk → opentelemetry_lib}/utils/__init__.py +0 -0
- /lmnr/{openllmetry_sdk → opentelemetry_lib}/utils/json_encoder.py +0 -0
- /lmnr/{openllmetry_sdk/decorators/__init__.py → py.typed} +0 -0
````diff
--- /dev/null
+++ b/lmnr/sdk/client/synchronous/resources/evals.py
@@ -0,0 +1,263 @@
+"""Evals resource for interacting with Laminar evaluations API."""
+
+import uuid
+import warnings
+
+from typing import Any
+
+from lmnr.sdk.client.synchronous.resources.base import BaseResource
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.types import (
+    GetDatapointsResponse,
+    EvaluationResultDatapoint,
+    InitEvaluationResponse,
+    PartialEvaluationDatapoint,
+)
+from lmnr.sdk.utils import serialize
+
+INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH = 16_000_000  # 16MB
+logger = get_default_logger(__name__)
+
+
+class Evals(BaseResource):
+    """Resource for interacting with Laminar evaluations API."""
+
+    def init(
+        self,
+        name: str | None = None,
+        group_name: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> InitEvaluationResponse:
+        """Initialize a new evaluation.
+
+        Args:
+            name (str | None, optional): Name of the evaluation. Defaults to None.
+            group_name (str | None, optional): Group name for the evaluation. Defaults to None.
+            metadata (dict[str, Any] | None, optional): Metadata to associate with the evaluation. Defaults to None.
+
+        Returns:
+            InitEvaluationResponse: The response from the initialization request.
+        """
+        response = self._client.post(
+            self._base_url + "/v1/evals",
+            json={
+                "name": name,
+                "groupName": group_name,
+                "metadata": metadata,
+            },
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            if response.status_code == 401:
+                raise ValueError("Unauthorized. Please check your project API key.")
+            raise ValueError(f"Error initializing evaluation: {response.text}")
+        resp_json = response.json()
+        return InitEvaluationResponse.model_validate(resp_json)
+
+    def create_evaluation(
+        self,
+        name: str | None = None,
+        group_name: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> uuid.UUID:
+        """
+        Create a new evaluation and return its ID.
+
+        Parameters:
+            name (str | None, optional): Optional name of the evaluation.
+            group_name (str | None, optional): An identifier to group evaluations.
+            metadata (dict[str, Any] | None, optional): Metadata to associate with the evaluation. Defaults to None.
+
+        Returns:
+            uuid.UUID: The evaluation ID.
+        """
+        evaluation = self.init(name=name, group_name=group_name, metadata=metadata)
+        return evaluation.id
+
+    def create_datapoint(
+        self,
+        eval_id: uuid.UUID,
+        data: Any,
+        target: Any = None,
+        metadata: dict[str, Any] | None = None,
+        index: int | None = None,
+        trace_id: uuid.UUID | None = None,
+    ) -> uuid.UUID:
+        """
+        Create a datapoint for an evaluation.
+
+        Parameters:
+            eval_id (uuid.UUID): The evaluation ID.
+            data: The input data for the executor.
+            target: The target/expected output for evaluators.
+            metadata (dict[str, Any] | None, optional): Optional metadata.
+            index (int | None, optional): Optional index of the datapoint.
+            trace_id (uuid.UUID | None, optional): Optional trace ID.
+
+        Returns:
+            uuid.UUID: The datapoint ID.
+        """
+
+        datapoint_id = uuid.uuid4()
+
+        # Create a minimal datapoint first
+        partial_datapoint = PartialEvaluationDatapoint(
+            id=datapoint_id,
+            data=data,
+            target=target,
+            index=index or 0,
+            trace_id=trace_id or uuid.uuid4(),
+            executor_span_id=uuid.uuid4(),  # Will be updated when executor runs
+            metadata=metadata,
+        )
+
+        self.save_datapoints(eval_id, [partial_datapoint])
+        return datapoint_id
+
+    def save_datapoints(
+        self,
+        eval_id: uuid.UUID,
+        datapoints: list[EvaluationResultDatapoint | PartialEvaluationDatapoint],
+        group_name: str | None = None,
+    ):
+        """Save evaluation datapoints.
+
+        Args:
+            eval_id (uuid.UUID): The evaluation ID.
+            datapoints (list[EvaluationResultDatapoint | PartialEvaluationDatapoint]): The datapoints to save.
+            group_name (str | None, optional): Group name for the datapoints. Defaults to None.
+
+        Raises:
+            ValueError: If there's an error saving the datapoints.
+        """
+        length = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+        points = [datapoint.to_dict(max_data_length=length) for datapoint in datapoints]
+        response = self._client.post(
+            self._base_url + f"/v1/evals/{eval_id}/datapoints",
+            json={
+                "points": points,
+                "groupName": group_name,
+            },
+            headers=self._headers(),
+        )
+        if response.status_code == 413:
+            self._retry_save_datapoints(eval_id, datapoints, group_name)
+            return
+
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
+            )
+
+    def update_datapoint(
+        self,
+        eval_id: uuid.UUID,
+        datapoint_id: uuid.UUID,
+        scores: dict[str, float | int],
+        executor_output: Any | None = None,
+    ) -> None:
+        """Update a datapoint with evaluation results.
+
+        Args:
+            eval_id (uuid.UUID): The evaluation ID.
+            datapoint_id (uuid.UUID): The datapoint ID.
+            scores (dict[str, float | int]): The scores to record.
+            executor_output (Any | None, optional): The executor output. Defaults to None.
+        """
+
+        response = self._client.post(
+            self._base_url + f"/v1/evals/{eval_id}/datapoints/{datapoint_id}",
+            json={
+                "executorOutput": (
+                    str(serialize(executor_output))[
+                        :INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH
+                    ]
+                    if executor_output is not None
+                    else None
+                ),
+                "scores": scores,
+            },
+            headers=self._headers(),
+        )
+
+        if response.status_code != 200:
+            raise ValueError(f"Error updating evaluation datapoint: {response.text}")
+
+    def get_datapoints(
+        self,
+        dataset_name: str,
+        offset: int,
+        limit: int,
+    ) -> GetDatapointsResponse:
+        """Get datapoints from a dataset.
+
+        Args:
+            dataset_name (str): The name of the dataset.
+            offset (int): The offset to start from.
+            limit (int): The maximum number of datapoints to return.
+
+        Returns:
+            GetDatapointsResponse: The response containing the datapoints.
+
+        Raises:
+            ValueError: If there's an error fetching the datapoints.
+        """
+
+        warnings.warn(
+            "Use client.datasets.pull instead",
+            DeprecationWarning,
+        )
+
+        params = {"name": dataset_name, "offset": offset, "limit": limit}
+        response = self._client.get(
+            self._base_url + "/v1/datasets/datapoints",
+            params=params,
+            headers=self._headers(),
+        )
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+            except Exception:
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
+                )
+            raise ValueError(
+                f"Error fetching datapoints: [{response.status_code}] {resp_json}"
+            )
+        return GetDatapointsResponse.model_validate(response.json())
+
+    def _retry_save_datapoints(
+        self,
+        eval_id: uuid.UUID,
+        datapoints: list[EvaluationResultDatapoint | PartialEvaluationDatapoint],
+        group_name: str | None = None,
+        initial_length: int = INITIAL_EVALUATION_DATAPOINT_MAX_DATA_LENGTH,
+        max_retries: int = 20,
+    ):
+        retry = 0
+        length = initial_length
+        while retry < max_retries:
+            retry += 1
+            length = length // 2
+            logger.debug(
+                f"Retrying save datapoints: {retry} of {max_retries}, length: {length}"
+            )
+            if length == 0:
+                raise ValueError("Error saving evaluation datapoints")
+            points = [
+                datapoint.to_dict(max_data_length=length) for datapoint in datapoints
+            ]
+            response = self._client.post(
+                self._base_url + f"/v1/evals/{eval_id}/datapoints",
+                json={
+                    "points": points,
+                    "groupName": group_name,
+                },
+                headers=self._headers(),
+            )
+            if response.status_code != 413:
+                break
+        if response.status_code != 200:
+            raise ValueError(
+                f"Error saving evaluation datapoints: [{response.status_code}] {response.text}"
+            )
````
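Taken together, the new `Evals` resource gives a low-level flow: create an evaluation, register a datapoint, then update it with scores. A minimal sketch of that flow against the synchronous client follows; the evaluation name, datapoint contents, and score key are illustrative, and `LMNR_PROJECT_API_KEY` is assumed to be set in the environment.

```python
from lmnr import LaminarClient

client = LaminarClient()  # reads LMNR_PROJECT_API_KEY from the environment

# Create an evaluation and attach a single datapoint to it.
eval_id = client.evals.create_evaluation(name="toy-eval", group_name="examples")
datapoint_id = client.evals.create_datapoint(
    eval_id,
    data={"question": "What is 2 + 2?"},
    target="4",
)

# Record scores (and optionally the executor output) for that datapoint.
client.evals.update_datapoint(
    eval_id,
    datapoint_id,
    scores={"accuracy": 1.0},
    executor_output="4",
)

client.close()
```

Note that oversized payloads are handled transparently: on a 413 response, `save_datapoints` falls back to `_retry_save_datapoints`, which halves the per-datapoint `max_data_length` on each attempt until the server accepts the batch or the retry budget is exhausted.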
````diff
--- /dev/null
+++ b/lmnr/sdk/client/synchronous/resources/evaluators.py
@@ -0,0 +1,85 @@
+"""Evaluators resource for creating evaluator scores."""
+
+import uuid
+from typing import Any
+
+from lmnr.sdk.client.synchronous.resources.base import BaseResource
+from lmnr.sdk.utils import format_id
+
+
+class Evaluators(BaseResource):
+    """Resource for creating evaluator scores."""
+
+    def score(
+        self,
+        *,
+        name: str,
+        trace_id: str | int | uuid.UUID | None = None,
+        span_id: str | int | uuid.UUID | None = None,
+        metadata: dict[str, Any] | None = None,
+        score: float,
+    ) -> None:
+        """Create a score for a span.
+
+        Args:
+            name (str): Name of the score.
+            trace_id (str | int | uuid.UUID | None, optional): The trace ID to score (the score will be attached to the trace's root span).
+            span_id (str | int | uuid.UUID | None, optional): The span ID to score.
+            metadata (dict[str, Any] | None, optional): Additional metadata. Defaults to None.
+            score (float): The score value.
+
+        Raises:
+            ValueError: If there's an error creating the score.
+
+        Example:
+            Score by trace ID (will attach to root span):
+
+            >>> laminar_client.evaluators.score(
+            ...     name="quality",
+            ...     trace_id="trace-id-here",
+            ...     score=0.95,
+            ...     metadata={"model": "gpt-4"}
+            ... )
+
+            Score by span ID:
+
+            >>> laminar_client.evaluators.score(
+            ...     name="relevance",
+            ...     span_id="span-id-here",
+            ...     score=0.87
+            ... )
+        """
+        if trace_id is not None and span_id is not None:
+            raise ValueError("Cannot provide both trace_id and span_id. Please provide only one.")
+        if trace_id is None and span_id is None:
+            raise ValueError("Either 'trace_id' or 'span_id' must be provided.")
+
+        if trace_id is not None:
+            formatted_trace_id = format_id(trace_id)
+            payload = {
+                "name": name,
+                "traceId": formatted_trace_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+        else:
+            formatted_span_id = format_id(span_id)
+            payload = {
+                "name": name,
+                "spanId": formatted_span_id,
+                "metadata": metadata,
+                "score": score,
+                "source": "Code",
+            }
+
+        response = self._client.post(
+            self._base_url + "/v1/evaluators/score",
+            json=payload,
+            headers=self._headers(),
+        )
+
+        if response.status_code != 200:
+            if response.status_code == 401:
+                raise ValueError("Unauthorized. Please check your project API key.")
+            raise ValueError(f"Error creating evaluator score: {response.text}")
````
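Beyond the docstring examples above, note that `score()` is keyword-only and validates its identifiers client-side before any request is made: passing both `trace_id` and `span_id`, or neither, raises a `ValueError` immediately. A short sketch of that behavior; the UUID is a hypothetical stand-in for a real trace ID (normally captured via `Laminar.get_trace_id()` inside an observed function).

```python
import uuid

from lmnr import LaminarClient

client = LaminarClient()
trace_id = uuid.uuid4()  # hypothetical; a real call needs an existing, ended trace

try:
    # Both identifiers at once is rejected before any HTTP request is sent.
    client.evaluators.score(name="quality", trace_id=trace_id, span_id=trace_id, score=0.5)
except ValueError as err:
    print(err)  # "Cannot provide both trace_id and span_id. ..."

# With exactly one identifier, the score is posted to /v1/evaluators/score.
client.evaluators.score(name="quality", trace_id=trace_id, score=0.95)
```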
````diff
--- /dev/null
+++ b/lmnr/sdk/client/synchronous/resources/tags.py
@@ -0,0 +1,83 @@
+"""Resource for tagging traces."""
+
+import json
+import uuid
+
+from lmnr.sdk.client.synchronous.resources.base import BaseResource
+from lmnr.sdk.log import get_default_logger
+from lmnr.sdk.utils import format_id
+
+logger = get_default_logger(__name__)
+
+
+class Tags(BaseResource):
+    """Resource for tagging traces."""
+
+    def tag(
+        self,
+        trace_id: str | int | uuid.UUID,
+        tags: list[str] | str,
+    ):
+        """Tag a trace with a list of tags. Note that the trace must be ended
+        before tagging it. You may want to call `Laminar.flush()` after the
+        trace that you want to tag.
+
+        Args:
+            trace_id (str | int | uuid.UUID): The trace id to tag.
+            tags (list[str] | str): The tag or list of tags to add to the trace.
+
+        Raises:
+            ValueError: If the trace id is not a valid UUID.
+
+        Returns:
+            list[dict]: The response from the server.
+
+        Example:
+            ```python
+            from lmnr import Laminar, LaminarClient, observe
+
+            Laminar.initialize()
+            client = LaminarClient()
+            trace_id = None
+
+            @observe()
+            def foo():
+                global trace_id
+                trace_id = Laminar.get_trace_id()
+
+            # make sure `foo` is called outside a trace context
+            foo()
+
+            # or make sure the trace is ended by this point
+            Laminar.flush()
+
+            client.tags.tag(trace_id, "my_tag")
+            ```
+        """
+        trace_tags = tags if isinstance(tags, list) else [tags]
+        formatted_trace_id = format_id(trace_id)
+
+        url = self._base_url + "/v1/tag"
+        payload = {
+            "traceId": formatted_trace_id,
+            "names": trace_tags,
+        }
+        response = self._client.post(
+            url,
+            content=json.dumps(payload),
+            headers={
+                **self._headers(),
+            },
+        )
+
+        if response.status_code == 404:
+            logger.warning(
+                f"Trace {formatted_trace_id} not found. The trace may not have been ended yet."
+            )
+            return []
+
+        if response.status_code != 200:
+            raise ValueError(
+                f"Failed to tag trace: [{response.status_code}] {response.text}"
+            )
+        return response.json()
````
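Note that the 404 path returns an empty list instead of raising, so callers can distinguish "trace not found yet" from other failures. A hedged usage sketch follows; the UUID is a stand-in for a trace ID captured as in the docstring example above.

```python
import uuid

from lmnr import Laminar, LaminarClient

Laminar.initialize()
client = LaminarClient()

trace_id = uuid.uuid4()  # stand-in; normally captured via Laminar.get_trace_id()

result = client.tags.tag(trace_id, ["regression", "needs-review"])
if result == []:
    # Empty list means the trace was not found (404), most likely because
    # it has not been ended/flushed yet.
    Laminar.flush()
```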
````diff
--- /dev/null
+++ b/lmnr/sdk/client/synchronous/sync_client.py
@@ -0,0 +1,191 @@
+"""
+Laminar HTTP client. Used to send data to/from the Laminar API.
+"""
+
+import httpx
+import re
+from typing import TypeVar
+from types import TracebackType
+
+from lmnr.sdk.client.synchronous.resources import (
+    BrowserEvents,
+    Evals,
+    Evaluators,
+    Tags,
+)
+from lmnr.sdk.client.synchronous.resources.datasets import Datasets
+from lmnr.sdk.utils import from_env
+
+_T = TypeVar("_T", bound="LaminarClient")
+
+
+class LaminarClient:
+    __base_url: str
+    __project_api_key: str
+    __client: httpx.Client | None = None
+
+    __evals: Evals | None = None
+    __tags: Tags | None = None
+    __evaluators: Evaluators | None = None
+
+    def __init__(
+        self,
+        base_url: str | None = None,
+        project_api_key: str | None = None,
+        port: int | None = None,
+        timeout: int = 3600,
+    ):
+        """Initializer for the Laminar HTTP client.
+
+        Args:
+            base_url (str | None): base URL of the Laminar API. If not\
+                provided, the LMNR_BASE_URL environment variable is used or we\
+                default to "https://api.lmnr.ai".
+            project_api_key (str | None): Laminar project API key. If not\
+                provided, the LMNR_PROJECT_API_KEY environment variable is used.
+            port (int | None, optional): port of the Laminar API HTTP server.\
+                Overrides any port in the base URL.
+                Defaults to None. If none is provided, the default port (443) will
+                be used.
+            timeout (int, optional): global timeout seconds for the HTTP client.\
+                Applied to all httpx operations, e.g. connect, read, get_from_pool.
+                Defaults to 3600.
+        """
+        # If port is already in the base URL, use it as is
+        base_url = base_url or from_env("LMNR_BASE_URL") or "https://api.lmnr.ai"
+        if match := re.search(r":(\d{1,5})$", base_url):
+            base_url = base_url[: -len(match.group(0))]
+            if port is None:
+                port = int(match.group(1))
+
+        base_url = base_url.rstrip("/")
+        self.__base_url = f"{base_url}:{port or 443}"
+        self.__project_api_key = project_api_key or from_env("LMNR_PROJECT_API_KEY")
+        if not self.__project_api_key:
+            raise ValueError(
+                "Project API key is not set. Please set the LMNR_PROJECT_API_KEY environment "
+                "variable or pass project_api_key to the initializer."
+            )
+        self.__client = httpx.Client(
+            headers=self._headers(),
+            timeout=timeout,
+            # Context: If the server responds with a 413, the connection becomes
+            # poisoned and freezes on subsequent requests, and there is no way
+            # to recover or recycle such a connection.
+            # Setting max_keepalive_connections to 0 will resolve this, but is
+            # less efficient, as it will create a new connection
+            # (not client, so still better) for each request.
+            #
+            # Note: from my experiments with a simple python server, forcing the
+            # server to read/consume the request payload from the socket seems
+            # to resolve this, but I haven't figured out how to do that in our
+            # real actix-web backend server and whether it makes sense to do so.
+            #
+            # TODO: investigate if there are better ways to fix this rather than
+            # setting keepalive_expiry to 0. Other alternative: migrate to
+            # requests + aiohttp.
+            #
+            # limits=httpx.Limits(
+            #     max_keepalive_connections=0,
+            #     keepalive_expiry=0,
+            # ),
+        )
+
+        # Initialize resource objects
+        self.__evals = Evals(self.__client, self.__base_url, self.__project_api_key)
+        self.__evaluators = Evaluators(
+            self.__client, self.__base_url, self.__project_api_key
+        )
+        self.__browser_events = BrowserEvents(
+            self.__client, self.__base_url, self.__project_api_key
+        )
+        self.__tags = Tags(self.__client, self.__base_url, self.__project_api_key)
+        self.__datasets = Datasets(
+            self.__client, self.__base_url, self.__project_api_key
+        )
+
+    @property
+    def evals(self) -> Evals:
+        """Get the Evals resource.
+
+        Returns:
+            Evals: The Evals resource instance.
+        """
+        return self.__evals
+
+    @property
+    def _browser_events(self) -> BrowserEvents:
+        """Get the BrowserEvents resource.
+
+        Returns:
+            BrowserEvents: The BrowserEvents resource instance.
+        """
+        return self.__browser_events
+
+    @property
+    def tags(self) -> Tags:
+        """Get the Tags resource.
+
+        Returns:
+            Tags: The Tags resource instance.
+        """
+        return self.__tags
+
+    @property
+    def evaluators(self) -> Evaluators:
+        """Get the Evaluators resource.
+
+        Returns:
+            Evaluators: The Evaluators resource instance.
+        """
+        return self.__evaluators
+
+    @property
+    def datasets(self) -> Datasets:
+        """Get the Datasets resource.
+
+        Returns:
+            Datasets: The Datasets resource instance.
+        """
+        return self.__datasets
+
+    def shutdown(self):
+        """Shutdown the client by closing underlying connections."""
+        self.__client.close()
+
+    def is_closed(self) -> bool:
+        """Check if the client is closed.
+
+        Returns:
+            bool: True if the client is closed, False otherwise.
+        """
+        return self.__client.is_closed
+
+    def close(self) -> None:
+        """Close the underlying HTTPX client.
+
+        The client will *not* be usable after this.
+        """
+        # If an error is thrown while constructing a client, self.__client
+        # may not be present (name-mangled to _LaminarClient__client)
+        if hasattr(self, "_LaminarClient__client"):
+            self.__client.close()
+
+    def __enter__(self: _T) -> _T:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self.close()
+
+    def _headers(self) -> dict[str, str]:
+        assert self.__project_api_key is not None, "Project API key is not set"
+        return {
+            "Authorization": "Bearer " + self.__project_api_key,
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
````
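The constructor normalizes `base_url` and `port`: an explicit `port` argument wins, otherwise a port embedded in the URL is reused, otherwise 443. A small sketch of the resulting request bases under those rules; the localhost URLs and the API key placeholder are illustrative, and no network traffic happens at construction time.

```python
from lmnr import LaminarClient

key = "lmnr-project-api-key"  # placeholder

# Port taken from the URL: requests go to http://localhost:8080
a = LaminarClient(base_url="http://localhost:8080", project_api_key=key)

# Explicit port overrides the one in the URL: http://localhost:9000
b = LaminarClient(base_url="http://localhost:8080", port=9000, project_api_key=key)

# No port anywhere: defaults to https://api.lmnr.ai:443
c = LaminarClient(project_api_key=key)

# The client is also a context manager; __exit__ calls close().
for client in (a, b, c):
    client.close()
```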