lmnr 0.4.12b3__py3-none-any.whl → 0.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/sdk/decorators.py +3 -2
- lmnr/sdk/evaluations.py +94 -63
- lmnr/sdk/laminar.py +46 -43
- lmnr/sdk/types.py +44 -7
- lmnr/sdk/utils.py +4 -5
- lmnr/traceloop_sdk/__init__.py +3 -29
- lmnr/traceloop_sdk/config/__init__.py +0 -4
- lmnr/traceloop_sdk/decorators/base.py +29 -12
- lmnr/traceloop_sdk/tracing/attributes.py +9 -0
- lmnr/traceloop_sdk/tracing/tracing.py +45 -142
- {lmnr-0.4.12b3.dist-info → lmnr-0.4.13.dist-info}/METADATA +73 -101
- {lmnr-0.4.12b3.dist-info → lmnr-0.4.13.dist-info}/RECORD +15 -17
- lmnr/traceloop_sdk/metrics/__init__.py +0 -0
- lmnr/traceloop_sdk/metrics/metrics.py +0 -176
- lmnr/traceloop_sdk/tracing/manual.py +0 -57
- {lmnr-0.4.12b3.dist-info → lmnr-0.4.13.dist-info}/LICENSE +0 -0
- {lmnr-0.4.12b3.dist-info → lmnr-0.4.13.dist-info}/WHEEL +0 -0
- {lmnr-0.4.12b3.dist-info → lmnr-0.4.13.dist-info}/entry_points.txt +0 -0
lmnr/sdk/decorators.py
CHANGED
@@ -6,6 +6,7 @@ from opentelemetry.trace import INVALID_SPAN, get_current_span
 
 from typing import Callable, Optional, cast
 
+from lmnr.traceloop_sdk.tracing.attributes import SESSION_ID, USER_ID
 from lmnr.traceloop_sdk.tracing.tracing import update_association_properties
 
 from .utils import is_async
@@ -43,11 +44,11 @@ def observe(
         if current_span != INVALID_SPAN:
             if session_id is not None:
                 current_span.set_attribute(
-
+                    SESSION_ID, session_id
                 )
             if user_id is not None:
                 current_span.set_attribute(
-
+                    USER_ID, user_id
                )
         association_properties = {}
         if session_id is not None:
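
The decorator now writes session and user association properties under the shared SESSION_ID and USER_ID attribute constants instead of inline strings. A minimal usage sketch, assuming `observe` is re-exported from the package root as in earlier releases:

from lmnr import observe  # assumed re-export from the package root

@observe(session_id="session-123", user_id="user-456")
def handle_request(prompt: str) -> str:
    # Spans created during this call carry the session/user association
    # properties under the new attribute-key constants.
    return prompt.upper()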
lmnr/sdk/evaluations.py
CHANGED
@@ -2,12 +2,25 @@ import asyncio
 import sys
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
-from typing import Any, Awaitable, Optional, Union
+from typing import Any, Awaitable, Optional, Set, Union
+import uuid
 
 from tqdm import tqdm
 
+from ..traceloop_sdk.instruments import Instruments
+from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
+
 from .laminar import Laminar as L
-from .types import
+from .types import (
+    Datapoint,
+    EvaluationResultDatapoint,
+    EvaluatorFunction,
+    ExecutorFunction,
+    Numeric,
+    NumericTypes,
+    SpanType,
+    TraceType,
+)
 from .utils import is_async
 
 DEFAULT_BATCH_SIZE = 5
@@ -39,7 +52,11 @@ class EvaluationReporter:
     def start(self, name: str, project_id: str, id: str, length: int):
         print(f"Running evaluation {name}...\n")
         print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
-        self.cli_progress = tqdm(
+        self.cli_progress = tqdm(
+            total=length,
+            bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
+            ncols=60,
+        )
 
     def update(self, batch_length: int):
         self.cli_progress.update(batch_length)
@@ -51,7 +68,7 @@ class EvaluationReporter:
     def stop(self, average_scores: dict[str, Numeric]):
         self.cli_progress.close()
         print("\nAverage scores:")
-        for
+        for name, score in average_scores.items():
             print(f"{name}: {score}")
         print("\n")
 
@@ -78,12 +95,14 @@ class Evaluation:
         self,
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
-        evaluators:
+        evaluators: dict[str, EvaluatorFunction],
         name: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
         project_api_key: Optional[str] = None,
         base_url: Optional[str] = None,
         http_port: Optional[int] = None,
+        grpc_port: Optional[int] = None,
+        instruments: Optional[Set[Instruments]] = None,
     ):
         """
         Initializes an instance of the Evaluations class.
@@ -114,33 +133,18 @@ class Evaluation:
             Defaults to "https://api.lmnr.ai".
         http_port (Optional[int], optional): The port for the Laminar API HTTP service.
             Defaults to 443.
+        instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+            Defaults to None. If None, all available instruments will be used.
         """
 
         self.is_finished = False
         self.name = name
         self.reporter = EvaluationReporter()
         self.executor = executor
-        self.evaluators = dict(
-            zip(
-                [
-                    (
-                        e.__name__
-                        if e.__name__ and e.__name__ != "<lambda>"
-                        else f"evaluator_{i+1}"
-                    )
-                    for i, e in enumerate(evaluators)
-                ],
-                evaluators,
-            )
-        )
-        self.evaluator_names = list(self.evaluators.keys())
+        self.evaluators = evaluators
         if isinstance(data, list):
             self.data = [
-                (
-                    Datapoint.model_validate(point)
-                    if isinstance(point, dict)
-                    else point
-                )
+                (Datapoint.model_validate(point) if isinstance(point, dict) else point)
                 for point in data
             ]
         else:
@@ -150,7 +154,8 @@ class Evaluation:
             project_api_key=project_api_key,
             base_url=base_url,
             http_port=http_port,
-
+            grpc_port=grpc_port,
+            instruments=instruments,
         )
 
     def run(self) -> Union[None, Awaitable[None]]:
@@ -190,19 +195,19 @@ class Evaluation:
         )
 
         try:
-            await self.evaluate_in_batches(evaluation)
+            await self.evaluate_in_batches(evaluation.id)
         except Exception as e:
             L.update_evaluation_status(evaluation.id, "Error")
             self.reporter.stopWithError(e)
             self.is_finished = True
             return
 
-
-
-        self.reporter.stop(
+        update_evaluation_response = L.update_evaluation_status(evaluation.id, "Finished")
+        average_scores = update_evaluation_response.stats.averageScores
+        self.reporter.stop(average_scores)
         self.is_finished = True
 
-    async def evaluate_in_batches(self,
+    async def evaluate_in_batches(self, evaluation_id: uuid.UUID):
         for i in range(0, len(self.data), self.batch_size):
             batch = (
                 self.data[i: i + self.batch_size]
@@ -211,58 +216,78 @@ class Evaluation:
             )
             try:
                 results = await self._evaluate_batch(batch)
-                L.post_evaluation_results(
+                L.post_evaluation_results(evaluation_id, results)
             except Exception as e:
                 print(f"Error evaluating batch: {e}")
             finally:
                 self.reporter.update(len(batch))
 
-    async def _evaluate_batch(
+    async def _evaluate_batch(
+        self, batch: list[Datapoint]
+    ) -> list[EvaluationResultDatapoint]:
         batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
         results = await asyncio.gather(*batch_promises)
         return results
 
-    async def _evaluate_datapoint(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    async def _evaluate_datapoint(
+        self, datapoint: Datapoint
+    ) -> EvaluationResultDatapoint:
+        with L.start_as_current_span("evaluation") as evaluation_span:
+            L._set_trace_type(trace_type=TraceType.EVALUATION)
+            evaluation_span.set_attribute(SPAN_TYPE, SpanType.EVALUATION.value)
+            with L.start_as_current_span(
+                "executor", input={"data": datapoint.data}
+            ) as executor_span:
+                executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
+                output = (
+                    await self.executor(datapoint.data)
+                    if is_async(self.executor)
+                    else self.executor(datapoint.data)
+                )
+                L.set_span_output(output)
+            target = datapoint.target
+
+            # Iterate over evaluators
+            scores: dict[str, Numeric] = {}
+            for evaluator_name, evaluator in self.evaluators.items():
+                with L.start_as_current_span(
+                    evaluator_name, input={"output": output, "target": target}
+                ) as evaluator_span:
+                    evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
+                    value = (
+                        await evaluator(output, target)
+                        if is_async(evaluator)
+                        else evaluator(output, target)
+                    )
+                    L.set_span_output(value)
+
+                # If evaluator returns a single number, use evaluator name as key
+                if isinstance(value, NumericTypes):
+                    scores[evaluator_name] = value
+                else:
+                    scores.update(value)
+
+            trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
+            return EvaluationResultDatapoint(
+                data=datapoint.data,
+                target=target,
+                executor_output=output,
+                scores=scores,
+                trace_id=trace_id,
             )
 
-        # If evaluator returns a single number, use evaluator name as key
-        if isinstance(value, NumericTypes):
-            scores[evaluator_name] = value
-        else:
-            scores.update(value)
-
-        return EvaluationResultDatapoint(
-            data=datapoint.data,
-            target=target,
-            executorOutput=output,
-            scores=scores,
-        )
-
 
 def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
-    executor:
-    evaluators:
+    executor: ExecutorFunction,
+    evaluators: dict[str, EvaluatorFunction],
     name: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
    project_api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    http_port: Optional[int] = None,
+    grpc_port: Optional[int] = None,
+    instruments: Optional[Set[Instruments]] = None,
 ) -> Optional[Awaitable[None]]:
     """
     If added to the file which is called through lmnr eval command, then simply registers the evaluation.
@@ -295,6 +320,10 @@ def evaluate(
             Defaults to "https://api.lmnr.ai".
         http_port (Optional[int], optional): The port for the Laminar API HTTP service.
             Defaults to 443.
+        grpc_port (Optional[int], optional): The port for the Laminar API gRPC service.
+            Defaults to 8443.
+        instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+            Defaults to None. If None, all available instruments will be used.
     """
 
     evaluation = Evaluation(
@@ -306,6 +335,8 @@ def evaluate(
         project_api_key=project_api_key,
         base_url=base_url,
         http_port=http_port,
+        grpc_port=grpc_port,
+        instruments=instruments,
    )
 
    global _evaluation
lmnr/sdk/laminar.py
CHANGED
@@ -3,12 +3,9 @@ from opentelemetry import context
 from opentelemetry.trace import (
     INVALID_SPAN,
     get_current_span,
-    SpanKind,
 )
-from opentelemetry.semconv_ai import SpanAttributes
 from opentelemetry.util.types import AttributeValue
-from opentelemetry.context import Context
-from opentelemetry.util import types
+from opentelemetry.context import set_value, attach, detach
 from lmnr.traceloop_sdk import Traceloop
 from lmnr.traceloop_sdk.tracing import get_tracer
 from contextlib import contextmanager
@@ -26,7 +23,19 @@ import os
 import requests
 import uuid
 
-from lmnr.traceloop_sdk.tracing.
+from lmnr.traceloop_sdk.tracing.attributes import (
+    SESSION_ID,
+    SPAN_INPUT,
+    SPAN_OUTPUT,
+    SPAN_PATH,
+    TRACE_TYPE,
+    USER_ID,
+)
+from lmnr.traceloop_sdk.tracing.tracing import (
+    get_span_path,
+    set_association_properties,
+    update_association_properties,
+)
 
 from .log import VerboseColorfulFormatter
 
@@ -37,6 +46,7 @@ from .types import (
     PipelineRunResponse,
     NodeInput,
     PipelineRunRequest,
+    TraceType,
     UpdateEvaluationResponse,
 )
 
@@ -305,14 +315,6 @@ class Laminar:
         cls,
         name: str,
         input: Any = None,
-        context: Optional[Context] = None,
-        kind: SpanKind = SpanKind.INTERNAL,
-        attributes: types.Attributes = None,
-        links=None,
-        start_time: Optional[int] = None,
-        record_exception: bool = True,
-        set_status_on_exception: bool = True,
-        end_on_exit: bool = True,
     ):
         """Start a new span as the current span. Useful for manual instrumentation.
         This is the preferred and more stable way to use manual instrumentation.
@@ -327,40 +329,29 @@ class Laminar:
             name (str): name of the span
             input (Any, optional): input to the span. Will be sent as an
                 attribute, so must be json serializable. Defaults to None.
-            context (Optional[Context], optional): context to start the span in.
-                Defaults to None.
-            kind (SpanKind, optional): kind of the span. Defaults to SpanKind.INTERNAL.
-            attributes (types.Attributes, optional): attributes to set on the span.
-                Defaults to None.
-            links ([type], optional): links to set on the span. Defaults to None.
-            start_time (Optional[int], optional): start time of the span.
-                Defaults to None.
-            record_exception (bool, optional): whether to record exceptions.
-                Defaults to True.
-            set_status_on_exception (bool, optional): whether to set status on exception.
-                Defaults to True.
-            end_on_exit (bool, optional): whether to end the span on exit.
-                Defaults to True.
         """
         with get_tracer() as tracer:
+            span_path = get_span_path(name)
+            ctx = set_value("span_path", span_path)
+            ctx_token = attach(set_value("span_path", span_path))
             with tracer.start_as_current_span(
                 name,
-                context=context,
-
-                attributes=attributes,
-                links=links,
-                start_time=start_time,
-                record_exception=record_exception,
-                set_status_on_exception=set_status_on_exception,
-                end_on_exit=end_on_exit,
+                context=ctx,
+                attributes={SPAN_PATH: span_path},
             ) as span:
                 if input is not None:
                     span.set_attribute(
-
-                        json.dumps(
+                        SPAN_INPUT,
+                        json.dumps(input),
                     )
                 yield span
 
+            # TODO: Figure out if this is necessary
+            try:
+                detach(ctx_token)
+            except Exception:
+                pass
+
     @classmethod
     def set_span_output(cls, output: Any = None):
         """Set the output of the current span. Useful for manual instrumentation.
@@ -371,9 +362,7 @@ class Laminar:
         """
         span = get_current_span()
         if output is not None and span != INVALID_SPAN:
-            span.set_attribute(
-                SpanAttributes.TRACELOOP_ENTITY_OUTPUT, json.dumps(output)
-            )
+            span.set_attribute(SPAN_OUTPUT, json.dumps(output))
 
     @classmethod
     def set_session(
@@ -396,9 +385,23 @@ class Laminar:
         """
         association_properties = {}
         if session_id is not None:
-            association_properties[
+            association_properties[SESSION_ID] = session_id
         if user_id is not None:
-            association_properties[
+            association_properties[USER_ID] = user_id
+        update_association_properties(association_properties)
+
+    @classmethod
+    def _set_trace_type(
+        cls,
+        trace_type: TraceType,
+    ):
+        """Set the trace_type for the current span and the context
+        Args:
+            trace_type (TraceType): Type of the trace
+        """
+        association_properties = {
+            TRACE_TYPE: trace_type.value,
+        }
         update_association_properties(association_properties)
 
     @classmethod
@@ -430,7 +433,7 @@ class Laminar:
     ) -> requests.Response:
         body = {
             "evaluationId": str(evaluation_id),
-            "points": [datapoint.
+            "points": [datapoint.to_dict() for datapoint in data],
        }
        response = requests.post(
            cls.__base_http_url + "/v1/evaluation-datapoints",
lmnr/sdk/types.py
CHANGED
@@ -1,10 +1,11 @@
 import datetime
-import
+from enum import Enum
 import pydantic
-import
+import requests
 from typing import Any, Awaitable, Callable, Literal, Optional, Union
+import uuid
 
-from .utils import to_dict
+from .utils import serialize
 
 
 class ChatMessage(pydantic.BaseModel):
@@ -37,7 +38,7 @@ class PipelineRunRequest(pydantic.BaseModel):
     def to_dict(self):
         return {
             "inputs": {
-                k: v.model_dump() if isinstance(v, pydantic.BaseModel) else to_dict(v)
+                k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
                 for k, v in self.inputs.items()
             },
             "pipeline": self.pipeline,
@@ -116,14 +117,50 @@ class CreateEvaluationResponse(pydantic.BaseModel):
     status: EvaluationStatus
     projectId: uuid.UUID
     metadata: Optional[dict[str, Any]] = None
-    averageScores: Optional[dict[str, Numeric]] = None
 
 
-
+class EvaluationStats(pydantic.BaseModel):
+    averageScores: dict[str, Numeric]
+
+
+class UpdateEvaluationResponse(pydantic.BaseModel):
+    stats: EvaluationStats
 
 
 class EvaluationResultDatapoint(pydantic.BaseModel):
     data: EvaluationDatapointData
     target: EvaluationDatapointTarget
-
+    executor_output: ExecutorFunctionReturnType
     scores: dict[str, Numeric]
+    trace_id: uuid.UUID
+
+    # uuid is not serializable by default, so we need to convert it to a string
+    def to_dict(self):
+        return {
+            "data": {
+                k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
+                for k, v in self.data.items()
+            },
+            "target": {
+                k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
+                for k, v in self.target.items()
+            },
+            "executorOutput": serialize(self.executor_output),
+            "scores": self.scores,
+            "traceId": str(self.trace_id),
+        }
+
+
+class SpanType(Enum):
+    DEFAULT = "DEFAULT"
+    LLM = "LLM"
+    PIPELINE = "PIPELINE"  # must not be set manually
+    EXECUTOR = "EXECUTOR"
+    EVALUATOR = "EVALUATOR"
+    EVALUATION = "EVALUATION"
+
+
+class TraceType(Enum):
+    DEFAULT = "DEFAULT"
+    EVENT = "EVENT"  # must not be set manually
+    EVALUATION = "EVALUATION"
lmnr/sdk/utils.py
CHANGED
@@ -1,5 +1,4 @@
 import asyncio
-import copy
 import datetime
 import dataclasses
 import enum
@@ -50,7 +49,7 @@ def is_iterator(o: typing.Any) -> bool:
     return hasattr(o, "__iter__") and hasattr(o, "__next__")
 
 
-def to_dict(obj: typing.Any) -> dict[str, typing.Any]:
+def serialize(obj: typing.Any) -> dict[str, typing.Any]:
     def to_dict_inner(o: typing.Any):
         if isinstance(o, (datetime.datetime, datetime.date)):
             return o.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
@@ -59,7 +58,7 @@ def to_dict(obj: typing.Any) -> dict[str, typing.Any]:
         elif isinstance(o, (int, float, str, bool)):
             return o
         elif isinstance(o, uuid.UUID):
-            return str(o)  # same as in return, but explicit
+            return str(o)  # same as in final return, but explicit
         elif isinstance(o, enum.Enum):
             return o.value
         elif dataclasses.is_dataclass(o):
@@ -90,11 +89,11 @@ def get_input_from_func_args(
 ) -> dict[str, typing.Any]:
     # Remove implicitly passed "self" or "cls" argument for
     # instance or class methods
-    res = copy
+    res = func_kwargs.copy()
     for i, k in enumerate(inspect.signature(func).parameters.keys()):
         if is_method and k in ["self", "cls"]:
             continue
         # If param has default value, then it's not present in func args
-        if len(func_args)
+        if i < len(func_args):
             res[k] = func_args[i]
     return res
lmnr/traceloop_sdk/__init__.py
CHANGED
@@ -3,20 +3,16 @@ import sys
 from pathlib import Path
 
 from typing import Optional, Set
-from colorama import Fore
 from opentelemetry.sdk.trace import SpanProcessor
 from opentelemetry.sdk.trace.export import SpanExporter
-from opentelemetry.sdk.metrics.export import MetricExporter
 from opentelemetry.sdk.resources import SERVICE_NAME
 from opentelemetry.propagators.textmap import TextMapPropagator
 from opentelemetry.util.re import parse_env_headers
 
-from lmnr.traceloop_sdk.metrics.metrics import MetricsWrapper
 from lmnr.traceloop_sdk.instruments import Instruments
 from lmnr.traceloop_sdk.config import (
     is_content_tracing_enabled,
     is_tracing_enabled,
-    is_metrics_enabled,
 )
 from lmnr.traceloop_sdk.tracing.tracing import TracerWrapper
 from typing import Dict
@@ -38,8 +34,6 @@ class Traceloop:
         headers: Dict[str, str] = {},
         disable_batch=False,
         exporter: Optional[SpanExporter] = None,
-        metrics_exporter: Optional[MetricExporter] = None,
-        metrics_headers: Optional[Dict[str, str]] = None,
         processor: Optional[SpanProcessor] = None,
         propagator: Optional[TextMapPropagator] = None,
         should_enrich_metrics: bool = True,
@@ -50,7 +44,7 @@ class Traceloop:
         api_key = os.getenv("TRACELOOP_API_KEY") or api_key
 
         if not is_tracing_enabled():
-            print(Fore.YELLOW + "Tracing is disabled" + Fore.RESET)
+            # print(Fore.YELLOW + "Tracing is disabled" + Fore.RESET)
             return
 
         enable_content_tracing = is_content_tracing_enabled()
@@ -67,12 +61,10 @@ class Traceloop:
             and not api_key
         ):
             print(
-
-                + "Error: Missing API key,"
+                "Error: Missing API key,"
                 + " go to project settings to create one"
             )
             print("Set the LMNR_PROJECT_API_KEY environment variable to the key")
-            print(Fore.RESET)
             return
 
         if api_key and not exporter and not processor and not headers:
@@ -80,7 +72,7 @@ class Traceloop:
             "Authorization": f"Bearer {api_key}",
         }
 
-        print(Fore.RESET)
+        # print(Fore.RESET)
 
         # Tracer init
         resource_attributes.update({SERVICE_NAME: app_name})
@@ -95,21 +87,3 @@ class Traceloop:
             should_enrich_metrics=should_enrich_metrics,
             instruments=instruments,
         )
-
-        if not metrics_exporter and exporter:
-            return
-
-        metrics_endpoint = os.getenv("TRACELOOP_METRICS_ENDPOINT") or api_endpoint
-        metrics_headers = (
-            os.getenv("TRACELOOP_METRICS_HEADERS") or metrics_headers or headers
-        )
-
-        if not is_metrics_enabled() or not metrics_exporter and exporter:
-            print(Fore.YELLOW + "Metrics are disabled" + Fore.RESET)
-            return
-
-        MetricsWrapper.set_static_params(
-            resource_attributes, metrics_endpoint, metrics_headers
-        )
-
-        Traceloop.__metrics_wrapper = MetricsWrapper(exporter=metrics_exporter)
lmnr/traceloop_sdk/config/__init__.py
CHANGED

@@ -7,7 +7,3 @@ def is_tracing_enabled() -> bool:
 
 def is_content_tracing_enabled() -> bool:
     return (os.getenv("TRACELOOP_TRACE_CONTENT") or "true").lower() == "true"
-
-
-def is_metrics_enabled() -> bool:
-    return (os.getenv("TRACELOOP_METRICS_ENABLED") or "true").lower() == "true"