lmnr 0.4.22__tar.gz → 0.4.24__tar.gz
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- {lmnr-0.4.22 → lmnr-0.4.24}/PKG-INFO +2 -1
- {lmnr-0.4.22 → lmnr-0.4.24}/pyproject.toml +3 -2
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/__init__.py +2 -0
- lmnr-0.4.24/src/lmnr/sdk/datasets.py +58 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/evaluations.py +22 -38
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/laminar.py +87 -4
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/log.py +10 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/types.py +7 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/decorators/base.py +11 -10
- lmnr-0.4.24/src/lmnr/traceloop_sdk/tracing/attributes.py +35 -0
- lmnr-0.4.22/src/lmnr/traceloop_sdk/tracing/attributes.py +0 -9
- {lmnr-0.4.22 → lmnr-0.4.24}/LICENSE +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/README.md +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/utils.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.22 → lmnr-0.4.24}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.22
+Version: 0.4.24
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -16,6 +16,7 @@ Requires-Dist: argparse (>=1.0,<2.0)
 Requires-Dist: backoff (>=2.0,<3.0)
 Requires-Dist: deprecated (>=1.0,<2.0)
 Requires-Dist: jinja2 (>=3.0,<4.0)
+Requires-Dist: openai (>=1.52.0,<2.0.0)
 Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1.27.0,<2.0.0)
{lmnr-0.4.22 → lmnr-0.4.24}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.22"
+version = "0.4.24"
 description = "Python SDK for Laminar AI"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.22"
+version = "0.4.24"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -62,6 +62,7 @@ opentelemetry-instrumentation-groq = ">=0.33.1"
 tqdm = "~=4.0"
 argparse = "~=1.0"
 
+openai = "^1.52.0"
 [tool.poetry.group.dev.dependencies]
 autopep8 = "^2.2.0"
 flake8 = "7.0.0"
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/__init__.py

@@ -1,5 +1,7 @@
+from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
 from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
 from .sdk.decorators import observe
 from .traceloop_sdk import Instruments
+from .traceloop_sdk.tracing.attributes import Attributes
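Both additions are re-exported from the package root, so with lmnr >= 0.4.24 installed the new APIs can be imported directly:

```python
# New top-level exports in 0.4.24.
from lmnr import LaminarDataset, Attributes
```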
lmnr-0.4.24/src/lmnr/sdk/datasets.py (new file)

@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+import logging
+
+from .log import get_default_logger
+from .laminar import Laminar as L
+from .types import (
+    Datapoint,
+)
+
+DEFAULT_FETCH_SIZE = 25
+
+
+class EvaluationDataset(ABC):
+    @abstractmethod
+    def __init__(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def __len__(self) -> int:
+        pass
+
+    @abstractmethod
+    def __getitem__(self, idx) -> Datapoint:
+        pass
+
+    def slice(self, start: int, end: int):
+        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+class LaminarDataset(EvaluationDataset):
+    def __init__(self, name: str, fetch_size: int = DEFAULT_FETCH_SIZE):
+        self.name = name
+        self._len = None
+        self._fetched_items = []
+        self._offset = 0
+        self._fetch_size = fetch_size
+        self._logger = get_default_logger(self.__class__.__name__, level=logging.DEBUG)
+
+    def _fetch_batch(self):
+        self._logger.debug(
+            f"dataset {self.name}. Fetching batch from {self._offset} to "
+            + f"{self._offset + self._fetch_size}"
+        )
+        resp = L.get_datapoints(self.name, self._offset, self._fetch_size)
+        self._fetched_items += resp.items
+        self._offset = len(self._fetched_items)
+        if self._len is None:
+            self._len = resp.totalCount
+
+    def __len__(self) -> int:
+        if self._len is None:
+            self._fetch_batch()
+        return self._len
+
+    def __getitem__(self, idx) -> Datapoint:
+        if idx >= len(self._fetched_items):
+            self._fetch_batch()
+        return self._fetched_items[idx]
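`LaminarDataset` pages datapoints lazily through `Laminar.get_datapoints` (added to laminar.py below), `DEFAULT_FETCH_SIZE` items at a time. A minimal usage sketch; the dataset name is a placeholder and the executor/evaluator are illustrative stand-ins:

```python
from lmnr import LaminarDataset, evaluate

# Batches of 25 (DEFAULT_FETCH_SIZE) are fetched on demand rather than
# downloading the whole dataset up front.
dataset = LaminarDataset("my_dataset")  # placeholder dataset name

evaluate(
    data=dataset,
    executor=lambda data: data,  # stand-in for the code under evaluation
    evaluators={"nonempty": lambda output, target: int(bool(output))},
)
```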
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/evaluations.py

@@ -1,17 +1,18 @@
 import asyncio
 import re
 import sys
-from abc import ABC, abstractmethod
-from contextlib import contextmanager
-from typing import Any, Awaitable, Optional, Set, Union
 import uuid
 
+from contextlib import contextmanager
 from tqdm import tqdm
+from typing import Any, Awaitable, Optional, Set, Union
 
 from ..traceloop_sdk.instruments import Instruments
 from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
 
+from .datasets import EvaluationDataset
 from .laminar import Laminar as L
+from .log import get_default_logger
 from .types import (
     Datapoint,
     EvaluationResultDatapoint,
@@ -84,7 +85,7 @@ class EvaluationReporter:
     ):
         self.cli_progress.close()
         print(
-            f"\nCheck
+            f"\nCheck the results at {get_evaluation_url(project_id, evaluation_id)}\n"
         )
         print("Average scores:")
         for name, score in average_scores.items():
@@ -92,23 +93,6 @@ class EvaluationReporter:
         print("\n")
 
 
-class EvaluationDataset(ABC):
-    @abstractmethod
-    def __init__(self, *args, **kwargs):
-        pass
-
-    @abstractmethod
-    def __len__(self) -> int:
-        pass
-
-    @abstractmethod
-    def __getitem__(self, idx) -> Datapoint:
-        pass
-
-    def slice(self, start: int, end: int):
-        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
-
-
 class Evaluation:
     def __init__(
         self,
@@ -135,14 +119,13 @@ class Evaluation:
             executor (Callable[..., Any]): The executor function.\
                 Takes the data point + any additional arguments\
                 and returns the output to evaluate.
-            evaluators (
-                Each evaluator function takes the output of the executor
-                the target data, and returns a score. The score can be a\
-                single number or a
+            evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+                names. Each evaluator function takes the output of the executor\
+                _and_ the target data, and returns a score. The score can be a\
+                single number or a dict of string keys and number values.\
                 If the score is a single number, it will be named after the\
-                evaluator function.
-
-                evaluator function in the list starting from 1.
+                evaluator function. Evaluator function names must contain only\
+                letters, digits, hyphens, underscores, or spaces.
             group_id (Optional[str], optional): Group id of the evaluation.
                 Defaults to "default".
             name (Optional[str], optional): The name of the evaluation.\
@@ -191,6 +174,7 @@ class Evaluation:
         self.group_id = group_id
         self.name = name
         self.batch_size = batch_size
+        self._logger = get_default_logger(self.__class__.__name__)
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
@@ -215,7 +199,7 @@ class Evaluation:
         )
 
         try:
-            result_datapoints = await self.
+            result_datapoints = await self._evaluate_in_batches()
         except Exception as e:
             self.reporter.stopWithError(e)
             self.is_finished = True
@@ -228,7 +212,7 @@ class Evaluation:
         self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
         self.is_finished = True
 
-    async def
+    async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
         result_datapoints = []
         for i in range(0, len(self.data), self.batch_size):
             batch = (
@@ -326,14 +310,14 @@ def evaluate(
        executor (Callable[..., Any]): The executor function.\
            Takes the data point + any additional arguments\
            and returns the output to evaluate.
-        evaluators (List[Callable[..., Any]]):
-
-
-
-
-
-
-
+        evaluators (List[Callable[..., Any]]):
+        evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+            names. Each evaluator function takes the output of the executor\
+            _and_ the target data, and returns a score. The score can be a\
+            single number or a dict of string keys and number values.\
+            If the score is a single number, it will be named after the\
+            evaluator function. Evaluator function names must contain only\
+            letters, digits, hyphens, underscores, or spaces.
        group_id (Optional[str], optional): an identifier to group evaluations.\
            It is practical to group evaluations that evaluate\
            the same feature on the same dataset, to be able to
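With evaluators now passed as a name-to-function dict, score names are explicit rather than derived from list positions. A hedged sketch of the calling convention; the datapoint shape and executor are illustrative:

```python
from lmnr import evaluate

def exact_match(output, target) -> int:
    # Compares the executor output against the target datapoint.
    return int(output == target.get("expected"))

evaluate(
    data=[{"data": {"question": "What is 2 + 2?"}, "target": {"expected": "4"}}],
    executor=lambda data: "4",  # stand-in for a real model call
    # Dict keys name the scores; per the docstring they may only contain
    # letters, digits, hyphens, underscores, or spaces.
    evaluators={"exact-match": exact_match},
)
```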
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/laminar.py

@@ -9,11 +9,13 @@ from opentelemetry.util.types import AttributeValue
 from opentelemetry.context import set_value, attach, detach
 from lmnr.traceloop_sdk import Traceloop
 from lmnr.traceloop_sdk.tracing import get_tracer
+from lmnr.traceloop_sdk.tracing.attributes import Attributes, SPAN_TYPE
+from lmnr.traceloop_sdk.decorators.base import json_dumps
 from contextlib import contextmanager
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
 
 from pydantic.alias_generators import to_snake
-from typing import Any, Optional, Set, Union
+from typing import Any, Literal, Optional, Set, Union
 
 import copy
 import datetime
@@ -22,6 +24,7 @@ import json
 import logging
 import os
 import requests
+import urllib.parse
 import uuid
 
 from lmnr.traceloop_sdk.tracing.attributes import (
@@ -43,6 +46,7 @@ from .log import VerboseColorfulFormatter
 from .types import (
     CreateEvaluationResponse,
     EvaluationResultDatapoint,
+    GetDatapointsResponse,
     PipelineRunError,
     PipelineRunResponse,
     NodeInput,
@@ -279,20 +283,27 @@ class Laminar:
         cls,
         name: str,
         input: Any = None,
+        span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
     ):
         """Start a new span as the current span. Useful for manual
-        instrumentation.
+        instrumentation. If `span_type` is set to `"LLM"`, you should report
+        usage and response attributes manually. See `Laminar.set_span_attributes`
+        for more information.
 
         Usage example:
         ```python
         with Laminar.start_as_current_span("my_span", input="my_input") as span:
             await my_async_function()
+            Laminar.set_span_output("my_output")`
         ```
 
         Args:
             name (str): name of the span
             input (Any, optional): input to the span. Will be sent as an\
                 attribute, so must be json serializable. Defaults to None.
+            span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
+                type of the span. If you use `"LLM"`, you should report usage\
+                and response attributes manually. Defaults to "DEFAULT".
         """
         with get_tracer() as tracer:
             span_path = get_span_path(name)
@@ -308,6 +319,7 @@ class Laminar:
                     SPAN_INPUT,
                     json.dumps(input),
                 )
+            span.set_attribute(SPAN_TYPE, span_type)
             yield span
 
             # TODO: Figure out if this is necessary
@@ -327,7 +339,52 @@ class Laminar:
         """
         span = get_current_span()
         if output is not None and span != INVALID_SPAN:
-            span.set_attribute(SPAN_OUTPUT,
+            span.set_attribute(SPAN_OUTPUT, json_dumps(output))
+
+    @classmethod
+    def set_span_attributes(
+        cls,
+        attributes: dict[Attributes, Any],
+    ):
+        """Set attributes for the current span. Useful for manual
+        instrumentation.
+        Example:
+        ```python
+        with L.start_as_current_span(
+            name="my_span_name", input=input["messages"], span_type="LLM"
+        ):
+            response = await my_custom_call_to_openai(input)
+            L.set_span_output(response["choices"][0]["message"]["content"])
+            L.set_span_attributes({
+                Attributes.PROVIDER: 'openai',
+                Attributes.REQUEST_MODEL: input["model"],
+                Attributes.RESPONSE_MODEL: response["model"],
+                Attributes.INPUT_TOKEN_COUNT: response["usage"]["prompt_tokens"],
+                Attributes.OUTPUT_TOKEN_COUNT: response["usage"]["completion_tokens"],
+            })
+            # ...
+        ```
+
+        Args:
+            attributes (dict[ATTRIBUTES, Any]): attributes to set for the span
+        """
+        span = get_current_span()
+        if span == INVALID_SPAN:
+            return
+
+        for key, value in attributes.items():
+            # Python 3.12+ should do: if key not in Attributes:
+            try:
+                Attributes(key.value)
+            except (TypeError, AttributeError):
+                cls.__logger.warning(
+                    f"Attribute {key} is not a valid Laminar attribute."
+                )
+                continue
+            if not isinstance(value, (str, int, float, bool)):
+                span.set_attribute(key.value, json_dumps(value))
+            else:
+                span.set_attribute(key.value, value)
 
     @classmethod
     def set_session(
@@ -399,10 +456,36 @@ class Laminar:
             try:
                 resp_json = response.json()
                 raise ValueError(f"Error creating evaluation {json.dumps(resp_json)}")
-            except
+            except requests.exceptions.RequestException:
                 raise ValueError(f"Error creating evaluation {response.text}")
         return CreateEvaluationResponse.model_validate(response.json())
 
+    @classmethod
+    def get_datapoints(
+        cls,
+        dataset_name: str,
+        offset: int,
+        limit: int,
+    ) -> GetDatapointsResponse:
+        params = {"name": dataset_name, "offset": offset, "limit": limit}
+        url = (
+            cls.__base_http_url
+            + "/v1/datasets/datapoints?"
+            + urllib.parse.urlencode(params)
+        )
+        response = requests.get(url, headers=cls._headers())
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {json.dumps(resp_json)}"
+                )
+            except requests.exceptions.RequestException:
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
+                )
+        return GetDatapointsResponse.model_validate(response.json())
+
     @classmethod
     def _headers(cls):
         assert cls.__project_api_key is not None, "Project API key is not set"
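`LaminarDataset._fetch_batch` is the intended consumer of this endpoint, but it can also be called directly once the SDK is initialized. A sketch; the API key and dataset name are placeholders:

```python
from lmnr import Laminar as L

L.initialize(project_api_key="<project-api-key>")  # placeholder key
resp = L.get_datapoints(dataset_name="my_dataset", offset=0, limit=25)
print(f"fetched {len(resp.items)} of {resp.totalCount} datapoints")
```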
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/log.py

@@ -37,3 +37,13 @@ class VerboseFormatter(CustomFormatter):
     def format(self, record):
         formatter = logging.Formatter(self.fmt)
         return formatter.format(record)
+
+
+def get_default_logger(name: str, level: int = logging.INFO, propagate: bool = False):
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+    console_log_handler = logging.StreamHandler()
+    console_log_handler.setFormatter(VerboseColorfulFormatter())
+    logger.addHandler(console_log_handler)
+    logger.propagate = propagate
+    return logger
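Both `LaminarDataset` and `Evaluation` use this helper for their internal logging; a quick sketch of its behavior:

```python
import logging
from lmnr.sdk.log import get_default_logger

# Console logger with the SDK's colorful formatter; propagation is off
# by default, so records are not duplicated through the root logger.
logger = get_default_logger("my-component", level=logging.DEBUG)
logger.debug("fetching batch 0 to 25")
```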
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/sdk/types.py

@@ -79,6 +79,7 @@ class PipelineRunError(Exception):
 
 EvaluationDatapointData = dict[str, Any]
 EvaluationDatapointTarget = dict[str, Any]
+EvaluationDatapointMetadata = Optional[dict[str, Any]]
 
 
 # EvaluationDatapoint is a single data point in the evaluation
@@ -88,6 +89,7 @@ class Datapoint(pydantic.BaseModel):
     # input to the evaluator function (alongside the executor output).
     # Must be a dict with string keys
     target: EvaluationDatapointTarget
+    metadata: EvaluationDatapointMetadata = pydantic.Field(default=None)
 
 
 ExecutorFunctionReturnType = Any
@@ -153,3 +155,8 @@ class TraceType(Enum):
     DEFAULT = "DEFAULT"
     EVENT = "EVENT"  # must not be set manually
     EVALUATION = "EVALUATION"
+
+
+class GetDatapointsResponse(pydantic.BaseModel):
+    items: list[Datapoint]
+    totalCount: int
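A datapoint can now carry optional metadata alongside its data and target. A sketch, assuming `Datapoint` also declares a `data` field just above the hunk shown:

```python
from lmnr.sdk.types import Datapoint

dp = Datapoint(
    data={"question": "What is 2 + 2?"},  # executor input (assumed field)
    target={"expected": "4"},             # evaluator input
    metadata={"source": "unit-test"},     # new in 0.4.24, defaults to None
)
```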
{lmnr-0.4.22 → lmnr-0.4.24}/src/lmnr/traceloop_sdk/decorators/base.py

@@ -1,9 +1,10 @@
 import json
 from functools import wraps
+import logging
 import os
+import pydantic
 import types
 from typing import Any, Optional
-import warnings
 
 from opentelemetry import trace
 from opentelemetry import context as context_api
@@ -17,20 +18,20 @@ from lmnr.traceloop_sdk.utils.json_encoder import JSONEncoder
 
 class CustomJSONEncoder(JSONEncoder):
     def default(self, o: Any) -> Any:
+        if isinstance(o, pydantic.BaseModel):
+            return o.model_dump_json()
         try:
             return super().default(o)
         except TypeError:
             return str(o)  # Fallback to string representation for unsupported types
 
 
-def
+def json_dumps(data: dict) -> str:
     try:
-
-        warnings.simplefilter("ignore", RuntimeWarning)
-        return json.dumps(data, cls=CustomJSONEncoder)
+        return json.dumps(data, cls=CustomJSONEncoder)
     except Exception:
         # Log the exception and return a placeholder if serialization completely fails
-
+        logging.warning("Failed to serialize data to JSON, type: %s", type(data))
         return "{}"  # Return an empty JSON object as a fallback
 
 
@@ -59,7 +60,7 @@ def entity_method(
             if _should_send_prompts():
                 span.set_attribute(
                     SPAN_INPUT,
-
+                    json_dumps(
                         get_input_from_func_args(
                             fn, is_method(fn), args, kwargs
                         )
@@ -78,7 +79,7 @@ def entity_method(
             if _should_send_prompts():
                 span.set_attribute(
                     SPAN_OUTPUT,
-
+                    json_dumps(res),
                 )
         except TypeError:
             pass
@@ -121,7 +122,7 @@ def aentity_method(
             if _should_send_prompts():
                 span.set_attribute(
                     SPAN_INPUT,
-
+                    json_dumps(
                         get_input_from_func_args(
                             fn, is_method(fn), args, kwargs
                         )
@@ -138,7 +139,7 @@ def aentity_method(
 
         try:
             if _should_send_prompts():
-                span.set_attribute(SPAN_OUTPUT,
+                span.set_attribute(SPAN_OUTPUT, json_dumps(res))
         except TypeError:
             pass
 
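The net effect is that pydantic models now serialize cleanly inside span inputs and outputs, with `str()` and `"{}"` as graceful fallbacks. A quick sketch; the model is illustrative:

```python
import pydantic
from lmnr.traceloop_sdk.decorators.base import json_dumps

class Reply(pydantic.BaseModel):
    text: str

# BaseModel instances hit the new isinstance branch in CustomJSONEncoder;
# unsupported types fall back to str(), and total failure yields "{}".
print(json_dumps({"reply": Reply(text="hi")}))
```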
lmnr-0.4.24/src/lmnr/traceloop_sdk/tracing/attributes.py (new file)

@@ -0,0 +1,35 @@
+from enum import Enum
+from opentelemetry.semconv_ai import SpanAttributes
+
+SPAN_INPUT = "lmnr.span.input"
+SPAN_OUTPUT = "lmnr.span.output"
+SPAN_TYPE = "lmnr.span.type"
+SPAN_PATH = "lmnr.span.path"
+
+ASSOCIATION_PROPERTIES = "lmnr.association.properties"
+SESSION_ID = "session_id"
+USER_ID = "user_id"
+TRACE_TYPE = "trace_type"
+
+
+# exposed to the user, configurable
+class Attributes(Enum):
+    # == This is the minimum set of attributes for a proper LLM span ==
+    #
+    # not SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+    INPUT_TOKEN_COUNT = "gen_ai.usage.input_tokens"
+    # not SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+    OUTPUT_TOKEN_COUNT = "gen_ai.usage.output_tokens"
+    TOTAL_TOKEN_COUNT = SpanAttributes.LLM_USAGE_TOTAL_TOKENS
+    PROVIDER = SpanAttributes.LLM_SYSTEM
+    REQUEST_MODEL = SpanAttributes.LLM_REQUEST_MODEL
+    RESPONSE_MODEL = SpanAttributes.LLM_RESPONSE_MODEL
+    #
+    ## == End of minimum set ==
+    # == Additional attributes ==
+    #
+    INPUT_COST = "gen_ai.usage.input_cost"
+    OUTPUT_COST = "gen_ai.usage.output_cost"
+    TOTAL_COST = "gen_ai.usage.cost"
+    #
+    # == End of additional attributes ==
lmnr-0.4.22/src/lmnr/traceloop_sdk/tracing/attributes.py (deleted; replaced by the expanded module above)

@@ -1,9 +0,0 @@
-SPAN_INPUT = "lmnr.span.input"
-SPAN_OUTPUT = "lmnr.span.output"
-SPAN_TYPE = "lmnr.span.type"
-SPAN_PATH = "lmnr.span.path"
-
-ASSOCIATION_PROPERTIES = "lmnr.association.properties"
-SESSION_ID = "session_id"
-USER_ID = "user_id"
-TRACE_TYPE = "trace_type"
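Putting the new surface together, a manually instrumented LLM span might look like the sketch below; the model names and token counts are placeholders, and the API names come from the hunks above:

```python
from lmnr import Attributes, Laminar

Laminar.initialize(project_api_key="<project-api-key>")  # placeholder key

with Laminar.start_as_current_span(
    "chat", input=[{"role": "user", "content": "hi"}], span_type="LLM"
):
    # A real integration would call the model here (e.g. via the new
    # openai dependency); the output below is hard-coded for illustration.
    Laminar.set_span_output("hello")
    Laminar.set_span_attributes({
        Attributes.PROVIDER: "openai",
        Attributes.REQUEST_MODEL: "gpt-4o-mini",
        Attributes.RESPONSE_MODEL: "gpt-4o-mini",
        Attributes.INPUT_TOKEN_COUNT: 3,
        Attributes.OUTPUT_TOKEN_COUNT: 1,
    })
```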