lmnr 0.4.23__tar.gz → 0.4.25__tar.gz
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
- {lmnr-0.4.23 → lmnr-0.4.25}/PKG-INFO +3 -1
- {lmnr-0.4.23 → lmnr-0.4.25}/README.md +1 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/pyproject.toml +3 -2
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/__init__.py +1 -0
- lmnr-0.4.25/src/lmnr/sdk/datasets.py +58 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/evaluations.py +49 -59
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/laminar.py +51 -2
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/log.py +10 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/types.py +7 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/decorators/base.py +4 -6
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/attributes.py +12 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/LICENSE +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/utils.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.23 → lmnr-0.4.25}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.23
+Version: 0.4.25
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -16,6 +16,7 @@ Requires-Dist: argparse (>=1.0,<2.0)
 Requires-Dist: backoff (>=2.0,<3.0)
 Requires-Dist: deprecated (>=1.0,<2.0)
 Requires-Dist: jinja2 (>=3.0,<4.0)
+Requires-Dist: openai (>=1.52.0,<2.0.0)
 Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1.27.0,<2.0.0)
@@ -246,6 +247,7 @@ You can run evaluations locally by providing executor (part of the logic used in
 - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
 - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
 - `name` – optional name for the evaluation. Automatically generated if not provided.
+- `group_id` – optional group name for the evaluation. Evaluations within the same group can be compared visually side-by-side
 
 \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
```
{lmnr-0.4.23 → lmnr-0.4.25}/README.md

```diff
@@ -188,6 +188,7 @@ You can run evaluations locally by providing executor (part of the logic used in
 - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
 - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
 - `name` – optional name for the evaluation. Automatically generated if not provided.
+- `group_id` – optional group name for the evaluation. Evaluations within the same group can be compared visually side-by-side
 
 \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
```
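Read together, these bullets imply a call shape like the following sketch. The datapoint dict layout (`data`/`target` keys), the executor, and the evaluator are invented for illustration; only the argument names come from the README above, and `evaluate` is assumed to be exported from the top-level `lmnr` package as in the package's other examples.

```python
from lmnr import evaluate  # assumed top-level export

def executor(data: dict) -> str:
    # the logic under evaluation; receives the datapoint's `data` first
    return data["question"].strip().lower()

def exact_match(output: str, target: dict) -> float:
    # receives the executor output first and `target` second; returns a number
    return float(output == target["expected"])

evaluate(
    data=[{"data": {"question": " Hi "}, "target": {"expected": "hi"}}],
    executor=executor,
    evaluators={"exact_match": exact_match},
    name="baseline",        # optional; auto-generated when omitted
    group_id="my-feature",  # runs sharing a group_id are compared side-by-side
)
```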
{lmnr-0.4.23 → lmnr-0.4.25}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.23"
+version = "0.4.25"
 description = "Python SDK for Laminar AI"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.23"
+version = "0.4.25"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -62,6 +62,7 @@ opentelemetry-instrumentation-groq = ">=0.33.1"
 tqdm = "~=4.0"
 argparse = "~=1.0"
 
+openai = "^1.52.0"
 [tool.poetry.group.dev.dependencies]
 autopep8 = "^2.2.0"
 flake8 = "7.0.0"
```
lmnr-0.4.25/src/lmnr/sdk/datasets.py (new file)

```diff
@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+import logging
+
+from .log import get_default_logger
+from .laminar import Laminar as L
+from .types import (
+    Datapoint,
+)
+
+DEFAULT_FETCH_SIZE = 25
+
+
+class EvaluationDataset(ABC):
+    @abstractmethod
+    def __init__(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def __len__(self) -> int:
+        pass
+
+    @abstractmethod
+    def __getitem__(self, idx) -> Datapoint:
+        pass
+
+    def slice(self, start: int, end: int):
+        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+class LaminarDataset(EvaluationDataset):
+    def __init__(self, name: str, fetch_size: int = DEFAULT_FETCH_SIZE):
+        self.name = name
+        self._len = None
+        self._fetched_items = []
+        self._offset = 0
+        self._fetch_size = fetch_size
+        self._logger = get_default_logger(self.__class__.__name__, level=logging.DEBUG)
+
+    def _fetch_batch(self):
+        self._logger.debug(
+            f"dataset {self.name}. Fetching batch from {self._offset} to "
+            + f"{self._offset + self._fetch_size}"
+        )
+        resp = L.get_datapoints(self.name, self._offset, self._fetch_size)
+        self._fetched_items += resp.items
+        self._offset = len(self._fetched_items)
+        if self._len is None:
+            self._len = resp.totalCount
+
+    def __len__(self) -> int:
+        if self._len is None:
+            self._fetch_batch()
+        return self._len
+
+    def __getitem__(self, idx) -> Datapoint:
+        if idx >= len(self._fetched_items):
+            self._fetch_batch()
+        return self._fetched_items[idx]
```
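The new module splits the abstract `EvaluationDataset` from its Laminar-backed implementation: subclasses only need `__init__`, `__len__`, and `__getitem__`, and inherit `slice`. A minimal in-memory subclass as illustration; the class name and field values are invented, not part of the package:

```python
from lmnr.sdk.datasets import EvaluationDataset
from lmnr.sdk.types import Datapoint

class InMemoryDataset(EvaluationDataset):
    """Hypothetical subclass wrapping a plain list of Datapoint objects."""

    def __init__(self, points: list[Datapoint]):
        self._points = points

    def __len__(self) -> int:
        return len(self._points)

    def __getitem__(self, idx) -> Datapoint:
        return self._points[idx]

points = [Datapoint(data={"q": "2+2?"}, target={"expected": "4"})]
ds = InMemoryDataset(points)
assert ds.slice(0, 10) == points  # inherited helper clamps the range to len(ds)
```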
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/evaluations.py

```diff
@@ -1,17 +1,18 @@
 import asyncio
 import re
 import sys
-from abc import ABC, abstractmethod
-from contextlib import contextmanager
-from typing import Any, Awaitable, Optional, Set, Union
 import uuid
 
+from contextlib import contextmanager
 from tqdm import tqdm
+from typing import Any, Awaitable, Optional, Set, Union
 
 from ..traceloop_sdk.instruments import Instruments
 from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
 
+from .datasets import EvaluationDataset
 from .laminar import Laminar as L
+from .log import get_default_logger
 from .types import (
     Datapoint,
     EvaluationResultDatapoint,
@@ -84,7 +85,7 @@ class EvaluationReporter:
     ):
         self.cli_progress.close()
         print(
-            f"\nCheck …
+            f"\nCheck the results at {get_evaluation_url(project_id, evaluation_id)}\n"
         )
         print("Average scores:")
         for name, score in average_scores.items():
@@ -92,31 +93,14 @@ class EvaluationReporter:
         print("\n")
 
 
-class EvaluationDataset(ABC):
-    @abstractmethod
-    def __init__(self, *args, **kwargs):
-        pass
-
-    @abstractmethod
-    def __len__(self) -> int:
-        pass
-
-    @abstractmethod
-    def __getitem__(self, idx) -> Datapoint:
-        pass
-
-    def slice(self, start: int, end: int):
-        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
-
-
 class Evaluation:
     def __init__(
         self,
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
         evaluators: dict[str, EvaluatorFunction],
-        group_id: Optional[str] = None,
         name: Optional[str] = None,
+        group_id: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
         project_api_key: Optional[str] = None,
         base_url: Optional[str] = None,
@@ -135,33 +119,40 @@ class Evaluation:
             executor (Callable[..., Any]): The executor function.\
                 Takes the data point + any additional arguments\
                 and returns the output to evaluate.
-            evaluators (…
-                Each evaluator function takes the output of the executor…
-                the target data, and returns a score. The score can be a\
-                single number or a…
+            evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+                names. Each evaluator function takes the output of the executor\
+                _and_ the target data, and returns a score. The score can be a\
+                single number or a dict of string keys and number values.\
                 If the score is a single number, it will be named after the\
-                evaluator function.
-            … [10 removed lines truncated in source]
+                evaluator function. Evaluator function names must contain only\
+                letters, digits, hyphens, underscores, or spaces.
+            name (Optional[str], optional): Optional name of the evaluation.\
+                Used to identify the evaluation in the group.\
+                If not provided, a random name will be generated.
+                Defaults to None.
+            group_id (Optional[str], optional): an identifier to group\
+                evaluations. Only evaluations within the same group_id can be\
+                visually compared. If not provided, "default" is assigned.
+                Defaults to None
+            batch_size (int, optional): The batch size for evaluation. This many\
+                data points will be evaluated in parallel.
+                Defaults to DEFAULT_BATCH_SIZE.
+            project_api_key (Optional[str], optional): The project API key.\
+                If not provided, LMNR_PROJECT_API_KEY environment variable is\
+                used.
+                Defaults to an empty string.
             base_url (Optional[str], optional): The base URL for Laminar API.\
-            … [3 removed lines truncated in source]
+                Useful if self-hosted. Do NOT include the port, use `http_port`\
+                and `grpc_port` instead.
+                Defaults to "https://api.lmnr.ai".
             http_port (Optional[int], optional): The port for Laminar API\
-                …
+                HTTP service. Defaults to 443 if not specified.
             grpc_port (Optional[int], optional): The port for Laminar API\
-                …
+                gRPC service. Defaults to 8443 if not specified.
             instruments (Optional[Set[Instruments]], optional): Set of modules\
                 to auto-instrument. If None, all available instruments will be\
                 used.
+                See https://docs.lmnr.ai/tracing/automatic-instrumentation
                 Defaults to None.
         """
 
@@ -191,6 +182,7 @@ class Evaluation:
         self.group_id = group_id
         self.name = name
         self.batch_size = batch_size
+        self._logger = get_default_logger(self.__class__.__name__)
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
@@ -215,7 +207,7 @@ class Evaluation:
         )
 
         try:
-            result_datapoints = await self.…
+            result_datapoints = await self._evaluate_in_batches()
         except Exception as e:
             self.reporter.stopWithError(e)
             self.is_finished = True
@@ -228,7 +220,7 @@ class Evaluation:
         self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
         self.is_finished = True
 
-    async def …
+    async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
         result_datapoints = []
         for i in range(0, len(self.data), self.batch_size):
             batch = (
@@ -300,8 +292,8 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
    evaluators: dict[str, EvaluatorFunction],
-    group_id: Optional[str] = None,
     name: Optional[str] = None,
+    group_id: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
     project_api_key: Optional[str] = None,
     base_url: Optional[str] = None,
@@ -326,24 +318,22 @@ def evaluate(
        executor (Callable[..., Any]): The executor function.\
            Takes the data point + any additional arguments\
            and returns the output to evaluate.
-        evaluators (List[Callable[..., Any]]): …
-        … [6 removed lines truncated in source]
-        group_id (Optional[str], optional): an identifier to group evaluations.\
-            It is practical to group evaluations that evaluate\
-            the same feature on the same dataset, to be able to\
-            view their comparisons in the same place. If not\
-            provided, defaults to "default".
-            Defaults to None
+        evaluators (List[Callable[..., Any]]):
+        evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+            names. Each evaluator function takes the output of the executor\
+            _and_ the target data, and returns a score. The score can be a\
+            single number or a dict of string keys and number values.\
+            If the score is a single number, it will be named after the\
+            evaluator function. Evaluator function names must contain only\
+            letters, digits, hyphens, underscores, or spaces.
        name (Optional[str], optional): Optional name of the evaluation.\
            Used to identify the evaluation in the group.\
            If not provided, a random name will be generated.
            Defaults to None.
+        group_id (Optional[str], optional): an identifier to group evaluations.\
+            Only evaluations within the same group_id can be\
+            visually compared. If not provided, set to "default".
+            Defaults to None
        batch_size (int, optional): The batch size for evaluation.
            Defaults to DEFAULT_BATCH_SIZE.
        project_api_key (Optional[str], optional): The project API key.
```
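Since `EvaluationDataset` now lives in `datasets.py`, a hosted dataset can be passed straight to `evaluate`. A sketch, assuming a dataset with this name exists in your Laminar project and that the hypothetical `input` key is present in its datapoints:

```python
from lmnr import evaluate  # assumed top-level export
from lmnr.sdk.datasets import LaminarDataset

# Datapoints are pulled lazily from Laminar, `fetch_size` at a time (25 by default).
data = LaminarDataset("my-hosted-dataset", fetch_size=50)

evaluate(
    data=data,
    executor=lambda data: data["input"],                        # identity-style executor
    evaluators={"length": lambda output, target: len(output)},  # toy numeric score
    group_id="regression-suite",                                # compare runs within this group
)
```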
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/laminar.py

````diff
@@ -24,6 +24,7 @@ import json
 import logging
 import os
 import requests
+import urllib.parse
 import uuid
 
 from lmnr.traceloop_sdk.tracing.attributes import (
@@ -45,6 +46,7 @@ from .log import VerboseColorfulFormatter
 from .types import (
     CreateEvaluationResponse,
     EvaluationResultDatapoint,
+    GetDatapointsResponse,
     PipelineRunError,
     PipelineRunResponse,
     NodeInput,
@@ -284,7 +286,9 @@ class Laminar:
         span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
     ):
         """Start a new span as the current span. Useful for manual
-        instrumentation.
+        instrumentation. If `span_type` is set to `"LLM"`, you should report
+        usage and response attributes manually. See `Laminar.set_span_attributes`
+        for more information.
 
         Usage example:
         ```python
@@ -297,6 +301,9 @@ class Laminar:
             name (str): name of the span
             input (Any, optional): input to the span. Will be sent as an\
                 attribute, so must be json serializable. Defaults to None.
+            span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
+                type of the span. If you use `"LLM"`, you should report usage\
+                and response attributes manually. Defaults to "DEFAULT".
         """
         with get_tracer() as tracer:
             span_path = get_span_path(name)
@@ -341,6 +348,22 @@ class Laminar:
     ):
         """Set attributes for the current span. Useful for manual
         instrumentation.
+        Example:
+        ```python
+        with L.start_as_current_span(
+            name="my_span_name", input=input["messages"], span_type="LLM"
+        ):
+            response = await my_custom_call_to_openai(input)
+            L.set_span_output(response["choices"][0]["message"]["content"])
+            L.set_span_attributes({
+                Attributes.PROVIDER: 'openai',
+                Attributes.REQUEST_MODEL: input["model"],
+                Attributes.RESPONSE_MODEL: response["model"],
+                Attributes.INPUT_TOKEN_COUNT: response["usage"]["prompt_tokens"],
+                Attributes.OUTPUT_TOKEN_COUNT: response["usage"]["completion_tokens"],
+            })
+        # ...
+        ```
 
         Args:
             attributes (dict[ATTRIBUTES, Any]): attributes to set for the span
@@ -433,10 +456,36 @@ class Laminar:
         try:
             resp_json = response.json()
             raise ValueError(f"Error creating evaluation {json.dumps(resp_json)}")
-        except …
+        except requests.exceptions.RequestException:
             raise ValueError(f"Error creating evaluation {response.text}")
         return CreateEvaluationResponse.model_validate(response.json())
 
+    @classmethod
+    def get_datapoints(
+        cls,
+        dataset_name: str,
+        offset: int,
+        limit: int,
+    ) -> GetDatapointsResponse:
+        params = {"name": dataset_name, "offset": offset, "limit": limit}
+        url = (
+            cls.__base_http_url
+            + "/v1/datasets/datapoints?"
+            + urllib.parse.urlencode(params)
+        )
+        response = requests.get(url, headers=cls._headers())
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {json.dumps(resp_json)}"
+                )
+            except requests.exceptions.RequestException:
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
+                )
+        return GetDatapointsResponse.model_validate(response.json())
+
     @classmethod
     def _headers(cls):
         assert cls.__project_api_key is not None, "Project API key is not set"
````
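`get_datapoints` is the classmethod that `LaminarDataset._fetch_batch` calls. Paging through a dataset by hand could look like the sketch below; the dataset name is hypothetical, and `initialize` must have been called (or `LMNR_PROJECT_API_KEY` set) first:

```python
from lmnr import Laminar as L  # assumed top-level export

L.initialize(project_api_key="<your-project-api-key>")

offset, limit, items = 0, 25, []
while True:
    resp = L.get_datapoints("my-hosted-dataset", offset, limit)
    items.extend(resp.items)
    offset = len(items)  # mirrors LaminarDataset's offset bookkeeping
    if not resp.items or offset >= resp.totalCount:
        break
print(f"fetched {len(items)} of {resp.totalCount} datapoints")
```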
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/log.py

```diff
@@ -37,3 +37,13 @@ class VerboseFormatter(CustomFormatter):
     def format(self, record):
         formatter = logging.Formatter(self.fmt)
         return formatter.format(record)
+
+
+def get_default_logger(name: str, level: int = logging.INFO, propagate: bool = False):
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+    console_log_handler = logging.StreamHandler()
+    console_log_handler.setFormatter(VerboseColorfulFormatter())
+    logger.addHandler(console_log_handler)
+    logger.propagate = propagate
+    return logger
```
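Both `LaminarDataset` and `Evaluation` use this helper for their internal logging; it can be called directly as well. A small sketch (note that each call attaches a fresh `StreamHandler`, so reuse the returned logger rather than calling this repeatedly for the same name):

```python
import logging
from lmnr.sdk.log import get_default_logger

logger = get_default_logger("my_component", level=logging.DEBUG)
logger.debug("colorful console output; propagation to the root logger is off by default")
```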
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/types.py

```diff
@@ -79,6 +79,7 @@ class PipelineRunError(Exception):
 
 EvaluationDatapointData = dict[str, Any]
 EvaluationDatapointTarget = dict[str, Any]
+EvaluationDatapointMetadata = Optional[dict[str, Any]]
 
 
 # EvaluationDatapoint is a single data point in the evaluation
@@ -88,6 +89,7 @@ class Datapoint(pydantic.BaseModel):
     # input to the evaluator function (alongside the executor output).
     # Must be a dict with string keys
     target: EvaluationDatapointTarget
+    metadata: EvaluationDatapointMetadata = pydantic.Field(default=None)
 
 
 ExecutorFunctionReturnType = Any
@@ -153,3 +155,8 @@ class TraceType(Enum):
     DEFAULT = "DEFAULT"
     EVENT = "EVENT"  # must not be set manually
     EVALUATION = "EVALUATION"
+
+
+class GetDatapointsResponse(pydantic.BaseModel):
+    items: list[Datapoint]
+    totalCount: int
```
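`metadata` defaults to `None`, so existing datapoint construction keeps working, and `GetDatapointsResponse` models the payload of the new datapoints endpoint. A sketch with invented values (the `data` field is assumed from the surrounding class definition, which this hunk only partially shows):

```python
from lmnr.sdk.types import Datapoint, GetDatapointsResponse

dp = Datapoint(
    data={"question": "What is 2 + 2?"},
    target={"expected": "4"},
    metadata={"split": "test"},  # new optional field in 0.4.25
)

# pydantic v2 round-trip, as model_validate elsewhere in this diff suggests
resp = GetDatapointsResponse.model_validate(
    {"items": [dp.model_dump()], "totalCount": 1}
)
assert resp.items[0].metadata == {"split": "test"}
```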
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/decorators/base.py

```diff
@@ -1,10 +1,10 @@
 import json
 from functools import wraps
+import logging
 import os
 import pydantic
 import types
 from typing import Any, Optional
-import warnings
 
 from opentelemetry import trace
 from opentelemetry import context as context_api
@@ -28,12 +28,10 @@ class CustomJSONEncoder(JSONEncoder):
 
 def json_dumps(data: dict) -> str:
     try:
-        …
-        warnings.simplefilter("ignore", RuntimeWarning)
-            return json.dumps(data, cls=CustomJSONEncoder)
+        return json.dumps(data, cls=CustomJSONEncoder)
     except Exception:
         # Log the exception and return a placeholder if serialization completely fails
-        …
+        logging.warning("Failed to serialize data to JSON, type: %s", type(data))
         return "{}"  # Return an empty JSON object as a fallback
 
 
@@ -141,7 +139,7 @@ def aentity_method(
 
     try:
         if _should_send_prompts():
-            span.set_attribute(SPAN_OUTPUT, …
+            span.set_attribute(SPAN_OUTPUT, json_dumps(res))
     except TypeError:
         pass
 
```
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/attributes.py

```diff
@@ -14,10 +14,22 @@ TRACE_TYPE = "trace_type"
 
 # exposed to the user, configurable
 class Attributes(Enum):
+    # == This is the minimum set of attributes for a proper LLM span ==
+    #
     # not SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
     INPUT_TOKEN_COUNT = "gen_ai.usage.input_tokens"
     # not SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
     OUTPUT_TOKEN_COUNT = "gen_ai.usage.output_tokens"
+    TOTAL_TOKEN_COUNT = SpanAttributes.LLM_USAGE_TOTAL_TOKENS
     PROVIDER = SpanAttributes.LLM_SYSTEM
     REQUEST_MODEL = SpanAttributes.LLM_REQUEST_MODEL
     RESPONSE_MODEL = SpanAttributes.LLM_RESPONSE_MODEL
+    #
+    ## == End of minimum set ==
+    # == Additional attributes ==
+    #
+    INPUT_COST = "gen_ai.usage.input_cost"
+    OUTPUT_COST = "gen_ai.usage.output_cost"
+    TOTAL_COST = "gen_ai.usage.cost"
+    #
+    # == End of additional attributes ==
```
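Together with the `set_span_attributes` docstring example added in `laminar.py`, the expanded enum lets a manually instrumented LLM span report totals and costs as well. A sketch with invented values; the import path is taken from this diff, and the `L` alias assumes `Laminar` is exported from the top-level package:

```python
from lmnr import Laminar as L
from lmnr.traceloop_sdk.tracing.attributes import Attributes

with L.start_as_current_span(
    name="my_llm_call", input=[{"role": "user", "content": "hi"}], span_type="LLM"
):
    # ... call the model, then report the "minimum set" plus the new optional attributes
    L.set_span_attributes({
        Attributes.PROVIDER: "openai",
        Attributes.REQUEST_MODEL: "gpt-4o-mini",
        Attributes.RESPONSE_MODEL: "gpt-4o-mini",
        Attributes.INPUT_TOKEN_COUNT: 12,
        Attributes.OUTPUT_TOKEN_COUNT: 40,
        Attributes.TOTAL_TOKEN_COUNT: 52,  # new in 0.4.25
        Attributes.TOTAL_COST: 0.0001,     # new in 0.4.25; costs are optional extras
    })
```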