lmnr 0.4.23__tar.gz → 0.4.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {lmnr-0.4.23 → lmnr-0.4.25}/PKG-INFO +3 -1
  2. {lmnr-0.4.23 → lmnr-0.4.25}/README.md +1 -0
  3. {lmnr-0.4.23 → lmnr-0.4.25}/pyproject.toml +3 -2
  4. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/__init__.py +1 -0
  5. lmnr-0.4.25/src/lmnr/sdk/datasets.py +58 -0
  6. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/evaluations.py +49 -59
  7. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/laminar.py +51 -2
  8. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/log.py +10 -0
  9. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/types.py +7 -0
  10. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/decorators/base.py +4 -6
  11. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/attributes.py +12 -0
  12. {lmnr-0.4.23 → lmnr-0.4.25}/LICENSE +0 -0
  13. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/cli.py +0 -0
  14. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/__init__.py +0 -0
  15. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/decorators.py +0 -0
  16. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/utils.py +0 -0
  17. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/.flake8 +0 -0
  18. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/.python-version +0 -0
  19. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/__init__.py +0 -0
  20. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
  21. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  22. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/instruments.py +0 -0
  23. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
  24. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
  25. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
  26. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
  27. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
  28. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
  29. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
  30. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
  31. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
  32. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
  33. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
  34. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
  35. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
  36. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
  37. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
  38. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
  39. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
  40. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
  41. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
  42. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
  43. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
  44. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
  45. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
  46. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -0
  47. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
  48. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
  49. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
  50. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
  51. {lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.23 → lmnr-0.4.25}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.23
+Version: 0.4.25
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -16,6 +16,7 @@ Requires-Dist: argparse (>=1.0,<2.0)
 Requires-Dist: backoff (>=2.0,<3.0)
 Requires-Dist: deprecated (>=1.0,<2.0)
 Requires-Dist: jinja2 (>=3.0,<4.0)
+Requires-Dist: openai (>=1.52.0,<2.0.0)
 Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc (>=1.27.0,<2.0.0)
 Requires-Dist: opentelemetry-exporter-otlp-proto-http (>=1.27.0,<2.0.0)
@@ -246,6 +247,7 @@ You can run evaluations locally by providing executor (part of the logic used in
 - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
 - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
 - `name` – optional name for the evaluation. Automatically generated if not provided.
+- `group_id` – optional group name for the evaluation. Evaluations within the same group can be compared visually side-by-side
 
 \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
 
{lmnr-0.4.23 → lmnr-0.4.25}/README.md
@@ -188,6 +188,7 @@ You can run evaluations locally by providing executor (part of the logic used in
 - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. It can be both a function or an `async` function.
 - `evaluators` – Dictionary which maps evaluator names to evaluators. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores. Each evaluator can be both a function or an `async` function.
 - `name` – optional name for the evaluation. Automatically generated if not provided.
+- `group_id` – optional group name for the evaluation. Evaluations within the same group can be compared visually side-by-side
 
 \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.
 
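Note: for context, a minimal sketch of how the new `group_id` argument fits the documented `evaluate` signature. The dataset contents, executor, and evaluator below are invented placeholders, not part of this release, and the dict shape assumes the `Datapoint` model's `data`/`target` fields.

```python
from lmnr import evaluate

# Hypothetical executor and evaluator, used only to illustrate the call shape.
def my_executor(data: dict) -> str:
    return data["question"].upper()

def exact_match(output: str, target: dict) -> float:
    return float(output == target["answer"])

evaluate(
    data=[{"data": {"question": "hi"}, "target": {"answer": "HI"}}],
    executor=my_executor,
    evaluators={"exact_match": exact_match},
    name="readme-example",  # optional; auto-generated when omitted
    group_id="prompt-v2",   # evaluations sharing a group_id can be compared side by side
)
```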
{lmnr-0.4.23 → lmnr-0.4.25}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.23"
+version = "0.4.25"
 description = "Python SDK for Laminar AI"
 authors = [
   { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.23"
+version = "0.4.25"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -62,6 +62,7 @@ opentelemetry-instrumentation-groq = ">=0.33.1"
 tqdm = "~=4.0"
 argparse = "~=1.0"
 
+openai = "^1.52.0"
 [tool.poetry.group.dev.dependencies]
 autopep8 = "^2.2.0"
 flake8 = "7.0.0"
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/__init__.py
@@ -1,3 +1,4 @@
+from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
 from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
lmnr-0.4.25/src/lmnr/sdk/datasets.py
@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+import logging
+
+from .log import get_default_logger
+from .laminar import Laminar as L
+from .types import (
+    Datapoint,
+)
+
+DEFAULT_FETCH_SIZE = 25
+
+
+class EvaluationDataset(ABC):
+    @abstractmethod
+    def __init__(self, *args, **kwargs):
+        pass
+
+    @abstractmethod
+    def __len__(self) -> int:
+        pass
+
+    @abstractmethod
+    def __getitem__(self, idx) -> Datapoint:
+        pass
+
+    def slice(self, start: int, end: int):
+        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
+
+
+class LaminarDataset(EvaluationDataset):
+    def __init__(self, name: str, fetch_size: int = DEFAULT_FETCH_SIZE):
+        self.name = name
+        self._len = None
+        self._fetched_items = []
+        self._offset = 0
+        self._fetch_size = fetch_size
+        self._logger = get_default_logger(self.__class__.__name__, level=logging.DEBUG)
+
+    def _fetch_batch(self):
+        self._logger.debug(
+            f"dataset {self.name}. Fetching batch from {self._offset} to "
+            + f"{self._offset + self._fetch_size}"
+        )
+        resp = L.get_datapoints(self.name, self._offset, self._fetch_size)
+        self._fetched_items += resp.items
+        self._offset = len(self._fetched_items)
+        if self._len is None:
+            self._len = resp.totalCount
+
+    def __len__(self) -> int:
+        if self._len is None:
+            self._fetch_batch()
+        return self._len
+
+    def __getitem__(self, idx) -> Datapoint:
+        if idx >= len(self._fetched_items):
+            self._fetch_batch()
+        return self._fetched_items[idx]
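Note: roughly, the intent of the new module is that `LaminarDataset` pages datapoints lazily from the Laminar API in `fetch_size` chunks via `Laminar.get_datapoints`, while any custom source can participate in evaluations by subclassing `EvaluationDataset`. A hedged sketch (the dataset name, executor, and evaluator below are hypothetical):

```python
from lmnr import LaminarDataset, evaluate

# Lazily pulls datapoints from a hosted dataset named "my_dataset" (hypothetical),
# 50 per request instead of the default 25, as the evaluation iterates over it.
dataset = LaminarDataset("my_dataset", fetch_size=50)

evaluate(
    data=dataset,
    executor=lambda data: data,                     # placeholder executor
    evaluators={"noop": lambda output, target: 1},  # placeholder evaluator
)
```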
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/evaluations.py
@@ -1,17 +1,18 @@
 import asyncio
 import re
 import sys
-from abc import ABC, abstractmethod
-from contextlib import contextmanager
-from typing import Any, Awaitable, Optional, Set, Union
 import uuid
 
+from contextlib import contextmanager
 from tqdm import tqdm
+from typing import Any, Awaitable, Optional, Set, Union
 
 from ..traceloop_sdk.instruments import Instruments
 from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
 
+from .datasets import EvaluationDataset
 from .laminar import Laminar as L
+from .log import get_default_logger
 from .types import (
     Datapoint,
     EvaluationResultDatapoint,
@@ -84,7 +85,7 @@ class EvaluationReporter:
     ):
         self.cli_progress.close()
         print(
-            f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n"
+            f"\nCheck the results at {get_evaluation_url(project_id, evaluation_id)}\n"
         )
         print("Average scores:")
         for name, score in average_scores.items():
@@ -92,31 +93,14 @@ class EvaluationReporter:
         print("\n")
 
 
-class EvaluationDataset(ABC):
-    @abstractmethod
-    def __init__(self, *args, **kwargs):
-        pass
-
-    @abstractmethod
-    def __len__(self) -> int:
-        pass
-
-    @abstractmethod
-    def __getitem__(self, idx) -> Datapoint:
-        pass
-
-    def slice(self, start: int, end: int):
-        return [self[i] for i in range(max(start, 0), min(end, len(self)))]
-
-
 class Evaluation:
     def __init__(
         self,
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
        executor: Any,
        evaluators: dict[str, EvaluatorFunction],
-        group_id: Optional[str] = None,
        name: Optional[str] = None,
+        group_id: Optional[str] = None,
        batch_size: int = DEFAULT_BATCH_SIZE,
        project_api_key: Optional[str] = None,
        base_url: Optional[str] = None,
@@ -135,33 +119,40 @@ class Evaluation:
             executor (Callable[..., Any]): The executor function.\
                 Takes the data point + any additional arguments\
                 and returns the output to evaluate.
-            evaluators (List[Callable[..., Any]]): List of evaluator functions.\
-                Each evaluator function takes the output of the executor _and_\
-                the target data, and returns a score. The score can be a\
-                single number or a record of string keys and number values.\
+            evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+                names. Each evaluator function takes the output of the executor\
+                _and_ the target data, and returns a score. The score can be a\
+                single number or a dict of string keys and number values.\
                 If the score is a single number, it will be named after the\
-                evaluator function. If the function is anonymous, it will be\
-                named `evaluator_${index}`, where index is the index of the\
-                evaluator function in the list starting from 1.
-            group_id (Optional[str], optional): Group id of the evaluation.
-                Defaults to "default".
-            name (Optional[str], optional): The name of the evaluation.\
-                It will be auto-generated if not provided.
-            batch_size (int, optional): The batch size for evaluation.
-                Defaults to DEFAULT_BATCH_SIZE.
-            project_api_key (Optional[str], optional): The project API key.
-                Defaults to an empty string.
+                evaluator function. Evaluator function names must contain only\
+                letters, digits, hyphens, underscores, or spaces.
+            name (Optional[str], optional): Optional name of the evaluation.\
+                Used to identify the evaluation in the group.\
+                If not provided, a random name will be generated.
+                Defaults to None.
+            group_id (Optional[str], optional): an identifier to group\
+                evaluations. Only evaluations within the same group_id can be\
+                visually compared. If not provided, "default" is assigned.
+                Defaults to None
+            batch_size (int, optional): The batch size for evaluation. This many\
+                data points will be evaluated in parallel.
+                Defaults to DEFAULT_BATCH_SIZE.
+            project_api_key (Optional[str], optional): The project API key.\
+                If not provided, LMNR_PROJECT_API_KEY environment variable is\
+                used.
+                Defaults to an empty string.
             base_url (Optional[str], optional): The base URL for Laminar API.\
-                Useful if self-hosted elsewhere. Do NOT include the\
-                port, use `http_port` and `grpc_port` instead.
-                Defaults to "https://api.lmnr.ai".
+                Useful if self-hosted. Do NOT include the port, use `http_port`\
+                and `grpc_port` instead.
+                Defaults to "https://api.lmnr.ai".
             http_port (Optional[int], optional): The port for Laminar API\
-                HTTP service. Defaults to 443 if not specified.
+                HTTP service. Defaults to 443 if not specified.
             grpc_port (Optional[int], optional): The port for Laminar API\
-                gRPC service. Defaults to 8443 if not specified.
+                gRPC service. Defaults to 8443 if not specified.
             instruments (Optional[Set[Instruments]], optional): Set of modules\
                 to auto-instrument. If None, all available instruments will be\
                 used.
+                See https://docs.lmnr.ai/tracing/automatic-instrumentation
                 Defaults to None.
         """
 
@@ -191,6 +182,7 @@ class Evaluation:
         self.group_id = group_id
         self.name = name
         self.batch_size = batch_size
+        self._logger = get_default_logger(self.__class__.__name__)
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
@@ -215,7 +207,7 @@
         )
 
         try:
-            result_datapoints = await self.evaluate_in_batches()
+            result_datapoints = await self._evaluate_in_batches()
         except Exception as e:
            self.reporter.stopWithError(e)
            self.is_finished = True
@@ -228,7 +220,7 @@
         self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
         self.is_finished = True
 
-    async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+    async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
         result_datapoints = []
         for i in range(0, len(self.data), self.batch_size):
             batch = (
@@ -300,8 +292,8 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
     evaluators: dict[str, EvaluatorFunction],
-    group_id: Optional[str] = None,
     name: Optional[str] = None,
+    group_id: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
     project_api_key: Optional[str] = None,
     base_url: Optional[str] = None,
@@ -326,24 +318,22 @@ def evaluate(
         executor (Callable[..., Any]): The executor function.\
             Takes the data point + any additional arguments\
            and returns the output to evaluate.
-        evaluators (List[Callable[..., Any]]): List of evaluator functions.\
-            Each evaluator function takes the output of the executor _and_\
-            the target data, and returns a score. The score can be a\
-            single number or a record of string keys and number values.\
-            If the score is a single number, it will be named after the\
-            evaluator function. If the function is anonymous, it will be\
-            named `evaluator_${index}`, where index is the index of the\
-            evaluator function in the list starting from 1.
-        group_id (Optional[str], optional): an identifier to group evaluations.\
-            It is practical to group evaluations that evaluate\
-            the same feature on the same dataset, to be able to\
-            view their comparisons in the same place. If not\
-            provided, defaults to "default".
-            Defaults to None
+        evaluators (List[Callable[..., Any]]):
+        evaluators (dict[str, Callable[..., Any]]): Evaluator functions and\
+            names. Each evaluator function takes the output of the executor\
+            _and_ the target data, and returns a score. The score can be a\
+            single number or a dict of string keys and number values.\
+            If the score is a single number, it will be named after the\
+            evaluator function. Evaluator function names must contain only\
+            letters, digits, hyphens, underscores, or spaces.
         name (Optional[str], optional): Optional name of the evaluation.\
             Used to identify the evaluation in the group.\
            If not provided, a random name will be generated.
            Defaults to None.
+        group_id (Optional[str], optional): an identifier to group evaluations.\
+            Only evaluations within the same group_id can be\
+            visually compared. If not provided, set to "default".
+            Defaults to None
         batch_size (int, optional): The batch size for evaluation.
            Defaults to DEFAULT_BATCH_SIZE.
         project_api_key (Optional[str], optional): The project API key.
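Note: to make the reworded `evaluators` docstring concrete, a hedged sketch of an evaluator that returns several named scores at once (the names and logic are illustrative only):

```python
# Each evaluator receives (output, target). Returning a dict yields one score
# per key; returning a single number yields a score named after the evaluator's
# key in the `evaluators` dict ("quality" below).
def quality(output: str, target: dict) -> dict[str, float]:
    return {
        "length_ok": float(len(output) < 200),
        "mentions_answer": float(target["answer"] in output),
    }

# Keys may contain only letters, digits, hyphens, underscores, or spaces.
evaluators = {"quality": quality}
```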
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/laminar.py
@@ -24,6 +24,7 @@ import json
 import logging
 import os
 import requests
+import urllib.parse
 import uuid
 
 from lmnr.traceloop_sdk.tracing.attributes import (
@@ -45,6 +46,7 @@ from .log import VerboseColorfulFormatter
 from .types import (
     CreateEvaluationResponse,
     EvaluationResultDatapoint,
+    GetDatapointsResponse,
     PipelineRunError,
     PipelineRunResponse,
     NodeInput,
@@ -284,7 +286,9 @@ class Laminar:
         span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
     ):
         """Start a new span as the current span. Useful for manual
-        instrumentation.
+        instrumentation. If `span_type` is set to `"LLM"`, you should report
+        usage and response attributes manually. See `Laminar.set_span_attributes`
+        for more information.
 
         Usage example:
         ```python
@@ -297,6 +301,9 @@
             name (str): name of the span
             input (Any, optional): input to the span. Will be sent as an\
                 attribute, so must be json serializable. Defaults to None.
+            span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
+                type of the span. If you use `"LLM"`, you should report usage\
+                and response attributes manually. Defaults to "DEFAULT".
         """
         with get_tracer() as tracer:
             span_path = get_span_path(name)
@@ -341,6 +348,22 @@ class Laminar:
     ):
         """Set attributes for the current span. Useful for manual
         instrumentation.
+        Example:
+        ```python
+        with L.start_as_current_span(
+            name="my_span_name", input=input["messages"], span_type="LLM"
+        ):
+            response = await my_custom_call_to_openai(input)
+            L.set_span_output(response["choices"][0]["message"]["content"])
+            L.set_span_attributes({
+                Attributes.PROVIDER: 'openai',
+                Attributes.REQUEST_MODEL: input["model"],
+                Attributes.RESPONSE_MODEL: response["model"],
+                Attributes.INPUT_TOKEN_COUNT: response["usage"]["prompt_tokens"],
+                Attributes.OUTPUT_TOKEN_COUNT: response["usage"]["completion_tokens"],
+            })
+            # ...
+        ```
 
         Args:
             attributes (dict[ATTRIBUTES, Any]): attributes to set for the span
@@ -433,10 +456,36 @@ class Laminar:
             try:
                 resp_json = response.json()
                 raise ValueError(f"Error creating evaluation {json.dumps(resp_json)}")
-            except Exception:
+            except requests.exceptions.RequestException:
                 raise ValueError(f"Error creating evaluation {response.text}")
         return CreateEvaluationResponse.model_validate(response.json())
 
+    @classmethod
+    def get_datapoints(
+        cls,
+        dataset_name: str,
+        offset: int,
+        limit: int,
+    ) -> GetDatapointsResponse:
+        params = {"name": dataset_name, "offset": offset, "limit": limit}
+        url = (
+            cls.__base_http_url
+            + "/v1/datasets/datapoints?"
+            + urllib.parse.urlencode(params)
+        )
+        response = requests.get(url, headers=cls._headers())
+        if response.status_code != 200:
+            try:
+                resp_json = response.json()
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {json.dumps(resp_json)}"
+                )
+            except requests.exceptions.RequestException:
+                raise ValueError(
+                    f"Error fetching datapoints: [{response.status_code}] {response.text}"
+                )
+        return GetDatapointsResponse.model_validate(response.json())
+
     @classmethod
     def _headers(cls):
         assert cls.__project_api_key is not None, "Project API key is not set"
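Note: a small usage sketch for the new `get_datapoints` wrapper around `GET /v1/datasets/datapoints` (assumes `Laminar.initialize` has been called with a valid project API key and that a dataset with the given name exists):

```python
from lmnr import Laminar

Laminar.initialize(project_api_key="...")  # or rely on the LMNR_PROJECT_API_KEY env var

# Fetch the first 25 datapoints of a dataset named "my_dataset" (hypothetical name).
resp = Laminar.get_datapoints(dataset_name="my_dataset", offset=0, limit=25)
print(resp.totalCount, len(resp.items))
```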
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/log.py
@@ -37,3 +37,13 @@ class VerboseFormatter(CustomFormatter):
     def format(self, record):
         formatter = logging.Formatter(self.fmt)
         return formatter.format(record)
+
+
+def get_default_logger(name: str, level: int = logging.INFO, propagate: bool = False):
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+    console_log_handler = logging.StreamHandler()
+    console_log_handler.setFormatter(VerboseColorfulFormatter())
+    logger.addHandler(console_log_handler)
+    logger.propagate = propagate
+    return logger
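Note: for reference, the new helper is used by `LaminarDataset` and `Evaluation` in this release roughly as follows (the logger name here is illustrative):

```python
import logging
from lmnr.sdk.log import get_default_logger

logger = get_default_logger("LaminarDataset", level=logging.DEBUG)
logger.debug("fetching next batch")  # formatted by VerboseColorfulFormatter, no propagation
```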
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/sdk/types.py
@@ -79,6 +79,7 @@ class PipelineRunError(Exception):
 
 EvaluationDatapointData = dict[str, Any]
 EvaluationDatapointTarget = dict[str, Any]
+EvaluationDatapointMetadata = Optional[dict[str, Any]]
 
 
 # EvaluationDatapoint is a single data point in the evaluation
@@ -88,6 +89,7 @@ class Datapoint(pydantic.BaseModel):
     # input to the evaluator function (alongside the executor output).
     # Must be a dict with string keys
     target: EvaluationDatapointTarget
+    metadata: EvaluationDatapointMetadata = pydantic.Field(default=None)
 
 
 ExecutorFunctionReturnType = Any
@@ -153,3 +155,8 @@ class TraceType(Enum):
     DEFAULT = "DEFAULT"
     EVENT = "EVENT"  # must not be set manually
     EVALUATION = "EVALUATION"
+
+
+class GetDatapointsResponse(pydantic.BaseModel):
+    items: list[Datapoint]
+    totalCount: int
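Note: a hedged sketch of the new `metadata` field and response model in use; the field values are invented, and the `data` field is assumed from the existing `Datapoint` model since it lies outside this diff:

```python
from lmnr.sdk.types import Datapoint, GetDatapointsResponse

dp = Datapoint(
    data={"question": "What is 2 + 2?"},             # assumed pre-existing field
    target={"answer": "4"},
    metadata={"source": "manual", "split": "test"},  # new optional field, defaults to None
)

resp = GetDatapointsResponse.model_validate(
    {"items": [dp.model_dump()], "totalCount": 1}
)
```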
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/decorators/base.py
@@ -1,10 +1,10 @@
 import json
 from functools import wraps
+import logging
 import os
 import pydantic
 import types
 from typing import Any, Optional
-import warnings
 
 from opentelemetry import trace
 from opentelemetry import context as context_api
@@ -28,12 +28,10 @@ class CustomJSONEncoder(JSONEncoder):
 
 def json_dumps(data: dict) -> str:
     try:
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", RuntimeWarning)
-            return json.dumps(data, cls=CustomJSONEncoder)
+        return json.dumps(data, cls=CustomJSONEncoder)
     except Exception:
         # Log the exception and return a placeholder if serialization completely fails
-        # Telemetry().log_exception(e)
+        logging.warning("Failed to serialize data to JSON, type: %s", type(data))
         return "{}"  # Return an empty JSON object as a fallback
 
 
@@ -141,7 +139,7 @@ def aentity_method(
 
             try:
                 if _should_send_prompts():
-                    span.set_attribute(SPAN_OUTPUT, json.dumps(res))
+                    span.set_attribute(SPAN_OUTPUT, json_dumps(res))
             except TypeError:
                 pass
 
{lmnr-0.4.23 → lmnr-0.4.25}/src/lmnr/traceloop_sdk/tracing/attributes.py
@@ -14,10 +14,22 @@ TRACE_TYPE = "trace_type"
 
 # exposed to the user, configurable
 class Attributes(Enum):
+    # == This is the minimum set of attributes for a proper LLM span ==
+    #
     # not SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
     INPUT_TOKEN_COUNT = "gen_ai.usage.input_tokens"
     # not SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
     OUTPUT_TOKEN_COUNT = "gen_ai.usage.output_tokens"
+    TOTAL_TOKEN_COUNT = SpanAttributes.LLM_USAGE_TOTAL_TOKENS
     PROVIDER = SpanAttributes.LLM_SYSTEM
     REQUEST_MODEL = SpanAttributes.LLM_REQUEST_MODEL
     RESPONSE_MODEL = SpanAttributes.LLM_RESPONSE_MODEL
+    #
+    ## == End of minimum set ==
+    # == Additional attributes ==
+    #
+    INPUT_COST = "gen_ai.usage.input_cost"
+    OUTPUT_COST = "gen_ai.usage.output_cost"
+    TOTAL_COST = "gen_ai.usage.cost"
+    #
+    # == End of additional attributes ==
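Note: building on the `set_span_attributes` example added in laminar.py above, a hedged sketch of reporting the new token-total and cost attributes on a manually created LLM span (all numbers are placeholders, and costs are assumed to be plain floats in USD):

```python
from lmnr import Laminar as L
from lmnr.traceloop_sdk.tracing.attributes import Attributes

messages = [{"role": "user", "content": "Hello"}]  # placeholder input

# Assumes L.initialize(...) has been called earlier in the program.
with L.start_as_current_span(name="my_llm_call", input=messages, span_type="LLM"):
    # ... call the model here, then report usage and cost manually ...
    L.set_span_attributes({
        Attributes.INPUT_TOKEN_COUNT: 1200,
        Attributes.OUTPUT_TOKEN_COUNT: 340,
        Attributes.TOTAL_TOKEN_COUNT: 1540,
        Attributes.INPUT_COST: 0.0012,
        Attributes.OUTPUT_COST: 0.0034,
        Attributes.TOTAL_COST: 0.0046,
    })
```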