lmnr 0.4.12b3__tar.gz → 0.4.12b4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/PKG-INFO +17 -12
  2. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/README.md +17 -10
  3. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/pyproject.toml +2 -3
  4. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/decorators.py +3 -2
  5. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/evaluations.py +90 -58
  6. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/laminar.py +32 -10
  7. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/types.py +38 -5
  8. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/utils.py +4 -5
  9. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/__init__.py +3 -29
  10. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/config/__init__.py +0 -4
  11. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/decorators/base.py +16 -9
  12. lmnr-0.4.12b4/src/lmnr/traceloop_sdk/tracing/attributes.py +8 -0
  13. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tracing/tracing.py +31 -142
  14. lmnr-0.4.12b3/src/lmnr/traceloop_sdk/metrics/__init__.py +0 -0
  15. lmnr-0.4.12b3/src/lmnr/traceloop_sdk/metrics/metrics.py +0 -176
  16. lmnr-0.4.12b3/src/lmnr/traceloop_sdk/tracing/manual.py +0 -57
  17. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/LICENSE +0 -0
  18. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/__init__.py +0 -0
  19. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/cli.py +0 -0
  20. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/__init__.py +0 -0
  21. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/log.py +0 -0
  22. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/.flake8 +0 -0
  23. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/.python-version +0 -0
  24. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  25. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/instruments.py +0 -0
  26. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
  27. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
  28. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
  29. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
  30. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
  31. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
  32. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
  33. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
  34. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
  35. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
  36. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
  37. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
  38. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
  39. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
  40. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
  41. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
  42. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
  43. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
  44. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
  45. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
  46. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
  47. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
  48. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
  49. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
  50. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
  51. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
  52. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
  53. {lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lmnr
- Version: 0.4.12b3
+ Version: 0.4.12b4
  Summary: Python SDK for Laminar AI
  License: Apache-2.0
  Author: lmnr.ai
@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.12
  Requires-Dist: argparse (>=1.0,<2.0)
  Requires-Dist: asyncio (>=3.0,<4.0)
  Requires-Dist: backoff (>=2.0,<3.0)
- Requires-Dist: colorama (>=0.4,<0.5)
  Requires-Dist: deprecated (>=1.0,<2.0)
  Requires-Dist: jinja2 (>=3.0,<4.0)
  Requires-Dist: opentelemetry-api (>=1.27.0,<2.0.0)
@@ -197,7 +196,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In

  If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.

- Majority of the autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
+ Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).

  ## Sending events

@@ -267,13 +266,14 @@ Evaluation takes in the following parameters:
  - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
  - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
  - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
- - `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
+ - `evaluators` – evaluaton logic. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Pass a dict from evaluator name to a function. Each function can produce either a single number or `dict[str, int|float]` of scores.

  \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.

- ### Example
+ ### Example code

  ```python
+ from lmnr import evaluate
  from openai import AsyncOpenAI
  import asyncio
  import os
@@ -304,20 +304,25 @@ data = [
  ]


- def evaluator_A(output, target):
+ def correctness(output, target):
      return 1 if output == target["capital"] else 0


  # Create an Evaluation instance
- e = Evaluation(
-     name="py-evaluation-async",
+ e = evaluate(
+     name="my-evaluation",
      data=data,
      executor=get_capital,
-     evaluators=[evaluator_A],
+     evaluators={"correctness": correctness},
      project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
  )
-
- # Run the evaluation
- asyncio.run(e.run())
  ```

+ ### Running from CLI.
+
+ 1. Make sure `lmnr` is installed in a venv. CLI does not work with a global env
+ 1. Run `lmnr path/to/my/eval.py`
+
+ ### Running from code
+
+ Simply execute the function, e.g. `python3 path/to/my/eval.py`
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/README.md

@@ -137,7 +137,7 @@ L.initialize(project_api_key=os.environ["LMNR_PROJECT_API_KEY"], instruments={In

  If you want to fully disable any kind of autoinstrumentation, pass an empty set as `instruments=set()` to `.initialize()`.

- Majority of the autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).
+ Autoinstrumentations are provided by Traceloop's [OpenLLMetry](https://github.com/traceloop/openllmetry).

  ## Sending events

@@ -207,13 +207,14 @@ Evaluation takes in the following parameters:
  - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
  - `data` – an array of `EvaluationDatapoint` objects, where each `EvaluationDatapoint` has two keys: `target` and `data`, each containing a key-value object. Alternatively, you can pass in dictionaries, and we will instantiate `EvaluationDatapoint`s with pydantic if possible
  - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
- - `evaluators` – evaluaton logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Each function can produce either a single number or `dict[str, int|float]` of scores.
+ - `evaluators` – evaluaton logic. Functions that take output of executor as the first argument, `target` as the second argument and produce a numeric scores. Pass a dict from evaluator name to a function. Each function can produce either a single number or `dict[str, int|float]` of scores.

  \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function, that takes in `data` and returns only needed value(s) from it.

- ### Example
+ ### Example code

  ```python
+ from lmnr import evaluate
  from openai import AsyncOpenAI
  import asyncio
  import os
@@ -244,19 +245,25 @@ data = [
  ]


- def evaluator_A(output, target):
+ def correctness(output, target):
      return 1 if output == target["capital"] else 0


  # Create an Evaluation instance
- e = Evaluation(
-     name="py-evaluation-async",
+ e = evaluate(
+     name="my-evaluation",
      data=data,
      executor=get_capital,
-     evaluators=[evaluator_A],
+     evaluators={"correctness": correctness},
      project_api_key=os.environ["LMNR_PROJECT_API_KEY"],
  )
-
- # Run the evaluation
- asyncio.run(e.run())
  ```
+
+ ### Running from CLI.
+
+ 1. Make sure `lmnr` is installed in a venv. CLI does not work with a global env
+ 1. Run `lmnr path/to/my/eval.py`
+
+ ### Running from code
+
+ Simply execute the function, e.g. `python3 path/to/my/eval.py`
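A note on the evaluator contract documented above: the sketch below (hypothetical evaluator names, not part of the package) illustrates the two return shapes the README describes, either a single number reported under the evaluator's dict key, or a `dict[str, int|float]` of named scores.

```python
def accuracy(output, target):
    # A single numeric score; it is reported under the key this evaluator is
    # registered with in the evaluators dict (e.g. "accuracy").
    return 1 if output == target["capital"] else 0


def detailed(output, target):
    # Several named scores at once, returned as dict[str, int | float].
    return {
        "exact_match": 1 if output == target["capital"] else 0,
        "non_empty": 1 if len(output) > 0 else 0,
    }


# Evaluators are now passed as a dict from evaluator name to function:
# evaluate(..., evaluators={"accuracy": accuracy, "detailed": detailed}, ...)
```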
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "lmnr"
- version = "0.4.12b3"
+ version = "0.4.12b4"
  description = "Python SDK for Laminar AI"
  authors = [
      { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"

  [tool.poetry]
  name = "lmnr"
- version = "0.4.12b3"
+ version = "0.4.12b4"
  description = "Python SDK for Laminar AI"
  authors = ["lmnr.ai"]
  readme = "README.md"
@@ -33,7 +33,6 @@ opentelemetry-instrumentation-sqlalchemy = "^0.48b0"
  opentelemetry-instrumentation-urllib3 = "^0.48b0"
  opentelemetry-instrumentation-threading = "^0.48b0"
  opentelemetry-semantic-conventions-ai = "0.4.1"
- colorama = "^0.4"
  tenacity = "~=8.0"
  jinja2 = "~=3.0"
  deprecated = "~=1.0"
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/decorators.py

@@ -6,6 +6,7 @@ from opentelemetry.trace import INVALID_SPAN, get_current_span

  from typing import Callable, Optional, cast

+ from lmnr.traceloop_sdk.tracing.attributes import SESSION_ID, USER_ID
  from lmnr.traceloop_sdk.tracing.tracing import update_association_properties

  from .utils import is_async
@@ -43,11 +44,11 @@ def observe(
          if current_span != INVALID_SPAN:
              if session_id is not None:
                  current_span.set_attribute(
-                     "traceloop.association.properties.session_id", session_id
+                     SESSION_ID, session_id
                  )
              if user_id is not None:
                  current_span.set_attribute(
-                     "traceloop.association.properties.user_id", user_id
+                     USER_ID, user_id
                  )
          association_properties = {}
          if session_id is not None:
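For context on the `observe` hunks above: the decorator keeps accepting `session_id` and `user_id` and now writes them through the shared attribute constants instead of hard-coded `traceloop.association.properties.*` strings. A minimal usage sketch, assuming `observe` is imported from the package root as the README examples do:

```python
from lmnr import observe


# session_id / user_id are attached to the current span and propagated as
# association properties for the rest of the trace.
@observe(session_id="session-123", user_id="user-456")
def answer_question(question: str) -> str:
    # ... call a model or pipeline here ...
    return question.upper()


answer_question("What is the capital of France?")
```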
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/evaluations.py

@@ -2,12 +2,26 @@ import asyncio
  import sys
  from abc import ABC, abstractmethod
  from contextlib import contextmanager
- from typing import Any, Awaitable, Optional, Union
+ from typing import Any, Awaitable, Optional, Set, Union
+ import uuid

  from tqdm import tqdm

+ from ..traceloop_sdk.instruments import Instruments
+ from ..traceloop_sdk.tracing.attributes import SPAN_TYPE
+
  from .laminar import Laminar as L
- from .types import CreateEvaluationResponse, Datapoint, EvaluationResultDatapoint, Numeric, NumericTypes
+ from .types import (
+     CreateEvaluationResponse,
+     Datapoint,
+     EvaluationResultDatapoint,
+     EvaluatorFunction,
+     ExecutorFunction,
+     Numeric,
+     NumericTypes,
+     SpanType,
+     TraceType,
+ )
  from .utils import is_async

  DEFAULT_BATCH_SIZE = 5
@@ -39,7 +53,11 @@ class EvaluationReporter:
      def start(self, name: str, project_id: str, id: str, length: int):
          print(f"Running evaluation {name}...\n")
          print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
-         self.cli_progress = tqdm(total=length, bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}", ncols=60)
+         self.cli_progress = tqdm(
+             total=length,
+             bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
+             ncols=60,
+         )

      def update(self, batch_length: int):
          self.cli_progress.update(batch_length)
@@ -51,7 +69,7 @@
      def stop(self, average_scores: dict[str, Numeric]):
          self.cli_progress.close()
          print("\nAverage scores:")
-         for (name, score) in average_scores.items():
+         for name, score in average_scores.items():
              print(f"{name}: {score}")
          print("\n")

@@ -78,12 +96,14 @@ class Evaluation:
          self,
          data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
          executor: Any,
-         evaluators: list[Any],
+         evaluators: dict[str, EvaluatorFunction],
          name: Optional[str] = None,
          batch_size: int = DEFAULT_BATCH_SIZE,
          project_api_key: Optional[str] = None,
          base_url: Optional[str] = None,
          http_port: Optional[int] = None,
+         grpc_port: Optional[int] = None,
+         instruments: Optional[Set[Instruments]] = None,
      ):
          """
          Initializes an instance of the Evaluations class.
@@ -114,33 +134,18 @@
                  Defaults to "https://api.lmnr.ai".
              http_port (Optional[int], optional): The port for the Laminar API HTTP service.
                  Defaults to 443.
+             instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+                 Defaults to None. If None, all available instruments will be used.
          """

          self.is_finished = False
          self.name = name
          self.reporter = EvaluationReporter()
          self.executor = executor
-         self.evaluators = dict(
-             zip(
-                 [
-                     (
-                         e.__name__
-                         if e.__name__ and e.__name__ != "<lambda>"
-                         else f"evaluator_{i+1}"
-                     )
-                     for i, e in enumerate(evaluators)
-                 ],
-                 evaluators,
-             )
-         )
-         self.evaluator_names = list(self.evaluators.keys())
+         self.evaluators = evaluators
          if isinstance(data, list):
              self.data = [
-                 (
-                     Datapoint.model_validate(point)
-                     if isinstance(point, dict)
-                     else point
-                 )
+                 (Datapoint.model_validate(point) if isinstance(point, dict) else point)
                  for point in data
              ]
          else:
@@ -150,7 +155,8 @@
              project_api_key=project_api_key,
              base_url=base_url,
              http_port=http_port,
-             instruments=set(),
+             grpc_port=grpc_port,
+             instruments=instruments,
          )

      def run(self) -> Union[None, Awaitable[None]]:
@@ -205,7 +211,7 @@
      async def evaluate_in_batches(self, evaluation: CreateEvaluationResponse):
          for i in range(0, len(self.data), self.batch_size):
              batch = (
-                 self.data[i: i + self.batch_size]
+                 self.data[i : i + self.batch_size]
                  if isinstance(self.data, list)
                  else self.data.slice(i, i + self.batch_size)
              )
@@ -217,52 +223,72 @@
              finally:
                  self.reporter.update(len(batch))

-     async def _evaluate_batch(self, batch: list[Datapoint]) -> list[EvaluationResultDatapoint]:
+     async def _evaluate_batch(
+         self, batch: list[Datapoint]
+     ) -> list[EvaluationResultDatapoint]:
          batch_promises = [self._evaluate_datapoint(datapoint) for datapoint in batch]
          results = await asyncio.gather(*batch_promises)
          return results

-     async def _evaluate_datapoint(self, datapoint) -> EvaluationResultDatapoint:
-         output = (
-             await self.executor(datapoint.data)
-             if is_async(self.executor)
-             else self.executor(datapoint.data)
-         )
-         target = datapoint.target
-
-         # Iterate over evaluators
-         scores: dict[str, Numeric] = {}
-         for evaluator_name in self.evaluator_names:
-             evaluator = self.evaluators[evaluator_name]
-             value = (
-                 await evaluator(output, target)
-                 if is_async(evaluator)
-                 else evaluator(output, target)
+     async def _evaluate_datapoint(
+         self, datapoint: Datapoint
+     ) -> EvaluationResultDatapoint:
+         with L.start_as_current_span("evaluation") as evaluation_span:
+             L._set_trace_type(trace_type=TraceType.EVALUATION)
+             evaluation_span.set_attribute(SPAN_TYPE, SpanType.EVALUATION.value)
+             with L.start_as_current_span(
+                 "executor", input={"data": datapoint.data}
+             ) as executor_span:
+                 executor_span.set_attribute(SPAN_TYPE, SpanType.EXECUTOR.value)
+                 output = (
+                     await self.executor(datapoint.data)
+                     if is_async(self.executor)
+                     else self.executor(datapoint.data)
+                 )
+                 L.set_span_output(output)
+             target = datapoint.target
+
+             # Iterate over evaluators
+             scores: dict[str, Numeric] = {}
+             for evaluator_name, evaluator in self.evaluators.items():
+                 with L.start_as_current_span(
+                     "evaluator", input={"output": output, "target": target}
+                 ) as evaluator_span:
+                     evaluator_span.set_attribute(SPAN_TYPE, SpanType.EVALUATOR.value)
+                     value = (
+                         await evaluator(output, target)
+                         if is_async(evaluator)
+                         else evaluator(output, target)
+                     )
+                     L.set_span_output(value)
+
+                 # If evaluator returns a single number, use evaluator name as key
+                 if isinstance(value, NumericTypes):
+                     scores[evaluator_name] = value
+                 else:
+                     scores.update(value)
+
+             trace_id = uuid.UUID(int=evaluation_span.get_span_context().trace_id)
+             return EvaluationResultDatapoint(
+                 data=datapoint.data,
+                 target=target,
+                 executor_output=output,
+                 scores=scores,
+                 trace_id=trace_id,
              )

-             # If evaluator returns a single number, use evaluator name as key
-             if isinstance(value, NumericTypes):
-                 scores[evaluator_name] = value
-             else:
-                 scores.update(value)
-
-         return EvaluationResultDatapoint(
-             data=datapoint.data,
-             target=target,
-             executorOutput=output,
-             scores=scores,
-         )
-

  def evaluate(
      data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
-     executor: Any,
-     evaluators: list[Any],
+     executor: ExecutorFunction,
+     evaluators: dict[str, EvaluatorFunction],
      name: Optional[str] = None,
      batch_size: int = DEFAULT_BATCH_SIZE,
      project_api_key: Optional[str] = None,
      base_url: Optional[str] = None,
      http_port: Optional[int] = None,
+     grpc_port: Optional[int] = None,
+     instruments: Optional[Set[Instruments]] = None,
  ) -> Optional[Awaitable[None]]:
      """
      If added to the file which is called through lmnr eval command, then simply registers the evaluation.
@@ -295,6 +321,10 @@ def evaluate(
              Defaults to "https://api.lmnr.ai".
          http_port (Optional[int], optional): The port for the Laminar API HTTP service.
              Defaults to 443.
+         grpc_port (Optional[int], optional): The port for the Laminar API gRPC service.
+             Defaults to 8443.
+         instruments (Optional[Set[Instruments]], optional): Set of modules to auto-instrument.
+             Defaults to None. If None, all available instruments will be used.
      """

      evaluation = Evaluation(
@@ -306,6 +336,8 @@
          project_api_key=project_api_key,
          base_url=base_url,
          http_port=http_port,
+         grpc_port=grpc_port,
+         instruments=instruments,
      )

      global _evaluation
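Tying the evaluations.py changes together: `evaluate` now takes evaluators as a name-to-function dict and forwards the new `grpc_port` and `instruments` arguments to `Laminar.initialize` instead of forcing `instruments=set()`. A hedged usage sketch (the datapoint, API key, and port values are illustrative only):

```python
from lmnr import evaluate


def executor(data):
    # In a real evaluation this would call a model; here it just echoes the input.
    return data["question"].upper()


def exact_match(output, target):
    return 1 if output == target["answer"] else 0


evaluate(
    name="my-evaluation",
    data=[{"data": {"question": "hi"}, "target": {"answer": "HI"}}],
    executor=executor,
    evaluators={"exact_match": exact_match},
    project_api_key="...",
    # New in this release: both are forwarded to Laminar.initialize().
    grpc_port=8443,      # per the docstring, defaults to 8443
    instruments=set(),   # empty set disables autoinstrumentation; None enables all
)
```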
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/laminar.py

@@ -5,7 +5,6 @@ from opentelemetry.trace import (
      get_current_span,
      SpanKind,
  )
- from opentelemetry.semconv_ai import SpanAttributes
  from opentelemetry.util.types import AttributeValue
  from opentelemetry.context.context import Context
  from opentelemetry.util import types
@@ -26,7 +25,17 @@ import os
  import requests
  import uuid

- from lmnr.traceloop_sdk.tracing.tracing import set_association_properties, update_association_properties
+ from lmnr.traceloop_sdk.tracing.attributes import (
+     SESSION_ID,
+     SPAN_INPUT,
+     SPAN_OUTPUT,
+     TRACE_TYPE,
+     USER_ID,
+ )
+ from lmnr.traceloop_sdk.tracing.tracing import (
+     set_association_properties,
+     update_association_properties,
+ )

  from .log import VerboseColorfulFormatter

@@ -37,6 +46,7 @@ from .types import (
      PipelineRunResponse,
      NodeInput,
      PipelineRunRequest,
+     TraceType,
      UpdateEvaluationResponse,
  )

@@ -356,8 +366,8 @@ class Laminar:
          ) as span:
              if input is not None:
                  span.set_attribute(
-                     SpanAttributes.TRACELOOP_ENTITY_INPUT,
-                     json.dumps({"input": input}),
+                     SPAN_INPUT,
+                     json.dumps(input),
                  )
              yield span

@@ -371,9 +381,7 @@
          """
          span = get_current_span()
          if output is not None and span != INVALID_SPAN:
-             span.set_attribute(
-                 SpanAttributes.TRACELOOP_ENTITY_OUTPUT, json.dumps(output)
-             )
+             span.set_attribute(SPAN_OUTPUT, json.dumps(output))

      @classmethod
      def set_session(
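The two hunks above change how manual spans record their I/O: the `input` kwarg of `start_as_current_span` is now stored under `SPAN_INPUT` as `json.dumps(input)` (without the previous `{"input": ...}` wrapper), and `set_span_output` writes `SPAN_OUTPUT`. A small usage sketch, assuming `Laminar` is importable from the package root as in the README:

```python
from lmnr import Laminar as L

# "input" must be JSON-serializable, since it is passed straight to json.dumps().
with L.start_as_current_span("my-step", input={"question": "What is the capital of France?"}):
    answer = "Paris"  # ... run the real logic here ...
    L.set_span_output(answer)  # recorded on the current span under SPAN_OUTPUT
```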
@@ -396,9 +404,23 @@
          """
          association_properties = {}
          if session_id is not None:
-             association_properties["session_id"] = session_id
+             association_properties[SESSION_ID] = session_id
          if user_id is not None:
-             association_properties["user_id"] = user_id
+             association_properties[USER_ID] = user_id
+         update_association_properties(association_properties)
+
+     @classmethod
+     def _set_trace_type(
+         cls,
+         trace_type: TraceType,
+     ):
+         """Set the trace_type for the current span and the context
+         Args:
+             trace_type (TraceType): Type of the trace
+         """
+         association_properties = {
+             TRACE_TYPE: trace_type.value,
+         }
          update_association_properties(association_properties)

      @classmethod
@@ -430,7 +452,7 @@
      ) -> requests.Response:
          body = {
              "evaluationId": str(evaluation_id),
-             "points": [datapoint.model_dump() for datapoint in data],
+             "points": [datapoint.to_dict() for datapoint in data],
          }
          response = requests.post(
              cls.__base_http_url + "/v1/evaluation-datapoints",
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/types.py

@@ -1,10 +1,11 @@
  import datetime
- import requests
+ from enum import Enum
  import pydantic
- import uuid
+ import requests
  from typing import Any, Awaitable, Callable, Literal, Optional, Union
+ import uuid

- from .utils import to_dict
+ from .utils import serialize


  class ChatMessage(pydantic.BaseModel):
@@ -37,7 +38,7 @@ class PipelineRunRequest(pydantic.BaseModel):
      def to_dict(self):
          return {
              "inputs": {
-                 k: v.model_dump() if isinstance(v, pydantic.BaseModel) else to_dict(v)
+                 k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
                  for k, v in self.inputs.items()
              },
              "pipeline": self.pipeline,
@@ -125,5 +126,37 @@ UpdateEvaluationResponse = CreateEvaluationResponse
  class EvaluationResultDatapoint(pydantic.BaseModel):
      data: EvaluationDatapointData
      target: EvaluationDatapointTarget
-     executorOutput: ExecutorFunctionReturnType
+     executor_output: ExecutorFunctionReturnType
      scores: dict[str, Numeric]
+     trace_id: uuid.UUID
+
+     # uuid is not serializable by default, so we need to convert it to a string
+     def to_dict(self):
+         return {
+             "data": {
+                 k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
+                 for k, v in self.data.items()
+             },
+             "target": {
+                 k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
+                 for k, v in self.target.items()
+             },
+             "executorOutput": serialize(self.executor_output),
+             "scores": self.scores,
+             "traceId": str(self.trace_id),
+         }
+
+
+ class SpanType(Enum):
+     DEFAULT = "DEFAULT"
+     LLM = "LLM"
+     PIPELINE = "PIPELINE" # must not be set manually
+     EXECUTOR = "EXECUTOR"
+     EVALUATOR = "EVALUATOR"
+     EVALUATION = "EVALUATION"
+
+
+ class TraceType(Enum):
+     DEFAULT = "DEFAULT"
+     EVENT = "EVENT" # must not be set manually
+     EVALUATION = "EVALUATION"
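The `to_dict` method added above exists because `trace_id` is a `uuid.UUID`, which `json.dumps` cannot serialize, and because the HTTP API expects camelCase keys (`executorOutput`, `traceId`) while the model now uses snake_case field names. A hedged sketch of what gets sent, assuming the class is importable from `lmnr.sdk.types`:

```python
import uuid

from lmnr.sdk.types import EvaluationResultDatapoint

point = EvaluationResultDatapoint(
    data={"question": "2 + 2?"},
    target={"answer": "4"},
    executor_output="4",
    scores={"correctness": 1},
    trace_id=uuid.uuid4(),
)

# laminar.py now posts point.to_dict() instead of point.model_dump(); note the
# camelCase wire keys and the stringified trace id.
payload = point.to_dict()
# {'data': {...}, 'target': {...}, 'executorOutput': '4', 'scores': {'correctness': 1}, 'traceId': '...'}
```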
{lmnr-0.4.12b3 → lmnr-0.4.12b4}/src/lmnr/sdk/utils.py

@@ -1,5 +1,4 @@
  import asyncio
- import copy
  import datetime
  import dataclasses
  import enum
@@ -50,7 +49,7 @@ def is_iterator(o: typing.Any) -> bool:
      return hasattr(o, "__iter__") and hasattr(o, "__next__")


- def to_dict(obj: typing.Any) -> dict[str, typing.Any]:
+ def serialize(obj: typing.Any) -> dict[str, typing.Any]:
      def to_dict_inner(o: typing.Any):
          if isinstance(o, (datetime.datetime, datetime.date)):
              return o.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
@@ -59,7 +58,7 @@ def to_dict(obj: typing.Any) -> dict[str, typing.Any]:
          elif isinstance(o, (int, float, str, bool)):
              return o
          elif isinstance(o, uuid.UUID):
-             return str(o) # same as in return, but explicit
+             return str(o) # same as in final return, but explicit
          elif isinstance(o, enum.Enum):
              return o.value
          elif dataclasses.is_dataclass(o):
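On the rename above: the module-level `to_dict` becomes `serialize`, presumably to avoid confusion with the `to_dict` methods on the request models that call it. A short sketch of the behavior visible in this hunk, mirroring how `types.py` applies it per value:

```python
import datetime
import enum
import uuid

from lmnr.sdk.utils import serialize  # renamed from to_dict in this release


class Color(enum.Enum):
    RED = "red"


payload = {"when": datetime.datetime(2024, 1, 1), "id": uuid.uuid4(), "color": Color.RED}

# Branches visible above: datetimes are formatted with strftime, UUIDs become
# strings, enum members collapse to their .value.
print({k: serialize(v) for k, v in payload.items()})
```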
@@ -90,11 +89,11 @@ def get_input_from_func_args(
  ) -> dict[str, typing.Any]:
      # Remove implicitly passed "self" or "cls" argument for
      # instance or class methods
-     res = copy.deepcopy(func_kwargs)
+     res = func_kwargs.copy()
      for i, k in enumerate(inspect.signature(func).parameters.keys()):
          if is_method and k in ["self", "cls"]:
              continue
          # If param has default value, then it's not present in func args
-         if len(func_args) > i:
+         if i < len(func_args):
              res[k] = func_args[i]
      return res
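The last hunk swaps `copy.deepcopy` for a shallow `dict.copy()` and flips the bounds check into the more conventional `i < len(func_args)` (the two comparisons are equivalent). A hedged reconstruction of the helper, with the parameter order assumed for illustration, plus a small call to show what it produces:

```python
import inspect
import typing


def get_input_from_func_args(
    func: typing.Callable,
    is_method: bool,
    func_args: list,
    func_kwargs: dict,
) -> dict[str, typing.Any]:
    # Shallow-copy the kwargs (no deepcopy anymore) and map positional args
    # onto their parameter names, skipping self/cls for methods.
    res = func_kwargs.copy()
    for i, k in enumerate(inspect.signature(func).parameters.keys()):
        if is_method and k in ["self", "cls"]:
            continue
        if i < len(func_args):
            res[k] = func_args[i]
    return res


def greet(name, punctuation="!"):
    return f"Hello, {name}{punctuation}"


# Positional "Ada" is recorded under its parameter name:
# {'punctuation': '?', 'name': 'Ada'}
print(get_input_from_func_args(greet, False, ["Ada"], {"punctuation": "?"}))
```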