lmnr 0.4.27.tar.gz → 0.4.29.tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (51)
  1. {lmnr-0.4.27 → lmnr-0.4.29}/PKG-INFO +2 -1
  2. {lmnr-0.4.27 → lmnr-0.4.29}/pyproject.toml +17 -16
  3. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/__init__.py +8 -1
  4. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/datasets.py +1 -1
  5. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/evaluations.py +33 -12
  6. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/laminar.py +66 -1
  7. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/types.py +34 -20
  8. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/utils.py +5 -5
  9. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/instruments.py +15 -11
  10. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/attributes.py +1 -0
  11. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/tracing.py +185 -265
  12. {lmnr-0.4.27 → lmnr-0.4.29}/LICENSE +0 -0
  13. {lmnr-0.4.27 → lmnr-0.4.29}/README.md +0 -0
  14. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/cli.py +0 -0
  15. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/__init__.py +0 -0
  16. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/decorators.py +0 -0
  17. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/sdk/log.py +0 -0
  18. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/.flake8 +0 -0
  19. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/.python-version +0 -0
  20. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/__init__.py +0 -0
  21. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
  22. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  23. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/decorators/base.py +0 -0
  24. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
  25. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
  26. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
  27. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
  28. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
  29. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
  30. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
  31. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
  32. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
  33. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
  34. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
  35. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
  36. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
  37. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
  38. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
  39. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
  40. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
  41. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
  42. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
  43. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
  44. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
  45. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
  46. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
  47. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
  48. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
  49. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
  50. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
  51. {lmnr-0.4.27 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/version.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.27
+Version: 0.4.29
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -39,6 +39,7 @@ Requires-Dist: opentelemetry-instrumentation-pinecone (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-qdrant (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-replicate (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-requests (>=0.48b0,<0.49)
+Requires-Dist: opentelemetry-instrumentation-sagemaker (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-sqlalchemy (>=0.48b0,<0.49)
 Requires-Dist: opentelemetry-instrumentation-threading (>=0.48b0,<0.49)
 Requires-Dist: opentelemetry-instrumentation-together (>=0.33.5)
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.27"
+version = "0.4.29"
 description = "Python SDK for Laminar AI"
 authors = [
   { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"

 [tool.poetry]
 name = "lmnr"
-version = "0.4.27"
+version = "0.4.29"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -35,30 +35,31 @@ opentelemetry-semantic-conventions-ai = "0.4.2"
 tenacity = ">=8.0"
 jinja2 = "~=3.0"
 deprecated = "~=1.0"
-opentelemetry-instrumentation-mistralai = ">=0.33.5"
-opentelemetry-instrumentation-openai = ">=0.33.5"
-opentelemetry-instrumentation-ollama = ">=0.33.5"
+opentelemetry-instrumentation-alephalpha = ">=0.33.5"
 opentelemetry-instrumentation-anthropic = ">=0.33.5"
+opentelemetry-instrumentation-bedrock = ">=0.33.5"
+opentelemetry-instrumentation-chromadb = ">=0.33.5"
 opentelemetry-instrumentation-cohere = ">=0.33.5"
 opentelemetry-instrumentation-google-generativeai = ">=0.33.5"
-opentelemetry-instrumentation-pinecone = ">=0.33.5"
-opentelemetry-instrumentation-qdrant = ">=0.33.5"
-opentelemetry-instrumentation-langchain = ">=0.33.5"
+opentelemetry-instrumentation-groq = ">=0.33.5"
+opentelemetry-instrumentation-haystack = ">=0.33.5"
 opentelemetry-instrumentation-lancedb = ">=0.33.5"
-opentelemetry-instrumentation-chromadb = ">=0.33.5"
-opentelemetry-instrumentation-transformers = ">=0.33.5"
-opentelemetry-instrumentation-together = ">=0.33.5"
+opentelemetry-instrumentation-langchain = ">=0.33.5"
 opentelemetry-instrumentation-llamaindex = ">=0.33.5"
+opentelemetry-instrumentation-marqo = ">=0.33.5"
 opentelemetry-instrumentation-milvus = ">=0.33.5"
-opentelemetry-instrumentation-haystack = ">=0.33.5"
-opentelemetry-instrumentation-bedrock = ">=0.33.5"
+opentelemetry-instrumentation-mistralai = ">=0.33.5"
+opentelemetry-instrumentation-ollama = ">=0.33.5"
+opentelemetry-instrumentation-openai = ">=0.33.5"
+opentelemetry-instrumentation-pinecone = ">=0.33.5"
+opentelemetry-instrumentation-qdrant = ">=0.33.5"
 opentelemetry-instrumentation-replicate = ">=0.33.5"
+opentelemetry-instrumentation-sagemaker = ">=0.33.5"
+opentelemetry-instrumentation-together = ">=0.33.5"
+opentelemetry-instrumentation-transformers = ">=0.33.5"
 opentelemetry-instrumentation-vertexai = ">=0.33.5"
 opentelemetry-instrumentation-watsonx = ">=0.33.5"
 opentelemetry-instrumentation-weaviate = ">=0.33.5"
-opentelemetry-instrumentation-alephalpha = ">=0.33.5"
-opentelemetry-instrumentation-marqo = ">=0.33.5"
-opentelemetry-instrumentation-groq = ">=0.33.5"
 tqdm = "~=4.0"
 argparse = "~=1.0"

src/lmnr/__init__.py

@@ -1,7 +1,14 @@
 from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
-from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
+from .sdk.types import (
+    ChatMessage,
+    HumanEvaluator,
+    NodeInput,
+    PipelineRunError,
+    PipelineRunResponse,
+)
 from .sdk.decorators import observe
 from .traceloop_sdk import Instruments
 from .traceloop_sdk.tracing.attributes import Attributes
+from opentelemetry.trace import use_span
src/lmnr/sdk/datasets.py

@@ -34,7 +34,7 @@ class LaminarDataset(EvaluationDataset):
         self._fetched_items = []
         self._offset = 0
         self._fetch_size = fetch_size
-        self._logger = get_default_logger(self.__class__.__name__, level=logging.DEBUG)
+        self._logger = get_default_logger(self.__class__.__name__)

     def _fetch_batch(self):
         self._logger.debug(
src/lmnr/sdk/evaluations.py

@@ -18,6 +18,7 @@ from .types import (
     EvaluationResultDatapoint,
     EvaluatorFunction,
     ExecutorFunction,
+    HumanEvaluator,
     Numeric,
     NumericTypes,
     SpanType,
@@ -99,6 +100,7 @@ class Evaluation:
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
         evaluators: dict[str, EvaluatorFunction],
+        human_evaluators: list[HumanEvaluator] = [],
         name: Optional[str] = None,
         group_id: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
@@ -126,6 +128,10 @@ class Evaluation:
                 If the score is a single number, it will be named after the\
                 evaluator function. Evaluator function names must contain only\
                 letters, digits, hyphens, underscores, or spaces.
+            human_evaluators (list[HumanEvaluator], optional):\
+                [Beta] List of instances of HumanEvaluator. For now, human\
+                evaluator only holds the queue name.
+                Defaults to an empty list.
             name (Optional[str], optional): Optional name of the evaluation.\
                 Used to identify the evaluation in the group.\
                 If not provided, a random name will be generated.
@@ -159,9 +165,9 @@ class Evaluation:
         if not evaluators:
             raise ValueError("No evaluators provided")

-        # TODO: Compile regex once and then reuse it
+        evaluator_name_regex = re.compile(r"^[\w\s-]+$")
         for evaluator_name in evaluators:
-            if not re.match(r"^[\w\s-]+$", evaluator_name):
+            if not evaluator_name_regex.match(evaluator_name):
                 raise ValueError(
                     f'Invalid evaluator key: "{evaluator_name}". '
                     "Keys must only contain letters, digits, hyphens,"
@@ -183,6 +189,7 @@ class Evaluation:
         self.name = name
         self.batch_size = batch_size
         self._logger = get_default_logger(self.__class__.__name__)
+        self.human_evaluators = human_evaluators
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
@@ -202,9 +209,7 @@ class Evaluation:
         return loop.run_until_complete(self._run())

     async def _run(self) -> None:
-        self.reporter.start(
-            len(self.data),
-        )
+        self.reporter.start(len(self.data))

         try:
             result_datapoints = await self._evaluate_in_batches()
@@ -212,13 +217,19 @@ class Evaluation:
             self.reporter.stopWithError(e)
             self.is_finished = True
             return
-        else:
-            evaluation = L.create_evaluation(
-                data=result_datapoints, group_id=self.group_id, name=self.name
-            )
-            average_scores = get_average_scores(result_datapoints)
-            self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
-            self.is_finished = True
+
+        # For now add all human evaluators to all result datapoints
+        # In the future, we will add ways to specify which human evaluators
+        # to add to which result datapoints, e.g. sample some randomly
+        for result_datapoint in result_datapoints:
+            result_datapoint.human_evaluators = self.human_evaluators or {}
+
+        evaluation = L.create_evaluation(
+            data=result_datapoints, group_id=self.group_id, name=self.name
+        )
+        average_scores = get_average_scores(result_datapoints)
+        self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+        self.is_finished = True

     async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
         result_datapoints = []
@@ -256,6 +267,9 @@ class Evaluation:
                 else self.executor(datapoint.data)
             )
             L.set_span_output(output)
+            executor_span_id = uuid.UUID(
+                int=executor_span.get_span_context().span_id
+            )
             target = datapoint.target

             # Iterate over evaluators
@@ -285,6 +299,7 @@ class Evaluation:
             executor_output=output,
             scores=scores,
             trace_id=trace_id,
+            executor_span_id=executor_span_id,
         )


@@ -292,6 +307,7 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
     evaluators: dict[str, EvaluatorFunction],
+    human_evaluators: list[HumanEvaluator] = [],
     name: Optional[str] = None,
     group_id: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
@@ -326,6 +342,10 @@ def evaluate(
             If the score is a single number, it will be named after the\
             evaluator function. Evaluator function names must contain only\
             letters, digits, hyphens, underscores, or spaces.
+        human_evaluators (list[HumanEvaluator], optional):\
+            [Beta] List of instances of HumanEvaluator. For now, human\
+            evaluator only holds the queue name.
+            Defaults to an empty list.
         name (Optional[str], optional): Optional name of the evaluation.\
             Used to identify the evaluation in the group.\
             If not provided, a random name will be generated.
@@ -359,6 +379,7 @@ def evaluate(
         executor=executor,
         evaluators=evaluators,
         group_id=group_id,
+        human_evaluators=human_evaluators,
         name=name,
         batch_size=batch_size,
         project_api_key=project_api_key,
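
The new `human_evaluators` argument flows from `evaluate()` into `Evaluation.__init__` and, in `_run()`, is copied onto every result datapoint before `L.create_evaluation` is called. A minimal sketch of a call site, assuming plain dicts are accepted in place of `Datapoint` instances; the executor, evaluator, data, and queue name below are invented for illustration:

```python
from lmnr import HumanEvaluator, evaluate

def executor(data):
    # Hypothetical executor: upper-case the input string.
    return str(data).upper()

def exactness(output, target):
    # Hypothetical evaluator returning a single numeric score.
    return 1 if output == target else 0

evaluate(
    data=[{"data": "hello", "target": "HELLO"}],
    executor=executor,
    evaluators={"exactness": exactness},
    # [Beta] every result datapoint is tagged with this labeling queue;
    # the queue name is an assumption for the example.
    human_evaluators=[HumanEvaluator("my-labeling-queue")],
    project_api_key="...",  # or configure the key via the environment
)
```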
src/lmnr/sdk/laminar.py

@@ -1,3 +1,4 @@
+from contextvars import Context
 import re
 from lmnr.traceloop_sdk.instruments import Instruments
 from opentelemetry import context
@@ -294,6 +295,7 @@ class Laminar:
         name: str,
         input: Any = None,
         span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
+        context: Optional[Context] = None,
     ):
         """Start a new span as the current span. Useful for manual
         instrumentation. If `span_type` is set to `"LLM"`, you should report
@@ -314,10 +316,12 @@ class Laminar:
             span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
                 type of the span. If you use `"LLM"`, you should report usage\
                 and response attributes manually. Defaults to "DEFAULT".
+            context (Optional[Context], optional): raw OpenTelemetry context\
+                to attach the span to. Defaults to None.
         """
         with get_tracer() as tracer:
             span_path = get_span_path(name)
-            ctx = set_value("span_path", span_path)
+            ctx = set_value("span_path", span_path, context)
             ctx_token = attach(ctx)
             with tracer.start_as_current_span(
                 name,
@@ -338,6 +342,67 @@ class Laminar:
             except Exception:
                 pass

+    @classmethod
+    def start_span(
+        cls,
+        name: str,
+        input: Any = None,
+        span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
+        context: Optional[Context] = None,
+    ):
+        """Start a new span. Useful for manual instrumentation.
+        If `span_type` is set to `"LLM"`, you should report usage and response
+        attributes manually. See `Laminar.set_span_attributes` for more
+        information.
+
+        Usage example:
+        ```python
+        from src.lmnr import Laminar, use_span
+        def foo(span):
+            with use_span(span):
+                with Laminar.start_as_current_span("foo_inner"):
+                    some_function()
+
+        def bar():
+            with use_span(span):
+                openai_client.chat.completions.create()
+
+        span = Laminar.start_span("outer")
+        foo(span)
+        bar(span)
+        # IMPORTANT: End the span manually
+        span.end()
+
+        # Results in:
+        # | outer
+        # | | foo
+        # | | | foo_inner
+        # | | bar
+        # | | | openai.chat
+        ```
+
+        Args:
+            name (str): name of the span
+            input (Any, optional): input to the span. Will be sent as an\
+                attribute, so must be json serializable. Defaults to None.
+            span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
+                type of the span. If you use `"LLM"`, you should report usage\
+                and response attributes manually. Defaults to "DEFAULT".
+            context (Optional[Context], optional): raw OpenTelemetry context\
+                to attach the span to. Defaults to None.
+        """
+        with get_tracer() as tracer:
+            span_path = get_span_path(name)
+            ctx = set_value("span_path", span_path, context)
+            span = tracer.start_span(name, context=ctx)
+            if input is not None:
+                span.set_attribute(
+                    SPAN_INPUT,
+                    json_dumps(input),
+                )
+            span.set_attribute(SPAN_TYPE, span_type)
+            return span
+
     @classmethod
     def set_span_output(cls, output: Any = None):
         """Set the output of the current span. Useful for manual
src/lmnr/sdk/types.py

@@ -77,18 +77,17 @@ class PipelineRunError(Exception):
         return super().__str__()


-EvaluationDatapointData = dict[str, Any]
-EvaluationDatapointTarget = dict[str, Any]
-EvaluationDatapointMetadata = Optional[dict[str, Any]]
+EvaluationDatapointData = Any  # non-null, must be JSON-serializable
+EvaluationDatapointTarget = Optional[Any]  # must be JSON-serializable
+EvaluationDatapointMetadata = Optional[Any]  # must be JSON-serializable


 # EvaluationDatapoint is a single data point in the evaluation
 class Datapoint(pydantic.BaseModel):
-    # input to the executor function. Must be a dict with string keys
+    # input to the executor function.
     data: EvaluationDatapointData
     # input to the evaluator function (alongside the executor output).
-    # Must be a dict with string keys
-    target: EvaluationDatapointTarget
+    target: EvaluationDatapointTarget = pydantic.Field(default=None)
     metadata: EvaluationDatapointMetadata = pydantic.Field(default=None)


@@ -110,6 +109,13 @@ EvaluatorFunction = Callable[
 ]


+class HumanEvaluator(pydantic.BaseModel):
+    queueName: str
+
+    def __init__(self, queue_name: str):
+        super().__init__(queueName=queue_name)
+
+
 class CreateEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime
@@ -123,23 +129,31 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
     target: EvaluationDatapointTarget
     executor_output: ExecutorFunctionReturnType
     scores: dict[str, Numeric]
+    human_evaluators: list[HumanEvaluator] = pydantic.Field(default_factory=list)
     trace_id: uuid.UUID
+    executor_span_id: uuid.UUID

     # uuid is not serializable by default, so we need to convert it to a string
     def to_dict(self):
-        return {
-            "data": {
-                k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
-                for k, v in self.data.items()
-            },
-            "target": {
-                k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
-                for k, v in self.target.items()
-            },
-            "executorOutput": serialize(self.executor_output),
-            "scores": self.scores,
-            "traceId": str(self.trace_id),
-        }
+        try:
+            return {
+                "data": serialize(self.data),
+                "target": serialize(self.target),
+                "executorOutput": serialize(self.executor_output),
+                "scores": self.scores,
+                "traceId": str(self.trace_id),
+                "humanEvaluators": [
+                    (
+                        v.model_dump()
+                        if isinstance(v, pydantic.BaseModel)
+                        else serialize(v)
+                    )
+                    for v in self.human_evaluators
+                ],
+                "executorSpanId": str(self.executor_span_id),
+            }
+        except Exception as e:
+            raise ValueError(f"Error serializing EvaluationResultDatapoint: {e}")


 class SpanType(Enum):
@@ -153,7 +167,7 @@ class SpanType(Enum):

 class TraceType(Enum):
     DEFAULT = "DEFAULT"
-    EVENT = "EVENT"  # must not be set manually
+    EVENT = "EVENT"  # deprecated
     EVALUATION = "EVALUATION"

src/lmnr/sdk/utils.py

@@ -50,7 +50,7 @@ def is_iterator(o: typing.Any) -> bool:


 def serialize(obj: typing.Any) -> dict[str, typing.Any]:
-    def to_dict_inner(o: typing.Any):
+    def serialize_inner(o: typing.Any):
         if isinstance(o, (datetime.datetime, datetime.date)):
             return o.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
         elif o is None:
@@ -68,17 +68,17 @@ def serialize(obj: typing.Any) -> dict[str, typing.Any]:
         elif isinstance(o, pydantic.BaseModel):
             return o.model_dump()
         elif isinstance(o, (tuple, set, frozenset)):
-            return [to_dict_inner(item) for item in o]
+            return [serialize_inner(item) for item in o]
         elif isinstance(o, list):
-            return [to_dict_inner(item) for item in o]
+            return [serialize_inner(item) for item in o]
         elif isinstance(o, dict):
-            return {to_dict_inner(k): to_dict_inner(v) for k, v in o.items()}
+            return {serialize_inner(k): serialize_inner(v) for k, v in o.items()}
         elif isinstance(o, queue.Queue):
             return type(o).__name__

         return str(o)

-    return to_dict_inner(obj)
+    return serialize_inner(obj)


 def get_input_from_func_args(
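
The change here is only a rename of the inner helper, but the branches above show what `serialize()` handles. A small sketch of the observable behavior, using the module path from the file list; `Usage` and the payload are invented:

```python
import datetime

import pydantic

from lmnr.sdk.utils import serialize

class Usage(pydantic.BaseModel):
    prompt_tokens: int
    completion_tokens: int

payload = {
    "when": datetime.datetime(2024, 10, 1, 12, 0),          # -> timestamp string
    "usage": Usage(prompt_tokens=12, completion_tokens=3),  # -> model_dump() dict
    "tags": {"alpha", "beta"},                              # sets become lists
}
print(serialize(payload))
```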
src/lmnr/traceloop_sdk/instruments.py

@@ -4,27 +4,31 @@ from enum import Enum
 class Instruments(Enum):
     # The list of libraries which will be autoinstrumented
     # if no specific instruments are provided to initialize()
-    OPENAI = "openai"
+    ALEPHALPHA = "alephalpha"
     ANTHROPIC = "anthropic"
-    COHERE = "cohere"
-    PINECONE = "pinecone"
+    BEDROCK = "bedrock"
     CHROMA = "chroma"
+    COHERE = "cohere"
     GOOGLE_GENERATIVEAI = "google_generativeai"
+    GROQ = "groq"
+    HAYSTACK = "haystack"
+    LANCEDB = "lancedb"
     LANGCHAIN = "langchain"
-    MISTRAL = "mistral"
-    OLLAMA = "ollama"
     LLAMA_INDEX = "llama_index"
+    MARQO = "marqo"
     MILVUS = "milvus"
-    TRANSFORMERS = "transformers"
-    TOGETHER = "together"
-    BEDROCK = "bedrock"
+    MISTRAL = "mistral"
+    OLLAMA = "ollama"
+    OPENAI = "openai"
+    PINECONE = "pinecone"
+    QDRANT = "qdrant"
     REPLICATE = "replicate"
+    SAGEMAKER = "sagemaker"
+    TOGETHER = "together"
+    TRANSFORMERS = "transformers"
     VERTEXAI = "vertexai"
     WATSONX = "watsonx"
     WEAVIATE = "weaviate"
-    ALEPHALPHA = "alephalpha"
-    MARQO = "marqo"
-    LANCEDB = "lancedb"

     # The following libraries will not be autoinstrumented unless
     # specified explicitly in the initialize() call.
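
The default-instrumentation list is now alphabetized and gains new entries such as `SAGEMAKER` and `QDRANT`. Assuming `initialize()` accepts a set of `Instruments` (as the comment at the top of the enum implies), opting into only specific instrumentations might look like this; the `instruments` keyword is an assumption, not something this diff confirms:

```python
from lmnr import Instruments, Laminar

# Hypothetical: instrument only OpenAI and Anthropic clients, skipping the rest.
Laminar.initialize(
    project_api_key="...",
    instruments={Instruments.OPENAI, Instruments.ANTHROPIC},
)
```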
src/lmnr/traceloop_sdk/tracing/attributes.py

@@ -5,6 +5,7 @@ SPAN_INPUT = "lmnr.span.input"
 SPAN_OUTPUT = "lmnr.span.output"
 SPAN_TYPE = "lmnr.span.type"
 SPAN_PATH = "lmnr.span.path"
+SPAN_INSTRUMENTATION_SOURCE = "lmnr.span.instrumentation_source"

 ASSOCIATION_PROPERTIES = "lmnr.association.properties"
 SESSION_ID = "session_id"