lmnr 0.4.28__tar.gz → 0.4.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lmnr-0.4.28 → lmnr-0.4.29}/PKG-INFO +2 -1
- {lmnr-0.4.28 → lmnr-0.4.29}/pyproject.toml +17 -16
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/__init__.py +8 -1
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/evaluations.py +33 -12
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/laminar.py +66 -1
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/types.py +34 -20
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/utils.py +5 -5
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/instruments.py +15 -11
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/tracing.py +175 -260
- {lmnr-0.4.28 → lmnr-0.4.29}/LICENSE +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/README.md +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/datasets.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/log.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/decorators/base.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/attributes.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.28 → lmnr-0.4.29}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.28
+Version: 0.4.29
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai

@@ -39,6 +39,7 @@ Requires-Dist: opentelemetry-instrumentation-pinecone (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-qdrant (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-replicate (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-requests (>=0.48b0,<0.49)
+Requires-Dist: opentelemetry-instrumentation-sagemaker (>=0.33.5)
 Requires-Dist: opentelemetry-instrumentation-sqlalchemy (>=0.48b0,<0.49)
 Requires-Dist: opentelemetry-instrumentation-threading (>=0.48b0,<0.49)
 Requires-Dist: opentelemetry-instrumentation-together (>=0.33.5)
{lmnr-0.4.28 → lmnr-0.4.29}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.28"
+version = "0.4.29"
 description = "Python SDK for Laminar AI"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }

@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.28"
+version = "0.4.29"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"

@@ -35,30 +35,31 @@ opentelemetry-semantic-conventions-ai = "0.4.2"
 tenacity = ">=8.0"
 jinja2 = "~=3.0"
 deprecated = "~=1.0"
-opentelemetry-instrumentation-
-opentelemetry-instrumentation-openai = ">=0.33.5"
-opentelemetry-instrumentation-ollama = ">=0.33.5"
+opentelemetry-instrumentation-alephalpha = ">=0.33.5"
 opentelemetry-instrumentation-anthropic = ">=0.33.5"
+opentelemetry-instrumentation-bedrock = ">=0.33.5"
+opentelemetry-instrumentation-chromadb = ">=0.33.5"
 opentelemetry-instrumentation-cohere = ">=0.33.5"
 opentelemetry-instrumentation-google-generativeai = ">=0.33.5"
-opentelemetry-instrumentation-
-opentelemetry-instrumentation-
-opentelemetry-instrumentation-langchain = ">=0.33.5"
+opentelemetry-instrumentation-groq = ">=0.33.5"
+opentelemetry-instrumentation-haystack = ">=0.33.5"
 opentelemetry-instrumentation-lancedb = ">=0.33.5"
-opentelemetry-instrumentation-
-opentelemetry-instrumentation-transformers = ">=0.33.5"
-opentelemetry-instrumentation-together = ">=0.33.5"
+opentelemetry-instrumentation-langchain = ">=0.33.5"
 opentelemetry-instrumentation-llamaindex = ">=0.33.5"
+opentelemetry-instrumentation-marqo = ">=0.33.5"
 opentelemetry-instrumentation-milvus = ">=0.33.5"
-opentelemetry-instrumentation-
-opentelemetry-instrumentation-
+opentelemetry-instrumentation-mistralai = ">=0.33.5"
+opentelemetry-instrumentation-ollama = ">=0.33.5"
+opentelemetry-instrumentation-openai = ">=0.33.5"
+opentelemetry-instrumentation-pinecone = ">=0.33.5"
+opentelemetry-instrumentation-qdrant = ">=0.33.5"
 opentelemetry-instrumentation-replicate = ">=0.33.5"
+opentelemetry-instrumentation-sagemaker = ">=0.33.5"
+opentelemetry-instrumentation-together = ">=0.33.5"
+opentelemetry-instrumentation-transformers = ">=0.33.5"
 opentelemetry-instrumentation-vertexai = ">=0.33.5"
 opentelemetry-instrumentation-watsonx = ">=0.33.5"
 opentelemetry-instrumentation-weaviate = ">=0.33.5"
-opentelemetry-instrumentation-alephalpha = ">=0.33.5"
-opentelemetry-instrumentation-marqo = ">=0.33.5"
-opentelemetry-instrumentation-groq = ">=0.33.5"
 tqdm = "~=4.0"
 argparse = "~=1.0"
 
{lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/__init__.py

@@ -1,7 +1,14 @@
 from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
-from .sdk.types import
+from .sdk.types import (
+    ChatMessage,
+    HumanEvaluator,
+    NodeInput,
+    PipelineRunError,
+    PipelineRunResponse,
+)
 from .sdk.decorators import observe
 from .traceloop_sdk import Instruments
 from .traceloop_sdk.tracing.attributes import Attributes
+from opentelemetry.trace import use_span
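Taken together, the import changes above make the new `HumanEvaluator` type and OpenTelemetry's `use_span` helper available straight from the package root. A quick sanity check of the 0.4.29 public surface, as this diff shows it:

```python
# All of these are re-exported from lmnr/__init__.py as of 0.4.29.
from lmnr import (
    ChatMessage,
    HumanEvaluator,   # new in 0.4.29
    Laminar,
    NodeInput,
    PipelineRunError,
    PipelineRunResponse,
    evaluate,
    observe,
    use_span,         # new in 0.4.29, re-exported from opentelemetry.trace
)
```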
{lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/evaluations.py

@@ -18,6 +18,7 @@ from .types import (
     EvaluationResultDatapoint,
     EvaluatorFunction,
     ExecutorFunction,
+    HumanEvaluator,
     Numeric,
     NumericTypes,
     SpanType,

@@ -99,6 +100,7 @@ class Evaluation:
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
         evaluators: dict[str, EvaluatorFunction],
+        human_evaluators: list[HumanEvaluator] = [],
         name: Optional[str] = None,
         group_id: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,

@@ -126,6 +128,10 @@ class Evaluation:
                 If the score is a single number, it will be named after the\
                 evaluator function. Evaluator function names must contain only\
                 letters, digits, hyphens, underscores, or spaces.
+            human_evaluators (list[HumanEvaluator], optional):\
+                [Beta] List of instances of HumanEvaluator. For now, human\
+                evaluator only holds the queue name.
+                Defaults to an empty list.
             name (Optional[str], optional): Optional name of the evaluation.\
                 Used to identify the evaluation in the group.\
                 If not provided, a random name will be generated.

@@ -159,9 +165,9 @@ class Evaluation:
         if not evaluators:
             raise ValueError("No evaluators provided")
 
-
+        evaluator_name_regex = re.compile(r"^[\w\s-]+$")
         for evaluator_name in evaluators:
-            if not
+            if not evaluator_name_regex.match(evaluator_name):
                 raise ValueError(
                     f'Invalid evaluator key: "{evaluator_name}". '
                     "Keys must only contain letters, digits, hyphens,"

@@ -183,6 +189,7 @@ class Evaluation:
         self.name = name
         self.batch_size = batch_size
         self._logger = get_default_logger(self.__class__.__name__)
+        self.human_evaluators = human_evaluators
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,

@@ -202,9 +209,7 @@ class Evaluation:
         return loop.run_until_complete(self._run())
 
     async def _run(self) -> None:
-        self.reporter.start(
-            len(self.data),
-        )
+        self.reporter.start(len(self.data))
 
         try:
             result_datapoints = await self._evaluate_in_batches()

@@ -212,13 +217,19 @@ class Evaluation:
             self.reporter.stopWithError(e)
             self.is_finished = True
             return
-
-
-
-
-
-
-
+
+        # For now add all human evaluators to all result datapoints
+        # In the future, we will add ways to specify which human evaluators
+        # to add to which result datapoints, e.g. sample some randomly
+        for result_datapoint in result_datapoints:
+            result_datapoint.human_evaluators = self.human_evaluators or {}
+
+        evaluation = L.create_evaluation(
+            data=result_datapoints, group_id=self.group_id, name=self.name
+        )
+        average_scores = get_average_scores(result_datapoints)
+        self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+        self.is_finished = True
 
     async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
         result_datapoints = []

@@ -256,6 +267,9 @@ class Evaluation:
                 else self.executor(datapoint.data)
             )
             L.set_span_output(output)
+            executor_span_id = uuid.UUID(
+                int=executor_span.get_span_context().span_id
+            )
             target = datapoint.target
 
             # Iterate over evaluators

@@ -285,6 +299,7 @@ class Evaluation:
             executor_output=output,
             scores=scores,
             trace_id=trace_id,
+            executor_span_id=executor_span_id,
         )
 
 

@@ -292,6 +307,7 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
     evaluators: dict[str, EvaluatorFunction],
+    human_evaluators: list[HumanEvaluator] = [],
     name: Optional[str] = None,
     group_id: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,

@@ -326,6 +342,10 @@ def evaluate(
             If the score is a single number, it will be named after the\
             evaluator function. Evaluator function names must contain only\
             letters, digits, hyphens, underscores, or spaces.
+        human_evaluators (list[HumanEvaluator], optional):\
+            [Beta] List of instances of HumanEvaluator. For now, human\
+            evaluator only holds the queue name.
+            Defaults to an empty list.
         name (Optional[str], optional): Optional name of the evaluation.\
             Used to identify the evaluation in the group.\
             If not provided, a random name will be generated.

@@ -359,6 +379,7 @@ def evaluate(
         executor=executor,
         evaluators=evaluators,
         group_id=group_id,
+        human_evaluators=human_evaluators,
         name=name,
         batch_size=batch_size,
         project_api_key=project_api_key,
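For context, a minimal sketch of how the new `human_evaluators` parameter wires in, based on the signatures above; the queue name and the toy executor/evaluator are hypothetical:

```python
from lmnr import HumanEvaluator, evaluate

evaluate(
    data=[{"data": {"question": "What is 2 + 2?"}, "target": "4"}],
    # executor receives datapoint.data and produces the output to score
    executor=lambda data: str(2 + 2),
    # evaluators receive (output, target) and return a numeric score
    evaluators={"exact_match": lambda output, target: int(output == target)},
    # [Beta] every result datapoint gets queued for human review under this name
    human_evaluators=[HumanEvaluator(queue_name="my_review_queue")],
)
```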
{lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/laminar.py

@@ -1,3 +1,4 @@
+from contextvars import Context
 import re
 from lmnr.traceloop_sdk.instruments import Instruments
 from opentelemetry import context

@@ -294,6 +295,7 @@ class Laminar:
         name: str,
         input: Any = None,
         span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
+        context: Optional[Context] = None,
     ):
         """Start a new span as the current span. Useful for manual
         instrumentation. If `span_type` is set to `"LLM"`, you should report

@@ -314,10 +316,12 @@ class Laminar:
             span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
                 type of the span. If you use `"LLM"`, you should report usage\
                 and response attributes manually. Defaults to "DEFAULT".
+            context (Optional[Context], optional): raw OpenTelemetry context\
+                to attach the span to. Defaults to None.
         """
         with get_tracer() as tracer:
             span_path = get_span_path(name)
-            ctx = set_value("span_path", span_path)
+            ctx = set_value("span_path", span_path, context)
             ctx_token = attach(ctx)
             with tracer.start_as_current_span(
                 name,

@@ -338,6 +342,67 @@ class Laminar:
         except Exception:
             pass
 
+    @classmethod
+    def start_span(
+        cls,
+        name: str,
+        input: Any = None,
+        span_type: Union[Literal["DEFAULT"], Literal["LLM"]] = "DEFAULT",
+        context: Optional[Context] = None,
+    ):
+        """Start a new span. Useful for manual instrumentation.
+        If `span_type` is set to `"LLM"`, you should report usage and response
+        attributes manually. See `Laminar.set_span_attributes` for more
+        information.
+
+        Usage example:
+        ```python
+        from src.lmnr import Laminar, use_span
+        def foo(span):
+            with use_span(span):
+                with Laminar.start_as_current_span("foo_inner"):
+                    some_function()
+
+        def bar():
+            with use_span(span):
+                openai_client.chat.completions.create()
+
+        span = Laminar.start_span("outer")
+        foo(span)
+        bar(span)
+        # IMPORTANT: End the span manually
+        span.end()
+
+        # Results in:
+        # | outer
+        # |  | foo
+        # |  |  | foo_inner
+        # |  | bar
+        # |  |  | openai.chat
+        ```
+
+        Args:
+            name (str): name of the span
+            input (Any, optional): input to the span. Will be sent as an\
+                attribute, so must be json serializable. Defaults to None.
+            span_type (Union[Literal["DEFAULT"], Literal["LLM"]], optional):\
+                type of the span. If you use `"LLM"`, you should report usage\
+                and response attributes manually. Defaults to "DEFAULT".
+            context (Optional[Context], optional): raw OpenTelemetry context\
+                to attach the span to. Defaults to None.
+        """
+        with get_tracer() as tracer:
+            span_path = get_span_path(name)
+            ctx = set_value("span_path", span_path, context)
+            span = tracer.start_span(name, context=ctx)
+            if input is not None:
+                span.set_attribute(
+                    SPAN_INPUT,
+                    json_dumps(input),
+                )
+            span.set_attribute(SPAN_TYPE, span_type)
+            return span
+
     @classmethod
     def set_span_output(cls, output: Any = None):
         """Set the output of the current span. Useful for manual
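The new `context` parameter threads a parent OpenTelemetry context into `set_value`, which is what makes cross-thread span parenting possible. A minimal sketch, assuming the 0.4.29 API above (the worker/thread setup is hypothetical):

```python
import threading

from opentelemetry import context as otel_context
from lmnr import Laminar

# assumes Laminar.initialize(project_api_key=...) was called earlier

def worker(parent_ctx):
    # Spans started with the captured context join the caller's trace
    # instead of starting a fresh one.
    with Laminar.start_as_current_span("worker_task", context=parent_ctx):
        pass  # do work here

with Laminar.start_as_current_span("request"):
    ctx = otel_context.get_current()  # capture before crossing the thread boundary
    t = threading.Thread(target=worker, args=(ctx,))
    t.start()
    t.join()
```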
{lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/types.py

@@ -77,18 +77,17 @@ class PipelineRunError(Exception):
         return super().__str__()
 
 
-EvaluationDatapointData =
-EvaluationDatapointTarget =
-EvaluationDatapointMetadata = Optional[
+EvaluationDatapointData = Any  # non-null, must be JSON-serializable
+EvaluationDatapointTarget = Optional[Any]  # must be JSON-serializable
+EvaluationDatapointMetadata = Optional[Any]  # must be JSON-serializable
 
 
 # EvaluationDatapoint is a single data point in the evaluation
 class Datapoint(pydantic.BaseModel):
-    # input to the executor function.
+    # input to the executor function.
     data: EvaluationDatapointData
     # input to the evaluator function (alongside the executor output).
-
-    target: EvaluationDatapointTarget
+    target: EvaluationDatapointTarget = pydantic.Field(default=None)
     metadata: EvaluationDatapointMetadata = pydantic.Field(default=None)
 
 

@@ -110,6 +109,13 @@ EvaluatorFunction = Callable[
 ]
 
 
+class HumanEvaluator(pydantic.BaseModel):
+    queueName: str
+
+    def __init__(self, queue_name: str):
+        super().__init__(queueName=queue_name)
+
+
 class CreateEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime

@@ -123,23 +129,31 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
     target: EvaluationDatapointTarget
     executor_output: ExecutorFunctionReturnType
     scores: dict[str, Numeric]
+    human_evaluators: list[HumanEvaluator] = pydantic.Field(default_factory=list)
     trace_id: uuid.UUID
+    executor_span_id: uuid.UUID
 
     # uuid is not serializable by default, so we need to convert it to a string
     def to_dict(self):
-
-
-
-
-
-
-
-
-
-
-
-
-
+        try:
+            return {
+                "data": serialize(self.data),
+                "target": serialize(self.target),
+                "executorOutput": serialize(self.executor_output),
+                "scores": self.scores,
+                "traceId": str(self.trace_id),
+                "humanEvaluators": [
+                    (
+                        v.model_dump()
+                        if isinstance(v, pydantic.BaseModel)
+                        else serialize(v)
+                    )
+                    for v in self.human_evaluators
+                ],
+                "executorSpanId": str(self.executor_span_id),
+            }
+        except Exception as e:
+            raise ValueError(f"Error serializing EvaluationResultDatapoint: {e}")
 
 
 class SpanType(Enum):

@@ -153,7 +167,7 @@ class SpanType(Enum):
 
 class TraceType(Enum):
     DEFAULT = "DEFAULT"
-    EVENT = "EVENT" #
+    EVENT = "EVENT" # deprecated
     EVALUATION = "EVALUATION"
 
 
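A small sketch of what the new model produces, assuming the definitions above; the queue name is hypothetical:

```python
from lmnr import HumanEvaluator

he = HumanEvaluator(queue_name="my_review_queue")
assert he.queueName == "my_review_queue"
# model_dump() is what EvaluationResultDatapoint.to_dict() emits
# under the "humanEvaluators" key:
assert he.model_dump() == {"queueName": "my_review_queue"}
```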
{lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/sdk/utils.py

@@ -50,7 +50,7 @@ def is_iterator(o: typing.Any) -> bool:
 
 
 def serialize(obj: typing.Any) -> dict[str, typing.Any]:
-    def
+    def serialize_inner(o: typing.Any):
         if isinstance(o, (datetime.datetime, datetime.date)):
             return o.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
         elif o is None:

@@ -68,17 +68,17 @@ def serialize(obj: typing.Any) -> dict[str, typing.Any]:
         elif isinstance(o, pydantic.BaseModel):
             return o.model_dump()
         elif isinstance(o, (tuple, set, frozenset)):
-            return [
+            return [serialize_inner(item) for item in o]
         elif isinstance(o, list):
-            return [
+            return [serialize_inner(item) for item in o]
         elif isinstance(o, dict):
-            return {
+            return {serialize_inner(k): serialize_inner(v) for k, v in o.items()}
         elif isinstance(o, queue.Queue):
             return type(o).__name__
 
         return str(o)
 
-    return
+    return serialize_inner(obj)
 
 
 def get_input_from_func_args(
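The refactor above just names the inner helper (`serialize_inner`) and recurses through containers; behavior is otherwise unchanged. A quick illustration of the rules it applies, assuming the module path from the file list:

```python
import datetime

import pydantic

from lmnr.sdk.utils import serialize

class Usage(pydantic.BaseModel):
    tokens: int

print(serialize({
    "when": datetime.date(2024, 1, 1),  # datetimes/dates -> formatted strings
    "tags": ("a", "b"),                 # tuples/sets -> lists
    "usage": Usage(tokens=42),          # pydantic models -> model_dump()
}))
# roughly: {'when': '2024-01-01T00:00:00.000000', 'tags': ['a', 'b'], 'usage': {'tokens': 42}}
```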
{lmnr-0.4.28 → lmnr-0.4.29}/src/lmnr/traceloop_sdk/instruments.py

@@ -4,27 +4,31 @@ from enum import Enum
 class Instruments(Enum):
     # The list of libraries which will be autoinstrumented
     # if no specific instruments are provided to initialize()
-
+    ALEPHALPHA = "alephalpha"
     ANTHROPIC = "anthropic"
-
-    PINECONE = "pinecone"
+    BEDROCK = "bedrock"
     CHROMA = "chroma"
+    COHERE = "cohere"
     GOOGLE_GENERATIVEAI = "google_generativeai"
+    GROQ = "groq"
+    HAYSTACK = "haystack"
+    LANCEDB = "lancedb"
     LANGCHAIN = "langchain"
-    MISTRAL = "mistral"
-    OLLAMA = "ollama"
     LLAMA_INDEX = "llama_index"
+    MARQO = "marqo"
     MILVUS = "milvus"
-
-
-
+    MISTRAL = "mistral"
+    OLLAMA = "ollama"
+    OPENAI = "openai"
+    PINECONE = "pinecone"
+    QDRANT = "qdrant"
     REPLICATE = "replicate"
+    SAGEMAKER = "sagemaker"
+    TOGETHER = "together"
+    TRANSFORMERS = "transformers"
     VERTEXAI = "vertexai"
     WATSONX = "watsonx"
     WEAVIATE = "weaviate"
-    ALEPHALPHA = "alephalpha"
-    MARQO = "marqo"
-    LANCEDB = "lancedb"
 
     # The following libraries will not be autoinstrumented unless
     # specified explicitly in the initialize() call.