lmnr 0.4.13__tar.gz → 0.4.15b1__tar.gz
This diff shows the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- {lmnr-0.4.13 → lmnr-0.4.15b1}/PKG-INFO +4 -3
- {lmnr-0.4.13 → lmnr-0.4.15b1}/README.md +2 -1
- {lmnr-0.4.13 → lmnr-0.4.15b1}/pyproject.toml +3 -3
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/evaluations.py +54 -46
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/laminar.py +6 -63
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/types.py +2 -13
- {lmnr-0.4.13 → lmnr-0.4.15b1}/LICENSE +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/cli.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/decorators.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/log.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/utils.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/.flake8 +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/.python-version +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/decorators/base.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/instruments.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/attributes.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
- {lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.13 → lmnr-0.4.15b1}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.13
+Version: 0.4.15b1
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
@@ -53,7 +53,7 @@ Requires-Dist: posthog (>=3.0,<4.0)
 Requires-Dist: pydantic (>=2.7,<3.0)
 Requires-Dist: python-dotenv (>=1.0,<2.0)
 Requires-Dist: requests (>=2.0,<3.0)
-Requires-Dist: tenacity (>=8.0
+Requires-Dist: tenacity (>=8.0)
 Requires-Dist: tqdm (>=4.0,<5.0)
 Description-Content-Type: text/markdown
 
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/README.md

````diff
@@ -230,7 +230,8 @@ evaluate(
     executor=write_poem,
     evaluators={
         "containsPoem": contains_poem
-    }
+    },
+    group_id="my_first_feature"
 )
 ```
 
````
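For orientation, here is a minimal, self-contained sketch of the updated README example end to end. The `write_poem` executor, the `contains_poem` evaluator, and the datapoint shape are illustrative assumptions, not code from the package; only the `evaluate(...)` keyword arguments (including the new `group_id`) come from the diff.

```python
from lmnr import evaluate  # assumes `evaluate` is exported at package level, as the README suggests

# Hypothetical executor: turns a datapoint's input into an output.
def write_poem(data: dict) -> str:
    return f"Roses are red, violets are blue, {data['topic']} too"

# Hypothetical evaluator: returns a numeric score for the executor output.
def contains_poem(output: str, target: dict) -> int:
    return 1 if target["text"] in output else 0

evaluate(
    data=[{"data": {"topic": "the sea"}, "target": {"text": "sea"}}],  # datapoint shape is an assumption
    executor=write_poem,
    evaluators={
        "containsPoem": contains_poem
    },
    group_id="my_first_feature",  # new in 0.4.15b1: groups related evaluations together
)
```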
{lmnr-0.4.13 → lmnr-0.4.15b1}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.13"
+version = "0.4.15b1"
 description = "Python SDK for Laminar AI"
 authors = [
   { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.13"
+version = "0.4.15b1"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
@@ -33,7 +33,7 @@ opentelemetry-instrumentation-sqlalchemy = "^0.48b0"
 opentelemetry-instrumentation-urllib3 = "^0.48b0"
 opentelemetry-instrumentation-threading = "^0.48b0"
 opentelemetry-semantic-conventions-ai = "0.4.1"
-tenacity = "
+tenacity = ">=8.0"
 jinja2 = "~=3.0"
 deprecated = "~=1.0"
 posthog = "~=3.0"
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/evaluations.py

```diff
@@ -1,4 +1,5 @@
 import asyncio
+import re
 import sys
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
```
```diff
@@ -45,13 +46,26 @@ def get_evaluation_url(project_id: str, evaluation_id: str):
     return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"
 
 
+def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
+    per_score_values = {}
+    for result in results:
+        for key, value in result.scores.items():
+            if key not in per_score_values:
+                per_score_values[key] = []
+            per_score_values[key].append(value)
+
+    average_scores = {}
+    for key, values in per_score_values.items():
+        average_scores[key] = sum(values) / len(values)
+
+    return average_scores
+
+
 class EvaluationReporter:
     def __init__(self):
         pass
 
-    def start(self,
-        print(f"Running evaluation {name}...\n")
-        print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
+    def start(self, length: int):
         self.cli_progress = tqdm(
             total=length,
             bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
```
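The new module-level `get_average_scores` helper averages each score key across all result datapoints once the run completes. Below is a standalone sketch of the same averaging logic, with a plain dataclass standing in for `EvaluationResultDatapoint` (only its `scores` mapping, which the loop above relies on, is modelled here):

```python
from dataclasses import dataclass

@dataclass
class FakeResultDatapoint:
    # Stand-in for EvaluationResultDatapoint; only the `scores` field is used by the helper.
    scores: dict[str, float]

def get_average_scores(results: list[FakeResultDatapoint]) -> dict[str, float]:
    per_score_values: dict[str, list[float]] = {}
    for result in results:
        for key, value in result.scores.items():
            per_score_values.setdefault(key, []).append(value)
    return {key: sum(values) / len(values) for key, values in per_score_values.items()}

print(get_average_scores([
    FakeResultDatapoint(scores={"containsPoem": 1.0, "length": 12.0}),
    FakeResultDatapoint(scores={"containsPoem": 0.0, "length": 20.0}),
]))
# {'containsPoem': 0.5, 'length': 16.0}
```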
```diff
@@ -65,9 +79,10 @@ class EvaluationReporter:
         self.cli_progress.close()
         sys.stderr.write(f"\nError: {error}\n")
 
-    def stop(self, average_scores: dict[str, Numeric]):
+    def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
         self.cli_progress.close()
-        print("\
+        print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
+        print("Average scores:")
         for name, score in average_scores.items():
             print(f"{name}: {score}")
         print("\n")
```
```diff
@@ -96,6 +111,7 @@ class Evaluation:
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
         evaluators: dict[str, EvaluatorFunction],
+        group_id: Optional[str] = None,
         name: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
         project_api_key: Optional[str] = None,
@@ -122,6 +138,8 @@
                 evaluator function. If the function is anonymous, it will be
                 named `evaluator_${index}`, where index is the index of the
                 evaluator function in the list starting from 1.
+            group_id (Optional[str], optional): Group id of the evaluation.
+                Defaults to "default".
             name (Optional[str], optional): The name of the evaluation.
                 It will be auto-generated if not provided.
             batch_size (int, optional): The batch size for evaluation.
```
```diff
@@ -137,11 +155,16 @@
                 Defaults to None. If None, all available instruments will be used.
         """
 
+        if not evaluators:
+            raise ValueError("No evaluators provided")
+
+        # TODO: Compile regex once and then reuse it
+        for evaluator_name in evaluators:
+            if not re.match(r'^[\w\s-]+$', evaluator_name):
+                raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
+
         self.is_finished = False
-        self.name = name
         self.reporter = EvaluationReporter()
-        self.executor = executor
-        self.evaluators = evaluators
         if isinstance(data, list):
             self.data = [
                 (Datapoint.model_validate(point) if isinstance(point, dict) else point)
```
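The constructor now rejects evaluator keys that do not match the pattern `^[\w\s-]+$`, i.e. anything other than letters, digits, underscores, hyphens, and whitespace. A quick sketch of which keys that check accepts:

```python
import re

# Same pattern as the new validation in Evaluation.__init__.
EVALUATOR_KEY_PATTERN = re.compile(r"^[\w\s-]+$")

for key in ["containsPoem", "contains poem", "contains-poem_2", "contains/poem", "score!", ""]:
    verdict = "ok" if EVALUATOR_KEY_PATTERN.match(key) else "rejected"
    print(f"{key!r}: {verdict}")
# 'containsPoem': ok
# 'contains poem': ok
# 'contains-poem_2': ok
# 'contains/poem': rejected
# 'score!': rejected
# '': rejected
```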
````diff
@@ -149,6 +172,10 @@
             ]
         else:
             self.data = data
+        self.executor = executor
+        self.evaluators = evaluators
+        self.group_id = group_id
+        self.name = name
         self.batch_size = batch_size
         L.initialize(
             project_api_key=project_api_key,
@@ -159,23 +186,6 @@
         )
 
     def run(self) -> Union[None, Awaitable[None]]:
-        """Runs the evaluation.
-
-        Creates a new evaluation if no evaluation with such name exists, or
-        adds data to an existing one otherwise. Evaluates data points in
-        batches of `self.batch_size`. The executor
-        function is called on each data point to get the output,
-        and then evaluate it by each evaluator function.
-
-        Usage:
-        ```python
-        # in a synchronous context:
-        e.run()
-        # in an asynchronous context:
-        await e.run()
-        ```
-
-        """
         if self.is_finished:
             raise Exception("Evaluation is already finished")
 
````
```diff
@@ -186,41 +196,34 @@
         return loop.run_until_complete(self._run())
 
     async def _run(self) -> None:
-        evaluation = L.create_evaluation(self.name)
         self.reporter.start(
-            evaluation.name,
-            evaluation.projectId,
-            evaluation.id,
             len(self.data),
         )
 
         try:
-            await self.evaluate_in_batches(
+            result_datapoints = await self.evaluate_in_batches()
         except Exception as e:
-            L.update_evaluation_status(evaluation.id, "Error")
             self.reporter.stopWithError(e)
             self.is_finished = True
             return
+        else:
+            evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
+            average_scores = get_average_scores(result_datapoints)
+            self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+            self.is_finished = True
 
-
-
-        self.reporter.stop(average_scores)
-        self.is_finished = True
-
-    async def evaluate_in_batches(self, evaluation_id: uuid.UUID):
+    async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+        result_datapoints = []
         for i in range(0, len(self.data), self.batch_size):
             batch = (
                 self.data[i: i + self.batch_size]
                 if isinstance(self.data, list)
                 else self.data.slice(i, i + self.batch_size)
             )
-
-
-
-
-                print(f"Error evaluating batch: {e}")
-            finally:
-                self.reporter.update(len(batch))
+            batch_datapoints = await self._evaluate_batch(batch)
+            result_datapoints.extend(batch_datapoints)
+            self.reporter.update(len(batch))
+        return result_datapoints
 
     async def _evaluate_batch(
         self, batch: list[Datapoint]
```
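Note how the control flow is inverted here: instead of creating the evaluation up front and reporting status as it goes, the SDK now evaluates all batches locally, collects the result datapoints, and only then creates the evaluation in a single request. A minimal sketch of the batch-slicing loop used above, with a dummy async worker in place of `_evaluate_batch` (list input only; the `EvaluationDataset.slice` branch is left out):

```python
import asyncio

async def _evaluate_batch(batch: list[str]) -> list[str]:
    # Dummy stand-in for Evaluation._evaluate_batch: pretend each item yields one result datapoint.
    return [f"result:{item}" for item in batch]

async def evaluate_in_batches(data: list[str], batch_size: int) -> list[str]:
    result_datapoints: list[str] = []
    for i in range(0, len(data), batch_size):
        batch = data[i: i + batch_size]
        result_datapoints.extend(await _evaluate_batch(batch))
    return result_datapoints

print(asyncio.run(evaluate_in_batches([f"dp{i}" for i in range(5)], batch_size=2)))
# ['result:dp0', 'result:dp1', 'result:dp2', 'result:dp3', 'result:dp4']
```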
```diff
@@ -281,6 +284,7 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
     evaluators: dict[str, EvaluatorFunction],
+    group_id: Optional[str] = None,
     name: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
     project_api_key: Optional[str] = None,
@@ -309,8 +313,11 @@
             evaluator function. If the function is anonymous, it will be
             named `evaluator_${index}`, where index is the index of the
             evaluator function in the list starting from 1.
-
-
+        group_id (Optional[str], optional): Group name which is same
+            as the feature you are evaluating in your project or application.
+            Defaults to "default".
+        name (Optional[str], optional): Optional name of the evaluation. Used to easily
+            identify the evaluation in the group.
         batch_size (int, optional): The batch size for evaluation.
             Defaults to DEFAULT_BATCH_SIZE.
         project_api_key (Optional[str], optional): The project API key.
@@ -330,6 +337,7 @@
         data=data,
         executor=executor,
         evaluators=evaluators,
+        group_id=group_id,
         name=name,
         batch_size=batch_size,
         project_api_key=project_api_key,
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/laminar.py

```diff
@@ -47,7 +47,6 @@ from .types import (
     NodeInput,
     PipelineRunRequest,
     TraceType,
-    UpdateEvaluationResponse,
 )
 
 
@@ -413,10 +412,14 @@ class Laminar:
         set_association_properties(props)
 
     @classmethod
-    def create_evaluation(cls, name: Optional[str]) -> CreateEvaluationResponse:
+    def create_evaluation(cls, data: list[EvaluationResultDatapoint], group_id: Optional[str] = None, name: Optional[str] = None) -> CreateEvaluationResponse:
         response = requests.post(
             cls.__base_http_url + "/v1/evaluations",
-            data=json.dumps({
+            data=json.dumps({
+                "groupId": group_id,
+                "name": name,
+                "points": [datapoint.to_dict() for datapoint in data]
+            }),
             headers=cls._headers(),
         )
         if response.status_code != 200:
```
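With this signature, `Laminar.create_evaluation` uploads the result datapoints in the same request that creates the evaluation, rather than in a separate call. Roughly, the HTTP request now looks like the sketch below; the endpoint path and JSON keys (`groupId`, `name`, `points`) are taken from the diff, while the base URL, auth header format, and the shape of each serialized point are assumptions for illustration only:

```python
import json
import requests

BASE_URL = "https://api.lmnr.ai"        # assumed default; the SDK derives this from its configuration
PROJECT_API_KEY = "<project-api-key>"   # placeholder

body = {
    "groupId": "my_first_feature",      # optional; groups evaluations by feature
    "name": None,                       # optional; auto-generated when omitted
    "points": [                         # EvaluationResultDatapoint.to_dict() output (shape assumed)
        {"data": {"topic": "the sea"}, "target": {"text": "sea"}, "scores": {"containsPoem": 1}},
    ],
}

response = requests.post(
    f"{BASE_URL}/v1/evaluations",
    data=json.dumps(body),
    headers={
        "Authorization": f"Bearer {PROJECT_API_KEY}",  # assumed header format
        "Content-Type": "application/json",
    },
)
if response.status_code != 200:
    raise ValueError(f"Error creating evaluation {response.text}")
print(response.json())
```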
```diff
@@ -427,66 +430,6 @@
             raise ValueError(f"Error creating evaluation {response.text}")
         return CreateEvaluationResponse.model_validate(response.json())
 
-    @classmethod
-    def post_evaluation_results(
-        cls, evaluation_id: uuid.UUID, data: list[EvaluationResultDatapoint]
-    ) -> requests.Response:
-        body = {
-            "evaluationId": str(evaluation_id),
-            "points": [datapoint.to_dict() for datapoint in data],
-        }
-        response = requests.post(
-            cls.__base_http_url + "/v1/evaluation-datapoints",
-            data=json.dumps(body),
-            headers=cls._headers(),
-        )
-        if response.status_code != 200:
-            try:
-                resp_json = response.json()
-                raise ValueError(
-                    f"Failed to send evaluation results. Response: {json.dumps(resp_json)}"
-                )
-            except Exception:
-                raise ValueError(
-                    f"Failed to send evaluation results. Error: {response.text}"
-                )
-        return response
-
-    @classmethod
-    def update_evaluation_status(
-        cls, evaluation_id: str, status: str
-    ) -> UpdateEvaluationResponse:
-        """
-        Updates the status of an evaluation. Returns the updated evaluation object.
-
-        Args:
-            evaluation_id (str): The ID of the evaluation to update.
-            status (str): The status to set for the evaluation.
-
-        Returns:
-            UpdateEvaluationResponse: The updated evaluation response.
-
-        Raises:
-            ValueError: If the request fails.
-        """
-        body = {
-            "status": status,
-        }
-        url = f"{cls.__base_http_url}/v1/evaluations/{evaluation_id}"
-
-        response = requests.post(
-            url,
-            data=json.dumps(body),
-            headers=cls._headers(),
-        )
-        if response.status_code != 200:
-            raise ValueError(
-                f"Failed to update evaluation status {evaluation_id}. "
-                f"Response: {response.text}"
-            )
-
-        return UpdateEvaluationResponse.model_validate(response.json())
-
     @classmethod
     def _headers(cls):
         assert cls.__project_api_key is not None, "Project API key is not set"
```
{lmnr-0.4.13 → lmnr-0.4.15b1}/src/lmnr/sdk/types.py

```diff
@@ -2,7 +2,7 @@ import datetime
 from enum import Enum
 import pydantic
 import requests
-from typing import Any, Awaitable, Callable,
+from typing import Any, Awaitable, Callable, Optional, Union
 import uuid
 
 from .utils import serialize
@@ -107,24 +107,13 @@ EvaluatorFunction = Callable[
     Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
 ]
 
-EvaluationStatus = Literal["Started", "Finished", "Error"]
-
 
 class CreateEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime
+    groupId: str
     name: str
-    status: EvaluationStatus
     projectId: uuid.UUID
-    metadata: Optional[dict[str, Any]] = None
-
-
-class EvaluationStats(pydantic.BaseModel):
-    averageScores: dict[str, Numeric]
-
-
-class UpdateEvaluationResponse(pydantic.BaseModel):
-    stats: EvaluationStats
 
 
 class EvaluationResultDatapoint(pydantic.BaseModel):
```
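On the response side, `CreateEvaluationResponse` now carries a `groupId` and no longer has the status or metadata fields. A small sketch of validating a server payload against the updated model (the model mirrors the diff above; the payload values are made up):

```python
import datetime
import uuid

import pydantic


class CreateEvaluationResponse(pydantic.BaseModel):
    # Mirror of the updated model in src/lmnr/sdk/types.py.
    id: uuid.UUID
    createdAt: datetime.datetime
    groupId: str
    name: str
    projectId: uuid.UUID


response = CreateEvaluationResponse.model_validate({
    "id": str(uuid.uuid4()),
    "createdAt": "2024-09-10T12:00:00Z",
    "groupId": "my_first_feature",
    "name": "witty-sunset-1",
    "projectId": str(uuid.uuid4()),
})
print(response.groupId, response.projectId)
```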
|