lmnr 0.4.13__py3-none-any.whl → 0.4.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lmnr/sdk/evaluations.py CHANGED
@@ -1,4 +1,5 @@
  import asyncio
+ import re
  import sys
  from abc import ABC, abstractmethod
  from contextlib import contextmanager
@@ -45,13 +46,26 @@ def get_evaluation_url(project_id: str, evaluation_id: str):
      return f"https://www.lmnr.ai/project/{project_id}/evaluations/{evaluation_id}"


+ def get_average_scores(results: list[EvaluationResultDatapoint]) -> dict[str, Numeric]:
+     per_score_values = {}
+     for result in results:
+         for key, value in result.scores.items():
+             if key not in per_score_values:
+                 per_score_values[key] = []
+             per_score_values[key].append(value)
+
+     average_scores = {}
+     for key, values in per_score_values.items():
+         average_scores[key] = sum(values) / len(values)
+
+     return average_scores
+
+
  class EvaluationReporter:
      def __init__(self):
          pass

-     def start(self, name: str, project_id: str, id: str, length: int):
-         print(f"Running evaluation {name}...\n")
-         print(f"Check progress and results at {get_evaluation_url(project_id, id)}\n")
+     def start(self, length: int):
          self.cli_progress = tqdm(
              total=length,
              bar_format="{bar} {percentage:3.0f}% | ETA: {remaining}s | {n_fmt}/{total_fmt}",
@@ -65,9 +79,10 @@ class EvaluationReporter:
          self.cli_progress.close()
          sys.stderr.write(f"\nError: {error}\n")

-     def stop(self, average_scores: dict[str, Numeric]):
+     def stop(self, average_scores: dict[str, Numeric], project_id: str, evaluation_id: str):
          self.cli_progress.close()
-         print("\nAverage scores:")
+         print(f"\nCheck progress and results at {get_evaluation_url(project_id, evaluation_id)}\n")
+         print("Average scores:")
          for name, score in average_scores.items():
              print(f"{name}: {score}")
          print("\n")
@@ -96,6 +111,7 @@ class Evaluation:
          data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
          executor: Any,
          evaluators: dict[str, EvaluatorFunction],
+         group_id: Optional[str] = None,
          name: Optional[str] = None,
          batch_size: int = DEFAULT_BATCH_SIZE,
          project_api_key: Optional[str] = None,
@@ -122,6 +138,8 @@ class Evaluation:
                  evaluator function. If the function is anonymous, it will be
                  named `evaluator_${index}`, where index is the index of the
                  evaluator function in the list starting from 1.
+             group_id (Optional[str], optional): Group id of the evaluation.
+                 Defaults to "default".
              name (Optional[str], optional): The name of the evaluation.
                  It will be auto-generated if not provided.
              batch_size (int, optional): The batch size for evaluation.
@@ -137,11 +155,16 @@ class Evaluation:
                  Defaults to None. If None, all available instruments will be used.
          """

+         if not evaluators:
+             raise ValueError("No evaluators provided")
+
+         # TODO: Compile regex once and then reuse it
+         for evaluator_name in evaluators:
+             if not re.match(r'^[\w\s-]+$', evaluator_name):
+                 raise ValueError(f'Invalid evaluator key: "{evaluator_name}". Keys must only contain letters, digits, hyphens, underscores, or spaces.')
+
          self.is_finished = False
-         self.name = name
          self.reporter = EvaluationReporter()
-         self.executor = executor
-         self.evaluators = evaluators
          if isinstance(data, list):
              self.data = [
                  (Datapoint.model_validate(point) if isinstance(point, dict) else point)
@@ -149,6 +172,10 @@ class Evaluation:
              ]
          else:
              self.data = data
+         self.executor = executor
+         self.evaluators = evaluators
+         self.group_id = group_id
+         self.name = name
          self.batch_size = batch_size
          L.initialize(
              project_api_key=project_api_key,
@@ -159,23 +186,6 @@ class Evaluation:
          )

      def run(self) -> Union[None, Awaitable[None]]:
-         """Runs the evaluation.
-
-         Creates a new evaluation if no evaluation with such name exists, or
-         adds data to an existing one otherwise. Evaluates data points in
-         batches of `self.batch_size`. The executor
-         function is called on each data point to get the output,
-         and then evaluate it by each evaluator function.
-
-         Usage:
-         ```python
-         # in a synchronous context:
-         e.run()
-         # in an asynchronous context:
-         await e.run()
-         ```
-
-         """
          if self.is_finished:
              raise Exception("Evaluation is already finished")

@@ -186,41 +196,34 @@ class Evaluation:
              return loop.run_until_complete(self._run())

      async def _run(self) -> None:
-         evaluation = L.create_evaluation(self.name)
          self.reporter.start(
-             evaluation.name,
-             evaluation.projectId,
-             evaluation.id,
              len(self.data),
          )

          try:
-             await self.evaluate_in_batches(evaluation.id)
+             result_datapoints = await self.evaluate_in_batches()
          except Exception as e:
-             L.update_evaluation_status(evaluation.id, "Error")
              self.reporter.stopWithError(e)
              self.is_finished = True
              return
+         else:
+             evaluation = L.create_evaluation(data=result_datapoints, group_id=self.group_id, name=self.name)
+             average_scores = get_average_scores(result_datapoints)
+             self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+             self.is_finished = True

-         update_evaluation_response = L.update_evaluation_status(evaluation.id, "Finished")
-         average_scores = update_evaluation_response.stats.averageScores
-         self.reporter.stop(average_scores)
-         self.is_finished = True
-
-     async def evaluate_in_batches(self, evaluation_id: uuid.UUID):
+     async def evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
+         result_datapoints = []
          for i in range(0, len(self.data), self.batch_size):
              batch = (
                  self.data[i: i + self.batch_size]
                  if isinstance(self.data, list)
                  else self.data.slice(i, i + self.batch_size)
              )
-             try:
-                 results = await self._evaluate_batch(batch)
-                 L.post_evaluation_results(evaluation_id, results)
-             except Exception as e:
-                 print(f"Error evaluating batch: {e}")
-             finally:
-                 self.reporter.update(len(batch))
+             batch_datapoints = await self._evaluate_batch(batch)
+             result_datapoints.extend(batch_datapoints)
+             self.reporter.update(len(batch))
+         return result_datapoints

      async def _evaluate_batch(
          self, batch: list[Datapoint]
@@ -281,6 +284,7 @@ def evaluate(
      data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
      executor: ExecutorFunction,
      evaluators: dict[str, EvaluatorFunction],
+     group_id: Optional[str] = None,
      name: Optional[str] = None,
      batch_size: int = DEFAULT_BATCH_SIZE,
      project_api_key: Optional[str] = None,
@@ -309,8 +313,11 @@ def evaluate(
              evaluator function. If the function is anonymous, it will be
              named `evaluator_${index}`, where index is the index of the
              evaluator function in the list starting from 1.
-         name (Optional[str], optional): The name of the evaluation.
-             It will be auto-generated if not provided.
+         group_id (Optional[str], optional): Group name which is same
+             as the feature you are evaluating in your project or application.
+             Defaults to "default".
+         name (Optional[str], optional): Optional name of the evaluation. Used to easily
+             identify the evaluation in the group.
          batch_size (int, optional): The batch size for evaluation.
              Defaults to DEFAULT_BATCH_SIZE.
          project_api_key (Optional[str], optional): The project API key.
@@ -330,6 +337,7 @@ def evaluate(
          data=data,
          executor=executor,
          evaluators=evaluators,
+         group_id=group_id,
          name=name,
          batch_size=batch_size,
          project_api_key=project_api_key,
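
The new `get_average_scores` helper added above simply averages each score key across all result datapoints before they are reported. A minimal standalone sketch of the same aggregation, using plain dicts in place of `EvaluationResultDatapoint.scores` (the function and variable names here are illustrative, not part of the SDK):

```python
# Sketch of the per-key averaging performed by get_average_scores.
# Each element of `score_dicts` plays the role of one datapoint's `scores` mapping.
def average_scores(score_dicts: list[dict[str, float]]) -> dict[str, float]:
    per_key: dict[str, list[float]] = {}
    for scores in score_dicts:
        for key, value in scores.items():
            per_key.setdefault(key, []).append(value)
    return {key: sum(values) / len(values) for key, values in per_key.items()}


if __name__ == "__main__":
    print(average_scores([
        {"containsPoem": 1.0, "length": 0.5},
        {"containsPoem": 0.0, "length": 1.0},
    ]))
    # -> {'containsPoem': 0.5, 'length': 0.75}
```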
lmnr/sdk/laminar.py CHANGED
@@ -47,7 +47,6 @@ from .types import (
      NodeInput,
      PipelineRunRequest,
      TraceType,
-     UpdateEvaluationResponse,
  )


@@ -413,10 +412,14 @@ class Laminar:
          set_association_properties(props)

      @classmethod
-     def create_evaluation(cls, name: Optional[str]) -> CreateEvaluationResponse:
+     def create_evaluation(cls, data: list[EvaluationResultDatapoint], group_id: Optional[str] = None, name: Optional[str] = None) -> CreateEvaluationResponse:
          response = requests.post(
              cls.__base_http_url + "/v1/evaluations",
-             data=json.dumps({"name": name}),
+             data=json.dumps({
+                 "groupId": group_id,
+                 "name": name,
+                 "points": [datapoint.to_dict() for datapoint in data]
+             }),
              headers=cls._headers(),
          )
          if response.status_code != 200:
@@ -427,66 +430,6 @@ class Laminar:
              raise ValueError(f"Error creating evaluation {response.text}")
          return CreateEvaluationResponse.model_validate(response.json())

-     @classmethod
-     def post_evaluation_results(
-         cls, evaluation_id: uuid.UUID, data: list[EvaluationResultDatapoint]
-     ) -> requests.Response:
-         body = {
-             "evaluationId": str(evaluation_id),
-             "points": [datapoint.to_dict() for datapoint in data],
-         }
-         response = requests.post(
-             cls.__base_http_url + "/v1/evaluation-datapoints",
-             data=json.dumps(body),
-             headers=cls._headers(),
-         )
-         if response.status_code != 200:
-             try:
-                 resp_json = response.json()
-                 raise ValueError(
-                     f"Failed to send evaluation results. Response: {json.dumps(resp_json)}"
-                 )
-             except Exception:
-                 raise ValueError(
-                     f"Failed to send evaluation results. Error: {response.text}"
-                 )
-         return response
-
-     @classmethod
-     def update_evaluation_status(
-         cls, evaluation_id: str, status: str
-     ) -> UpdateEvaluationResponse:
-         """
-         Updates the status of an evaluation. Returns the updated evaluation object.
-
-         Args:
-             evaluation_id (str): The ID of the evaluation to update.
-             status (str): The status to set for the evaluation.
-
-         Returns:
-             UpdateEvaluationResponse: The updated evaluation response.
-
-         Raises:
-             ValueError: If the request fails.
-         """
-         body = {
-             "status": status,
-         }
-         url = f"{cls.__base_http_url}/v1/evaluations/{evaluation_id}"
-
-         response = requests.post(
-             url,
-             data=json.dumps(body),
-             headers=cls._headers(),
-         )
-         if response.status_code != 200:
-             raise ValueError(
-                 f"Failed to update evaluation status {evaluation_id}. "
-                 f"Response: {response.text}"
-             )
-
-         return UpdateEvaluationResponse.model_validate(response.json())
-
      @classmethod
      def _headers(cls):
          assert cls.__project_api_key is not None, "Project API key is not set"
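
With `post_evaluation_results` and `update_evaluation_status` removed, an evaluation is now created by a single POST to `/v1/evaluations` that carries the group id, the optional name, and every result datapoint. A rough sketch of that request shape, with illustrative stand-ins for the base URL, API key, and payload (the real values come from the SDK's configuration and `EvaluationResultDatapoint.to_dict()`):

```python
import json
from typing import Optional

import requests

# Illustrative stand-ins; the SDK reads these from its own configuration.
BASE_URL = "https://api.lmnr.ai"
PROJECT_API_KEY = "<project-api-key>"


def create_evaluation_sketch(points: list[dict],
                             group_id: Optional[str] = None,
                             name: Optional[str] = None) -> dict:
    # One request now creates the evaluation and uploads all datapoints at once.
    response = requests.post(
        f"{BASE_URL}/v1/evaluations",
        data=json.dumps({"groupId": group_id, "name": name, "points": points}),
        headers={
            "Authorization": f"Bearer {PROJECT_API_KEY}",  # assumed header format
            "Content-Type": "application/json",
        },
    )
    if response.status_code != 200:
        raise ValueError(f"Error creating evaluation {response.text}")
    return response.json()
```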
lmnr/sdk/types.py CHANGED
@@ -2,7 +2,7 @@ import datetime
  from enum import Enum
  import pydantic
  import requests
- from typing import Any, Awaitable, Callable, Literal, Optional, Union
+ from typing import Any, Awaitable, Callable, Optional, Union
  import uuid

  from .utils import serialize
@@ -107,24 +107,13 @@ EvaluatorFunction = Callable[
      Union[EvaluatorFunctionReturnType, Awaitable[EvaluatorFunctionReturnType]],
  ]

- EvaluationStatus = Literal["Started", "Finished", "Error"]
-

  class CreateEvaluationResponse(pydantic.BaseModel):
      id: uuid.UUID
      createdAt: datetime.datetime
+     groupId: str
      name: str
-     status: EvaluationStatus
      projectId: uuid.UUID
-     metadata: Optional[dict[str, Any]] = None
-
-
- class EvaluationStats(pydantic.BaseModel):
-     averageScores: dict[str, Numeric]
-
-
- class UpdateEvaluationResponse(pydantic.BaseModel):
-     stats: EvaluationStats


  class EvaluationResultDatapoint(pydantic.BaseModel):
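
For reference, the trimmed-down response model now validates a payload like the one below. This is a small self-contained sketch using only the fields shown in the diff; the concrete values are made up:

```python
import datetime
import uuid

import pydantic


# Mirrors the CreateEvaluationResponse fields shown in the diff above.
class CreateEvaluationResponse(pydantic.BaseModel):
    id: uuid.UUID
    createdAt: datetime.datetime
    groupId: str
    name: str
    projectId: uuid.UUID


resp = CreateEvaluationResponse.model_validate({
    "id": str(uuid.uuid4()),
    "createdAt": "2024-09-01T12:00:00Z",
    "groupId": "default",
    "name": "my-first-evaluation",
    "projectId": str(uuid.uuid4()),
})
print(resp.groupId, resp.projectId)
```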
lmnr-0.4.13.dist-info/METADATA → lmnr-0.4.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: lmnr
- Version: 0.4.13
+ Version: 0.4.14
  Summary: Python SDK for Laminar AI
  License: Apache-2.0
  Author: lmnr.ai
@@ -230,7 +230,8 @@ evaluate(
      executor=write_poem,
      evaluators={
          "containsPoem": contains_poem
-     }
+     },
+     group_id="my_first_feature"
  )
  ```

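The README snippet above shows only the tail of the call; a fuller sketch of how the new `group_id` argument slots into `evaluate` might look as follows. The executor and evaluator bodies, the datapoint shape, and the top-level `lmnr` import are assumptions for illustration; only the keyword arguments come from the diff:

```python
from lmnr import evaluate  # assumed top-level export

def write_poem(data: dict) -> str:
    # Placeholder executor; in practice this would call an LLM.
    return f"Roses are red, {data.get('topic', 'violets are blue')}."

def contains_poem(output: str, target: dict) -> int:
    # Placeholder evaluator returning a numeric score.
    return int("Roses" in output)

evaluate(
    data=[{"data": {"topic": "violets are blue"}, "target": {}}],  # assumed datapoint shape
    executor=write_poem,
    evaluators={"containsPoem": contains_poem},
    group_id="my_first_feature",
)
```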
lmnr-0.4.13.dist-info/RECORD → lmnr-0.4.14.dist-info/RECORD CHANGED
@@ -2,10 +2,10 @@ lmnr/__init__.py,sha256=5Ks8UIicCzCBgwSz0MOX3I7jVruPMUO3SmxIwUoODzQ,231
  lmnr/cli.py,sha256=Ptvm5dsNLKUY5lwnN8XkT5GtCYjzpRNi2WvefknB3OQ,1079
  lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lmnr/sdk/decorators.py,sha256=ii7Bqp6flaanIFSK6M1_ZZV-izp4o3hkR1MmY7wnFQQ,2227
- lmnr/sdk/evaluations.py,sha256=4VEfhL8DsrQLX96jHrGmBKHxCnbfM-4-6MFOR-XQozM,13525
- lmnr/sdk/laminar.py,sha256=jH0-J7S5k8duwivE2giYuh6mx64PswoEWHUdH4GFqoM,18305
+ lmnr/sdk/evaluations.py,sha256=Tukl2pW_x13ittzG5XQpF1TweYo3fpD4eLInplQ4YYI,14152
+ lmnr/sdk/laminar.py,sha256=d5Vn0eSVIoI_SxFcTh71T-_w7_E_odvFDFiLU2YwzZo,16509
  lmnr/sdk/log.py,sha256=EgAMY77Zn1bv1imCqrmflD3imoAJ2yveOkIcrIP3e98,1170
- lmnr/sdk/types.py,sha256=KUCVIdkyr9pN2KKp-H1O-FU8x5_yKeC3cUP3Je3hY6g,5117
+ lmnr/sdk/types.py,sha256=HvaZEqVRduCZbkF7Cp8rgS5oBbc1qPvOD3PP9tFrRu4,4826
  lmnr/sdk/utils.py,sha256=s81p6uJehgJSaLWy3sR5fTpEDH7vzn3i_UujUHChl6M,3346
  lmnr/traceloop_sdk/.flake8,sha256=bCxuDlGx3YQ55QHKPiGJkncHanh9qGjQJUujcFa3lAU,150
  lmnr/traceloop_sdk/.python-version,sha256=9OLQBQVbD4zE4cJsPePhnAfV_snrPSoqEQw-PXgPMOs,6
@@ -44,8 +44,8 @@ lmnr/traceloop_sdk/utils/in_memory_span_exporter.py,sha256=H_4TRaThMO1H6vUQ0OpQv
  lmnr/traceloop_sdk/utils/json_encoder.py,sha256=dK6b_axr70IYL7Vv-bu4wntvDDuyntoqsHaddqX7P58,463
  lmnr/traceloop_sdk/utils/package_check.py,sha256=TZSngzJOpFhfUZLXIs38cpMxQiZSmp0D-sCrIyhz7BA,251
  lmnr/traceloop_sdk/version.py,sha256=OlatFEFA4ttqSSIiV8jdE-sq3KG5zu2hnC4B4mzWF3s,23
- lmnr-0.4.13.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
- lmnr-0.4.13.dist-info/METADATA,sha256=TgaQ5yPkKErpY9WrLywc84BJyAxsffR1Rf0_N_qeOvA,11233
- lmnr-0.4.13.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- lmnr-0.4.13.dist-info/entry_points.txt,sha256=K1jE20ww4jzHNZLnsfWBvU3YKDGBgbOiYG5Y7ivQcq4,37
- lmnr-0.4.13.dist-info/RECORD,,
+ lmnr-0.4.14.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
+ lmnr-0.4.14.dist-info/METADATA,sha256=WSsRng3syFI0DugYr2-V6nRZIA1F_xo3_ikuHE6PDq8,11266
+ lmnr-0.4.14.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ lmnr-0.4.14.dist-info/entry_points.txt,sha256=K1jE20ww4jzHNZLnsfWBvU3YKDGBgbOiYG5Y7ivQcq4,37
+ lmnr-0.4.14.dist-info/RECORD,,