lmnr 0.4.28__tar.gz → 0.4.29b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {lmnr-0.4.28 → lmnr-0.4.29b0}/PKG-INFO +1 -1
  2. {lmnr-0.4.28 → lmnr-0.4.29b0}/pyproject.toml +2 -2
  3. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/__init__.py +7 -1
  4. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/evaluations.py +43 -12
  5. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/types.py +12 -0
  6. {lmnr-0.4.28 → lmnr-0.4.29b0}/LICENSE +0 -0
  7. {lmnr-0.4.28 → lmnr-0.4.29b0}/README.md +0 -0
  8. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/cli.py +0 -0
  9. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/__init__.py +0 -0
  10. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/datasets.py +0 -0
  11. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/decorators.py +0 -0
  12. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/laminar.py +0 -0
  13. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/log.py +0 -0
  14. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/utils.py +0 -0
  15. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/.flake8 +0 -0
  16. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/.python-version +0 -0
  17. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/__init__.py +0 -0
  18. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/config/__init__.py +0 -0
  19. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/decorators/__init__.py +0 -0
  20. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/decorators/base.py +0 -0
  21. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/instruments.py +0 -0
  22. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/__init__.py +0 -0
  23. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_and_external_association_properties.yaml +0 -0
  24. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_association_properties/test_langchain_association_properties.yaml +0 -0
  25. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_manual_report.yaml +0 -0
  26. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_manual/test_resource_attributes.yaml +0 -0
  27. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_privacy_no_prompts/test_simple_workflow.yaml +0 -0
  28. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_prompt_management/test_prompt_management.yaml +0 -0
  29. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_sdk_initialization/test_resource_attributes.yaml +0 -0
  30. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_tasks/test_task_io_serialization_with_langchain.yaml +0 -0
  31. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_aworkflow.yaml +0 -0
  32. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_simple_workflow.yaml +0 -0
  33. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/cassettes/test_workflows/test_streaming_workflow.yaml +0 -0
  34. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/conftest.py +0 -0
  35. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_association_properties.py +0 -0
  36. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_manual.py +0 -0
  37. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_nested_tasks.py +0 -0
  38. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_privacy_no_prompts.py +0 -0
  39. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_sdk_initialization.py +0 -0
  40. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_tasks.py +0 -0
  41. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tests/test_workflows.py +0 -0
  42. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/__init__.py +0 -0
  43. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/attributes.py +0 -0
  44. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/content_allow_list.py +0 -0
  45. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/context_manager.py +0 -0
  46. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/tracing/tracing.py +0 -0
  47. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/__init__.py +0 -0
  48. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/in_memory_span_exporter.py +0 -0
  49. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/json_encoder.py +0 -0
  50. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/utils/package_check.py +0 -0
  51. {lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/traceloop_sdk/version.py +0 -0
{lmnr-0.4.28 → lmnr-0.4.29b0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lmnr
-Version: 0.4.28
+Version: 0.4.29b0
 Summary: Python SDK for Laminar AI
 License: Apache-2.0
 Author: lmnr.ai
{lmnr-0.4.28 → lmnr-0.4.29b0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lmnr"
-version = "0.4.28"
+version = "0.4.29b0"
 description = "Python SDK for Laminar AI"
 authors = [
     { name = "lmnr.ai", email = "founders@lmnr.ai" }
@@ -11,7 +11,7 @@ license = "Apache-2.0"
 
 [tool.poetry]
 name = "lmnr"
-version = "0.4.28"
+version = "0.4.29b0"
 description = "Python SDK for Laminar AI"
 authors = ["lmnr.ai"]
 readme = "README.md"
{lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/__init__.py
@@ -1,7 +1,13 @@
 from .sdk.datasets import EvaluationDataset, LaminarDataset
 from .sdk.evaluations import evaluate
 from .sdk.laminar import Laminar
-from .sdk.types import ChatMessage, PipelineRunError, PipelineRunResponse, NodeInput
+from .sdk.types import (
+    ChatMessage,
+    HumanEvaluator,
+    NodeInput,
+    PipelineRunError,
+    PipelineRunResponse,
+)
 from .sdk.decorators import observe
 from .traceloop_sdk import Instruments
 from .traceloop_sdk.tracing.attributes import Attributes
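The practical effect of this re-export is that the new type can be imported straight from the package root; a minimal sketch:

    # HumanEvaluator is re-exported at the package root as of 0.4.29b0
    from lmnr import HumanEvaluator, evaluate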
{lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/evaluations.py
@@ -18,6 +18,7 @@ from .types import (
     EvaluationResultDatapoint,
     EvaluatorFunction,
     ExecutorFunction,
+    HumanEvaluator,
     Numeric,
     NumericTypes,
     SpanType,
@@ -99,6 +100,7 @@ class Evaluation:
         data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
         executor: Any,
         evaluators: dict[str, EvaluatorFunction],
+        human_evaluators: dict[str, HumanEvaluator] = {},
         name: Optional[str] = None,
         group_id: Optional[str] = None,
         batch_size: int = DEFAULT_BATCH_SIZE,
@@ -126,6 +128,11 @@ class Evaluation:
                 If the score is a single number, it will be named after the\
                 evaluator function. Evaluator function names must contain only\
                 letters, digits, hyphens, underscores, or spaces.
+            human_evaluators (dict[str, HumanEvaluator], optional):\
+                [Beta] Dictionary from human evaluator names to instances of\
+                HumanEvaluator. For now, human evaluator only holds the queue\
+                name.
+                Defaults to an empty dictionary.
             name (Optional[str], optional): Optional name of the evaluation.\
                 Used to identify the evaluation in the group.\
                 If not provided, a random name will be generated.
@@ -159,14 +166,27 @@ class Evaluation:
         if not evaluators:
             raise ValueError("No evaluators provided")
 
-        # TODO: Compile regex once and then reuse it
+        evaluator_name_regex = re.compile(r"^[\w\s-]+$")
         for evaluator_name in evaluators:
-            if not re.match(r"^[\w\s-]+$", evaluator_name):
+            if not evaluator_name_regex.match(evaluator_name):
                 raise ValueError(
                     f'Invalid evaluator key: "{evaluator_name}". '
                     "Keys must only contain letters, digits, hyphens,"
                     "underscores, or spaces."
                 )
+        for evaluator_name in human_evaluators or {}:
+            if not evaluator_name_regex.match(evaluator_name):
+                raise ValueError(
+                    f'Invalid human evaluator key: "{evaluator_name}". '
+                    "Keys must only contain letters, digits, hyphens,"
+                    "underscores, or spaces."
+                )
+
+        if intersection := set(evaluators.keys()) & set(human_evaluators.keys()):
+            raise ValueError(
+                "Evaluator and human evaluator names must not overlap. "
+                f"Repeated keys: {intersection}"
+            )
 
         self.is_finished = False
         self.reporter = EvaluationReporter()
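A rough illustration of the checks above (the key names are made up for this sketch, not taken from the SDK): the compiled pattern accepts letters, digits, hyphens, underscores, and spaces, and any key shared between evaluators and human_evaluators raises a ValueError.

    import re

    # Same pattern the SDK now compiles once and reuses for both kinds of keys
    evaluator_name_regex = re.compile(r"^[\w\s-]+$")

    assert evaluator_name_regex.match("exact_match-1")    # letters, digits, hyphens, underscores pass
    assert evaluator_name_regex.match("human review")     # spaces pass
    assert not evaluator_name_regex.match("accuracy@v2")  # "@" is rejected

    # A key present in both dictionaries would trip the overlap check
    evaluators = {"accuracy": lambda output, target: 1.0}
    human_evaluator_keys = {"accuracy"}
    assert set(evaluators) & human_evaluator_keys == {"accuracy"}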
@@ -183,6 +203,7 @@ class Evaluation:
         self.name = name
         self.batch_size = batch_size
         self._logger = get_default_logger(self.__class__.__name__)
+        self.human_evaluators = human_evaluators
         L.initialize(
             project_api_key=project_api_key,
             base_url=base_url,
@@ -202,9 +223,7 @@ class Evaluation:
         return loop.run_until_complete(self._run())
 
     async def _run(self) -> None:
-        self.reporter.start(
-            len(self.data),
-        )
+        self.reporter.start(len(self.data))
 
         try:
             result_datapoints = await self._evaluate_in_batches()
@@ -212,13 +231,19 @@ class Evaluation:
             self.reporter.stopWithError(e)
             self.is_finished = True
             return
-        else:
-            evaluation = L.create_evaluation(
-                data=result_datapoints, group_id=self.group_id, name=self.name
-            )
-            average_scores = get_average_scores(result_datapoints)
-            self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
-            self.is_finished = True
+
+        # For now add all human evaluators to all result datapoints
+        # In the future, we will add ways to specify which human evaluators
+        # to add to which result datapoints, e.g. sample some randomly
+        for result_datapoint in result_datapoints:
+            result_datapoint.human_evaluators = self.human_evaluators or {}
+
+        evaluation = L.create_evaluation(
+            data=result_datapoints, group_id=self.group_id, name=self.name
+        )
+        average_scores = get_average_scores(result_datapoints)
+        self.reporter.stop(average_scores, evaluation.projectId, evaluation.id)
+        self.is_finished = True
 
     async def _evaluate_in_batches(self) -> list[EvaluationResultDatapoint]:
         result_datapoints = []
@@ -292,6 +317,7 @@ def evaluate(
     data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
     executor: ExecutorFunction,
     evaluators: dict[str, EvaluatorFunction],
+    human_evaluators: dict[str, HumanEvaluator] = {},
     name: Optional[str] = None,
     group_id: Optional[str] = None,
     batch_size: int = DEFAULT_BATCH_SIZE,
@@ -326,6 +352,10 @@ def evaluate(
             If the score is a single number, it will be named after the\
             evaluator function. Evaluator function names must contain only\
             letters, digits, hyphens, underscores, or spaces.
+        human_evaluators (dict[str, HumanEvaluator], optional):\
+            [Beta] Dictionary from human evaluator names to instances of\
+            HumanEvaluator. For now, human evaluator only holds the queue name.
+            Defaults to an empty dictionary.
         name (Optional[str], optional): Optional name of the evaluation.\
             Used to identify the evaluation in the group.\
             If not provided, a random name will be generated.
@@ -359,6 +389,7 @@ def evaluate(
         executor=executor,
         evaluators=evaluators,
         group_id=group_id,
+        human_evaluators=human_evaluators,
         name=name,
         batch_size=batch_size,
         project_api_key=project_api_key,
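Taken together, a hedged usage sketch of the new parameter: the datapoint shape, the executor and evaluator callables, and the queue name "review-queue" are illustrative assumptions, and only the evaluate(...) keyword arguments come from this diff.

    from lmnr import HumanEvaluator, evaluate

    # Hypothetical executor and evaluator, for illustration only
    def my_executor(data: dict) -> str:
        return data["question"].strip().lower()

    def exact_match(output: str, target: str) -> int:
        return int(output == target)

    evaluate(
        data=[{"data": {"question": " Hello "}, "target": "hello"}],  # assumed datapoint shape
        executor=my_executor,
        evaluators={"exact_match": exact_match},
        # [Beta] every result datapoint is queued for manual review in this queue
        human_evaluators={"manual_review": HumanEvaluator("review-queue")},
        project_api_key="...",  # or configure the project API key via the environment
    )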
{lmnr-0.4.28 → lmnr-0.4.29b0}/src/lmnr/sdk/types.py
@@ -110,6 +110,13 @@ EvaluatorFunction = Callable[
 ]
 
 
+class HumanEvaluator(pydantic.BaseModel):
+    queueName: str
+
+    def __init__(self, queue_name: str):
+        super().__init__(queueName=queue_name)
+
+
 class CreateEvaluationResponse(pydantic.BaseModel):
     id: uuid.UUID
     createdAt: datetime.datetime
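The custom __init__ keeps construction positional and snake_case at the call site while storing the camelCase field used in serialization; for example (the queue name is illustrative):

    from lmnr import HumanEvaluator

    he = HumanEvaluator("review-queue")
    assert he.queueName == "review-queue"
    assert he.model_dump() == {"queueName": "review-queue"}  # pydantic v2 dump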
@@ -123,6 +130,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
     target: EvaluationDatapointTarget
     executor_output: ExecutorFunctionReturnType
     scores: dict[str, Numeric]
+    human_evaluators: dict[str, HumanEvaluator] = pydantic.Field(default_factory=dict)
     trace_id: uuid.UUID
 
     # uuid is not serializable by default, so we need to convert it to a string
@@ -139,6 +147,10 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
             "executorOutput": serialize(self.executor_output),
             "scores": self.scores,
             "traceId": str(self.trace_id),
+            "humanEvaluators": {
+                k: v.model_dump() if isinstance(v, pydantic.BaseModel) else serialize(v)
+                for k, v in self.human_evaluators.items()
+            },
         }
 
 
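Under those definitions, the serialized datapoint gains a camelCase humanEvaluators entry that mirrors the dictionary of dumped models; a sketch of just that slice (the evaluator name and queue are hypothetical, and the SDK falls back to its serialize() helper for non-model values):

    import pydantic
    from lmnr import HumanEvaluator

    human_evaluators = {"manual_review": HumanEvaluator("review-queue")}
    serialized = {
        k: v.model_dump() if isinstance(v, pydantic.BaseModel) else v
        for k, v in human_evaluators.items()
    }
    assert serialized == {"manual_review": {"queueName": "review-queue"}}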