lmnr 0.4.29b2__py3-none-any.whl → 0.4.29b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lmnr/sdk/evaluations.py +14 -24
- lmnr/sdk/types.py +6 -4
- {lmnr-0.4.29b2.dist-info → lmnr-0.4.29b3.dist-info}/METADATA +1 -1
- {lmnr-0.4.29b2.dist-info → lmnr-0.4.29b3.dist-info}/RECORD +7 -7
- {lmnr-0.4.29b2.dist-info → lmnr-0.4.29b3.dist-info}/LICENSE +0 -0
- {lmnr-0.4.29b2.dist-info → lmnr-0.4.29b3.dist-info}/WHEEL +0 -0
- {lmnr-0.4.29b2.dist-info → lmnr-0.4.29b3.dist-info}/entry_points.txt +0 -0
lmnr/sdk/evaluations.py
CHANGED
@@ -100,7 +100,7 @@ class Evaluation:
|
|
100
100
|
data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
|
101
101
|
executor: Any,
|
102
102
|
evaluators: dict[str, EvaluatorFunction],
|
103
|
-
human_evaluators:
|
103
|
+
human_evaluators: list[HumanEvaluator] = [],
|
104
104
|
name: Optional[str] = None,
|
105
105
|
group_id: Optional[str] = None,
|
106
106
|
batch_size: int = DEFAULT_BATCH_SIZE,
|
@@ -128,11 +128,10 @@ class Evaluation:
|
|
128
128
|
If the score is a single number, it will be named after the\
|
129
129
|
evaluator function. Evaluator function names must contain only\
|
130
130
|
letters, digits, hyphens, underscores, or spaces.
|
131
|
-
human_evaluators (
|
132
|
-
[Beta]
|
133
|
-
|
134
|
-
|
135
|
-
Defaults to an empty dictionary.
|
131
|
+
human_evaluators (list[HumanEvaluator], optional):\
|
132
|
+
[Beta] List of instances of HumanEvaluator. For now, human\
|
133
|
+
evaluator only holds the queue name.
|
134
|
+
Defaults to an empty list.
|
136
135
|
name (Optional[str], optional): Optional name of the evaluation.\
|
137
136
|
Used to identify the evaluation in the group.\
|
138
137
|
If not provided, a random name will be generated.
|
@@ -174,19 +173,6 @@ class Evaluation:
|
|
174
173
|
"Keys must only contain letters, digits, hyphens,"
|
175
174
|
"underscores, or spaces."
|
176
175
|
)
|
177
|
-
for evaluator_name in human_evaluators or {}:
|
178
|
-
if not evaluator_name_regex.match(evaluator_name):
|
179
|
-
raise ValueError(
|
180
|
-
f'Invalid human evaluator key: "{evaluator_name}". '
|
181
|
-
"Keys must only contain letters, digits, hyphens,"
|
182
|
-
"underscores, or spaces."
|
183
|
-
)
|
184
|
-
|
185
|
-
if intersection := set(evaluators.keys()) & set(human_evaluators.keys()):
|
186
|
-
raise ValueError(
|
187
|
-
"Evaluator and human evaluator names must not overlap. "
|
188
|
-
f"Repeated keys: {intersection}"
|
189
|
-
)
|
190
176
|
|
191
177
|
self.is_finished = False
|
192
178
|
self.reporter = EvaluationReporter()
|
@@ -281,6 +267,9 @@ class Evaluation:
|
|
281
267
|
else self.executor(datapoint.data)
|
282
268
|
)
|
283
269
|
L.set_span_output(output)
|
270
|
+
executor_span_id = uuid.UUID(
|
271
|
+
int=executor_span.get_span_context().span_id
|
272
|
+
)
|
284
273
|
target = datapoint.target
|
285
274
|
|
286
275
|
# Iterate over evaluators
|
@@ -310,6 +299,7 @@ class Evaluation:
|
|
310
299
|
executor_output=output,
|
311
300
|
scores=scores,
|
312
301
|
trace_id=trace_id,
|
302
|
+
executor_span_id=executor_span_id,
|
313
303
|
)
|
314
304
|
|
315
305
|
|
@@ -317,7 +307,7 @@ def evaluate(
|
|
317
307
|
data: Union[EvaluationDataset, list[Union[Datapoint, dict]]],
|
318
308
|
executor: ExecutorFunction,
|
319
309
|
evaluators: dict[str, EvaluatorFunction],
|
320
|
-
human_evaluators:
|
310
|
+
human_evaluators: list[HumanEvaluator] = [],
|
321
311
|
name: Optional[str] = None,
|
322
312
|
group_id: Optional[str] = None,
|
323
313
|
batch_size: int = DEFAULT_BATCH_SIZE,
|
@@ -352,10 +342,10 @@ def evaluate(
|
|
352
342
|
If the score is a single number, it will be named after the\
|
353
343
|
evaluator function. Evaluator function names must contain only\
|
354
344
|
letters, digits, hyphens, underscores, or spaces.
|
355
|
-
human_evaluators (
|
356
|
-
[Beta]
|
357
|
-
|
358
|
-
Defaults to an empty dictionary.
|
345
|
+
human_evaluators (list[HumanEvaluator], optional):\
|
346
|
+
[Beta] List of instances of HumanEvaluator. For now, human\
|
347
|
+
evaluator only holds the queue name.
|
348
|
+
Defaults to an empty list.
|
359
349
|
name (Optional[str], optional): Optional name of the evaluation.\
|
360
350
|
Used to identify the evaluation in the group.\
|
361
351
|
If not provided, a random name will be generated.
|
lmnr/sdk/types.py
CHANGED
@@ -131,6 +131,7 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
|
|
131
131
|
scores: dict[str, Numeric]
|
132
132
|
human_evaluators: dict[str, HumanEvaluator] = pydantic.Field(default_factory=dict)
|
133
133
|
trace_id: uuid.UUID
|
134
|
+
executor_span_id: uuid.UUID
|
134
135
|
|
135
136
|
# uuid is not serializable by default, so we need to convert it to a string
|
136
137
|
def to_dict(self):
|
@@ -141,14 +142,15 @@ class EvaluationResultDatapoint(pydantic.BaseModel):
|
|
141
142
|
"executorOutput": serialize(self.executor_output),
|
142
143
|
"scores": self.scores,
|
143
144
|
"traceId": str(self.trace_id),
|
144
|
-
"humanEvaluators":
|
145
|
-
|
145
|
+
"humanEvaluators": [
|
146
|
+
(
|
146
147
|
v.model_dump()
|
147
148
|
if isinstance(v, pydantic.BaseModel)
|
148
149
|
else serialize(v)
|
149
150
|
)
|
150
|
-
for
|
151
|
-
|
151
|
+
for v in self.human_evaluators
|
152
|
+
],
|
153
|
+
"executorSpanId": str(self.executor_span_id),
|
152
154
|
}
|
153
155
|
except Exception as e:
|
154
156
|
raise ValueError(f"Error serializing EvaluationResultDatapoint: {e}")
|
@@ -3,10 +3,10 @@ lmnr/cli.py,sha256=Ptvm5dsNLKUY5lwnN8XkT5GtCYjzpRNi2WvefknB3OQ,1079
|
|
3
3
|
lmnr/sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
4
|
lmnr/sdk/datasets.py,sha256=w8U9E6fvetAo65Cb2CbYzlfhY8CfXAR-VysrakG6-4I,1591
|
5
5
|
lmnr/sdk/decorators.py,sha256=ZSDaEZyjo-RUzRCltsNbe6x0t9SKl2xRQ2q4uaKvXtk,2250
|
6
|
-
lmnr/sdk/evaluations.py,sha256=
|
6
|
+
lmnr/sdk/evaluations.py,sha256=5Vfyp0aIjuGpqfuM3cqsaaLpcoO7z6lcOOKxnyHCNHk,16264
|
7
7
|
lmnr/sdk/laminar.py,sha256=H87fXSWb9shcPW4AeoYwvTXJ-jSTjzm2sI1A1U1Vkg8,18780
|
8
8
|
lmnr/sdk/log.py,sha256=cZBeUoSK39LMEV-X4-eEhTWOciULRfHaKfRK8YqIM8I,1532
|
9
|
-
lmnr/sdk/types.py,sha256=
|
9
|
+
lmnr/sdk/types.py,sha256=CHbKYnEkiwsEU3Fcnoz4tDawrjII2RLYhP6hzc3-t_M,5593
|
10
10
|
lmnr/sdk/utils.py,sha256=Uk8y15x-sd5tP2ERONahElLDJVEy_3dA_1_5g9A6auY,3358
|
11
11
|
lmnr/traceloop_sdk/.flake8,sha256=bCxuDlGx3YQ55QHKPiGJkncHanh9qGjQJUujcFa3lAU,150
|
12
12
|
lmnr/traceloop_sdk/.python-version,sha256=9OLQBQVbD4zE4cJsPePhnAfV_snrPSoqEQw-PXgPMOs,6
|
@@ -45,8 +45,8 @@ lmnr/traceloop_sdk/utils/in_memory_span_exporter.py,sha256=H_4TRaThMO1H6vUQ0OpQv
|
|
45
45
|
lmnr/traceloop_sdk/utils/json_encoder.py,sha256=dK6b_axr70IYL7Vv-bu4wntvDDuyntoqsHaddqX7P58,463
|
46
46
|
lmnr/traceloop_sdk/utils/package_check.py,sha256=TZSngzJOpFhfUZLXIs38cpMxQiZSmp0D-sCrIyhz7BA,251
|
47
47
|
lmnr/traceloop_sdk/version.py,sha256=OlatFEFA4ttqSSIiV8jdE-sq3KG5zu2hnC4B4mzWF3s,23
|
48
|
-
lmnr-0.4.
|
49
|
-
lmnr-0.4.
|
50
|
-
lmnr-0.4.
|
51
|
-
lmnr-0.4.
|
52
|
-
lmnr-0.4.
|
48
|
+
lmnr-0.4.29b3.dist-info/LICENSE,sha256=67b_wJHVV1CBaWkrKFWU1wyqTPSdzH77Ls-59631COg,10411
|
49
|
+
lmnr-0.4.29b3.dist-info/METADATA,sha256=UQ97DYAQ772h0XegYk-od_sdoOTnD_hTM-M-wX5TWLQ,10690
|
50
|
+
lmnr-0.4.29b3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
51
|
+
lmnr-0.4.29b3.dist-info/entry_points.txt,sha256=K1jE20ww4jzHNZLnsfWBvU3YKDGBgbOiYG5Y7ivQcq4,37
|
52
|
+
lmnr-0.4.29b3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|