scale-gp-beta 0.1.0a11__py3-none-any.whl → 0.1.0a13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scale_gp_beta/__init__.py +5 -0
- scale_gp_beta/_utils/_proxy.py +4 -1
- scale_gp_beta/_utils/_resources_proxy.py +24 -0
- scale_gp_beta/_version.py +1 -1
- scale_gp_beta/resources/chat/completions.py +136 -2
- scale_gp_beta/resources/dataset_items.py +11 -11
- scale_gp_beta/resources/datasets.py +13 -5
- scale_gp_beta/resources/evaluation_items.py +6 -8
- scale_gp_beta/resources/evaluations.py +138 -21
- scale_gp_beta/resources/files/files.py +5 -5
- scale_gp_beta/resources/models.py +31 -35
- scale_gp_beta/resources/spans.py +42 -22
- scale_gp_beta/types/__init__.py +8 -1
- scale_gp_beta/types/chat/__init__.py +2 -0
- scale_gp_beta/types/chat/completion_models_params.py +29 -0
- scale_gp_beta/types/chat/model_definition.py +32 -0
- scale_gp_beta/types/component.py +18 -0
- scale_gp_beta/types/component_param.py +19 -0
- scale_gp_beta/types/container.py +35 -0
- scale_gp_beta/types/container_param.py +28 -0
- scale_gp_beta/types/dataset_item_list_params.py +4 -5
- scale_gp_beta/types/dataset_item_retrieve_params.py +1 -2
- scale_gp_beta/types/dataset_list_params.py +7 -3
- scale_gp_beta/types/evaluation.py +12 -2
- scale_gp_beta/types/evaluation_create_params.py +5 -5
- scale_gp_beta/types/{evaluation_archive_response.py → evaluation_delete_response.py} +2 -2
- scale_gp_beta/types/evaluation_item_list_params.py +3 -4
- scale_gp_beta/types/evaluation_list_params.py +7 -3
- scale_gp_beta/types/evaluation_task.py +232 -33
- scale_gp_beta/types/evaluation_task_param.py +176 -33
- scale_gp_beta/types/evaluation_update_params.py +17 -0
- scale_gp_beta/types/file_list_params.py +2 -3
- scale_gp_beta/types/inference_model.py +0 -4
- scale_gp_beta/types/item_locator.py +7 -0
- scale_gp_beta/types/item_locator_template.py +7 -0
- scale_gp_beta/types/model_list_params.py +15 -18
- scale_gp_beta/types/span.py +40 -1
- scale_gp_beta/types/span_create_params.py +13 -5
- scale_gp_beta/types/span_list_params.py +4 -5
- scale_gp_beta/types/span_update_params.py +5 -3
- {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/METADATA +1 -1
- {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/RECORD +44 -34
- {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/WHEEL +0 -0
- {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
from typing import TYPE_CHECKING, Dict, List, Union, Optional
|
|
4
6
|
from typing_extensions import Literal, Annotated, TypeAlias
|
|
5
7
|
|
|
6
8
|
from .._utils import PropertyInfo
|
|
9
|
+
from .._compat import PYDANTIC_V2
|
|
7
10
|
from .._models import BaseModel
|
|
11
|
+
from .item_locator import ItemLocator
|
|
8
12
|
|
|
9
13
|
__all__ = [
|
|
10
14
|
"EvaluationTask",
|
|
@@ -21,63 +25,76 @@ __all__ = [
|
|
|
21
25
|
"ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides",
|
|
22
26
|
"ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState",
|
|
23
27
|
"ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace",
|
|
28
|
+
"MetricEvaluationTask",
|
|
29
|
+
"MetricEvaluationTaskConfiguration",
|
|
30
|
+
"MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator",
|
|
31
|
+
"MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator",
|
|
32
|
+
"MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator",
|
|
33
|
+
"MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator",
|
|
34
|
+
"MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator",
|
|
35
|
+
"MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator",
|
|
36
|
+
"MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator",
|
|
37
|
+
"AutoEvaluationQuestionTask",
|
|
38
|
+
"AutoEvaluationQuestionTaskConfiguration",
|
|
39
|
+
"ContributorEvaluationQuestionTask",
|
|
40
|
+
"ContributorEvaluationQuestionTaskConfiguration",
|
|
24
41
|
]
|
|
25
42
|
|
|
26
43
|
|
|
27
44
|
class ChatCompletionEvaluationTaskConfiguration(BaseModel):
|
|
28
|
-
messages: Union[List[Dict[str, object]],
|
|
45
|
+
messages: Union[List[Dict[str, object]], ItemLocator]
|
|
29
46
|
|
|
30
47
|
model: str
|
|
31
48
|
|
|
32
|
-
audio: Union[Dict[str, object],
|
|
49
|
+
audio: Union[Dict[str, object], ItemLocator, None] = None
|
|
33
50
|
|
|
34
|
-
frequency_penalty: Union[float,
|
|
51
|
+
frequency_penalty: Union[float, ItemLocator, None] = None
|
|
35
52
|
|
|
36
|
-
function_call: Union[Dict[str, object],
|
|
53
|
+
function_call: Union[Dict[str, object], ItemLocator, None] = None
|
|
37
54
|
|
|
38
|
-
functions: Union[List[Dict[str, object]],
|
|
55
|
+
functions: Union[List[Dict[str, object]], ItemLocator, None] = None
|
|
39
56
|
|
|
40
|
-
logit_bias: Union[Dict[str, int],
|
|
57
|
+
logit_bias: Union[Dict[str, int], ItemLocator, None] = None
|
|
41
58
|
|
|
42
|
-
logprobs: Union[bool,
|
|
59
|
+
logprobs: Union[bool, ItemLocator, None] = None
|
|
43
60
|
|
|
44
|
-
max_completion_tokens: Union[int,
|
|
61
|
+
max_completion_tokens: Union[int, ItemLocator, None] = None
|
|
45
62
|
|
|
46
|
-
max_tokens: Union[int,
|
|
63
|
+
max_tokens: Union[int, ItemLocator, None] = None
|
|
47
64
|
|
|
48
|
-
metadata: Union[Dict[str, str],
|
|
65
|
+
metadata: Union[Dict[str, str], ItemLocator, None] = None
|
|
49
66
|
|
|
50
|
-
modalities: Union[List[str],
|
|
67
|
+
modalities: Union[List[str], ItemLocator, None] = None
|
|
51
68
|
|
|
52
|
-
n: Union[int,
|
|
69
|
+
n: Union[int, ItemLocator, None] = None
|
|
53
70
|
|
|
54
|
-
parallel_tool_calls: Union[bool,
|
|
71
|
+
parallel_tool_calls: Union[bool, ItemLocator, None] = None
|
|
55
72
|
|
|
56
|
-
prediction: Union[Dict[str, object],
|
|
73
|
+
prediction: Union[Dict[str, object], ItemLocator, None] = None
|
|
57
74
|
|
|
58
|
-
presence_penalty: Union[float,
|
|
75
|
+
presence_penalty: Union[float, ItemLocator, None] = None
|
|
59
76
|
|
|
60
77
|
reasoning_effort: Optional[str] = None
|
|
61
78
|
|
|
62
|
-
response_format: Union[Dict[str, object],
|
|
79
|
+
response_format: Union[Dict[str, object], ItemLocator, None] = None
|
|
63
80
|
|
|
64
|
-
seed: Union[int,
|
|
81
|
+
seed: Union[int, ItemLocator, None] = None
|
|
65
82
|
|
|
66
83
|
stop: Optional[str] = None
|
|
67
84
|
|
|
68
|
-
store: Union[bool,
|
|
85
|
+
store: Union[bool, ItemLocator, None] = None
|
|
69
86
|
|
|
70
|
-
temperature: Union[float,
|
|
87
|
+
temperature: Union[float, ItemLocator, None] = None
|
|
71
88
|
|
|
72
89
|
tool_choice: Optional[str] = None
|
|
73
90
|
|
|
74
|
-
tools: Union[List[Dict[str, object]],
|
|
91
|
+
tools: Union[List[Dict[str, object]], ItemLocator, None] = None
|
|
75
92
|
|
|
76
|
-
top_k: Union[int,
|
|
93
|
+
top_k: Union[int, ItemLocator, None] = None
|
|
77
94
|
|
|
78
|
-
top_logprobs: Union[int,
|
|
95
|
+
top_logprobs: Union[int, ItemLocator, None] = None
|
|
79
96
|
|
|
80
|
-
top_p: Union[float,
|
|
97
|
+
top_p: Union[float, ItemLocator, None] = None
|
|
81
98
|
|
|
82
99
|
if TYPE_CHECKING:
|
|
83
100
|
# Stub to indicate that arbitrary properties are accepted.
|
|
@@ -90,7 +107,7 @@ class ChatCompletionEvaluationTask(BaseModel):
|
|
|
90
107
|
configuration: ChatCompletionEvaluationTaskConfiguration
|
|
91
108
|
|
|
92
109
|
alias: Optional[str] = None
|
|
93
|
-
"""Alias to title the results column. Defaults to the `
|
|
110
|
+
"""Alias to title the results column. Defaults to the `chat_completion`"""
|
|
94
111
|
|
|
95
112
|
task_type: Optional[Literal["chat_completion"]] = None
|
|
96
113
|
|
|
@@ -102,14 +119,14 @@ class GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInf
|
|
|
102
119
|
|
|
103
120
|
|
|
104
121
|
GenericInferenceEvaluationTaskConfigurationInferenceConfiguration: TypeAlias = Union[
|
|
105
|
-
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration,
|
|
122
|
+
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration, ItemLocator
|
|
106
123
|
]
|
|
107
124
|
|
|
108
125
|
|
|
109
126
|
class GenericInferenceEvaluationTaskConfiguration(BaseModel):
|
|
110
127
|
model: str
|
|
111
128
|
|
|
112
|
-
args: Union[Dict[str, object],
|
|
129
|
+
args: Union[Dict[str, object], ItemLocator, None] = None
|
|
113
130
|
|
|
114
131
|
inference_configuration: Optional[GenericInferenceEvaluationTaskConfigurationInferenceConfiguration] = None
|
|
115
132
|
|
|
@@ -118,7 +135,7 @@ class GenericInferenceEvaluationTask(BaseModel):
|
|
|
118
135
|
configuration: GenericInferenceEvaluationTaskConfiguration
|
|
119
136
|
|
|
120
137
|
alias: Optional[str] = None
|
|
121
|
-
"""Alias to title the results column. Defaults to the `
|
|
138
|
+
"""Alias to title the results column. Defaults to the `inference`"""
|
|
122
139
|
|
|
123
140
|
task_type: Optional[Literal["inference"]] = None
|
|
124
141
|
|
|
@@ -169,22 +186,24 @@ class ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplication
|
|
|
169
186
|
List[ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace]
|
|
170
187
|
] = None
|
|
171
188
|
|
|
189
|
+
return_span: Optional[bool] = None
|
|
190
|
+
|
|
172
191
|
use_channels: Optional[bool] = None
|
|
173
192
|
|
|
174
193
|
|
|
175
194
|
ApplicationVariantV1EvaluationTaskConfigurationOverrides: TypeAlias = Union[
|
|
176
|
-
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides,
|
|
195
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides, ItemLocator
|
|
177
196
|
]
|
|
178
197
|
|
|
179
198
|
|
|
180
199
|
class ApplicationVariantV1EvaluationTaskConfiguration(BaseModel):
|
|
181
200
|
application_variant_id: str
|
|
182
201
|
|
|
183
|
-
inputs: Union[Dict[str, object],
|
|
202
|
+
inputs: Union[Dict[str, object], ItemLocator]
|
|
184
203
|
|
|
185
|
-
history: Union[List[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0],
|
|
204
|
+
history: Union[List[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0], ItemLocator, None] = None
|
|
186
205
|
|
|
187
|
-
operation_metadata: Union[Dict[str, object],
|
|
206
|
+
operation_metadata: Union[Dict[str, object], ItemLocator, None] = None
|
|
188
207
|
|
|
189
208
|
overrides: Optional[ApplicationVariantV1EvaluationTaskConfigurationOverrides] = None
|
|
190
209
|
"""Execution override options for agentic applications"""
|
|
@@ -194,12 +213,192 @@ class ApplicationVariantV1EvaluationTask(BaseModel):
|
|
|
194
213
|
configuration: ApplicationVariantV1EvaluationTaskConfiguration
|
|
195
214
|
|
|
196
215
|
alias: Optional[str] = None
|
|
197
|
-
"""Alias to title the results column. Defaults to the `
|
|
216
|
+
"""Alias to title the results column. Defaults to the `application_variant`"""
|
|
198
217
|
|
|
199
218
|
task_type: Optional[Literal["application_variant"]] = None
|
|
200
219
|
|
|
201
220
|
|
|
221
|
+
class MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator(BaseModel):
|
|
222
|
+
candidate: str
|
|
223
|
+
|
|
224
|
+
reference: str
|
|
225
|
+
|
|
226
|
+
type: Literal["bleu"]
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator(BaseModel):
|
|
230
|
+
candidate: str
|
|
231
|
+
|
|
232
|
+
reference: str
|
|
233
|
+
|
|
234
|
+
type: Literal["meteor"]
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator(BaseModel):
|
|
238
|
+
candidate: str
|
|
239
|
+
|
|
240
|
+
reference: str
|
|
241
|
+
|
|
242
|
+
type: Literal["cosine_similarity"]
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator(BaseModel):
|
|
246
|
+
candidate: str
|
|
247
|
+
|
|
248
|
+
reference: str
|
|
249
|
+
|
|
250
|
+
type: Literal["f1"]
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator(BaseModel):
|
|
254
|
+
candidate: str
|
|
255
|
+
|
|
256
|
+
reference: str
|
|
257
|
+
|
|
258
|
+
type: Literal["rouge1"]
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator(BaseModel):
|
|
262
|
+
candidate: str
|
|
263
|
+
|
|
264
|
+
reference: str
|
|
265
|
+
|
|
266
|
+
type: Literal["rouge2"]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator(BaseModel):
|
|
270
|
+
candidate: str
|
|
271
|
+
|
|
272
|
+
reference: str
|
|
273
|
+
|
|
274
|
+
type: Literal["rougeL"]
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
MetricEvaluationTaskConfiguration: TypeAlias = Annotated[
|
|
278
|
+
Union[
|
|
279
|
+
MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator,
|
|
280
|
+
MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator,
|
|
281
|
+
MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator,
|
|
282
|
+
MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator,
|
|
283
|
+
MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator,
|
|
284
|
+
MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator,
|
|
285
|
+
MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator,
|
|
286
|
+
],
|
|
287
|
+
PropertyInfo(discriminator="type"),
|
|
288
|
+
]
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class MetricEvaluationTask(BaseModel):
|
|
292
|
+
configuration: MetricEvaluationTaskConfiguration
|
|
293
|
+
|
|
294
|
+
alias: Optional[str] = None
|
|
295
|
+
"""Alias to title the results column.
|
|
296
|
+
|
|
297
|
+
Defaults to the metric type specified in the configuration
|
|
298
|
+
"""
|
|
299
|
+
|
|
300
|
+
task_type: Optional[Literal["metric"]] = None
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
class AutoEvaluationQuestionTaskConfiguration(BaseModel):
|
|
304
|
+
model: str
|
|
305
|
+
"""model specified as `model_vendor/model_name`"""
|
|
306
|
+
|
|
307
|
+
prompt: str
|
|
308
|
+
|
|
309
|
+
question_id: str
|
|
310
|
+
"""question to be evaluated"""
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class AutoEvaluationQuestionTask(BaseModel):
|
|
314
|
+
configuration: AutoEvaluationQuestionTaskConfiguration
|
|
315
|
+
|
|
316
|
+
alias: Optional[str] = None
|
|
317
|
+
"""Alias to title the results column. Defaults to the `auto_evaluation_question`"""
|
|
318
|
+
|
|
319
|
+
task_type: Optional[Literal["auto_evaluation.question"]] = None
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
class ContributorEvaluationQuestionTaskConfiguration(BaseModel):
|
|
323
|
+
layout: "Container"
|
|
324
|
+
|
|
325
|
+
question_id: str
|
|
326
|
+
|
|
327
|
+
queue_id: Optional[str] = None
|
|
328
|
+
"""The contributor annotation queue to include this task in. Defaults to `default`"""
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class ContributorEvaluationQuestionTask(BaseModel):
|
|
332
|
+
configuration: ContributorEvaluationQuestionTaskConfiguration
|
|
333
|
+
|
|
334
|
+
alias: Optional[str] = None
|
|
335
|
+
"""Alias to title the results column.
|
|
336
|
+
|
|
337
|
+
Defaults to the `contributor_evaluation_question`
|
|
338
|
+
"""
|
|
339
|
+
|
|
340
|
+
task_type: Optional[Literal["contributor_evaluation.question"]] = None
|
|
341
|
+
|
|
342
|
+
|
|
202
343
|
EvaluationTask: TypeAlias = Annotated[
|
|
203
|
-
Union[
|
|
344
|
+
Union[
|
|
345
|
+
ChatCompletionEvaluationTask,
|
|
346
|
+
GenericInferenceEvaluationTask,
|
|
347
|
+
ApplicationVariantV1EvaluationTask,
|
|
348
|
+
MetricEvaluationTask,
|
|
349
|
+
AutoEvaluationQuestionTask,
|
|
350
|
+
ContributorEvaluationQuestionTask,
|
|
351
|
+
],
|
|
204
352
|
PropertyInfo(discriminator="task_type"),
|
|
205
353
|
]
|
|
354
|
+
|
|
355
|
+
from .container import Container
|
|
356
|
+
|
|
357
|
+
if PYDANTIC_V2:
|
|
358
|
+
ChatCompletionEvaluationTask.model_rebuild()
|
|
359
|
+
ChatCompletionEvaluationTaskConfiguration.model_rebuild()
|
|
360
|
+
GenericInferenceEvaluationTask.model_rebuild()
|
|
361
|
+
GenericInferenceEvaluationTaskConfiguration.model_rebuild()
|
|
362
|
+
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration.model_rebuild()
|
|
363
|
+
ApplicationVariantV1EvaluationTask.model_rebuild()
|
|
364
|
+
ApplicationVariantV1EvaluationTaskConfiguration.model_rebuild()
|
|
365
|
+
ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0.model_rebuild()
|
|
366
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides.model_rebuild()
|
|
367
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState.model_rebuild()
|
|
368
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace.model_rebuild()
|
|
369
|
+
MetricEvaluationTask.model_rebuild()
|
|
370
|
+
MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator.model_rebuild()
|
|
371
|
+
MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator.model_rebuild()
|
|
372
|
+
MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator.model_rebuild()
|
|
373
|
+
MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator.model_rebuild()
|
|
374
|
+
MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator.model_rebuild()
|
|
375
|
+
MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator.model_rebuild()
|
|
376
|
+
MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator.model_rebuild()
|
|
377
|
+
AutoEvaluationQuestionTask.model_rebuild()
|
|
378
|
+
AutoEvaluationQuestionTaskConfiguration.model_rebuild()
|
|
379
|
+
ContributorEvaluationQuestionTask.model_rebuild()
|
|
380
|
+
ContributorEvaluationQuestionTaskConfiguration.model_rebuild()
|
|
381
|
+
else:
|
|
382
|
+
ChatCompletionEvaluationTask.update_forward_refs() # type: ignore
|
|
383
|
+
ChatCompletionEvaluationTaskConfiguration.update_forward_refs() # type: ignore
|
|
384
|
+
GenericInferenceEvaluationTask.update_forward_refs() # type: ignore
|
|
385
|
+
GenericInferenceEvaluationTaskConfiguration.update_forward_refs() # type: ignore
|
|
386
|
+
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration.update_forward_refs() # type: ignore
|
|
387
|
+
ApplicationVariantV1EvaluationTask.update_forward_refs() # type: ignore
|
|
388
|
+
ApplicationVariantV1EvaluationTaskConfiguration.update_forward_refs() # type: ignore
|
|
389
|
+
ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0.update_forward_refs() # type: ignore
|
|
390
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides.update_forward_refs() # type: ignore
|
|
391
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState.update_forward_refs() # type: ignore
|
|
392
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace.update_forward_refs() # type: ignore
|
|
393
|
+
MetricEvaluationTask.update_forward_refs() # type: ignore
|
|
394
|
+
MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
395
|
+
MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
396
|
+
MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
397
|
+
MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
398
|
+
MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
399
|
+
MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
400
|
+
MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
401
|
+
AutoEvaluationQuestionTask.update_forward_refs() # type: ignore
|
|
402
|
+
AutoEvaluationQuestionTaskConfiguration.update_forward_refs() # type: ignore
|
|
403
|
+
ContributorEvaluationQuestionTask.update_forward_refs() # type: ignore
|
|
404
|
+
ContributorEvaluationQuestionTaskConfiguration.update_forward_refs() # type: ignore
|
|
@@ -5,6 +5,8 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Dict, List, Union, Iterable
|
|
6
6
|
from typing_extensions import Literal, Required, TypeAlias, TypedDict
|
|
7
7
|
|
|
8
|
+
from .item_locator import ItemLocator
|
|
9
|
+
|
|
8
10
|
__all__ = [
|
|
9
11
|
"EvaluationTaskParam",
|
|
10
12
|
"ChatCompletionEvaluationTask",
|
|
@@ -20,63 +22,76 @@ __all__ = [
|
|
|
20
22
|
"ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides",
|
|
21
23
|
"ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState",
|
|
22
24
|
"ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace",
|
|
25
|
+
"MetricEvaluationTask",
|
|
26
|
+
"MetricEvaluationTaskConfiguration",
|
|
27
|
+
"MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator",
|
|
28
|
+
"MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator",
|
|
29
|
+
"MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator",
|
|
30
|
+
"MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator",
|
|
31
|
+
"MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator",
|
|
32
|
+
"MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator",
|
|
33
|
+
"MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator",
|
|
34
|
+
"AutoEvaluationQuestionTask",
|
|
35
|
+
"AutoEvaluationQuestionTaskConfiguration",
|
|
36
|
+
"ContributorEvaluationQuestionTask",
|
|
37
|
+
"ContributorEvaluationQuestionTaskConfiguration",
|
|
23
38
|
]
|
|
24
39
|
|
|
25
40
|
|
|
26
41
|
class ChatCompletionEvaluationTaskConfigurationTyped(TypedDict, total=False):
|
|
27
|
-
messages: Required[Union[Iterable[Dict[str, object]],
|
|
42
|
+
messages: Required[Union[Iterable[Dict[str, object]], ItemLocator]]
|
|
28
43
|
|
|
29
44
|
model: Required[str]
|
|
30
45
|
|
|
31
|
-
audio: Union[Dict[str, object],
|
|
46
|
+
audio: Union[Dict[str, object], ItemLocator]
|
|
32
47
|
|
|
33
|
-
frequency_penalty: Union[float,
|
|
48
|
+
frequency_penalty: Union[float, ItemLocator]
|
|
34
49
|
|
|
35
|
-
function_call: Union[Dict[str, object],
|
|
50
|
+
function_call: Union[Dict[str, object], ItemLocator]
|
|
36
51
|
|
|
37
|
-
functions: Union[Iterable[Dict[str, object]],
|
|
52
|
+
functions: Union[Iterable[Dict[str, object]], ItemLocator]
|
|
38
53
|
|
|
39
|
-
logit_bias: Union[Dict[str, int],
|
|
54
|
+
logit_bias: Union[Dict[str, int], ItemLocator]
|
|
40
55
|
|
|
41
|
-
logprobs: Union[bool,
|
|
56
|
+
logprobs: Union[bool, ItemLocator]
|
|
42
57
|
|
|
43
|
-
max_completion_tokens: Union[int,
|
|
58
|
+
max_completion_tokens: Union[int, ItemLocator]
|
|
44
59
|
|
|
45
|
-
max_tokens: Union[int,
|
|
60
|
+
max_tokens: Union[int, ItemLocator]
|
|
46
61
|
|
|
47
|
-
metadata: Union[Dict[str, str],
|
|
62
|
+
metadata: Union[Dict[str, str], ItemLocator]
|
|
48
63
|
|
|
49
|
-
modalities: Union[List[str],
|
|
64
|
+
modalities: Union[List[str], ItemLocator]
|
|
50
65
|
|
|
51
|
-
n: Union[int,
|
|
66
|
+
n: Union[int, ItemLocator]
|
|
52
67
|
|
|
53
|
-
parallel_tool_calls: Union[bool,
|
|
68
|
+
parallel_tool_calls: Union[bool, ItemLocator]
|
|
54
69
|
|
|
55
|
-
prediction: Union[Dict[str, object],
|
|
70
|
+
prediction: Union[Dict[str, object], ItemLocator]
|
|
56
71
|
|
|
57
|
-
presence_penalty: Union[float,
|
|
72
|
+
presence_penalty: Union[float, ItemLocator]
|
|
58
73
|
|
|
59
74
|
reasoning_effort: str
|
|
60
75
|
|
|
61
|
-
response_format: Union[Dict[str, object],
|
|
76
|
+
response_format: Union[Dict[str, object], ItemLocator]
|
|
62
77
|
|
|
63
|
-
seed: Union[int,
|
|
78
|
+
seed: Union[int, ItemLocator]
|
|
64
79
|
|
|
65
80
|
stop: str
|
|
66
81
|
|
|
67
|
-
store: Union[bool,
|
|
82
|
+
store: Union[bool, ItemLocator]
|
|
68
83
|
|
|
69
|
-
temperature: Union[float,
|
|
84
|
+
temperature: Union[float, ItemLocator]
|
|
70
85
|
|
|
71
86
|
tool_choice: str
|
|
72
87
|
|
|
73
|
-
tools: Union[Iterable[Dict[str, object]],
|
|
88
|
+
tools: Union[Iterable[Dict[str, object]], ItemLocator]
|
|
74
89
|
|
|
75
|
-
top_k: Union[int,
|
|
90
|
+
top_k: Union[int, ItemLocator]
|
|
76
91
|
|
|
77
|
-
top_logprobs: Union[int,
|
|
92
|
+
top_logprobs: Union[int, ItemLocator]
|
|
78
93
|
|
|
79
|
-
top_p: Union[float,
|
|
94
|
+
top_p: Union[float, ItemLocator]
|
|
80
95
|
|
|
81
96
|
|
|
82
97
|
ChatCompletionEvaluationTaskConfiguration: TypeAlias = Union[
|
|
@@ -88,7 +103,7 @@ class ChatCompletionEvaluationTask(TypedDict, total=False):
|
|
|
88
103
|
configuration: Required[ChatCompletionEvaluationTaskConfiguration]
|
|
89
104
|
|
|
90
105
|
alias: str
|
|
91
|
-
"""Alias to title the results column. Defaults to the `
|
|
106
|
+
"""Alias to title the results column. Defaults to the `chat_completion`"""
|
|
92
107
|
|
|
93
108
|
task_type: Literal["chat_completion"]
|
|
94
109
|
|
|
@@ -102,14 +117,14 @@ class GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInf
|
|
|
102
117
|
|
|
103
118
|
|
|
104
119
|
GenericInferenceEvaluationTaskConfigurationInferenceConfiguration: TypeAlias = Union[
|
|
105
|
-
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration,
|
|
120
|
+
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration, ItemLocator
|
|
106
121
|
]
|
|
107
122
|
|
|
108
123
|
|
|
109
124
|
class GenericInferenceEvaluationTaskConfiguration(TypedDict, total=False):
|
|
110
125
|
model: Required[str]
|
|
111
126
|
|
|
112
|
-
args: Union[Dict[str, object],
|
|
127
|
+
args: Union[Dict[str, object], ItemLocator]
|
|
113
128
|
|
|
114
129
|
inference_configuration: GenericInferenceEvaluationTaskConfigurationInferenceConfiguration
|
|
115
130
|
|
|
@@ -118,7 +133,7 @@ class GenericInferenceEvaluationTask(TypedDict, total=False):
|
|
|
118
133
|
configuration: Required[GenericInferenceEvaluationTaskConfiguration]
|
|
119
134
|
|
|
120
135
|
alias: str
|
|
121
|
-
"""Alias to title the results column. Defaults to the `
|
|
136
|
+
"""Alias to title the results column. Defaults to the `inference`"""
|
|
122
137
|
|
|
123
138
|
task_type: Literal["inference"]
|
|
124
139
|
|
|
@@ -171,22 +186,24 @@ class ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplication
|
|
|
171
186
|
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace
|
|
172
187
|
]
|
|
173
188
|
|
|
189
|
+
return_span: bool
|
|
190
|
+
|
|
174
191
|
use_channels: bool
|
|
175
192
|
|
|
176
193
|
|
|
177
194
|
ApplicationVariantV1EvaluationTaskConfigurationOverrides: TypeAlias = Union[
|
|
178
|
-
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides,
|
|
195
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides, ItemLocator
|
|
179
196
|
]
|
|
180
197
|
|
|
181
198
|
|
|
182
199
|
class ApplicationVariantV1EvaluationTaskConfiguration(TypedDict, total=False):
|
|
183
200
|
application_variant_id: Required[str]
|
|
184
201
|
|
|
185
|
-
inputs: Required[Union[Dict[str, object],
|
|
202
|
+
inputs: Required[Union[Dict[str, object], ItemLocator]]
|
|
186
203
|
|
|
187
|
-
history: Union[Iterable[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0],
|
|
204
|
+
history: Union[Iterable[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0], ItemLocator]
|
|
188
205
|
|
|
189
|
-
operation_metadata: Union[Dict[str, object],
|
|
206
|
+
operation_metadata: Union[Dict[str, object], ItemLocator]
|
|
190
207
|
|
|
191
208
|
overrides: ApplicationVariantV1EvaluationTaskConfigurationOverrides
|
|
192
209
|
"""Execution override options for agentic applications"""
|
|
@@ -196,11 +213,137 @@ class ApplicationVariantV1EvaluationTask(TypedDict, total=False):
|
|
|
196
213
|
configuration: Required[ApplicationVariantV1EvaluationTaskConfiguration]
|
|
197
214
|
|
|
198
215
|
alias: str
|
|
199
|
-
"""Alias to title the results column. Defaults to the `
|
|
216
|
+
"""Alias to title the results column. Defaults to the `application_variant`"""
|
|
200
217
|
|
|
201
218
|
task_type: Literal["application_variant"]
|
|
202
219
|
|
|
203
220
|
|
|
221
|
+
class MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator(TypedDict, total=False):
|
|
222
|
+
candidate: Required[str]
|
|
223
|
+
|
|
224
|
+
reference: Required[str]
|
|
225
|
+
|
|
226
|
+
type: Required[Literal["bleu"]]
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator(TypedDict, total=False):
|
|
230
|
+
candidate: Required[str]
|
|
231
|
+
|
|
232
|
+
reference: Required[str]
|
|
233
|
+
|
|
234
|
+
type: Required[Literal["meteor"]]
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator(TypedDict, total=False):
|
|
238
|
+
candidate: Required[str]
|
|
239
|
+
|
|
240
|
+
reference: Required[str]
|
|
241
|
+
|
|
242
|
+
type: Required[Literal["cosine_similarity"]]
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator(TypedDict, total=False):
|
|
246
|
+
candidate: Required[str]
|
|
247
|
+
|
|
248
|
+
reference: Required[str]
|
|
249
|
+
|
|
250
|
+
type: Required[Literal["f1"]]
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator(TypedDict, total=False):
|
|
254
|
+
candidate: Required[str]
|
|
255
|
+
|
|
256
|
+
reference: Required[str]
|
|
257
|
+
|
|
258
|
+
type: Required[Literal["rouge1"]]
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator(TypedDict, total=False):
|
|
262
|
+
candidate: Required[str]
|
|
263
|
+
|
|
264
|
+
reference: Required[str]
|
|
265
|
+
|
|
266
|
+
type: Required[Literal["rouge2"]]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator(TypedDict, total=False):
|
|
270
|
+
candidate: Required[str]
|
|
271
|
+
|
|
272
|
+
reference: Required[str]
|
|
273
|
+
|
|
274
|
+
type: Required[Literal["rougeL"]]
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
MetricEvaluationTaskConfiguration: TypeAlias = Union[
|
|
278
|
+
MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator,
|
|
279
|
+
MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator,
|
|
280
|
+
MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator,
|
|
281
|
+
MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator,
|
|
282
|
+
MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator,
|
|
283
|
+
MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator,
|
|
284
|
+
MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator,
|
|
285
|
+
]
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class MetricEvaluationTask(TypedDict, total=False):
|
|
289
|
+
configuration: Required[MetricEvaluationTaskConfiguration]
|
|
290
|
+
|
|
291
|
+
alias: str
|
|
292
|
+
"""Alias to title the results column.
|
|
293
|
+
|
|
294
|
+
Defaults to the metric type specified in the configuration
|
|
295
|
+
"""
|
|
296
|
+
|
|
297
|
+
task_type: Literal["metric"]
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
class AutoEvaluationQuestionTaskConfiguration(TypedDict, total=False):
|
|
301
|
+
model: Required[str]
|
|
302
|
+
"""model specified as `model_vendor/model_name`"""
|
|
303
|
+
|
|
304
|
+
prompt: Required[str]
|
|
305
|
+
|
|
306
|
+
question_id: Required[str]
|
|
307
|
+
"""question to be evaluated"""
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class AutoEvaluationQuestionTask(TypedDict, total=False):
|
|
311
|
+
configuration: Required[AutoEvaluationQuestionTaskConfiguration]
|
|
312
|
+
|
|
313
|
+
alias: str
|
|
314
|
+
"""Alias to title the results column. Defaults to the `auto_evaluation_question`"""
|
|
315
|
+
|
|
316
|
+
task_type: Literal["auto_evaluation.question"]
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
class ContributorEvaluationQuestionTaskConfiguration(TypedDict, total=False):
|
|
320
|
+
layout: Required["ContainerParam"]
|
|
321
|
+
|
|
322
|
+
question_id: Required[str]
|
|
323
|
+
|
|
324
|
+
queue_id: str
|
|
325
|
+
"""The contributor annotation queue to include this task in. Defaults to `default`"""
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
class ContributorEvaluationQuestionTask(TypedDict, total=False):
|
|
329
|
+
configuration: Required[ContributorEvaluationQuestionTaskConfiguration]
|
|
330
|
+
|
|
331
|
+
alias: str
|
|
332
|
+
"""Alias to title the results column.
|
|
333
|
+
|
|
334
|
+
Defaults to the `contributor_evaluation_question`
|
|
335
|
+
"""
|
|
336
|
+
|
|
337
|
+
task_type: Literal["contributor_evaluation.question"]
|
|
338
|
+
|
|
339
|
+
|
|
204
340
|
EvaluationTaskParam: TypeAlias = Union[
|
|
205
|
-
ChatCompletionEvaluationTask,
|
|
341
|
+
ChatCompletionEvaluationTask,
|
|
342
|
+
GenericInferenceEvaluationTask,
|
|
343
|
+
ApplicationVariantV1EvaluationTask,
|
|
344
|
+
MetricEvaluationTask,
|
|
345
|
+
AutoEvaluationQuestionTask,
|
|
346
|
+
ContributorEvaluationQuestionTask,
|
|
206
347
|
]
|
|
348
|
+
|
|
349
|
+
from .container_param import ContainerParam
|