scale-gp-beta 0.1.0a11__py3-none-any.whl → 0.1.0a13__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. scale_gp_beta/__init__.py +5 -0
  2. scale_gp_beta/_utils/_proxy.py +4 -1
  3. scale_gp_beta/_utils/_resources_proxy.py +24 -0
  4. scale_gp_beta/_version.py +1 -1
  5. scale_gp_beta/resources/chat/completions.py +136 -2
  6. scale_gp_beta/resources/dataset_items.py +11 -11
  7. scale_gp_beta/resources/datasets.py +13 -5
  8. scale_gp_beta/resources/evaluation_items.py +6 -8
  9. scale_gp_beta/resources/evaluations.py +138 -21
  10. scale_gp_beta/resources/files/files.py +5 -5
  11. scale_gp_beta/resources/models.py +31 -35
  12. scale_gp_beta/resources/spans.py +42 -22
  13. scale_gp_beta/types/__init__.py +8 -1
  14. scale_gp_beta/types/chat/__init__.py +2 -0
  15. scale_gp_beta/types/chat/completion_models_params.py +29 -0
  16. scale_gp_beta/types/chat/model_definition.py +32 -0
  17. scale_gp_beta/types/component.py +18 -0
  18. scale_gp_beta/types/component_param.py +19 -0
  19. scale_gp_beta/types/container.py +35 -0
  20. scale_gp_beta/types/container_param.py +28 -0
  21. scale_gp_beta/types/dataset_item_list_params.py +4 -5
  22. scale_gp_beta/types/dataset_item_retrieve_params.py +1 -2
  23. scale_gp_beta/types/dataset_list_params.py +7 -3
  24. scale_gp_beta/types/evaluation.py +12 -2
  25. scale_gp_beta/types/evaluation_create_params.py +5 -5
  26. scale_gp_beta/types/{evaluation_archive_response.py → evaluation_delete_response.py} +2 -2
  27. scale_gp_beta/types/evaluation_item_list_params.py +3 -4
  28. scale_gp_beta/types/evaluation_list_params.py +7 -3
  29. scale_gp_beta/types/evaluation_task.py +232 -33
  30. scale_gp_beta/types/evaluation_task_param.py +176 -33
  31. scale_gp_beta/types/evaluation_update_params.py +17 -0
  32. scale_gp_beta/types/file_list_params.py +2 -3
  33. scale_gp_beta/types/inference_model.py +0 -4
  34. scale_gp_beta/types/item_locator.py +7 -0
  35. scale_gp_beta/types/item_locator_template.py +7 -0
  36. scale_gp_beta/types/model_list_params.py +15 -18
  37. scale_gp_beta/types/span.py +40 -1
  38. scale_gp_beta/types/span_create_params.py +13 -5
  39. scale_gp_beta/types/span_list_params.py +4 -5
  40. scale_gp_beta/types/span_update_params.py +5 -3
  41. {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/METADATA +1 -1
  42. {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/RECORD +44 -34
  43. {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/WHEEL +0 -0
  44. {scale_gp_beta-0.1.0a11.dist-info → scale_gp_beta-0.1.0a13.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,14 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  from typing import TYPE_CHECKING, Dict, List, Union, Optional
4
6
  from typing_extensions import Literal, Annotated, TypeAlias
5
7
 
6
8
  from .._utils import PropertyInfo
9
+ from .._compat import PYDANTIC_V2
7
10
  from .._models import BaseModel
11
+ from .item_locator import ItemLocator
8
12
 
9
13
  __all__ = [
10
14
  "EvaluationTask",
@@ -21,63 +25,76 @@ __all__ = [
21
25
  "ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides",
22
26
  "ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState",
23
27
  "ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace",
28
+ "MetricEvaluationTask",
29
+ "MetricEvaluationTaskConfiguration",
30
+ "MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator",
31
+ "MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator",
32
+ "MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator",
33
+ "MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator",
34
+ "MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator",
35
+ "MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator",
36
+ "MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator",
37
+ "AutoEvaluationQuestionTask",
38
+ "AutoEvaluationQuestionTaskConfiguration",
39
+ "ContributorEvaluationQuestionTask",
40
+ "ContributorEvaluationQuestionTaskConfiguration",
24
41
  ]
25
42
 
26
43
 
27
44
  class ChatCompletionEvaluationTaskConfiguration(BaseModel):
28
- messages: Union[List[Dict[str, object]], str]
45
+ messages: Union[List[Dict[str, object]], ItemLocator]
29
46
 
30
47
  model: str
31
48
 
32
- audio: Union[Dict[str, object], str, None] = None
49
+ audio: Union[Dict[str, object], ItemLocator, None] = None
33
50
 
34
- frequency_penalty: Union[float, str, None] = None
51
+ frequency_penalty: Union[float, ItemLocator, None] = None
35
52
 
36
- function_call: Union[Dict[str, object], str, None] = None
53
+ function_call: Union[Dict[str, object], ItemLocator, None] = None
37
54
 
38
- functions: Union[List[Dict[str, object]], str, None] = None
55
+ functions: Union[List[Dict[str, object]], ItemLocator, None] = None
39
56
 
40
- logit_bias: Union[Dict[str, int], str, None] = None
57
+ logit_bias: Union[Dict[str, int], ItemLocator, None] = None
41
58
 
42
- logprobs: Union[bool, str, None] = None
59
+ logprobs: Union[bool, ItemLocator, None] = None
43
60
 
44
- max_completion_tokens: Union[int, str, None] = None
61
+ max_completion_tokens: Union[int, ItemLocator, None] = None
45
62
 
46
- max_tokens: Union[int, str, None] = None
63
+ max_tokens: Union[int, ItemLocator, None] = None
47
64
 
48
- metadata: Union[Dict[str, str], str, None] = None
65
+ metadata: Union[Dict[str, str], ItemLocator, None] = None
49
66
 
50
- modalities: Union[List[str], str, None] = None
67
+ modalities: Union[List[str], ItemLocator, None] = None
51
68
 
52
- n: Union[int, str, None] = None
69
+ n: Union[int, ItemLocator, None] = None
53
70
 
54
- parallel_tool_calls: Union[bool, str, None] = None
71
+ parallel_tool_calls: Union[bool, ItemLocator, None] = None
55
72
 
56
- prediction: Union[Dict[str, object], str, None] = None
73
+ prediction: Union[Dict[str, object], ItemLocator, None] = None
57
74
 
58
- presence_penalty: Union[float, str, None] = None
75
+ presence_penalty: Union[float, ItemLocator, None] = None
59
76
 
60
77
  reasoning_effort: Optional[str] = None
61
78
 
62
- response_format: Union[Dict[str, object], str, None] = None
79
+ response_format: Union[Dict[str, object], ItemLocator, None] = None
63
80
 
64
- seed: Union[int, str, None] = None
81
+ seed: Union[int, ItemLocator, None] = None
65
82
 
66
83
  stop: Optional[str] = None
67
84
 
68
- store: Union[bool, str, None] = None
85
+ store: Union[bool, ItemLocator, None] = None
69
86
 
70
- temperature: Union[float, str, None] = None
87
+ temperature: Union[float, ItemLocator, None] = None
71
88
 
72
89
  tool_choice: Optional[str] = None
73
90
 
74
- tools: Union[List[Dict[str, object]], str, None] = None
91
+ tools: Union[List[Dict[str, object]], ItemLocator, None] = None
75
92
 
76
- top_k: Union[int, str, None] = None
93
+ top_k: Union[int, ItemLocator, None] = None
77
94
 
78
- top_logprobs: Union[int, str, None] = None
95
+ top_logprobs: Union[int, ItemLocator, None] = None
79
96
 
80
- top_p: Union[float, str, None] = None
97
+ top_p: Union[float, ItemLocator, None] = None
81
98
 
82
99
  if TYPE_CHECKING:
83
100
  # Stub to indicate that arbitrary properties are accepted.
@@ -90,7 +107,7 @@ class ChatCompletionEvaluationTask(BaseModel):
90
107
  configuration: ChatCompletionEvaluationTaskConfiguration
91
108
 
92
109
  alias: Optional[str] = None
93
- """Alias to title the results column. Defaults to the `task_type`"""
110
+ """Alias to title the results column. Defaults to the `chat_completion`"""
94
111
 
95
112
  task_type: Optional[Literal["chat_completion"]] = None
96
113
 
@@ -102,14 +119,14 @@ class GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInf
102
119
 
103
120
 
104
121
  GenericInferenceEvaluationTaskConfigurationInferenceConfiguration: TypeAlias = Union[
105
- GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration, str
122
+ GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration, ItemLocator
106
123
  ]
107
124
 
108
125
 
109
126
  class GenericInferenceEvaluationTaskConfiguration(BaseModel):
110
127
  model: str
111
128
 
112
- args: Union[Dict[str, object], str, None] = None
129
+ args: Union[Dict[str, object], ItemLocator, None] = None
113
130
 
114
131
  inference_configuration: Optional[GenericInferenceEvaluationTaskConfigurationInferenceConfiguration] = None
115
132
 
@@ -118,7 +135,7 @@ class GenericInferenceEvaluationTask(BaseModel):
118
135
  configuration: GenericInferenceEvaluationTaskConfiguration
119
136
 
120
137
  alias: Optional[str] = None
121
- """Alias to title the results column. Defaults to the `task_type`"""
138
+ """Alias to title the results column. Defaults to the `inference`"""
122
139
 
123
140
  task_type: Optional[Literal["inference"]] = None
124
141
 
@@ -169,22 +186,24 @@ class ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplication
169
186
  List[ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace]
170
187
  ] = None
171
188
 
189
+ return_span: Optional[bool] = None
190
+
172
191
  use_channels: Optional[bool] = None
173
192
 
174
193
 
175
194
  ApplicationVariantV1EvaluationTaskConfigurationOverrides: TypeAlias = Union[
176
- ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides, str
195
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides, ItemLocator
177
196
  ]
178
197
 
179
198
 
180
199
  class ApplicationVariantV1EvaluationTaskConfiguration(BaseModel):
181
200
  application_variant_id: str
182
201
 
183
- inputs: Union[Dict[str, object], str]
202
+ inputs: Union[Dict[str, object], ItemLocator]
184
203
 
185
- history: Union[List[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0], str, None] = None
204
+ history: Union[List[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0], ItemLocator, None] = None
186
205
 
187
- operation_metadata: Union[Dict[str, object], str, None] = None
206
+ operation_metadata: Union[Dict[str, object], ItemLocator, None] = None
188
207
 
189
208
  overrides: Optional[ApplicationVariantV1EvaluationTaskConfigurationOverrides] = None
190
209
  """Execution override options for agentic applications"""
@@ -194,12 +213,192 @@ class ApplicationVariantV1EvaluationTask(BaseModel):
194
213
  configuration: ApplicationVariantV1EvaluationTaskConfiguration
195
214
 
196
215
  alias: Optional[str] = None
197
- """Alias to title the results column. Defaults to the `task_type`"""
216
+ """Alias to title the results column. Defaults to the `application_variant`"""
198
217
 
199
218
  task_type: Optional[Literal["application_variant"]] = None
200
219
 
201
220
 
221
+ class MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator(BaseModel):
222
+ candidate: str
223
+
224
+ reference: str
225
+
226
+ type: Literal["bleu"]
227
+
228
+
229
+ class MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator(BaseModel):
230
+ candidate: str
231
+
232
+ reference: str
233
+
234
+ type: Literal["meteor"]
235
+
236
+
237
+ class MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator(BaseModel):
238
+ candidate: str
239
+
240
+ reference: str
241
+
242
+ type: Literal["cosine_similarity"]
243
+
244
+
245
+ class MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator(BaseModel):
246
+ candidate: str
247
+
248
+ reference: str
249
+
250
+ type: Literal["f1"]
251
+
252
+
253
+ class MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator(BaseModel):
254
+ candidate: str
255
+
256
+ reference: str
257
+
258
+ type: Literal["rouge1"]
259
+
260
+
261
+ class MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator(BaseModel):
262
+ candidate: str
263
+
264
+ reference: str
265
+
266
+ type: Literal["rouge2"]
267
+
268
+
269
+ class MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator(BaseModel):
270
+ candidate: str
271
+
272
+ reference: str
273
+
274
+ type: Literal["rougeL"]
275
+
276
+
277
+ MetricEvaluationTaskConfiguration: TypeAlias = Annotated[
278
+ Union[
279
+ MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator,
280
+ MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator,
281
+ MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator,
282
+ MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator,
283
+ MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator,
284
+ MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator,
285
+ MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator,
286
+ ],
287
+ PropertyInfo(discriminator="type"),
288
+ ]
289
+
290
+
291
+ class MetricEvaluationTask(BaseModel):
292
+ configuration: MetricEvaluationTaskConfiguration
293
+
294
+ alias: Optional[str] = None
295
+ """Alias to title the results column.
296
+
297
+ Defaults to the metric type specified in the configuration
298
+ """
299
+
300
+ task_type: Optional[Literal["metric"]] = None
301
+
302
+
303
+ class AutoEvaluationQuestionTaskConfiguration(BaseModel):
304
+ model: str
305
+ """model specified as `model_vendor/model_name`"""
306
+
307
+ prompt: str
308
+
309
+ question_id: str
310
+ """question to be evaluated"""
311
+
312
+
313
+ class AutoEvaluationQuestionTask(BaseModel):
314
+ configuration: AutoEvaluationQuestionTaskConfiguration
315
+
316
+ alias: Optional[str] = None
317
+ """Alias to title the results column. Defaults to the `auto_evaluation_question`"""
318
+
319
+ task_type: Optional[Literal["auto_evaluation.question"]] = None
320
+
321
+
322
+ class ContributorEvaluationQuestionTaskConfiguration(BaseModel):
323
+ layout: "Container"
324
+
325
+ question_id: str
326
+
327
+ queue_id: Optional[str] = None
328
+ """The contributor annotation queue to include this task in. Defaults to `default`"""
329
+
330
+
331
+ class ContributorEvaluationQuestionTask(BaseModel):
332
+ configuration: ContributorEvaluationQuestionTaskConfiguration
333
+
334
+ alias: Optional[str] = None
335
+ """Alias to title the results column.
336
+
337
+ Defaults to the `contributor_evaluation_question`
338
+ """
339
+
340
+ task_type: Optional[Literal["contributor_evaluation.question"]] = None
341
+
342
+
202
343
  EvaluationTask: TypeAlias = Annotated[
203
- Union[ChatCompletionEvaluationTask, GenericInferenceEvaluationTask, ApplicationVariantV1EvaluationTask],
344
+ Union[
345
+ ChatCompletionEvaluationTask,
346
+ GenericInferenceEvaluationTask,
347
+ ApplicationVariantV1EvaluationTask,
348
+ MetricEvaluationTask,
349
+ AutoEvaluationQuestionTask,
350
+ ContributorEvaluationQuestionTask,
351
+ ],
204
352
  PropertyInfo(discriminator="task_type"),
205
353
  ]
354
+
355
+ from .container import Container
356
+
357
+ if PYDANTIC_V2:
358
+ ChatCompletionEvaluationTask.model_rebuild()
359
+ ChatCompletionEvaluationTaskConfiguration.model_rebuild()
360
+ GenericInferenceEvaluationTask.model_rebuild()
361
+ GenericInferenceEvaluationTaskConfiguration.model_rebuild()
362
+ GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration.model_rebuild()
363
+ ApplicationVariantV1EvaluationTask.model_rebuild()
364
+ ApplicationVariantV1EvaluationTaskConfiguration.model_rebuild()
365
+ ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0.model_rebuild()
366
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides.model_rebuild()
367
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState.model_rebuild()
368
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace.model_rebuild()
369
+ MetricEvaluationTask.model_rebuild()
370
+ MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator.model_rebuild()
371
+ MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator.model_rebuild()
372
+ MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator.model_rebuild()
373
+ MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator.model_rebuild()
374
+ MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator.model_rebuild()
375
+ MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator.model_rebuild()
376
+ MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator.model_rebuild()
377
+ AutoEvaluationQuestionTask.model_rebuild()
378
+ AutoEvaluationQuestionTaskConfiguration.model_rebuild()
379
+ ContributorEvaluationQuestionTask.model_rebuild()
380
+ ContributorEvaluationQuestionTaskConfiguration.model_rebuild()
381
+ else:
382
+ ChatCompletionEvaluationTask.update_forward_refs() # type: ignore
383
+ ChatCompletionEvaluationTaskConfiguration.update_forward_refs() # type: ignore
384
+ GenericInferenceEvaluationTask.update_forward_refs() # type: ignore
385
+ GenericInferenceEvaluationTaskConfiguration.update_forward_refs() # type: ignore
386
+ GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration.update_forward_refs() # type: ignore
387
+ ApplicationVariantV1EvaluationTask.update_forward_refs() # type: ignore
388
+ ApplicationVariantV1EvaluationTaskConfiguration.update_forward_refs() # type: ignore
389
+ ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0.update_forward_refs() # type: ignore
390
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides.update_forward_refs() # type: ignore
391
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState.update_forward_refs() # type: ignore
392
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace.update_forward_refs() # type: ignore
393
+ MetricEvaluationTask.update_forward_refs() # type: ignore
394
+ MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator.update_forward_refs() # type: ignore
395
+ MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator.update_forward_refs() # type: ignore
396
+ MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator.update_forward_refs() # type: ignore
397
+ MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator.update_forward_refs() # type: ignore
398
+ MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator.update_forward_refs() # type: ignore
399
+ MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator.update_forward_refs() # type: ignore
400
+ MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator.update_forward_refs() # type: ignore
401
+ AutoEvaluationQuestionTask.update_forward_refs() # type: ignore
402
+ AutoEvaluationQuestionTaskConfiguration.update_forward_refs() # type: ignore
403
+ ContributorEvaluationQuestionTask.update_forward_refs() # type: ignore
404
+ ContributorEvaluationQuestionTaskConfiguration.update_forward_refs() # type: ignore
@@ -5,6 +5,8 @@ from __future__ import annotations
5
5
  from typing import Dict, List, Union, Iterable
6
6
  from typing_extensions import Literal, Required, TypeAlias, TypedDict
7
7
 
8
+ from .item_locator import ItemLocator
9
+
8
10
  __all__ = [
9
11
  "EvaluationTaskParam",
10
12
  "ChatCompletionEvaluationTask",
@@ -20,63 +22,76 @@ __all__ = [
20
22
  "ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides",
21
23
  "ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState",
22
24
  "ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace",
25
+ "MetricEvaluationTask",
26
+ "MetricEvaluationTaskConfiguration",
27
+ "MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator",
28
+ "MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator",
29
+ "MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator",
30
+ "MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator",
31
+ "MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator",
32
+ "MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator",
33
+ "MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator",
34
+ "AutoEvaluationQuestionTask",
35
+ "AutoEvaluationQuestionTaskConfiguration",
36
+ "ContributorEvaluationQuestionTask",
37
+ "ContributorEvaluationQuestionTaskConfiguration",
23
38
  ]
24
39
 
25
40
 
26
41
  class ChatCompletionEvaluationTaskConfigurationTyped(TypedDict, total=False):
27
- messages: Required[Union[Iterable[Dict[str, object]], str]]
42
+ messages: Required[Union[Iterable[Dict[str, object]], ItemLocator]]
28
43
 
29
44
  model: Required[str]
30
45
 
31
- audio: Union[Dict[str, object], str]
46
+ audio: Union[Dict[str, object], ItemLocator]
32
47
 
33
- frequency_penalty: Union[float, str]
48
+ frequency_penalty: Union[float, ItemLocator]
34
49
 
35
- function_call: Union[Dict[str, object], str]
50
+ function_call: Union[Dict[str, object], ItemLocator]
36
51
 
37
- functions: Union[Iterable[Dict[str, object]], str]
52
+ functions: Union[Iterable[Dict[str, object]], ItemLocator]
38
53
 
39
- logit_bias: Union[Dict[str, int], str]
54
+ logit_bias: Union[Dict[str, int], ItemLocator]
40
55
 
41
- logprobs: Union[bool, str]
56
+ logprobs: Union[bool, ItemLocator]
42
57
 
43
- max_completion_tokens: Union[int, str]
58
+ max_completion_tokens: Union[int, ItemLocator]
44
59
 
45
- max_tokens: Union[int, str]
60
+ max_tokens: Union[int, ItemLocator]
46
61
 
47
- metadata: Union[Dict[str, str], str]
62
+ metadata: Union[Dict[str, str], ItemLocator]
48
63
 
49
- modalities: Union[List[str], str]
64
+ modalities: Union[List[str], ItemLocator]
50
65
 
51
- n: Union[int, str]
66
+ n: Union[int, ItemLocator]
52
67
 
53
- parallel_tool_calls: Union[bool, str]
68
+ parallel_tool_calls: Union[bool, ItemLocator]
54
69
 
55
- prediction: Union[Dict[str, object], str]
70
+ prediction: Union[Dict[str, object], ItemLocator]
56
71
 
57
- presence_penalty: Union[float, str]
72
+ presence_penalty: Union[float, ItemLocator]
58
73
 
59
74
  reasoning_effort: str
60
75
 
61
- response_format: Union[Dict[str, object], str]
76
+ response_format: Union[Dict[str, object], ItemLocator]
62
77
 
63
- seed: Union[int, str]
78
+ seed: Union[int, ItemLocator]
64
79
 
65
80
  stop: str
66
81
 
67
- store: Union[bool, str]
82
+ store: Union[bool, ItemLocator]
68
83
 
69
- temperature: Union[float, str]
84
+ temperature: Union[float, ItemLocator]
70
85
 
71
86
  tool_choice: str
72
87
 
73
- tools: Union[Iterable[Dict[str, object]], str]
88
+ tools: Union[Iterable[Dict[str, object]], ItemLocator]
74
89
 
75
- top_k: Union[int, str]
90
+ top_k: Union[int, ItemLocator]
76
91
 
77
- top_logprobs: Union[int, str]
92
+ top_logprobs: Union[int, ItemLocator]
78
93
 
79
- top_p: Union[float, str]
94
+ top_p: Union[float, ItemLocator]
80
95
 
81
96
 
82
97
  ChatCompletionEvaluationTaskConfiguration: TypeAlias = Union[
@@ -88,7 +103,7 @@ class ChatCompletionEvaluationTask(TypedDict, total=False):
88
103
  configuration: Required[ChatCompletionEvaluationTaskConfiguration]
89
104
 
90
105
  alias: str
91
- """Alias to title the results column. Defaults to the `task_type`"""
106
+ """Alias to title the results column. Defaults to the `chat_completion`"""
92
107
 
93
108
  task_type: Literal["chat_completion"]
94
109
 
@@ -102,14 +117,14 @@ class GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInf
102
117
 
103
118
 
104
119
  GenericInferenceEvaluationTaskConfigurationInferenceConfiguration: TypeAlias = Union[
105
- GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration, str
120
+ GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration, ItemLocator
106
121
  ]
107
122
 
108
123
 
109
124
  class GenericInferenceEvaluationTaskConfiguration(TypedDict, total=False):
110
125
  model: Required[str]
111
126
 
112
- args: Union[Dict[str, object], str]
127
+ args: Union[Dict[str, object], ItemLocator]
113
128
 
114
129
  inference_configuration: GenericInferenceEvaluationTaskConfigurationInferenceConfiguration
115
130
 
@@ -118,7 +133,7 @@ class GenericInferenceEvaluationTask(TypedDict, total=False):
118
133
  configuration: Required[GenericInferenceEvaluationTaskConfiguration]
119
134
 
120
135
  alias: str
121
- """Alias to title the results column. Defaults to the `task_type`"""
136
+ """Alias to title the results column. Defaults to the `inference`"""
122
137
 
123
138
  task_type: Literal["inference"]
124
139
 
@@ -171,22 +186,24 @@ class ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplication
171
186
  ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace
172
187
  ]
173
188
 
189
+ return_span: bool
190
+
174
191
  use_channels: bool
175
192
 
176
193
 
177
194
  ApplicationVariantV1EvaluationTaskConfigurationOverrides: TypeAlias = Union[
178
- ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides, str
195
+ ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides, ItemLocator
179
196
  ]
180
197
 
181
198
 
182
199
  class ApplicationVariantV1EvaluationTaskConfiguration(TypedDict, total=False):
183
200
  application_variant_id: Required[str]
184
201
 
185
- inputs: Required[Union[Dict[str, object], str]]
202
+ inputs: Required[Union[Dict[str, object], ItemLocator]]
186
203
 
187
- history: Union[Iterable[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0], str]
204
+ history: Union[Iterable[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0], ItemLocator]
188
205
 
189
- operation_metadata: Union[Dict[str, object], str]
206
+ operation_metadata: Union[Dict[str, object], ItemLocator]
190
207
 
191
208
  overrides: ApplicationVariantV1EvaluationTaskConfigurationOverrides
192
209
  """Execution override options for agentic applications"""
@@ -196,11 +213,137 @@ class ApplicationVariantV1EvaluationTask(TypedDict, total=False):
196
213
  configuration: Required[ApplicationVariantV1EvaluationTaskConfiguration]
197
214
 
198
215
  alias: str
199
- """Alias to title the results column. Defaults to the `task_type`"""
216
+ """Alias to title the results column. Defaults to the `application_variant`"""
200
217
 
201
218
  task_type: Literal["application_variant"]
202
219
 
203
220
 
221
+ class MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator(TypedDict, total=False):
222
+ candidate: Required[str]
223
+
224
+ reference: Required[str]
225
+
226
+ type: Required[Literal["bleu"]]
227
+
228
+
229
+ class MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator(TypedDict, total=False):
230
+ candidate: Required[str]
231
+
232
+ reference: Required[str]
233
+
234
+ type: Required[Literal["meteor"]]
235
+
236
+
237
+ class MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator(TypedDict, total=False):
238
+ candidate: Required[str]
239
+
240
+ reference: Required[str]
241
+
242
+ type: Required[Literal["cosine_similarity"]]
243
+
244
+
245
+ class MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator(TypedDict, total=False):
246
+ candidate: Required[str]
247
+
248
+ reference: Required[str]
249
+
250
+ type: Required[Literal["f1"]]
251
+
252
+
253
+ class MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator(TypedDict, total=False):
254
+ candidate: Required[str]
255
+
256
+ reference: Required[str]
257
+
258
+ type: Required[Literal["rouge1"]]
259
+
260
+
261
+ class MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator(TypedDict, total=False):
262
+ candidate: Required[str]
263
+
264
+ reference: Required[str]
265
+
266
+ type: Required[Literal["rouge2"]]
267
+
268
+
269
+ class MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator(TypedDict, total=False):
270
+ candidate: Required[str]
271
+
272
+ reference: Required[str]
273
+
274
+ type: Required[Literal["rougeL"]]
275
+
276
+
277
+ MetricEvaluationTaskConfiguration: TypeAlias = Union[
278
+ MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator,
279
+ MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator,
280
+ MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator,
281
+ MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator,
282
+ MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator,
283
+ MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator,
284
+ MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator,
285
+ ]
286
+
287
+
288
+ class MetricEvaluationTask(TypedDict, total=False):
289
+ configuration: Required[MetricEvaluationTaskConfiguration]
290
+
291
+ alias: str
292
+ """Alias to title the results column.
293
+
294
+ Defaults to the metric type specified in the configuration
295
+ """
296
+
297
+ task_type: Literal["metric"]
298
+
299
+
300
+ class AutoEvaluationQuestionTaskConfiguration(TypedDict, total=False):
301
+ model: Required[str]
302
+ """model specified as `model_vendor/model_name`"""
303
+
304
+ prompt: Required[str]
305
+
306
+ question_id: Required[str]
307
+ """question to be evaluated"""
308
+
309
+
310
+ class AutoEvaluationQuestionTask(TypedDict, total=False):
311
+ configuration: Required[AutoEvaluationQuestionTaskConfiguration]
312
+
313
+ alias: str
314
+ """Alias to title the results column. Defaults to the `auto_evaluation_question`"""
315
+
316
+ task_type: Literal["auto_evaluation.question"]
317
+
318
+
319
+ class ContributorEvaluationQuestionTaskConfiguration(TypedDict, total=False):
320
+ layout: Required["ContainerParam"]
321
+
322
+ question_id: Required[str]
323
+
324
+ queue_id: str
325
+ """The contributor annotation queue to include this task in. Defaults to `default`"""
326
+
327
+
328
+ class ContributorEvaluationQuestionTask(TypedDict, total=False):
329
+ configuration: Required[ContributorEvaluationQuestionTaskConfiguration]
330
+
331
+ alias: str
332
+ """Alias to title the results column.
333
+
334
+ Defaults to the `contributor_evaluation_question`
335
+ """
336
+
337
+ task_type: Literal["contributor_evaluation.question"]
338
+
339
+
204
340
  EvaluationTaskParam: TypeAlias = Union[
205
- ChatCompletionEvaluationTask, GenericInferenceEvaluationTask, ApplicationVariantV1EvaluationTask
341
+ ChatCompletionEvaluationTask,
342
+ GenericInferenceEvaluationTask,
343
+ ApplicationVariantV1EvaluationTask,
344
+ MetricEvaluationTask,
345
+ AutoEvaluationQuestionTask,
346
+ ContributorEvaluationQuestionTask,
206
347
  ]
348
+
349
+ from .container_param import ContainerParam