scale-gp-beta 0.1.0a12__py3-none-any.whl → 0.1.0a14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scale_gp_beta/__init__.py +5 -0
- scale_gp_beta/_utils/_proxy.py +4 -1
- scale_gp_beta/_utils/_resources_proxy.py +24 -0
- scale_gp_beta/_version.py +1 -1
- scale_gp_beta/pagination.py +39 -1
- scale_gp_beta/resources/chat/completions.py +136 -1
- scale_gp_beta/resources/dataset_items.py +29 -21
- scale_gp_beta/resources/datasets.py +18 -5
- scale_gp_beta/resources/evaluation_items.py +11 -7
- scale_gp_beta/resources/evaluations.py +142 -21
- scale_gp_beta/resources/files/files.py +10 -5
- scale_gp_beta/resources/models.py +35 -35
- scale_gp_beta/resources/spans.py +312 -24
- scale_gp_beta/types/__init__.py +9 -2
- scale_gp_beta/types/chat/__init__.py +3 -0
- scale_gp_beta/types/chat/completion_models_params.py +31 -0
- scale_gp_beta/types/{dataset_item_batch_create_response.py → chat/completion_models_response.py} +5 -5
- scale_gp_beta/types/chat/model_definition.py +32 -0
- scale_gp_beta/types/component.py +18 -0
- scale_gp_beta/types/component_param.py +19 -0
- scale_gp_beta/types/container.py +35 -0
- scale_gp_beta/types/container_param.py +28 -0
- scale_gp_beta/types/dataset_item.py +2 -0
- scale_gp_beta/types/dataset_item_list_params.py +7 -6
- scale_gp_beta/types/dataset_item_retrieve_params.py +1 -2
- scale_gp_beta/types/dataset_list_params.py +10 -4
- scale_gp_beta/types/evaluation.py +12 -2
- scale_gp_beta/types/evaluation_create_params.py +5 -5
- scale_gp_beta/types/{evaluation_archive_response.py → evaluation_delete_response.py} +2 -2
- scale_gp_beta/types/evaluation_item_list_params.py +6 -5
- scale_gp_beta/types/evaluation_list_params.py +9 -3
- scale_gp_beta/types/evaluation_task.py +139 -33
- scale_gp_beta/types/evaluation_task_param.py +88 -33
- scale_gp_beta/types/evaluation_update_params.py +17 -0
- scale_gp_beta/types/file_list_params.py +5 -4
- scale_gp_beta/types/inference_model.py +0 -4
- scale_gp_beta/types/item_locator.py +7 -0
- scale_gp_beta/types/item_locator_template.py +7 -0
- scale_gp_beta/types/model_list_params.py +17 -18
- scale_gp_beta/types/span.py +40 -1
- scale_gp_beta/types/span_batch_params.py +130 -0
- scale_gp_beta/types/span_create_params.py +71 -3
- scale_gp_beta/types/span_list_params.py +7 -6
- scale_gp_beta/types/span_update_params.py +5 -3
- {scale_gp_beta-0.1.0a12.dist-info → scale_gp_beta-0.1.0a14.dist-info}/METADATA +1 -1
- {scale_gp_beta-0.1.0a12.dist-info → scale_gp_beta-0.1.0a14.dist-info}/RECORD +48 -37
- {scale_gp_beta-0.1.0a12.dist-info → scale_gp_beta-0.1.0a14.dist-info}/WHEEL +0 -0
- {scale_gp_beta-0.1.0a12.dist-info → scale_gp_beta-0.1.0a14.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from typing_extensions import Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import Field as FieldInfo
|
|
6
|
+
|
|
7
|
+
from ..._models import BaseModel
|
|
8
|
+
|
|
9
|
+
__all__ = ["ModelDefinition"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ModelDefinition(BaseModel):
|
|
13
|
+
api_model_name: str = FieldInfo(alias="model_name")
|
|
14
|
+
"""model name, for example `gpt-4o`"""
|
|
15
|
+
|
|
16
|
+
api_model_type: Literal["generic", "completion", "chat_completion"] = FieldInfo(alias="model_type")
|
|
17
|
+
"""model type, for example `chat_completion`"""
|
|
18
|
+
|
|
19
|
+
api_model_vendor: Literal[
|
|
20
|
+
"openai",
|
|
21
|
+
"cohere",
|
|
22
|
+
"vertex_ai",
|
|
23
|
+
"anthropic",
|
|
24
|
+
"azure",
|
|
25
|
+
"gemini",
|
|
26
|
+
"launch",
|
|
27
|
+
"llmengine",
|
|
28
|
+
"model_zoo",
|
|
29
|
+
"bedrock",
|
|
30
|
+
"xai",
|
|
31
|
+
] = FieldInfo(alias="model_vendor")
|
|
32
|
+
"""model vendor, for example `openai`"""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from .._models import BaseModel
|
|
6
|
+
from .item_locator import ItemLocator
|
|
7
|
+
|
|
8
|
+
__all__ = ["Component"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Component(BaseModel):
|
|
12
|
+
data: ItemLocator
|
|
13
|
+
"""
|
|
14
|
+
A pointer to the data in each evaluation item to be displayed within the
|
|
15
|
+
component
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
label: Optional[str] = None
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Required, TypedDict
|
|
6
|
+
|
|
7
|
+
from .item_locator import ItemLocator
|
|
8
|
+
|
|
9
|
+
__all__ = ["ComponentParam"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ComponentParam(TypedDict, total=False):
|
|
13
|
+
data: Required[ItemLocator]
|
|
14
|
+
"""
|
|
15
|
+
A pointer to the data in each evaluation item to be displayed within the
|
|
16
|
+
component
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
label: str
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, List, Union, Optional
|
|
6
|
+
from typing_extensions import Literal, TypeAlias, TypeAliasType
|
|
7
|
+
|
|
8
|
+
from .._compat import PYDANTIC_V2
|
|
9
|
+
from .._models import BaseModel
|
|
10
|
+
from .component import Component
|
|
11
|
+
|
|
12
|
+
__all__ = ["Container", "Child"]
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING or PYDANTIC_V2:
|
|
15
|
+
Child = TypeAliasType("Child", Union["Container", Component])
|
|
16
|
+
else:
|
|
17
|
+
Child: TypeAlias = Union["Container", Component]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Container(BaseModel):
|
|
21
|
+
children: List[Child]
|
|
22
|
+
"""The children to be displayed within the container"""
|
|
23
|
+
|
|
24
|
+
direction: Optional[Literal["row", "column"]] = None
|
|
25
|
+
"""The axis that children are placed in the container.
|
|
26
|
+
|
|
27
|
+
Based on CSS `flex-direction` (see:
|
|
28
|
+
https://developer.mozilla.org/en-US/docs/Web/CSS/flex-direction)
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
if PYDANTIC_V2:
|
|
33
|
+
Container.model_rebuild()
|
|
34
|
+
else:
|
|
35
|
+
Container.update_forward_refs() # type: ignore
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Union, Iterable
|
|
6
|
+
from typing_extensions import Literal, Required, TypeAlias, TypedDict, TypeAliasType
|
|
7
|
+
|
|
8
|
+
from .._compat import PYDANTIC_V2
|
|
9
|
+
from .component_param import ComponentParam
|
|
10
|
+
|
|
11
|
+
__all__ = ["ContainerParam", "Child"]
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING or PYDANTIC_V2:
|
|
14
|
+
Child = TypeAliasType("Child", Union["ContainerParam", ComponentParam])
|
|
15
|
+
else:
|
|
16
|
+
Child: TypeAlias = Union["ContainerParam", ComponentParam]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ContainerParam(TypedDict, total=False):
|
|
20
|
+
children: Required[Iterable[Child]]
|
|
21
|
+
"""The children to be displayed within the container"""
|
|
22
|
+
|
|
23
|
+
direction: Literal["row", "column"]
|
|
24
|
+
"""The axis that children are placed in the container.
|
|
25
|
+
|
|
26
|
+
Based on CSS `flex-direction` (see:
|
|
27
|
+
https://developer.mozilla.org/en-US/docs/Web/CSS/flex-direction)
|
|
28
|
+
"""
|
|
@@ -2,28 +2,29 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from typing_extensions import TypedDict
|
|
5
|
+
from typing_extensions import Literal, TypedDict
|
|
7
6
|
|
|
8
7
|
__all__ = ["DatasetItemListParams"]
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
class DatasetItemListParams(TypedDict, total=False):
|
|
12
|
-
dataset_id:
|
|
11
|
+
dataset_id: str
|
|
13
12
|
"""Optional dataset identifier.
|
|
14
13
|
|
|
15
14
|
Must be provided if a specific version is requested.
|
|
16
15
|
"""
|
|
17
16
|
|
|
18
|
-
ending_before:
|
|
17
|
+
ending_before: str
|
|
19
18
|
|
|
20
19
|
include_archived: bool
|
|
21
20
|
|
|
22
21
|
limit: int
|
|
23
22
|
|
|
24
|
-
|
|
23
|
+
sort_order: Literal["asc", "desc"]
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
starting_after: str
|
|
26
|
+
|
|
27
|
+
version: int
|
|
27
28
|
"""Optional dataset version.
|
|
28
29
|
|
|
29
30
|
When unset, returns the latest version. Requires a valid dataset_id when set.
|
|
@@ -2,12 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import Optional
|
|
6
5
|
from typing_extensions import TypedDict
|
|
7
6
|
|
|
8
7
|
__all__ = ["DatasetItemRetrieveParams"]
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
class DatasetItemRetrieveParams(TypedDict, total=False):
|
|
12
|
-
version:
|
|
11
|
+
version: int
|
|
13
12
|
"""Optional dataset version. When unset, returns the latest version."""
|
|
@@ -2,17 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import
|
|
6
|
-
from typing_extensions import TypedDict
|
|
5
|
+
from typing import List
|
|
6
|
+
from typing_extensions import Literal, TypedDict
|
|
7
7
|
|
|
8
8
|
__all__ = ["DatasetListParams"]
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class DatasetListParams(TypedDict, total=False):
|
|
12
|
-
ending_before:
|
|
12
|
+
ending_before: str
|
|
13
13
|
|
|
14
14
|
include_archived: bool
|
|
15
15
|
|
|
16
16
|
limit: int
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
name: str
|
|
19
|
+
|
|
20
|
+
sort_order: Literal["asc", "desc"]
|
|
21
|
+
|
|
22
|
+
starting_after: str
|
|
23
|
+
|
|
24
|
+
tags: List[str]
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
from typing import List, Optional
|
|
4
6
|
from datetime import datetime
|
|
5
7
|
from typing_extensions import Literal
|
|
6
8
|
|
|
7
9
|
from .dataset import Dataset
|
|
10
|
+
from .._compat import PYDANTIC_V2
|
|
8
11
|
from .._models import BaseModel
|
|
9
|
-
from .evaluation_task import EvaluationTask
|
|
10
12
|
|
|
11
13
|
__all__ = ["Evaluation"]
|
|
12
14
|
|
|
@@ -33,5 +35,13 @@ class Evaluation(BaseModel):
|
|
|
33
35
|
|
|
34
36
|
object: Optional[Literal["evaluation"]] = None
|
|
35
37
|
|
|
36
|
-
tasks: Optional[List[EvaluationTask]] = None
|
|
38
|
+
tasks: Optional[List["EvaluationTask"]] = None
|
|
37
39
|
"""Tasks executed during evaluation. Populated with optional `task` view."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
from .evaluation_task import EvaluationTask
|
|
43
|
+
|
|
44
|
+
if PYDANTIC_V2:
|
|
45
|
+
Evaluation.model_rebuild()
|
|
46
|
+
else:
|
|
47
|
+
Evaluation.update_forward_refs() # type: ignore
|
|
@@ -5,8 +5,6 @@ from __future__ import annotations
|
|
|
5
5
|
from typing import Dict, List, Union, Iterable
|
|
6
6
|
from typing_extensions import Required, TypeAlias, TypedDict
|
|
7
7
|
|
|
8
|
-
from .evaluation_task_param import EvaluationTaskParam
|
|
9
|
-
|
|
10
8
|
__all__ = [
|
|
11
9
|
"EvaluationCreateParams",
|
|
12
10
|
"EvaluationStandaloneCreateRequest",
|
|
@@ -28,7 +26,7 @@ class EvaluationStandaloneCreateRequest(TypedDict, total=False):
|
|
|
28
26
|
tags: List[str]
|
|
29
27
|
"""The tags associated with the entity"""
|
|
30
28
|
|
|
31
|
-
tasks: Iterable[EvaluationTaskParam]
|
|
29
|
+
tasks: Iterable["EvaluationTaskParam"]
|
|
32
30
|
"""Tasks allow you to augment and evaluate your data"""
|
|
33
31
|
|
|
34
32
|
|
|
@@ -46,7 +44,7 @@ class EvaluationFromDatasetCreateRequest(TypedDict, total=False):
|
|
|
46
44
|
tags: List[str]
|
|
47
45
|
"""The tags associated with the entity"""
|
|
48
46
|
|
|
49
|
-
tasks: Iterable[EvaluationTaskParam]
|
|
47
|
+
tasks: Iterable["EvaluationTaskParam"]
|
|
50
48
|
"""Tasks allow you to augment and evaluate your data"""
|
|
51
49
|
|
|
52
50
|
|
|
@@ -73,7 +71,7 @@ class EvaluationWithDatasetCreateRequest(TypedDict, total=False):
|
|
|
73
71
|
tags: List[str]
|
|
74
72
|
"""The tags associated with the entity"""
|
|
75
73
|
|
|
76
|
-
tasks: Iterable[EvaluationTaskParam]
|
|
74
|
+
tasks: Iterable["EvaluationTaskParam"]
|
|
77
75
|
"""Tasks allow you to augment and evaluate your data"""
|
|
78
76
|
|
|
79
77
|
|
|
@@ -95,3 +93,5 @@ class EvaluationWithDatasetCreateRequestDataset(TypedDict, total=False):
|
|
|
95
93
|
EvaluationCreateParams: TypeAlias = Union[
|
|
96
94
|
EvaluationStandaloneCreateRequest, EvaluationFromDatasetCreateRequest, EvaluationWithDatasetCreateRequest
|
|
97
95
|
]
|
|
96
|
+
|
|
97
|
+
from .evaluation_task_param import EvaluationTaskParam
|
|
@@ -5,10 +5,10 @@ from typing_extensions import Literal
|
|
|
5
5
|
|
|
6
6
|
from .._models import BaseModel
|
|
7
7
|
|
|
8
|
-
__all__ = ["
|
|
8
|
+
__all__ = ["EvaluationDeleteResponse"]
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
class
|
|
11
|
+
class EvaluationDeleteResponse(BaseModel):
|
|
12
12
|
id: str
|
|
13
13
|
|
|
14
14
|
deleted: bool
|
|
@@ -2,19 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from
|
|
6
|
-
from typing_extensions import TypedDict
|
|
5
|
+
from typing_extensions import Literal, TypedDict
|
|
7
6
|
|
|
8
7
|
__all__ = ["EvaluationItemListParams"]
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
class EvaluationItemListParams(TypedDict, total=False):
|
|
12
|
-
ending_before:
|
|
11
|
+
ending_before: str
|
|
13
12
|
|
|
14
|
-
evaluation_id:
|
|
13
|
+
evaluation_id: str
|
|
15
14
|
|
|
16
15
|
include_archived: bool
|
|
17
16
|
|
|
18
17
|
limit: int
|
|
19
18
|
|
|
20
|
-
|
|
19
|
+
sort_order: Literal["asc", "desc"]
|
|
20
|
+
|
|
21
|
+
starting_after: str
|
|
@@ -2,19 +2,25 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import List
|
|
5
|
+
from typing import List
|
|
6
6
|
from typing_extensions import Literal, TypedDict
|
|
7
7
|
|
|
8
8
|
__all__ = ["EvaluationListParams"]
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class EvaluationListParams(TypedDict, total=False):
|
|
12
|
-
ending_before:
|
|
12
|
+
ending_before: str
|
|
13
13
|
|
|
14
14
|
include_archived: bool
|
|
15
15
|
|
|
16
16
|
limit: int
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
name: str
|
|
19
|
+
|
|
20
|
+
sort_order: Literal["asc", "desc"]
|
|
21
|
+
|
|
22
|
+
starting_after: str
|
|
23
|
+
|
|
24
|
+
tags: List[str]
|
|
19
25
|
|
|
20
26
|
views: List[Literal["tasks"]]
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
from typing import TYPE_CHECKING, Dict, List, Union, Optional
|
|
4
6
|
from typing_extensions import Literal, Annotated, TypeAlias
|
|
5
7
|
|
|
6
8
|
from .._utils import PropertyInfo
|
|
9
|
+
from .._compat import PYDANTIC_V2
|
|
7
10
|
from .._models import BaseModel
|
|
11
|
+
from .item_locator import ItemLocator
|
|
8
12
|
|
|
9
13
|
__all__ = [
|
|
10
14
|
"EvaluationTask",
|
|
@@ -30,63 +34,67 @@ __all__ = [
|
|
|
30
34
|
"MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator",
|
|
31
35
|
"MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator",
|
|
32
36
|
"MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator",
|
|
37
|
+
"AutoEvaluationQuestionTask",
|
|
38
|
+
"AutoEvaluationQuestionTaskConfiguration",
|
|
39
|
+
"ContributorEvaluationQuestionTask",
|
|
40
|
+
"ContributorEvaluationQuestionTaskConfiguration",
|
|
33
41
|
]
|
|
34
42
|
|
|
35
43
|
|
|
36
44
|
class ChatCompletionEvaluationTaskConfiguration(BaseModel):
|
|
37
|
-
messages: Union[List[Dict[str, object]],
|
|
45
|
+
messages: Union[List[Dict[str, object]], ItemLocator]
|
|
38
46
|
|
|
39
47
|
model: str
|
|
40
48
|
|
|
41
|
-
audio: Union[Dict[str, object],
|
|
49
|
+
audio: Union[Dict[str, object], ItemLocator, None] = None
|
|
42
50
|
|
|
43
|
-
frequency_penalty: Union[float,
|
|
51
|
+
frequency_penalty: Union[float, ItemLocator, None] = None
|
|
44
52
|
|
|
45
|
-
function_call: Union[Dict[str, object],
|
|
53
|
+
function_call: Union[Dict[str, object], ItemLocator, None] = None
|
|
46
54
|
|
|
47
|
-
functions: Union[List[Dict[str, object]],
|
|
55
|
+
functions: Union[List[Dict[str, object]], ItemLocator, None] = None
|
|
48
56
|
|
|
49
|
-
logit_bias: Union[Dict[str, int],
|
|
57
|
+
logit_bias: Union[Dict[str, int], ItemLocator, None] = None
|
|
50
58
|
|
|
51
|
-
logprobs: Union[bool,
|
|
59
|
+
logprobs: Union[bool, ItemLocator, None] = None
|
|
52
60
|
|
|
53
|
-
max_completion_tokens: Union[int,
|
|
61
|
+
max_completion_tokens: Union[int, ItemLocator, None] = None
|
|
54
62
|
|
|
55
|
-
max_tokens: Union[int,
|
|
63
|
+
max_tokens: Union[int, ItemLocator, None] = None
|
|
56
64
|
|
|
57
|
-
metadata: Union[Dict[str, str],
|
|
65
|
+
metadata: Union[Dict[str, str], ItemLocator, None] = None
|
|
58
66
|
|
|
59
|
-
modalities: Union[List[str],
|
|
67
|
+
modalities: Union[List[str], ItemLocator, None] = None
|
|
60
68
|
|
|
61
|
-
n: Union[int,
|
|
69
|
+
n: Union[int, ItemLocator, None] = None
|
|
62
70
|
|
|
63
|
-
parallel_tool_calls: Union[bool,
|
|
71
|
+
parallel_tool_calls: Union[bool, ItemLocator, None] = None
|
|
64
72
|
|
|
65
|
-
prediction: Union[Dict[str, object],
|
|
73
|
+
prediction: Union[Dict[str, object], ItemLocator, None] = None
|
|
66
74
|
|
|
67
|
-
presence_penalty: Union[float,
|
|
75
|
+
presence_penalty: Union[float, ItemLocator, None] = None
|
|
68
76
|
|
|
69
77
|
reasoning_effort: Optional[str] = None
|
|
70
78
|
|
|
71
|
-
response_format: Union[Dict[str, object],
|
|
79
|
+
response_format: Union[Dict[str, object], ItemLocator, None] = None
|
|
72
80
|
|
|
73
|
-
seed: Union[int,
|
|
81
|
+
seed: Union[int, ItemLocator, None] = None
|
|
74
82
|
|
|
75
83
|
stop: Optional[str] = None
|
|
76
84
|
|
|
77
|
-
store: Union[bool,
|
|
85
|
+
store: Union[bool, ItemLocator, None] = None
|
|
78
86
|
|
|
79
|
-
temperature: Union[float,
|
|
87
|
+
temperature: Union[float, ItemLocator, None] = None
|
|
80
88
|
|
|
81
89
|
tool_choice: Optional[str] = None
|
|
82
90
|
|
|
83
|
-
tools: Union[List[Dict[str, object]],
|
|
91
|
+
tools: Union[List[Dict[str, object]], ItemLocator, None] = None
|
|
84
92
|
|
|
85
|
-
top_k: Union[int,
|
|
93
|
+
top_k: Union[int, ItemLocator, None] = None
|
|
86
94
|
|
|
87
|
-
top_logprobs: Union[int,
|
|
95
|
+
top_logprobs: Union[int, ItemLocator, None] = None
|
|
88
96
|
|
|
89
|
-
top_p: Union[float,
|
|
97
|
+
top_p: Union[float, ItemLocator, None] = None
|
|
90
98
|
|
|
91
99
|
if TYPE_CHECKING:
|
|
92
100
|
# Stub to indicate that arbitrary properties are accepted.
|
|
@@ -99,7 +107,7 @@ class ChatCompletionEvaluationTask(BaseModel):
|
|
|
99
107
|
configuration: ChatCompletionEvaluationTaskConfiguration
|
|
100
108
|
|
|
101
109
|
alias: Optional[str] = None
|
|
102
|
-
"""Alias to title the results column. Defaults to the `
|
|
110
|
+
"""Alias to title the results column. Defaults to the `chat_completion`"""
|
|
103
111
|
|
|
104
112
|
task_type: Optional[Literal["chat_completion"]] = None
|
|
105
113
|
|
|
@@ -111,14 +119,14 @@ class GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInf
|
|
|
111
119
|
|
|
112
120
|
|
|
113
121
|
GenericInferenceEvaluationTaskConfigurationInferenceConfiguration: TypeAlias = Union[
|
|
114
|
-
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration,
|
|
122
|
+
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration, ItemLocator
|
|
115
123
|
]
|
|
116
124
|
|
|
117
125
|
|
|
118
126
|
class GenericInferenceEvaluationTaskConfiguration(BaseModel):
|
|
119
127
|
model: str
|
|
120
128
|
|
|
121
|
-
args: Union[Dict[str, object],
|
|
129
|
+
args: Union[Dict[str, object], ItemLocator, None] = None
|
|
122
130
|
|
|
123
131
|
inference_configuration: Optional[GenericInferenceEvaluationTaskConfigurationInferenceConfiguration] = None
|
|
124
132
|
|
|
@@ -127,7 +135,7 @@ class GenericInferenceEvaluationTask(BaseModel):
|
|
|
127
135
|
configuration: GenericInferenceEvaluationTaskConfiguration
|
|
128
136
|
|
|
129
137
|
alias: Optional[str] = None
|
|
130
|
-
"""Alias to title the results column. Defaults to the `
|
|
138
|
+
"""Alias to title the results column. Defaults to the `inference`"""
|
|
131
139
|
|
|
132
140
|
task_type: Optional[Literal["inference"]] = None
|
|
133
141
|
|
|
@@ -178,22 +186,24 @@ class ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplication
|
|
|
178
186
|
List[ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace]
|
|
179
187
|
] = None
|
|
180
188
|
|
|
189
|
+
return_span: Optional[bool] = None
|
|
190
|
+
|
|
181
191
|
use_channels: Optional[bool] = None
|
|
182
192
|
|
|
183
193
|
|
|
184
194
|
ApplicationVariantV1EvaluationTaskConfigurationOverrides: TypeAlias = Union[
|
|
185
|
-
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides,
|
|
195
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides, ItemLocator
|
|
186
196
|
]
|
|
187
197
|
|
|
188
198
|
|
|
189
199
|
class ApplicationVariantV1EvaluationTaskConfiguration(BaseModel):
|
|
190
200
|
application_variant_id: str
|
|
191
201
|
|
|
192
|
-
inputs: Union[Dict[str, object],
|
|
202
|
+
inputs: Union[Dict[str, object], ItemLocator]
|
|
193
203
|
|
|
194
|
-
history: Union[List[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0],
|
|
204
|
+
history: Union[List[ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0], ItemLocator, None] = None
|
|
195
205
|
|
|
196
|
-
operation_metadata: Union[Dict[str, object],
|
|
206
|
+
operation_metadata: Union[Dict[str, object], ItemLocator, None] = None
|
|
197
207
|
|
|
198
208
|
overrides: Optional[ApplicationVariantV1EvaluationTaskConfigurationOverrides] = None
|
|
199
209
|
"""Execution override options for agentic applications"""
|
|
@@ -203,7 +213,7 @@ class ApplicationVariantV1EvaluationTask(BaseModel):
|
|
|
203
213
|
configuration: ApplicationVariantV1EvaluationTaskConfiguration
|
|
204
214
|
|
|
205
215
|
alias: Optional[str] = None
|
|
206
|
-
"""Alias to title the results column. Defaults to the `
|
|
216
|
+
"""Alias to title the results column. Defaults to the `application_variant`"""
|
|
207
217
|
|
|
208
218
|
task_type: Optional[Literal["application_variant"]] = None
|
|
209
219
|
|
|
@@ -282,17 +292,113 @@ class MetricEvaluationTask(BaseModel):
|
|
|
282
292
|
configuration: MetricEvaluationTaskConfiguration
|
|
283
293
|
|
|
284
294
|
alias: Optional[str] = None
|
|
285
|
-
"""Alias to title the results column.
|
|
295
|
+
"""Alias to title the results column.
|
|
296
|
+
|
|
297
|
+
Defaults to the metric type specified in the configuration
|
|
298
|
+
"""
|
|
286
299
|
|
|
287
300
|
task_type: Optional[Literal["metric"]] = None
|
|
288
301
|
|
|
289
302
|
|
|
303
|
+
class AutoEvaluationQuestionTaskConfiguration(BaseModel):
|
|
304
|
+
model: str
|
|
305
|
+
"""model specified as `model_vendor/model_name`"""
|
|
306
|
+
|
|
307
|
+
prompt: str
|
|
308
|
+
|
|
309
|
+
question_id: str
|
|
310
|
+
"""question to be evaluated"""
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class AutoEvaluationQuestionTask(BaseModel):
|
|
314
|
+
configuration: AutoEvaluationQuestionTaskConfiguration
|
|
315
|
+
|
|
316
|
+
alias: Optional[str] = None
|
|
317
|
+
"""Alias to title the results column. Defaults to the `auto_evaluation_question`"""
|
|
318
|
+
|
|
319
|
+
task_type: Optional[Literal["auto_evaluation.question"]] = None
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
class ContributorEvaluationQuestionTaskConfiguration(BaseModel):
|
|
323
|
+
layout: "Container"
|
|
324
|
+
|
|
325
|
+
question_id: str
|
|
326
|
+
|
|
327
|
+
queue_id: Optional[str] = None
|
|
328
|
+
"""The contributor annotation queue to include this task in. Defaults to `default`"""
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class ContributorEvaluationQuestionTask(BaseModel):
|
|
332
|
+
configuration: ContributorEvaluationQuestionTaskConfiguration
|
|
333
|
+
|
|
334
|
+
alias: Optional[str] = None
|
|
335
|
+
"""Alias to title the results column.
|
|
336
|
+
|
|
337
|
+
Defaults to the `contributor_evaluation_question`
|
|
338
|
+
"""
|
|
339
|
+
|
|
340
|
+
task_type: Optional[Literal["contributor_evaluation.question"]] = None
|
|
341
|
+
|
|
342
|
+
|
|
290
343
|
EvaluationTask: TypeAlias = Annotated[
|
|
291
344
|
Union[
|
|
292
345
|
ChatCompletionEvaluationTask,
|
|
293
346
|
GenericInferenceEvaluationTask,
|
|
294
347
|
ApplicationVariantV1EvaluationTask,
|
|
295
348
|
MetricEvaluationTask,
|
|
349
|
+
AutoEvaluationQuestionTask,
|
|
350
|
+
ContributorEvaluationQuestionTask,
|
|
296
351
|
],
|
|
297
352
|
PropertyInfo(discriminator="task_type"),
|
|
298
353
|
]
|
|
354
|
+
|
|
355
|
+
from .container import Container
|
|
356
|
+
|
|
357
|
+
if PYDANTIC_V2:
|
|
358
|
+
ChatCompletionEvaluationTask.model_rebuild()
|
|
359
|
+
ChatCompletionEvaluationTaskConfiguration.model_rebuild()
|
|
360
|
+
GenericInferenceEvaluationTask.model_rebuild()
|
|
361
|
+
GenericInferenceEvaluationTaskConfiguration.model_rebuild()
|
|
362
|
+
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration.model_rebuild()
|
|
363
|
+
ApplicationVariantV1EvaluationTask.model_rebuild()
|
|
364
|
+
ApplicationVariantV1EvaluationTaskConfiguration.model_rebuild()
|
|
365
|
+
ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0.model_rebuild()
|
|
366
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides.model_rebuild()
|
|
367
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState.model_rebuild()
|
|
368
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace.model_rebuild()
|
|
369
|
+
MetricEvaluationTask.model_rebuild()
|
|
370
|
+
MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator.model_rebuild()
|
|
371
|
+
MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator.model_rebuild()
|
|
372
|
+
MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator.model_rebuild()
|
|
373
|
+
MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator.model_rebuild()
|
|
374
|
+
MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator.model_rebuild()
|
|
375
|
+
MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator.model_rebuild()
|
|
376
|
+
MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator.model_rebuild()
|
|
377
|
+
AutoEvaluationQuestionTask.model_rebuild()
|
|
378
|
+
AutoEvaluationQuestionTaskConfiguration.model_rebuild()
|
|
379
|
+
ContributorEvaluationQuestionTask.model_rebuild()
|
|
380
|
+
ContributorEvaluationQuestionTaskConfiguration.model_rebuild()
|
|
381
|
+
else:
|
|
382
|
+
ChatCompletionEvaluationTask.update_forward_refs() # type: ignore
|
|
383
|
+
ChatCompletionEvaluationTaskConfiguration.update_forward_refs() # type: ignore
|
|
384
|
+
GenericInferenceEvaluationTask.update_forward_refs() # type: ignore
|
|
385
|
+
GenericInferenceEvaluationTaskConfiguration.update_forward_refs() # type: ignore
|
|
386
|
+
GenericInferenceEvaluationTaskConfigurationInferenceConfigurationLaunchInferenceConfiguration.update_forward_refs() # type: ignore
|
|
387
|
+
ApplicationVariantV1EvaluationTask.update_forward_refs() # type: ignore
|
|
388
|
+
ApplicationVariantV1EvaluationTaskConfiguration.update_forward_refs() # type: ignore
|
|
389
|
+
ApplicationVariantV1EvaluationTaskConfigurationHistoryUnionMember0.update_forward_refs() # type: ignore
|
|
390
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverrides.update_forward_refs() # type: ignore
|
|
391
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesInitialState.update_forward_refs() # type: ignore
|
|
392
|
+
ApplicationVariantV1EvaluationTaskConfigurationOverridesAgenticApplicationOverridesPartialTrace.update_forward_refs() # type: ignore
|
|
393
|
+
MetricEvaluationTask.update_forward_refs() # type: ignore
|
|
394
|
+
MetricEvaluationTaskConfigurationBleuScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
395
|
+
MetricEvaluationTaskConfigurationMeteorScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
396
|
+
MetricEvaluationTaskConfigurationCosineSimilarityScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
397
|
+
MetricEvaluationTaskConfigurationF1ScorerConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
398
|
+
MetricEvaluationTaskConfigurationRougeScorer1ConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
399
|
+
MetricEvaluationTaskConfigurationRougeScorer2ConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
400
|
+
MetricEvaluationTaskConfigurationRougeScorerLConfigWithItemLocator.update_forward_refs() # type: ignore
|
|
401
|
+
AutoEvaluationQuestionTask.update_forward_refs() # type: ignore
|
|
402
|
+
AutoEvaluationQuestionTaskConfiguration.update_forward_refs() # type: ignore
|
|
403
|
+
ContributorEvaluationQuestionTask.update_forward_refs() # type: ignore
|
|
404
|
+
ContributorEvaluationQuestionTaskConfiguration.update_forward_refs() # type: ignore
|