agenta 0.12.7__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of agenta might be problematic. Click here for more details.

Files changed (88)
  1. agenta/__init__.py +3 -1
  2. agenta/cli/helper.py +1 -1
  3. agenta/cli/main.py +1 -1
  4. agenta/cli/variant_commands.py +7 -5
  5. agenta/client/api.py +1 -1
  6. agenta/client/backend/__init__.py +78 -18
  7. agenta/client/backend/client.py +1031 -5526
  8. agenta/client/backend/resources/__init__.py +31 -0
  9. agenta/client/backend/resources/apps/__init__.py +1 -0
  10. agenta/client/backend/resources/apps/client.py +977 -0
  11. agenta/client/backend/resources/bases/__init__.py +1 -0
  12. agenta/client/backend/resources/bases/client.py +127 -0
  13. agenta/client/backend/resources/configs/__init__.py +1 -0
  14. agenta/client/backend/resources/configs/client.py +377 -0
  15. agenta/client/backend/resources/containers/__init__.py +5 -0
  16. agenta/client/backend/resources/containers/client.py +383 -0
  17. agenta/client/backend/resources/containers/types/__init__.py +5 -0
  18. agenta/client/backend/{types → resources/containers/types}/container_templates_response.py +1 -1
  19. agenta/client/backend/resources/environments/__init__.py +1 -0
  20. agenta/client/backend/resources/environments/client.py +131 -0
  21. agenta/client/backend/resources/evaluations/__init__.py +1 -0
  22. agenta/client/backend/resources/evaluations/client.py +1008 -0
  23. agenta/client/backend/resources/evaluators/__init__.py +1 -0
  24. agenta/client/backend/resources/evaluators/client.py +594 -0
  25. agenta/client/backend/resources/observability/__init__.py +1 -0
  26. agenta/client/backend/resources/observability/client.py +1184 -0
  27. agenta/client/backend/resources/testsets/__init__.py +1 -0
  28. agenta/client/backend/resources/testsets/client.py +689 -0
  29. agenta/client/backend/resources/variants/__init__.py +5 -0
  30. agenta/client/backend/resources/variants/client.py +796 -0
  31. agenta/client/backend/resources/variants/types/__init__.py +7 -0
  32. agenta/client/backend/resources/variants/types/add_variant_from_base_and_config_response.py +7 -0
  33. agenta/client/backend/types/__init__.py +54 -22
  34. agenta/client/backend/types/aggregated_result.py +2 -2
  35. agenta/client/backend/types/aggregated_result_evaluator_config.py +9 -0
  36. agenta/client/backend/types/{app_variant_output.py → app_variant_response.py} +4 -2
  37. agenta/client/backend/types/{trace.py → create_span.py} +20 -10
  38. agenta/client/backend/types/create_trace_response.py +37 -0
  39. agenta/client/backend/types/environment_output.py +3 -1
  40. agenta/client/backend/types/environment_output_extended.py +45 -0
  41. agenta/client/backend/types/environment_revision.py +41 -0
  42. agenta/client/backend/types/error.py +37 -0
  43. agenta/client/backend/types/evaluation.py +6 -3
  44. agenta/client/backend/types/evaluation_scenario_output.py +4 -2
  45. agenta/client/backend/types/{delete_evaluation.py → evaluation_scenario_score_update.py} +2 -2
  46. agenta/client/backend/types/evaluation_status_enum.py +4 -0
  47. agenta/client/backend/types/evaluator.py +1 -0
  48. agenta/client/backend/types/{get_config_reponse.py → get_config_response.py} +1 -2
  49. agenta/client/backend/types/human_evaluation_scenario.py +2 -2
  50. agenta/client/backend/types/{app_variant_output_extended.py → human_evaluation_scenario_update.py} +11 -16
  51. agenta/client/backend/types/human_evaluation_update.py +37 -0
  52. agenta/client/backend/types/image.py +1 -0
  53. agenta/client/backend/types/invite_request.py +1 -0
  54. agenta/client/backend/types/{list_api_keys_output.py → list_api_keys_response.py} +1 -1
  55. agenta/client/backend/types/llm_tokens.py +38 -0
  56. agenta/client/backend/types/new_human_evaluation.py +42 -0
  57. agenta/client/backend/types/organization.py +1 -0
  58. agenta/client/backend/types/permission.py +141 -0
  59. agenta/client/backend/types/result.py +2 -0
  60. agenta/client/backend/types/{human_evaluation_scenario_score.py → score.py} +1 -1
  61. agenta/client/backend/types/span.py +18 -16
  62. agenta/client/backend/types/span_detail.py +52 -0
  63. agenta/client/backend/types/span_kind.py +49 -0
  64. agenta/client/backend/types/span_status_code.py +29 -0
  65. agenta/client/backend/types/span_variant.py +38 -0
  66. agenta/client/backend/types/trace_detail.py +52 -0
  67. agenta/client/backend/types/with_pagination.py +40 -0
  68. agenta/client/backend/types/workspace_member_response.py +38 -0
  69. agenta/client/backend/types/workspace_permission.py +40 -0
  70. agenta/client/backend/types/workspace_response.py +44 -0
  71. agenta/client/backend/types/workspace_role.py +41 -0
  72. agenta/client/backend/types/workspace_role_response.py +38 -0
  73. agenta/docker/docker_utils.py +1 -5
  74. agenta/sdk/__init__.py +3 -1
  75. agenta/sdk/agenta_decorator.py +68 -18
  76. agenta/sdk/agenta_init.py +53 -21
  77. agenta/sdk/tracing/context_manager.py +13 -0
  78. agenta/sdk/tracing/decorators.py +41 -0
  79. agenta/sdk/tracing/llm_tracing.py +220 -0
  80. agenta/sdk/tracing/logger.py +19 -0
  81. agenta/sdk/tracing/tasks_manager.py +130 -0
  82. {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/METADATA +47 -96
  83. agenta-0.13.0.dist-info/RECORD +161 -0
  84. agenta/client/backend/types/add_variant_from_base_and_config_response.py +0 -7
  85. agenta/client/backend/types/human_evaluation_scenario_update_score.py +0 -5
  86. agenta-0.12.7.dist-info/RECORD +0 -114
  87. {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/WHEEL +0 -0
  88. {agenta-0.12.7.dist-info → agenta-0.13.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,7 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from .add_variant_from_base_and_config_response import (
4
+ AddVariantFromBaseAndConfigResponse,
5
+ )
6
+
7
+ __all__ = ["AddVariantFromBaseAndConfigResponse"]
@@ -0,0 +1,7 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ from ....types.app_variant_response import AppVariantResponse
6
+
7
+ AddVariantFromBaseAndConfigResponse = typing.Union[AppVariantResponse, typing.Any]
@@ -1,113 +1,145 @@
1
1
  # This file was auto-generated by Fern from our API Definition.
2
2
 
3
- from .add_variant_from_base_and_config_response import (
4
- AddVariantFromBaseAndConfigResponse,
5
- )
6
3
  from .aggregated_result import AggregatedResult
4
+ from .aggregated_result_evaluator_config import AggregatedResultEvaluatorConfig
7
5
  from .app import App
8
- from .app_variant_output import AppVariantOutput
9
- from .app_variant_output_extended import AppVariantOutputExtended
6
+ from .app_variant_response import AppVariantResponse
10
7
  from .app_variant_revision import AppVariantRevision
11
8
  from .base_output import BaseOutput
12
9
  from .body_import_testset import BodyImportTestset
13
10
  from .config_db import ConfigDb
14
- from .container_templates_response import ContainerTemplatesResponse
15
11
  from .create_app_output import CreateAppOutput
16
- from .delete_evaluation import DeleteEvaluation
12
+ from .create_span import CreateSpan
13
+ from .create_trace_response import CreateTraceResponse
17
14
  from .docker_env_vars import DockerEnvVars
18
15
  from .environment_output import EnvironmentOutput
16
+ from .environment_output_extended import EnvironmentOutputExtended
17
+ from .environment_revision import EnvironmentRevision
18
+ from .error import Error
19
19
  from .evaluation import Evaluation
20
20
  from .evaluation_scenario import EvaluationScenario
21
21
  from .evaluation_scenario_input import EvaluationScenarioInput
22
22
  from .evaluation_scenario_output import EvaluationScenarioOutput
23
23
  from .evaluation_scenario_result import EvaluationScenarioResult
24
+ from .evaluation_scenario_score_update import EvaluationScenarioScoreUpdate
24
25
  from .evaluation_status_enum import EvaluationStatusEnum
25
26
  from .evaluation_type import EvaluationType
26
27
  from .evaluation_webhook import EvaluationWebhook
27
28
  from .evaluator import Evaluator
28
29
  from .evaluator_config import EvaluatorConfig
29
30
  from .feedback import Feedback
30
- from .get_config_reponse import GetConfigReponse
31
+ from .get_config_response import GetConfigResponse
31
32
  from .http_validation_error import HttpValidationError
32
33
  from .human_evaluation import HumanEvaluation
33
34
  from .human_evaluation_scenario import HumanEvaluationScenario
34
35
  from .human_evaluation_scenario_input import HumanEvaluationScenarioInput
35
36
  from .human_evaluation_scenario_output import HumanEvaluationScenarioOutput
36
- from .human_evaluation_scenario_score import HumanEvaluationScenarioScore
37
- from .human_evaluation_scenario_update_score import HumanEvaluationScenarioUpdateScore
37
+ from .human_evaluation_scenario_update import HumanEvaluationScenarioUpdate
38
+ from .human_evaluation_update import HumanEvaluationUpdate
38
39
  from .image import Image
39
40
  from .invite_request import InviteRequest
40
- from .list_api_keys_output import ListApiKeysOutput
41
+ from .list_api_keys_response import ListApiKeysResponse
41
42
  from .llm_run_rate_limit import LlmRunRateLimit
43
+ from .llm_tokens import LlmTokens
44
+ from .new_human_evaluation import NewHumanEvaluation
42
45
  from .new_testset import NewTestset
43
46
  from .organization import Organization
44
47
  from .organization_output import OrganizationOutput
48
+ from .permission import Permission
45
49
  from .result import Result
50
+ from .score import Score
46
51
  from .simple_evaluation_output import SimpleEvaluationOutput
47
52
  from .span import Span
53
+ from .span_detail import SpanDetail
54
+ from .span_kind import SpanKind
55
+ from .span_status_code import SpanStatusCode
56
+ from .span_variant import SpanVariant
48
57
  from .template import Template
49
58
  from .template_image_info import TemplateImageInfo
50
59
  from .test_set_output_response import TestSetOutputResponse
51
60
  from .test_set_simple_response import TestSetSimpleResponse
52
- from .trace import Trace
61
+ from .trace_detail import TraceDetail
53
62
  from .uri import Uri
54
63
  from .validation_error import ValidationError
55
64
  from .validation_error_loc_item import ValidationErrorLocItem
56
65
  from .variant_action import VariantAction
57
66
  from .variant_action_enum import VariantActionEnum
67
+ from .with_pagination import WithPagination
68
+ from .workspace_member_response import WorkspaceMemberResponse
69
+ from .workspace_permission import WorkspacePermission
70
+ from .workspace_response import WorkspaceResponse
71
+ from .workspace_role import WorkspaceRole
72
+ from .workspace_role_response import WorkspaceRoleResponse
58
73
 
59
74
  __all__ = [
60
- "AddVariantFromBaseAndConfigResponse",
61
75
  "AggregatedResult",
76
+ "AggregatedResultEvaluatorConfig",
62
77
  "App",
63
- "AppVariantOutput",
64
- "AppVariantOutputExtended",
78
+ "AppVariantResponse",
65
79
  "AppVariantRevision",
66
80
  "BaseOutput",
67
81
  "BodyImportTestset",
68
82
  "ConfigDb",
69
- "ContainerTemplatesResponse",
70
83
  "CreateAppOutput",
71
- "DeleteEvaluation",
84
+ "CreateSpan",
85
+ "CreateTraceResponse",
72
86
  "DockerEnvVars",
73
87
  "EnvironmentOutput",
88
+ "EnvironmentOutputExtended",
89
+ "EnvironmentRevision",
90
+ "Error",
74
91
  "Evaluation",
75
92
  "EvaluationScenario",
76
93
  "EvaluationScenarioInput",
77
94
  "EvaluationScenarioOutput",
78
95
  "EvaluationScenarioResult",
96
+ "EvaluationScenarioScoreUpdate",
79
97
  "EvaluationStatusEnum",
80
98
  "EvaluationType",
81
99
  "EvaluationWebhook",
82
100
  "Evaluator",
83
101
  "EvaluatorConfig",
84
102
  "Feedback",
85
- "GetConfigReponse",
103
+ "GetConfigResponse",
86
104
  "HttpValidationError",
87
105
  "HumanEvaluation",
88
106
  "HumanEvaluationScenario",
89
107
  "HumanEvaluationScenarioInput",
90
108
  "HumanEvaluationScenarioOutput",
91
- "HumanEvaluationScenarioScore",
92
- "HumanEvaluationScenarioUpdateScore",
109
+ "HumanEvaluationScenarioUpdate",
110
+ "HumanEvaluationUpdate",
93
111
  "Image",
94
112
  "InviteRequest",
95
- "ListApiKeysOutput",
113
+ "ListApiKeysResponse",
96
114
  "LlmRunRateLimit",
115
+ "LlmTokens",
116
+ "NewHumanEvaluation",
97
117
  "NewTestset",
98
118
  "Organization",
99
119
  "OrganizationOutput",
120
+ "Permission",
100
121
  "Result",
122
+ "Score",
101
123
  "SimpleEvaluationOutput",
102
124
  "Span",
125
+ "SpanDetail",
126
+ "SpanKind",
127
+ "SpanStatusCode",
128
+ "SpanVariant",
103
129
  "Template",
104
130
  "TemplateImageInfo",
105
131
  "TestSetOutputResponse",
106
132
  "TestSetSimpleResponse",
107
- "Trace",
133
+ "TraceDetail",
108
134
  "Uri",
109
135
  "ValidationError",
110
136
  "ValidationErrorLocItem",
111
137
  "VariantAction",
112
138
  "VariantActionEnum",
139
+ "WithPagination",
140
+ "WorkspaceMemberResponse",
141
+ "WorkspacePermission",
142
+ "WorkspaceResponse",
143
+ "WorkspaceRole",
144
+ "WorkspaceRoleResponse",
113
145
  ]
@@ -4,7 +4,7 @@ import datetime as dt
4
4
  import typing
5
5
 
6
6
  from ..core.datetime_utils import serialize_datetime
7
- from .evaluator_config import EvaluatorConfig
7
+ from .aggregated_result_evaluator_config import AggregatedResultEvaluatorConfig
8
8
  from .result import Result
9
9
 
10
10
  try:
@@ -14,7 +14,7 @@ except ImportError:
14
14
 
15
15
 
16
16
  class AggregatedResult(pydantic.BaseModel):
17
- evaluator_config: EvaluatorConfig
17
+ evaluator_config: AggregatedResultEvaluatorConfig
18
18
  result: Result
19
19
 
20
20
  def json(self, **kwargs: typing.Any) -> str:
@@ -0,0 +1,9 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import typing
4
+
5
+ from .evaluator_config import EvaluatorConfig
6
+
7
+ AggregatedResultEvaluatorConfig = typing.Union[
8
+ EvaluatorConfig, typing.Dict[str, typing.Any]
9
+ ]
@@ -11,19 +11,21 @@ except ImportError:
11
11
  import pydantic # type: ignore
12
12
 
13
13
 
14
- class AppVariantOutput(pydantic.BaseModel):
14
+ class AppVariantResponse(pydantic.BaseModel):
15
15
  app_id: str
16
16
  app_name: str
17
17
  variant_id: str
18
18
  variant_name: str
19
19
  parameters: typing.Optional[typing.Dict[str, typing.Any]]
20
20
  previous_variant_name: typing.Optional[str]
21
- organization_id: typing.Optional[str]
22
21
  user_id: str
23
22
  base_name: str
24
23
  base_id: str
25
24
  config_name: str
26
25
  uri: typing.Optional[str]
26
+ revision: int
27
+ organization_id: typing.Optional[str]
28
+ workspace_id: typing.Optional[str]
27
29
 
28
30
  def json(self, **kwargs: typing.Any) -> str:
29
31
  kwargs_with_defaults: typing.Any = {
@@ -4,7 +4,9 @@ import datetime as dt
4
4
  import typing
5
5
 
6
6
  from ..core.datetime_utils import serialize_datetime
7
- from .feedback import Feedback
7
+ from .llm_tokens import LlmTokens
8
+ from .span_kind import SpanKind
9
+ from .span_status_code import SpanStatusCode
8
10
 
9
11
  try:
10
12
  import pydantic.v1 as pydantic # type: ignore
@@ -12,19 +14,27 @@ except ImportError:
12
14
  import pydantic # type: ignore
13
15
 
14
16
 
15
- class Trace(pydantic.BaseModel):
17
+ class CreateSpan(pydantic.BaseModel):
18
+ id: str
16
19
  app_id: typing.Optional[str]
17
20
  variant_id: typing.Optional[str]
18
- cost: typing.Optional[float]
19
- latency: float
20
- status: str
21
- token_consumption: typing.Optional[int]
21
+ variant_name: typing.Optional[str]
22
+ inputs: typing.Optional[typing.Dict[str, typing.Any]]
23
+ outputs: typing.Optional[typing.List[str]]
24
+ config: typing.Optional[typing.Dict[str, typing.Any]]
25
+ environment: typing.Optional[str]
22
26
  tags: typing.Optional[typing.List[str]]
27
+ token_consumption: typing.Optional[int]
28
+ name: str
29
+ parent_span_id: typing.Optional[str]
30
+ attributes: typing.Optional[typing.Dict[str, typing.Any]]
31
+ spankind: str
32
+ status: str
33
+ user: typing.Optional[str]
23
34
  start_time: dt.datetime
24
- end_time: dt.datetime
25
- trace_id: str
26
- spans: typing.List[str]
27
- feedbacks: typing.Optional[typing.List[Feedback]]
35
+ end_time: typing.Optional[dt.datetime]
36
+ tokens: typing.Optional[LlmTokens]
37
+ cost: typing.Optional[float]
28
38
 
29
39
  def json(self, **kwargs: typing.Any) -> str:
30
40
  kwargs_with_defaults: typing.Any = {
@@ -0,0 +1,37 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+
8
+ try:
9
+ import pydantic.v1 as pydantic # type: ignore
10
+ except ImportError:
11
+ import pydantic # type: ignore
12
+
13
+
14
+ class CreateTraceResponse(pydantic.BaseModel):
15
+ message: str
16
+ data: typing.Dict[str, typing.Any]
17
+
18
+ def json(self, **kwargs: typing.Any) -> str:
19
+ kwargs_with_defaults: typing.Any = {
20
+ "by_alias": True,
21
+ "exclude_unset": True,
22
+ **kwargs,
23
+ }
24
+ return super().json(**kwargs_with_defaults)
25
+
26
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
27
+ kwargs_with_defaults: typing.Any = {
28
+ "by_alias": True,
29
+ "exclude_unset": True,
30
+ **kwargs,
31
+ }
32
+ return super().dict(**kwargs_with_defaults)
33
+
34
+ class Config:
35
+ frozen = True
36
+ smart_union = True
37
+ json_encoders = {dt.datetime: serialize_datetime}
@@ -17,7 +17,9 @@ class EnvironmentOutput(pydantic.BaseModel):
17
17
  deployed_app_variant_id: typing.Optional[str]
18
18
  deployed_variant_name: typing.Optional[str]
19
19
  deployed_app_variant_revision_id: typing.Optional[str]
20
- revision: typing.Optional[str]
20
+ revision: typing.Optional[int]
21
+ organization_id: typing.Optional[str]
22
+ workspace_id: typing.Optional[str]
21
23
 
22
24
  def json(self, **kwargs: typing.Any) -> str:
23
25
  kwargs_with_defaults: typing.Any = {
@@ -0,0 +1,45 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+ from .environment_revision import EnvironmentRevision
8
+
9
+ try:
10
+ import pydantic.v1 as pydantic # type: ignore
11
+ except ImportError:
12
+ import pydantic # type: ignore
13
+
14
+
15
+ class EnvironmentOutputExtended(pydantic.BaseModel):
16
+ name: str
17
+ app_id: str
18
+ deployed_app_variant_id: typing.Optional[str]
19
+ deployed_variant_name: typing.Optional[str]
20
+ deployed_app_variant_revision_id: typing.Optional[str]
21
+ revision: typing.Optional[int]
22
+ revisions: typing.List[EnvironmentRevision]
23
+ organization_id: typing.Optional[str]
24
+ workspace_id: typing.Optional[str]
25
+
26
+ def json(self, **kwargs: typing.Any) -> str:
27
+ kwargs_with_defaults: typing.Any = {
28
+ "by_alias": True,
29
+ "exclude_unset": True,
30
+ **kwargs,
31
+ }
32
+ return super().json(**kwargs_with_defaults)
33
+
34
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
35
+ kwargs_with_defaults: typing.Any = {
36
+ "by_alias": True,
37
+ "exclude_unset": True,
38
+ **kwargs,
39
+ }
40
+ return super().dict(**kwargs_with_defaults)
41
+
42
+ class Config:
43
+ frozen = True
44
+ smart_union = True
45
+ json_encoders = {dt.datetime: serialize_datetime}
@@ -0,0 +1,41 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+
8
+ try:
9
+ import pydantic.v1 as pydantic # type: ignore
10
+ except ImportError:
11
+ import pydantic # type: ignore
12
+
13
+
14
+ class EnvironmentRevision(pydantic.BaseModel):
15
+ id: str
16
+ revision: int
17
+ modified_by: str
18
+ deployed_app_variant_revision: typing.Optional[str]
19
+ deployment: typing.Optional[str]
20
+ created_at: dt.datetime
21
+
22
+ def json(self, **kwargs: typing.Any) -> str:
23
+ kwargs_with_defaults: typing.Any = {
24
+ "by_alias": True,
25
+ "exclude_unset": True,
26
+ **kwargs,
27
+ }
28
+ return super().json(**kwargs_with_defaults)
29
+
30
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
31
+ kwargs_with_defaults: typing.Any = {
32
+ "by_alias": True,
33
+ "exclude_unset": True,
34
+ **kwargs,
35
+ }
36
+ return super().dict(**kwargs_with_defaults)
37
+
38
+ class Config:
39
+ frozen = True
40
+ smart_union = True
41
+ json_encoders = {dt.datetime: serialize_datetime}
@@ -0,0 +1,37 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+
8
+ try:
9
+ import pydantic.v1 as pydantic # type: ignore
10
+ except ImportError:
11
+ import pydantic # type: ignore
12
+
13
+
14
+ class Error(pydantic.BaseModel):
15
+ message: str
16
+ stacktrace: typing.Optional[str]
17
+
18
+ def json(self, **kwargs: typing.Any) -> str:
19
+ kwargs_with_defaults: typing.Any = {
20
+ "by_alias": True,
21
+ "exclude_unset": True,
22
+ **kwargs,
23
+ }
24
+ return super().json(**kwargs_with_defaults)
25
+
26
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
27
+ kwargs_with_defaults: typing.Any = {
28
+ "by_alias": True,
29
+ "exclude_unset": True,
30
+ **kwargs,
31
+ }
32
+ return super().dict(**kwargs_with_defaults)
33
+
34
+ class Config:
35
+ frozen = True
36
+ smart_union = True
37
+ json_encoders = {dt.datetime: serialize_datetime}
@@ -5,6 +5,7 @@ import typing
5
5
 
6
6
  from ..core.datetime_utils import serialize_datetime
7
7
  from .aggregated_result import AggregatedResult
8
+ from .result import Result
8
9
 
9
10
  try:
10
11
  import pydantic.v1 as pydantic # type: ignore
@@ -21,10 +22,12 @@ class Evaluation(pydantic.BaseModel):
21
22
  variant_names: typing.List[str]
22
23
  variant_revision_ids: typing.List[str]
23
24
  revisions: typing.List[str]
24
- testset_id: str
25
- testset_name: str
26
- status: str
25
+ testset_id: typing.Optional[str]
26
+ testset_name: typing.Optional[str]
27
+ status: Result
27
28
  aggregated_results: typing.List[AggregatedResult]
29
+ average_cost: typing.Optional[Result]
30
+ average_latency: typing.Optional[Result]
28
31
  created_at: dt.datetime
29
32
  updated_at: dt.datetime
30
33
 
@@ -4,6 +4,7 @@ import datetime as dt
4
4
  import typing
5
5
 
6
6
  from ..core.datetime_utils import serialize_datetime
7
+ from .result import Result
7
8
 
8
9
  try:
9
10
  import pydantic.v1 as pydantic # type: ignore
@@ -12,8 +13,9 @@ except ImportError:
12
13
 
13
14
 
14
15
  class EvaluationScenarioOutput(pydantic.BaseModel):
15
- type: str
16
- value: typing.Optional[typing.Any]
16
+ result: Result
17
+ cost: typing.Optional[float]
18
+ latency: typing.Optional[float]
17
19
 
18
20
  def json(self, **kwargs: typing.Any) -> str:
19
21
  kwargs_with_defaults: typing.Any = {
@@ -11,8 +11,8 @@ except ImportError:
11
11
  import pydantic # type: ignore
12
12
 
13
13
 
14
- class DeleteEvaluation(pydantic.BaseModel):
15
- evaluations_ids: typing.List[str]
14
+ class EvaluationScenarioScoreUpdate(pydantic.BaseModel):
15
+ score: float
16
16
 
17
17
  def json(self, **kwargs: typing.Any) -> str:
18
18
  kwargs_with_defaults: typing.Any = {
@@ -14,6 +14,7 @@ class EvaluationStatusEnum(str, enum.Enum):
14
14
  EVALUATION_INITIALIZED = "EVALUATION_INITIALIZED"
15
15
  EVALUATION_STARTED = "EVALUATION_STARTED"
16
16
  EVALUATION_FINISHED = "EVALUATION_FINISHED"
17
+ EVALUATION_FINISHED_WITH_ERRORS = "EVALUATION_FINISHED_WITH_ERRORS"
17
18
  EVALUATION_FAILED = "EVALUATION_FAILED"
18
19
 
19
20
  def visit(
@@ -21,6 +22,7 @@ class EvaluationStatusEnum(str, enum.Enum):
21
22
  evaluation_initialized: typing.Callable[[], T_Result],
22
23
  evaluation_started: typing.Callable[[], T_Result],
23
24
  evaluation_finished: typing.Callable[[], T_Result],
25
+ evaluation_finished_with_errors: typing.Callable[[], T_Result],
24
26
  evaluation_failed: typing.Callable[[], T_Result],
25
27
  ) -> T_Result:
26
28
  if self is EvaluationStatusEnum.EVALUATION_INITIALIZED:
@@ -29,5 +31,7 @@ class EvaluationStatusEnum(str, enum.Enum):
29
31
  return evaluation_started()
30
32
  if self is EvaluationStatusEnum.EVALUATION_FINISHED:
31
33
  return evaluation_finished()
34
+ if self is EvaluationStatusEnum.EVALUATION_FINISHED_WITH_ERRORS:
35
+ return evaluation_finished_with_errors()
32
36
  if self is EvaluationStatusEnum.EVALUATION_FAILED:
33
37
  return evaluation_failed()
@@ -16,6 +16,7 @@ class Evaluator(pydantic.BaseModel):
16
16
  key: str
17
17
  direct_use: bool
18
18
  settings_template: typing.Dict[str, typing.Any]
19
+ description: typing.Optional[str]
19
20
 
20
21
  def json(self, **kwargs: typing.Any) -> str:
21
22
  kwargs_with_defaults: typing.Any = {
@@ -11,8 +11,7 @@ except ImportError:
11
11
  import pydantic # type: ignore
12
12
 
13
13
 
14
- class GetConfigReponse(pydantic.BaseModel):
15
- config_id: str
14
+ class GetConfigResponse(pydantic.BaseModel):
16
15
  config_name: str
17
16
  current_version: int
18
17
  parameters: typing.Dict[str, typing.Any]
@@ -6,7 +6,7 @@ import typing
6
6
  from ..core.datetime_utils import serialize_datetime
7
7
  from .human_evaluation_scenario_input import HumanEvaluationScenarioInput
8
8
  from .human_evaluation_scenario_output import HumanEvaluationScenarioOutput
9
- from .human_evaluation_scenario_score import HumanEvaluationScenarioScore
9
+ from .score import Score
10
10
 
11
11
  try:
12
12
  import pydantic.v1 as pydantic # type: ignore
@@ -20,7 +20,7 @@ class HumanEvaluationScenario(pydantic.BaseModel):
20
20
  inputs: typing.List[HumanEvaluationScenarioInput]
21
21
  outputs: typing.List[HumanEvaluationScenarioOutput]
22
22
  vote: typing.Optional[str]
23
- score: typing.Optional[HumanEvaluationScenarioScore]
23
+ score: typing.Optional[Score]
24
24
  evaluation: typing.Optional[str]
25
25
  correct_answer: typing.Optional[str]
26
26
  is_pinned: typing.Optional[bool]
@@ -4,7 +4,9 @@ import datetime as dt
4
4
  import typing
5
5
 
6
6
  from ..core.datetime_utils import serialize_datetime
7
- from .app_variant_revision import AppVariantRevision
7
+ from .human_evaluation_scenario_input import HumanEvaluationScenarioInput
8
+ from .human_evaluation_scenario_output import HumanEvaluationScenarioOutput
9
+ from .score import Score
8
10
 
9
11
  try:
10
12
  import pydantic.v1 as pydantic # type: ignore
@@ -12,21 +14,14 @@ except ImportError:
12
14
  import pydantic # type: ignore
13
15
 
14
16
 
15
- class AppVariantOutputExtended(pydantic.BaseModel):
16
- app_id: str
17
- app_name: str
18
- variant_id: str
19
- variant_name: str
20
- parameters: typing.Optional[typing.Dict[str, typing.Any]]
21
- previous_variant_name: typing.Optional[str]
22
- organization_id: str
23
- user_id: str
24
- base_name: str
25
- base_id: str
26
- config_name: str
27
- uri: typing.Optional[str]
28
- revision: int
29
- revisions: typing.List[AppVariantRevision]
17
+ class HumanEvaluationScenarioUpdate(pydantic.BaseModel):
18
+ vote: typing.Optional[str]
19
+ score: typing.Optional[Score]
20
+ correct_answer: typing.Optional[str]
21
+ outputs: typing.Optional[typing.List[HumanEvaluationScenarioOutput]]
22
+ inputs: typing.Optional[typing.List[HumanEvaluationScenarioInput]]
23
+ is_pinned: typing.Optional[bool]
24
+ note: typing.Optional[str]
30
25
 
31
26
  def json(self, **kwargs: typing.Any) -> str:
32
27
  kwargs_with_defaults: typing.Any = {
@@ -0,0 +1,37 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+ from .evaluation_status_enum import EvaluationStatusEnum
8
+
9
+ try:
10
+ import pydantic.v1 as pydantic # type: ignore
11
+ except ImportError:
12
+ import pydantic # type: ignore
13
+
14
+
15
+ class HumanEvaluationUpdate(pydantic.BaseModel):
16
+ status: typing.Optional[EvaluationStatusEnum]
17
+
18
+ def json(self, **kwargs: typing.Any) -> str:
19
+ kwargs_with_defaults: typing.Any = {
20
+ "by_alias": True,
21
+ "exclude_unset": True,
22
+ **kwargs,
23
+ }
24
+ return super().json(**kwargs_with_defaults)
25
+
26
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
27
+ kwargs_with_defaults: typing.Any = {
28
+ "by_alias": True,
29
+ "exclude_unset": True,
30
+ **kwargs,
31
+ }
32
+ return super().dict(**kwargs_with_defaults)
33
+
34
+ class Config:
35
+ frozen = True
36
+ smart_union = True
37
+ json_encoders = {dt.datetime: serialize_datetime}
@@ -16,6 +16,7 @@ class Image(pydantic.BaseModel):
16
16
  docker_id: str
17
17
  tags: str
18
18
  organization_id: typing.Optional[str]
19
+ workspace_id: typing.Optional[str]
19
20
 
20
21
  def json(self, **kwargs: typing.Any) -> str:
21
22
  kwargs_with_defaults: typing.Any = {