agenta 0.52.6__py3-none-any.whl → 0.63.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agenta/__init__.py +12 -3
- agenta/client/__init__.py +4 -4
- agenta/client/backend/__init__.py +4 -4
- agenta/client/backend/api_keys/client.py +2 -2
- agenta/client/backend/billing/client.py +2 -2
- agenta/client/backend/billing/raw_client.py +2 -2
- agenta/client/backend/client.py +56 -48
- agenta/client/backend/core/client_wrapper.py +2 -2
- agenta/client/backend/core/file.py +3 -1
- agenta/client/backend/core/http_client.py +3 -3
- agenta/client/backend/core/pydantic_utilities.py +13 -3
- agenta/client/backend/human_evaluations/client.py +2 -2
- agenta/client/backend/human_evaluations/raw_client.py +2 -2
- agenta/client/backend/organization/client.py +46 -34
- agenta/client/backend/organization/raw_client.py +32 -26
- agenta/client/backend/raw_client.py +26 -26
- agenta/client/backend/testsets/client.py +18 -18
- agenta/client/backend/testsets/raw_client.py +30 -30
- agenta/client/backend/types/__init__.py +4 -4
- agenta/client/backend/types/account_request.py +3 -1
- agenta/client/backend/types/account_response.py +3 -1
- agenta/client/backend/types/agenta_node_dto.py +3 -1
- agenta/client/backend/types/agenta_nodes_response.py +3 -1
- agenta/client/backend/types/agenta_root_dto.py +3 -1
- agenta/client/backend/types/agenta_roots_response.py +3 -1
- agenta/client/backend/types/agenta_tree_dto.py +3 -1
- agenta/client/backend/types/agenta_trees_response.py +3 -1
- agenta/client/backend/types/aggregated_result.py +3 -1
- agenta/client/backend/types/analytics_response.py +3 -1
- agenta/client/backend/types/annotation.py +6 -4
- agenta/client/backend/types/annotation_create.py +3 -1
- agenta/client/backend/types/annotation_edit.py +3 -1
- agenta/client/backend/types/annotation_link.py +3 -1
- agenta/client/backend/types/annotation_link_response.py +3 -1
- agenta/client/backend/types/annotation_query.py +3 -1
- agenta/client/backend/types/annotation_query_request.py +3 -1
- agenta/client/backend/types/annotation_reference.py +3 -1
- agenta/client/backend/types/annotation_references.py +3 -1
- agenta/client/backend/types/annotation_response.py +3 -1
- agenta/client/backend/types/annotations_response.py +3 -1
- agenta/client/backend/types/app.py +3 -1
- agenta/client/backend/types/app_variant_response.py +3 -1
- agenta/client/backend/types/app_variant_revision.py +3 -1
- agenta/client/backend/types/artifact.py +6 -4
- agenta/client/backend/types/base_output.py +3 -1
- agenta/client/backend/types/body_fetch_workflow_revision.py +3 -1
- agenta/client/backend/types/body_import_testset.py +3 -1
- agenta/client/backend/types/bucket_dto.py +3 -1
- agenta/client/backend/types/collect_status_response.py +3 -1
- agenta/client/backend/types/config_db.py +3 -1
- agenta/client/backend/types/config_dto.py +3 -1
- agenta/client/backend/types/config_response_model.py +3 -1
- agenta/client/backend/types/correct_answer.py +3 -1
- agenta/client/backend/types/create_app_output.py +3 -1
- agenta/client/backend/types/custom_model_settings_dto.py +3 -1
- agenta/client/backend/types/custom_provider_dto.py +3 -1
- agenta/client/backend/types/custom_provider_kind.py +1 -1
- agenta/client/backend/types/custom_provider_settings_dto.py +3 -1
- agenta/client/backend/types/delete_evaluation.py +3 -1
- agenta/client/backend/types/environment_output.py +3 -1
- agenta/client/backend/types/environment_output_extended.py +3 -1
- agenta/client/backend/types/environment_revision.py +3 -1
- agenta/client/backend/types/error.py +3 -1
- agenta/client/backend/types/evaluation.py +3 -1
- agenta/client/backend/types/evaluation_scenario.py +3 -1
- agenta/client/backend/types/evaluation_scenario_input.py +3 -1
- agenta/client/backend/types/evaluation_scenario_output.py +3 -1
- agenta/client/backend/types/evaluation_scenario_result.py +3 -1
- agenta/client/backend/types/evaluator.py +6 -4
- agenta/client/backend/types/evaluator_config.py +6 -4
- agenta/client/backend/types/evaluator_flags.py +3 -1
- agenta/client/backend/types/evaluator_mapping_output_interface.py +3 -1
- agenta/client/backend/types/evaluator_output_interface.py +3 -1
- agenta/client/backend/types/evaluator_query.py +3 -1
- agenta/client/backend/types/evaluator_query_request.py +3 -1
- agenta/client/backend/types/evaluator_request.py +3 -1
- agenta/client/backend/types/evaluator_response.py +3 -1
- agenta/client/backend/types/evaluators_response.py +3 -1
- agenta/client/backend/types/exception_dto.py +3 -1
- agenta/client/backend/types/extended_o_tel_tracing_response.py +3 -1
- agenta/client/backend/types/get_config_response.py +3 -1
- agenta/client/backend/types/header.py +3 -1
- agenta/client/backend/types/http_validation_error.py +3 -1
- agenta/client/backend/types/human_evaluation.py +3 -1
- agenta/client/backend/types/human_evaluation_scenario.py +3 -1
- agenta/client/backend/types/human_evaluation_scenario_input.py +3 -1
- agenta/client/backend/types/human_evaluation_scenario_output.py +3 -1
- agenta/client/backend/types/invite_request.py +3 -1
- agenta/client/backend/types/legacy_analytics_response.py +3 -1
- agenta/client/backend/types/legacy_data_point.py +3 -1
- agenta/client/backend/types/legacy_evaluator.py +3 -1
- agenta/client/backend/types/legacy_scope_request.py +3 -1
- agenta/client/backend/types/legacy_scopes_response.py +3 -1
- agenta/client/backend/types/legacy_subscription_request.py +3 -1
- agenta/client/backend/types/legacy_user_request.py +3 -1
- agenta/client/backend/types/legacy_user_response.py +3 -1
- agenta/client/backend/types/lifecycle_dto.py +3 -1
- agenta/client/backend/types/link_dto.py +3 -1
- agenta/client/backend/types/list_api_keys_response.py +3 -1
- agenta/client/backend/types/llm_run_rate_limit.py +3 -1
- agenta/client/backend/types/meta_request.py +3 -1
- agenta/client/backend/types/metrics_dto.py +3 -1
- agenta/client/backend/types/new_testset.py +3 -1
- agenta/client/backend/types/node_dto.py +3 -1
- agenta/client/backend/types/o_tel_context_dto.py +3 -1
- agenta/client/backend/types/o_tel_event.py +6 -4
- agenta/client/backend/types/o_tel_event_dto.py +3 -1
- agenta/client/backend/types/o_tel_extra_dto.py +3 -1
- agenta/client/backend/types/o_tel_flat_span.py +6 -4
- agenta/client/backend/types/o_tel_link.py +6 -4
- agenta/client/backend/types/o_tel_link_dto.py +3 -1
- agenta/client/backend/types/o_tel_links_response.py +3 -1
- agenta/client/backend/types/o_tel_span.py +1 -1
- agenta/client/backend/types/o_tel_span_dto.py +3 -1
- agenta/client/backend/types/o_tel_spans_tree.py +3 -1
- agenta/client/backend/types/o_tel_tracing_data_response.py +3 -1
- agenta/client/backend/types/o_tel_tracing_request.py +3 -1
- agenta/client/backend/types/o_tel_tracing_response.py +3 -1
- agenta/client/backend/types/organization.py +3 -1
- agenta/client/backend/types/organization_details.py +3 -1
- agenta/client/backend/types/organization_membership_request.py +3 -1
- agenta/client/backend/types/organization_output.py +3 -1
- agenta/client/backend/types/organization_request.py +3 -1
- agenta/client/backend/types/parent_dto.py +3 -1
- agenta/client/backend/types/project_membership_request.py +3 -1
- agenta/client/backend/types/project_request.py +3 -1
- agenta/client/backend/types/project_scope.py +3 -1
- agenta/client/backend/types/projects_response.py +3 -1
- agenta/client/backend/types/reference.py +6 -4
- agenta/client/backend/types/reference_dto.py +3 -1
- agenta/client/backend/types/reference_request_model.py +3 -1
- agenta/client/backend/types/result.py +3 -1
- agenta/client/backend/types/root_dto.py +3 -1
- agenta/client/backend/types/scopes_response_model.py +3 -1
- agenta/client/backend/types/secret_dto.py +3 -1
- agenta/client/backend/types/secret_response_dto.py +3 -1
- agenta/client/backend/types/simple_evaluation_output.py +3 -1
- agenta/client/backend/types/span_dto.py +6 -4
- agenta/client/backend/types/standard_provider_dto.py +3 -1
- agenta/client/backend/types/standard_provider_settings_dto.py +3 -1
- agenta/client/backend/types/status_dto.py +3 -1
- agenta/client/backend/types/tags_request.py +3 -1
- agenta/client/backend/types/testcase_response.py +6 -4
- agenta/client/backend/types/testset.py +6 -4
- agenta/client/backend/types/{test_set_output_response.py → testset_output_response.py} +4 -2
- agenta/client/backend/types/testset_request.py +3 -1
- agenta/client/backend/types/testset_response.py +3 -1
- agenta/client/backend/types/{test_set_simple_response.py → testset_simple_response.py} +4 -2
- agenta/client/backend/types/testsets_response.py +3 -1
- agenta/client/backend/types/time_dto.py +3 -1
- agenta/client/backend/types/tree_dto.py +3 -1
- agenta/client/backend/types/update_app_output.py +3 -1
- agenta/client/backend/types/user_request.py +3 -1
- agenta/client/backend/types/validation_error.py +3 -1
- agenta/client/backend/types/workflow_artifact.py +6 -4
- agenta/client/backend/types/workflow_data.py +3 -1
- agenta/client/backend/types/workflow_flags.py +3 -1
- agenta/client/backend/types/workflow_request.py +3 -1
- agenta/client/backend/types/workflow_response.py +3 -1
- agenta/client/backend/types/workflow_revision.py +6 -4
- agenta/client/backend/types/workflow_revision_request.py +3 -1
- agenta/client/backend/types/workflow_revision_response.py +3 -1
- agenta/client/backend/types/workflow_revisions_response.py +3 -1
- agenta/client/backend/types/workflow_variant.py +6 -4
- agenta/client/backend/types/workflow_variant_request.py +3 -1
- agenta/client/backend/types/workflow_variant_response.py +3 -1
- agenta/client/backend/types/workflow_variants_response.py +3 -1
- agenta/client/backend/types/workflows_response.py +3 -1
- agenta/client/backend/types/workspace.py +3 -1
- agenta/client/backend/types/workspace_member_response.py +3 -1
- agenta/client/backend/types/workspace_membership_request.py +3 -1
- agenta/client/backend/types/workspace_permission.py +3 -1
- agenta/client/backend/types/workspace_request.py +3 -1
- agenta/client/backend/types/workspace_response.py +3 -1
- agenta/client/backend/vault/raw_client.py +4 -4
- agenta/client/backend/workspace/client.py +2 -2
- agenta/client/client.py +102 -88
- agenta/sdk/__init__.py +52 -3
- agenta/sdk/agenta_init.py +43 -16
- agenta/sdk/assets.py +23 -15
- agenta/sdk/context/serving.py +20 -8
- agenta/sdk/context/tracing.py +40 -22
- agenta/sdk/contexts/__init__.py +0 -0
- agenta/sdk/contexts/routing.py +38 -0
- agenta/sdk/contexts/running.py +57 -0
- agenta/sdk/contexts/tracing.py +86 -0
- agenta/sdk/decorators/__init__.py +1 -0
- agenta/sdk/decorators/routing.py +284 -0
- agenta/sdk/decorators/running.py +692 -98
- agenta/sdk/decorators/serving.py +20 -21
- agenta/sdk/decorators/tracing.py +176 -131
- agenta/sdk/engines/__init__.py +0 -0
- agenta/sdk/engines/running/__init__.py +0 -0
- agenta/sdk/engines/running/utils.py +17 -0
- agenta/sdk/engines/tracing/__init__.py +1 -0
- agenta/sdk/engines/tracing/attributes.py +185 -0
- agenta/sdk/engines/tracing/conventions.py +49 -0
- agenta/sdk/engines/tracing/exporters.py +130 -0
- agenta/sdk/engines/tracing/inline.py +1154 -0
- agenta/sdk/engines/tracing/processors.py +190 -0
- agenta/sdk/engines/tracing/propagation.py +102 -0
- agenta/sdk/engines/tracing/spans.py +136 -0
- agenta/sdk/engines/tracing/tracing.py +324 -0
- agenta/sdk/evaluations/__init__.py +2 -0
- agenta/sdk/evaluations/metrics.py +37 -0
- agenta/sdk/evaluations/preview/__init__.py +0 -0
- agenta/sdk/evaluations/preview/evaluate.py +765 -0
- agenta/sdk/evaluations/preview/utils.py +861 -0
- agenta/sdk/evaluations/results.py +66 -0
- agenta/sdk/evaluations/runs.py +153 -0
- agenta/sdk/evaluations/scenarios.py +48 -0
- agenta/sdk/litellm/litellm.py +12 -0
- agenta/sdk/litellm/mockllm.py +6 -8
- agenta/sdk/litellm/mocks/__init__.py +5 -5
- agenta/sdk/managers/applications.py +304 -0
- agenta/sdk/managers/config.py +2 -2
- agenta/sdk/managers/evaluations.py +0 -0
- agenta/sdk/managers/evaluators.py +303 -0
- agenta/sdk/managers/secrets.py +161 -24
- agenta/sdk/managers/shared.py +3 -1
- agenta/sdk/managers/testsets.py +441 -0
- agenta/sdk/managers/vault.py +3 -3
- agenta/sdk/middleware/auth.py +0 -176
- agenta/sdk/middleware/config.py +27 -9
- agenta/sdk/middleware/vault.py +204 -9
- agenta/sdk/middlewares/__init__.py +0 -0
- agenta/sdk/middlewares/routing/__init__.py +0 -0
- agenta/sdk/middlewares/routing/auth.py +263 -0
- agenta/sdk/middlewares/routing/cors.py +30 -0
- agenta/sdk/middlewares/routing/otel.py +29 -0
- agenta/sdk/middlewares/running/__init__.py +0 -0
- agenta/sdk/middlewares/running/normalizer.py +321 -0
- agenta/sdk/middlewares/running/resolver.py +161 -0
- agenta/sdk/middlewares/running/vault.py +140 -0
- agenta/sdk/models/__init__.py +0 -0
- agenta/sdk/models/blobs.py +33 -0
- agenta/sdk/models/evaluations.py +119 -0
- agenta/sdk/models/git.py +126 -0
- agenta/sdk/models/shared.py +167 -0
- agenta/sdk/models/testsets.py +163 -0
- agenta/sdk/models/tracing.py +202 -0
- agenta/sdk/models/workflows.py +753 -0
- agenta/sdk/tracing/attributes.py +4 -4
- agenta/sdk/tracing/exporters.py +67 -17
- agenta/sdk/tracing/inline.py +37 -45
- agenta/sdk/tracing/processors.py +97 -0
- agenta/sdk/tracing/propagation.py +3 -1
- agenta/sdk/tracing/spans.py +4 -0
- agenta/sdk/tracing/tracing.py +13 -15
- agenta/sdk/types.py +222 -22
- agenta/sdk/utils/cache.py +1 -1
- agenta/sdk/utils/client.py +38 -0
- agenta/sdk/utils/helpers.py +13 -12
- agenta/sdk/utils/logging.py +18 -78
- agenta/sdk/utils/references.py +23 -0
- agenta/sdk/workflows/builtin.py +600 -0
- agenta/sdk/workflows/configurations.py +22 -0
- agenta/sdk/workflows/errors.py +292 -0
- agenta/sdk/workflows/handlers.py +1791 -0
- agenta/sdk/workflows/interfaces.py +948 -0
- agenta/sdk/workflows/sandbox.py +118 -0
- agenta/sdk/workflows/utils.py +303 -6
- {agenta-0.52.6.dist-info → agenta-0.63.2.dist-info}/METADATA +37 -33
- agenta-0.63.2.dist-info/RECORD +421 -0
- {agenta-0.52.6.dist-info → agenta-0.63.2.dist-info}/WHEEL +1 -1
- agenta/sdk/middleware/adapt.py +0 -253
- agenta/sdk/middleware/base.py +0 -40
- agenta/sdk/middleware/flags.py +0 -40
- agenta/sdk/workflows/types.py +0 -472
- agenta-0.52.6.dist-info/RECORD +0 -371
- /agenta/sdk/{workflows → engines/running}/registry.py +0 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from typing import Optional, Dict, Any
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
4
|
+
from agenta.sdk.utils.client import authed_api
|
|
5
|
+
from agenta.sdk.models.evaluations import EvaluationResult
|
|
6
|
+
|
|
7
|
+
# TODO: ADD TYPES
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def acreate(
    *,
    run_id: UUID,
    scenario_id: UUID,
    step_key: str,
    # Not wired through yet: repeat_idx (str), timestamp (datetime), interval (float).
    #
    testcase_id: Optional[UUID] = None,
    trace_id: Optional[str] = None,
    error: Optional[dict] = None,
    #
    flags: Optional[Dict[str, Any]] = None,
    tags: Optional[Dict[str, Any]] = None,
    meta: Optional[Dict[str, Any]] = None,
) -> EvaluationResult:
    """Create a single evaluation result and return it.

    Sends one entry in the ``results`` list of a POST request to
    ``/preview/evaluations/results/`` and builds an ``EvaluationResult``
    from the first entry of the ``results`` list in the response.

    Args:
        run_id: Run the result belongs to (sent as a string).
        scenario_id: Scenario the result belongs to (sent as a string).
        step_key: Key of the step that produced the result.
        testcase_id: Optional testcase reference (sent as a string when set).
        trace_id: Optional trace reference.
        error: Optional error payload, forwarded unchanged.
        flags: Optional flags, forwarded unchanged.
        tags: Optional tags, forwarded unchanged.
        meta: Optional metadata, forwarded unchanged.

    Returns:
        The created result, as parsed from the response body.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises; the
            response body is printed before re-raising.
        KeyError, IndexError: If the response body has no ``results`` entry.
    """
    # NOTE(review): the call below is not awaited, so it presumably blocks
    # the event loop for the duration of the request -- confirm.
    # NOTE(review): status is always "success", even when `error` is given --
    # confirm this is intended.
    payload = dict(
        results=[
            dict(
                flags=flags,
                tags=tags,
                meta=meta,
                #
                testcase_id=str(testcase_id) if testcase_id else None,
                trace_id=trace_id,
                error=error,
                #
                step_key=step_key,
                run_id=str(run_id),
                scenario_id=str(scenario_id),
                #
                status="success",
            )
        ]
    )

    response = authed_api()(
        method="POST",
        endpoint="/preview/evaluations/results/",
        json=payload,
    )

    try:
        response.raise_for_status()
    except Exception:
        # Surface the server's error body before propagating the failure.
        print(response.text)
        raise

    body = response.json()

    return EvaluationResult(**body["results"][0])
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
from typing import Optional, Dict, Any
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
4
|
+
from agenta.sdk.utils.client import authed_api
|
|
5
|
+
from agenta.sdk.models.evaluations import EvaluationRun, Target
|
|
6
|
+
|
|
7
|
+
import agenta as ag
|
|
8
|
+
|
|
9
|
+
# TODO: ADD TYPES
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def afetch(
    *,
    run_id: UUID,
) -> Optional[EvaluationRun]:
    """Fetch an evaluation run by id.

    Issues a GET request to ``/preview/evaluations/runs/{run_id}``.

    Args:
        run_id: Identifier of the run to fetch.

    Returns:
        The run built from the ``run`` entry of the response, or ``None``
        when the response has no ``count`` key, a ``count`` of 0, or no
        ``run`` key.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises; the
            response body is printed before re-raising.
    """
    response = authed_api()(
        method="GET",
        endpoint=f"/preview/evaluations/runs/{run_id}",
    )

    try:
        response.raise_for_status()
    except Exception:
        # Surface the server's error body before propagating the failure.
        print(response.text)
        raise

    body = response.json()

    if "count" not in body or body["count"] == 0 or "run" not in body:
        return None

    return EvaluationRun(**body["run"])
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
async def acreate(
    *,
    name: Optional[str] = None,
    description: Optional[str] = None,
    #
    flags: Optional[Dict[str, Any]] = None,
    tags: Optional[Dict[str, Any]] = None,
    meta: Optional[Dict[str, Any]] = None,
    #
    query_steps: Optional[Target] = None,
    testset_steps: Optional[Target] = None,
    application_steps: Optional[Target] = None,
    evaluator_steps: Optional[Target] = None,
    #
    repeats: Optional[int] = None,
) -> Optional[EvaluationRun]:
    """Create an evaluation and return the corresponding run.

    Sends a POST request to ``/preview/simple/evaluations/`` with the
    evaluation created in ``"running"`` status, then re-fetches the run
    through ``afetch`` using the id found in the response.

    Args:
        name: Optional evaluation name.
        description: Optional evaluation description.
        flags: Optional flags, forwarded unchanged.
        tags: Optional tags, forwarded unchanged.
        meta: Optional metadata, forwarded unchanged.
        query_steps: Optional query steps, forwarded unchanged.
        testset_steps: Optional testset steps, forwarded unchanged.
        application_steps: Optional application steps, forwarded unchanged.
        evaluator_steps: Optional evaluator steps, forwarded unchanged.
        repeats: Optional repeat count, forwarded unchanged.

    Returns:
        The run returned by ``afetch``, or ``None`` when the response has no
        ``evaluation`` entry or that entry has no ``id``.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises; the
            response body is printed before re-raising.
    """
    # NOTE(review): the *_steps values are placed in the JSON payload as-is;
    # presumably they must already be JSON-serializable -- confirm.
    payload = dict(
        evaluation=dict(
            name=name,
            description=description,
            #
            flags=flags,
            tags=tags,
            meta=meta,
            #
            data=dict(
                status="running",
                query_steps=query_steps,
                testset_steps=testset_steps,
                application_steps=application_steps,
                evaluator_steps=evaluator_steps,
                repeats=repeats,
            ),
            #
            jit={"testsets": True, "evaluators": False},
        )
    )

    response = authed_api()(
        method="POST",
        endpoint="/preview/simple/evaluations/",
        json=payload,
    )

    try:
        response.raise_for_status()
    except Exception:
        # Surface the server's error body before propagating the failure.
        print(response.text)
        raise

    body = response.json()

    if "evaluation" not in body or "id" not in body["evaluation"]:
        return None

    run_id = UUID(body["evaluation"]["id"])

    return await afetch(run_id=run_id)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
async def aclose(
    *,
    run_id: UUID,
    #
    status: Optional[str] = "success",
) -> Optional[EvaluationRun]:
    """Close an evaluation run with the given status and return the run.

    Sends a POST request to
    ``/preview/evaluations/runs/{run_id}/close/{status}``, then re-fetches
    the run through ``afetch`` using the id found in the response.

    Args:
        run_id: Identifier of the run to close.
        status: Final status placed in the URL path; defaults to "success".

    Returns:
        The run returned by ``afetch``, or ``None`` when the response has no
        ``run`` entry or that entry has no ``id``.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises; the
            response body is printed before re-raising.
    """
    # NOTE(review): status is typed Optional, but None would be formatted
    # into the path as the literal "None" -- confirm whether None is a
    # supported value.
    response = authed_api()(
        method="POST",
        endpoint=f"/preview/evaluations/runs/{run_id}/close/{status}",
    )

    try:
        response.raise_for_status()
    except Exception:
        # Surface the server's error body before propagating the failure.
        print(response.text)
        raise

    body = response.json()

    if "run" not in body or "id" not in body["run"]:
        return None

    run_id = UUID(body["run"]["id"])

    return await afetch(run_id=run_id)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
async def aurl(
    *,
    run_id: UUID,
) -> Optional[str]:
    """Build the web URL of an evaluation run's results page.

    Sends a GET request to ``/projects`` with ``scope=project`` and uses the
    ``workspace_id`` and ``project_id`` of the single returned entry,
    together with the host of ``ag.DEFAULT_AGENTA_SINGLETON_INSTANCE``.

    Args:
        run_id: Identifier of the run to link to.

    Returns:
        A URL of the form
        ``{host}/w/{workspace_id}/p/{project_id}/evaluations/results/{run_id}``,
        or ``None`` when the response does not contain exactly one entry.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises; the
            response body is printed before re-raising.
    """
    response = authed_api()(
        method="GET",
        endpoint="/projects",
        params={"scope": "project"},
    )

    try:
        response.raise_for_status()
    except Exception:
        # Surface the server's error body before propagating the failure.
        print(response.text)
        raise

    # Parse the body once instead of once per access.
    projects = response.json()

    if len(projects) != 1:
        return None

    project_info = projects[0]

    workspace_id = project_info.get("workspace_id")
    project_id = project_info.get("project_id")

    return (
        f"{ag.DEFAULT_AGENTA_SINGLETON_INSTANCE.host}"
        f"/w/{workspace_id}"
        f"/p/{project_id}"
        f"/evaluations/results/{run_id}"
    )
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from typing import Optional, Dict, Any
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
4
|
+
from agenta.sdk.utils.client import authed_api
|
|
5
|
+
from agenta.sdk.models.evaluations import EvaluationScenario
|
|
6
|
+
|
|
7
|
+
# TODO: ADD TYPES
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def acreate(
    *,
    run_id: UUID,
    #
    flags: Optional[Dict[str, Any]] = None,
    tags: Optional[Dict[str, Any]] = None,
    meta: Optional[Dict[str, Any]] = None,
) -> EvaluationScenario:
    """Create a single evaluation scenario for a run and return it.

    Sends one entry in the ``scenarios`` list of a POST request to
    ``/preview/evaluations/scenarios/`` and builds an ``EvaluationScenario``
    from the first entry of the ``scenarios`` list in the response.

    Args:
        run_id: Run the scenario belongs to (sent as a string).
        flags: Optional flags, forwarded unchanged.
        tags: Optional tags, forwarded unchanged.
        meta: Optional metadata, forwarded unchanged.

    Returns:
        The created scenario, as parsed from the response body.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises; the
            response body is printed before re-raising.
        KeyError, IndexError: If the response body has no ``scenarios`` entry.
    """
    # The scenario is created with a fixed "success" status.
    payload = dict(
        scenarios=[
            dict(
                flags=flags,
                tags=tags,
                meta=meta,
                #
                run_id=str(run_id),
                #
                status="success",
            )
        ]
    )

    response = authed_api()(
        method="POST",
        endpoint="/preview/evaluations/scenarios/",
        json=payload,
    )

    try:
        response.raise_for_status()
    except Exception:
        # Surface the server's error body before propagating the failure.
        print(response.text)
        raise

    body = response.json()

    return EvaluationScenario(**body["scenarios"][0])
|
agenta/sdk/litellm/litellm.py
CHANGED
|
@@ -193,6 +193,9 @@ def litellm_handler():
|
|
|
193
193
|
|
|
194
194
|
span.end()
|
|
195
195
|
|
|
196
|
+
# Clean up span from dictionary to prevent memory leak
|
|
197
|
+
del self.span[litellm_call_id]
|
|
198
|
+
|
|
196
199
|
def log_failure_event(
|
|
197
200
|
self,
|
|
198
201
|
kwargs,
|
|
@@ -221,6 +224,9 @@ def litellm_handler():
|
|
|
221
224
|
|
|
222
225
|
span.end()
|
|
223
226
|
|
|
227
|
+
# Clean up span from dictionary to prevent memory leak
|
|
228
|
+
del self.span[litellm_call_id]
|
|
229
|
+
|
|
224
230
|
async def async_log_stream_event(
|
|
225
231
|
self,
|
|
226
232
|
kwargs,
|
|
@@ -321,6 +327,9 @@ def litellm_handler():
|
|
|
321
327
|
|
|
322
328
|
span.end()
|
|
323
329
|
|
|
330
|
+
# Clean up span from dictionary to prevent memory leak
|
|
331
|
+
del self.span[litellm_call_id]
|
|
332
|
+
|
|
324
333
|
async def async_log_failure_event(
|
|
325
334
|
self,
|
|
326
335
|
kwargs,
|
|
@@ -349,4 +358,7 @@ def litellm_handler():
|
|
|
349
358
|
|
|
350
359
|
span.end()
|
|
351
360
|
|
|
361
|
+
# Clean up span from dictionary to prevent memory leak
|
|
362
|
+
del self.span[litellm_call_id]
|
|
363
|
+
|
|
352
364
|
return LitellmHandler()
|
agenta/sdk/litellm/mockllm.py
CHANGED
|
@@ -2,10 +2,12 @@ from typing import Optional, Protocol, Any
|
|
|
2
2
|
from os import environ
|
|
3
3
|
from contextlib import contextmanager
|
|
4
4
|
|
|
5
|
+
import litellm
|
|
6
|
+
|
|
5
7
|
from agenta.sdk.utils.logging import get_module_logger
|
|
6
8
|
|
|
7
9
|
from agenta.sdk.litellm.mocks import MOCKS
|
|
8
|
-
from agenta.sdk.
|
|
10
|
+
from agenta.sdk.contexts.routing import RoutingContext
|
|
9
11
|
|
|
10
12
|
AGENTA_LITELLM_MOCK = environ.get("AGENTA_LITELLM_MOCK") or None
|
|
11
13
|
|
|
@@ -65,18 +67,14 @@ def user_aws_credentials_from(ps: dict):
|
|
|
65
67
|
|
|
66
68
|
|
|
67
69
|
class LitellmProtocol(Protocol):
|
|
68
|
-
async def acompletion(self, *args: Any, **kwargs: Any) -> Any:
|
|
69
|
-
...
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
litellm: Optional[LitellmProtocol] = None # pylint: disable=invalid-name
|
|
70
|
+
async def acompletion(self, *args: Any, **kwargs: Any) -> Any: ...
|
|
73
71
|
|
|
74
72
|
|
|
75
73
|
async def acompletion(*args, **kwargs):
|
|
76
|
-
mock = AGENTA_LITELLM_MOCK or
|
|
74
|
+
mock = AGENTA_LITELLM_MOCK or RoutingContext.get().mock
|
|
77
75
|
|
|
78
76
|
if mock:
|
|
79
|
-
log.debug("Mocking litellm: %s.", mock)
|
|
77
|
+
# log.debug("Mocking litellm: %s.", mock)
|
|
80
78
|
|
|
81
79
|
if mock not in MOCKS:
|
|
82
80
|
mock = "hello"
|
|
@@ -3,7 +3,7 @@ from asyncio import sleep
|
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
from agenta.sdk.decorators.tracing import instrument
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class MockMessageModel(BaseModel):
|
|
@@ -18,7 +18,7 @@ class MockResponseModel(BaseModel):
|
|
|
18
18
|
choices: list[MockChoiceModel]
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
@
|
|
21
|
+
@instrument()
|
|
22
22
|
def hello_mock_response(*args, **kwargs) -> MockResponseModel:
|
|
23
23
|
return MockResponseModel(
|
|
24
24
|
choices=[
|
|
@@ -31,7 +31,7 @@ def hello_mock_response(*args, **kwargs) -> MockResponseModel:
|
|
|
31
31
|
)
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
@
|
|
34
|
+
@instrument()
|
|
35
35
|
def chat_mock_response(*args, **kwargs) -> MockResponseModel:
|
|
36
36
|
return MockResponseModel(
|
|
37
37
|
choices=[
|
|
@@ -45,7 +45,7 @@ def chat_mock_response(*args, **kwargs) -> MockResponseModel:
|
|
|
45
45
|
)
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
@
|
|
48
|
+
@instrument()
|
|
49
49
|
def delay_mock_response(*args, **kwargs) -> MockResponseModel:
|
|
50
50
|
sleep(2)
|
|
51
51
|
|
|
@@ -60,7 +60,7 @@ def delay_mock_response(*args, **kwargs) -> MockResponseModel:
|
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
|
|
63
|
-
@
|
|
63
|
+
@instrument()
|
|
64
64
|
def capital_mock_response(*args, **kwargs) -> MockResponseModel:
|
|
65
65
|
country = kwargs.get("messages", [{}, {}])[1].get(
|
|
66
66
|
"content", "What is the capital of _____?"
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
from typing import Dict, Any, Callable, Optional
|
|
2
|
+
from uuid import uuid4, UUID
|
|
3
|
+
|
|
4
|
+
from agenta.sdk.utils.client import authed_api
|
|
5
|
+
from agenta.sdk.decorators.running import auto_workflow, is_workflow
|
|
6
|
+
from agenta.sdk.models.workflows import (
|
|
7
|
+
ApplicationRevision,
|
|
8
|
+
#
|
|
9
|
+
ApplicationRevisionResponse,
|
|
10
|
+
#
|
|
11
|
+
LegacyApplicationFlags,
|
|
12
|
+
LegacyApplicationData,
|
|
13
|
+
LegacyApplicationCreate,
|
|
14
|
+
LegacyApplicationEdit,
|
|
15
|
+
#
|
|
16
|
+
LegacyApplicationResponse,
|
|
17
|
+
#
|
|
18
|
+
Reference,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
from agenta.sdk.utils.references import get_slug_from_name_and_id
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
async def _retrieve_application(
    application_id: Optional[UUID] = None,
    application_slug: Optional[str] = None,
    application_revision_id: Optional[UUID] = None,
    application_revision_slug: Optional[str] = None,
) -> Optional[ApplicationRevision]:
    """Retrieve an application revision by application and/or revision reference.

    Sends a POST request to
    ``/preview/legacy/applications/revisions/retrieve`` with an
    ``application_ref`` and an ``application_revision_ref``. Each reference
    is ``None`` when neither its id nor its slug is provided.

    Args:
        application_id: Optional application id (sent as a string).
        application_slug: Optional application slug.
        application_revision_id: Optional revision id (sent as a string).
        application_revision_slug: Optional revision slug.

    Returns:
        The ``application_revision`` attribute of the parsed
        ``ApplicationRevisionResponse``.

    Raises:
        Exception: Whatever ``response.raise_for_status()`` raises.
    """
    application_ref = None
    if application_id or application_slug:
        application_ref = {
            "id": str(application_id) if application_id else None,
            # Only stringify a real slug: str(None) would send the literal
            # slug "None" whenever the application is referenced by id alone.
            "slug": (
                str(application_slug) if application_slug is not None else None
            ),
        }

    application_revision_ref = None
    if application_revision_id or application_revision_slug:
        application_revision_ref = {
            "id": (
                str(application_revision_id) if application_revision_id else None
            ),
            "slug": application_revision_slug,
        }

    payload = {
        "application_ref": application_ref,
        "application_revision_ref": application_revision_ref,
    }

    response = authed_api()(
        method="POST",
        endpoint="/preview/legacy/applications/revisions/retrieve",
        json=payload,
    )
    response.raise_for_status()

    application_revision_response = ApplicationRevisionResponse(**response.json())

    return application_revision_response.application_revision
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
async def aretrieve(
    application_revision_id: Optional[UUID] = None,
) -> Optional[ApplicationRevision]:
    """Retrieve an application revision by its revision id.

    Delegates to ``_retrieve_application``, passing only the revision id,
    and returns that call's result unchanged.
    """
    return await _retrieve_application(
        application_revision_id=application_revision_id,
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _reference_to_dict(reference: Any) -> Dict[str, Any]:
    """Normalise a reference entry to a plain dict (empty if unusable)."""
    if isinstance(reference, Reference):
        reference = reference.model_dump(
            mode="json",
            exclude_none=True,
        )
    return reference if isinstance(reference, dict) else {}


async def aupsert(
    *,
    application_id: Optional[UUID] = None,
    application_slug: Optional[str] = None,
    application_revision_id: Optional[UUID] = None,
    application_revision_slug: Optional[str] = None,
    #
    handler: Callable,
    script: Optional[str] = None,
    parameters: Optional[Dict[str, Any]] = None,
    #
    name: Optional[str] = None,
    description: Optional[str] = None,
) -> Optional[UUID]:
    """Create or update a legacy application from a workflow handler.

    The handler is wrapped with ``auto_workflow`` unless ``is_workflow``
    already accepts it, then inspected. Flags, interface and configuration
    from the inspection become the application's flags and data. An existing
    revision is looked up by revision reference first, else by application
    reference. If the lookup yields both a revision id and an application id,
    that application is updated with PUT; otherwise a new one is created with
    POST.

    Explicit arguments take precedence over values reported by the inspected
    workflow (references, revision name and description).

    Args:
        application_id: Id of the application to update, if known.
        application_slug: Slug of the application; generated when missing.
        application_revision_id: Id of the revision used for the lookup.
        application_revision_slug: Slug of the revision used for the lookup.
        handler: Callable or workflow to register.
        script: Forwarded to ``auto_workflow`` when the handler is wrapped.
        parameters: Forwarded to ``auto_workflow`` when the handler is wrapped.
        name: Application name; falls back to the inspected revision's name.
        description: Application description; same fallback as ``name``.

    Returns:
        The id of the application's revision as retrieved after the
        create/update, or ``None`` if preparation, the request, or the
        follow-up retrieval did not produce one. Preparation and HTTP status
        errors are printed rather than raised.
    """
    try:
        if not is_workflow(handler):
            application_workflow = auto_workflow(
                handler,
                #
                script=script,
                parameters=parameters,
                #
                name=name,
                description=description,
            )
        else:
            application_workflow = handler

        req = await application_workflow.inspect()

        legacy_application_flags = LegacyApplicationFlags(**req.flags)

        legacy_application_data = LegacyApplicationData(
            **(
                req.interface.model_dump(mode="json", exclude_none=True)
                if req and req.interface
                else {}
            ),
            **(
                req.configuration.model_dump(mode="json", exclude_none=True)
                if req and req.configuration
                else {}
            ),
        )

        retrieve_response = None

        if req.references is not None:
            _application_revision_ref = _reference_to_dict(
                req.references.get("application_revision", {})
            )
            application_revision_id = (
                application_revision_id or _application_revision_ref.get("id")
            )
            application_revision_slug = (
                application_revision_slug or _application_revision_ref.get("slug")
            )

            _application_ref = _reference_to_dict(
                req.references.get("application", {})
            )
            application_id = application_id or _application_ref.get("id")
            application_slug = application_slug or _application_ref.get("slug")

        # Fill in name/description from the inspected revision only when the
        # caller did not supply them. Caller-supplied values must survive even
        # when the workflow reports no revision data.
        revision = req.data.revision if req and req.data else None
        if revision:
            name = name or revision.get("name")
            description = description or revision.get("description")

        # Generate a slug only when none was supplied or referenced; an
        # existing slug is kept regardless of whether a name is available.
        if not application_slug:
            if name:
                application_slug = get_slug_from_name_and_id(
                    name=name,
                    id=application_id or uuid4(),
                )
            else:
                application_slug = uuid4().hex[-12:]

        # A revision reference is preferred over an application reference.
        if application_revision_id or application_revision_slug:
            retrieve_response = await _retrieve_application(
                application_revision_id=application_revision_id,
                application_revision_slug=application_revision_slug,
            )
        elif application_id or application_slug:
            retrieve_response = await _retrieve_application(
                application_id=application_id,
                application_slug=application_slug,
            )

    except Exception as e:
        print("[ERROR]: Failed to prepare application:", e)
        return None

    if retrieve_response and retrieve_response.id and retrieve_response.application_id:
        # Existing application found: update it in place.
        application_id = retrieve_response.application_id
        application_edit_request = LegacyApplicationEdit(
            id=application_id,
            #
            name=name,
            description=description,
            #
            flags=legacy_application_flags,
            #
            data=legacy_application_data,
        )

        response = authed_api()(
            method="PUT",
            endpoint=f"/preview/legacy/applications/{application_id}",
            json={
                "application": application_edit_request.model_dump(
                    mode="json",
                    exclude_none=True,
                )
            },
        )

        try:
            response.raise_for_status()
        except Exception as e:
            print("[ERROR]: Failed to update application:", e)
            return None

    else:
        # Nothing to update: create a new application.
        application_create_request = LegacyApplicationCreate(
            slug=application_slug or uuid4().hex[-12:],
            #
            name=name,
            description=description,
            #
            flags=legacy_application_flags,
            #
            data=legacy_application_data,
        )

        response = authed_api()(
            method="POST",
            endpoint="/preview/legacy/applications/",
            json={
                "application": application_create_request.model_dump(
                    mode="json",
                    exclude_none=True,
                )
            },
        )

        try:
            response.raise_for_status()
        except Exception as e:
            print("[ERROR]: Failed to create application:", e)
            return None

    application_response = LegacyApplicationResponse(**response.json())

    application = application_response.application

    if not application or not application.id:
        return None

    # Re-fetch by application id to obtain the revision whose id is returned.
    application_revision = await _retrieve_application(
        application_id=application.id,
    )

    if not application_revision or not application_revision.id:
        return None

    return application_revision.id
|
agenta/sdk/managers/config.py
CHANGED
|
@@ -7,7 +7,7 @@ from pydantic import BaseModel
|
|
|
7
7
|
|
|
8
8
|
from agenta.sdk.utils.logging import get_module_logger
|
|
9
9
|
from agenta.sdk.managers.shared import SharedManager
|
|
10
|
-
from agenta.sdk.
|
|
10
|
+
from agenta.sdk.contexts.routing import RoutingContext
|
|
11
11
|
|
|
12
12
|
T = TypeVar("T", bound=BaseModel)
|
|
13
13
|
|
|
@@ -45,7 +45,7 @@ class ConfigManager:
|
|
|
45
45
|
Only one of these should be provided.
|
|
46
46
|
"""
|
|
47
47
|
|
|
48
|
-
context =
|
|
48
|
+
context = RoutingContext.get()
|
|
49
49
|
|
|
50
50
|
parameters = context.parameters
|
|
51
51
|
|
|
File without changes
|