arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
- arize_phoenix-12.28.1.dist-info/RECORD +499 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
- phoenix/__generated__/__init__.py +0 -0
- phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
- phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
- phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
- phoenix/__init__.py +5 -4
- phoenix/auth.py +39 -2
- phoenix/config.py +1763 -91
- phoenix/datetime_utils.py +120 -2
- phoenix/db/README.md +595 -25
- phoenix/db/bulk_inserter.py +145 -103
- phoenix/db/engines.py +140 -33
- phoenix/db/enums.py +3 -12
- phoenix/db/facilitator.py +302 -35
- phoenix/db/helpers.py +1000 -65
- phoenix/db/iam_auth.py +64 -0
- phoenix/db/insertion/dataset.py +135 -2
- phoenix/db/insertion/document_annotation.py +9 -6
- phoenix/db/insertion/evaluation.py +2 -3
- phoenix/db/insertion/helpers.py +17 -2
- phoenix/db/insertion/session_annotation.py +176 -0
- phoenix/db/insertion/span.py +15 -11
- phoenix/db/insertion/span_annotation.py +3 -4
- phoenix/db/insertion/trace_annotation.py +3 -4
- phoenix/db/insertion/types.py +50 -20
- phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
- phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
- phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
- phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
- phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
- phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
- phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
- phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
- phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
- phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
- phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
- phoenix/db/models.py +669 -56
- phoenix/db/pg_config.py +10 -0
- phoenix/db/types/model_provider.py +4 -0
- phoenix/db/types/token_price_customization.py +29 -0
- phoenix/db/types/trace_retention.py +23 -15
- phoenix/experiments/evaluators/utils.py +3 -3
- phoenix/experiments/functions.py +160 -52
- phoenix/experiments/tracing.py +2 -2
- phoenix/experiments/types.py +1 -1
- phoenix/inferences/inferences.py +1 -2
- phoenix/server/api/auth.py +38 -7
- phoenix/server/api/auth_messages.py +46 -0
- phoenix/server/api/context.py +100 -4
- phoenix/server/api/dataloaders/__init__.py +79 -5
- phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
- phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
- phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
- phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
- phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
- phoenix/server/api/dataloaders/dataset_labels.py +36 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
- phoenix/server/api/dataloaders/document_evaluations.py +6 -9
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
- phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
- phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
- phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
- phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
- phoenix/server/api/dataloaders/record_counts.py +37 -10
- phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
- phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
- phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
- phoenix/server/api/dataloaders/span_costs.py +29 -0
- phoenix/server/api/dataloaders/table_fields.py +2 -2
- phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
- phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
- phoenix/server/api/dataloaders/types.py +29 -0
- phoenix/server/api/exceptions.py +11 -1
- phoenix/server/api/helpers/dataset_helpers.py +5 -1
- phoenix/server/api/helpers/playground_clients.py +1243 -292
- phoenix/server/api/helpers/playground_registry.py +2 -2
- phoenix/server/api/helpers/playground_spans.py +8 -4
- phoenix/server/api/helpers/playground_users.py +26 -0
- phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
- phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
- phoenix/server/api/helpers/prompts/models.py +205 -22
- phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
- phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
- phoenix/server/api/input_types/CreateProjectInput.py +27 -0
- phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
- phoenix/server/api/input_types/DatasetFilter.py +17 -0
- phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
- phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
- phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
- phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
- phoenix/server/api/input_types/PromptFilter.py +14 -0
- phoenix/server/api/input_types/PromptVersionInput.py +52 -1
- phoenix/server/api/input_types/SpanSort.py +44 -7
- phoenix/server/api/input_types/TimeBinConfig.py +23 -0
- phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
- phoenix/server/api/input_types/UserRoleInput.py +1 -0
- phoenix/server/api/mutations/__init__.py +10 -0
- phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
- phoenix/server/api/mutations/api_key_mutations.py +19 -23
- phoenix/server/api/mutations/chat_mutations.py +154 -47
- phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
- phoenix/server/api/mutations/dataset_mutations.py +21 -16
- phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
- phoenix/server/api/mutations/experiment_mutations.py +2 -2
- phoenix/server/api/mutations/export_events_mutations.py +3 -3
- phoenix/server/api/mutations/model_mutations.py +210 -0
- phoenix/server/api/mutations/project_mutations.py +49 -10
- phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
- phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
- phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
- phoenix/server/api/mutations/prompt_mutations.py +65 -129
- phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
- phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
- phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
- phoenix/server/api/mutations/trace_mutations.py +47 -3
- phoenix/server/api/mutations/user_mutations.py +66 -41
- phoenix/server/api/queries.py +768 -293
- phoenix/server/api/routers/__init__.py +2 -2
- phoenix/server/api/routers/auth.py +154 -88
- phoenix/server/api/routers/ldap.py +229 -0
- phoenix/server/api/routers/oauth2.py +369 -106
- phoenix/server/api/routers/v1/__init__.py +24 -4
- phoenix/server/api/routers/v1/annotation_configs.py +23 -31
- phoenix/server/api/routers/v1/annotations.py +481 -17
- phoenix/server/api/routers/v1/datasets.py +395 -81
- phoenix/server/api/routers/v1/documents.py +142 -0
- phoenix/server/api/routers/v1/evaluations.py +24 -31
- phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
- phoenix/server/api/routers/v1/experiment_runs.py +337 -59
- phoenix/server/api/routers/v1/experiments.py +479 -48
- phoenix/server/api/routers/v1/models.py +7 -0
- phoenix/server/api/routers/v1/projects.py +18 -49
- phoenix/server/api/routers/v1/prompts.py +54 -40
- phoenix/server/api/routers/v1/sessions.py +108 -0
- phoenix/server/api/routers/v1/spans.py +1091 -81
- phoenix/server/api/routers/v1/traces.py +132 -78
- phoenix/server/api/routers/v1/users.py +389 -0
- phoenix/server/api/routers/v1/utils.py +3 -7
- phoenix/server/api/subscriptions.py +305 -88
- phoenix/server/api/types/Annotation.py +90 -23
- phoenix/server/api/types/ApiKey.py +13 -17
- phoenix/server/api/types/AuthMethod.py +1 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
- phoenix/server/api/types/CostBreakdown.py +12 -0
- phoenix/server/api/types/Dataset.py +226 -72
- phoenix/server/api/types/DatasetExample.py +88 -18
- phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
- phoenix/server/api/types/DatasetLabel.py +57 -0
- phoenix/server/api/types/DatasetSplit.py +98 -0
- phoenix/server/api/types/DatasetVersion.py +49 -4
- phoenix/server/api/types/DocumentAnnotation.py +212 -0
- phoenix/server/api/types/Experiment.py +264 -59
- phoenix/server/api/types/ExperimentComparison.py +5 -10
- phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
- phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
- phoenix/server/api/types/ExperimentRun.py +169 -65
- phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
- phoenix/server/api/types/GenerativeModel.py +245 -3
- phoenix/server/api/types/GenerativeProvider.py +70 -11
- phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
- phoenix/server/api/types/ModelInterface.py +16 -0
- phoenix/server/api/types/PlaygroundModel.py +20 -0
- phoenix/server/api/types/Project.py +1278 -216
- phoenix/server/api/types/ProjectSession.py +188 -28
- phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
- phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
- phoenix/server/api/types/Prompt.py +119 -39
- phoenix/server/api/types/PromptLabel.py +42 -25
- phoenix/server/api/types/PromptVersion.py +11 -8
- phoenix/server/api/types/PromptVersionTag.py +65 -25
- phoenix/server/api/types/ServerStatus.py +6 -0
- phoenix/server/api/types/Span.py +167 -123
- phoenix/server/api/types/SpanAnnotation.py +189 -42
- phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
- phoenix/server/api/types/SpanCostSummary.py +10 -0
- phoenix/server/api/types/SystemApiKey.py +65 -1
- phoenix/server/api/types/TokenPrice.py +16 -0
- phoenix/server/api/types/TokenUsage.py +3 -3
- phoenix/server/api/types/Trace.py +223 -51
- phoenix/server/api/types/TraceAnnotation.py +149 -50
- phoenix/server/api/types/User.py +137 -32
- phoenix/server/api/types/UserApiKey.py +73 -26
- phoenix/server/api/types/node.py +10 -0
- phoenix/server/api/types/pagination.py +11 -2
- phoenix/server/app.py +290 -45
- phoenix/server/authorization.py +38 -3
- phoenix/server/bearer_auth.py +34 -24
- phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
- phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
- phoenix/server/cost_tracking/helpers.py +68 -0
- phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
- phoenix/server/cost_tracking/regex_specificity.py +397 -0
- phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
- phoenix/server/daemons/__init__.py +0 -0
- phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
- phoenix/server/daemons/generative_model_store.py +103 -0
- phoenix/server/daemons/span_cost_calculator.py +99 -0
- phoenix/server/dml_event.py +17 -0
- phoenix/server/dml_event_handler.py +5 -0
- phoenix/server/email/sender.py +56 -3
- phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/experiments/__init__.py +0 -0
- phoenix/server/experiments/utils.py +14 -0
- phoenix/server/grpc_server.py +11 -11
- phoenix/server/jwt_store.py +17 -15
- phoenix/server/ldap.py +1449 -0
- phoenix/server/main.py +26 -10
- phoenix/server/oauth2.py +330 -12
- phoenix/server/prometheus.py +66 -6
- phoenix/server/rate_limiters.py +4 -9
- phoenix/server/retention.py +33 -20
- phoenix/server/session_filters.py +49 -0
- phoenix/server/static/.vite/manifest.json +55 -51
- phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
- phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
- phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
- phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
- phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
- phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
- phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
- phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
- phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
- phoenix/server/templates/index.html +40 -6
- phoenix/server/thread_server.py +1 -2
- phoenix/server/types.py +14 -4
- phoenix/server/utils.py +74 -0
- phoenix/session/client.py +56 -3
- phoenix/session/data_extractor.py +5 -0
- phoenix/session/evaluation.py +14 -5
- phoenix/session/session.py +45 -9
- phoenix/settings.py +5 -0
- phoenix/trace/attributes.py +80 -13
- phoenix/trace/dsl/helpers.py +90 -1
- phoenix/trace/dsl/query.py +8 -6
- phoenix/trace/projects.py +5 -0
- phoenix/utilities/template_formatters.py +1 -1
- phoenix/version.py +1 -1
- arize_phoenix-10.0.4.dist-info/RECORD +0 -405
- phoenix/server/api/types/Evaluation.py +0 -39
- phoenix/server/cost_tracking/cost_lookup.py +0 -255
- phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
- phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
- phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
- phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
- phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
- phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
- phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
- phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
- phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
- phoenix/utilities/deprecation.py +0 -31
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
phoenix/server/authorization.py
CHANGED
```diff
@@ -23,8 +23,8 @@ Usage:
 """
 
 from fastapi import HTTPException, Request
-from fastapi import status as fastapi_status
 
+from phoenix.config import get_env_support_email
 from phoenix.server.bearer_auth import PhoenixUser
 
 
@@ -42,12 +42,47 @@ def require_admin(request: Request) -> None:
     Behavior:
     - Allows access if the authenticated user is an admin or a system user.
     - Raises HTTP 403 Forbidden if the user is not authorized.
-
+    - Allows access if authentication is not enabled.
     """
+    if not request.app.state.authentication_enabled:
+        return
     user = getattr(request, "user", None)
     # System users have all privileges
     if not (isinstance(user, PhoenixUser) and user.is_admin):
         raise HTTPException(
-            status_code=
+            status_code=403,
             detail="Only admin or system users can perform this action.",
         )
+
+
+def is_not_locked(request: Request) -> None:
+    """
+    FastAPI dependency to ensure database operations are not locked due to insufficient storage.
+
+    This dependency checks if data insertion and update operations are disabled due to
+    storage capacity limits. When storage thresholds are exceeded, it raises an HTTP 507
+    error with actionable guidance for users.
+
+    Usage:
+        Add as a dependency to any route that modifies data:
+
+        @router.post("/create-data", dependencies=[Depends(is_not_locked)])
+        async def create_data(...):
+            ...
+
+    Raises:
+        HTTPException: HTTP 507 Insufficient Storage when database operations are locked.
+            The error includes guidance on resolving storage issues and support contact
+            information if configured.
+    """
+    if request.app.state.db.should_not_insert_or_update:
+        detail = (
+            "Database operations are disabled due to insufficient storage. "
+            "Please delete old data or increase storage."
+        )
+        if support_email := get_env_support_email():
+            detail += f" Need help? Contact us at {support_email}"
+        raise HTTPException(
+            status_code=507,
+            detail=detail,
+        )
```
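Taken together, the changed module now exposes two route-level guards: `require_admin` (role check, skipped when authentication is disabled) and `is_not_locked` (storage check, HTTP 507 when writes are disabled). A minimal sketch of how they might be combined on a mutating endpoint; the route path and handler below are hypothetical and not part of the diff:

```python
from fastapi import APIRouter, Depends

from phoenix.server.authorization import is_not_locked, require_admin

router = APIRouter()


@router.post(
    "/admin-data",  # hypothetical path, for illustration only
    dependencies=[Depends(require_admin), Depends(is_not_locked)],
)
async def create_admin_data() -> dict[str, str]:
    # Reached only if the caller is an admin/system user (or auth is disabled)
    # and the database is not locked for inserts/updates.
    return {"status": "created"}
```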
phoenix/server/bearer_auth.py
CHANGED
```diff
@@ -7,10 +7,9 @@ from typing import Any, Optional, cast
 import grpc
 from fastapi import HTTPException, Request, WebSocket, WebSocketException
 from grpc_interceptor import AsyncServerInterceptor
-from grpc_interceptor.exceptions import Unauthenticated
 from starlette.authentication import AuthCredentials, AuthenticationBackend, BaseUser
 from starlette.requests import HTTPConnection
-from
+from typing_extensions import override
 
 from phoenix import config
 from phoenix.auth import (
@@ -20,7 +19,7 @@ from phoenix.auth import (
     Token,
 )
 from phoenix.config import get_env_phoenix_admin_secret
-from phoenix.db import
+from phoenix.db import models
 from phoenix.server.types import (
     AccessToken,
     AccessTokenAttributes,
@@ -74,14 +73,20 @@ class PhoenixUser(BaseUser):
         self.claims = claims
         assert claims.attributes
         self._is_admin = (
-            claims.status is ClaimSetStatus.VALID
-
+            claims.status is ClaimSetStatus.VALID and claims.attributes.user_role == "ADMIN"
+        )
+        self._is_viewer = (
+            claims.status is ClaimSetStatus.VALID and claims.attributes.user_role == "VIEWER"
         )
 
     @cached_property
     def is_admin(self) -> bool:
         return self._is_admin
 
+    @cached_property
+    def is_viewer(self) -> bool:
+        return self._is_viewer
+
     @cached_property
     def identity(self) -> UserId:
         return self._user_id
@@ -94,6 +99,8 @@ class PhoenixUser(BaseUser):
 class PhoenixSystemUser(PhoenixUser):
     def __init__(self, user_id: UserId) -> None:
         self._user_id = user_id
+        self._is_admin = True  # System users have admin privileges
+        self._is_viewer = False  # System users are not viewers
 
     @property
     def is_admin(self) -> bool:
@@ -101,16 +108,19 @@ class PhoenixSystemUser(PhoenixUser):
 
 
 class ApiKeyInterceptor(HasTokenStore, AsyncServerInterceptor):
+    @override
     async def intercept(
         self,
-        method: Callable[[Any, grpc.ServicerContext], Awaitable[Any]],
+        method: Callable[[Any, grpc.aio.ServicerContext], Awaitable[Any]],
         request_or_iterator: Any,
-        context: grpc.ServicerContext,
+        context: grpc.aio.ServicerContext,
         method_name: str,
     ) -> Any:
-        for
-        if
-
+        for key, value in context.invocation_metadata() or ():
+            if key.lower() == "authorization":
+                if isinstance(value, bytes):
+                    value = value.decode("utf-8")
+                scheme, _, token = value.partition(" ")
                 if scheme.lower() != "bearer" or not token:
                     break
                 if (
@@ -120,16 +130,16 @@ class ApiKeyInterceptor(HasTokenStore, AsyncServerInterceptor):
                 ):
                     return await method(request_or_iterator, context)
                 claims = await self._token_store.read(Token(token))
-                if
+                if (
+                    not (
+                        isinstance(claims, (ApiKeyClaims, AccessTokenClaims))
+                        and isinstance(claims.subject, UserId)
+                    )
+                    or claims.status is not ClaimSetStatus.VALID
+                ):
                     break
-
-
-                if claims.status is ClaimSetStatus.EXPIRED:
-                    raise Unauthenticated(details="Expired token")
-                if claims.status is ClaimSetStatus.VALID:
-                    return await method(request_or_iterator, context)
-                raise Unauthenticated()
-        raise Unauthenticated()
+                return await method(request_or_iterator, context)
+        await context.abort(grpc.StatusCode.UNAUTHENTICATED)
 
 
 async def is_authenticated(
@@ -142,16 +152,16 @@ async def is_authenticated(
     """
     assert request or websocket
     if request and not isinstance((user := request.user), PhoenixUser):
-        raise HTTPException(status_code=
+        raise HTTPException(status_code=401, detail="Invalid token")
     if websocket and not isinstance((user := websocket.user), PhoenixUser):
-        raise WebSocketException(code=
+        raise WebSocketException(code=401, reason="Invalid token")
     if isinstance(user, PhoenixSystemUser):
         return
     claims = user.claims
     if claims.status is ClaimSetStatus.EXPIRED:
-        raise HTTPException(status_code=
+        raise HTTPException(status_code=401, detail="Expired token")
     if claims.status is not ClaimSetStatus.VALID:
-        raise HTTPException(status_code=
+        raise HTTPException(status_code=401, detail="Invalid token")
 
 
 async def create_access_and_refresh_tokens(
@@ -163,7 +173,7 @@ async def create_access_and_refresh_tokens(
 ) -> tuple[AccessToken, RefreshToken]:
     issued_at = datetime.now(timezone.utc)
     user_id = UserId(user.id)
-    user_role =
+    user_role = user.role.name
     refresh_token_claims = RefreshTokenClaims(
         subject=user_id,
         issued_at=issued_at,
```
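The rewritten `intercept` now parses the gRPC `authorization` metadata entry itself and aborts with `UNAUTHENTICATED` instead of raising `Unauthenticated` from `grpc_interceptor`. A standalone sketch of just that header-parsing step (the function name below is illustrative, not a Phoenix API):

```python
from typing import Iterable, Optional, Tuple, Union


def extract_bearer_token(
    metadata: Iterable[Tuple[str, Union[str, bytes]]],
) -> Optional[str]:
    # gRPC invocation metadata is a sequence of (key, value) pairs; values may be bytes.
    for key, value in metadata:
        if key.lower() != "authorization":
            continue
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        scheme, _, token = value.partition(" ")
        if scheme.lower() == "bearer" and token:
            return token
        return None  # malformed or non-bearer authorization header
    return None


assert extract_bearer_token([("authorization", b"Bearer abc123")]) == "abc123"
assert extract_bearer_token([("authorization", "Basic abc123")]) is None
```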
phoenix/server/cost_tracking/cost_details_calculator.py
ADDED
```python
from itertools import chain
from typing import Any, Iterable, Mapping, Optional

from typing_extensions import TypeAlias

from phoenix.db import models
from phoenix.server.cost_tracking.helpers import get_aggregated_tokens
from phoenix.server.cost_tracking.token_cost_calculator import (
    TokenCostCalculator,
    create_token_cost_calculator,
)
from phoenix.trace.attributes import get_attribute_value

_TokenType: TypeAlias = str


class SpanCostDetailsCalculator:
    """
    Calculates detailed cost breakdowns for LLM spans based on token usage and pricing.

    This calculator processes both detailed token counts (from span attributes) and
    aggregated token totals to provide comprehensive cost analysis for prompt and
    completion tokens. It handles multiple token types (e.g., "input", "output",
    "image", "audio", "video", "document", "reasoning", etc.) and calculates costs
    using configured pricing models with fallback behavior.

    **Fallback Behavior:**
    - If a specific token type has a configured calculator, it uses that calculator
    - If no specific calculator exists, it falls back to the default calculator:
      - Prompt tokens (is_prompt=True) fall back to "input" calculator
      - Completion tokens (is_prompt=False) fall back to "output" calculator

    This ensures all token types get cost calculations even if not explicitly configured.

    The calculator expects token prices to include at least:
    - An "input" token type for prompt tokens (used as fallback for unconfigured prompt token types)
    - An "output" token type for completion tokens (used as fallback for unconfigured completion token types)

    Additional token types can be configured for more granular cost tracking.
    """  # noqa: E501

    def __init__(
        self,
        prices: Iterable[models.TokenPrice],
    ) -> None:
        """
        Initialize the cost calculator with token pricing configuration.

        Args:
            prices: Collection of token price configurations defining rates for
                different token types and whether they're prompt or completion tokens.

        Raises:
            ValueError: If required "input" (prompt) or "output" (completion)
                token types are missing from the pricing configuration.
        """
        # Create calculators for prompt token types (is_prompt=True)
        self._prompt: Mapping[_TokenType, TokenCostCalculator] = {
            p.token_type: create_token_cost_calculator(p.base_rate, p.customization)
            for p in prices
            if p.is_prompt
        }
        if self._prompt and "input" not in self._prompt:
            raise ValueError("Token prices for prompt must include an 'input' token type")

        # Create calculators for completion token types (is_prompt=False)
        self._completion: Mapping[_TokenType, TokenCostCalculator] = {
            p.token_type: create_token_cost_calculator(p.base_rate, p.customization)
            for p in prices
            if not p.is_prompt
        }
        if self._completion and "output" not in self._completion:
            raise ValueError("Token prices for completion must include an 'output' token type")

    def calculate_details(
        self,
        attributes: Mapping[str, Any],
    ) -> list[models.SpanCostDetail]:
        """
        Calculate detailed cost breakdown for a given span.

        This method processes token usage in two phases:
        1. **Detailed token processing**: Extracts specific token counts from span attributes
           (e.g., "llm.token_count.prompt_details", "llm.token_count.completion_details")
           and calculates costs for each token type found. Uses fallback behavior for
           token types without specific calculators.

        2. **Aggregated token processing**: For default token types ("input"/"output") that
           weren't found in detailed processing, calculates remaining tokens by subtracting
           detailed counts from total aggregated tokens.

        **Fallback Calculation Logic:**
        - For each token type in detailed processing:
          - If a specific calculator exists for the token type, use it
          - Otherwise, fall back to the default calculator ("input" for prompt tokens,
            "output" for completion tokens)
        - This ensures all token types receive cost calculations regardless of
          specific calculator configuration

        Args:
            attributes: Dictionary containing span attributes with token usage data.

        Returns:
            List of SpanCostDetail objects containing token counts, costs, and cost-per-token
            for each token type found in the span.

        Note:
            - Token counts are validated and converted to non-negative integers
            - All token types receive cost calculations via fallback mechanism
            - Cost-per-token is calculated only when both cost and token count are positive
            - If cost is 0.0, cost-per-token will be None (not 0.0) due to falsy evaluation
        """
        prompt_details: dict[_TokenType, models.SpanCostDetail] = {}
        completion_details: dict[_TokenType, models.SpanCostDetail] = {}
        calculator: Optional[TokenCostCalculator]
        cost: Optional[float]
        cost_per_token: Optional[float]

        # Phase 1: Process detailed token counts from span attributes
        for is_prompt, prefix, calculators, results in (
            (True, "prompt", self._prompt, prompt_details),
            (False, "completion", self._completion, completion_details),
        ):
            # Extract detailed token counts from span attributes
            details = get_attribute_value(attributes, f"llm.token_count.{prefix}_details")
            if isinstance(details, dict) and details:
                for token_type, token_count in details.items():
                    # Validate token count is numeric
                    if not isinstance(token_count, (int, float)):
                        continue
                    tokens = max(0, int(token_count))

                    # Calculate cost using specific calculator or fallback to default
                    calculator = None
                    calculator_key = "input" if is_prompt else "output"
                    if token_type in calculators:
                        # Use specific calculator for this token type
                        calculator = calculators[token_type]
                    elif calculator_key in calculators:
                        calculator = calculators[calculator_key]

                    cost = None
                    cost_per_token = None
                    if calculator:
                        cost = calculator.calculate_cost(attributes, tokens)
                        cost_per_token = cost / tokens if tokens else None

                    detail = models.SpanCostDetail(
                        token_type=token_type,
                        is_prompt=is_prompt,
                        tokens=tokens,
                        cost=cost,
                        cost_per_token=cost_per_token,
                    )
                    results[token_type] = detail

        # Get aggregated token totals for fallback calculations
        prompt_tokens, completion_tokens, _ = get_aggregated_tokens(attributes)

        # Phase 2: Process remaining tokens for default token types
        for is_prompt, token_type, total, calculators, results in (
            (True, "input", prompt_tokens, self._prompt, prompt_details),
            (False, "output", completion_tokens, self._completion, completion_details),
        ):
            # Skip if this token type was already processed in detailed phase
            if token_type in results:
                continue

            # Calculate remaining tokens by subtracting detailed counts from total
            tokens = total - sum(
                int(d.tokens or 0) for d in results.values() if d.is_prompt == is_prompt
            )

            # Skip if no remaining tokens or negative (shouldn't happen with valid data)
            if tokens <= 0:
                continue

            # Calculate cost using calculator if available
            cost = None
            cost_per_token = None
            if token_type in calculators:
                calculator = calculators[token_type]
                cost = calculator.calculate_cost(attributes, tokens)
                cost_per_token = cost / tokens if tokens else None

            detail = models.SpanCostDetail(
                token_type=token_type,
                is_prompt=is_prompt,
                tokens=tokens,
                cost=cost,
                cost_per_token=cost_per_token,
            )
            results[token_type] = detail

        # Return combined results from both prompt and completion processing
        return list(chain(prompt_details.values(), completion_details.values()))
```
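A worked example of the two-phase breakdown described in the docstring, using made-up rates and counts rather than real `TokenPrice` rows: 200 detailed `audio` prompt tokens are priced at their own rate in phase 1, and the remaining 800 of the 1,000 aggregated prompt tokens are billed as the default `input` type in phase 2.

```python
prompt_total = 1_000                     # llm.token_count.prompt
prompt_details = {"audio": 200}          # llm.token_count.prompt_details
rates = {"audio": 20e-6, "input": 5e-6}  # illustrative USD-per-token rates

# Phase 1: price each detailed token type, falling back to the "input" rate.
breakdown = {
    token_type: (tokens, tokens * rates.get(token_type, rates["input"]))
    for token_type, tokens in prompt_details.items()
}

# Phase 2: the remaining prompt tokens are billed as the default "input" type.
remaining = prompt_total - sum(tokens for tokens, _ in breakdown.values())
if remaining > 0:
    breakdown["input"] = (remaining, remaining * rates["input"])

print(breakdown)
# {'audio': (200, 0.004), 'input': (800, 0.004)}
```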
phoenix/server/cost_tracking/cost_model_lookup.py
ADDED
```python
from datetime import datetime
from typing import Any, Iterable, Mapping, Optional

from openinference.semconv.trace import SpanAttributes
from typing_extensions import TypeAlias

from phoenix.datetime_utils import is_timezone_aware
from phoenix.db import models
from phoenix.server.cost_tracking import regex_specificity
from phoenix.trace.attributes import get_attribute_value

_RegexPatternStr: TypeAlias = str
_RegexSpecificityScore: TypeAlias = int
_TieBreakerId: TypeAlias = int


class CostModelLookup:
    def __init__(
        self,
        generative_models: Iterable[models.GenerativeModel] = (),
    ) -> None:
        self._models_by_id: dict[int, models.GenerativeModel] = {}
        self._model_priority: dict[
            int, tuple[_RegexSpecificityScore, float, _TieBreakerId]
        ] = {}  # higher is better

        for m in generative_models:
            self._add_or_update_model(m)

    def _add_or_update_model(self, model: models.GenerativeModel) -> None:
        """Add or update a single model in the lookup."""
        self._models_by_id[model.id] = model

        specificity_score = regex_specificity.score(model.name_pattern)

        # For built-in models, use negative ID so that earlier IDs win
        # For user-defined models, use positive ID so later IDs win
        tie_breaker = -model.id if model.is_built_in else model.id

        self._model_priority[model.id] = (
            specificity_score,
            model.start_time.timestamp() if model.start_time else 0.0,
            tie_breaker,
        )

    def _remove_model(self, model_id: int) -> None:
        """Remove a model from the lookup."""
        if model_id in self._models_by_id:
            del self._models_by_id[model_id]
        if model_id in self._model_priority:
            del self._model_priority[model_id]

    def merge(self, models: Iterable[models.GenerativeModel]) -> None:
        """
        Merge a collection of models into the existing lookup.

        For each model:
        - If deleted_at is set, remove it from the lookup
        - Otherwise, add or update it in the lookup

        Args:
            models: An iterable of GenerativeModel objects to merge
        """
        for model in models:
            if model.deleted_at is not None:
                self._remove_model(model.id)
            else:
                self._add_or_update_model(model)

    def find_model(
        self,
        start_time: datetime,
        attributes: Mapping[str, Any],
    ) -> Optional[models.GenerativeModel]:
        """
        Find the most appropriate generative model for cost tracking based on attributes and time.

        This method implements a sophisticated model lookup system that filters and prioritizes
        generative models based on the provided attributes and timestamp. The lookup follows
        a specific priority hierarchy to ensure consistent and predictable model selection.

        Args:
            start_time: The timestamp for which to find a model. Must be timezone-aware.
                Models with start_time greater than this value will be excluded.
            attributes: A mapping containing span attributes. Must include:
                - SpanAttributes.LLM_MODEL_NAME: The name of the LLM model to match
                - SpanAttributes.LLM_PROVIDER: (Optional) The provider of the LLM model

        Raises:
            TypeError: If start_time is not timezone-aware (tzinfo is None)

        Returns:
            The most appropriate GenerativeModel that matches the criteria, or None if no
            suitable model is found.

        Model Selection Logic:
            1. **Input Validation**: Returns None if model name is empty or whitespace-only
            2. **Time and Regex Filtering**: Only models that satisfy both conditions:
               - start_time <= start_time or start_time=None (active models)
               - name_pattern regex matches the model name from attributes
            3. **Early Return Optimization**: If only one candidate remains, return it immediately
            4. **Two-Tier Priority System**: Models are processed in tiers:
               - User-defined models (is_built_in=False) are processed first
               - Built-in models (is_built_in=True) are processed second
               - If a tier has only one model, return it immediately
            5. **Provider Filtering**: Within each tier, if provider is specified:
               - Prefer models with matching provider
               - Fall back to provider-agnostic models if no provider-specific matches exist
            6. **Priority Selection**: Select the model with the highest priority tuple:
               (regex_specificity_score, start_time.timestamp, tie_breaker)

        Priority Tuple Components:
            - regex_specificity_score: More specific regex patterns have higher priority
            - start_time.timestamp: Models with later start times have higher priority
            - tie_breaker: For built-in models, uses negative ID (lower IDs win);
              for user-defined models, uses positive ID (higher IDs win)

        Examples:
            >>> lookup = CostModelLookup([model1, model2, model3])
            >>> model = lookup.find_model(
            ...     start_time=datetime(2024, 1, 1, tzinfo=timezone.utc),
            ...     attributes={"llm": {"model_name": "gpt-3.5-turbo", "provider": "openai"}}
            ... )
        """  # noqa: E501
        # 1. extract and validate inputs
        if not is_timezone_aware(start_time):
            raise TypeError("start_time must be timezone-aware")

        model_name = str(
            get_attribute_value(attributes, SpanAttributes.LLM_MODEL_NAME) or ""
        ).strip()
        if not model_name:
            return None

        # 2. only include models that are active and match the regex pattern
        candidates = [
            model
            for model in self._models_by_id.values()
            if (not model.start_time or model.start_time <= start_time)
            and model.name_pattern.search(model_name)
        ]
        if not candidates:
            return None

        # 3. early return: if only one candidate remains, return it
        if len(candidates) == 1:
            return candidates[0]

        provider = str(get_attribute_value(attributes, SpanAttributes.LLM_PROVIDER) or "").strip()

        # 4. priority-based selection: user-defined models first, then built-in models
        for is_built_in in (False, True):  # False = user-defined, True = built-in
            # get candidates for current tier (user-defined or built-in)
            tier_candidates = [model for model in candidates if model.is_built_in == is_built_in]

            if not tier_candidates:
                continue  # try next tier

            # early return: if only one candidate in this tier, return it
            if len(tier_candidates) == 1:
                return tier_candidates[0]

            # 5. provider filtering: if provider specified, prefer provider-specific models
            if provider:
                provider_specific_models = [
                    model
                    for model in tier_candidates
                    if model.provider and model.provider == provider
                ]
                # only use provider-specific models if any exist
                # this allows fallback to provider-agnostic models when no match
                if provider_specific_models:
                    tier_candidates = provider_specific_models

            # 6. select best model in this tier
            return max(tier_candidates, key=lambda model: self._model_priority[model.id])

        # 7. no suitable model found
        return None
```
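The selection in step 6 reduces to comparing priority tuples. A standalone illustration with made-up scores and IDs (not real model rows): tuples are compared lexicographically, so a more specific regex wins first, then a later start time, then the tie-breaker (negated IDs for built-in models, so earlier built-in IDs win).

```python
# (regex_specificity_score, start_timestamp, tie_breaker) for three hypothetical candidates
candidates = {
    "built_in_gpt_4_generic":   (10, 0.0, -3),            # built-in, id=3, loose pattern
    "built_in_gpt_4o_exact":    (25, 0.0, -7),            # built-in, id=7, tighter pattern
    "built_in_gpt_4o_repriced": (25, 1_717_200_000.0, -9)  # same pattern, later start_time
}

best = max(candidates, key=candidates.get)
print(best)  # built_in_gpt_4o_repriced
```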
phoenix/server/cost_tracking/helpers.py
ADDED
```python
import logging
from typing import Any, Mapping

from openinference.semconv.trace import SpanAttributes
from typing_extensions import TypeAlias

from phoenix.trace.attributes import get_attribute_value

logger = logging.getLogger(__name__)

_PromptTokens: TypeAlias = int
_CompletionTokens: TypeAlias = int
_TotalTokens: TypeAlias = int


def get_aggregated_tokens(
    attributes: Mapping[str, Any],
) -> tuple[_PromptTokens, _CompletionTokens, _TotalTokens]:
    """Return the total, prompt, and completion token counts from the span attributes."""
    try:
        prompt_tokens_value = get_attribute_value(
            attributes,
            SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
        )
        prompt_tokens: int = (
            0
            if not isinstance(prompt_tokens_value, (int, float))
            else max(0, int(prompt_tokens_value))
        )

        completion_tokens_value = get_attribute_value(
            attributes,
            SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
        )
        completion_tokens: int = (
            0
            if not isinstance(completion_tokens_value, (int, float))
            else max(0, int(completion_tokens_value))
        )

        total_tokens_value = get_attribute_value(
            attributes,
            SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
        )
        total_tokens: int = (
            0
            if not isinstance(total_tokens_value, (int, float))
            else max(0, int(total_tokens_value))
        )

        assert prompt_tokens >= 0
        assert completion_tokens >= 0
        assert total_tokens >= 0

        calculated_total = prompt_tokens + completion_tokens

        if total_tokens > calculated_total:
            if not prompt_tokens:
                prompt_tokens = total_tokens - completion_tokens
            else:
                completion_tokens = total_tokens - prompt_tokens
        else:
            total_tokens = calculated_total

        return prompt_tokens, completion_tokens, total_tokens
    except Exception as e:
        logger.error(f"Error getting aggregated tokens: {e}")
        return 0, 0, 0
```