arize-phoenix 10.14.0__py3-none-any.whl → 11.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix has been flagged as potentially problematic.

Files changed (84)
  1. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/METADATA +3 -2
  2. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/RECORD +82 -50
  3. phoenix/config.py +5 -2
  4. phoenix/datetime_utils.py +8 -1
  5. phoenix/db/bulk_inserter.py +40 -1
  6. phoenix/db/facilitator.py +263 -4
  7. phoenix/db/insertion/helpers.py +15 -0
  8. phoenix/db/insertion/span.py +3 -1
  9. phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
  10. phoenix/db/models.py +267 -9
  11. phoenix/db/types/model_provider.py +1 -0
  12. phoenix/db/types/token_price_customization.py +29 -0
  13. phoenix/server/api/context.py +38 -4
  14. phoenix/server/api/dataloaders/__init__.py +41 -5
  15. phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
  16. phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
  17. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
  18. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
  19. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
  20. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
  21. phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
  22. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +58 -0
  23. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
  24. phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
  25. phoenix/server/api/dataloaders/span_cost_summary_by_project.py +140 -0
  26. phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
  27. phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
  28. phoenix/server/api/dataloaders/span_costs.py +35 -0
  29. phoenix/server/api/dataloaders/types.py +29 -0
  30. phoenix/server/api/helpers/playground_clients.py +562 -12
  31. phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
  32. phoenix/server/api/helpers/prompts/models.py +67 -0
  33. phoenix/server/api/input_types/GenerativeModelInput.py +2 -0
  34. phoenix/server/api/input_types/ProjectSessionSort.py +3 -0
  35. phoenix/server/api/input_types/SpanSort.py +17 -0
  36. phoenix/server/api/mutations/__init__.py +2 -0
  37. phoenix/server/api/mutations/chat_mutations.py +17 -0
  38. phoenix/server/api/mutations/model_mutations.py +208 -0
  39. phoenix/server/api/queries.py +82 -41
  40. phoenix/server/api/routers/v1/traces.py +11 -4
  41. phoenix/server/api/subscriptions.py +36 -2
  42. phoenix/server/api/types/CostBreakdown.py +15 -0
  43. phoenix/server/api/types/Experiment.py +59 -1
  44. phoenix/server/api/types/ExperimentRun.py +58 -4
  45. phoenix/server/api/types/GenerativeModel.py +143 -2
  46. phoenix/server/api/types/GenerativeProvider.py +33 -20
  47. phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
  48. phoenix/server/api/types/ModelInterface.py +11 -0
  49. phoenix/server/api/types/PlaygroundModel.py +10 -0
  50. phoenix/server/api/types/Project.py +42 -0
  51. phoenix/server/api/types/ProjectSession.py +44 -0
  52. phoenix/server/api/types/Span.py +137 -0
  53. phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
  54. phoenix/server/api/types/SpanCostSummary.py +10 -0
  55. phoenix/server/api/types/TokenPrice.py +16 -0
  56. phoenix/server/api/types/TokenUsage.py +3 -3
  57. phoenix/server/api/types/Trace.py +41 -0
  58. phoenix/server/app.py +59 -0
  59. phoenix/server/cost_tracking/cost_details_calculator.py +190 -0
  60. phoenix/server/cost_tracking/cost_model_lookup.py +151 -0
  61. phoenix/server/cost_tracking/helpers.py +68 -0
  62. phoenix/server/cost_tracking/model_cost_manifest.json +59 -329
  63. phoenix/server/cost_tracking/regex_specificity.py +397 -0
  64. phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
  65. phoenix/server/daemons/__init__.py +0 -0
  66. phoenix/server/daemons/generative_model_store.py +51 -0
  67. phoenix/server/daemons/span_cost_calculator.py +103 -0
  68. phoenix/server/dml_event_handler.py +1 -0
  69. phoenix/server/static/.vite/manifest.json +36 -36
  70. phoenix/server/static/assets/components-BnK9kodr.js +5055 -0
  71. phoenix/server/static/assets/{index-qiubV_74.js → index-S3YKLmbo.js} +13 -13
  72. phoenix/server/static/assets/{pages-C4V07ozl.js → pages-BW6PBHZb.js} +809 -417
  73. phoenix/server/static/assets/{vendor-Bfsiga8H.js → vendor-DqQvHbPa.js} +147 -147
  74. phoenix/server/static/assets/{vendor-arizeai-CQOWsrzm.js → vendor-arizeai-CLX44PFA.js} +1 -1
  75. phoenix/server/static/assets/{vendor-codemirror-CrcGVhB2.js → vendor-codemirror-Du3XyJnB.js} +1 -1
  76. phoenix/server/static/assets/{vendor-recharts-Yyg3G-Rq.js → vendor-recharts-B2PJDrnX.js} +25 -25
  77. phoenix/server/static/assets/{vendor-shiki-OPjag7Hm.js → vendor-shiki-CNbrFjf9.js} +1 -1
  78. phoenix/version.py +1 -1
  79. phoenix/server/cost_tracking/cost_lookup.py +0 -255
  80. phoenix/server/static/assets/components-CUUWyAMo.js +0 -4509
  81. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/WHEEL +0 -0
  82. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/entry_points.txt +0 -0
  83. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/IP_NOTICE +0 -0
  84. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/LICENSE +0 -0
phoenix/server/api/types/Trace.py CHANGED
@@ -14,6 +14,7 @@ from typing_extensions import TypeAlias
 from phoenix.db import models
 from phoenix.server.api.context import Context
 from phoenix.server.api.input_types.TraceAnnotationSort import TraceAnnotationSort
+from phoenix.server.api.types.CostBreakdown import CostBreakdown
 from phoenix.server.api.types.pagination import (
     ConnectionArgs,
     CursorString,
@@ -21,6 +22,8 @@ from phoenix.server.api.types.pagination import (
 )
 from phoenix.server.api.types.SortDir import SortDir
 from phoenix.server.api.types.Span import Span
+from phoenix.server.api.types.SpanCostDetailSummaryEntry import SpanCostDetailSummaryEntry
+from phoenix.server.api.types.SpanCostSummary import SpanCostSummary
 from phoenix.server.api.types.TraceAnnotation import TraceAnnotation, to_gql_trace_annotation

 if TYPE_CHECKING:
@@ -226,6 +229,44 @@ class Trace(Node):
             annotations = await session.scalars(stmt)
         return [to_gql_trace_annotation(annotation) for annotation in annotations]

+    @strawberry.field
+    async def cost_summary(
+        self,
+        info: Info[Context, None],
+    ) -> SpanCostSummary:
+        loader = info.context.data_loaders.span_cost_summary_by_trace
+        summary = await loader.load(self.trace_rowid)
+        return SpanCostSummary(
+            prompt=CostBreakdown(
+                tokens=summary.prompt.tokens,
+                cost=summary.prompt.cost,
+            ),
+            completion=CostBreakdown(
+                tokens=summary.completion.tokens,
+                cost=summary.completion.cost,
+            ),
+            total=CostBreakdown(
+                tokens=summary.total.tokens,
+                cost=summary.total.cost,
+            ),
+        )
+
+    @strawberry.field
+    async def cost_detail_summary_entries(
+        self,
+        info: Info[Context, None],
+    ) -> list[SpanCostDetailSummaryEntry]:
+        loader = info.context.data_loaders.span_cost_detail_summary_entries_by_trace
+        entries = await loader.load(self.trace_rowid)
+        return [
+            SpanCostDetailSummaryEntry(
+                token_type=entry.token_type,
+                is_prompt=entry.is_prompt,
+                value=CostBreakdown(tokens=entry.value.tokens, cost=entry.value.cost),
+            )
+            for entry in entries
+        ]
+

 INPUT_VALUE = SpanAttributes.INPUT_VALUE.split(".")
 OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE.split(".")
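
Note (not part of the diff): the two resolvers above copy a loader result onto nested GraphQL value types, and the by-trace loader conceptually returns the element-wise sum of per-span cost summaries. A minimal standalone sketch of that aggregation, using plain dataclasses as hypothetical stand-ins for the strawberry types:

    from dataclasses import dataclass

    @dataclass
    class CostBreakdown:
        tokens: float = 0.0
        cost: float = 0.0

    @dataclass
    class SpanCostSummary:
        prompt: CostBreakdown
        completion: CostBreakdown
        total: CostBreakdown

    def combine(a: CostBreakdown, b: CostBreakdown) -> CostBreakdown:
        return CostBreakdown(a.tokens + b.tokens, a.cost + b.cost)

    def trace_summary(span_summaries: list[SpanCostSummary]) -> SpanCostSummary:
        # element-wise sum of the per-span summaries for all spans in a trace
        out = SpanCostSummary(CostBreakdown(), CostBreakdown(), CostBreakdown())
        for s in span_summaries:
            out = SpanCostSummary(
                combine(out.prompt, s.prompt),
                combine(out.completion, s.completion),
                combine(out.total, s.total),
            )
        return out
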
phoenix/server/app.py CHANGED
@@ -85,6 +85,7 @@ from phoenix.server.api.dataloaders import (
     ExperimentRunAnnotations,
     ExperimentRunCountsDataLoader,
     ExperimentSequenceNumberDataLoader,
+    LastUsedTimesByGenerativeModelIdDataLoader,
     LatencyMsQuantileDataLoader,
     MinStartOrMaxEndTimeDataLoader,
     NumChildSpansDataLoader,
@@ -100,6 +101,18 @@ from phoenix.server.api.dataloaders import (
     SessionTraceLatencyMsQuantileDataLoader,
     SpanAnnotationsDataLoader,
     SpanByIdDataLoader,
+    SpanCostBySpanDataLoader,
+    SpanCostDetailsBySpanCostDataLoader,
+    SpanCostDetailSummaryEntriesByGenerativeModelDataLoader,
+    SpanCostDetailSummaryEntriesByProjectSessionDataLoader,
+    SpanCostDetailSummaryEntriesBySpanDataLoader,
+    SpanCostDetailSummaryEntriesByTraceDataLoader,
+    SpanCostSummaryByExperimentDataLoader,
+    SpanCostSummaryByExperimentRunDataLoader,
+    SpanCostSummaryByGenerativeModelDataLoader,
+    SpanCostSummaryByProjectDataLoader,
+    SpanCostSummaryByProjectSessionDataLoader,
+    SpanCostSummaryByTraceDataLoader,
     SpanDatasetExamplesDataLoader,
     SpanDescendantsDataLoader,
     SpanProjectsDataLoader,
@@ -120,6 +133,8 @@ from phoenix.server.api.routers import (
 from phoenix.server.api.routers.v1 import REST_API_VERSION
 from phoenix.server.api.schema import build_graphql_schema
 from phoenix.server.bearer_auth import BearerTokenAuthBackend, is_authenticated
+from phoenix.server.daemons.generative_model_store import GenerativeModelStore
+from phoenix.server.daemons.span_cost_calculator import SpanCostCalculator
 from phoenix.server.dml_event import DmlEvent
 from phoenix.server.dml_event_handler import DmlEventHandler
 from phoenix.server.email.types import EmailSender
@@ -502,6 +517,8 @@ def _lifespan(
     bulk_inserter: BulkInserter,
     dml_event_handler: DmlEventHandler,
     trace_data_sweeper: Optional[TraceDataSweeper],
+    span_cost_calculator: SpanCostCalculator,
+    generative_model_store: GenerativeModelStore,
     token_store: Optional[TokenStore] = None,
     tracer_provider: Optional["TracerProvider"] = None,
     enable_prometheus: bool = False,
@@ -536,6 +553,8 @@
         await stack.enter_async_context(dml_event_handler)
         if trace_data_sweeper:
             await stack.enter_async_context(trace_data_sweeper)
+        await stack.enter_async_context(span_cost_calculator)
+        await stack.enter_async_context(generative_model_store)
         if scaffolder_config:
             scaffolder = Scaffolder(
                 config=scaffolder_config,
@@ -583,6 +602,7 @@ def create_graphql_router(
     export_path: Path,
     last_updated_at: CanGetLastUpdatedAt,
     authentication_enabled: bool,
+    span_cost_calculator: SpanCostCalculator,
     corpus: Optional[Model] = None,
     cache_for_dataloaders: Optional[CacheForDataLoaders] = None,
     event_queue: CanPutItem[DmlEvent],
@@ -600,6 +620,7 @@ def create_graphql_router(
         export_path (Path): the file path to export data to for download (legacy)
         last_updated_at (CanGetLastUpdatedAt): How to get the last updated timestamp for updates.
         authentication_enabled (bool): Whether authentication is enabled.
+        span_cost_calculator (SpanCostCalculator): The span cost calculator for calculating costs.
         event_queue (CanPutItem[DmlEvent]): The event queue for DML events.
         corpus (Optional[Model], optional): the corpus for UMAP projection. Defaults to None.
         cache_for_dataloaders (Optional[CacheForDataLoaders], optional): GraphQL data loaders.
@@ -645,6 +666,9 @@
             experiment_run_annotations=ExperimentRunAnnotations(db),
             experiment_run_counts=ExperimentRunCountsDataLoader(db),
             experiment_sequence_number=ExperimentSequenceNumberDataLoader(db),
+            last_used_times_by_generative_model_id=LastUsedTimesByGenerativeModelIdDataLoader(
+                db
+            ),
             latency_ms_quantile=LatencyMsQuantileDataLoader(
                 db,
                 cache_map=(
@@ -679,6 +703,31 @@
             span_annotations=SpanAnnotationsDataLoader(db),
             span_fields=TableFieldsDataLoader(db, models.Span),
             span_by_id=SpanByIdDataLoader(db),
+            span_cost_by_span=SpanCostBySpanDataLoader(db),
+            span_cost_detail_summary_entries_by_generative_model=SpanCostDetailSummaryEntriesByGenerativeModelDataLoader(
+                db
+            ),
+            span_cost_detail_summary_entries_by_project_session=SpanCostDetailSummaryEntriesByProjectSessionDataLoader(
+                db
+            ),
+            span_cost_detail_summary_entries_by_span=SpanCostDetailSummaryEntriesBySpanDataLoader(
+                db
+            ),
+            span_cost_detail_summary_entries_by_trace=SpanCostDetailSummaryEntriesByTraceDataLoader(
+                db
+            ),
+            span_cost_details_by_span_cost=SpanCostDetailsBySpanCostDataLoader(db),
+            span_cost_detail_fields=TableFieldsDataLoader(db, models.SpanCostDetail),
+            span_cost_fields=TableFieldsDataLoader(db, models.SpanCost),
+            span_cost_summary_by_generative_model=SpanCostSummaryByGenerativeModelDataLoader(
+                db
+            ),
+            span_cost_summary_by_project=SpanCostSummaryByProjectDataLoader(
+                db,
+                cache_map=cache_for_dataloaders.token_cost if cache_for_dataloaders else None,
+            ),
+            span_cost_summary_by_project_session=SpanCostSummaryByProjectSessionDataLoader(db),
+            span_cost_summary_by_trace=SpanCostSummaryByTraceDataLoader(db),
             span_dataset_examples=SpanDatasetExamplesDataLoader(db),
             span_descendants=SpanDescendantsDataLoader(db),
             span_projects=SpanProjectsDataLoader(db),
@@ -698,6 +747,8 @@
             project_by_name=ProjectByNameDataLoader(db),
             users=UsersDataLoader(db),
             user_roles=UserRolesDataLoader(db),
+            span_cost_summary_by_experiment=SpanCostSummaryByExperimentDataLoader(db),
+            span_cost_summary_by_experiment_run=SpanCostSummaryByExperimentRunDataLoader(db),
         ),
         cache_for_dataloaders=cache_for_dataloaders,
         read_only=read_only,
@@ -705,6 +756,7 @@
         secret=secret,
         token_store=token_store,
        email_sender=email_sender,
+        span_cost_calculator=span_cost_calculator,
     )

     return GraphQLRouter(
@@ -860,9 +912,12 @@ def create_app(
         db=db,
         dml_event_handler=dml_event_handler,
     )
+    generative_model_store = GenerativeModelStore(db)
+    span_cost_calculator = SpanCostCalculator(db, generative_model_store)
     bulk_inserter = bulk_inserter_factory(
         db,
         enable_prometheus=enable_prometheus,
+        span_cost_calculator=span_cost_calculator,
         event_queue=dml_event_handler,
         initial_batch_of_spans=initial_batch_of_spans,
         initial_batch_of_evaluations=initial_batch_of_evaluations,
@@ -904,6 +959,7 @@
         secret=secret,
         token_store=token_store,
         email_sender=email_sender,
+        span_cost_calculator=span_cost_calculator,
     )
     if enable_prometheus:
         from phoenix.server.prometheus import PrometheusMiddleware
@@ -918,6 +974,8 @@
         bulk_inserter=bulk_inserter,
         dml_event_handler=dml_event_handler,
         trace_data_sweeper=trace_data_sweeper,
+        span_cost_calculator=span_cost_calculator,
+        generative_model_store=generative_model_store,
         token_store=token_store,
         tracer_provider=tracer_provider,
         enable_prometheus=enable_prometheus,
@@ -981,6 +1039,7 @@
     app.state.oauth2_clients = OAuth2Clients.from_configs(oauth2_client_configs or [])
     app.state.db = db
     app.state.email_sender = email_sender
+    app.state.span_cost_calculator = span_cost_calculator
    app = _add_get_secret_method(app=app, secret=secret)
    app = _add_get_token_store_method(app=app, token_store=token_store)
    if tracer_provider:
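
Note (not part of the diff): SpanCostCalculator and GenerativeModelStore are entered as async context managers during the application lifespan (see the `_lifespan` hunk above), i.e. background daemons tied to server startup and shutdown. A minimal sketch of that pattern with a hypothetical daemon, not the package's actual implementation:

    import asyncio
    from typing import Optional

    class PeriodicDaemon:
        # hypothetical stand-in: start a background task on __aenter__,
        # cancel it on __aexit__
        def __init__(self, interval_seconds: float = 60.0) -> None:
            self._interval = interval_seconds
            self._task: Optional["asyncio.Task[None]"] = None

        async def _run(self) -> None:
            while True:
                # e.g. refresh cached cost models from the database
                await asyncio.sleep(self._interval)

        async def __aenter__(self) -> "PeriodicDaemon":
            self._task = asyncio.create_task(self._run())
            return self

        async def __aexit__(self, *exc_info: object) -> None:
            if self._task is not None:
                self._task.cancel()

    async def main() -> None:
        async with PeriodicDaemon(interval_seconds=1.0):
            await asyncio.sleep(0.1)  # daemon runs for the duration of the block

    asyncio.run(main())
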

phoenix/server/cost_tracking/cost_details_calculator.py ADDED
@@ -0,0 +1,190 @@
+from itertools import chain
+from typing import Any, Iterable, Mapping
+
+from typing_extensions import TypeAlias
+
+from phoenix.db import models
+from phoenix.server.cost_tracking.helpers import get_aggregated_tokens
+from phoenix.server.cost_tracking.token_cost_calculator import (
+    TokenCostCalculator,
+    create_token_cost_calculator,
+)
+from phoenix.trace.attributes import get_attribute_value
+
+_TokenType: TypeAlias = str
+
+
+class SpanCostDetailsCalculator:
+    """
+    Calculates detailed cost breakdowns for LLM spans based on token usage and pricing.
+
+    This calculator processes both detailed token counts (from span attributes) and
+    aggregated token totals to provide comprehensive cost analysis for prompt and
+    completion tokens. It handles multiple token types (e.g., "input", "output",
+    "image", "audio", "video", "document", "reasoning", etc.) and calculates costs
+    using configured pricing models with fallback behavior.
+
+    **Fallback Behavior:**
+    - If a specific token type has a configured calculator, it uses that calculator
+    - If no specific calculator exists, it falls back to the default calculator:
+      - Prompt tokens (is_prompt=True) fall back to the "input" calculator
+      - Completion tokens (is_prompt=False) fall back to the "output" calculator
+
+    This ensures all token types get cost calculations even if not explicitly configured.
+
+    The calculator expects token prices to include at least:
+    - An "input" token type for prompt tokens (used as fallback for unconfigured prompt token types)
+    - An "output" token type for completion tokens (used as fallback for unconfigured completion token types)
+
+    Additional token types can be configured for more granular cost tracking.
+    """  # noqa: E501
+
+    def __init__(
+        self,
+        prices: Iterable[models.TokenPrice],
+    ) -> None:
+        """
+        Initialize the cost calculator with token pricing configuration.
+
+        Args:
+            prices: Collection of token price configurations defining rates for
+                different token types and whether they're prompt or completion tokens.
+
+        Raises:
+            ValueError: If the required "input" (prompt) or "output" (completion)
+                token types are missing from the pricing configuration.
+        """
+        # Create calculators for prompt token types (is_prompt=True)
+        self._prompt: Mapping[_TokenType, TokenCostCalculator] = {
+            p.token_type: create_token_cost_calculator(p.base_rate, p.customization)
+            for p in prices
+            if p.is_prompt
+        }
+        if "input" not in self._prompt:
+            raise ValueError("Token prices for prompt must include an 'input' token type")
+
+        # Create calculators for completion token types (is_prompt=False)
+        self._completion: Mapping[_TokenType, TokenCostCalculator] = {
+            p.token_type: create_token_cost_calculator(p.base_rate, p.customization)
+            for p in prices
+            if not p.is_prompt
+        }
+        if "output" not in self._completion:
+            raise ValueError("Token prices for completion must include an 'output' token type")
+
+    def calculate_details(
+        self,
+        attributes: Mapping[str, Any],
+    ) -> list[models.SpanCostDetail]:
+        """
+        Calculate detailed cost breakdown for a given span.
+
+        This method processes token usage in two phases:
+
+        1. **Detailed token processing**: Extracts specific token counts from span attributes
+           (e.g., "llm.token_count.prompt_details", "llm.token_count.completion_details")
+           and calculates costs for each token type found. Uses fallback behavior for
+           token types without specific calculators.
+
+        2. **Aggregated token processing**: For default token types ("input"/"output") that
+           weren't found in detailed processing, calculates remaining tokens by subtracting
+           detailed counts from total aggregated tokens.
+
+        **Fallback Calculation Logic:**
+        - For each token type in detailed processing:
+          - If a specific calculator exists for the token type, use it
+          - Otherwise, fall back to the default calculator ("input" for prompt tokens,
+            "output" for completion tokens)
+        - This ensures all token types receive cost calculations regardless of
+          specific calculator configuration
+
+        Args:
+            attributes: Dictionary containing span attributes with token usage data.
+
+        Returns:
+            List of SpanCostDetail objects containing token counts, costs, and cost-per-token
+            for each token type found in the span.
+
+        Note:
+            - Token counts are validated and converted to non-negative integers
+            - All token types receive cost calculations via the fallback mechanism
+            - Cost-per-token is None when the token count is zero; in the aggregated
+              phase it is also None when the cost is 0.0 (falsy evaluation)
+        """
+        prompt_details: dict[_TokenType, models.SpanCostDetail] = {}
+        completion_details: dict[_TokenType, models.SpanCostDetail] = {}
+
+        # Phase 1: Process detailed token counts from span attributes
+        for is_prompt, prefix, calculators, results in (
+            (True, "prompt", self._prompt, prompt_details),
+            (False, "completion", self._completion, completion_details),
+        ):
+            # Extract detailed token counts from span attributes
+            details = get_attribute_value(attributes, f"llm.token_count.{prefix}_details")
+            if isinstance(details, dict) and details:
+                for token_type, token_count in details.items():
+                    # Validate token count is numeric
+                    if not isinstance(token_count, (int, float)):
+                        continue
+                    tokens = max(0, int(token_count))
+
+                    # Calculate cost using a specific calculator or fall back to the default
+                    if token_type in calculators:
+                        # Use the specific calculator for this token type
+                        calculator = calculators[token_type]
+                    else:
+                        # Fall back to the default calculator: "input" for prompts,
+                        # "output" for completions
+                        key = "input" if is_prompt else "output"
+                        calculator = calculators[key]
+                    cost = calculator.calculate_cost(attributes, tokens)
+
+                    # Calculate cost per token (avoid division by zero)
+                    cost_per_token = cost / tokens if tokens else None
+
+                    detail = models.SpanCostDetail(
+                        token_type=token_type,
+                        is_prompt=is_prompt,
+                        tokens=tokens,
+                        cost=cost,
+                        cost_per_token=cost_per_token,
+                    )
+                    results[token_type] = detail
+
+        # Get aggregated token totals for fallback calculations
+        prompt_tokens, completion_tokens, _ = get_aggregated_tokens(attributes)
+
+        # Phase 2: Process remaining tokens for default token types
+        for is_prompt, token_type, total, calculators, results in (
+            (True, "input", prompt_tokens, self._prompt, prompt_details),
+            (False, "output", completion_tokens, self._completion, completion_details),
+        ):
+            # Skip if this token type was already processed in the detailed phase
+            if token_type in results:
+                continue
+
+            # Calculate remaining tokens by subtracting detailed counts from the total
+            tokens = total - sum(
+                int(d.tokens or 0) for d in results.values() if d.is_prompt == is_prompt
+            )
+
+            # Skip if no remaining tokens or negative (shouldn't happen with valid data)
+            if tokens <= 0:
+                continue
+
+            # Calculate cost using the guaranteed default calculator ("input"/"output" are required)
+            cost = calculators[token_type].calculate_cost(attributes, tokens)
+
+            # Calculate cost per token (avoid division by zero)
+            cost_per_token = cost / tokens if cost and tokens else None
+
+            detail = models.SpanCostDetail(
+                token_type=token_type,
+                is_prompt=is_prompt,
+                tokens=tokens,
+                cost=cost,
+                cost_per_token=cost_per_token,
+            )
+            results[token_type] = detail
+
+        # Return combined results from both prompt and completion processing
+        return list(chain(prompt_details.values(), completion_details.values()))
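
Note (not part of the diff): the fallback rule in SpanCostDetailsCalculator reduces to a dictionary lookup with a required default key. A minimal sketch with hypothetical per-token rates standing in for the TokenCostCalculator objects:

    # is_prompt=True rates; "input" is the required default
    prompt_rates = {"input": 2.5e-6, "audio": 1.0e-5}
    # is_prompt=False rates; "output" is the required default
    completion_rates = {"output": 1.0e-5}

    def detail_cost(token_type: str, tokens: int, is_prompt: bool) -> float:
        rates = prompt_rates if is_prompt else completion_rates
        # fall back to the default rate when the token type is unconfigured
        rate = rates.get(token_type, rates["input" if is_prompt else "output"])
        return tokens * rate

    # "reasoning" has no completion-side rate, so it falls back to "output"
    assert detail_cost("reasoning", 100, is_prompt=False) == 100 * 1.0e-5
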

phoenix/server/cost_tracking/cost_model_lookup.py ADDED
@@ -0,0 +1,151 @@
+import re
+from datetime import datetime
+from typing import Any, Iterable, Mapping, Optional
+
+from openinference.semconv.trace import SpanAttributes
+from typing_extensions import TypeAlias
+
+from phoenix.datetime_utils import is_timezone_aware
+from phoenix.db import models
+from phoenix.server.cost_tracking import regex_specificity
+from phoenix.trace.attributes import get_attribute_value
+
+_RegexPatternStr: TypeAlias = str
+_RegexSpecificityScore: TypeAlias = int
+_TieBreakerId: TypeAlias = int
+
+
+class CostModelLookup:
+    def __init__(
+        self,
+        generative_models: Iterable[models.GenerativeModel] = (),
+    ) -> None:
+        self._models = tuple(generative_models)
+        self._model_priority: dict[
+            int, tuple[_RegexSpecificityScore, float, _TieBreakerId]
+        ] = {}  # higher is better
+        self._regex_specificity_score: dict[re.Pattern[str], _RegexSpecificityScore] = {}
+
+        for m in self._models:
+            self._regex_specificity_score[m.name_pattern] = regex_specificity.score(m.name_pattern)
+
+            # For built-in models, use negative ID so that earlier IDs win;
+            # for user-defined models, use positive ID so that later IDs win
+            tie_breaker = -m.id if m.is_built_in else m.id
+
+            self._model_priority[m.id] = (
+                self._regex_specificity_score[m.name_pattern],
+                m.start_time.timestamp() if m.start_time else 0.0,
+                tie_breaker,
+            )
+
+    def find_model(
+        self,
+        start_time: datetime,
+        attributes: Mapping[str, Any],
+    ) -> Optional[models.GenerativeModel]:
+        """
+        Find the most appropriate generative model for cost tracking based on attributes and time.
+
+        This method implements a model lookup that filters and prioritizes generative
+        models based on the provided attributes and timestamp. The lookup follows
+        a specific priority hierarchy to ensure consistent and predictable model selection.
+
+        Args:
+            start_time: The timestamp for which to find a model. Must be timezone-aware.
+                Models with start_time greater than this value will be excluded.
+            attributes: A mapping containing span attributes. Must include:
+                - SpanAttributes.LLM_MODEL_NAME: The name of the LLM model to match
+                - SpanAttributes.LLM_PROVIDER: (Optional) The provider of the LLM model
+
+        Raises:
+            TypeError: If start_time is not timezone-aware (tzinfo is None)
+
+        Returns:
+            The most appropriate GenerativeModel that matches the criteria, or None if no
+            suitable model is found.
+
+        Model Selection Logic:
+        1. **Input Validation**: Returns None if the model name is empty or whitespace-only
+        2. **Time and Regex Filtering**: Only keep models that satisfy both conditions:
+           - model.start_time <= start_time, or model.start_time is None (active models)
+           - name_pattern regex matches the model name from attributes
+        3. **Early Return Optimization**: If only one candidate remains, return it immediately
+        4. **Two-Tier Priority System**: Models are processed in tiers:
+           - User-defined models (is_built_in=False) are processed first
+           - Built-in models (is_built_in=True) are processed second
+           - If a tier has only one model, return it immediately
+        5. **Provider Filtering**: Within each tier, if a provider is specified:
+           - Prefer models with a matching provider
+           - Fall back to provider-agnostic models if no provider-specific matches exist
+        6. **Priority Selection**: Select the model with the highest priority tuple:
+           (regex_specificity_score, start_time.timestamp, tie_breaker)
+
+        Priority Tuple Components:
+        - regex_specificity_score: More specific regex patterns have higher priority
+        - start_time.timestamp: Models with later start times have higher priority
+        - tie_breaker: For built-in models, uses negative ID (lower IDs win);
+          for user-defined models, uses positive ID (higher IDs win)
+
+        Examples:
+            >>> lookup = CostModelLookup([model1, model2, model3])
+            >>> model = lookup.find_model(
+            ...     start_time=datetime(2024, 1, 1, tzinfo=timezone.utc),
+            ...     attributes={"llm": {"model_name": "gpt-3.5-turbo", "provider": "openai"}}
+            ... )
+        """  # noqa: E501
+        # 1. extract and validate inputs
+        if not is_timezone_aware(start_time):
+            raise TypeError("start_time must be timezone-aware")
+
+        model_name = str(
+            get_attribute_value(attributes, SpanAttributes.LLM_MODEL_NAME) or ""
+        ).strip()
+        if not model_name:
+            return None
+
+        # 2. only include models that are active and match the regex pattern
+        candidates = [
+            model
+            for model in self._models
+            if (not model.start_time or model.start_time <= start_time)
+            and model.name_pattern.match(model_name)
+        ]
+        if not candidates:
+            return None
+
+        # 3. early return: if only one candidate remains, return it
+        if len(candidates) == 1:
+            return candidates[0]
+
+        provider = str(get_attribute_value(attributes, SpanAttributes.LLM_PROVIDER) or "").strip()
+
+        # 4. priority-based selection: user-defined models first, then built-in models
+        for is_built_in in (False, True):  # False = user-defined, True = built-in
+            # get candidates for the current tier (user-defined or built-in)
+            tier_candidates = [model for model in candidates if model.is_built_in == is_built_in]
+
+            if not tier_candidates:
+                continue  # try next tier
+
+            # early return: if only one candidate in this tier, return it
+            if len(tier_candidates) == 1:
+                return tier_candidates[0]
+
+            # 5. provider filtering: if a provider is specified, prefer provider-specific models
+            if provider:
+                provider_specific_models = [
+                    model
+                    for model in tier_candidates
+                    if model.provider and model.provider == provider
+                ]
+                # only use provider-specific models if any exist;
+                # this allows fallback to provider-agnostic models when there is no match
+                if provider_specific_models:
+                    tier_candidates = provider_specific_models
+
+            # 6. select the best model in this tier
+            return max(tier_candidates, key=lambda model: self._model_priority[model.id])
+
+        # 7. no suitable model found
+        return None
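
Note (not part of the diff): within a tier, selection is just max() over the priority tuples built in __init__, compared lexicographically. A toy illustration with hypothetical values:

    # (regex_specificity, start_time_ts, tie_breaker) per candidate; higher wins
    candidates = {
        "built-in gpt-4o, dated price":   (120, 1704067200.0, -3),
        "built-in gpt-4o, undated price": (120, 0.0, -1),
        "built-in gpt-4.* (less specific pattern)": (80, 0.0, -2),
    }
    best = max(candidates, key=lambda name: candidates[name])
    print(best)  # the dated, more specific entry wins on the second tuple element
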

phoenix/server/cost_tracking/helpers.py ADDED
@@ -0,0 +1,68 @@
+import logging
+from typing import Any, Mapping
+
+from openinference.semconv.trace import SpanAttributes
+from typing_extensions import TypeAlias
+
+from phoenix.trace.attributes import get_attribute_value
+
+logger = logging.getLogger(__name__)
+
+_PromptTokens: TypeAlias = int
+_CompletionTokens: TypeAlias = int
+_TotalTokens: TypeAlias = int
+
+
+def get_aggregated_tokens(
+    attributes: Mapping[str, Any],
+) -> tuple[_PromptTokens, _CompletionTokens, _TotalTokens]:
+    """Return the prompt, completion, and total token counts from the span attributes."""
+    try:
+        prompt_tokens_value = get_attribute_value(
+            attributes,
+            SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
+        )
+        prompt_tokens: int = (
+            0
+            if not isinstance(prompt_tokens_value, (int, float))
+            else max(0, int(prompt_tokens_value))
+        )
+
+        completion_tokens_value = get_attribute_value(
+            attributes,
+            SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
+        )
+        completion_tokens: int = (
+            0
+            if not isinstance(completion_tokens_value, (int, float))
+            else max(0, int(completion_tokens_value))
+        )
+
+        total_tokens_value = get_attribute_value(
+            attributes,
+            SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
+        )
+        total_tokens: int = (
+            0
+            if not isinstance(total_tokens_value, (int, float))
+            else max(0, int(total_tokens_value))
+        )
+
+        assert prompt_tokens >= 0
+        assert completion_tokens >= 0
+        assert total_tokens >= 0
+
+        calculated_total = prompt_tokens + completion_tokens
+
+        if total_tokens > calculated_total:
+            if not prompt_tokens:
+                prompt_tokens = total_tokens - completion_tokens
+            else:
+                completion_tokens = total_tokens - prompt_tokens
+        else:
+            total_tokens = calculated_total
+
+        return prompt_tokens, completion_tokens, total_tokens
+    except Exception as e:
+        logger.error(f"Error getting aggregated tokens: {e}")
+        return 0, 0, 0
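
Note (not part of the diff): the reconciliation rule above trusts a reported total that exceeds prompt + completion and lets the missing side absorb the difference; otherwise it recomputes the total from the parts. The same rule, run on a couple of hypothetical inputs:

    def reconcile(prompt: int, completion: int, total: int) -> tuple[int, int, int]:
        if total > prompt + completion:
            if not prompt:
                prompt = total - completion
            else:
                completion = total - prompt
        else:
            total = prompt + completion
        return prompt, completion, total

    print(reconcile(0, 40, 100))  # (60, 40, 100): prompt inferred from the total
    print(reconcile(60, 40, 0))   # (60, 40, 100): total recomputed from the parts
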