arize-phoenix 10.14.0__py3-none-any.whl → 11.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix might be problematic.

Files changed (84)
  1. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/METADATA +3 -2
  2. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/RECORD +82 -50
  3. phoenix/config.py +5 -2
  4. phoenix/datetime_utils.py +8 -1
  5. phoenix/db/bulk_inserter.py +40 -1
  6. phoenix/db/facilitator.py +263 -4
  7. phoenix/db/insertion/helpers.py +15 -0
  8. phoenix/db/insertion/span.py +3 -1
  9. phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
  10. phoenix/db/models.py +267 -9
  11. phoenix/db/types/model_provider.py +1 -0
  12. phoenix/db/types/token_price_customization.py +29 -0
  13. phoenix/server/api/context.py +38 -4
  14. phoenix/server/api/dataloaders/__init__.py +41 -5
  15. phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
  16. phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
  17. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
  18. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
  19. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
  20. phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
  21. phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
  22. phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +58 -0
  23. phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
  24. phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
  25. phoenix/server/api/dataloaders/span_cost_summary_by_project.py +140 -0
  26. phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
  27. phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
  28. phoenix/server/api/dataloaders/span_costs.py +35 -0
  29. phoenix/server/api/dataloaders/types.py +29 -0
  30. phoenix/server/api/helpers/playground_clients.py +562 -12
  31. phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
  32. phoenix/server/api/helpers/prompts/models.py +67 -0
  33. phoenix/server/api/input_types/GenerativeModelInput.py +2 -0
  34. phoenix/server/api/input_types/ProjectSessionSort.py +3 -0
  35. phoenix/server/api/input_types/SpanSort.py +17 -0
  36. phoenix/server/api/mutations/__init__.py +2 -0
  37. phoenix/server/api/mutations/chat_mutations.py +17 -0
  38. phoenix/server/api/mutations/model_mutations.py +208 -0
  39. phoenix/server/api/queries.py +82 -41
  40. phoenix/server/api/routers/v1/traces.py +11 -4
  41. phoenix/server/api/subscriptions.py +36 -2
  42. phoenix/server/api/types/CostBreakdown.py +15 -0
  43. phoenix/server/api/types/Experiment.py +59 -1
  44. phoenix/server/api/types/ExperimentRun.py +58 -4
  45. phoenix/server/api/types/GenerativeModel.py +143 -2
  46. phoenix/server/api/types/GenerativeProvider.py +33 -20
  47. phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
  48. phoenix/server/api/types/ModelInterface.py +11 -0
  49. phoenix/server/api/types/PlaygroundModel.py +10 -0
  50. phoenix/server/api/types/Project.py +42 -0
  51. phoenix/server/api/types/ProjectSession.py +44 -0
  52. phoenix/server/api/types/Span.py +137 -0
  53. phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
  54. phoenix/server/api/types/SpanCostSummary.py +10 -0
  55. phoenix/server/api/types/TokenPrice.py +16 -0
  56. phoenix/server/api/types/TokenUsage.py +3 -3
  57. phoenix/server/api/types/Trace.py +41 -0
  58. phoenix/server/app.py +59 -0
  59. phoenix/server/cost_tracking/cost_details_calculator.py +190 -0
  60. phoenix/server/cost_tracking/cost_model_lookup.py +151 -0
  61. phoenix/server/cost_tracking/helpers.py +68 -0
  62. phoenix/server/cost_tracking/model_cost_manifest.json +59 -329
  63. phoenix/server/cost_tracking/regex_specificity.py +397 -0
  64. phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
  65. phoenix/server/daemons/__init__.py +0 -0
  66. phoenix/server/daemons/generative_model_store.py +51 -0
  67. phoenix/server/daemons/span_cost_calculator.py +103 -0
  68. phoenix/server/dml_event_handler.py +1 -0
  69. phoenix/server/static/.vite/manifest.json +36 -36
  70. phoenix/server/static/assets/components-BnK9kodr.js +5055 -0
  71. phoenix/server/static/assets/{index-qiubV_74.js → index-S3YKLmbo.js} +13 -13
  72. phoenix/server/static/assets/{pages-C4V07ozl.js → pages-BW6PBHZb.js} +809 -417
  73. phoenix/server/static/assets/{vendor-Bfsiga8H.js → vendor-DqQvHbPa.js} +147 -147
  74. phoenix/server/static/assets/{vendor-arizeai-CQOWsrzm.js → vendor-arizeai-CLX44PFA.js} +1 -1
  75. phoenix/server/static/assets/{vendor-codemirror-CrcGVhB2.js → vendor-codemirror-Du3XyJnB.js} +1 -1
  76. phoenix/server/static/assets/{vendor-recharts-Yyg3G-Rq.js → vendor-recharts-B2PJDrnX.js} +25 -25
  77. phoenix/server/static/assets/{vendor-shiki-OPjag7Hm.js → vendor-shiki-CNbrFjf9.js} +1 -1
  78. phoenix/version.py +1 -1
  79. phoenix/server/cost_tracking/cost_lookup.py +0 -255
  80. phoenix/server/static/assets/components-CUUWyAMo.js +0 -4509
  81. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/WHEEL +0 -0
  82. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/entry_points.txt +0 -0
  83. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/IP_NOTICE +0 -0
  84. {arize_phoenix-10.14.0.dist-info → arize_phoenix-11.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -463,6 +463,35 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
         yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
         yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens

+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            if (
+                hasattr(prompt_details, "cached_tokens")
+                and prompt_details.cached_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, prompt_details.cached_tokens
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, prompt_details.audio_tokens
+
+        if (
+            hasattr(usage, "completion_tokens_details")
+            and usage.completion_tokens_details is not None
+        ):
+            completion_details = usage.completion_tokens_details
+            if (
+                hasattr(completion_details, "reasoning_tokens")
+                and completion_details.reasoning_tokens is not None
+            ):
+                yield (
+                    LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                    completion_details.reasoning_tokens,
+                )
+            if (
+                hasattr(completion_details, "audio_tokens")
+                and completion_details.audio_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, completion_details.audio_tokens
+

 def _get_credential_value(
     credentials: Optional[list[PlaygroundClientCredential]], env_var_name: str
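
Note: the new detail yields above use duck-typed access to the OpenAI SDK's usage object. A minimal sketch of the shape being probed, with SimpleNamespace standing in for the SDK's models and purely illustrative values:

from types import SimpleNamespace

# Stand-in for the usage object the generator above receives; real clients get
# openai.types.CompletionUsage, but only these duck-typed fields are accessed.
usage = SimpleNamespace(
    prompt_tokens=120,
    completion_tokens=48,
    total_tokens=168,
    prompt_tokens_details=SimpleNamespace(cached_tokens=100, audio_tokens=None),
    completion_tokens_details=SimpleNamespace(reasoning_tokens=32, audio_tokens=None),
)
# With these values the new branches yield only the cache-read and reasoning counts;
# the hasattr/None guards mean SDKs or providers that omit the detail objects add nothing.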
@@ -597,6 +626,465 @@ class OllamaStreamingClient(OpenAIBaseStreamingClient):
         self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value


+@register_llm_client(
+    provider_key=GenerativeProviderKey.AWS,
+    model_names=[
+        PROVIDER_DEFAULT,
+        "anthropic.claude-3-5-sonnet-20240620-v1:0",
+        "anthropic.claude-3-7-sonnet-20250219-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+        "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "anthropic.claude-3-5-haiku-20241022-v1:0",
+        "anthropic.claude-opus-4-20250514-v1:0",
+        "anthropic.claude-sonnet-4-20250514-v1:0",
+        "amazon.titan-embed-text-v2:0",
+        "amazon.nova-pro-v1:0",
+        "amazon.nova-premier-v1:0:8k",
+        "amazon.nova-premier-v1:0:20k",
+        "amazon.nova-premier-v1:0:1000k",
+        "amazon.nova-premier-v1:0:mm",
+        "amazon.nova-premier-v1:0",
+        "amazon.nova-lite-v1:0",
+        "amazon.nova-micro-v1:0",
+        "deepseek.r1-v1:0",
+        "mistral.pixtral-large-2502-v1:0",
+        "meta.llama3-1-8b-instruct-v1:0:128k",
+        "meta.llama3-1-8b-instruct-v1:0",
+        "meta.llama3-1-70b-instruct-v1:0:128k",
+        "meta.llama3-1-70b-instruct-v1:0",
+        "meta.llama3-1-405b-instruct-v1:0",
+        "meta.llama3-2-11b-instruct-v1:0",
+        "meta.llama3-2-90b-instruct-v1:0",
+        "meta.llama3-2-1b-instruct-v1:0",
+        "meta.llama3-2-3b-instruct-v1:0",
+        "meta.llama3-3-70b-instruct-v1:0",
+        "meta.llama4-scout-17b-instruct-v1:0",
+        "meta.llama4-maverick-17b-instruct-v1:0",
+    ],
+)
+class BedrockStreamingClient(PlaygroundStreamingClient):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ) -> None:
+        import boto3  # type: ignore[import-untyped]
+
+        super().__init__(model=model, credentials=credentials)
+        self.region = model.region or "us-east-1"
+        self.api = "converse"
+        self.aws_access_key_id = _get_credential_value(credentials, "AWS_ACCESS_KEY_ID") or getenv(
+            "AWS_ACCESS_KEY_ID"
+        )
+        self.aws_secret_access_key = _get_credential_value(
+            credentials, "AWS_SECRET_ACCESS_KEY"
+        ) or getenv("AWS_SECRET_ACCESS_KEY")
+        self.aws_session_token = _get_credential_value(credentials, "AWS_SESSION_TOKEN") or getenv(
+            "AWS_SESSION_TOKEN"
+        )
+        self.model_name = model.name
+        self.client = boto3.client(
+            service_name="bedrock-runtime",
+            region_name="us-east-1",  # match the default region in the UI
+            aws_access_key_id=self.aws_access_key_id,
+            aws_secret_access_key=self.aws_secret_access_key,
+            aws_session_token=self.aws_session_token,
+        )
+
+        self._attributes[LLM_PROVIDER] = "aws"
+        self._attributes[LLM_SYSTEM] = "aws"
+
+    @classmethod
+    def dependencies(cls) -> list[Dependency]:
+        return [Dependency(name="boto3")]
+
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            IntInvocationParameter(
+                invocation_name="max_tokens",
+                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
+                label="Max Tokens",
+                default_value=1024,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="temperature",
+                canonical_name=CanonicalParameterName.TEMPERATURE,
+                label="Temperature",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="top_p",
+                canonical_name=CanonicalParameterName.TOP_P,
+                label="Top P",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            JSONInvocationParameter(
+                invocation_name="tool_choice",
+                label="Tool Choice",
+                canonical_name=CanonicalParameterName.TOOL_CHOICE,
+            ),
+        ]
+
+    async def chat_completion_create(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        **invocation_parameters: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        import boto3
+
+        if (
+            self.client.meta.region_name != self.region
+        ):  # override the region if it's different from the default
+            self.client = boto3.client(
+                "bedrock-runtime",
+                region_name=self.region,
+                aws_access_key_id=self.aws_access_key_id,
+                aws_secret_access_key=self.aws_secret_access_key,
+                aws_session_token=self.aws_session_token,
+            )
+        if self.api == "invoke":
+            async for chunk in self._handle_invoke_api(messages, tools, invocation_parameters):
+                yield chunk
+        else:
+            async for chunk in self._handle_converse_api(messages, tools, invocation_parameters):
+                yield chunk
+
+    async def _handle_converse_api(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        invocation_parameters: dict[str, Any],
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        """
+        Handle the converse API.
+        """
+        # Build messages in Converse API format
+        converse_messages = self._build_converse_messages(messages)
+
+        # Build the request parameters for Converse API
+        converse_params: dict[str, Any] = {
+            "modelId": f"us.{self.model_name}",
+            "messages": converse_messages,
+            "inferenceConfig": {
+                "maxTokens": invocation_parameters["max_tokens"],
+                "temperature": invocation_parameters["temperature"],
+                "topP": invocation_parameters["top_p"],
+            },
+        }
+
+        # Add system prompt if available
+        system_prompt = self._extract_system_prompt(messages)
+        if system_prompt:
+            converse_params["system"] = [{"text": system_prompt}]
+
+        # Add tools if provided
+        if tools:
+            converse_params["toolConfig"] = {"tools": tools}
+            if (
+                "tool_choice" in invocation_parameters
+                and invocation_parameters["tool_choice"]["type"] != "none"
+            ):
+                converse_params["toolConfig"]["toolChoice"] = {}
+
+                if invocation_parameters["tool_choice"]["type"] == "auto":
+                    converse_params["toolConfig"]["toolChoice"]["auto"] = {}
+                elif invocation_parameters["tool_choice"]["type"] == "any":
+                    converse_params["toolConfig"]["toolChoice"]["any"] = {}
+                else:
+                    converse_params["toolConfig"]["toolChoice"]["tool"] = {
+                        "name": invocation_parameters["tool_choice"]["name"],
+                    }
+
+        # Make the streaming API call
+        response = self.client.converse_stream(**converse_params)
+
+        # Track active tool calls
+        active_tool_calls = {}  # contentBlockIndex -> {id, name, arguments_buffer}
+
+        # Process the event stream
+        event_stream = response.get("stream")
+
+        for event in event_stream:
+            # Handle content block start events
+            if "contentBlockStart" in event:
+                content_block_start = event["contentBlockStart"]
+                start_event = content_block_start.get("start", {})
+                block_index = content_block_start.get(
+                    "contentBlockIndex", 0
+                )  # Get the actual index
+
+                if "toolUse" in start_event:
+                    tool_use = start_event["toolUse"]
+                    active_tool_calls[block_index] = {  # Use the actual block index
+                        "id": tool_use.get("toolUseId"),
+                        "name": tool_use.get("name"),
+                        "arguments_buffer": "",
+                    }
+
+                    # Yield initial tool call chunk
+                    yield ToolCallChunk(
+                        id=tool_use.get("toolUseId"),
+                        function=FunctionCallChunk(
+                            name=tool_use.get("name"),
+                            arguments="",
+                        ),
+                    )
+
+            # Handle content block delta events
+            elif "contentBlockDelta" in event:
+                content_delta = event["contentBlockDelta"]
+                delta = content_delta.get("delta", {})
+                delta_index = content_delta.get("contentBlockIndex", 0)
+
+                # Handle text delta
+                if "text" in delta:
+                    yield TextChunk(content=delta["text"])
+
+                # Handle tool use delta
+                elif "toolUse" in delta:
+                    tool_delta = delta["toolUse"]
+                    if "input" in tool_delta and delta_index in active_tool_calls:
+                        # Accumulate tool arguments
+                        json_chunk = tool_delta["input"]
+                        active_tool_calls[delta_index]["arguments_buffer"] += json_chunk
+
+                        # Yield incremental argument update
+                        yield ToolCallChunk(
+                            id=active_tool_calls[delta_index]["id"],
+                            function=FunctionCallChunk(
+                                name=active_tool_calls[delta_index]["name"],
+                                arguments=json_chunk,
+                            ),
+                        )
+
+            # Handle content block stop events
+            elif "contentBlockStop" in event:
+                stop_index = event["contentBlockStop"].get("contentBlockIndex", 0)
+                if stop_index in active_tool_calls:
+                    del active_tool_calls[stop_index]
+
+            elif "metadata" in event:
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_PROMPT: event.get("metadata")
+                        .get("usage", {})
+                        .get("inputTokens", 0)
+                    }
+                )
+
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_COMPLETION: event.get("metadata")
+                        .get("usage", {})
+                        .get("outputTokens", 0)
+                    }
+                )
+
+                self._attributes.update(
+                    {
+                        LLM_TOKEN_COUNT_TOTAL: event.get("metadata")
+                        .get("usage", {})
+                        .get("totalTokens", 0)
+                    }
+                )
+
+    async def _handle_invoke_api(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        invocation_parameters: dict[str, Any],
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        if "anthropic" not in self.model_name:
+            raise ValueError("Invoke API is only supported for Anthropic models")
+
+        bedrock_messages, system_prompt = self._build_bedrock_messages(messages)
+        bedrock_params = {
+            "anthropic_version": "bedrock-2023-05-31",
+            "max_tokens": invocation_parameters["max_tokens"],
+            "messages": bedrock_messages,
+            "system": system_prompt,
+            "temperature": invocation_parameters["temperature"],
+            "top_p": invocation_parameters["top_p"],
+            "tools": tools,
+        }
+
+        response = self.client.invoke_model_with_response_stream(
+            modelId=f"us.{self.model_name}",  # or another Claude model
+            contentType="application/json",
+            accept="application/json",
+            body=json.dumps(bedrock_params),
+            trace="ENABLED_FULL",
+        )
+
+        # The response['body'] is an EventStream object
+        event_stream = response["body"]
+
+        # Track active tool calls and their accumulating arguments
+        active_tool_calls: dict[int, dict[str, Any]] = {}  # index -> {id, name, arguments_buffer}
+
+        for event in event_stream:
+            if "chunk" in event:
+                chunk_data = json.loads(event["chunk"]["bytes"].decode("utf-8"))
+
+                # Handle text content
+                if chunk_data.get("type") == "content_block_delta":
+                    delta = chunk_data.get("delta", {})
+                    index = chunk_data.get("index", 0)
+
+                    if delta.get("type") == "text_delta" and "text" in delta:
+                        yield TextChunk(content=delta["text"])
+
+                    elif delta.get("type") == "input_json_delta":
+                        # Accumulate tool arguments
+                        if index in active_tool_calls:
+                            active_tool_calls[index]["arguments_buffer"] += delta.get(
+                                "partial_json", ""
+                            )
+                            # Yield incremental argument update
+                            yield ToolCallChunk(
+                                id=active_tool_calls[index]["id"],
+                                function=FunctionCallChunk(
+                                    name=active_tool_calls[index]["name"],
+                                    arguments=delta.get("partial_json", ""),
+                                ),
+                            )
+
+                # Handle tool call start
+                elif chunk_data.get("type") == "content_block_start":
+                    content_block = chunk_data.get("content_block", {})
+                    index = chunk_data.get("index", 0)
+
+                    if content_block.get("type") == "tool_use":
+                        # Initialize tool call tracking
+                        active_tool_calls[index] = {
+                            "id": content_block.get("id"),
+                            "name": content_block.get("name"),
+                            "arguments_buffer": "",
+                        }
+
+                        # Yield initial tool call chunk
+                        yield ToolCallChunk(
+                            id=content_block.get("id"),
+                            function=FunctionCallChunk(
+                                name=content_block.get("name"),
+                                arguments="",  # Start with empty, will be filled by deltas
+                            ),
+                        )
+
+                # Handle content block stop (tool call complete)
+                elif chunk_data.get("type") == "content_block_stop":
+                    index = chunk_data.get("index", 0)
+                    if index in active_tool_calls:
+                        # Tool call is complete, clean up
+                        del active_tool_calls[index]
+
+                elif chunk_data.get("type") == "message_stop":
+                    self._attributes.update(
+                        {
+                            LLM_TOKEN_COUNT_COMPLETION: chunk_data.get(
+                                "amazon-bedrock-invocationMetrics", {}
+                            ).get("outputTokenCount", 0)
+                        }
+                    )
+
+                    self._attributes.update(
+                        {
+                            LLM_TOKEN_COUNT_PROMPT: chunk_data.get(
+                                "amazon-bedrock-invocationMetrics", {}
+                            ).get("inputTokenCount", 0)
+                        }
+                    )
+
+    def _build_bedrock_messages(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> tuple[list[dict[str, Any]], str]:
+        bedrock_messages = []
+        system_prompt = ""
+        for role, content, _, _ in messages:
+            if role == ChatCompletionMessageRole.USER:
+                bedrock_messages.append(
+                    {
+                        "role": "user",
+                        "content": content,
+                    }
+                )
+            elif role == ChatCompletionMessageRole.AI:
+                bedrock_messages.append(
+                    {
+                        "role": "assistant",
+                        "content": content,
+                    }
+                )
+            elif role == ChatCompletionMessageRole.SYSTEM:
+                system_prompt += content + "\n"
+        return bedrock_messages, system_prompt
+
+    def _extract_system_prompt(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> str:
+        """Extract system prompt from messages."""
+        system_prompts = []
+        for role, content, _, _ in messages:
+            if role == ChatCompletionMessageRole.SYSTEM:
+                system_prompts.append(content)
+        return "\n".join(system_prompts)
+
+    def _build_converse_messages(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+    ) -> list[dict[str, Any]]:
+        """Convert messages to Converse API format."""
+        converse_messages: list[dict[str, Any]] = []
+        for role, content, _id, tool_calls in messages:
+            if role == ChatCompletionMessageRole.USER:
+                converse_messages.append({"role": "user", "content": [{"text": content}]})
+            elif role == ChatCompletionMessageRole.TOOL:
+                converse_messages.append(
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "toolResult": {
+                                    "toolUseId": _id,
+                                    "content": [{"json": json.loads(content)}],
+                                }
+                            }
+                        ],
+                    }
+                )
+
+            elif role == ChatCompletionMessageRole.AI:
+                # Handle assistant messages with potential tool calls
+                message: dict[str, Any] = {"role": "assistant", "content": []}
+                if content:
+                    message["content"].append({"text": content})
+                if tool_calls:
+                    for tool_call in tool_calls:
+                        message["content"].append(tool_call)
+                converse_messages.append(message)
+        return converse_messages
+
+
 @register_llm_client(
     provider_key=GenerativeProviderKey.OPENAI,
     model_names=[
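
For orientation, the new BedrockStreamingClient above iterates the event stream returned by boto3's bedrock-runtime converse_stream call. A rough, illustrative sketch of the event shapes its loop reacts to (field names follow the Bedrock Converse API as used in the diff; the values are made up):

# Illustrative events as read by the loop in _handle_converse_api above.
events = [
    {"contentBlockStart": {"contentBlockIndex": 0,
                           "start": {"toolUse": {"toolUseId": "t1", "name": "get_weather"}}}},
    {"contentBlockDelta": {"contentBlockIndex": 0,
                           "delta": {"toolUse": {"input": '{"city": "Paris"}'}}}},
    {"contentBlockStop": {"contentBlockIndex": 0}},
    {"metadata": {"usage": {"inputTokens": 12, "outputTokens": 5, "totalTokens": 17}}},
]
# The client emits ToolCallChunk/TextChunk objects for the block events and records
# the usage numbers from the final metadata event as span token-count attributes.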
@@ -656,13 +1144,20 @@ class OpenAIStreamingClient(OpenAIBaseStreamingClient):
     provider_key=GenerativeProviderKey.OPENAI,
     model_names=[
         "o1",
+        "o1-pro",
         "o1-2024-12-17",
+        "o1-pro-2025-03-19",
         "o1-mini",
         "o1-mini-2024-09-12",
         "o1-preview",
         "o1-preview-2024-09-12",
+        "o3",
+        "o3-pro",
+        "o3-2025-04-16",
         "o3-mini",
         "o3-mini-2025-01-31",
+        "o4-mini",
+        "o4-mini-2025-04-16",
     ],
 )
 class OpenAIReasoningStreamingClient(OpenAIStreamingClient):
@@ -799,6 +1294,35 @@ class OpenAIReasoningStreamingClient(OpenAIStreamingClient):
         yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
         yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens

+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            if (
+                hasattr(prompt_details, "cached_tokens")
+                and prompt_details.cached_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, prompt_details.cached_tokens
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, prompt_details.audio_tokens
+
+        if (
+            hasattr(usage, "completion_tokens_details")
+            and usage.completion_tokens_details is not None
+        ):
+            completion_details = usage.completion_tokens_details
+            if (
+                hasattr(completion_details, "reasoning_tokens")
+                and completion_details.reasoning_tokens is not None
+            ):
+                yield (
+                    LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                    completion_details.reasoning_tokens,
+                )
+            if (
+                hasattr(completion_details, "audio_tokens")
+                and completion_details.audio_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, completion_details.audio_tokens
+

 @register_llm_client(
     provider_key=GenerativeProviderKey.AZURE_OPENAI,
@@ -856,12 +1380,6 @@ class AzureOpenAIStreamingClient(OpenAIBaseStreamingClient):
     provider_key=GenerativeProviderKey.ANTHROPIC,
     model_names=[
         PROVIDER_DEFAULT,
-        "claude-sonnet-4-0",
-        "claude-sonnet-4-20250514",
-        "claude-opus-4-0",
-        "claude-opus-4-20250514",
-        "claude-3-7-sonnet-latest",
-        "claude-3-7-sonnet-20250219",
         "claude-3-5-sonnet-latest",
         "claude-3-5-haiku-latest",
         "claude-3-5-sonnet-20241022",
@@ -962,15 +1480,34 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
         async with await throttled_stream(**anthropic_params) as stream:
             async for event in stream:
                 if isinstance(event, anthropic_types.RawMessageStartEvent):
-                    self._attributes.update(
-                        {LLM_TOKEN_COUNT_PROMPT: event.message.usage.input_tokens}
-                    )
+                    usage = event.message.usage
+
+                    token_counts: dict[str, Any] = {}
+                    if prompt_tokens := (
+                        (usage.input_tokens or 0)
+                        + (getattr(usage, "cache_creation_input_tokens", 0) or 0)
+                        + (getattr(usage, "cache_read_input_tokens", 0) or 0)
+                    ):
+                        token_counts[LLM_TOKEN_COUNT_PROMPT] = prompt_tokens
+                    if cache_creation_tokens := getattr(usage, "cache_creation_input_tokens", None):
+                        if cache_creation_tokens is not None:
+                            token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] = (
+                                cache_creation_tokens
+                            )
+                    self._attributes.update(token_counts)
                 elif isinstance(event, anthropic_streaming.TextEvent):
                     yield TextChunk(content=event.text)
                 elif isinstance(event, anthropic_streaming.MessageStopEvent):
-                    self._attributes.update(
-                        {LLM_TOKEN_COUNT_COMPLETION: event.message.usage.output_tokens}
-                    )
+                    usage = event.message.usage
+                    output_token_counts: dict[str, Any] = {}
+                    if usage.output_tokens:
+                        output_token_counts[LLM_TOKEN_COUNT_COMPLETION] = usage.output_tokens
+                    if cache_read_tokens := getattr(usage, "cache_read_input_tokens", None):
+                        if cache_read_tokens is not None:
+                            output_token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] = (
+                                cache_read_tokens
+                            )
+                    self._attributes.update(output_token_counts)
                 elif (
                     isinstance(event, anthropic_streaming.ContentBlockStopEvent)
                     and event.content_block.type == "tool_use"
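
One behavioral consequence of the rewrite above: the recorded prompt count now includes cache writes and cache reads, not just uncached input tokens. A tiny worked example with made-up numbers, assuming the Anthropic usage object exposes the cache fields:

# Hypothetical values from a RawMessageStartEvent's usage (illustrative only).
input_tokens = 40                 # uncached prompt tokens
cache_creation_input_tokens = 10  # tokens written to the prompt cache
cache_read_input_tokens = 200     # tokens served from the prompt cache

# Mirrors the walrus expression above: the span's prompt token count is the sum,
# with the cache-write portion also recorded under its own detail attribute.
prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens
assert prompt_tokens == 250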
@@ -1055,6 +1592,10 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
 @register_llm_client(
     provider_key=GenerativeProviderKey.ANTHROPIC,
     model_names=[
+        "claude-sonnet-4-0",
+        "claude-sonnet-4-20250514",
+        "claude-opus-4-0",
+        "claude-opus-4-20250514",
         "claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-20250219",
     ],
@@ -1239,6 +1780,15 @@ LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
 LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
 LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
 LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
+    SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
+)
+LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING = (
+    SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING
+)
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO


 class _HttpxClient(wrapt.ObjectProxy):  # type: ignore