arize-phoenix 10.0.4__py3-none-any.whl → 12.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/METADATA +124 -72
- arize_phoenix-12.28.1.dist-info/RECORD +499 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/WHEEL +1 -1
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/IP_NOTICE +1 -1
- phoenix/__generated__/__init__.py +0 -0
- phoenix/__generated__/classification_evaluator_configs/__init__.py +20 -0
- phoenix/__generated__/classification_evaluator_configs/_document_relevance_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_hallucination_classification_evaluator_config.py +17 -0
- phoenix/__generated__/classification_evaluator_configs/_models.py +18 -0
- phoenix/__generated__/classification_evaluator_configs/_tool_selection_classification_evaluator_config.py +17 -0
- phoenix/__init__.py +5 -4
- phoenix/auth.py +39 -2
- phoenix/config.py +1763 -91
- phoenix/datetime_utils.py +120 -2
- phoenix/db/README.md +595 -25
- phoenix/db/bulk_inserter.py +145 -103
- phoenix/db/engines.py +140 -33
- phoenix/db/enums.py +3 -12
- phoenix/db/facilitator.py +302 -35
- phoenix/db/helpers.py +1000 -65
- phoenix/db/iam_auth.py +64 -0
- phoenix/db/insertion/dataset.py +135 -2
- phoenix/db/insertion/document_annotation.py +9 -6
- phoenix/db/insertion/evaluation.py +2 -3
- phoenix/db/insertion/helpers.py +17 -2
- phoenix/db/insertion/session_annotation.py +176 -0
- phoenix/db/insertion/span.py +15 -11
- phoenix/db/insertion/span_annotation.py +3 -4
- phoenix/db/insertion/trace_annotation.py +3 -4
- phoenix/db/insertion/types.py +50 -20
- phoenix/db/migrations/versions/01a8342c9cdf_add_user_id_on_datasets.py +40 -0
- phoenix/db/migrations/versions/0df286449799_add_session_annotations_table.py +105 -0
- phoenix/db/migrations/versions/272b66ff50f8_drop_single_indices.py +119 -0
- phoenix/db/migrations/versions/58228d933c91_dataset_labels.py +67 -0
- phoenix/db/migrations/versions/699f655af132_experiment_tags.py +57 -0
- phoenix/db/migrations/versions/735d3d93c33e_add_composite_indices.py +41 -0
- phoenix/db/migrations/versions/a20694b15f82_cost.py +196 -0
- phoenix/db/migrations/versions/ab513d89518b_add_user_id_on_dataset_versions.py +40 -0
- phoenix/db/migrations/versions/d0690a79ea51_users_on_experiments.py +40 -0
- phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py +139 -0
- phoenix/db/migrations/versions/e76cbd66ffc3_add_experiments_dataset_examples.py +87 -0
- phoenix/db/models.py +669 -56
- phoenix/db/pg_config.py +10 -0
- phoenix/db/types/model_provider.py +4 -0
- phoenix/db/types/token_price_customization.py +29 -0
- phoenix/db/types/trace_retention.py +23 -15
- phoenix/experiments/evaluators/utils.py +3 -3
- phoenix/experiments/functions.py +160 -52
- phoenix/experiments/tracing.py +2 -2
- phoenix/experiments/types.py +1 -1
- phoenix/inferences/inferences.py +1 -2
- phoenix/server/api/auth.py +38 -7
- phoenix/server/api/auth_messages.py +46 -0
- phoenix/server/api/context.py +100 -4
- phoenix/server/api/dataloaders/__init__.py +79 -5
- phoenix/server/api/dataloaders/annotation_configs_by_project.py +31 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +60 -8
- phoenix/server/api/dataloaders/average_experiment_repeated_run_group_latency.py +50 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +17 -24
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +1 -2
- phoenix/server/api/dataloaders/dataset_dataset_splits.py +52 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -1
- phoenix/server/api/dataloaders/dataset_example_splits.py +40 -0
- phoenix/server/api/dataloaders/dataset_examples_and_versions_by_experiment_run.py +47 -0
- phoenix/server/api/dataloaders/dataset_labels.py +36 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +2 -2
- phoenix/server/api/dataloaders/document_evaluations.py +6 -9
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +88 -34
- phoenix/server/api/dataloaders/experiment_dataset_splits.py +43 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +21 -28
- phoenix/server/api/dataloaders/experiment_repeated_run_group_annotation_summaries.py +77 -0
- phoenix/server/api/dataloaders/experiment_repeated_run_groups.py +57 -0
- phoenix/server/api/dataloaders/experiment_runs_by_experiment_and_example.py +44 -0
- phoenix/server/api/dataloaders/last_used_times_by_generative_model_id.py +35 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +40 -8
- phoenix/server/api/dataloaders/record_counts.py +37 -10
- phoenix/server/api/dataloaders/session_annotations_by_session.py +29 -0
- phoenix/server/api/dataloaders/span_cost_by_span.py +24 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_generative_model.py +56 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_project_session.py +57 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_span.py +43 -0
- phoenix/server/api/dataloaders/span_cost_detail_summary_entries_by_trace.py +56 -0
- phoenix/server/api/dataloaders/span_cost_details_by_span_cost.py +27 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment.py +57 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_repeated_run_group.py +64 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_experiment_run.py +58 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_generative_model.py +55 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project.py +152 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_project_session.py +56 -0
- phoenix/server/api/dataloaders/span_cost_summary_by_trace.py +55 -0
- phoenix/server/api/dataloaders/span_costs.py +29 -0
- phoenix/server/api/dataloaders/table_fields.py +2 -2
- phoenix/server/api/dataloaders/token_prices_by_model.py +30 -0
- phoenix/server/api/dataloaders/trace_annotations_by_trace.py +27 -0
- phoenix/server/api/dataloaders/types.py +29 -0
- phoenix/server/api/exceptions.py +11 -1
- phoenix/server/api/helpers/dataset_helpers.py +5 -1
- phoenix/server/api/helpers/playground_clients.py +1243 -292
- phoenix/server/api/helpers/playground_registry.py +2 -2
- phoenix/server/api/helpers/playground_spans.py +8 -4
- phoenix/server/api/helpers/playground_users.py +26 -0
- phoenix/server/api/helpers/prompts/conversions/aws.py +83 -0
- phoenix/server/api/helpers/prompts/conversions/google.py +103 -0
- phoenix/server/api/helpers/prompts/models.py +205 -22
- phoenix/server/api/input_types/{SpanAnnotationFilter.py → AnnotationFilter.py} +22 -14
- phoenix/server/api/input_types/ChatCompletionInput.py +6 -2
- phoenix/server/api/input_types/CreateProjectInput.py +27 -0
- phoenix/server/api/input_types/CreateProjectSessionAnnotationInput.py +37 -0
- phoenix/server/api/input_types/DatasetFilter.py +17 -0
- phoenix/server/api/input_types/ExperimentRunSort.py +237 -0
- phoenix/server/api/input_types/GenerativeCredentialInput.py +9 -0
- phoenix/server/api/input_types/GenerativeModelInput.py +5 -0
- phoenix/server/api/input_types/ProjectSessionSort.py +161 -1
- phoenix/server/api/input_types/PromptFilter.py +14 -0
- phoenix/server/api/input_types/PromptVersionInput.py +52 -1
- phoenix/server/api/input_types/SpanSort.py +44 -7
- phoenix/server/api/input_types/TimeBinConfig.py +23 -0
- phoenix/server/api/input_types/UpdateAnnotationInput.py +34 -0
- phoenix/server/api/input_types/UserRoleInput.py +1 -0
- phoenix/server/api/mutations/__init__.py +10 -0
- phoenix/server/api/mutations/annotation_config_mutations.py +8 -8
- phoenix/server/api/mutations/api_key_mutations.py +19 -23
- phoenix/server/api/mutations/chat_mutations.py +154 -47
- phoenix/server/api/mutations/dataset_label_mutations.py +243 -0
- phoenix/server/api/mutations/dataset_mutations.py +21 -16
- phoenix/server/api/mutations/dataset_split_mutations.py +351 -0
- phoenix/server/api/mutations/experiment_mutations.py +2 -2
- phoenix/server/api/mutations/export_events_mutations.py +3 -3
- phoenix/server/api/mutations/model_mutations.py +210 -0
- phoenix/server/api/mutations/project_mutations.py +49 -10
- phoenix/server/api/mutations/project_session_annotations_mutations.py +158 -0
- phoenix/server/api/mutations/project_trace_retention_policy_mutations.py +8 -4
- phoenix/server/api/mutations/prompt_label_mutations.py +74 -65
- phoenix/server/api/mutations/prompt_mutations.py +65 -129
- phoenix/server/api/mutations/prompt_version_tag_mutations.py +11 -8
- phoenix/server/api/mutations/span_annotations_mutations.py +15 -10
- phoenix/server/api/mutations/trace_annotations_mutations.py +14 -10
- phoenix/server/api/mutations/trace_mutations.py +47 -3
- phoenix/server/api/mutations/user_mutations.py +66 -41
- phoenix/server/api/queries.py +768 -293
- phoenix/server/api/routers/__init__.py +2 -2
- phoenix/server/api/routers/auth.py +154 -88
- phoenix/server/api/routers/ldap.py +229 -0
- phoenix/server/api/routers/oauth2.py +369 -106
- phoenix/server/api/routers/v1/__init__.py +24 -4
- phoenix/server/api/routers/v1/annotation_configs.py +23 -31
- phoenix/server/api/routers/v1/annotations.py +481 -17
- phoenix/server/api/routers/v1/datasets.py +395 -81
- phoenix/server/api/routers/v1/documents.py +142 -0
- phoenix/server/api/routers/v1/evaluations.py +24 -31
- phoenix/server/api/routers/v1/experiment_evaluations.py +19 -8
- phoenix/server/api/routers/v1/experiment_runs.py +337 -59
- phoenix/server/api/routers/v1/experiments.py +479 -48
- phoenix/server/api/routers/v1/models.py +7 -0
- phoenix/server/api/routers/v1/projects.py +18 -49
- phoenix/server/api/routers/v1/prompts.py +54 -40
- phoenix/server/api/routers/v1/sessions.py +108 -0
- phoenix/server/api/routers/v1/spans.py +1091 -81
- phoenix/server/api/routers/v1/traces.py +132 -78
- phoenix/server/api/routers/v1/users.py +389 -0
- phoenix/server/api/routers/v1/utils.py +3 -7
- phoenix/server/api/subscriptions.py +305 -88
- phoenix/server/api/types/Annotation.py +90 -23
- phoenix/server/api/types/ApiKey.py +13 -17
- phoenix/server/api/types/AuthMethod.py +1 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +1 -0
- phoenix/server/api/types/CostBreakdown.py +12 -0
- phoenix/server/api/types/Dataset.py +226 -72
- phoenix/server/api/types/DatasetExample.py +88 -18
- phoenix/server/api/types/DatasetExperimentAnnotationSummary.py +10 -0
- phoenix/server/api/types/DatasetLabel.py +57 -0
- phoenix/server/api/types/DatasetSplit.py +98 -0
- phoenix/server/api/types/DatasetVersion.py +49 -4
- phoenix/server/api/types/DocumentAnnotation.py +212 -0
- phoenix/server/api/types/Experiment.py +264 -59
- phoenix/server/api/types/ExperimentComparison.py +5 -10
- phoenix/server/api/types/ExperimentRepeatedRunGroup.py +155 -0
- phoenix/server/api/types/ExperimentRepeatedRunGroupAnnotationSummary.py +9 -0
- phoenix/server/api/types/ExperimentRun.py +169 -65
- phoenix/server/api/types/ExperimentRunAnnotation.py +158 -39
- phoenix/server/api/types/GenerativeModel.py +245 -3
- phoenix/server/api/types/GenerativeProvider.py +70 -11
- phoenix/server/api/types/{Model.py → InferenceModel.py} +1 -1
- phoenix/server/api/types/ModelInterface.py +16 -0
- phoenix/server/api/types/PlaygroundModel.py +20 -0
- phoenix/server/api/types/Project.py +1278 -216
- phoenix/server/api/types/ProjectSession.py +188 -28
- phoenix/server/api/types/ProjectSessionAnnotation.py +187 -0
- phoenix/server/api/types/ProjectTraceRetentionPolicy.py +1 -1
- phoenix/server/api/types/Prompt.py +119 -39
- phoenix/server/api/types/PromptLabel.py +42 -25
- phoenix/server/api/types/PromptVersion.py +11 -8
- phoenix/server/api/types/PromptVersionTag.py +65 -25
- phoenix/server/api/types/ServerStatus.py +6 -0
- phoenix/server/api/types/Span.py +167 -123
- phoenix/server/api/types/SpanAnnotation.py +189 -42
- phoenix/server/api/types/SpanCostDetailSummaryEntry.py +10 -0
- phoenix/server/api/types/SpanCostSummary.py +10 -0
- phoenix/server/api/types/SystemApiKey.py +65 -1
- phoenix/server/api/types/TokenPrice.py +16 -0
- phoenix/server/api/types/TokenUsage.py +3 -3
- phoenix/server/api/types/Trace.py +223 -51
- phoenix/server/api/types/TraceAnnotation.py +149 -50
- phoenix/server/api/types/User.py +137 -32
- phoenix/server/api/types/UserApiKey.py +73 -26
- phoenix/server/api/types/node.py +10 -0
- phoenix/server/api/types/pagination.py +11 -2
- phoenix/server/app.py +290 -45
- phoenix/server/authorization.py +38 -3
- phoenix/server/bearer_auth.py +34 -24
- phoenix/server/cost_tracking/cost_details_calculator.py +196 -0
- phoenix/server/cost_tracking/cost_model_lookup.py +179 -0
- phoenix/server/cost_tracking/helpers.py +68 -0
- phoenix/server/cost_tracking/model_cost_manifest.json +3657 -830
- phoenix/server/cost_tracking/regex_specificity.py +397 -0
- phoenix/server/cost_tracking/token_cost_calculator.py +57 -0
- phoenix/server/daemons/__init__.py +0 -0
- phoenix/server/daemons/db_disk_usage_monitor.py +214 -0
- phoenix/server/daemons/generative_model_store.py +103 -0
- phoenix/server/daemons/span_cost_calculator.py +99 -0
- phoenix/server/dml_event.py +17 -0
- phoenix/server/dml_event_handler.py +5 -0
- phoenix/server/email/sender.py +56 -3
- phoenix/server/email/templates/db_disk_usage_notification.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/experiments/__init__.py +0 -0
- phoenix/server/experiments/utils.py +14 -0
- phoenix/server/grpc_server.py +11 -11
- phoenix/server/jwt_store.py +17 -15
- phoenix/server/ldap.py +1449 -0
- phoenix/server/main.py +26 -10
- phoenix/server/oauth2.py +330 -12
- phoenix/server/prometheus.py +66 -6
- phoenix/server/rate_limiters.py +4 -9
- phoenix/server/retention.py +33 -20
- phoenix/server/session_filters.py +49 -0
- phoenix/server/static/.vite/manifest.json +55 -51
- phoenix/server/static/assets/components-BreFUQQa.js +6702 -0
- phoenix/server/static/assets/{index-E0M82BdE.js → index-CTQoemZv.js} +140 -56
- phoenix/server/static/assets/pages-DBE5iYM3.js +9524 -0
- phoenix/server/static/assets/vendor-BGzfc4EU.css +1 -0
- phoenix/server/static/assets/vendor-DCE4v-Ot.js +920 -0
- phoenix/server/static/assets/vendor-codemirror-D5f205eT.js +25 -0
- phoenix/server/static/assets/vendor-recharts-V9cwpXsm.js +37 -0
- phoenix/server/static/assets/vendor-shiki-Do--csgv.js +5 -0
- phoenix/server/static/assets/vendor-three-CmB8bl_y.js +3840 -0
- phoenix/server/templates/index.html +40 -6
- phoenix/server/thread_server.py +1 -2
- phoenix/server/types.py +14 -4
- phoenix/server/utils.py +74 -0
- phoenix/session/client.py +56 -3
- phoenix/session/data_extractor.py +5 -0
- phoenix/session/evaluation.py +14 -5
- phoenix/session/session.py +45 -9
- phoenix/settings.py +5 -0
- phoenix/trace/attributes.py +80 -13
- phoenix/trace/dsl/helpers.py +90 -1
- phoenix/trace/dsl/query.py +8 -6
- phoenix/trace/projects.py +5 -0
- phoenix/utilities/template_formatters.py +1 -1
- phoenix/version.py +1 -1
- arize_phoenix-10.0.4.dist-info/RECORD +0 -405
- phoenix/server/api/types/Evaluation.py +0 -39
- phoenix/server/cost_tracking/cost_lookup.py +0 -255
- phoenix/server/static/assets/components-DULKeDfL.js +0 -4365
- phoenix/server/static/assets/pages-Cl0A-0U2.js +0 -7430
- phoenix/server/static/assets/vendor-WIZid84E.css +0 -1
- phoenix/server/static/assets/vendor-arizeai-Dy-0mSNw.js +0 -649
- phoenix/server/static/assets/vendor-codemirror-DBtifKNr.js +0 -33
- phoenix/server/static/assets/vendor-oB4u9zuV.js +0 -905
- phoenix/server/static/assets/vendor-recharts-D-T4KPz2.js +0 -59
- phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js +0 -5
- phoenix/server/static/assets/vendor-three-C5WAXd5r.js +0 -2998
- phoenix/utilities/deprecation.py +0 -31
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-10.0.4.dist-info → arize_phoenix-12.28.1.dist-info}/licenses/LICENSE +0 -0
@@ -7,6 +7,7 @@ import json
 import time
 from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Callable, Iterator
+from dataclasses import dataclass
 from functools import wraps
 from typing import TYPE_CHECKING, Any, Hashable, Mapping, MutableMapping, Optional, Union
 
@@ -19,7 +20,7 @@ from openinference.semconv.trace import (
 )
 from strawberry import UNSET
 from strawberry.scalars import JSON as JSONScalarType
-from typing_extensions import TypeAlias, assert_never
+from typing_extensions import TypeAlias, assert_never, override
 
 from phoenix.config import getenv
 from phoenix.evals.models.rate_limiters import (
@@ -56,6 +57,7 @@ from phoenix.server.api.types.GenerativeProvider import GenerativeProviderKey
 if TYPE_CHECKING:
     import httpx
     from anthropic.types import MessageParam, TextBlockParam, ToolResultBlockParam
+    from botocore.awsrequest import AWSPreparedRequest  # type: ignore[import-untyped]
     from google.generativeai.types import ContentType
     from openai import AsyncAzureOpenAI, AsyncOpenAI
     from openai.types import CompletionUsage
@@ -66,6 +68,16 @@ SetSpanAttributesFn: TypeAlias = Callable[[Mapping[str, Any]], None]
 ChatCompletionChunk: TypeAlias = Union[TextChunk, ToolCallChunk]
 
 
+@dataclass
+class PlaygroundClientCredential:
+    """
+    Represents a credential for LLM providers.
+    """
+
+    env_var_name: str
+    value: str
+
+
 class Dependency:
     """
     Set the module_name to the import name if it is different from the install name
@@ -172,9 +184,10 @@ class PlaygroundStreamingClient(ABC):
     def __init__(
         self,
         model: GenerativeModelInput,
-
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
     ) -> None:
         self._attributes: dict[str, AttributeValue] = dict()
+        self._credentials = credentials or []
 
     @classmethod
     @abstractmethod
@@ -243,11 +256,11 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
         *,
         client: Union["AsyncOpenAI", "AsyncAzureOpenAI"],
         model: GenerativeModelInput,
-
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
     ) -> None:
         from openai import RateLimitError as OpenAIRateLimitError
 
-        super().__init__(model=model,
+        super().__init__(model=model, credentials=credentials)
         self.client = client
         self.model_name = model.name
         self.rate_limiter = PlaygroundRateLimiter(model.provider_key, OpenAIRateLimitError)
@@ -296,7 +309,6 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
                 invocation_name="top_p",
                 canonical_name=CanonicalParameterName.TOP_P,
                 label="Top P",
-                default_value=1.0,
                 min_value=0.0,
                 max_value=1.0,
             ),
@@ -315,6 +327,10 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
                 label="Response Format",
                 canonical_name=CanonicalParameterName.RESPONSE_FORMAT,
             ),
+            JSONInvocationParameter(
+                invocation_name="extra_body",
+                label="Extra Body",
+            ),
         ]
 
     async def chat_completion_create(
@@ -347,7 +363,6 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
         ):
             if (usage := chunk.usage) is not None:
                 token_usage = usage
-                continue
             if not chunk.choices:
                 # for Azure, initial chunk contains the content filter
                 continue
@@ -426,9 +441,9 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
         if role is ChatCompletionMessageRole.TOOL:
             if tool_call_id is None:
                 raise ValueError("tool_call_id is required for tool messages")
-
-
-
+            return ChatCompletionToolMessageParam(
+                {"content": content, "role": "tool", "tool_call_id": tool_call_id}
+            )
         assert_never(role)
 
     def to_openai_tool_call_param(
@@ -452,288 +467,272 @@ class OpenAIBaseStreamingClient(PlaygroundStreamingClient):
         yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
         yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens
 
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            if (
+                hasattr(prompt_details, "cached_tokens")
+                and prompt_details.cached_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, prompt_details.cached_tokens
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, prompt_details.audio_tokens
+
+        if (
+            hasattr(usage, "completion_tokens_details")
+            and usage.completion_tokens_details is not None
+        ):
+            completion_details = usage.completion_tokens_details
+            if (
+                hasattr(completion_details, "reasoning_tokens")
+                and completion_details.reasoning_tokens is not None
+            ):
+                yield (
+                    LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
+                    completion_details.reasoning_tokens,
+                )
+            if (
+                hasattr(completion_details, "audio_tokens")
+                and completion_details.audio_tokens is not None
+            ):
+                yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, completion_details.audio_tokens
+
+
+def _get_credential_value(
+    credentials: Optional[list[PlaygroundClientCredential]], env_var_name: str
+) -> Optional[str]:
+    """Helper function to extract credential value from credentials list."""
+    if not credentials:
+        return None
+    return next(
+        (credential.value for credential in credentials if credential.env_var_name == env_var_name),
+        None,
+    )
+
+
+def _require_credential(
+    credentials: Optional[list[PlaygroundClientCredential]], env_var_name: str, provider_name: str
+) -> str:
+    """Helper function to require a credential value, raising an exception if not found."""
+    value = _get_credential_value(credentials, env_var_name)
+    if value is None:
+        raise BadRequest(f"Missing required credential '{env_var_name}' for {provider_name}")
+    return value
+
 
 @register_llm_client(
-    provider_key=GenerativeProviderKey.
+    provider_key=GenerativeProviderKey.DEEPSEEK,
     model_names=[
         PROVIDER_DEFAULT,
-        "
-        "
-        "gpt-4.1-nano",
-        "gpt-4.1-2025-04-14",
-        "gpt-4.1-mini-2025-04-14",
-        "gpt-4.1-nano-2025-04-14",
-        "gpt-4o",
-        "gpt-4o-2024-11-20",
-        "gpt-4o-2024-08-06",
-        "gpt-4o-2024-05-13",
-        "chatgpt-4o-latest",
-        "gpt-4o-mini",
-        "gpt-4o-mini-2024-07-18",
-        "gpt-4-turbo",
-        "gpt-4-turbo-2024-04-09",
-        "gpt-4-turbo-preview",
-        "gpt-4-0125-preview",
-        "gpt-4-1106-preview",
-        "gpt-4",
-        "gpt-4-0613",
-        "gpt-3.5-turbo-0125",
-        "gpt-3.5-turbo",
-        "gpt-3.5-turbo-1106",
-        # preview models
-        "gpt-4.5-preview",
+        "deepseek-chat",
+        "deepseek-reasoner",
     ],
 )
-class
+class DeepSeekStreamingClient(OpenAIBaseStreamingClient):
     def __init__(
         self,
         model: GenerativeModelInput,
-
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
     ) -> None:
         from openai import AsyncOpenAI
 
-        base_url = model.base_url or getenv("
-
+        base_url = model.base_url or getenv("DEEPSEEK_BASE_URL")
+
+        # Try to get API key from credentials first, then fallback to env
+        api_key = _get_credential_value(credentials, "DEEPSEEK_API_KEY") or getenv(
+            "DEEPSEEK_API_KEY"
+        )
+
+        if not api_key:
             if not base_url:
-                raise BadRequest("An API key is required for
+                raise BadRequest("An API key is required for DeepSeek models")
             api_key = "sk-fake-api-key"
-
-
-
+
+        client = AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url or "https://api.deepseek.com",
+            default_headers=model.custom_headers or None,
+        )
+        super().__init__(client=client, model=model, credentials=credentials)
+        # DeepSeek uses OpenAI-compatible API but we'll track it as a separate provider
+        # Adding a custom "deepseek" provider value to make it distinguishable in traces
+        self._attributes[LLM_PROVIDER] = "deepseek"
         self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value
 
 
 @register_llm_client(
-    provider_key=GenerativeProviderKey.
+    provider_key=GenerativeProviderKey.XAI,
     model_names=[
-
-        "
-        "
-        "
-        "
-        "
-        "
-        "o3-mini-2025-01-31",
+        PROVIDER_DEFAULT,
+        "grok-3",
+        "grok-3-fast",
+        "grok-3-mini",
+        "grok-3-mini-fast",
+        "grok-2-1212",
+        "grok-2-vision-1212",
     ],
 )
-class
-
-    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
-        return [
-            StringInvocationParameter(
-                invocation_name="reasoning_effort",
-                label="Reasoning Effort",
-                canonical_name=CanonicalParameterName.REASONING_EFFORT,
-            ),
-            IntInvocationParameter(
-                invocation_name="max_completion_tokens",
-                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
-                label="Max Completion Tokens",
-            ),
-            IntInvocationParameter(
-                invocation_name="seed",
-                canonical_name=CanonicalParameterName.RANDOM_SEED,
-                label="Seed",
-            ),
-            JSONInvocationParameter(
-                invocation_name="tool_choice",
-                label="Tool Choice",
-                canonical_name=CanonicalParameterName.TOOL_CHOICE,
-            ),
-            JSONInvocationParameter(
-                invocation_name="response_format",
-                label="Response Format",
-                canonical_name=CanonicalParameterName.RESPONSE_FORMAT,
-            ),
-        ]
-
-    async def chat_completion_create(
+class XAIStreamingClient(OpenAIBaseStreamingClient):
+    def __init__(
         self,
-
-
-
-
-        **invocation_parameters: Any,
-    ) -> AsyncIterator[ChatCompletionChunk]:
-        from openai import NOT_GIVEN
-
-        # Convert standard messages to OpenAI messages
-        openai_messages = []
-        for message in messages:
-            openai_message = self.to_openai_chat_completion_param(*message)
-            if openai_message is not None:
-                openai_messages.append(openai_message)
-
-        throttled_create = self.rate_limiter._alimit(self.client.chat.completions.create)
-        response = await throttled_create(
-            messages=openai_messages,
-            model=self.model_name,
-            stream=False,
-            tools=tools or NOT_GIVEN,
-            **invocation_parameters,
-        )
-
-        if response.usage is not None:
-            self._attributes.update(dict(self._llm_token_counts(response.usage)))
+        model: GenerativeModelInput,
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ) -> None:
+        from openai import AsyncOpenAI
 
-
-        if choice.message.content:
-            yield TextChunk(content=choice.message.content)
+        base_url = model.base_url or getenv("XAI_BASE_URL")
 
-
-
-            yield ToolCallChunk(
-                id=tool_call.id,
-                function=FunctionCallChunk(
-                    name=tool_call.function.name,
-                    arguments=tool_call.function.arguments,
-                ),
-            )
+        # Try to get API key from credentials first, then fallback to env
+        api_key = _get_credential_value(credentials, "XAI_API_KEY") or getenv("XAI_API_KEY")
 
-
-
-
-
-        tool_call_id: Optional[str] = None,
-        tool_calls: Optional[list[JSONScalarType]] = None,
-    ) -> Optional["ChatCompletionMessageParam"]:
-        from openai.types.chat import (
-            ChatCompletionAssistantMessageParam,
-            ChatCompletionDeveloperMessageParam,
-            ChatCompletionToolMessageParam,
-            ChatCompletionUserMessageParam,
-        )
+        if not api_key:
+            if not base_url:
+                raise BadRequest("An API key is required for xAI models")
+            api_key = "sk-fake-api-key"
 
-
-
-
-
-                    "role": "user",
-                }
-            )
-        if role is ChatCompletionMessageRole.SYSTEM:
-            return ChatCompletionDeveloperMessageParam(
-                {
-                    "content": content,
-                    "role": "developer",
-                }
-            )
-        if role is ChatCompletionMessageRole.AI:
-            if tool_calls is None:
-                return ChatCompletionAssistantMessageParam(
-                    {
-                        "content": content,
-                        "role": "assistant",
-                    }
-                )
-            else:
-                return ChatCompletionAssistantMessageParam(
-                    {
-                        "content": content,
-                        "role": "assistant",
-                        "tool_calls": [
-                            self.to_openai_tool_call_param(tool_call) for tool_call in tool_calls
-                        ],
-                    }
-                )
-        if role is ChatCompletionMessageRole.TOOL:
-            if tool_call_id is None:
-                raise ValueError("tool_call_id is required for tool messages")
-            return ChatCompletionToolMessageParam(
-                {"content": content, "role": "tool", "tool_call_id": tool_call_id}
+        client = AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url or "https://api.x.ai/v1",
+            default_headers=model.custom_headers or None,
         )
-
-
-
-
-
-        yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
-        yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens
+        super().__init__(client=client, model=model, credentials=credentials)
+        # xAI uses OpenAI-compatible API but we'll track it as a separate provider
+        # Adding a custom "xai" provider value to make it distinguishable in traces
+        self._attributes[LLM_PROVIDER] = "xai"
+        self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value
 
 
 @register_llm_client(
-    provider_key=GenerativeProviderKey.
+    provider_key=GenerativeProviderKey.OLLAMA,
     model_names=[
         PROVIDER_DEFAULT,
+        "llama3.3",
+        "llama3.2",
+        "llama3.1",
+        "llama3",
+        "llama2",
+        "mistral",
+        "mixtral",
+        "codellama",
+        "phi3",
+        "qwen2.5",
+        "gemma2",
     ],
 )
-class
+class OllamaStreamingClient(OpenAIBaseStreamingClient):
     def __init__(
         self,
         model: GenerativeModelInput,
-
-    ):
-        from openai import
-
-        if not (endpoint := model.endpoint or getenv("AZURE_OPENAI_ENDPOINT")):
-            raise BadRequest("An Azure endpoint is required for Azure OpenAI models")
-        if not (api_version := model.api_version or getenv("OPENAI_API_VERSION")):
-            raise BadRequest("An OpenAI API version is required for Azure OpenAI models")
-        if api_key := api_key or getenv("AZURE_OPENAI_API_KEY"):
-            client = AsyncAzureOpenAI(
-                api_key=api_key,
-                azure_endpoint=endpoint,
-                api_version=api_version,
-            )
-        else:
-            try:
-                from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
-            except ImportError:
-                raise BadRequest(
-                    "Provide an API key for Azure OpenAI models or use azure-identity, see. e.g. "
-                    "https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.environmentcredential?view=azure-python"  # noqa: E501
-                )
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ) -> None:
+        from openai import AsyncOpenAI
 
-
-
-
-
-
-
-
-
-
-
+        base_url = model.base_url or getenv("OLLAMA_BASE_URL")
+        if not base_url:
+            raise BadRequest("An Ollama base URL is required for Ollama models")
+        api_key = "ollama"
+        client = AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url,
+            default_headers=model.custom_headers or None,
+        )
+        super().__init__(client=client, model=model, credentials=credentials)
+        # Ollama uses OpenAI-compatible API but we'll track it as a separate provider
+        # Adding a custom "ollama" provider value to make it distinguishable in traces
+        self._attributes[LLM_PROVIDER] = "ollama"
         self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value
 
 
 @register_llm_client(
-    provider_key=GenerativeProviderKey.
+    provider_key=GenerativeProviderKey.AWS,
     model_names=[
         PROVIDER_DEFAULT,
-        "claude-
-        "claude-
-        "claude-
-        "claude-
-        "claude-
-        "claude-
-        "claude-3-
-        "claude-3-
-        "claude-3-sonnet-
-        "claude-3-haiku-
+        "anthropic.claude-opus-4-5-20251101-v1:0",
+        "anthropic.claude-sonnet-4-5-20250929-v1:0",
+        "anthropic.claude-haiku-4-5-20251001-v1:0",
+        "anthropic.claude-opus-4-1-20250805-v1:0",
+        "anthropic.claude-opus-4-20250514-v1:0",
+        "anthropic.claude-sonnet-4-20250514-v1:0",
+        "anthropic.claude-3-7-sonnet-20250219-v1:0",
+        "anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "anthropic.claude-3-5-sonnet-20240620-v1:0",
+        "anthropic.claude-3-5-haiku-20241022-v1:0",
+        "anthropic.claude-3-haiku-20240307-v1:0",
+        "amazon.titan-embed-text-v2:0",
+        "amazon.nova-pro-v1:0",
+        "amazon.nova-premier-v1:0:8k",
+        "amazon.nova-premier-v1:0:20k",
+        "amazon.nova-premier-v1:0:1000k",
+        "amazon.nova-premier-v1:0:mm",
+        "amazon.nova-premier-v1:0",
+        "amazon.nova-lite-v1:0",
+        "amazon.nova-micro-v1:0",
+        "deepseek.r1-v1:0",
+        "mistral.pixtral-large-2502-v1:0",
+        "meta.llama3-1-8b-instruct-v1:0:128k",
+        "meta.llama3-1-8b-instruct-v1:0",
+        "meta.llama3-1-70b-instruct-v1:0:128k",
+        "meta.llama3-1-70b-instruct-v1:0",
+        "meta.llama3-1-405b-instruct-v1:0",
+        "meta.llama3-2-11b-instruct-v1:0",
+        "meta.llama3-2-90b-instruct-v1:0",
+        "meta.llama3-2-1b-instruct-v1:0",
+        "meta.llama3-2-3b-instruct-v1:0",
+        "meta.llama3-3-70b-instruct-v1:0",
+        "meta.llama4-scout-17b-instruct-v1:0",
+        "meta.llama4-maverick-17b-instruct-v1:0",
     ],
 )
-class
+class BedrockStreamingClient(PlaygroundStreamingClient):
     def __init__(
         self,
         model: GenerativeModelInput,
-
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
     ) -> None:
-        import
-
-        super().__init__(model=model,
-
-        self.
-
-
-
+        import boto3  # type: ignore[import-untyped]
+
+        super().__init__(model=model, credentials=credentials)
+        region = model.region or "us-east-1"
+        self.api = "converse"
+        custom_headers = model.custom_headers
+        aws_access_key_id = _get_credential_value(credentials, "AWS_ACCESS_KEY_ID") or getenv(
+            "AWS_ACCESS_KEY_ID"
+        )
+        aws_secret_access_key = _get_credential_value(
+            credentials, "AWS_SECRET_ACCESS_KEY"
+        ) or getenv("AWS_SECRET_ACCESS_KEY")
+        aws_session_token = _get_credential_value(credentials, "AWS_SESSION_TOKEN") or getenv(
+            "AWS_SESSION_TOKEN"
+        )
         self.model_name = model.name
-
-
+        session = boto3.Session(
+            region_name=region,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_session_token=aws_session_token,
+        )
+        client = session.client(service_name="bedrock-runtime")
+
+        # Add custom headers support via boto3 event system
+        if custom_headers:
+
+            def add_custom_headers(request: "AWSPreparedRequest", **kwargs: Any) -> None:
+                request.headers.update(custom_headers)
+
+            client.meta.events.register("before-send.*", add_custom_headers)
+
+        self.client = client
+        self._attributes[LLM_PROVIDER] = "aws"
+        self._attributes[LLM_SYSTEM] = "aws"
+
+    @staticmethod
+    def _setup_custom_headers(client: Any, custom_headers: Mapping[str, str]) -> None:
+        """Setup custom headers using boto3's event system."""
+        if not custom_headers:
+            return
 
     @classmethod
     def dependencies(cls) -> list[Dependency]:
-        return [Dependency(name="
+        return [Dependency(name="boto3")]
 
     @classmethod
     def supported_invocation_parameters(cls) -> list[InvocationParameter]:
@@ -743,7 +742,6 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
                 canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
                 label="Max Tokens",
                 default_value=1024,
-                required=True,
             ),
             BoundedFloatInvocationParameter(
                 invocation_name="temperature",
@@ -753,16 +751,10 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
                 min_value=0.0,
                 max_value=1.0,
             ),
-            StringListInvocationParameter(
-                invocation_name="stop_sequences",
-                canonical_name=CanonicalParameterName.STOP_SEQUENCES,
-                label="Stop Sequences",
-            ),
             BoundedFloatInvocationParameter(
                 invocation_name="top_p",
                 canonical_name=CanonicalParameterName.TOP_P,
                 label="Top P",
-                default_value=1.0,
                 min_value=0.0,
                 max_value=1.0,
             ),
@@ -781,14 +773,806 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
|
|
|
781
773
|
tools: list[JSONScalarType],
|
|
782
774
|
**invocation_parameters: Any,
|
|
783
775
|
) -> AsyncIterator[ChatCompletionChunk]:
|
|
784
|
-
|
|
785
|
-
|
|
776
|
+
if self.api == "invoke":
|
|
777
|
+
async for chunk in self._handle_invoke_api(messages, tools, invocation_parameters):
|
|
778
|
+
yield chunk
|
|
779
|
+
else:
|
|
780
|
+
async for chunk in self._handle_converse_api(messages, tools, invocation_parameters):
|
|
781
|
+
yield chunk
|
|
786
782
|
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
783
|
+
async def _handle_converse_api(
|
|
784
|
+
self,
|
|
785
|
+
messages: list[
|
|
786
|
+
tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
|
|
787
|
+
],
|
|
788
|
+
tools: list[JSONScalarType],
|
|
789
|
+
invocation_parameters: dict[str, Any],
|
|
790
|
+
) -> AsyncIterator[ChatCompletionChunk]:
|
|
791
|
+
"""
|
|
792
|
+
Handle the converse API.
|
|
793
|
+
"""
|
|
794
|
+
# Build messages in Converse API format
|
|
795
|
+
converse_messages = self._build_converse_messages(messages)
|
|
796
|
+
|
|
797
|
+
inference_config = {}
|
|
798
|
+
if (
|
|
799
|
+
"max_tokens" in invocation_parameters
|
|
800
|
+
and invocation_parameters["max_tokens"] is not None
|
|
801
|
+
):
|
|
802
|
+
inference_config["maxTokens"] = invocation_parameters["max_tokens"]
|
|
803
|
+
if (
|
|
804
|
+
"temperature" in invocation_parameters
|
|
805
|
+
and invocation_parameters["temperature"] is not None
|
|
806
|
+
):
|
|
807
|
+
inference_config["temperature"] = invocation_parameters["temperature"]
|
|
808
|
+
if "top_p" in invocation_parameters and invocation_parameters["top_p"] is not None:
|
|
809
|
+
inference_config["topP"] = invocation_parameters["top_p"]
|
|
810
|
+
|
|
811
|
+
# Build the request parameters for Converse API
|
|
812
|
+
converse_params: dict[str, Any] = {
|
|
813
|
+
"modelId": self.model_name,
|
|
814
|
+
"messages": converse_messages,
|
|
815
|
+
"inferenceConfig": inference_config,
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
# Add system prompt if available
|
|
819
|
+
system_prompt = self._extract_system_prompt(messages)
|
|
820
|
+
if system_prompt:
|
|
821
|
+
converse_params["system"] = [{"text": system_prompt}]
|
|
822
|
+
|
|
823
|
+
# Add tools if provided
|
|
824
|
+
if tools:
|
|
825
|
+
converse_params["toolConfig"] = {"tools": tools}
|
|
826
|
+
if (
|
|
827
|
+
"tool_choice" in invocation_parameters
|
|
828
|
+
and invocation_parameters["tool_choice"]["type"] != "none"
|
|
829
|
+
):
|
|
830
|
+
converse_params["toolConfig"]["toolChoice"] = {}
|
|
831
|
+
|
|
832
|
+
if invocation_parameters["tool_choice"]["type"] == "auto":
|
|
833
|
+
converse_params["toolConfig"]["toolChoice"]["auto"] = {}
|
|
834
|
+
elif invocation_parameters["tool_choice"]["type"] == "any":
|
|
835
|
+
converse_params["toolConfig"]["toolChoice"]["any"] = {}
|
|
836
|
+
else:
|
|
837
|
+
converse_params["toolConfig"]["toolChoice"]["tool"] = {
|
|
838
|
+
"name": invocation_parameters["tool_choice"]["name"],
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
# Make the streaming API call
|
|
842
|
+
response = self.client.converse_stream(**converse_params)
|
|
843
|
+
|
|
844
|
+
# Track active tool calls
|
|
845
|
+
active_tool_calls = {} # contentBlockIndex -> {id, name, arguments_buffer}
|
|
846
|
+
|
|
847
|
+
# Process the event stream
|
|
848
|
+
event_stream = response.get("stream")
|
|
849
|
+
|
|
850
|
+
for event in event_stream:
|
|
851
|
+
# Handle content block start events
|
|
852
|
+
if "contentBlockStart" in event:
|
|
853
|
+
content_block_start = event["contentBlockStart"]
|
|
854
|
+
start_event = content_block_start.get("start", {})
|
|
855
|
+
block_index = content_block_start.get(
|
|
856
|
+
"contentBlockIndex", 0
|
|
857
|
+
) # Get the actual index
|
|
858
|
+
|
|
859
|
+
if "toolUse" in start_event:
|
|
860
|
+
tool_use = start_event["toolUse"]
|
|
861
|
+
active_tool_calls[block_index] = { # Use the actual block index
|
|
862
|
+
"id": tool_use.get("toolUseId"),
|
|
863
|
+
"name": tool_use.get("name"),
|
|
864
|
+
"arguments_buffer": "",
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
# Yield initial tool call chunk
|
|
868
|
+
yield ToolCallChunk(
|
|
869
|
+
id=tool_use.get("toolUseId"),
|
|
870
|
+
function=FunctionCallChunk(
|
|
871
|
+
name=tool_use.get("name"),
|
|
872
|
+
arguments="",
|
|
873
|
+
),
|
|
874
|
+
)
|
|
875
|
+
|
|
876
|
+
# Handle content block delta events
|
|
877
|
+
elif "contentBlockDelta" in event:
|
|
878
|
+
content_delta = event["contentBlockDelta"]
|
|
879
|
+
delta = content_delta.get("delta", {})
|
|
880
|
+
delta_index = content_delta.get("contentBlockIndex", 0)
|
|
881
|
+
|
|
882
|
+
# Handle text delta
|
|
883
|
+
if "text" in delta:
|
|
884
|
+
yield TextChunk(content=delta["text"])
|
|
885
|
+
|
|
886
|
+
# Handle tool use delta
|
|
887
|
+
elif "toolUse" in delta:
|
|
888
|
+
tool_delta = delta["toolUse"]
|
|
889
|
+
if "input" in tool_delta and delta_index in active_tool_calls:
|
|
890
|
+
# Accumulate tool arguments
|
|
891
|
+
json_chunk = tool_delta["input"]
|
|
892
|
+
active_tool_calls[delta_index]["arguments_buffer"] += json_chunk
|
|
893
|
+
|
|
894
|
+
# Yield incremental argument update
|
|
895
|
+
yield ToolCallChunk(
|
|
896
|
+
id=active_tool_calls[delta_index]["id"],
|
|
897
|
+
function=FunctionCallChunk(
|
|
898
|
+
name=active_tool_calls[delta_index]["name"],
|
|
899
|
+
arguments=json_chunk,
|
|
900
|
+
),
|
|
901
|
+
)
|
|
902
|
+
|
|
903
|
+
# Handle content block stop events
|
|
904
|
+
elif "contentBlockStop" in event:
|
|
905
|
+
stop_index = event["contentBlockStop"].get("contentBlockIndex", 0)
|
|
906
|
+
if stop_index in active_tool_calls:
|
|
907
|
+
del active_tool_calls[stop_index]
|
|
908
|
+
|
|
909
|
+
elif "metadata" in event:
|
|
910
|
+
self._attributes.update(
|
|
911
|
+
{
|
|
912
|
+
LLM_TOKEN_COUNT_PROMPT: event.get("metadata")
|
|
913
|
+
.get("usage", {})
|
|
914
|
+
.get("inputTokens", 0)
|
|
915
|
+
}
|
|
916
|
+
)
|
|
917
|
+
|
|
918
|
+
self._attributes.update(
|
|
919
|
+
{
|
|
920
|
+
LLM_TOKEN_COUNT_COMPLETION: event.get("metadata")
|
|
921
|
+
.get("usage", {})
|
|
922
|
+
.get("outputTokens", 0)
|
|
923
|
+
}
|
|
924
|
+
)
|
|
925
|
+
|
|
926
|
+
self._attributes.update(
|
|
927
|
+
{
|
|
928
|
+
LLM_TOKEN_COUNT_TOTAL: event.get("metadata")
|
|
929
|
+
.get("usage", {})
|
|
930
|
+
.get("totalTokens", 0)
|
|
931
|
+
}
|
|
932
|
+
)
|
|
933
|
+
|
|
934
|
+
async def _handle_invoke_api(
|
|
935
|
+
self,
|
|
936
|
+
messages: list[
|
|
937
|
+
tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
|
|
938
|
+
],
|
|
939
|
+
tools: list[JSONScalarType],
|
|
940
|
+
invocation_parameters: dict[str, Any],
|
|
941
|
+
) -> AsyncIterator[ChatCompletionChunk]:
|
|
942
|
+
if "anthropic" not in self.model_name:
|
|
943
|
+
raise ValueError("Invoke API is only supported for Anthropic models")
|
|
944
|
+
|
|
945
|
+
bedrock_messages, system_prompt = self._build_bedrock_messages(messages)
|
|
946
|
+
bedrock_params = {
|
|
947
|
+
"anthropic_version": "bedrock-2023-05-31",
|
|
948
|
+
"messages": bedrock_messages,
|
|
949
|
+
"system": system_prompt,
|
|
950
|
+
"tools": tools,
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
if (
|
|
954
|
+
"max_tokens" in invocation_parameters
|
|
955
|
+
and invocation_parameters["max_tokens"] is not None
|
|
956
|
+
):
|
|
957
|
+
bedrock_params["max_tokens"] = invocation_parameters["max_tokens"]
|
|
958
|
+
if (
|
|
959
|
+
"temperature" in invocation_parameters
|
|
960
|
+
and invocation_parameters["temperature"] is not None
|
|
961
|
+
):
|
|
962
|
+
bedrock_params["temperature"] = invocation_parameters["temperature"]
|
|
963
|
+
if "top_p" in invocation_parameters and invocation_parameters["top_p"] is not None:
|
|
964
|
+
bedrock_params["top_p"] = invocation_parameters["top_p"]
|
|
965
|
+
|
|
966
|
+
response = self.client.invoke_model_with_response_stream(
|
|
967
|
+
modelId=self.model_name,
|
|
968
|
+
contentType="application/json",
|
|
969
|
+
accept="application/json",
|
|
970
|
+
body=json.dumps(bedrock_params),
|
|
971
|
+
trace="ENABLED_FULL",
|
|
972
|
+
)
|
|
973
|
+
|
|
974
|
+
# The response['body'] is an EventStream object
|
|
975
|
+
event_stream = response["body"]
|
|
976
|
+
|
|
977
|
+
# Track active tool calls and their accumulating arguments
|
|
978
|
+
active_tool_calls: dict[int, dict[str, Any]] = {} # index -> {id, name, arguments_buffer}
|
|
979
|
+
|
|
980
|
+
for event in event_stream:
|
|
981
|
+
if "chunk" in event:
|
|
982
|
+
chunk_data = json.loads(event["chunk"]["bytes"].decode("utf-8"))
|
|
983
|
+
|
|
984
|
+
# Handle text content
|
|
985
|
+
if chunk_data.get("type") == "content_block_delta":
|
|
986
|
+
delta = chunk_data.get("delta", {})
|
|
987
|
+
index = chunk_data.get("index", 0)
|
|
988
|
+
|
|
989
|
+
if delta.get("type") == "text_delta" and "text" in delta:
|
|
990
|
+
yield TextChunk(content=delta["text"])
|
|
991
|
+
|
|
992
|
+
elif delta.get("type") == "input_json_delta":
|
|
993
|
+
# Accumulate tool arguments
|
|
994
|
+
if index in active_tool_calls:
|
|
995
|
+
active_tool_calls[index]["arguments_buffer"] += delta.get(
|
|
996
|
+
"partial_json", ""
|
|
997
|
+
)
|
|
998
|
+
# Yield incremental argument update
|
|
999
|
+
yield ToolCallChunk(
|
|
1000
|
+
id=active_tool_calls[index]["id"],
|
|
1001
|
+
function=FunctionCallChunk(
|
|
1002
|
+
name=active_tool_calls[index]["name"],
|
|
1003
|
+
arguments=delta.get("partial_json", ""),
|
|
1004
|
+
),
|
|
1005
|
+
)
|
|
1006
|
+
|
|
1007
|
+
# Handle tool call start
|
|
1008
|
+
elif chunk_data.get("type") == "content_block_start":
|
|
1009
|
+
content_block = chunk_data.get("content_block", {})
|
|
1010
|
+
index = chunk_data.get("index", 0)
|
|
1011
|
+
|
|
1012
|
+
if content_block.get("type") == "tool_use":
|
|
1013
|
+
# Initialize tool call tracking
|
|
1014
|
+
active_tool_calls[index] = {
|
|
1015
|
+
"id": content_block.get("id"),
|
|
1016
|
+
"name": content_block.get("name"),
|
|
1017
|
+
"arguments_buffer": "",
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
# Yield initial tool call chunk
|
|
1021
|
+
yield ToolCallChunk(
|
|
1022
|
+
id=content_block.get("id"),
|
|
1023
|
+
function=FunctionCallChunk(
|
|
1024
|
+
name=content_block.get("name"),
|
|
1025
|
+
arguments="", # Start with empty, will be filled by deltas
|
|
1026
|
+
),
|
|
1027
|
+
)
|
|
1028
|
+
|
|
1029
|
+
# Handle content block stop (tool call complete)
|
|
1030
|
+
elif chunk_data.get("type") == "content_block_stop":
|
|
1031
|
+
index = chunk_data.get("index", 0)
|
|
1032
|
+
if index in active_tool_calls:
|
|
1033
|
+
# Tool call is complete, clean up
|
|
1034
|
+
del active_tool_calls[index]
|
|
1035
|
+
|
|
1036
|
+
elif chunk_data.get("type") == "message_stop":
|
|
1037
|
+
self._attributes.update(
|
|
1038
|
+
{
|
|
1039
|
+
LLM_TOKEN_COUNT_COMPLETION: chunk_data.get(
|
|
1040
|
+
"amazon-bedrock-invocationMetrics", {}
|
|
1041
|
+
).get("outputTokenCount", 0)
|
|
1042
|
+
}
|
|
1043
|
+
)
|
|
1044
|
+
|
|
1045
|
+
self._attributes.update(
|
|
1046
|
+
{
|
|
1047
|
+
LLM_TOKEN_COUNT_PROMPT: chunk_data.get(
|
|
1048
|
+
"amazon-bedrock-invocationMetrics", {}
|
|
1049
|
+
).get("inputTokenCount", 0)
|
|
1050
|
+
}
|
|
1051
|
+
)
|
|
1052
|
+
|
|
1053
|
+
def _build_bedrock_messages(
|
|
1054
|
+
self,
|
|
1055
|
+
messages: list[
|
|
1056
|
+
tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
|
|
1057
|
+
],
|
|
1058
|
+
) -> tuple[list[dict[str, Any]], str]:
|
|
1059
|
+
bedrock_messages = []
|
|
1060
|
+
system_prompt = ""
|
|
1061
|
+
for role, content, _, _ in messages:
|
|
1062
|
+
if role == ChatCompletionMessageRole.USER:
|
|
1063
|
+
bedrock_messages.append(
|
|
1064
|
+
{
|
|
1065
|
+
"role": "user",
|
|
1066
|
+
"content": content,
|
|
1067
|
+
}
|
|
1068
|
+
)
|
|
1069
|
+
elif role == ChatCompletionMessageRole.AI:
|
|
1070
|
+
bedrock_messages.append(
|
|
1071
|
+
{
|
|
1072
|
+
"role": "assistant",
|
|
1073
|
+
"content": content,
|
|
1074
|
+
}
|
|
1075
|
+
)
|
|
1076
|
+
elif role == ChatCompletionMessageRole.SYSTEM:
|
|
1077
|
+
system_prompt += content + "\n"
|
|
1078
|
+
return bedrock_messages, system_prompt
|
|
1079
|
+
|
|
1080
|
+
def _extract_system_prompt(
|
|
1081
|
+
self,
|
|
1082
|
+
messages: list[
|
|
1083
|
+
tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
|
|
1084
|
+
],
|
|
1085
|
+
) -> str:
|
|
1086
|
+
"""Extract system prompt from messages."""
|
|
1087
|
+
system_prompts = []
|
|
1088
|
+
for role, content, _, _ in messages:
|
|
1089
|
+
if role == ChatCompletionMessageRole.SYSTEM:
|
|
1090
|
+
system_prompts.append(content)
|
|
1091
|
+
return "\n".join(system_prompts)
|
|
1092
|
+
|
|
1093
|
+
def _build_converse_messages(
|
|
1094
|
+
self,
|
|
1095
|
+
messages: list[
|
|
1096
|
+
tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
|
|
1097
|
+
],
|
|
1098
|
+
) -> list[dict[str, Any]]:
|
|
1099
|
+
"""Convert messages to Converse API format."""
|
|
1100
|
+
converse_messages: list[dict[str, Any]] = []
|
|
1101
|
+
for role, content, _id, tool_calls in messages:
|
|
1102
|
+
if role == ChatCompletionMessageRole.USER:
|
|
1103
|
+
converse_messages.append({"role": "user", "content": [{"text": content}]})
|
|
1104
|
+
elif role == ChatCompletionMessageRole.TOOL:
|
|
1105
|
+
converse_messages.append(
|
|
1106
|
+
{
|
|
1107
|
+
"role": "user",
|
|
1108
|
+
"content": [
|
|
1109
|
+
{
|
|
1110
|
+
"toolResult": {
|
|
1111
|
+
"toolUseId": _id,
|
|
1112
|
+
"content": [{"json": json.loads(content)}],
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
],
|
|
1116
|
+
}
|
|
1117
|
+
)
|
|
1118
|
+
|
|
1119
|
+
elif role == ChatCompletionMessageRole.AI:
|
|
1120
|
+
# Handle assistant messages with potential tool calls
|
|
1121
|
+
message: dict[str, Any] = {"role": "assistant", "content": []}
|
|
1122
|
+
if content:
|
|
1123
|
+
message["content"].append({"text": content})
|
|
1124
|
+
if tool_calls:
|
|
1125
|
+
for tool_call in tool_calls:
|
|
1126
|
+
message["content"].append(tool_call)
|
|
1127
|
+
converse_messages.append(message)
|
|
1128
|
+
return converse_messages
|
|
1129
|
+
|
|
1130
|
+
|
|
1131
|
+
+@register_llm_client(
+    provider_key=GenerativeProviderKey.OPENAI,
+    model_names=[
+        PROVIDER_DEFAULT,
+        "gpt-4.1",
+        "gpt-4.1-mini",
+        "gpt-4.1-nano",
+        "gpt-4.1-2025-04-14",
+        "gpt-4.1-mini-2025-04-14",
+        "gpt-4.1-nano-2025-04-14",
+        "gpt-4o",
+        "gpt-4o-2024-11-20",
+        "gpt-4o-2024-08-06",
+        "gpt-4o-2024-05-13",
+        "chatgpt-4o-latest",
+        "gpt-4o-mini",
+        "gpt-4o-mini-2024-07-18",
+        "gpt-4-turbo",
+        "gpt-4-turbo-2024-04-09",
+        "gpt-4-turbo-preview",
+        "gpt-4-0125-preview",
+        "gpt-4-1106-preview",
+        "gpt-4",
+        "gpt-4-0613",
+        "gpt-3.5-turbo-0125",
+        "gpt-3.5-turbo",
+        "gpt-3.5-turbo-1106",
+        # preview models
+        "gpt-4.5-preview",
+    ],
+)
+class OpenAIStreamingClient(OpenAIBaseStreamingClient):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ) -> None:
+        from openai import AsyncOpenAI
+
+        base_url = model.base_url or getenv("OPENAI_BASE_URL")
+
+        # Try to get API key from credentials first, then fallback to env
+        api_key = _get_credential_value(credentials, "OPENAI_API_KEY") or getenv("OPENAI_API_KEY")
+
+        if not api_key:
+            if not base_url:
+                raise BadRequest("An API key is required for OpenAI models")
+            api_key = "sk-fake-api-key"
+
+        client = AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url,
+            default_headers=model.custom_headers or None,
+            timeout=30,
+        )
+        super().__init__(client=client, model=model, credentials=credentials)
+        self._attributes[LLM_PROVIDER] = OpenInferenceLLMProviderValues.OPENAI.value
+        self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value
+
+
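The credential-then-environment fallback used by `OpenAIStreamingClient.__init__` (and repeated by the other clients below) can be summarized by this standalone sketch; `Credential` and `resolve_api_key` are stand-in names for illustration, not names from this module:

```python
from os import getenv
from typing import NamedTuple, Optional


class Credential(NamedTuple):  # stand-in for PlaygroundClientCredential
    env_var_name: str
    value: str


def resolve_api_key(credentials: Optional[list[Credential]], env_var: str) -> Optional[str]:
    # Prefer a credential supplied through the UI, then fall back to the environment.
    from_credentials = next(
        (c.value for c in (credentials or []) if c.env_var_name == env_var), None
    )
    return from_credentials or getenv(env_var)


# Example: a UI-supplied key wins over OPENAI_API_KEY in the environment.
print(resolve_api_key([Credential("OPENAI_API_KEY", "sk-from-ui")], "OPENAI_API_KEY"))
```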
+_OPENAI_REASONING_MODELS = [
+    "gpt-5.2",
+    "gpt-5.2-2025-12-11",
+    "gpt-5.2-chat-latest",
+    "gpt-5.1",
+    "gpt-5.1-2025-11-13",
+    "gpt-5.1-chat-latest",
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-5-nano",
+    "gpt-5-chat-latest",
+    "o1",
+    "o1-pro",
+    "o1-2024-12-17",
+    "o1-pro-2025-03-19",
+    "o1-mini",
+    "o1-mini-2024-09-12",
+    "o1-preview",
+    "o1-preview-2024-09-12",
+    "o3",
+    "o3-pro",
+    "o3-2025-04-16",
+    "o3-mini",
+    "o3-mini-2025-01-31",
+    "o4-mini",
+    "o4-mini-2025-04-16",
+]
+
+
+class OpenAIReasoningReasoningModelsMixin:
+    """Mixin class for OpenAI-style reasoning model clients (o1, o3 series)."""
+
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            StringInvocationParameter(
+                invocation_name="reasoning_effort",
+                label="Reasoning Effort",
+                canonical_name=CanonicalParameterName.REASONING_EFFORT,
+            ),
+            IntInvocationParameter(
+                invocation_name="max_completion_tokens",
+                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
+                label="Max Completion Tokens",
+            ),
+            IntInvocationParameter(
+                invocation_name="seed",
+                canonical_name=CanonicalParameterName.RANDOM_SEED,
+                label="Seed",
+            ),
+            JSONInvocationParameter(
+                invocation_name="tool_choice",
+                label="Tool Choice",
+                canonical_name=CanonicalParameterName.TOOL_CHOICE,
+            ),
+            JSONInvocationParameter(
+                invocation_name="response_format",
+                label="Response Format",
+                canonical_name=CanonicalParameterName.RESPONSE_FORMAT,
+            ),
+            JSONInvocationParameter(
+                invocation_name="extra_body",
+                label="Extra Body",
+            ),
+        ]
+
+
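The mixin only declares which knobs the playground UI may expose; at request time they arrive as plain keyword arguments. An invocation-parameter dict matching the declarations above might look like this (values are illustrative, not defaults from this module):

```python
# Illustrative values only; each key corresponds to one InvocationParameter declared above.
invocation_parameters = {
    "reasoning_effort": "high",                   # StringInvocationParameter
    "max_completion_tokens": 2048,                # IntInvocationParameter
    "seed": 42,                                   # IntInvocationParameter
    "tool_choice": "auto",                        # JSONInvocationParameter
    "response_format": {"type": "json_object"},   # JSONInvocationParameter
    "extra_body": {"metadata": {"run": "demo"}},  # JSONInvocationParameter
}
```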
+@register_llm_client(
+    provider_key=GenerativeProviderKey.OPENAI,
+    model_names=_OPENAI_REASONING_MODELS,
+)
+class OpenAIReasoningNonStreamingClient(
+    OpenAIReasoningReasoningModelsMixin,
+    OpenAIStreamingClient,
+):
+    def to_openai_chat_completion_param(
+        self,
+        role: ChatCompletionMessageRole,
+        content: JSONScalarType,
+        tool_call_id: Optional[str] = None,
+        tool_calls: Optional[list[JSONScalarType]] = None,
+    ) -> Optional["ChatCompletionMessageParam"]:
+        from openai.types.chat import (
+            ChatCompletionAssistantMessageParam,
+            ChatCompletionDeveloperMessageParam,
+            ChatCompletionToolMessageParam,
+            ChatCompletionUserMessageParam,
+        )
+
+        if role is ChatCompletionMessageRole.USER:
+            return ChatCompletionUserMessageParam(
+                {
+                    "content": content,
+                    "role": "user",
+                }
+            )
+        if role is ChatCompletionMessageRole.SYSTEM:
+            return ChatCompletionDeveloperMessageParam(
+                {
+                    "content": content,
+                    "role": "developer",
+                }
+            )
+        if role is ChatCompletionMessageRole.AI:
+            if tool_calls is None:
+                return ChatCompletionAssistantMessageParam(
+                    {
+                        "content": content,
+                        "role": "assistant",
+                    }
+                )
+            else:
+                return ChatCompletionAssistantMessageParam(
+                    {
+                        "content": content,
+                        "role": "assistant",
+                        "tool_calls": [
+                            self.to_openai_tool_call_param(tool_call) for tool_call in tool_calls
+                        ],
+                    }
+                )
+        if role is ChatCompletionMessageRole.TOOL:
+            if tool_call_id is None:
+                raise ValueError("tool_call_id is required for tool messages")
+            return ChatCompletionToolMessageParam(
+                {"content": content, "role": "tool", "tool_call_id": tool_call_id}
+            )
+        assert_never(role)
+
+
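The main difference from the base client is the SYSTEM branch: reasoning models receive system text as a `developer` message. A minimal standalone sketch of that translation (not this module's helper):

```python
def to_reasoning_message(role: str, content: str) -> dict[str, str]:
    # Reasoning models (o1/o3/gpt-5 family) take system text under the "developer" role.
    if role == "system":
        return {"role": "developer", "content": content}
    return {"role": role, "content": content}


assert to_reasoning_message("system", "Answer tersely.")["role"] == "developer"
```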
+@register_llm_client(
+    provider_key=GenerativeProviderKey.AZURE_OPENAI,
+    model_names=[
+        PROVIDER_DEFAULT,
+    ],
+)
+class AzureOpenAIStreamingClient(OpenAIBaseStreamingClient):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ):
+        from openai import AsyncAzureOpenAI
+
+        if not (endpoint := model.endpoint or getenv("AZURE_OPENAI_ENDPOINT")):
+            raise BadRequest("An Azure endpoint is required for Azure OpenAI models")
+        if not (api_version := model.api_version or getenv("OPENAI_API_VERSION")):
+            raise BadRequest("An OpenAI API version is required for Azure OpenAI models")
+
+        # Try to get API key from credentials first, then fallback to env
+        api_key = _get_credential_value(credentials, "AZURE_OPENAI_API_KEY") or getenv(
+            "AZURE_OPENAI_API_KEY"
+        )
+
+        if api_key:
+            client = AsyncAzureOpenAI(
+                api_key=api_key,
+                azure_endpoint=endpoint,
+                api_version=api_version,
+                default_headers=model.custom_headers or None,
+            )
+        else:
+            try:
+                from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
+            except ImportError:
+                raise BadRequest(
+                    "Provide an API key for Azure OpenAI models or use azure-identity, see. e.g. "
+                    "https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.environmentcredential?view=azure-python"  # noqa: E501
+                )
+
+            client = AsyncAzureOpenAI(
+                azure_ad_token_provider=get_bearer_token_provider(
+                    DefaultAzureCredential(),
+                    "https://cognitiveservices.azure.com/.default",
+                ),
+                azure_endpoint=endpoint,
+                api_version=api_version,
+                default_headers=model.custom_headers or None,
+            )
+        super().__init__(client=client, model=model, credentials=credentials)
+        self._attributes[LLM_PROVIDER] = OpenInferenceLLMProviderValues.AZURE.value
+        self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.OPENAI.value
+
+
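When no `AZURE_OPENAI_API_KEY` is available, the client above falls back to Azure AD authentication via azure-identity. A hedged sketch of that keyless path, with placeholder endpoint and API-version values:

```python
from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
from openai import AsyncAzureOpenAI

client = AsyncAzureOpenAI(
    azure_ad_token_provider=get_bearer_token_provider(
        DefaultAzureCredential(),
        "https://cognitiveservices.azure.com/.default",
    ),
    azure_endpoint="https://my-resource.openai.azure.com",  # placeholder endpoint
    api_version="2024-06-01",                               # placeholder API version
)
```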
+@register_llm_client(
+    provider_key=GenerativeProviderKey.AZURE_OPENAI,
+    model_names=_OPENAI_REASONING_MODELS,
+)
+class AzureOpenAIReasoningNonStreamingClient(
+    OpenAIReasoningReasoningModelsMixin,
+    AzureOpenAIStreamingClient,
+):
+    @override
+    async def chat_completion_create(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        **invocation_parameters: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        from openai import NOT_GIVEN
+
+        # Convert standard messages to OpenAI messages
+        openai_messages = []
+        for message in messages:
+            openai_message = self.to_openai_chat_completion_param(*message)
+            if openai_message is not None:
+                openai_messages.append(openai_message)
+
+        throttled_create = self.rate_limiter._alimit(self.client.chat.completions.create)
+        response = await throttled_create(
+            messages=openai_messages,
+            model=self.model_name,
+            stream=False,
+            tools=tools or NOT_GIVEN,
+            **invocation_parameters,
+        )
+
+        if response.usage is not None:
+            self._attributes.update(dict(self._llm_token_counts(response.usage)))
+
+        choice = response.choices[0]
+        if choice.message.content:
+            yield TextChunk(content=choice.message.content)
+
+        if choice.message.tool_calls:
+            for tool_call in choice.message.tool_calls:
+                yield ToolCallChunk(
+                    id=tool_call.id,
+                    function=FunctionCallChunk(
+                        name=tool_call.function.name,
+                        arguments=tool_call.function.arguments,
+                    ),
+                )
+
+    def to_openai_chat_completion_param(
+        self,
+        role: ChatCompletionMessageRole,
+        content: JSONScalarType,
+        tool_call_id: Optional[str] = None,
+        tool_calls: Optional[list[JSONScalarType]] = None,
+    ) -> Optional["ChatCompletionMessageParam"]:
+        from openai.types.chat import (
+            ChatCompletionAssistantMessageParam,
+            ChatCompletionDeveloperMessageParam,
+            ChatCompletionToolMessageParam,
+            ChatCompletionUserMessageParam,
+        )
+
+        if role is ChatCompletionMessageRole.USER:
+            return ChatCompletionUserMessageParam(
+                {
+                    "content": content,
+                    "role": "user",
+                }
+            )
+        if role is ChatCompletionMessageRole.SYSTEM:
+            return ChatCompletionDeveloperMessageParam(
+                {
+                    "content": content,
+                    "role": "developer",
+                }
+            )
+        if role is ChatCompletionMessageRole.AI:
+            if tool_calls is None:
+                return ChatCompletionAssistantMessageParam(
+                    {
+                        "content": content,
+                        "role": "assistant",
+                    }
+                )
+            else:
+                return ChatCompletionAssistantMessageParam(
+                    {
+                        "content": content,
+                        "role": "assistant",
+                        "tool_calls": [
+                            self.to_openai_tool_call_param(tool_call) for tool_call in tool_calls
+                        ],
+                    }
+                )
+        if role is ChatCompletionMessageRole.TOOL:
+            if tool_call_id is None:
+                raise ValueError("tool_call_id is required for tool messages")
+            return ChatCompletionToolMessageParam(
+                {"content": content, "role": "tool", "tool_call_id": tool_call_id}
+            )
+        assert_never(role)
+
+
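Although registered alongside the streaming clients, the reasoning client above issues a single non-streaming request and fans the parsed message out as chunks. A simplified standalone sketch of that fan-out (the dataclasses are stand-ins for this module's chunk types, not the real definitions):

```python
from dataclasses import dataclass
from typing import Any, Iterator, Union


@dataclass
class Text:  # stand-in for TextChunk
    content: str


@dataclass
class ToolCall:  # stand-in for ToolCallChunk
    id: str
    name: str
    arguments: str


def fan_out(message: dict[str, Any]) -> Iterator[Union[Text, ToolCall]]:
    # One text chunk for the message body, then one chunk per tool call.
    if message.get("content"):
        yield Text(content=message["content"])
    for call in message.get("tool_calls") or []:
        yield ToolCall(
            id=call["id"],
            name=call["function"]["name"],
            arguments=call["function"]["arguments"],
        )


# Example input shaped like a parsed chat-completion message:
chunks = list(fan_out({
    "content": "Looking that up.",
    "tool_calls": [{"id": "call_1", "function": {"name": "get_weather", "arguments": "{}"}}],
}))
```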
+@register_llm_client(
+    provider_key=GenerativeProviderKey.ANTHROPIC,
+    model_names=[
+        PROVIDER_DEFAULT,
+        "claude-3-5-haiku-latest",
+        "claude-3-5-haiku-20241022",
+        "claude-3-haiku-20240307",
+    ],
+)
+class AnthropicStreamingClient(PlaygroundStreamingClient):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
+    ) -> None:
+        import anthropic
+
+        super().__init__(model=model, credentials=credentials)
+        self._attributes[LLM_PROVIDER] = OpenInferenceLLMProviderValues.ANTHROPIC.value
+        self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.ANTHROPIC.value
+
+        # Try to get API key from credentials first, then fallback to env
+        api_key = _get_credential_value(credentials, "ANTHROPIC_API_KEY") or getenv(
+            "ANTHROPIC_API_KEY"
+        )
+
+        if not api_key:
+            raise BadRequest("An API key is required for Anthropic models")
+
+        self.client = anthropic.AsyncAnthropic(
+            api_key=api_key,
+            default_headers=model.custom_headers or None,
+        )
+        self.model_name = model.name
+        self.rate_limiter = PlaygroundRateLimiter(model.provider_key, anthropic.RateLimitError)
+        self.client._client = _HttpxClient(self.client._client, self._attributes)
+
+    @classmethod
+    def dependencies(cls) -> list[Dependency]:
+        return [Dependency(name="anthropic")]
+
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            IntInvocationParameter(
+                invocation_name="max_tokens",
+                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
+                label="Max Tokens",
+                default_value=1024,
+                required=True,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="temperature",
+                canonical_name=CanonicalParameterName.TEMPERATURE,
+                label="Temperature",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            StringListInvocationParameter(
+                invocation_name="stop_sequences",
+                canonical_name=CanonicalParameterName.STOP_SEQUENCES,
+                label="Stop Sequences",
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="top_p",
+                canonical_name=CanonicalParameterName.TOP_P,
+                label="Top P",
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            JSONInvocationParameter(
+                invocation_name="tool_choice",
+                label="Tool Choice",
+                canonical_name=CanonicalParameterName.TOOL_CHOICE,
+            ),
+        ]
+
+    async def chat_completion_create(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        **invocation_parameters: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        import anthropic.lib.streaming as anthropic_streaming
+        import anthropic.types as anthropic_types
+
+        anthropic_messages, system_prompt = self._build_anthropic_messages(messages)
+        anthropic_params = {
+            "messages": anthropic_messages,
+            "model": self.model_name,
+            "system": system_prompt,
             "tools": tools,
             **invocation_parameters,
         }
@@ -796,15 +1580,34 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
         async with await throttled_stream(**anthropic_params) as stream:
             async for event in stream:
                 if isinstance(event, anthropic_types.RawMessageStartEvent):
-
-
-
+                    usage = event.message.usage
+
+                    token_counts: dict[str, Any] = {}
+                    if prompt_tokens := (
+                        (usage.input_tokens or 0)
+                        + (getattr(usage, "cache_creation_input_tokens", 0) or 0)
+                        + (getattr(usage, "cache_read_input_tokens", 0) or 0)
+                    ):
+                        token_counts[LLM_TOKEN_COUNT_PROMPT] = prompt_tokens
+                    if cache_creation_tokens := getattr(usage, "cache_creation_input_tokens", None):
+                        if cache_creation_tokens is not None:
+                            token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] = (
+                                cache_creation_tokens
+                            )
+                    self._attributes.update(token_counts)
                 elif isinstance(event, anthropic_streaming.TextEvent):
                     yield TextChunk(content=event.text)
                 elif isinstance(event, anthropic_streaming.MessageStopEvent):
-
-
-
+                    usage = event.message.usage
+                    output_token_counts: dict[str, Any] = {}
+                    if usage.output_tokens:
+                        output_token_counts[LLM_TOKEN_COUNT_COMPLETION] = usage.output_tokens
+                    if cache_read_tokens := getattr(usage, "cache_read_input_tokens", None):
+                        if cache_read_tokens is not None:
+                            output_token_counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] = (
+                                cache_read_tokens
+                            )
+                    self._attributes.update(output_token_counts)
                 elif (
                     isinstance(event, anthropic_streaming.ContentBlockStopEvent)
                     and event.content_block.type == "tool_use"
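The hunk above folds Anthropic's three prompt-side counters into a single prompt total and records the cache figures under separate attributes. The arithmetic, as a minimal sketch:

```python
def prompt_token_total(
    input_tokens: int,
    cache_creation_input_tokens: int = 0,
    cache_read_input_tokens: int = 0,
) -> int:
    # Anthropic reports cache writes and cache reads separately from input_tokens,
    # so the prompt total is the sum of all three.
    return (
        (input_tokens or 0)
        + (cache_creation_input_tokens or 0)
        + (cache_read_input_tokens or 0)
    )


# e.g. 12 fresh input tokens plus 400 tokens written to the prompt cache:
assert prompt_token_total(12, cache_creation_input_tokens=400) == 412
```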
@@ -889,6 +1692,18 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
 @register_llm_client(
     provider_key=GenerativeProviderKey.ANTHROPIC,
     model_names=[
+        "claude-opus-4-5",
+        "claude-opus-4-5-20251101",
+        "claude-sonnet-4-5",
+        "claude-sonnet-4-5-20250929",
+        "claude-haiku-4-5",
+        "claude-haiku-4-5-20251001",
+        "claude-opus-4-1",
+        "claude-opus-4-1-20250805",
+        "claude-sonnet-4-0",
+        "claude-sonnet-4-20250514",
+        "claude-opus-4-0",
+        "claude-opus-4-20250514",
         "claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-20250219",
     ],
@@ -911,7 +1726,6 @@ class AnthropicReasoningStreamingClient(AnthropicStreamingClient):
     provider_key=GenerativeProviderKey.GOOGLE,
     model_names=[
         PROVIDER_DEFAULT,
-        "gemini-2.5-pro-preview-03-25",
         "gemini-2.0-flash-lite",
         "gemini-2.0-flash-001",
         "gemini-2.0-flash-thinking-exp-01-21",
@@ -925,21 +1739,31 @@ class GoogleStreamingClient(PlaygroundStreamingClient):
     def __init__(
         self,
         model: GenerativeModelInput,
-
+        credentials: Optional[list[PlaygroundClientCredential]] = None,
     ) -> None:
-        import google.
+        import google.genai as google_genai
 
-        super().__init__(model=model,
+        super().__init__(model=model, credentials=credentials)
         self._attributes[LLM_PROVIDER] = OpenInferenceLLMProviderValues.GOOGLE.value
         self._attributes[LLM_SYSTEM] = OpenInferenceLLMSystemValues.VERTEXAI.value
-
+
+        # Try to get API key from credentials first, then fallback to env
+        api_key = (
+            _get_credential_value(credentials, "GEMINI_API_KEY")
+            or _get_credential_value(credentials, "GOOGLE_API_KEY")
+            or getenv("GEMINI_API_KEY")
+            or getenv("GOOGLE_API_KEY")
+        )
+
+        if not api_key:
             raise BadRequest("An API key is required for Gemini models")
-
+
+        self.client = google_genai.Client(api_key=api_key)
         self.model_name = model.name
 
     @classmethod
     def dependencies(cls) -> list[Dependency]:
-        return [Dependency(name="google-
+        return [Dependency(name="google-genai", module_name="google.genai")]
 
     @classmethod
     def supported_invocation_parameters(cls) -> list[InvocationParameter]:
@@ -976,7 +1800,6 @@ class GoogleStreamingClient(PlaygroundStreamingClient):
                 invocation_name="top_p",
                 canonical_name=CanonicalParameterName.TOP_P,
                 label="Top P",
-                default_value=1.0,
                 min_value=0.0,
                 max_value=1.0,
             ),
@@ -984,6 +1807,11 @@ class GoogleStreamingClient(PlaygroundStreamingClient):
                 invocation_name="top_k",
                 label="Top K",
             ),
+            JSONInvocationParameter(
+                invocation_name="tool_config",
+                label="Tool Config",
+                canonical_name=CanonicalParameterName.TOOL_CHOICE,
+            ),
         ]
 
     async def chat_completion_create(
@@ -994,28 +1822,25 @@ class GoogleStreamingClient(PlaygroundStreamingClient):
         tools: list[JSONScalarType],
         **invocation_parameters: Any,
     ) -> AsyncIterator[ChatCompletionChunk]:
-
+        from google.genai import types
 
-
-
-        )
+        contents, system_prompt = self._build_google_messages(messages)
+
+        config_dict = invocation_parameters.copy()
 
-        model_args = {"model_name": self.model_name}
         if system_prompt:
-
-            client = google_genai.GenerativeModel(**model_args)
+            config_dict["system_instruction"] = system_prompt
 
-
-            **
-
-            google_params = {
-                "content": current_message,
-                "generation_config": google_config,
-                "stream": True,
-            }
+        if tools:
+            function_declarations = [types.FunctionDeclaration(**tool) for tool in tools]
+            config_dict["tools"] = [types.Tool(function_declarations=function_declarations)]
 
-
-        stream = await
+        config = types.GenerateContentConfig.model_validate(config_dict)
+        stream = await self.client.aio.models.generate_content_stream(
+            model=f"models/{self.model_name}",
+            contents=contents,
+            config=config,
+        )
         async for event in stream:
             self._attributes.update(
                 {
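The new call path above goes through the google-genai async client. A hedged, self-contained sketch of the same pattern (placeholder model name and prompt; assumes the google-genai package is installed and GEMINI_API_KEY is set):

```python
import asyncio
from os import getenv

from google import genai
from google.genai import types


async def main() -> None:
    client = genai.Client(api_key=getenv("GEMINI_API_KEY"))
    config = types.GenerateContentConfig(system_instruction="Reply in one sentence.")
    # Awaiting the call returns an async iterator of response chunks.
    stream = await client.aio.models.generate_content_stream(
        model="models/gemini-2.5-flash",  # placeholder model name
        contents=[{"role": "user", "parts": [{"text": "What is tracing?"}]}],
        config=config,
    )
    async for event in stream:
        if event.text:
            print(event.text, end="")


asyncio.run(main())
```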
@@ -1024,31 +1849,148 @@ class GoogleStreamingClient(PlaygroundStreamingClient):
                     LLM_TOKEN_COUNT_TOTAL: event.usage_metadata.total_token_count,
                 }
             )
-
+
+            if event.candidates:
+                candidate = event.candidates[0]
+                if candidate.content and candidate.content.parts:
+                    for part in candidate.content.parts:
+                        if function_call := part.function_call:
+                            yield ToolCallChunk(
+                                id=function_call.id or "",
+                                function=FunctionCallChunk(
+                                    name=function_call.name or "",
+                                    arguments=json.dumps(function_call.args or {}),
+                                ),
+                            )
+                        elif text := part.text:
+                            yield TextChunk(content=text)
 
     def _build_google_messages(
         self,
         messages: list[tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[str]]]],
-    ) -> tuple[list["ContentType"], str
-
+    ) -> tuple[list["ContentType"], str]:
+        """Build Google messages following the standard pattern - process ALL messages."""
+        google_messages: list["ContentType"] = []
         system_prompts = []
         for role, content, _tool_call_id, _tool_calls in messages:
             if role == ChatCompletionMessageRole.USER:
-
+                google_messages.append({"role": "user", "parts": [{"text": content}]})
             elif role == ChatCompletionMessageRole.AI:
-
+                google_messages.append({"role": "model", "parts": [{"text": content}]})
             elif role == ChatCompletionMessageRole.SYSTEM:
                 system_prompts.append(content)
             elif role == ChatCompletionMessageRole.TOOL:
                 raise NotImplementedError
             else:
                 assert_never(role)
-        if google_message_history:
-            prompt = google_message_history.pop()["parts"]
-        else:
-            prompt = ""
 
-        return
+        return google_messages, "\n".join(system_prompts)
+
+
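`_build_google_messages` maps playground roles onto Gemini's two content roles and lifts system turns into a separate prompt string. A minimal standalone sketch of that mapping (not the module's helper):

```python
def to_google_content(role: str, text: str) -> dict:
    # Gemini content only knows "user" and "model"; assistant turns become "model".
    google_role = "model" if role == "assistant" else "user"
    return {"role": google_role, "parts": [{"text": text}]}


assert to_google_content("assistant", "Hi!")["role"] == "model"
assert to_google_content("user", "Hello")["role"] == "user"
```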
+@register_llm_client(
+    provider_key=GenerativeProviderKey.GOOGLE,
+    model_names=[
+        PROVIDER_DEFAULT,
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gemini-2.5-flash-lite",
+        "gemini-2.5-pro-preview-03-25",
+    ],
+)
+class Gemini25GoogleStreamingClient(GoogleStreamingClient):
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            BoundedFloatInvocationParameter(
+                invocation_name="temperature",
+                canonical_name=CanonicalParameterName.TEMPERATURE,
+                label="Temperature",
+                default_value=1.0,
+                min_value=0.0,
+                max_value=2.0,
+            ),
+            IntInvocationParameter(
+                invocation_name="max_output_tokens",
+                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
+                label="Max Output Tokens",
+            ),
+            StringListInvocationParameter(
+                invocation_name="stop_sequences",
+                canonical_name=CanonicalParameterName.STOP_SEQUENCES,
+                label="Stop Sequences",
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="top_p",
+                canonical_name=CanonicalParameterName.TOP_P,
+                label="Top P",
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            FloatInvocationParameter(
+                invocation_name="top_k",
+                label="Top K",
+            ),
+            JSONInvocationParameter(
+                invocation_name="tool_config",
+                label="Tool Choice",
+                canonical_name=CanonicalParameterName.TOOL_CHOICE,
+            ),
+        ]
+
+
+@register_llm_client(
+    provider_key=GenerativeProviderKey.GOOGLE,
+    model_names=[
+        "gemini-3-pro-preview",
+    ],
+)
+class Gemini3GoogleStreamingClient(Gemini25GoogleStreamingClient):
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            StringInvocationParameter(
+                invocation_name="thinking_level",
+                label="Thinking Level",
+                canonical_name=CanonicalParameterName.REASONING_EFFORT,
+            ),
+            *super().supported_invocation_parameters(),
+        ]
+
+    async def chat_completion_create(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        **invocation_parameters: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        # Extract thinking_level and construct thinking_config
+        thinking_level = invocation_parameters.pop("thinking_level", None)
+
+        if thinking_level:
+            try:
+                import google.genai
+                from packaging.version import parse as parse_version
+
+                if parse_version(google.genai.__version__) < parse_version("1.50.0"):
+                    raise ImportError
+            except (ImportError, AttributeError):
+                raise BadRequest(
+                    "Reasoning capabilities for Gemini models require `google-genai>=1.50.0` "
+                    "and Python >= 3.10."
+                )
+
+            # NOTE: as of gemini 1.51.0 medium thinking is not supported
+            # but will eventually be added in a future version
+            # we are purposefully allowing users to select medium knowing
+            # it does not work.
+            invocation_parameters["thinking_config"] = {
+                "include_thoughts": True,
+                "thinking_level": thinking_level.upper(),
+            }
+
+        async for chunk in super().chat_completion_create(messages, tools, **invocation_parameters):
+            yield chunk
 
 
 def initialize_playground_clients() -> None:
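Before delegating to the parent client, `Gemini3GoogleStreamingClient` rewrites the playground-level `thinking_level` parameter into google-genai's `thinking_config`. The translation in isolation, as a minimal sketch:

```python
from typing import Any


def with_thinking_config(invocation_parameters: dict[str, Any]) -> dict[str, Any]:
    params = dict(invocation_parameters)
    if thinking_level := params.pop("thinking_level", None):
        # include_thoughts surfaces the model's reasoning; the level is upper-cased for the API.
        params["thinking_config"] = {
            "include_thoughts": True,
            "thinking_level": thinking_level.upper(),
        }
    return params


assert (
    with_thinking_config({"thinking_level": "high"})["thinking_config"]["thinking_level"] == "HIGH"
)
```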
@@ -1063,6 +2005,15 @@ LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
 LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
 LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
 LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
+LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
+    SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
+)
+LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING = (
+    SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING
+)
+LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO
 
 
 class _HttpxClient(wrapt.ObjectProxy):  # type: ignore