arize-phoenix 5.5.1__py3-none-any.whl → 5.6.0__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

This version of arize-phoenix has been flagged as potentially problematic.

Files changed (172)
  1. {arize_phoenix-5.5.1.dist-info → arize_phoenix-5.6.0.dist-info}/METADATA +8 -11
  2. {arize_phoenix-5.5.1.dist-info → arize_phoenix-5.6.0.dist-info}/RECORD +171 -171
  3. phoenix/config.py +8 -8
  4. phoenix/core/model.py +3 -3
  5. phoenix/core/model_schema.py +41 -50
  6. phoenix/core/model_schema_adapter.py +17 -16
  7. phoenix/datetime_utils.py +2 -2
  8. phoenix/db/bulk_inserter.py +10 -20
  9. phoenix/db/engines.py +2 -1
  10. phoenix/db/enums.py +2 -2
  11. phoenix/db/helpers.py +8 -7
  12. phoenix/db/insertion/dataset.py +9 -19
  13. phoenix/db/insertion/document_annotation.py +14 -13
  14. phoenix/db/insertion/helpers.py +6 -16
  15. phoenix/db/insertion/span_annotation.py +14 -13
  16. phoenix/db/insertion/trace_annotation.py +14 -13
  17. phoenix/db/insertion/types.py +19 -30
  18. phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +8 -8
  19. phoenix/db/models.py +28 -28
  20. phoenix/experiments/evaluators/base.py +2 -1
  21. phoenix/experiments/evaluators/code_evaluators.py +4 -5
  22. phoenix/experiments/evaluators/llm_evaluators.py +157 -4
  23. phoenix/experiments/evaluators/utils.py +3 -2
  24. phoenix/experiments/functions.py +10 -21
  25. phoenix/experiments/tracing.py +2 -1
  26. phoenix/experiments/types.py +20 -29
  27. phoenix/experiments/utils.py +2 -1
  28. phoenix/inferences/errors.py +6 -5
  29. phoenix/inferences/fixtures.py +6 -5
  30. phoenix/inferences/inferences.py +37 -37
  31. phoenix/inferences/schema.py +11 -10
  32. phoenix/inferences/validation.py +13 -14
  33. phoenix/logging/_formatter.py +3 -3
  34. phoenix/metrics/__init__.py +5 -4
  35. phoenix/metrics/binning.py +2 -1
  36. phoenix/metrics/metrics.py +2 -1
  37. phoenix/metrics/mixins.py +7 -6
  38. phoenix/metrics/retrieval_metrics.py +2 -1
  39. phoenix/metrics/timeseries.py +5 -4
  40. phoenix/metrics/wrappers.py +2 -2
  41. phoenix/pointcloud/clustering.py +3 -4
  42. phoenix/pointcloud/pointcloud.py +7 -5
  43. phoenix/pointcloud/umap_parameters.py +2 -1
  44. phoenix/server/api/dataloaders/annotation_summaries.py +12 -19
  45. phoenix/server/api/dataloaders/average_experiment_run_latency.py +2 -2
  46. phoenix/server/api/dataloaders/cache/two_tier_cache.py +3 -2
  47. phoenix/server/api/dataloaders/dataset_example_revisions.py +3 -8
  48. phoenix/server/api/dataloaders/dataset_example_spans.py +2 -5
  49. phoenix/server/api/dataloaders/document_evaluation_summaries.py +12 -18
  50. phoenix/server/api/dataloaders/document_evaluations.py +3 -7
  51. phoenix/server/api/dataloaders/document_retrieval_metrics.py +6 -13
  52. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +4 -8
  53. phoenix/server/api/dataloaders/experiment_error_rates.py +2 -5
  54. phoenix/server/api/dataloaders/experiment_run_annotations.py +3 -7
  55. phoenix/server/api/dataloaders/experiment_run_counts.py +1 -5
  56. phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -5
  57. phoenix/server/api/dataloaders/latency_ms_quantile.py +21 -30
  58. phoenix/server/api/dataloaders/min_start_or_max_end_times.py +7 -13
  59. phoenix/server/api/dataloaders/project_by_name.py +3 -3
  60. phoenix/server/api/dataloaders/record_counts.py +11 -18
  61. phoenix/server/api/dataloaders/span_annotations.py +3 -7
  62. phoenix/server/api/dataloaders/span_dataset_examples.py +3 -8
  63. phoenix/server/api/dataloaders/span_descendants.py +3 -7
  64. phoenix/server/api/dataloaders/span_projects.py +2 -2
  65. phoenix/server/api/dataloaders/token_counts.py +12 -19
  66. phoenix/server/api/dataloaders/trace_row_ids.py +3 -7
  67. phoenix/server/api/dataloaders/user_roles.py +3 -3
  68. phoenix/server/api/dataloaders/users.py +3 -3
  69. phoenix/server/api/helpers/__init__.py +4 -3
  70. phoenix/server/api/helpers/dataset_helpers.py +10 -9
  71. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +2 -2
  72. phoenix/server/api/input_types/AddSpansToDatasetInput.py +2 -2
  73. phoenix/server/api/input_types/ChatCompletionMessageInput.py +13 -1
  74. phoenix/server/api/input_types/ClusterInput.py +2 -2
  75. phoenix/server/api/input_types/DeleteAnnotationsInput.py +1 -3
  76. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +2 -2
  77. phoenix/server/api/input_types/DeleteExperimentsInput.py +1 -3
  78. phoenix/server/api/input_types/DimensionFilter.py +4 -4
  79. phoenix/server/api/input_types/Granularity.py +1 -1
  80. phoenix/server/api/input_types/InvocationParameters.py +2 -2
  81. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +2 -2
  82. phoenix/server/api/mutations/dataset_mutations.py +4 -4
  83. phoenix/server/api/mutations/experiment_mutations.py +1 -2
  84. phoenix/server/api/mutations/export_events_mutations.py +7 -7
  85. phoenix/server/api/mutations/span_annotations_mutations.py +4 -4
  86. phoenix/server/api/mutations/trace_annotations_mutations.py +4 -4
  87. phoenix/server/api/mutations/user_mutations.py +4 -4
  88. phoenix/server/api/openapi/schema.py +2 -2
  89. phoenix/server/api/queries.py +20 -20
  90. phoenix/server/api/routers/oauth2.py +4 -4
  91. phoenix/server/api/routers/v1/datasets.py +22 -36
  92. phoenix/server/api/routers/v1/evaluations.py +6 -5
  93. phoenix/server/api/routers/v1/experiment_evaluations.py +2 -2
  94. phoenix/server/api/routers/v1/experiment_runs.py +2 -2
  95. phoenix/server/api/routers/v1/experiments.py +4 -4
  96. phoenix/server/api/routers/v1/spans.py +13 -12
  97. phoenix/server/api/routers/v1/traces.py +5 -5
  98. phoenix/server/api/routers/v1/utils.py +5 -5
  99. phoenix/server/api/subscriptions.py +289 -167
  100. phoenix/server/api/types/AnnotationSummary.py +3 -3
  101. phoenix/server/api/types/Cluster.py +8 -7
  102. phoenix/server/api/types/Dataset.py +5 -4
  103. phoenix/server/api/types/Dimension.py +3 -3
  104. phoenix/server/api/types/DocumentEvaluationSummary.py +8 -7
  105. phoenix/server/api/types/EmbeddingDimension.py +6 -5
  106. phoenix/server/api/types/EvaluationSummary.py +3 -3
  107. phoenix/server/api/types/Event.py +7 -7
  108. phoenix/server/api/types/Experiment.py +3 -3
  109. phoenix/server/api/types/ExperimentComparison.py +2 -4
  110. phoenix/server/api/types/Inferences.py +9 -8
  111. phoenix/server/api/types/InferencesRole.py +2 -2
  112. phoenix/server/api/types/Model.py +2 -2
  113. phoenix/server/api/types/Project.py +11 -18
  114. phoenix/server/api/types/Segments.py +3 -3
  115. phoenix/server/api/types/Span.py +8 -7
  116. phoenix/server/api/types/TimeSeries.py +8 -7
  117. phoenix/server/api/types/Trace.py +2 -2
  118. phoenix/server/api/types/UMAPPoints.py +6 -6
  119. phoenix/server/api/types/User.py +3 -3
  120. phoenix/server/api/types/node.py +1 -3
  121. phoenix/server/api/types/pagination.py +4 -4
  122. phoenix/server/api/utils.py +2 -4
  123. phoenix/server/app.py +16 -25
  124. phoenix/server/bearer_auth.py +4 -10
  125. phoenix/server/dml_event.py +3 -3
  126. phoenix/server/dml_event_handler.py +10 -24
  127. phoenix/server/grpc_server.py +3 -2
  128. phoenix/server/jwt_store.py +22 -21
  129. phoenix/server/main.py +3 -3
  130. phoenix/server/oauth2.py +3 -2
  131. phoenix/server/rate_limiters.py +5 -8
  132. phoenix/server/static/.vite/manifest.json +31 -31
  133. phoenix/server/static/assets/components-C70HJiXz.js +1612 -0
  134. phoenix/server/static/assets/{index-BHfTZ6x_.js → index-DLe1Oo3l.js} +2 -2
  135. phoenix/server/static/assets/{pages-aAez_Ntk.js → pages-C8-Sl7JI.js} +269 -434
  136. phoenix/server/static/assets/{vendor-6IcPAw_j.js → vendor-CtqfhlbC.js} +6 -6
  137. phoenix/server/static/assets/{vendor-arizeai-DRZuoyuF.js → vendor-arizeai-C_3SBz56.js} +2 -2
  138. phoenix/server/static/assets/{vendor-codemirror-DVE2_WBr.js → vendor-codemirror-wfdk9cjp.js} +1 -1
  139. phoenix/server/static/assets/{vendor-recharts-DwrexFA4.js → vendor-recharts-BiVnSv90.js} +1 -1
  140. phoenix/server/thread_server.py +1 -1
  141. phoenix/server/types.py +17 -29
  142. phoenix/services.py +4 -3
  143. phoenix/session/client.py +12 -24
  144. phoenix/session/data_extractor.py +3 -3
  145. phoenix/session/evaluation.py +1 -2
  146. phoenix/session/session.py +11 -20
  147. phoenix/trace/attributes.py +16 -28
  148. phoenix/trace/dsl/filter.py +17 -21
  149. phoenix/trace/dsl/helpers.py +3 -3
  150. phoenix/trace/dsl/query.py +13 -22
  151. phoenix/trace/fixtures.py +11 -17
  152. phoenix/trace/otel.py +5 -15
  153. phoenix/trace/projects.py +3 -2
  154. phoenix/trace/schemas.py +2 -2
  155. phoenix/trace/span_evaluations.py +9 -8
  156. phoenix/trace/span_json_decoder.py +3 -3
  157. phoenix/trace/span_json_encoder.py +2 -2
  158. phoenix/trace/trace_dataset.py +6 -5
  159. phoenix/trace/utils.py +6 -6
  160. phoenix/utilities/deprecation.py +3 -2
  161. phoenix/utilities/error_handling.py +3 -2
  162. phoenix/utilities/json.py +2 -1
  163. phoenix/utilities/logging.py +2 -2
  164. phoenix/utilities/project.py +1 -1
  165. phoenix/utilities/re.py +3 -4
  166. phoenix/utilities/template_formatters.py +5 -4
  167. phoenix/version.py +1 -1
  168. phoenix/server/static/assets/components-mVBxvljU.js +0 -1428
  169. {arize_phoenix-5.5.1.dist-info → arize_phoenix-5.6.0.dist-info}/WHEEL +0 -0
  170. {arize_phoenix-5.5.1.dist-info → arize_phoenix-5.6.0.dist-info}/entry_points.txt +0 -0
  171. {arize_phoenix-5.5.1.dist-info → arize_phoenix-5.6.0.dist-info}/licenses/IP_NOTICE +0 -0
  172. {arize_phoenix-5.5.1.dist-info → arize_phoenix-5.6.0.dist-info}/licenses/LICENSE +0 -0
--- a/phoenix/server/api/subscriptions.py
+++ b/phoenix/server/api/subscriptions.py
@@ -1,26 +1,13 @@
 import json
 from abc import ABC, abstractmethod
 from collections import defaultdict
-from dataclasses import fields
-from datetime import datetime
+from collections.abc import AsyncIterator, Callable, Iterable, Iterator, Mapping
+from dataclasses import asdict
+from datetime import datetime, timezone
 from enum import Enum
 from itertools import chain
-from typing import (
-    TYPE_CHECKING,
-    Annotated,
-    Any,
-    AsyncIterator,
-    Callable,
-    DefaultDict,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Type,
-    Union,
-)
+from traceback import format_exc
+from typing import TYPE_CHECKING, Annotated, Any, Optional, Union, cast

 import strawberry
 from openinference.instrumentation import safe_json_dumps
@@ -32,9 +19,7 @@ from openinference.semconv.trace import (
     ToolAttributes,
     ToolCallAttributes,
 )
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import SimpleSpanProcessor
-from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from opentelemetry.sdk.trace.id_generator import RandomIdGenerator as DefaultOTelIDGenerator
 from opentelemetry.trace import StatusCode
 from sqlalchemy import insert, select
 from strawberry import UNSET
@@ -42,8 +27,10 @@ from strawberry.scalars import JSON as JSONScalarType
 from strawberry.types import Info
 from typing_extensions import TypeAlias, assert_never

+from phoenix.datetime_utils import local_now, normalize_datetime
 from phoenix.db import models
 from phoenix.server.api.context import Context
+from phoenix.server.api.exceptions import BadRequest
 from phoenix.server.api.input_types.ChatCompletionMessageInput import ChatCompletionMessageInput
 from phoenix.server.api.input_types.InvocationParameters import InvocationParameters
 from phoenix.server.api.types.ChatCompletionMessageRole import ChatCompletionMessageRole
@@ -51,6 +38,10 @@ from phoenix.server.api.types.GenerativeProvider import GenerativeProviderKey
 from phoenix.server.api.types.Span import Span, to_gql_span
 from phoenix.server.dml_event import SpanInsertEvent
 from phoenix.trace.attributes import unflatten
+from phoenix.trace.schemas import (
+    SpanEvent,
+    SpanException,
+)
 from phoenix.utilities.json import jsonify
 from phoenix.utilities.template_formatters import (
     FStringTemplateFormatter,
@@ -61,11 +52,15 @@ from phoenix.utilities.template_formatters import (
 if TYPE_CHECKING:
     from anthropic.types import MessageParam
     from openai.types import CompletionUsage
-    from openai.types.chat import ChatCompletionMessageParam
+    from openai.types.chat import (
+        ChatCompletionMessageParam,
+        ChatCompletionMessageToolCallParam,
+    )

 PLAYGROUND_PROJECT_NAME = "playground"

 ToolCallID: TypeAlias = str
+SetSpanAttributesFn: TypeAlias = Callable[[Mapping[str, Any]], None]


 @strawberry.enum
@@ -97,13 +92,20 @@ class ToolCallChunk:
     function: FunctionCallChunk


+@strawberry.type
+class ChatCompletionSubscriptionError:
+    message: str
+
+
 @strawberry.type
 class FinishedChatCompletion:
     span: Span


+ChatCompletionChunk: TypeAlias = Union[TextChunk, ToolCallChunk]
+
 ChatCompletionSubscriptionPayload: TypeAlias = Annotated[
-    Union[TextChunk, ToolCallChunk, FinishedChatCompletion],
+    Union[TextChunk, ToolCallChunk, FinishedChatCompletion, ChatCompletionSubscriptionError],
     strawberry.union("ChatCompletionSubscriptionPayload"),
 ]

@@ -121,23 +123,23 @@ class GenerativeModelInput:

 @strawberry.input
 class ChatCompletionInput:
-    messages: List[ChatCompletionMessageInput]
+    messages: list[ChatCompletionMessageInput]
     model: GenerativeModelInput
-    invocation_parameters: InvocationParameters
-    tools: Optional[List[JSONScalarType]] = UNSET
+    invocation_parameters: InvocationParameters = strawberry.field(default_factory=dict)
+    tools: Optional[list[JSONScalarType]] = UNSET
     template: Optional[TemplateOptions] = UNSET
     api_key: Optional[str] = strawberry.field(default=None)


-PLAYGROUND_STREAMING_CLIENT_REGISTRY: Dict[
-    GenerativeProviderKey, Type["PlaygroundStreamingClient"]
+PLAYGROUND_STREAMING_CLIENT_REGISTRY: dict[
+    GenerativeProviderKey, type["PlaygroundStreamingClient"]
 ] = {}


 def register_llm_client(
     provider_key: GenerativeProviderKey,
-) -> Callable[[Type["PlaygroundStreamingClient"]], Type["PlaygroundStreamingClient"]]:
-    def decorator(cls: Type["PlaygroundStreamingClient"]) -> Type["PlaygroundStreamingClient"]:
+) -> Callable[[type["PlaygroundStreamingClient"]], type["PlaygroundStreamingClient"]]:
+    def decorator(cls: type["PlaygroundStreamingClient"]) -> type["PlaygroundStreamingClient"]:
         PLAYGROUND_STREAMING_CLIENT_REGISTRY[provider_key] = cls
         return cls

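Aside: register_llm_client above is a decorator factory — applying @register_llm_client(provider_key) to a class records it in PLAYGROUND_STREAMING_CLIENT_REGISTRY and hands the class back unchanged, so the subscription can look clients up by provider at request time. A minimal, self-contained sketch of the same pattern (generic names, not the phoenix types):

    from typing import Callable

    REGISTRY: dict[str, type] = {}

    def register(key: str) -> Callable[[type], type]:
        # a decorator factory: returns a class decorator that records
        # cls under key and returns the class unchanged
        def decorator(cls: type) -> type:
            REGISTRY[key] = cls
            return cls
        return decorator

    @register("openai")
    class OpenAIClient:
        pass

    assert REGISTRY["openai"] is OpenAIClient  # lookup happens at request time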
@@ -145,45 +147,56 @@ def register_llm_client(


 class PlaygroundStreamingClient(ABC):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None) -> None: ...
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        api_key: Optional[str] = None,
+        set_span_attributes: Optional[SetSpanAttributesFn] = None,
+    ) -> None:
+        self._set_span_attributes = set_span_attributes

     @abstractmethod
     async def chat_completion_create(
         self,
-        messages: List[Tuple[ChatCompletionMessageRole, str]],
-        tools: List[JSONScalarType],
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
         **invocation_parameters: Any,
-    ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
+    ) -> AsyncIterator[ChatCompletionChunk]:
         # a yield statement is needed to satisfy the type-checker
         # https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators
         yield TextChunk(content="")

-    @property
-    @abstractmethod
-    def attributes(self) -> Dict[str, Any]: ...
-

 @register_llm_client(GenerativeProviderKey.OPENAI)
 class OpenAIStreamingClient(PlaygroundStreamingClient):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None) -> None:
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        api_key: Optional[str] = None,
+        set_span_attributes: Optional[SetSpanAttributesFn] = None,
+    ) -> None:
         from openai import AsyncOpenAI

+        super().__init__(model=model, api_key=api_key, set_span_attributes=set_span_attributes)
         self.client = AsyncOpenAI(api_key=api_key)
         self.model_name = model.name
-        self._attributes: Dict[str, Any] = {}

     async def chat_completion_create(
         self,
-        messages: List[Tuple[ChatCompletionMessageRole, str]],
-        tools: List[JSONScalarType],
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
         **invocation_parameters: Any,
-    ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
+    ) -> AsyncIterator[ChatCompletionChunk]:
         from openai import NOT_GIVEN
         from openai.types.chat import ChatCompletionStreamOptionsParam

         # Convert standard messages to OpenAI messages
         openai_messages = [self.to_openai_chat_completion_param(*message) for message in messages]
-        tool_call_ids: Dict[int, str] = {}
+        tool_call_ids: dict[int, str] = {}
         token_usage: Optional["CompletionUsage"] = None
         async for chunk in await self.client.chat.completions.create(
             messages=openai_messages,
@@ -219,15 +232,20 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
                                 ),
                             )
                             yield tool_call_chunk
-        if token_usage is not None:
-            self._attributes.update(_llm_token_counts(token_usage))
+        if token_usage is not None and self._set_span_attributes:
+            self._set_span_attributes(dict(self._llm_token_counts(token_usage)))

     def to_openai_chat_completion_param(
-        self, role: ChatCompletionMessageRole, content: JSONScalarType
+        self,
+        role: ChatCompletionMessageRole,
+        content: JSONScalarType,
+        tool_call_id: Optional[str] = None,
+        tool_calls: Optional[list[JSONScalarType]] = None,
     ) -> "ChatCompletionMessageParam":
         from openai.types.chat import (
             ChatCompletionAssistantMessageParam,
             ChatCompletionSystemMessageParam,
+            ChatCompletionToolMessageParam,
             ChatCompletionUserMessageParam,
         )

@@ -246,26 +264,64 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
                 }
             )
         if role is ChatCompletionMessageRole.AI:
-            return ChatCompletionAssistantMessageParam(
-                {
-                    "content": content,
-                    "role": "assistant",
-                }
-            )
+            if tool_calls is None:
+                return ChatCompletionAssistantMessageParam(
+                    {
+                        "content": content,
+                        "role": "assistant",
+                    }
+                )
+            else:
+                return ChatCompletionAssistantMessageParam(
+                    {
+                        "content": content,
+                        "role": "assistant",
+                        "tool_calls": [
+                            self.to_openai_tool_call_param(tool_call) for tool_call in tool_calls
+                        ],
+                    }
+                )
         if role is ChatCompletionMessageRole.TOOL:
-            raise NotImplementedError
+            if tool_call_id is None:
+                raise ValueError("tool_call_id is required for tool messages")
+            return ChatCompletionToolMessageParam(
+                {"content": content, "role": "tool", "tool_call_id": tool_call_id}
+            )
         assert_never(role)

-    @property
-    def attributes(self) -> Dict[str, Any]:
-        return self._attributes
+    def to_openai_tool_call_param(
+        self,
+        tool_call: JSONScalarType,
+    ) -> "ChatCompletionMessageToolCallParam":
+        from openai.types.chat import ChatCompletionMessageToolCallParam
+
+        return ChatCompletionMessageToolCallParam(
+            id=tool_call.get("id", ""),
+            function={
+                "name": tool_call.get("function", {}).get("name", ""),
+                "arguments": safe_json_dumps(tool_call.get("function", {}).get("arguments", "")),
+            },
+            type="function",
+        )
+
+    @staticmethod
+    def _llm_token_counts(usage: "CompletionUsage") -> Iterator[tuple[str, Any]]:
+        yield LLM_TOKEN_COUNT_PROMPT, usage.prompt_tokens
+        yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
+        yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens


 @register_llm_client(GenerativeProviderKey.AZURE_OPENAI)
 class AzureOpenAIStreamingClient(OpenAIStreamingClient):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        api_key: Optional[str] = None,
+        set_span_attributes: Optional[SetSpanAttributesFn] = None,
+    ):
         from openai import AsyncAzureOpenAI

+        super().__init__(model=model, api_key=api_key, set_span_attributes=set_span_attributes)
         if model.endpoint is None or model.api_version is None:
             raise ValueError("endpoint and api_version are required for Azure OpenAI models")
         self.client = AsyncAzureOpenAI(
@@ -277,18 +333,29 @@ class AzureOpenAIStreamingClient(OpenAIStreamingClient):

 @register_llm_client(GenerativeProviderKey.ANTHROPIC)
 class AnthropicStreamingClient(PlaygroundStreamingClient):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None) -> None:
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        api_key: Optional[str] = None,
+        set_span_attributes: Optional[SetSpanAttributesFn] = None,
+    ) -> None:
         import anthropic

+        super().__init__(model=model, api_key=api_key, set_span_attributes=set_span_attributes)
         self.client = anthropic.AsyncAnthropic(api_key=api_key)
         self.model_name = model.name

     async def chat_completion_create(
         self,
-        messages: List[Tuple[ChatCompletionMessageRole, str]],
-        tools: List[JSONScalarType],
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
         **invocation_parameters: Any,
-    ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        import anthropic.lib.streaming as anthropic_streaming
+        import anthropic.types as anthropic_types
+
         anthropic_messages, system_prompt = self._build_anthropic_messages(messages)

         anthropic_params = {
@@ -298,17 +365,43 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
             "max_tokens": 1024,
             **invocation_parameters,
         }
-
         async with self.client.messages.stream(**anthropic_params) as stream:
-            async for text in stream.text_stream:
-                yield TextChunk(content=text)
+            async for event in stream:
+                if isinstance(event, anthropic_types.RawMessageStartEvent):
+                    if self._set_span_attributes:
+                        self._set_span_attributes(
+                            {LLM_TOKEN_COUNT_PROMPT: event.message.usage.input_tokens}
+                        )
+                elif isinstance(event, anthropic_streaming.TextEvent):
+                    yield TextChunk(content=event.text)
+                elif isinstance(event, anthropic_streaming.MessageStopEvent):
+                    if self._set_span_attributes:
+                        self._set_span_attributes(
+                            {LLM_TOKEN_COUNT_COMPLETION: event.message.usage.output_tokens}
+                        )
+                elif isinstance(
+                    event,
+                    (
+                        anthropic_types.RawContentBlockStartEvent,
+                        anthropic_types.RawContentBlockDeltaEvent,
+                        anthropic_types.RawMessageDeltaEvent,
+                        anthropic_streaming.ContentBlockStopEvent,
+                    ),
+                ):
+                    # event types emitted by the stream that don't contain useful information
+                    pass
+                elif isinstance(event, anthropic_streaming.InputJsonEvent):
+                    raise NotImplementedError
+                else:
+                    assert_never(event)

     def _build_anthropic_messages(
-        self, messages: List[Tuple[ChatCompletionMessageRole, str]]
-    ) -> Tuple[List["MessageParam"], str]:
-        anthropic_messages: List["MessageParam"] = []
+        self,
+        messages: list[tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[str]]]],
+    ) -> tuple[list["MessageParam"], str]:
+        anthropic_messages: list["MessageParam"] = []
         system_prompt = ""
-        for role, content in messages:
+        for role, content, _tool_call_id, _tool_calls in messages:
             if role == ChatCompletionMessageRole.USER:
                 anthropic_messages.append({"role": "user", "content": content})
             elif role == ChatCompletionMessageRole.AI:
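Aside: the Anthropic client above now consumes the raw event stream and dispatches on event type, ending the chain with assert_never so a type checker flags any event class the branches forget to cover. A reduced sketch of that exhaustiveness idiom (toy event types, not the anthropic SDK's):

    from dataclasses import dataclass
    from typing import Union

    from typing_extensions import assert_never

    @dataclass
    class TextEvent:
        text: str

    @dataclass
    class StopEvent:
        output_tokens: int

    Event = Union[TextEvent, StopEvent]

    def handle(event: Event) -> str:
        if isinstance(event, TextEvent):
            return event.text
        elif isinstance(event, StopEvent):
            return f"[stopped after {event.output_tokens} tokens]"
        else:
            # unreachable while the branches above are exhaustive; mypy
            # errors here if a new Event member is added but not handled
            assert_never(event)

    print(handle(TextEvent("hello")))  # hello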
@@ -322,10 +415,6 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):

         return anthropic_messages, system_prompt

-    @property
-    def attributes(self) -> Dict[str, Any]:
-        return dict()
-

 @strawberry.type
 class Subscription:
@@ -335,44 +424,45 @@ class Subscription:
     ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
         # Determine which LLM client to use based on provider_key
         provider_key = input.model.provider_key
-        llm_client_class = PLAYGROUND_STREAMING_CLIENT_REGISTRY.get(provider_key)
-        if llm_client_class is None:
-            raise ValueError(f"No LLM client registered for provider '{provider_key}'")
-
-        llm_client = llm_client_class(model=input.model, api_key=input.api_key)
-
-        messages = [(message.role, message.content) for message in input.messages]
-
+        if (llm_client_class := PLAYGROUND_STREAMING_CLIENT_REGISTRY.get(provider_key)) is None:
+            raise BadRequest(f"No LLM client registered for provider '{provider_key}'")
+        llm_client = llm_client_class(
+            model=input.model,
+            api_key=input.api_key,
+            set_span_attributes=lambda attrs: attributes.update(attrs),
+        )
+        messages = [
+            (
+                message.role,
+                message.content,
+                message.tool_call_id if isinstance(message.tool_call_id, str) else None,
+                message.tool_calls if isinstance(message.tool_calls, list) else None,
+            )
+            for message in input.messages
+        ]
         if template_options := input.template:
             messages = list(_formatted_messages(messages, template_options))
-
         invocation_parameters = jsonify(input.invocation_parameters)
-
-        in_memory_span_exporter = InMemorySpanExporter()
-        tracer_provider = TracerProvider()
-        tracer_provider.add_span_processor(
-            span_processor=SimpleSpanProcessor(span_exporter=in_memory_span_exporter)
+        attributes = dict(
+            chain(
+                _llm_span_kind(),
+                _llm_model_name(input.model.name),
+                _llm_tools(input.tools or []),
+                _llm_input_messages(messages),
+                _llm_invocation_parameters(invocation_parameters),
+                _input_value_and_mime_type(input),
+            )
         )
-        tracer = tracer_provider.get_tracer(__name__)
-        span_name = "ChatCompletion"
-
-        with tracer.start_span(
-            span_name,
-            attributes=dict(
-                chain(
-                    _llm_span_kind(),
-                    _llm_model_name(input.model.name),
-                    _llm_tools(input.tools or []),
-                    _llm_input_messages(messages),
-                    _llm_invocation_parameters(invocation_parameters),
-                    _input_value_and_mime_type(input),
-                )
-            ),
-        ) as span:
-            response_chunks = []
-            text_chunks: List[TextChunk] = []
-            tool_call_chunks: DefaultDict[ToolCallID, List[ToolCallChunk]] = defaultdict(list)
-
+        status_code: StatusCode
+        status_message = ""
+        events: list[SpanEvent] = []
+        start_time: datetime
+        end_time: datetime
+        response_chunks = []
+        text_chunks: list[TextChunk] = []
+        tool_call_chunks: defaultdict[ToolCallID, list[ToolCallChunk]] = defaultdict(list)
+        try:
+            start_time = cast(datetime, normalize_datetime(dt=local_now(), tz=timezone.utc))
             async for chunk in llm_client.chat_completion_create(
                 messages=messages,
                 tools=input.tools or [],
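Aside: with the in-memory OTel tracer gone, the subscription timestamps the span itself via phoenix.datetime_utils. Judging from the names and the tz=timezone.utc call site, local_now/normalize_datetime behave roughly like this stdlib-only approximation (an assumption about those helpers, not their actual source):

    from datetime import datetime, timezone

    def utc_now() -> datetime:
        # hypothetical stand-in: current local wall-clock time,
        # converted to an aware UTC datetime
        return datetime.now().astimezone(timezone.utc)

    start_time = utc_now()
    # ... stream the chat completion ...
    end_time = utc_now()
    assert end_time.tzinfo is timezone.utc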
@@ -385,31 +475,35 @@ class Subscription:
                 elif isinstance(chunk, ToolCallChunk):
                     yield chunk
                     tool_call_chunks[chunk.id].append(chunk)
-
-            span.set_status(StatusCode.OK)
-            llm_client_attributes = llm_client.attributes
-
-            span.set_attributes(
-                dict(
-                    chain(
-                        _output_value_and_mime_type(response_chunks),
-                        llm_client_attributes.items(),
-                        _llm_output_messages(text_chunks, tool_call_chunks),
-                    )
+                else:
+                    assert_never(chunk)
+            status_code = StatusCode.OK
+        except Exception as error:
+            end_time = cast(datetime, normalize_datetime(dt=local_now(), tz=timezone.utc))
+            status_code = StatusCode.ERROR
+            status_message = str(error)
+            events.append(
+                SpanException(
+                    timestamp=end_time,
+                    message=status_message,
+                    exception_type=type(error).__name__,
+                    exception_escaped=False,
+                    exception_stacktrace=format_exc(),
                 )
             )
-            assert len(spans := in_memory_span_exporter.get_finished_spans()) == 1
-            finished_span = spans[0]
-            assert finished_span.start_time is not None
-            assert finished_span.end_time is not None
-            assert (attributes := finished_span.attributes) is not None
-            start_time = _datetime(epoch_nanoseconds=finished_span.start_time)
-            end_time = _datetime(epoch_nanoseconds=finished_span.end_time)
-            prompt_tokens = llm_client_attributes.get(LLM_TOKEN_COUNT_PROMPT, 0)
-            completion_tokens = llm_client_attributes.get(LLM_TOKEN_COUNT_COMPLETION, 0)
-            trace_id = _hex(finished_span.context.trace_id)
-            span_id = _hex(finished_span.context.span_id)
-            status = finished_span.status
+            yield ChatCompletionSubscriptionError(message=status_message)
+        else:
+            end_time = cast(datetime, normalize_datetime(dt=local_now(), tz=timezone.utc))
+            attributes.update(
+                chain(
+                    _output_value_and_mime_type(response_chunks),
+                    _llm_output_messages(text_chunks, tool_call_chunks),
+                )
+            )
+        prompt_tokens = attributes.get(LLM_TOKEN_COUNT_PROMPT, 0)
+        completion_tokens = attributes.get(LLM_TOKEN_COUNT_COMPLETION, 0)
+        trace_id = _generate_trace_id()
+        span_id = _generate_span_id()
         async with info.context.db() as session:
             if (
                 playground_project_id := await session.scalar(
@@ -434,15 +528,15 @@ class Subscription:
                 trace_rowid=playground_trace.id,
                 span_id=span_id,
                 parent_id=None,
-                name=span_name,
+                name="ChatCompletion",
                 span_kind=LLM,
                 start_time=start_time,
                 end_time=end_time,
                 attributes=unflatten(attributes.items()),
-                events=finished_span.events,
-                status_code=status.status_code.name,
-                status_message=status.description or "",
-                cumulative_error_count=int(not status.is_ok),
+                events=[_serialize_event(event) for event in events],
+                status_code=status_code.name,
+                status_message=status_message,
+                cumulative_error_count=int(status_code is StatusCode.ERROR),
                 cumulative_llm_token_count_prompt=prompt_tokens,
                 cumulative_llm_token_count_completion=completion_tokens,
                 llm_token_count_prompt=prompt_tokens,
@@ -456,56 +550,65 @@ class Subscription:
             info.context.event_queue.put(SpanInsertEvent(ids=(playground_project_id,)))


-def _llm_span_kind() -> Iterator[Tuple[str, Any]]:
+def _llm_span_kind() -> Iterator[tuple[str, Any]]:
     yield OPENINFERENCE_SPAN_KIND, LLM


-def _llm_model_name(model_name: str) -> Iterator[Tuple[str, Any]]:
+def _llm_model_name(model_name: str) -> Iterator[tuple[str, Any]]:
     yield LLM_MODEL_NAME, model_name


-def _llm_invocation_parameters(invocation_parameters: Dict[str, Any]) -> Iterator[Tuple[str, Any]]:
+def _llm_invocation_parameters(invocation_parameters: dict[str, Any]) -> Iterator[tuple[str, Any]]:
     yield LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_parameters)


-def _llm_tools(tools: List[JSONScalarType]) -> Iterator[Tuple[str, Any]]:
+def _llm_tools(tools: list[JSONScalarType]) -> Iterator[tuple[str, Any]]:
     for tool_index, tool in enumerate(tools):
         yield f"{LLM_TOOLS}.{tool_index}.{TOOL_JSON_SCHEMA}", json.dumps(tool)


-def _llm_token_counts(usage: "CompletionUsage") -> Iterator[Tuple[str, Any]]:
-    yield LLM_TOKEN_COUNT_PROMPT, usage.prompt_tokens
-    yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
-    yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens
-
-
-def _input_value_and_mime_type(input: ChatCompletionInput) -> Iterator[Tuple[str, Any]]:
-    assert any(field.name == (api_key := "api_key") for field in fields(ChatCompletionInput))
+def _input_value_and_mime_type(input: ChatCompletionInput) -> Iterator[tuple[str, Any]]:
+    assert (api_key := "api_key") in (input_data := jsonify(input))
+    input_data = {k: v for k, v in input_data.items() if k != api_key}
+    assert api_key not in input_data
     yield INPUT_MIME_TYPE, JSON
-    yield INPUT_VALUE, safe_json_dumps({k: v for k, v in jsonify(input).items() if k != api_key})
+    yield INPUT_VALUE, safe_json_dumps(input_data)


-def _output_value_and_mime_type(output: Any) -> Iterator[Tuple[str, Any]]:
+def _output_value_and_mime_type(output: Any) -> Iterator[tuple[str, Any]]:
     yield OUTPUT_MIME_TYPE, JSON
     yield OUTPUT_VALUE, safe_json_dumps(jsonify(output))


 def _llm_input_messages(
-    messages: Iterable[Tuple[ChatCompletionMessageRole, str]],
-) -> Iterator[Tuple[str, Any]]:
-    for i, (role, content) in enumerate(messages):
+    messages: Iterable[
+        tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+    ],
+) -> Iterator[tuple[str, Any]]:
+    for i, (role, content, _tool_call_id, tool_calls) in enumerate(messages):
         yield f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_ROLE}", role.value.lower()
         yield f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_CONTENT}", content
+        if tool_calls is not None:
+            for tool_call_index, tool_call in enumerate(tool_calls):
+                yield (
+                    f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_NAME}",
+                    tool_call["function"]["name"],
+                )
+                if arguments := tool_call["function"]["arguments"]:
+                    yield (
+                        f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
+                        safe_json_dumps(jsonify(arguments)),
+                    )


 def _llm_output_messages(
-    text_chunks: List[TextChunk],
-    tool_call_chunks: DefaultDict[ToolCallID, List[ToolCallChunk]],
-) -> Iterator[Tuple[str, Any]]:
+    text_chunks: list[TextChunk],
+    tool_call_chunks: defaultdict[ToolCallID, list[ToolCallChunk]],
+) -> Iterator[tuple[str, Any]]:
     yield f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"
     if content := "".join(chunk.content for chunk in text_chunks):
         yield f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", content
-    for tool_call_index, tool_call_chunks_ in tool_call_chunks.items():
+    for tool_call_index, (_tool_call_id, tool_call_chunks_) in enumerate(tool_call_chunks.items()):
         if tool_call_chunks_ and (name := tool_call_chunks_[0].function.name):
             yield (
                 f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_NAME}",
@@ -518,34 +621,46 @@ def _llm_output_messages(
             )


-def _hex(number: int) -> str:
+def _generate_trace_id() -> str:
     """
-    Converts an integer to a hexadecimal string.
+    Generates a random trace ID in hexadecimal format.
     """
-    return hex(number)[2:]
+    return _hex(DefaultOTelIDGenerator().generate_trace_id())


-def _datetime(*, epoch_nanoseconds: float) -> datetime:
+def _generate_span_id() -> str:
     """
-    Converts a Unix epoch timestamp in nanoseconds to a datetime.
+    Generates a random span ID in hexadecimal format.
     """
-    epoch_seconds = epoch_nanoseconds / 1e9
-    return datetime.fromtimestamp(epoch_seconds)
+    return _hex(DefaultOTelIDGenerator().generate_span_id())
+
+
+def _hex(number: int) -> str:
+    """
+    Converts an integer to a hexadecimal string.
+    """
+    return hex(number)[2:]


 def _formatted_messages(
-    messages: Iterable[Tuple[ChatCompletionMessageRole, str]], template_options: TemplateOptions
-) -> Iterator[Tuple[ChatCompletionMessageRole, str]]:
+    messages: Iterable[tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[str]]]],
+    template_options: TemplateOptions,
+) -> Iterator[tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[str]]]]:
     """
     Formats the messages using the given template options.
     """
     template_formatter = _template_formatter(template_language=template_options.language)
-    roles, templates = zip(*messages)
+    (
+        roles,
+        templates,
+        tool_call_id,
+        tool_calls,
+    ) = zip(*messages)
     formatted_templates = map(
         lambda template: template_formatter.format(template, **template_options.variables),
         templates,
     )
-    formatted_messages = zip(roles, formatted_templates)
+    formatted_messages = zip(roles, formatted_templates, tool_call_id, tool_calls)
     return formatted_messages

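Aside: _generate_trace_id and _generate_span_id above mint IDs with the OTel SDK's RandomIdGenerator instead of reading them off a finished span. A small sketch of what the helpers produce, assuming opentelemetry-sdk is installed:

    from opentelemetry.sdk.trace.id_generator import RandomIdGenerator

    generator = RandomIdGenerator()
    trace_id = generator.generate_trace_id()  # random non-zero 128-bit int
    span_id = generator.generate_span_id()    # random non-zero 64-bit int

    # mirrors the diff's _hex: strip the "0x" prefix, no zero-padding
    print(hex(trace_id)[2:], hex(span_id)[2:])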
@@ -560,6 +675,13 @@ def _template_formatter(template_language: TemplateLanguage) -> TemplateFormatter:
     assert_never(template_language)


+def _serialize_event(event: SpanEvent) -> dict[str, Any]:
+    """
+    Serializes a SpanEvent to a dictionary.
+    """
+    return {k: (v.isoformat() if isinstance(v, datetime) else v) for k, v in asdict(event).items()}
+
+
 JSON = OpenInferenceMimeTypeValues.JSON.value

 LLM = OpenInferenceSpanKindValues.LLM.value
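Aside: _serialize_event converts the span events collected during streaming into plain dicts before they are stored on the models.Span row. A runnable sketch of its behavior, using a stand-in dataclass whose fields mirror the SpanException(...) call in the diff (the real class lives in phoenix.trace.schemas):

    from dataclasses import asdict, dataclass
    from datetime import datetime, timezone
    from typing import Any

    @dataclass
    class SpanException:  # stand-in for phoenix.trace.schemas.SpanException
        timestamp: datetime
        message: str
        exception_type: str
        exception_escaped: bool
        exception_stacktrace: str

    def serialize_event(event: Any) -> dict[str, Any]:
        # datetimes become ISO-8601 strings; everything else passes through
        return {k: (v.isoformat() if isinstance(v, datetime) else v) for k, v in asdict(event).items()}

    event = SpanException(
        timestamp=datetime(2024, 10, 31, tzinfo=timezone.utc),
        message="boom",
        exception_type="ValueError",
        exception_escaped=False,
        exception_stacktrace="Traceback (most recent call last): ...",
    )
    print(serialize_event(event)["timestamp"])  # 2024-10-31T00:00:00+00:00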