PyPI - arize-phoenix - Versions diffs - 5.5.2__py3-none-any.whl → 5.7.0__py3-none-any.whl - Mend

arize-phoenix 5.5.2-py3-none-any.whl → 5.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic. Click here for more details.

Files changed (186)

{arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/METADATA +4 -7
arize_phoenix-5.7.0.dist-info/RECORD +330 -0
phoenix/config.py +50 -8
phoenix/core/model.py +3 -3
phoenix/core/model_schema.py +41 -50
phoenix/core/model_schema_adapter.py +17 -16
phoenix/datetime_utils.py +2 -2
phoenix/db/bulk_inserter.py +10 -20
phoenix/db/engines.py +2 -1
phoenix/db/enums.py +2 -2
phoenix/db/helpers.py +8 -7
phoenix/db/insertion/dataset.py +9 -19
phoenix/db/insertion/document_annotation.py +14 -13
phoenix/db/insertion/helpers.py +6 -16
phoenix/db/insertion/span_annotation.py +14 -13
phoenix/db/insertion/trace_annotation.py +14 -13
phoenix/db/insertion/types.py +19 -30
phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +8 -8
phoenix/db/models.py +28 -28
phoenix/experiments/evaluators/base.py +2 -1
phoenix/experiments/evaluators/code_evaluators.py +4 -5
phoenix/experiments/evaluators/llm_evaluators.py +157 -4
phoenix/experiments/evaluators/utils.py +3 -2
phoenix/experiments/functions.py +10 -21
phoenix/experiments/tracing.py +2 -1
phoenix/experiments/types.py +20 -29
phoenix/experiments/utils.py +2 -1
phoenix/inferences/errors.py +6 -5
phoenix/inferences/fixtures.py +6 -5
phoenix/inferences/inferences.py +37 -37
phoenix/inferences/schema.py +11 -10
phoenix/inferences/validation.py +13 -14
phoenix/logging/_formatter.py +3 -3
phoenix/metrics/__init__.py +5 -4
phoenix/metrics/binning.py +2 -1
phoenix/metrics/metrics.py +2 -1
phoenix/metrics/mixins.py +7 -6
phoenix/metrics/retrieval_metrics.py +2 -1
phoenix/metrics/timeseries.py +5 -4
phoenix/metrics/wrappers.py +2 -2
phoenix/pointcloud/clustering.py +3 -4
phoenix/pointcloud/pointcloud.py +7 -5
phoenix/pointcloud/umap_parameters.py +2 -1
phoenix/server/api/dataloaders/annotation_summaries.py +12 -19
phoenix/server/api/dataloaders/average_experiment_run_latency.py +2 -2
phoenix/server/api/dataloaders/cache/two_tier_cache.py +3 -2
phoenix/server/api/dataloaders/dataset_example_revisions.py +3 -8
phoenix/server/api/dataloaders/dataset_example_spans.py +2 -5
phoenix/server/api/dataloaders/document_evaluation_summaries.py +12 -18
phoenix/server/api/dataloaders/document_evaluations.py +3 -7
phoenix/server/api/dataloaders/document_retrieval_metrics.py +6 -13
phoenix/server/api/dataloaders/experiment_annotation_summaries.py +4 -8
phoenix/server/api/dataloaders/experiment_error_rates.py +2 -5
phoenix/server/api/dataloaders/experiment_run_annotations.py +3 -7
phoenix/server/api/dataloaders/experiment_run_counts.py +1 -5
phoenix/server/api/dataloaders/experiment_sequence_number.py +2 -5
phoenix/server/api/dataloaders/latency_ms_quantile.py +21 -30
phoenix/server/api/dataloaders/min_start_or_max_end_times.py +7 -13
phoenix/server/api/dataloaders/project_by_name.py +3 -3
phoenix/server/api/dataloaders/record_counts.py +11 -18
phoenix/server/api/dataloaders/span_annotations.py +3 -7
phoenix/server/api/dataloaders/span_dataset_examples.py +3 -8
phoenix/server/api/dataloaders/span_descendants.py +3 -7
phoenix/server/api/dataloaders/span_projects.py +2 -2
phoenix/server/api/dataloaders/token_counts.py +12 -19
phoenix/server/api/dataloaders/trace_row_ids.py +3 -7
phoenix/server/api/dataloaders/user_roles.py +3 -3
phoenix/server/api/dataloaders/users.py +3 -3
phoenix/server/api/helpers/__init__.py +4 -3
phoenix/server/api/helpers/dataset_helpers.py +10 -9
phoenix/server/api/helpers/playground_clients.py +671 -0
phoenix/server/api/helpers/playground_registry.py +70 -0
phoenix/server/api/helpers/playground_spans.py +325 -0
phoenix/server/api/input_types/AddExamplesToDatasetInput.py +2 -2
phoenix/server/api/input_types/AddSpansToDatasetInput.py +2 -2
phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
phoenix/server/api/input_types/ChatCompletionMessageInput.py +13 -1
phoenix/server/api/input_types/ClusterInput.py +2 -2
phoenix/server/api/input_types/DeleteAnnotationsInput.py +1 -3
phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +2 -2
phoenix/server/api/input_types/DeleteExperimentsInput.py +1 -3
phoenix/server/api/input_types/DimensionFilter.py +4 -4
phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
phoenix/server/api/input_types/Granularity.py +1 -1
phoenix/server/api/input_types/InvocationParameters.py +156 -13
phoenix/server/api/input_types/PatchDatasetExamplesInput.py +2 -2
phoenix/server/api/input_types/TemplateOptions.py +10 -0
phoenix/server/api/mutations/__init__.py +4 -0
phoenix/server/api/mutations/chat_mutations.py +374 -0
phoenix/server/api/mutations/dataset_mutations.py +4 -4
phoenix/server/api/mutations/experiment_mutations.py +1 -2
phoenix/server/api/mutations/export_events_mutations.py +7 -7
phoenix/server/api/mutations/span_annotations_mutations.py +4 -4
phoenix/server/api/mutations/trace_annotations_mutations.py +4 -4
phoenix/server/api/mutations/user_mutations.py +4 -4
phoenix/server/api/openapi/schema.py +2 -2
phoenix/server/api/queries.py +61 -72
phoenix/server/api/routers/oauth2.py +4 -4
phoenix/server/api/routers/v1/datasets.py +22 -36
phoenix/server/api/routers/v1/evaluations.py +6 -5
phoenix/server/api/routers/v1/experiment_evaluations.py +2 -2
phoenix/server/api/routers/v1/experiment_runs.py +2 -2
phoenix/server/api/routers/v1/experiments.py +4 -4
phoenix/server/api/routers/v1/spans.py +13 -12
phoenix/server/api/routers/v1/traces.py +5 -5
phoenix/server/api/routers/v1/utils.py +5 -5
phoenix/server/api/schema.py +42 -10
phoenix/server/api/subscriptions.py +347 -494
phoenix/server/api/types/AnnotationSummary.py +3 -3
phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +44 -0
phoenix/server/api/types/Cluster.py +8 -7
phoenix/server/api/types/Dataset.py +5 -4
phoenix/server/api/types/Dimension.py +3 -3
phoenix/server/api/types/DocumentEvaluationSummary.py +8 -7
phoenix/server/api/types/EmbeddingDimension.py +6 -5
phoenix/server/api/types/EvaluationSummary.py +3 -3
phoenix/server/api/types/Event.py +7 -7
phoenix/server/api/types/Experiment.py +3 -3
phoenix/server/api/types/ExperimentComparison.py +2 -4
phoenix/server/api/types/GenerativeProvider.py +27 -3
phoenix/server/api/types/Inferences.py +9 -8
phoenix/server/api/types/InferencesRole.py +2 -2
phoenix/server/api/types/Model.py +2 -2
phoenix/server/api/types/Project.py +11 -18
phoenix/server/api/types/Segments.py +3 -3
phoenix/server/api/types/Span.py +45 -7
phoenix/server/api/types/TemplateLanguage.py +9 -0
phoenix/server/api/types/TimeSeries.py +8 -7
phoenix/server/api/types/Trace.py +2 -2
phoenix/server/api/types/UMAPPoints.py +6 -6
phoenix/server/api/types/User.py +3 -3
phoenix/server/api/types/node.py +1 -3
phoenix/server/api/types/pagination.py +4 -4
phoenix/server/api/utils.py +2 -4
phoenix/server/app.py +76 -37
phoenix/server/bearer_auth.py +4 -10
phoenix/server/dml_event.py +3 -3
phoenix/server/dml_event_handler.py +10 -24
phoenix/server/grpc_server.py +3 -2
phoenix/server/jwt_store.py +22 -21
phoenix/server/main.py +17 -4
phoenix/server/oauth2.py +3 -2
phoenix/server/rate_limiters.py +5 -8
phoenix/server/static/.vite/manifest.json +31 -31
phoenix/server/static/assets/components-Csu8UKOs.js +1612 -0
phoenix/server/static/assets/{index-DCzakdJq.js → index-Bk5C9EA7.js} +2 -2
phoenix/server/static/assets/{pages-CAL1FDMt.js → pages-UeWaKXNs.js} +337 -442
phoenix/server/static/assets/{vendor-6IcPAw_j.js → vendor-CtqfhlbC.js} +6 -6
phoenix/server/static/assets/{vendor-arizeai-DRZuoyuF.js → vendor-arizeai-C_3SBz56.js} +2 -2
phoenix/server/static/assets/{vendor-codemirror-DVE2_WBr.js → vendor-codemirror-wfdk9cjp.js} +1 -1
phoenix/server/static/assets/{vendor-recharts-DwrexFA4.js → vendor-recharts-BiVnSv90.js} +1 -1
phoenix/server/templates/index.html +1 -0
phoenix/server/thread_server.py +1 -1
phoenix/server/types.py +17 -29
phoenix/services.py +8 -3
phoenix/session/client.py +12 -24
phoenix/session/data_extractor.py +3 -3
phoenix/session/evaluation.py +1 -2
phoenix/session/session.py +26 -21
phoenix/trace/attributes.py +16 -28
phoenix/trace/dsl/filter.py +17 -21
phoenix/trace/dsl/helpers.py +3 -3
phoenix/trace/dsl/query.py +13 -22
phoenix/trace/fixtures.py +11 -17
phoenix/trace/otel.py +5 -15
phoenix/trace/projects.py +3 -2
phoenix/trace/schemas.py +2 -2
phoenix/trace/span_evaluations.py +9 -8
phoenix/trace/span_json_decoder.py +3 -3
phoenix/trace/span_json_encoder.py +2 -2
phoenix/trace/trace_dataset.py +6 -5
phoenix/trace/utils.py +6 -6
phoenix/utilities/deprecation.py +3 -2
phoenix/utilities/error_handling.py +3 -2
phoenix/utilities/json.py +2 -1
phoenix/utilities/logging.py +2 -2
phoenix/utilities/project.py +1 -1
phoenix/utilities/re.py +3 -4
phoenix/utilities/template_formatters.py +16 -5
phoenix/version.py +1 -1
arize_phoenix-5.5.2.dist-info/RECORD +0 -321
phoenix/server/static/assets/components-hX0LgYz3.js +0 -1428
{arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/WHEEL +0 -0
{arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/entry_points.txt +0 -0
{arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/licenses/IP_NOTICE +0 -0
{arize_phoenix-5.5.2.dist-info → arize_phoenix-5.7.0.dist-info}/licenses/LICENSE +0 -0

phoenix/server/api/subscriptions.py CHANGED

@@ -1,329 +1,73 @@
-import json
-from abc import ABC, abstractmethod
-from collections import defaultdict
-from datetime import datetime, timezone
-from enum import Enum
-from itertools import chain
+import logging
+from asyncio import FIRST_COMPLETED, Task, create_task, wait
+from collections.abc import Iterator
 from typing import (
-    TYPE_CHECKING,
-    Annotated,
     Any,
     AsyncIterator,
-    Callable,
-    DefaultDict,
-    Dict,
+    Collection,
     Iterable,
-    Iterator,
-    List,
+    Mapping,
     Optional,
-    Tuple,
-    Type,
-    Union,
+    TypeVar,
 )
 import strawberry
-from openinference.instrumentation import safe_json_dumps
-from openinference.semconv.trace import (
-    MessageAttributes,
-    OpenInferenceMimeTypeValues,
-    OpenInferenceSpanKindValues,
-    SpanAttributes,
-    ToolAttributes,
-    ToolCallAttributes,
-)
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import SimpleSpanProcessor
-from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
-from opentelemetry.trace import StatusCode
-from sqlalchemy import insert, select
-from strawberry import UNSET
-from strawberry.scalars import JSON as JSONScalarType
+from openinference.semconv.trace import SpanAttributes
+from sqlalchemy import and_, func, insert, select
+from sqlalchemy.orm import load_only
+from strawberry.relay.types import GlobalID
 from strawberry.types import Info
 from typing_extensions import TypeAlias, assert_never
 from phoenix.db import models
 from phoenix.server.api.context import Context
-from phoenix.server.api.input_types.ChatCompletionMessageInput import ChatCompletionMessageInput
-from phoenix.server.api.input_types.InvocationParameters import InvocationParameters
+from phoenix.server.api.exceptions import BadRequest
+from phoenix.server.api.helpers.playground_clients import (
+    PlaygroundStreamingClient,
+    initialize_playground_clients,
+)
+from phoenix.server.api.helpers.playground_registry import (
+    PLAYGROUND_CLIENT_REGISTRY,
+)
+from phoenix.server.api.helpers.playground_spans import streaming_llm_span
+from phoenix.server.api.input_types.ChatCompletionInput import (
+    ChatCompletionInput,
+    ChatCompletionOverDatasetInput,
+)
 from phoenix.server.api.types.ChatCompletionMessageRole import ChatCompletionMessageRole
-from phoenix.server.api.types.GenerativeProvider import GenerativeProviderKey
-from phoenix.server.api.types.Span import Span, to_gql_span
+from phoenix.server.api.types.ChatCompletionSubscriptionPayload import (
+    ChatCompletionOverDatasetSubscriptionResult,
+    ChatCompletionSubscriptionError,
+    ChatCompletionSubscriptionPayload,
+    FinishedChatCompletion,
+)
+from phoenix.server.api.types.Dataset import Dataset
+from phoenix.server.api.types.DatasetExample import DatasetExample
+from phoenix.server.api.types.DatasetVersion import DatasetVersion
+from phoenix.server.api.types.Experiment import to_gql_experiment
+from phoenix.server.api.types.node import from_global_id_with_expected_type
+from phoenix.server.api.types.Span import to_gql_span
+from phoenix.server.api.types.TemplateLanguage import TemplateLanguage
 from phoenix.server.dml_event import SpanInsertEvent
-from phoenix.trace.attributes import unflatten
-from phoenix.utilities.json import jsonify
+from phoenix.trace.attributes import get_attribute_value
 from phoenix.utilities.template_formatters import (
     FStringTemplateFormatter,
     MustacheTemplateFormatter,
     TemplateFormatter,
+    TemplateFormatterError,
 )
-if TYPE_CHECKING:
-    from anthropic.types import MessageParam
-    from openai.types import CompletionUsage
-    from openai.types.chat import ChatCompletionMessageParam
-PLAYGROUND_PROJECT_NAME = "playground"
-ToolCallID: TypeAlias = str
-@strawberry.enum
-class TemplateLanguage(Enum):
-    MUSTACHE = "MUSTACHE"
-    F_STRING = "F_STRING"
+GenericType = TypeVar("GenericType")
+logger = logging.getLogger(__name__)
-@strawberry.input
-class TemplateOptions:
-    variables: JSONScalarType
-    language: TemplateLanguage
-@strawberry.type
-class TextChunk:
-    content: str
-@strawberry.type
-class FunctionCallChunk:
-    name: str
-    arguments: str
-@strawberry.type
-class ToolCallChunk:
-    id: str
-    function: FunctionCallChunk
+initialize_playground_clients()
-@strawberry.type
-class FinishedChatCompletion:
-    span: Span
-ChatCompletionSubscriptionPayload: TypeAlias = Annotated[
-    Union[TextChunk, ToolCallChunk, FinishedChatCompletion],
-    strawberry.union("ChatCompletionSubscriptionPayload"),
+ChatCompletionMessage: TypeAlias = tuple[
+    ChatCompletionMessageRole, str, Optional[str], Optional[list[str]]
 ]
-@strawberry.input
-class GenerativeModelInput:
-    provider_key: GenerativeProviderKey
-    name: str
-    """ The name of the model. Or the Deployment Name for Azure OpenAI models. """
-    endpoint: Optional[str] = UNSET
-    """ The endpoint to use for the model. Only required for Azure OpenAI models. """
-    api_version: Optional[str] = UNSET
-    """ The API version to use for the model. """
-@strawberry.input
-class ChatCompletionInput:
-    messages: List[ChatCompletionMessageInput]
-    model: GenerativeModelInput
-    invocation_parameters: InvocationParameters
-    tools: Optional[List[JSONScalarType]] = UNSET
-    template: Optional[TemplateOptions] = UNSET
-    api_key: Optional[str] = strawberry.field(default=None)
-PLAYGROUND_STREAMING_CLIENT_REGISTRY: Dict[
-    GenerativeProviderKey, Type["PlaygroundStreamingClient"]
-] = {}
-def register_llm_client(
-    provider_key: GenerativeProviderKey,
-) -> Callable[[Type["PlaygroundStreamingClient"]], Type["PlaygroundStreamingClient"]]:
-    def decorator(cls: Type["PlaygroundStreamingClient"]) -> Type["PlaygroundStreamingClient"]:
-        PLAYGROUND_STREAMING_CLIENT_REGISTRY[provider_key] = cls
-        return cls
-    return decorator
-class PlaygroundStreamingClient(ABC):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None) -> None: ...
-    @abstractmethod
-    async def chat_completion_create(
-        self,
-        messages: List[Tuple[ChatCompletionMessageRole, str]],
-        tools: List[JSONScalarType],
-        **invocation_parameters: Any,
-    ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
-        # a yield statement is needed to satisfy the type-checker
-        # https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators
-        yield TextChunk(content="")
-    @property
-    @abstractmethod
-    def attributes(self) -> Dict[str, Any]: ...
-@register_llm_client(GenerativeProviderKey.OPENAI)
-class OpenAIStreamingClient(PlaygroundStreamingClient):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None) -> None:
-        from openai import AsyncOpenAI
-        self.client = AsyncOpenAI(api_key=api_key)
-        self.model_name = model.name
-        self._attributes: Dict[str, Any] = {}
-    async def chat_completion_create(
-        self,
-        messages: List[Tuple[ChatCompletionMessageRole, str]],
-        tools: List[JSONScalarType],
-        **invocation_parameters: Any,
-    ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
-        from openai import NOT_GIVEN
-        from openai.types.chat import ChatCompletionStreamOptionsParam
-        # Convert standard messages to OpenAI messages
-        openai_messages = [self.to_openai_chat_completion_param(*message) for message in messages]
-        tool_call_ids: Dict[int, str] = {}
-        token_usage: Optional["CompletionUsage"] = None
-        async for chunk in await self.client.chat.completions.create(
-            messages=openai_messages,
-            model=self.model_name,
-            stream=True,
-            stream_options=ChatCompletionStreamOptionsParam(include_usage=True),
-            tools=tools or NOT_GIVEN,
-            **invocation_parameters,
-        ):
-            if (usage := chunk.usage) is not None:
-                token_usage = usage
-                continue
-            choice = chunk.choices[0]
-            delta = choice.delta
-            if choice.finish_reason is None:
-                if isinstance(chunk_content := delta.content, str):
-                    text_chunk = TextChunk(content=chunk_content)
-                    yield text_chunk
-                if (tool_calls := delta.tool_calls) is not None:
-                    for tool_call_index, tool_call in enumerate(tool_calls):
-                        tool_call_id = (
-                            tool_call.id
-                            if tool_call.id is not None
-                            else tool_call_ids[tool_call_index]
-                        )
-                        tool_call_ids[tool_call_index] = tool_call_id
-                        if (function := tool_call.function) is not None:
-                            tool_call_chunk = ToolCallChunk(
-                                id=tool_call_id,
-                                function=FunctionCallChunk(
-                                    name=function.name or "",
-                                    arguments=function.arguments or "",
-                                ),
-                            )
-                            yield tool_call_chunk
-        if token_usage is not None:
-            self._attributes.update(_llm_token_counts(token_usage))
-    def to_openai_chat_completion_param(
-        self, role: ChatCompletionMessageRole, content: JSONScalarType
-    ) -> "ChatCompletionMessageParam":
-        from openai.types.chat import (
-            ChatCompletionAssistantMessageParam,
-            ChatCompletionSystemMessageParam,
-            ChatCompletionUserMessageParam,
-        )
-        if role is ChatCompletionMessageRole.USER:
-            return ChatCompletionUserMessageParam(
-                {
-                    "content": content,
-                    "role": "user",
-                }
-            )
-        if role is ChatCompletionMessageRole.SYSTEM:
-            return ChatCompletionSystemMessageParam(
-                {
-                    "content": content,
-                    "role": "system",
-                }
-            )
-        if role is ChatCompletionMessageRole.AI:
-            return ChatCompletionAssistantMessageParam(
-                {
-                    "content": content,
-                    "role": "assistant",
-                }
-            )
-        if role is ChatCompletionMessageRole.TOOL:
-            raise NotImplementedError
-        assert_never(role)
-    @property
-    def attributes(self) -> Dict[str, Any]:
-        return self._attributes
-@register_llm_client(GenerativeProviderKey.AZURE_OPENAI)
-class AzureOpenAIStreamingClient(OpenAIStreamingClient):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None):
-        from openai import AsyncAzureOpenAI
-        if model.endpoint is None or model.api_version is None:
-            raise ValueError("endpoint and api_version are required for Azure OpenAI models")
-        self.client = AsyncAzureOpenAI(
-            api_key=api_key,
-            azure_endpoint=model.endpoint,
-            api_version=model.api_version,
-        )
-@register_llm_client(GenerativeProviderKey.ANTHROPIC)
-class AnthropicStreamingClient(PlaygroundStreamingClient):
-    def __init__(self, model: GenerativeModelInput, api_key: Optional[str] = None) -> None:
-        import anthropic
-        self.client = anthropic.AsyncAnthropic(api_key=api_key)
-        self.model_name = model.name
-    async def chat_completion_create(
-        self,
-        messages: List[Tuple[ChatCompletionMessageRole, str]],
-        tools: List[JSONScalarType],
-        **invocation_parameters: Any,
-    ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
-        anthropic_messages, system_prompt = self._build_anthropic_messages(messages)
-        anthropic_params = {
-            "messages": anthropic_messages,
-            "model": self.model_name,
-            "system": system_prompt,
-            "max_tokens": 1024,
-            **invocation_parameters,
-        }
-        async with self.client.messages.stream(**anthropic_params) as stream:
-            async for text in stream.text_stream:
-                yield TextChunk(content=text)
-    def _build_anthropic_messages(
-        self, messages: List[Tuple[ChatCompletionMessageRole, str]]
-    ) -> Tuple[List["MessageParam"], str]:
-        anthropic_messages: List["MessageParam"] = []
-        system_prompt = ""
-        for role, content in messages:
-            if role == ChatCompletionMessageRole.USER:
-                anthropic_messages.append({"role": "user", "content": content})
-            elif role == ChatCompletionMessageRole.AI:
-                anthropic_messages.append({"role": "assistant", "content": content})
-            elif role == ChatCompletionMessageRole.SYSTEM:
-                system_prompt += content + "\n"
-            elif role == ChatCompletionMessageRole.TOOL:
-                raise NotImplementedError
-            else:
-                assert_never(role)
-        return anthropic_messages, system_prompt
-    @property
-    def attributes(self) -> Dict[str, Any]:
-        return dict()
+DatasetExampleID: TypeAlias = GlobalID
+PLAYGROUND_PROJECT_NAME = "playground"
 @strawberry.type
@@ -332,82 +76,48 @@ class Subscription:
     async def chat_completion(
         self, info: Info[Context, None], input: ChatCompletionInput
     ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
-        # Determine which LLM client to use based on provider_key
         provider_key = input.model.provider_key
-        llm_client_class = PLAYGROUND_STREAMING_CLIENT_REGISTRY.get(provider_key)
+        llm_client_class = PLAYGROUND_CLIENT_REGISTRY.get_client(provider_key, input.model.name)
         if llm_client_class is None:
-            raise ValueError(f"No LLM client registered for provider '{provider_key}'")
-        llm_client = llm_client_class(model=input.model, api_key=input.api_key)
-        messages = [(message.role, message.content) for message in input.messages]
+            raise BadRequest(f"No LLM client registered for provider '{provider_key}'")
+        llm_client = llm_client_class(
+            model=input.model,
+            api_key=input.api_key,
+        )
+        messages = [
+            (
+                message.role,
+                message.content,
+                message.tool_call_id if isinstance(message.tool_call_id, str) else None,
+                message.tool_calls if isinstance(message.tool_calls, list) else None,
+            )
+            for message in input.messages
+        ]
         if template_options := input.template:
-            messages = list(_formatted_messages(messages, template_options))
-        invocation_parameters = jsonify(input.invocation_parameters)
-        in_memory_span_exporter = InMemorySpanExporter()
-        tracer_provider = TracerProvider()
-        tracer_provider.add_span_processor(
-            span_processor=SimpleSpanProcessor(span_exporter=in_memory_span_exporter)
-        )
-        tracer = tracer_provider.get_tracer(__name__)
-        span_name = "ChatCompletion"
-        with tracer.start_span(
-            span_name,
-            attributes=dict(
-                chain(
-                    _llm_span_kind(),
-                    _llm_model_name(input.model.name),
-                    _llm_tools(input.tools or []),
-                    _llm_input_messages(messages),
-                    _llm_invocation_parameters(invocation_parameters),
-                    _input_value_and_mime_type(input),
+            messages = list(
+                _formatted_messages(
+                    messages=messages,
+                    template_language=template_options.language,
+                    template_variables=template_options.variables,
                 )
-            ),
+            )
+        invocation_parameters = llm_client.construct_invocation_parameters(
+            input.invocation_parameters
+        )
+        async with streaming_llm_span(
+            input=input,
+            messages=messages,
+            invocation_parameters=invocation_parameters,
         ) as span:
-            response_chunks = []
-            text_chunks: List[TextChunk] = []
-            tool_call_chunks: DefaultDict[ToolCallID, List[ToolCallChunk]] = defaultdict(list)
             async for chunk in llm_client.chat_completion_create(
-                messages=messages,
-                tools=input.tools or [],
-                **invocation_parameters,
+                messages=messages, tools=input.tools or [], **invocation_parameters
             ):
-                response_chunks.append(chunk)
-                if isinstance(chunk, TextChunk):
-                    yield chunk
-                    text_chunks.append(chunk)
-                elif isinstance(chunk, ToolCallChunk):
-                    yield chunk
-                    tool_call_chunks[chunk.id].append(chunk)
-            span.set_status(StatusCode.OK)
-            llm_client_attributes = llm_client.attributes
-            span.set_attributes(
-                dict(
-                    chain(
-                        _output_value_and_mime_type(response_chunks),
-                        llm_client_attributes.items(),
-                        _llm_output_messages(text_chunks, tool_call_chunks),
-                    )
-                )
-            )
-        assert len(spans := in_memory_span_exporter.get_finished_spans()) == 1
-        finished_span = spans[0]
-        assert finished_span.start_time is not None
-        assert finished_span.end_time is not None
-        assert (attributes := finished_span.attributes) is not None
-        start_time = _datetime(epoch_nanoseconds=finished_span.start_time)
-        end_time = _datetime(epoch_nanoseconds=finished_span.end_time)
-        prompt_tokens = llm_client_attributes.get(LLM_TOKEN_COUNT_PROMPT, 0)
-        completion_tokens = llm_client_attributes.get(LLM_TOKEN_COUNT_COMPLETION, 0)
-        trace_id = _hex(finished_span.context.trace_id)
-        span_id = _hex(finished_span.context.span_id)
-        status = finished_span.status
+                span.add_response_chunk(chunk)
+                yield chunk
+            span.set_attributes(llm_client.attributes)
+        if span.error_message is not None:
+            yield ChatCompletionSubscriptionError(message=span.error_message)
         async with info.context.db() as session:
             if (
                 playground_project_id := await session.scalar(
@@ -422,130 +132,273 @@ class Subscription:
                         description="Traces from prompt playground",
                     )
                 )
-            playground_trace = models.Trace(
-                project_rowid=playground_project_id,
-                trace_id=trace_id,
-                start_time=start_time,
-                end_time=end_time,
-            )
-            playground_span = models.Span(
-                trace_rowid=playground_trace.id,
-                span_id=span_id,
-                parent_id=None,
-                name=span_name,
-                span_kind=LLM,
-                start_time=start_time,
-                end_time=end_time,
-                attributes=unflatten(attributes.items()),
-                events=finished_span.events,
-                status_code=status.status_code.name,
-                status_message=status.description or "",
-                cumulative_error_count=int(not status.is_ok),
-                cumulative_llm_token_count_prompt=prompt_tokens,
-                cumulative_llm_token_count_completion=completion_tokens,
-                llm_token_count_prompt=prompt_tokens,
-                llm_token_count_completion=completion_tokens,
-                trace=playground_trace,
-            )
-            session.add(playground_trace)
-            session.add(playground_span)
+            db_span = span.add_to_session(session, playground_project_id)
             await session.flush()
-            yield FinishedChatCompletion(span=to_gql_span(playground_span))
+            yield FinishedChatCompletion(span=to_gql_span(db_span))
         info.context.event_queue.put(SpanInsertEvent(ids=(playground_project_id,)))
+    @strawberry.subscription
+    async def chat_completion_over_dataset(
+        self, info: Info[Context, None], input: ChatCompletionOverDatasetInput
+    ) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
+        """
+        Streams chat completions for the examples of a dataset and records the
+        outcome as an experiment.
+
+        Yields the payloads produced by the per-example streams, then one
+        ``FinishedChatCompletion`` per example that registered a span, and
+        finally a ``ChatCompletionOverDatasetSubscriptionResult`` carrying the
+        created experiment.
+
+        Raises ``BadRequest`` when no LLM client is registered for the requested
+        provider/model, or when the dataset (at the requested version) has no
+        examples.
+        """
+        provider_key = input.model.provider_key
+        llm_client_class = PLAYGROUND_CLIENT_REGISTRY.get_client(provider_key, input.model.name)
+        if llm_client_class is None:
+            raise BadRequest(f"No LLM client registered for provider '{provider_key}'")
-def _llm_span_kind() -> Iterator[Tuple[str, Any]]:
-    yield OPENINFERENCE_SPAN_KIND, LLM
-def _llm_model_name(model_name: str) -> Iterator[Tuple[str, Any]]:
-    yield LLM_MODEL_NAME, model_name
-def _llm_invocation_parameters(invocation_parameters: Dict[str, Any]) -> Iterator[Tuple[str, Any]]:
-    yield LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_parameters)
-def _llm_tools(tools: List[JSONScalarType]) -> Iterator[Tuple[str, Any]]:
-    for tool_index, tool in enumerate(tools):
-        yield f"{LLM_TOOLS}.{tool_index}.{TOOL_JSON_SCHEMA}", json.dumps(tool)
-def _llm_token_counts(usage: "CompletionUsage") -> Iterator[Tuple[str, Any]]:
-    yield LLM_TOKEN_COUNT_PROMPT, usage.prompt_tokens
-    yield LLM_TOKEN_COUNT_COMPLETION, usage.completion_tokens
-    yield LLM_TOKEN_COUNT_TOTAL, usage.total_tokens
-def _input_value_and_mime_type(input: ChatCompletionInput) -> Iterator[Tuple[str, Any]]:
-    assert (api_key := "api_key") in (input_data := jsonify(input))
-    input_data = {k: v for k, v in input_data.items() if k != api_key}
-    assert api_key not in input_data
-    yield INPUT_MIME_TYPE, JSON
-    yield INPUT_VALUE, safe_json_dumps(input_data)
-def _output_value_and_mime_type(output: Any) -> Iterator[Tuple[str, Any]]:
-    yield OUTPUT_MIME_TYPE, JSON
-    yield OUTPUT_VALUE, safe_json_dumps(jsonify(output))
-def _llm_input_messages(
-    messages: Iterable[Tuple[ChatCompletionMessageRole, str]],
-) -> Iterator[Tuple[str, Any]]:
-    for i, (role, content) in enumerate(messages):
-        yield f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_ROLE}", role.value.lower()
-        yield f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_CONTENT}", content
+        # Decode the dataset id and, when one was supplied, the dataset version id.
+        dataset_id = from_global_id_with_expected_type(input.dataset_id, Dataset.__name__)
+        version_id = (
+            from_global_id_with_expected_type(
+                global_id=input.dataset_version_id, expected_type_name=DatasetVersion.__name__
+            )
+            if input.dataset_version_id
+            else None
+        )
+        # Highest revision id per example of this dataset.
+        revision_ids = (
+            select(func.max(models.DatasetExampleRevision.id))
+            .join(models.DatasetExample)
+            .where(models.DatasetExample.dataset_id == dataset_id)
+            .group_by(models.DatasetExampleRevision.dataset_example_id)
+        )
+        if version_id:
+            # Restrict to revisions made at or before the requested version; the
+            # subquery only matches when that version belongs to this dataset.
+            version_id_subquery = (
+                select(models.DatasetVersion.id)
+                .where(models.DatasetVersion.dataset_id == dataset_id)
+                .where(models.DatasetVersion.id == version_id)
+                .scalar_subquery()
+            )
+            revision_ids = revision_ids.where(
+                models.DatasetExampleRevision.dataset_version_id <= version_id_subquery
+            )
+        # Fetch the selected revisions, skipping examples whose selected revision
+        # is a DELETE, ordered by example id.
+        query = (
+            select(models.DatasetExampleRevision)
+            .where(
+                and_(
+                    models.DatasetExampleRevision.id.in_(revision_ids),
+                    models.DatasetExampleRevision.revision_kind != "DELETE",
+                )
+            )
+            .order_by(models.DatasetExampleRevision.dataset_example_id.asc())
+            .options(
+                load_only(
+                    models.DatasetExampleRevision.dataset_example_id,
+                    models.DatasetExampleRevision.input,
+                )
+            )
+        )
+        # All revisions are collected into a list before any completion is streamed.
+        async with info.context.db() as session:
+            revisions = [revision async for revision in await session.stream_scalars(query)]
+        if not revisions:
+            raise BadRequest("No examples found for the given dataset and version")
+        # Filled in by the per-example streams: one span per example whose
+        # messages were formatted successfully.
+        spans: dict[DatasetExampleID, streaming_llm_span] = {}
+        # One stream per example, merged into a single stream of payloads.
+        async for payload in _merge_iterators(
+            [
+                _stream_chat_completion_over_dataset_example(
+                    input=input,
+                    llm_client_class=llm_client_class,
+                    revision=revision,
+                    spans=spans,
+                )
+                for revision in revisions
+            ]
+        ):
+            yield payload
-def _llm_output_messages(
-    text_chunks: List[TextChunk],
-    tool_call_chunks: DefaultDict[ToolCallID, List[ToolCallChunk]],
-) -> Iterator[Tuple[str, Any]]:
-    yield f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"
-    if content := "".join(chunk.content for chunk in text_chunks):
-        yield f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", content
-    for tool_call_index, tool_call_chunks_ in tool_call_chunks.items():
-        if tool_call_chunks_ and (name := tool_call_chunks_[0].function.name):
-            yield (
-                f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_NAME}",
-                name,
+        # Streaming is done: persist the spans, the experiment and its runs.
+        async with info.context.db() as session:
+            # Look up the playground project by name, inserting it when missing.
+            if (
+                playground_project_id := await session.scalar(
+                    select(models.Project.id).where(models.Project.name == PLAYGROUND_PROJECT_NAME)
+                )
+            ) is None:
+                playground_project_id = await session.scalar(
+                    insert(models.Project)
+                    .returning(models.Project.id)
+                    .values(
+                        name=PLAYGROUND_PROJECT_NAME,
+                        description="Traces from prompt playground",
+                    )
+                )
+            db_spans = {
+                example_id: span.add_to_session(session, playground_project_id)
+                for example_id, span in spans.items()
+            }
+            # NOTE(review): `dataset_name` is bound inside the assert, so running
+            # with `python -O` (asserts stripped) would leave it undefined below.
+            assert (
+                dataset_name := await session.scalar(
+                    select(models.Dataset.name).where(models.Dataset.id == dataset_id)
+                )
+            ) is not None
+            # Without a requested version, use the dataset's highest version id;
+            # otherwise re-read the requested version constrained to this dataset.
+            if version_id is None:
+                resolved_version_id = await session.scalar(
+                    select(models.DatasetVersion.id)
+                    .where(models.DatasetVersion.dataset_id == dataset_id)
+                    .order_by(models.DatasetVersion.id.desc())
+                    .limit(1)
+                )
+            else:
+                resolved_version_id = await session.scalar(
+                    select(models.DatasetVersion.id).where(
+                        and_(
+                            models.DatasetVersion.dataset_id == dataset_id,
+                            models.DatasetVersion.id == version_id,
+                        )
+                    )
+                )
+            assert resolved_version_id is not None
+            resolved_version_node_id = GlobalID(DatasetVersion.__name__, str(resolved_version_id))
+            # Name, description and metadata fall back to playground defaults
+            # when the input leaves them empty.
+            experiment = models.Experiment(
+                dataset_id=from_global_id_with_expected_type(input.dataset_id, Dataset.__name__),
+                dataset_version_id=resolved_version_id,
+                name=input.experiment_name or _DEFAULT_PLAYGROUND_EXPERIMENT_NAME,
+                description=input.experiment_description
+                or _default_playground_experiment_description(dataset_name=dataset_name),
+                repetitions=1,
+                metadata_=input.experiment_metadata
+                or _default_playground_experiment_metadata(
+                    dataset_name=dataset_name,
+                    dataset_id=input.dataset_id,
+                    version_id=resolved_version_node_id,
+                ),
+                project_name=PLAYGROUND_PROJECT_NAME,
             )
-        if arguments := "".join(chunk.function.arguments for chunk in tool_call_chunks_):
-            yield (
-                f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
-                arguments,
+            session.add(experiment)
+            # Flushed before building the runs, which reference `experiment.id`
+            # (presumably populated by the flush).
+            await session.flush()
+            # One run per collected span; `error` carries the span's error
+            # message, or None when there is none.
+            runs = [
+                models.ExperimentRun(
+                    experiment_id=experiment.id,
+                    dataset_example_id=from_global_id_with_expected_type(
+                        example_id, DatasetExample.__name__
+                    ),
+                    trace_id=span.trace_id,
+                    output=models.ExperimentRunOutput(
+                        task_output=_get_playground_experiment_task_output(span)
+                    ),
+                    repetition_number=1,
+                    start_time=span.start_time,
+                    end_time=span.end_time,
+                    error=error_message
+                    if (error_message := span.error_message) is not None
+                    else None,
+                    prompt_token_count=get_attribute_value(span.attributes, LLM_TOKEN_COUNT_PROMPT),
+                    completion_token_count=get_attribute_value(
+                        span.attributes, LLM_TOKEN_COUNT_COMPLETION
+                    ),
+                )
+                for example_id, span in spans.items()
+            ]
+            session.add_all(runs)
+            await session.flush()
+        # Report the persisted span of every example that registered one, then
+        # the experiment itself.
+        for example_id in spans:
+            yield FinishedChatCompletion(
+                span=to_gql_span(db_spans[example_id]),
+                dataset_example_id=example_id,
             )
+        yield ChatCompletionOverDatasetSubscriptionResult(experiment=to_gql_experiment(experiment))
+async def _stream_chat_completion_over_dataset_example(
+    *,
+    input: ChatCompletionOverDatasetInput,
+    llm_client_class: type["PlaygroundStreamingClient"],
+    revision: models.DatasetExampleRevision,
+    spans: dict[DatasetExampleID, streaming_llm_span],
+) -> AsyncIterator[ChatCompletionSubscriptionPayload]:
+    """
+    Streams one chat completion for a single dataset example revision.
+
+    The message templates from ``input`` are formatted with ``revision.input``
+    as template variables. If formatting raises ``TemplateFormatterError``, a
+    single ``ChatCompletionSubscriptionError`` is yielded and nothing is stored
+    in ``spans``. Otherwise the span for this example is stored in ``spans``
+    under the example's global ID before streaming starts, every chunk is
+    tagged with that ID and yielded, and a trailing error payload is yielded
+    when the span reports an error message.
+    """
+    example_gid = GlobalID(DatasetExample.__name__, str(revision.dataset_example_id))
+    client = llm_client_class(model=input.model, api_key=input.api_key)
+    invocation_parameters = client.construct_invocation_parameters(input.invocation_parameters)
+    # Normalize each message to (role, content, tool_call_id, tool_calls); tool
+    # fields that are not a str / list respectively become None.
+    unformatted_messages = []
+    for message in input.messages:
+        unformatted_messages.append(
+            (
+                message.role,
+                message.content,
+                message.tool_call_id if isinstance(message.tool_call_id, str) else None,
+                message.tool_calls if isinstance(message.tool_calls, list) else None,
+            )
+        )
+    try:
+        # list() forces the lazy formatter so template errors surface here.
+        formatted_messages = list(
+            _formatted_messages(
+                messages=unformatted_messages,
+                template_language=input.template_language,
+                template_variables=revision.input,
+            )
+        )
+    except TemplateFormatterError as formatting_error:
+        yield ChatCompletionSubscriptionError(
+            message=str(formatting_error), dataset_example_id=example_gid
+        )
+        return
+    llm_span = streaming_llm_span(
+        input=input,
+        messages=formatted_messages,
+        invocation_parameters=invocation_parameters,
+    )
+    # Registered before streaming so the caller can find the span afterwards.
+    spans[example_gid] = llm_span
+    async with llm_span:
+        chunk_stream = client.chat_completion_create(
+            messages=formatted_messages, tools=input.tools or [], **invocation_parameters
+        )
+        async for chunk in chunk_stream:
+            llm_span.add_response_chunk(chunk)
+            chunk.dataset_example_id = example_gid
+            yield chunk
+        llm_span.set_attributes(client.attributes)
+    if llm_span.error_message is None:
+        return
+    yield ChatCompletionSubscriptionError(
+        message=llm_span.error_message, dataset_example_id=example_gid
+    )
-def _hex(number: int) -> str:
-    """
-    Converts an integer to a hexadecimal string.
-    """
-    return hex(number)[2:]
+async def _merge_iterators(
+    iterators: Collection[AsyncIterator[GenericType]],
+) -> AsyncIterator[GenericType]:
+    """
+    Merges several async iterators into one, yielding items as they arrive.
+
+    Each iterator has at most one outstanding task fetching its next item. An
+    exhausted iterator is dropped; an iterator whose fetch raises another
+    ``Exception`` is logged and dropped without affecting the others. When the
+    merged generator is closed or cancelled before all iterators are
+    exhausted, the outstanding tasks are cancelled rather than left pending.
+    """
+    # Keyed by task so the owner of a completed task is an O(1) lookup.
+    pending: dict[Task[GenericType], AsyncIterator[GenericType]] = {
+        _as_task(iterator): iterator for iterator in iterators
+    }
+    try:
+        while pending:
+            completed_tasks, _ = await wait(list(pending), return_when=FIRST_COMPLETED)
+            for task in completed_tasks:
+                iterator = pending.pop(task)
+                try:
+                    item = task.result()
+                except StopAsyncIteration:
+                    continue
+                except Exception as error:
+                    logger.exception(error)
+                    continue
+                # The yield sits outside the try so that an exception thrown in
+                # by the consumer is not mistaken for a failure of `iterator`.
+                yield item
+                # Only request the next item once the consumer took this one.
+                pending[_as_task(iterator)] = iterator
+    finally:
+        # Non-empty only on early exit (close, cancellation or error).
+        for task in pending:
+            task.cancel()
-def _datetime(*, epoch_nanoseconds: float) -> datetime:
-    """
-    Converts a Unix epoch timestamp in nanoseconds to a datetime.
-    """
-    epoch_seconds = epoch_nanoseconds / 1e9
-    return datetime.fromtimestamp(epoch_seconds).replace(tzinfo=timezone.utc)
+def _as_task(iterable: AsyncIterator[GenericType]) -> Task[GenericType]:
+    """Wraps the retrieval of the next item of ``iterable`` in a task."""
+    next_item = _as_coroutine(iterable)
+    return create_task(next_item)
+async def _as_coroutine(iterable: AsyncIterator[GenericType]) -> GenericType:
+    """Awaits and returns the next item of ``iterable``."""
+    next_item = await iterable.__anext__()
+    return next_item
 def _formatted_messages(
-    messages: Iterable[Tuple[ChatCompletionMessageRole, str]], template_options: TemplateOptions
-) -> Iterator[Tuple[ChatCompletionMessageRole, str]]:
+    *,
+    messages: Iterable[ChatCompletionMessage],
+    template_language: TemplateLanguage,
+    template_variables: Mapping[str, Any],
+) -> Iterator[tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[str]]]]:
     """
     Formats the messages using the given template options.
     """
+    # Each message is unpacked as (role, template, tool_call_id, tool_calls);
+    # only the template is formatted, the other fields pass through unchanged.
+    # The result is built from lazy map/zip objects, so formatting runs only as
+    # the returned iterator is consumed.
-    template_formatter = _template_formatter(template_language=template_options.language)
-    roles, templates = zip(*messages)
+    template_formatter = _template_formatter(template_language=template_language)
+    # NOTE: with no messages, zip(*messages) yields nothing and this four-way
+    # unpacking raises ValueError.
+    (
+        roles,
+        templates,
+        tool_call_id,
+        tool_calls,
+    ) = zip(*messages)
     formatted_templates = map(
-        lambda template: template_formatter.format(template, **template_options.variables),
+        lambda template: template_formatter.format(template, **template_variables),
         templates,
     )
-    formatted_messages = zip(roles, formatted_templates)
+    formatted_messages = zip(roles, formatted_templates, tool_call_id, tool_calls)
     return formatted_messages
@@ -560,29 +413,29 @@ def _template_formatter(template_language: TemplateLanguage) -> TemplateFormatte
     assert_never(template_language)
-JSON = OpenInferenceMimeTypeValues.JSON.value
+def _get_playground_experiment_task_output(
+    span: streaming_llm_span,
+) -> Any:
+    """Returns the value stored under LLM_OUTPUT_MESSAGES in the span's attributes."""
+    output_messages = get_attribute_value(span.attributes, LLM_OUTPUT_MESSAGES)
+    return output_messages
-LLM = OpenInferenceSpanKindValues.LLM.value
-OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
-INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
-INPUT_VALUE = SpanAttributes.INPUT_VALUE
-OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
-OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
-LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
-LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES
-LLM_MODEL_NAME = SpanAttributes.LLM_MODEL_NAME
-LLM_INVOCATION_PARAMETERS = SpanAttributes.LLM_INVOCATION_PARAMETERS
-LLM_TOOLS = SpanAttributes.LLM_TOOLS
-LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
-LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
-LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
+# Experiment name used when the subscription input does not supply one.
+_DEFAULT_PLAYGROUND_EXPERIMENT_NAME = "playground-experiment"
+def _default_playground_experiment_description(dataset_name: str) -> str:
+    """Builds the default experiment description, quoting the dataset name."""
+    description = f'Playground experiment for dataset "{dataset_name}"'
+    return description
-MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT
-MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE
-MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS
-TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME
-TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON
+def _default_playground_experiment_metadata(
+    dataset_name: str, dataset_id: GlobalID, version_id: GlobalID
+) -> dict[str, Any]:
+    """
+    Builds the default experiment metadata: the dataset name plus the dataset
+    and dataset version IDs rendered with ``str``.
+    """
+    metadata: dict[str, Any] = {"dataset_name": dataset_name}
+    metadata["dataset_id"] = str(dataset_id)
+    metadata["dataset_version_id"] = str(version_id)
+    return metadata
-TOOL_JSON_SCHEMA = ToolAttributes.TOOL_JSON_SCHEMA
+# Local aliases for the SpanAttributes keys read in this module.
+LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES
+LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
+LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT

arize-phoenix 5.5.2__py3-none-any.whl → 5.7.0__py3-none-any.whl

Potentially problematic release.

arize-phoenix 5.5.2__py3-none-any.whl → 5.7.0__py3-none-any.whl