arize-phoenix 5.7.0__py3-none-any.whl → 5.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix might be problematic.

Files changed (24)
  1. {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.8.0.dist-info}/METADATA +3 -5
  2. {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.8.0.dist-info}/RECORD +24 -24
  3. {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.8.0.dist-info}/WHEEL +1 -1
  4. phoenix/config.py +19 -3
  5. phoenix/server/api/helpers/playground_clients.py +123 -36
  6. phoenix/server/api/helpers/playground_spans.py +173 -76
  7. phoenix/server/api/input_types/InvocationParameters.py +7 -8
  8. phoenix/server/api/mutations/chat_mutations.py +46 -65
  9. phoenix/server/api/subscriptions.py +210 -158
  10. phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +5 -3
  11. phoenix/server/app.py +14 -0
  12. phoenix/server/grpc_server.py +3 -1
  13. phoenix/server/static/.vite/manifest.json +31 -31
  14. phoenix/server/static/assets/{components-Csu8UKOs.js → components-MllbfxfJ.js} +168 -150
  15. phoenix/server/static/assets/{index-Bk5C9EA7.js → index-BVO2YcT1.js} +2 -2
  16. phoenix/server/static/assets/{pages-UeWaKXNs.js → pages-BHfC6jnL.js} +394 -300
  17. phoenix/server/static/assets/{vendor-CtqfhlbC.js → vendor-BEuNhfwH.js} +1 -1
  18. phoenix/server/static/assets/{vendor-arizeai-C_3SBz56.js → vendor-arizeai-Bskhzyjm.js} +1 -1
  19. phoenix/server/static/assets/{vendor-codemirror-wfdk9cjp.js → vendor-codemirror-DLlXCf0x.js} +1 -1
  20. phoenix/server/static/assets/{vendor-recharts-BiVnSv90.js → vendor-recharts-CRqhvLYg.js} +1 -1
  21. phoenix/version.py +1 -1
  22. {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.8.0.dist-info}/entry_points.txt +0 -0
  23. {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.8.0.dist-info}/licenses/IP_NOTICE +0 -0
  24. {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.8.0.dist-info}/licenses/LICENSE +0 -0
phoenix/server/api/helpers/playground_spans.py

@@ -26,7 +26,6 @@ from openinference.semconv.trace import (
 )
 from opentelemetry.sdk.trace.id_generator import RandomIdGenerator as DefaultOTelIDGenerator
 from opentelemetry.trace import StatusCode
-from sqlalchemy.ext.asyncio import AsyncSession
 from strawberry.scalars import JSON as JSONScalarType
 from typing_extensions import Self, TypeAlias, assert_never
 
@@ -41,7 +40,7 @@ from phoenix.server.api.types.ChatCompletionSubscriptionPayload import (
     TextChunk,
     ToolCallChunk,
 )
-from phoenix.trace.attributes import unflatten
+from phoenix.trace.attributes import get_attribute_value, unflatten
 from phoenix.trace.schemas import (
     SpanEvent,
     SpanException,
@@ -56,7 +55,8 @@ ToolCallID: TypeAlias = str
 
 class streaming_llm_span:
     """
-    Creates an LLM span for a streaming chat completion.
+    A context manager that records OpenInference attributes for streaming chat
+    completion LLM spans.
     """
 
     def __init__(
@@ -71,24 +71,23 @@ class streaming_llm_span:
         self._attributes: dict[str, Any] = attributes if attributes is not None else {}
         self._attributes.update(
             chain(
-                _llm_span_kind(),
-                _llm_model_name(input.model.name),
-                _llm_tools(input.tools or []),
-                _llm_input_messages(messages),
-                _llm_invocation_parameters(invocation_parameters),
-                _input_value_and_mime_type(input),
+                llm_span_kind(),
+                llm_model_name(input.model.name),
+                llm_tools(input.tools or []),
+                llm_input_messages(messages),
+                llm_invocation_parameters(invocation_parameters),
+                input_value_and_mime_type(input),
             )
         )
         self._events: list[SpanEvent] = []
-        self._start_time: datetime
-        self._end_time: datetime
-        self._response_chunks: list[Union[TextChunk, ToolCallChunk]] = []
+        self._start_time: Optional[datetime] = None
+        self._end_time: Optional[datetime] = None
         self._text_chunks: list[TextChunk] = []
         self._tool_call_chunks: defaultdict[ToolCallID, list[ToolCallChunk]] = defaultdict(list)
-        self._status_code: StatusCode
-        self._status_message: str
-        self._db_span: models.Span
-        self._db_trace: models.Trace
+        self._status_code: StatusCode = StatusCode.UNSET
+        self._status_message: Optional[str] = None
+        self._trace_id = _generate_trace_id()
+        self._span_id = _generate_span_id()
 
     async def __aenter__(self) -> Self:
         self._start_time = cast(datetime, normalize_datetime(dt=local_now(), tz=timezone.utc))
@@ -102,7 +101,6 @@ class streaming_llm_span:
     ) -> bool:
         self._end_time = cast(datetime, normalize_datetime(dt=local_now(), tz=timezone.utc))
         self._status_code = StatusCode.OK
-        self._status_message = ""
         if exc_type is not None:
             self._status_code = StatusCode.ERROR
             self._status_message = str(exc_value)
@@ -115,10 +113,10 @@ class streaming_llm_span:
                     exception_stacktrace=format_exc(),
                 )
             )
-        if self._response_chunks:
+        if self._text_chunks or self._tool_call_chunks:
             self._attributes.update(
                 chain(
-                    _output_value_and_mime_type(self._response_chunks),
+                    _output_value_and_mime_type(self._text_chunks, self._tool_call_chunks),
                     _llm_output_messages(self._text_chunks, self._tool_call_chunks),
                 )
             )
@@ -127,46 +125,7 @@ class streaming_llm_span:
     def set_attributes(self, attributes: Mapping[str, Any]) -> None:
         self._attributes.update(attributes)
 
-    def add_to_session(
-        self,
-        session: AsyncSession,
-        project_id: int,
-    ) -> models.Span:
-        prompt_tokens = self._attributes.get(LLM_TOKEN_COUNT_PROMPT, 0)
-        completion_tokens = self._attributes.get(LLM_TOKEN_COUNT_COMPLETION, 0)
-        trace_id = _generate_trace_id()
-        span_id = _generate_span_id()
-        self._db_trace = models.Trace(
-            project_rowid=project_id,
-            trace_id=trace_id,
-            start_time=self._start_time,
-            end_time=self._end_time,
-        )
-        self._db_span = models.Span(
-            trace_rowid=self._db_trace.id,
-            span_id=span_id,
-            parent_id=None,
-            name="ChatCompletion",
-            span_kind=LLM,
-            start_time=self._start_time,
-            end_time=self._end_time,
-            attributes=unflatten(self._attributes.items()),
-            events=[_serialize_event(event) for event in self._events],
-            status_code=self._status_code.name,
-            status_message=self._status_message,
-            cumulative_error_count=int(self._status_code is StatusCode.ERROR),
-            cumulative_llm_token_count_prompt=prompt_tokens,
-            cumulative_llm_token_count_completion=completion_tokens,
-            llm_token_count_prompt=prompt_tokens,
-            llm_token_count_completion=completion_tokens,
-            trace=self._db_trace,
-        )
-        session.add(self._db_trace)
-        session.add(self._db_span)
-        return self._db_span
-
     def add_response_chunk(self, chunk: Union[TextChunk, ToolCallChunk]) -> None:
-        self._response_chunks.append(chunk)
         if isinstance(chunk, TextChunk):
             self._text_chunks.append(chunk)
         elif isinstance(chunk, ToolCallChunk):
@@ -174,48 +133,128 @@
         else:
             assert_never(chunk)
 
+    @property
+    def span_id(self) -> str:
+        return self._span_id
+
+    @property
+    def trace_id(self) -> str:
+        return self._trace_id
+
     @property
     def start_time(self) -> datetime:
-        return self._db_span.start_time
+        if self._start_time is None:
+            raise ValueError("Cannot access start time before the context manager is entered")
+        return self._start_time
 
     @property
     def end_time(self) -> datetime:
-        return self._db_span.end_time
+        if self._end_time is None:
+            raise ValueError("Cannot access end time before the context manager is exited")
+        return self._end_time
 
     @property
-    def error_message(self) -> Optional[str]:
-        return self._status_message if self._status_code is StatusCode.ERROR else None
+    def status_code(self) -> StatusCode:
+        return self._status_code
 
     @property
-    def trace_id(self) -> str:
-        return self._db_trace.trace_id
+    def status_message(self) -> Optional[str]:
+        if self._status_code is StatusCode.UNSET:
+            raise ValueError("Cannot access status message before the context manager is exited")
+        return self._status_message
 
     @property
-    def attributes(self) -> dict[str, Any]:
-        return self._db_span.attributes
-
+    def events(self) -> list[SpanEvent]:
+        return self._events
 
-def _llm_span_kind() -> Iterator[tuple[str, Any]]:
+    @property
+    def attributes(self) -> dict[str, Any]:
+        return unflatten(self._attributes.items())
+
+
+def get_db_trace(span: streaming_llm_span, project_id: int) -> models.Trace:
+    return models.Trace(
+        project_rowid=project_id,
+        trace_id=span.trace_id,
+        start_time=span.start_time,
+        end_time=span.end_time,
+    )
+
+
+def get_db_span(
+    span: streaming_llm_span,
+    db_trace: models.Trace,
+) -> models.Span:
+    prompt_tokens = get_attribute_value(span.attributes, LLM_TOKEN_COUNT_PROMPT) or 0
+    completion_tokens = get_attribute_value(span.attributes, LLM_TOKEN_COUNT_COMPLETION) or 0
+    return models.Span(
+        trace_rowid=db_trace.id,
+        span_id=span.span_id,
+        parent_id=None,
+        name="ChatCompletion",
+        span_kind=LLM,
+        start_time=span.start_time,
+        end_time=span.end_time,
+        attributes=span.attributes,
+        events=[_serialize_event(event) for event in span.events],
+        status_code=span.status_code.name,
+        status_message=span.status_message or "",
+        cumulative_error_count=int(span.status_code is StatusCode.ERROR),
+        cumulative_llm_token_count_prompt=prompt_tokens,
+        cumulative_llm_token_count_completion=completion_tokens,
+        llm_token_count_prompt=prompt_tokens,
+        llm_token_count_completion=completion_tokens,
+        trace=db_trace,
+    )
+
+
+def get_db_experiment_run(
+    db_span: models.Span,
+    db_trace: models.Trace,
+    *,
+    experiment_id: int,
+    example_id: int,
+) -> models.ExperimentRun:
+    return models.ExperimentRun(
+        experiment_id=experiment_id,
+        dataset_example_id=example_id,
+        trace_id=db_trace.trace_id,
+        output=models.ExperimentRunOutput(
+            task_output=get_attribute_value(db_span.attributes, LLM_OUTPUT_MESSAGES),
+        ),
+        repetition_number=1,
+        start_time=db_span.start_time,
+        end_time=db_span.end_time,
+        error=db_span.status_message or None,
+        prompt_token_count=get_attribute_value(db_span.attributes, LLM_TOKEN_COUNT_PROMPT),
+        completion_token_count=get_attribute_value(db_span.attributes, LLM_TOKEN_COUNT_COMPLETION),
+        trace=db_trace,
+    )
+
+
+def llm_span_kind() -> Iterator[tuple[str, Any]]:
     yield OPENINFERENCE_SPAN_KIND, LLM
 
 
-def _llm_model_name(model_name: str) -> Iterator[tuple[str, Any]]:
+def llm_model_name(model_name: str) -> Iterator[tuple[str, Any]]:
     yield LLM_MODEL_NAME, model_name
 
 
-def _llm_invocation_parameters(
+def llm_invocation_parameters(
     invocation_parameters: Mapping[str, Any],
 ) -> Iterator[tuple[str, Any]]:
     if invocation_parameters:
         yield LLM_INVOCATION_PARAMETERS, safe_json_dumps(invocation_parameters)
 
 
-def _llm_tools(tools: list[JSONScalarType]) -> Iterator[tuple[str, Any]]:
+def llm_tools(tools: list[JSONScalarType]) -> Iterator[tuple[str, Any]]:
     for tool_index, tool in enumerate(tools):
         yield f"{LLM_TOOLS}.{tool_index}.{TOOL_JSON_SCHEMA}", json.dumps(tool)
 
 
-def _input_value_and_mime_type(input: Any) -> Iterator[tuple[str, Any]]:
+def input_value_and_mime_type(
+    input: Union[ChatCompletionInput, ChatCompletionOverDatasetInput],
+) -> Iterator[tuple[str, Any]]:
     assert (api_key := "api_key") in (input_data := jsonify(input))
     disallowed_keys = {"api_key", "invocation_parameters"}
     input_data = {k: v for k, v in input_data.items() if k not in disallowed_keys}
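
The span above no longer writes to the database itself; the caller assembles ORM rows from its read-only properties via the get_db_* helpers. A minimal sketch of the intended composition (hedged, not an actual Phoenix call site: `session`, `project_id`, `input`, `messages`, `invocation_parameters`, and the `llm_client` streaming call are placeholder names, and the constructor keywords are assumed to mirror the attribute names above):

    # Sketch only: stream chunks into the span, then persist separately.
    async with streaming_llm_span(
        input=input,
        messages=messages,
        invocation_parameters=invocation_parameters,
    ) as span:
        async for chunk in llm_client.chat_completion_create(input=input):
            span.add_response_chunk(chunk)
    db_trace = get_db_trace(span, project_id)  # trace row from the span's id and timing
    db_span = get_db_span(span, db_trace)      # span row from attributes, events, status
    session.add_all([db_trace, db_span])       # persistence stays with the caller
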
@@ -224,12 +263,69 @@ def _input_value_and_mime_type(input: Any) -> Iterator[tuple[str, Any]]:
     yield INPUT_VALUE, safe_json_dumps(input_data)
 
 
-def _output_value_and_mime_type(output: Any) -> Iterator[tuple[str, Any]]:
-    yield OUTPUT_MIME_TYPE, JSON
-    yield OUTPUT_VALUE, safe_json_dumps(jsonify(output))
+def _merge_tool_call_chunks(
+    chunks_by_id: defaultdict[str, list[ToolCallChunk]],
+) -> list[dict[str, Any]]:
+    merged_tool_calls = []
+
+    for tool_id, chunks in chunks_by_id.items():
+        if not chunks:
+            continue
+        first_chunk = chunks[0]
+        if not first_chunk:
+            continue
+
+        if not hasattr(first_chunk, "function") or not hasattr(first_chunk.function, "name"):
+            continue
+        # Combine all argument chunks
+        merged_arguments = "".join(
+            chunk.function.arguments
+            for chunk in chunks
+            if chunk and hasattr(chunk, "function") and hasattr(chunk.function, "arguments")
+        )
+
+        merged_tool_calls.append(
+            {
+                "id": tool_id,
+                # Only the first chunk has the tool name
+                "function": {
+                    "name": first_chunk.function.name,
+                    "arguments": merged_arguments or "{}",
+                },
+            }
+        )
+
+    return merged_tool_calls
+
+
+def _output_value_and_mime_type(
+    text_chunks: list[TextChunk],
+    tool_call_chunks: defaultdict[ToolCallID, list[ToolCallChunk]],
+) -> Iterator[tuple[str, Any]]:
+    content = "".join(chunk.content for chunk in text_chunks)
+    merged_tool_calls = _merge_tool_call_chunks(tool_call_chunks)
+    if content and merged_tool_calls:
+        yield OUTPUT_MIME_TYPE, JSON
+        yield (
+            OUTPUT_VALUE,
+            safe_json_dumps(
+                {
+                    "content": content,
+                    "tool_calls": jsonify(
+                        merged_tool_calls,
+                    ),
+                }
+            ),
+        )
+    elif merged_tool_calls:
+        yield OUTPUT_MIME_TYPE, JSON
+        yield OUTPUT_VALUE, safe_json_dumps(jsonify(merged_tool_calls))
+    elif content:
+        yield OUTPUT_MIME_TYPE, TEXT
+        yield OUTPUT_VALUE, content
 
 
-def _llm_input_messages(
+def llm_input_messages(
     messages: Iterable[
         tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
     ],
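
For concreteness, a worked example of the merge semantics above (a hedged, self-contained sketch; the values are made up and SimpleNamespace stands in for the real ToolCallChunk type):

    # Fragments for one tool-call id are joined; only the first fragment
    # carries the function name, later ones only extend the arguments string.
    from collections import defaultdict
    from types import SimpleNamespace

    def _chunk(name, arguments):
        return SimpleNamespace(function=SimpleNamespace(name=name, arguments=arguments))

    chunks_by_id = defaultdict(list)
    chunks_by_id["call_1"] = [_chunk("get_weather", '{"city": '), _chunk(None, '"Paris"}')]
    merged = _merge_tool_call_chunks(chunks_by_id)
    # [{"id": "call_1", "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'}}]
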
@@ -299,6 +395,7 @@ def _serialize_event(event: SpanEvent) -> dict[str, Any]:
 
 
 JSON = OpenInferenceMimeTypeValues.JSON.value
+TEXT = OpenInferenceMimeTypeValues.TEXT.value
 
 LLM = OpenInferenceSpanKindValues.LLM.value
 
phoenix/server/api/input_types/InvocationParameters.py

@@ -47,25 +47,24 @@ class InvocationParameterBase:
     canonical_name: Optional[CanonicalParameterName] = None
     label: str
     required: bool = False
-    hidden: bool = False
 
 
 @strawberry.type
 class IntInvocationParameter(InvocationParameterBase):
     invocation_input_field: InvocationInputField = InvocationInputField.value_int
-    default_value: Optional[int] = UNSET
+    default_value: Optional[int] = None
 
 
 @strawberry.type
 class FloatInvocationParameter(InvocationParameterBase):
     invocation_input_field: InvocationInputField = InvocationInputField.value_float
-    default_value: Optional[float] = UNSET
+    default_value: Optional[float] = None
 
 
 @strawberry.type
 class BoundedFloatInvocationParameter(InvocationParameterBase):
     invocation_input_field: InvocationInputField = InvocationInputField.value_float
-    default_value: Optional[float] = UNSET
+    default_value: Optional[float] = None
     min_value: float
     max_value: float
 
@@ -73,25 +72,25 @@ class BoundedFloatInvocationParameter(InvocationParameterBase):
 @strawberry.type
 class StringInvocationParameter(InvocationParameterBase):
     invocation_input_field: InvocationInputField = InvocationInputField.value_string
-    default_value: Optional[str] = UNSET
+    default_value: Optional[str] = None
 
 
 @strawberry.type
 class JSONInvocationParameter(InvocationParameterBase):
     invocation_input_field: InvocationInputField = InvocationInputField.value_json
-    default_value: Optional[JSON] = UNSET
+    default_value: Optional[JSON] = None
 
 
 @strawberry.type
 class StringListInvocationParameter(InvocationParameterBase):
     invocation_input_field: InvocationInputField = InvocationInputField.value_string_list
-    default_value: Optional[list[str]] = UNSET
+    default_value: Optional[list[str]] = None
 
 
 @strawberry.type
 class BooleanInvocationParameter(InvocationParameterBase):
     invocation_input_field: InvocationInputField = InvocationInputField.value_bool
-    default_value: Optional[bool] = UNSET
+    default_value: Optional[bool] = None
 
 
 def extract_parameter(
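
For context on the UNSET-to-None switch above, a small sketch of the distinction (simplified, not Phoenix code; assumes Strawberry's documented UNSET semantics):

    from typing import Optional

    import strawberry

    # UNSET is a falsy sentinel distinct from None, meant to signal "value not
    # provided" on inputs; a None default on an output field serializes as null.
    assert strawberry.UNSET is not None and not strawberry.UNSET

    @strawberry.type
    class ExampleParameter:
        default_value: Optional[int] = None  # rendered as null when absent
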
phoenix/server/api/mutations/chat_mutations.py

@@ -6,6 +6,7 @@ from traceback import format_exc
 from typing import Any, Iterable, Iterator, List, Optional
 
 import strawberry
+from openinference.instrumentation import safe_json_dumps
 from openinference.semconv.trace import (
     MessageAttributes,
     OpenInferenceMimeTypeValues,
@@ -26,6 +27,14 @@ from phoenix.server.api.context import Context
 from phoenix.server.api.exceptions import BadRequest
 from phoenix.server.api.helpers.playground_clients import initialize_playground_clients
 from phoenix.server.api.helpers.playground_registry import PLAYGROUND_CLIENT_REGISTRY
+from phoenix.server.api.helpers.playground_spans import (
+    input_value_and_mime_type,
+    llm_input_messages,
+    llm_invocation_parameters,
+    llm_model_name,
+    llm_span_kind,
+    llm_tools,
+)
 from phoenix.server.api.input_types.ChatCompletionInput import ChatCompletionInput
 from phoenix.server.api.input_types.TemplateOptions import TemplateOptions
 from phoenix.server.api.types.ChatCompletionMessageRole import ChatCompletionMessageRole
@@ -38,6 +47,7 @@ from phoenix.server.api.types.TemplateLanguage import TemplateLanguage
 from phoenix.server.dml_event import SpanInsertEvent
 from phoenix.trace.attributes import unflatten
 from phoenix.trace.schemas import SpanException
+from phoenix.utilities.json import jsonify
 from phoenix.utilities.template_formatters import (
     FStringTemplateFormatter,
     MustacheTemplateFormatter,
@@ -94,7 +104,6 @@ class ChatCompletionMutationMixin:
             )
             for message in input.messages
         ]
-
        if template_options := input.template:
            messages = list(_formatted_messages(messages, template_options))
 
@@ -103,16 +112,16 @@ class ChatCompletionMutationMixin:
         )
 
         text_content = ""
-        tool_calls = []
+        tool_calls: dict[str, ChatCompletionToolCall] = {}
         events = []
         attributes.update(
             chain(
-                _llm_span_kind(),
-                _llm_model_name(input.model.name),
-                _llm_tools(input.tools or []),
-                _llm_input_messages(messages),
-                _llm_invocation_parameters(invocation_parameters),
-                _input_value_and_mime_type(input),
+                llm_span_kind(),
+                llm_model_name(input.model.name),
+                llm_tools(input.tools or []),
+                llm_input_messages(messages),
+                llm_invocation_parameters(invocation_parameters),
+                input_value_and_mime_type(input),
                 **llm_client.attributes,
             )
         )
@@ -128,14 +137,16 @@ class ChatCompletionMutationMixin:
                 if isinstance(chunk, TextChunk):
                     text_content += chunk.content
                 elif isinstance(chunk, ToolCallChunk):
-                    tool_call = ChatCompletionToolCall(
-                        id=chunk.id,
-                        function=ChatCompletionFunctionCall(
-                            name=chunk.function.name,
-                            arguments=chunk.function.arguments,
-                        ),
-                    )
-                    tool_calls.append(tool_call)
+                    if chunk.id not in tool_calls:
+                        tool_calls[chunk.id] = ChatCompletionToolCall(
+                            id=chunk.id,
+                            function=ChatCompletionFunctionCall(
+                                name=chunk.function.name,
+                                arguments=chunk.function.arguments,
+                            ),
+                        )
+                    else:
+                        tool_calls[chunk.id].function.arguments += chunk.function.arguments
                 else:
                     assert_never(chunk)
             except Exception as e:
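
Reduced to a self-contained sketch (dataclasses stand in for the GraphQL types), the accumulation above behaves like this:

    from dataclasses import dataclass

    @dataclass
    class _Function:
        name: str
        arguments: str

    calls: dict[str, _Function] = {}
    for call_id, name, fragment in [
        ("call_1", "get_weather", '{"city": '),
        ("call_1", "", '"Paris"}'),
    ]:
        if call_id not in calls:            # first fragment creates the entry
            calls[call_id] = _Function(name, fragment)
        else:                               # later fragments extend the arguments
            calls[call_id].arguments += fragment

    assert calls["call_1"].arguments == '{"city": "Paris"}'
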
@@ -159,7 +170,7 @@ class ChatCompletionMutationMixin:
         if text_content or tool_calls:
             attributes.update(
                 chain(
-                    _output_value_and_mime_type({"text": text_content, "tool_calls": tool_calls}),
+                    _output_value_and_mime_type(text_content, tool_calls),
                     _llm_output_messages(text_content, tool_calls),
                 )
             )
@@ -225,7 +236,7 @@ class ChatCompletionMutationMixin:
         else:
             return ChatCompletionMutationPayload(
                 content=text_content if text_content else None,
-                tool_calls=tool_calls,
+                tool_calls=list(tool_calls.values()),
                 span=gql_span,
                 error_message=None,
             )
@@ -264,61 +275,30 @@ def _template_formatter(template_language: TemplateLanguage) -> TemplateFormatter:
     assert_never(template_language)
 
 
-def _llm_span_kind() -> Iterator[tuple[str, Any]]:
-    yield OPENINFERENCE_SPAN_KIND, LLM
-
-
-def _llm_model_name(model_name: str) -> Iterator[tuple[str, Any]]:
-    yield LLM_MODEL_NAME, model_name
-
-
-def _llm_invocation_parameters(invocation_parameters: dict[str, Any]) -> Iterator[tuple[str, Any]]:
-    yield LLM_INVOCATION_PARAMETERS, json.dumps(invocation_parameters)
-
-
-def _llm_tools(tools: List[Any]) -> Iterator[tuple[str, Any]]:
-    for tool_index, tool in enumerate(tools):
-        yield f"{LLM_TOOLS}.{tool_index}.{TOOL_JSON_SCHEMA}", json.dumps(tool)
-
-
-def _input_value_and_mime_type(input: ChatCompletionInput) -> Iterator[tuple[str, Any]]:
-    input_data = input.__dict__.copy()
-    input_data.pop("api_key", None)
-    yield INPUT_MIME_TYPE, JSON
-    yield INPUT_VALUE, json.dumps(input_data)
-
-
-def _output_value_and_mime_type(output: Any) -> Iterator[tuple[str, Any]]:
-    yield OUTPUT_MIME_TYPE, JSON
-    yield OUTPUT_VALUE, json.dumps(output)
-
-
-def _llm_input_messages(
-    messages: Iterable[ChatCompletionMessage],
+def _output_value_and_mime_type(
+    text: str, tool_calls: dict[str, ChatCompletionToolCall]
 ) -> Iterator[tuple[str, Any]]:
-    for i, (role, content, _tool_call_id, tool_calls) in enumerate(messages):
-        yield f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_ROLE}", role.value.lower()
-        yield f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_CONTENT}", content
-        if tool_calls:
-            for tool_call_index, tool_call in enumerate(tool_calls):
-                yield (
-                    f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_NAME}",
-                    tool_call["function"]["name"],
-                )
-                if arguments := tool_call["function"]["arguments"]:
-                    yield (
-                        f"{LLM_INPUT_MESSAGES}.{i}.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}",
-                        json.dumps(arguments),
-                    )
+    if text and tool_calls:
+        yield OUTPUT_MIME_TYPE, JSON
+        yield (
+            OUTPUT_VALUE,
+            safe_json_dumps({"content": text, "tool_calls": jsonify(list(tool_calls.values()))}),
+        )
+    elif tool_calls:
+        yield OUTPUT_MIME_TYPE, JSON
+        yield OUTPUT_VALUE, safe_json_dumps(jsonify(list(tool_calls.values())))
+    elif text:
+        yield OUTPUT_MIME_TYPE, TEXT
+        yield OUTPUT_VALUE, text
 
 
 def _llm_output_messages(
-    text_content: str, tool_calls: List[ChatCompletionToolCall]
+    text_content: str, tool_calls: dict[str, ChatCompletionToolCall]
 ) -> Iterator[tuple[str, Any]]:
     yield f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_ROLE}", "assistant"
     if text_content:
         yield f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_CONTENT}", text_content
-    for tool_call_index, tool_call in enumerate(tool_calls):
+    for tool_call_index, tool_call in enumerate(tool_calls.values()):
         yield (
             f"{LLM_OUTPUT_MESSAGES}.0.{MESSAGE_TOOL_CALLS}.{tool_call_index}.{TOOL_CALL_FUNCTION_NAME}",
             tool_call.function.name,
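
The branching above amounts to a small decision table; a hedged restatement, with string labels standing in for the OpenInference mime-type constants:

    from typing import Optional

    def _mime_type_for(text: str, has_tool_calls: bool) -> Optional[str]:
        if has_tool_calls:   # tool calls, with or without text -> JSON payload
            return "JSON"
        if text:             # text only -> plain TEXT
            return "TEXT"
        return None          # neither -> no output attributes are yielded

    assert _mime_type_for("hi", False) == "TEXT"
    assert _mime_type_for("", True) == "JSON"
    assert _mime_type_for("hi", True) == "JSON"
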
@@ -347,6 +327,7 @@ def _serialize_event(event: SpanException) -> dict[str, Any]:
 
 
 JSON = OpenInferenceMimeTypeValues.JSON.value
+TEXT = OpenInferenceMimeTypeValues.TEXT.value
 LLM = OpenInferenceSpanKindValues.LLM.value
 
 OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND