arize-phoenix 5.7.0__py3-none-any.whl → 5.9.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.9.0.dist-info}/METADATA +3 -5
- {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.9.0.dist-info}/RECORD +31 -31
- {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.9.0.dist-info}/WHEEL +1 -1
- phoenix/config.py +19 -3
- phoenix/db/helpers.py +55 -1
- phoenix/server/api/helpers/playground_clients.py +283 -44
- phoenix/server/api/helpers/playground_spans.py +173 -76
- phoenix/server/api/input_types/InvocationParameters.py +7 -8
- phoenix/server/api/mutations/chat_mutations.py +244 -76
- phoenix/server/api/queries.py +5 -1
- phoenix/server/api/routers/v1/spans.py +25 -1
- phoenix/server/api/subscriptions.py +210 -158
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +5 -3
- phoenix/server/api/types/ExperimentRun.py +38 -1
- phoenix/server/api/types/GenerativeProvider.py +2 -1
- phoenix/server/app.py +21 -2
- phoenix/server/grpc_server.py +3 -1
- phoenix/server/static/.vite/manifest.json +32 -32
- phoenix/server/static/assets/{components-Csu8UKOs.js → components-DU-8CYbi.js} +370 -329
- phoenix/server/static/assets/{index-Bk5C9EA7.js → index-D9E16vvV.js} +2 -2
- phoenix/server/static/assets/pages-t09OI1rC.js +3966 -0
- phoenix/server/static/assets/{vendor-CtqfhlbC.js → vendor-D04tenE6.js} +181 -181
- phoenix/server/static/assets/{vendor-arizeai-C_3SBz56.js → vendor-arizeai-D3NxMQw0.js} +2 -2
- phoenix/server/static/assets/{vendor-codemirror-wfdk9cjp.js → vendor-codemirror-XTiZSlqq.js} +5 -5
- phoenix/server/static/assets/{vendor-recharts-BiVnSv90.js → vendor-recharts-p0L0neVs.js} +1 -1
- phoenix/session/client.py +27 -7
- phoenix/utilities/json.py +31 -1
- phoenix/version.py +1 -1
- phoenix/server/static/assets/pages-UeWaKXNs.js +0 -3737
- {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.9.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.9.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-5.7.0.dist-info → arize_phoenix-5.9.0.dist-info}/licenses/LICENSE +0 -0
--- a/phoenix/server/api/helpers/playground_clients.py
+++ b/phoenix/server/api/helpers/playground_clients.py
@@ -1,13 +1,12 @@
+import asyncio
 import importlib.util
+import inspect
+import json
+import time
 from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Callable, Iterator
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Mapping,
-    Optional,
-    Union,
-)
+from functools import wraps
+from typing import TYPE_CHECKING, Any, Hashable, Mapping, Optional, Union
 
 from openinference.instrumentation import safe_json_dumps
 from openinference.semconv.trace import SpanAttributes
@@ -15,14 +14,19 @@ from strawberry import UNSET
 from strawberry.scalars import JSON as JSONScalarType
 from typing_extensions import TypeAlias, assert_never
 
-from phoenix.server.api.helpers.playground_registry import (
-    PROVIDER_DEFAULT,
-    register_llm_client,
+from phoenix.evals.models.rate_limiters import (
+    AsyncCallable,
+    GenericType,
+    ParameterSpec,
+    RateLimiter,
+    RateLimitError,
 )
+from phoenix.server.api.helpers.playground_registry import PROVIDER_DEFAULT, register_llm_client
 from phoenix.server.api.input_types.GenerativeModelInput import GenerativeModelInput
 from phoenix.server.api.input_types.InvocationParameters import (
     BoundedFloatInvocationParameter,
     CanonicalParameterName,
+    FloatInvocationParameter,
     IntInvocationParameter,
     InvocationParameter,
     InvocationParameterInput,
@@ -41,17 +45,114 @@ from phoenix.server.api.types.GenerativeProvider import GenerativeProviderKey
 
 if TYPE_CHECKING:
     from anthropic.types import MessageParam
+    from google.generativeai.types import ContentType
     from openai.types import CompletionUsage
-    from openai.types.chat import (
-        ChatCompletionMessageParam,
-        ChatCompletionMessageToolCallParam,
-    )
+    from openai.types.chat import ChatCompletionMessageParam, ChatCompletionMessageToolCallParam
 
-DependencyName: TypeAlias = str
 SetSpanAttributesFn: TypeAlias = Callable[[Mapping[str, Any]], None]
 ChatCompletionChunk: TypeAlias = Union[TextChunk, ToolCallChunk]
 
 
+class Dependency:
+    """
+    Set the module_name to the import name if it is different from the install name
+    """
+
+    def __init__(self, name: str, module_name: Optional[str] = None):
+        self.name = name
+        self.module_name = module_name
+
+    @property
+    def import_name(self) -> str:
+        return self.module_name or self.name
+
+
+class KeyedSingleton:
+    _instances: dict[Hashable, "KeyedSingleton"] = {}
+
+    def __new__(cls, *args: Any, **kwargs: Any) -> "KeyedSingleton":
+        if "singleton_key" in kwargs:
+            singleton_key = kwargs.pop("singleton_key")
+        elif args:
+            singleton_key = args[0]
+            args = args[1:]
+        else:
+            raise ValueError("singleton_key must be provided")
+
+        instance_key = (cls, singleton_key)
+        if instance_key not in cls._instances:
+            instance = super().__new__(cls)
+            cls._instances[instance_key] = instance
+        return cls._instances[instance_key]
+
+
+class PlaygroundRateLimiter(RateLimiter, KeyedSingleton):
+    """
+    A rate limiter class that will be instantiated once per `singleton_key`.
+    """
+
+    def __init__(self, singleton_key: Hashable, rate_limit_error: Optional[type[BaseException]]):
+        super().__init__(
+            rate_limit_error=rate_limit_error,
+            max_rate_limit_retries=3,
+            initial_per_second_request_rate=2.0,
+            maximum_per_second_request_rate=10.0,
+            enforcement_window_minutes=1,
+            rate_reduction_factor=0.5,
+            rate_increase_factor=0.01,
+            cooldown_seconds=5,
+            verbose=False,
+        )
+
+    # TODO: update the rate limiter class in phoenix.evals to support decorated sync functions
+    def _alimit(
+        self, fn: Callable[ParameterSpec, GenericType]
+    ) -> AsyncCallable[ParameterSpec, GenericType]:
+        @wraps(fn)
+        async def wrapper(*args: Any, **kwargs: Any) -> GenericType:
+            self._initialize_async_primitives()
+            assert self._rate_limit_handling_lock is not None and isinstance(
+                self._rate_limit_handling_lock, asyncio.Lock
+            )
+            assert self._rate_limit_handling is not None and isinstance(
+                self._rate_limit_handling, asyncio.Event
+            )
+            try:
+                try:
+                    await asyncio.wait_for(self._rate_limit_handling.wait(), 120)
+                except asyncio.TimeoutError:
+                    self._rate_limit_handling.set()  # Set the event as a failsafe
+                await self._throttler.async_wait_until_ready()
+                request_start_time = time.time()
+                if inspect.iscoroutinefunction(fn):
+                    return await fn(*args, **kwargs)  # type: ignore
+                else:
+                    return fn(*args, **kwargs)
+            except self._rate_limit_error:
+                async with self._rate_limit_handling_lock:
+                    self._rate_limit_handling.clear()  # prevent new requests from starting
+                    self._throttler.on_rate_limit_error(request_start_time, verbose=self._verbose)
+                    try:
+                        for _attempt in range(self._max_rate_limit_retries):
+                            try:
+                                request_start_time = time.time()
+                                await self._throttler.async_wait_until_ready()
+                                if inspect.iscoroutinefunction(fn):
+                                    return await fn(*args, **kwargs)  # type: ignore
+                                else:
+                                    return fn(*args, **kwargs)
+                            except self._rate_limit_error:
+                                self._throttler.on_rate_limit_error(
+                                    request_start_time, verbose=self._verbose
+                                )
+                                continue
+                    finally:
+                        self._rate_limit_handling.set()  # allow new requests to start
+                raise RateLimitError(f"Exceeded max ({self._max_rate_limit_retries}) retries")
+
+        return wrapper
+
+
 class PlaygroundStreamingClient(ABC):
     def __init__(
         self,
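Note: the `KeyedSingleton` base above caches one instance per `(subclass, key)` pair, so every playground client for a given provider shares a single rate limiter. A minimal runnable sketch of that behavior (a condensed version of the class above; the `Limiter` subclass and key strings are illustrative, not part of the patch):

from typing import Any, Hashable

class KeyedSingleton:
    _instances: dict[Hashable, "KeyedSingleton"] = {}

    def __new__(cls, *args: Any, **kwargs: Any) -> "KeyedSingleton":
        if "singleton_key" in kwargs:
            singleton_key = kwargs.pop("singleton_key")
        elif args:
            singleton_key = args[0]
        else:
            raise ValueError("singleton_key must be provided")
        instance_key = (cls, singleton_key)  # keyed per subclass and per key
        if instance_key not in cls._instances:
            cls._instances[instance_key] = super().__new__(cls)
        return cls._instances[instance_key]

class Limiter(KeyedSingleton):  # illustrative subclass
    def __init__(self, singleton_key: Hashable) -> None:
        self.key = singleton_key

assert Limiter("OPENAI") is Limiter("OPENAI")         # same key: same instance
assert Limiter("OPENAI") is not Limiter("ANTHROPIC")  # different key: new instance

One caveat of this pattern: `__init__` re-runs on every lookup of a cached instance, which is harmless here because the limiter's configuration is constant.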
@@ -62,8 +163,8 @@ class PlaygroundStreamingClient(ABC):
 
     @classmethod
     @abstractmethod
-    def dependencies(cls) -> list[DependencyName]:
-        # A list of
+    def dependencies(cls) -> list[Dependency]:
+        # A list of dependencies this client needs to run
         ...
 
     @classmethod
@@ -108,7 +209,8 @@ class PlaygroundStreamingClient(ABC):
     def dependencies_are_installed(cls) -> bool:
         try:
             for dependency in cls.dependencies():
-                if importlib.util.find_spec(dependency) is None:
+                import_name = dependency.import_name
+                if importlib.util.find_spec(import_name) is None:
                     return False
             return True
         except ValueError:
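Note: `dependencies_are_installed` now probes `importlib.util.find_spec` with `Dependency.import_name`, which matters when a package's PyPI name differs from its import path (the new `google-generativeai` dependency imports as `google.generativeai`). A small sketch of the probe; the `installed` helper is local to this example:

import importlib.util

def installed(import_name: str) -> bool:
    try:
        return importlib.util.find_spec(import_name) is not None
    except ModuleNotFoundError:
        # find_spec("a.b") imports the parent package "a"; a missing parent raises
        return False

print(installed("openai"))               # install name and import name coincide
print(installed("google.generativeai"))  # installed as "google-generativeai" on PyPI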
@@ -150,14 +252,16 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
         api_key: Optional[str] = None,
     ) -> None:
         from openai import AsyncOpenAI
+        from openai import RateLimitError as OpenAIRateLimitError
 
         super().__init__(model=model, api_key=api_key)
         self.client = AsyncOpenAI(api_key=api_key)
         self.model_name = model.name
+        self.rate_limiter = PlaygroundRateLimiter(model.provider_key, OpenAIRateLimitError)
 
     @classmethod
-    def dependencies(cls) -> list[DependencyName]:
-        return ["openai"]
+    def dependencies(cls) -> list[Dependency]:
+        return [Dependency(name="openai")]
 
     @classmethod
     def supported_invocation_parameters(cls) -> list[InvocationParameter]:
@@ -174,19 +278,16 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
                 invocation_name="max_tokens",
                 canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
                 label="Max Tokens",
-                default_value=UNSET,
             ),
             BoundedFloatInvocationParameter(
                 invocation_name="frequency_penalty",
                 label="Frequency Penalty",
-                default_value=UNSET,
                 min_value=-2.0,
                 max_value=2.0,
             ),
             BoundedFloatInvocationParameter(
                 invocation_name="presence_penalty",
                 label="Presence Penalty",
-                default_value=UNSET,
                 min_value=-2.0,
                 max_value=2.0,
             ),
@@ -194,13 +295,11 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
                 invocation_name="stop",
                 canonical_name=CanonicalParameterName.STOP_SEQUENCES,
                 label="Stop Sequences",
-                default_value=UNSET,
             ),
             BoundedFloatInvocationParameter(
                 invocation_name="top_p",
                 canonical_name=CanonicalParameterName.TOP_P,
                 label="Top P",
-                default_value=UNSET,
                 min_value=0.0,
                 max_value=1.0,
             ),
@@ -208,20 +307,16 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
                 invocation_name="seed",
                 canonical_name=CanonicalParameterName.RANDOM_SEED,
                 label="Seed",
-                default_value=UNSET,
             ),
             JSONInvocationParameter(
                 invocation_name="tool_choice",
                 label="Tool Choice",
                 canonical_name=CanonicalParameterName.TOOL_CHOICE,
-                default_value=UNSET,
-                hidden=True,
             ),
             JSONInvocationParameter(
                 invocation_name="response_format",
                 label="Response Format",
                 canonical_name=CanonicalParameterName.RESPONSE_FORMAT,
-                default_value=UNSET,
             ),
         ]
 
@@ -240,7 +335,8 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
         openai_messages = [self.to_openai_chat_completion_param(*message) for message in messages]
         tool_call_ids: dict[int, str] = {}
         token_usage: Optional["CompletionUsage"] = None
-        async for chunk in await self.client.chat.completions.create(
+        throttled_create = self.rate_limiter.alimit(self.client.chat.completions.create)
+        async for chunk in await throttled_create(
             messages=openai_messages,
             model=self.model_name,
             stream=True,
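Note: the rate-limited call is awaited first and the awaited value is then iterated, because a streaming `create` returns a coroutine that resolves to an async iterator of chunks; the limiter therefore throttles only the request, not the consumption of the stream. A toy sketch of that call shape (every name here is a stand-in, not the patch's API):

import asyncio

async def fake_create(**_: object):
    async def chunks():
        for piece in ("hel", "lo"):
            yield piece
    return chunks()  # what a streaming create resolves to: an async iterator

def alimit(fn):
    async def wrapper(*args, **kwargs):
        await asyncio.sleep(0)  # stand-in for the throttler's wait
        return await fn(*args, **kwargs)
    return wrapper

async def main() -> None:
    throttled_create = alimit(fake_create)
    async for chunk in await throttled_create(stream=True):
        print(chunk)

asyncio.run(main())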
@@ -251,6 +347,9 @@ class OpenAIStreamingClient(PlaygroundStreamingClient):
             if (usage := chunk.usage) is not None:
                 token_usage = usage
                 continue
+            if not chunk.choices:
+                # for Azure, initial chunk contains the content filter
+                continue
             choice = chunk.choices[0]
             delta = choice.delta
             if choice.finish_reason is None:
@@ -370,20 +469,16 @@ class OpenAIO1StreamingClient(OpenAIStreamingClient):
                 invocation_name="max_completion_tokens",
                 canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
                 label="Max Completion Tokens",
-                default_value=UNSET,
             ),
             IntInvocationParameter(
                 invocation_name="seed",
                 canonical_name=CanonicalParameterName.RANDOM_SEED,
                 label="Seed",
-                default_value=UNSET,
             ),
             JSONInvocationParameter(
                 invocation_name="tool_choice",
                 label="Tool Choice",
                 canonical_name=CanonicalParameterName.TOOL_CHOICE,
-                default_value=UNSET,
-                hidden=True,
             ),
         ]
 
@@ -409,7 +504,8 @@ class OpenAIO1StreamingClient(OpenAIStreamingClient):
 
         tool_call_ids: dict[int, str] = {}
 
-        response = await self.client.chat.completions.create(
+        throttled_create = self.rate_limiter.alimit(self.client.chat.completions.create)
+        response = await throttled_create(
             messages=openai_messages,
             model=self.model_name,
             tools=tools or NOT_GIVEN,
@@ -544,10 +640,11 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
         super().__init__(model=model, api_key=api_key)
         self.client = anthropic.AsyncAnthropic(api_key=api_key)
         self.model_name = model.name
+        self.rate_limiter = PlaygroundRateLimiter(model.provider_key, anthropic.RateLimitError)
 
     @classmethod
-    def dependencies(cls) -> list[DependencyName]:
-        return ["anthropic"]
+    def dependencies(cls) -> list[Dependency]:
+        return [Dependency(name="anthropic")]
 
     @classmethod
     def supported_invocation_parameters(cls) -> list[InvocationParameter]:
@@ -556,14 +653,12 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
                 invocation_name="max_tokens",
                 canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
                 label="Max Tokens",
-                default_value=UNSET,
                 required=True,
             ),
             BoundedFloatInvocationParameter(
                 invocation_name="temperature",
                 canonical_name=CanonicalParameterName.TEMPERATURE,
                 label="Temperature",
-                default_value=UNSET,
                 min_value=0.0,
                 max_value=1.0,
             ),
@@ -571,13 +666,11 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
                 invocation_name="stop_sequences",
                 canonical_name=CanonicalParameterName.STOP_SEQUENCES,
                 label="Stop Sequences",
-                default_value=UNSET,
             ),
             BoundedFloatInvocationParameter(
                 invocation_name="top_p",
                 canonical_name=CanonicalParameterName.TOP_P,
                 label="Top P",
-                default_value=UNSET,
                 min_value=0.0,
                 max_value=1.0,
             ),
@@ -585,8 +678,6 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
                 invocation_name="tool_choice",
                 label="Tool Choice",
                 canonical_name=CanonicalParameterName.TOOL_CHOICE,
-                default_value=UNSET,
-                hidden=True,
             ),
         ]
 
@@ -608,9 +699,11 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
             "model": self.model_name,
             "system": system_prompt,
             "max_tokens": 1024,
+            "tools": tools,
             **invocation_parameters,
         }
-        async with self.client.messages.stream(**anthropic_params) as stream:
+        throttled_stream = self.rate_limiter._alimit(self.client.messages.stream)
+        async with await throttled_stream(**anthropic_params) as stream:
             async for event in stream:
                 if isinstance(event, anthropic_types.RawMessageStartEvent):
                     self._attributes.update(
@@ -622,6 +715,18 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
                     self._attributes.update(
                         {LLM_TOKEN_COUNT_COMPLETION: event.message.usage.output_tokens}
                     )
+                elif (
+                    isinstance(event, anthropic_streaming.ContentBlockStopEvent)
+                    and event.content_block.type == "tool_use"
+                ):
+                    tool_call_chunk = ToolCallChunk(
+                        id=event.content_block.id,
+                        function=FunctionCallChunk(
+                            name=event.content_block.name,
+                            arguments=json.dumps(event.content_block.input),
+                        ),
+                    )
+                    yield tool_call_chunk
                 elif isinstance(
                     event,
                     (
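Note: Anthropic delivers tool arguments as incremental JSON deltas, so the assembled `input` dict is complete only when the content block stops; the new branch re-serializes it with `json.dumps` so the chunk carries arguments as a JSON string like the other clients. A reduced sketch with a hypothetical event shape:

import json
from dataclasses import dataclass, field

@dataclass
class ContentBlock:  # hypothetical stand-in for an Anthropic tool_use block
    type: str
    id: str
    name: str
    input: dict = field(default_factory=dict)

block = ContentBlock(type="tool_use", id="toolu_123", name="get_weather",
                     input={"city": "Tokyo"})
if block.type == "tool_use":
    arguments = json.dumps(block.input)  # dict -> JSON string for the chunk
    print(block.id, block.name, arguments)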
@@ -629,6 +734,7 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
                         anthropic_types.RawContentBlockDeltaEvent,
                         anthropic_types.RawMessageDeltaEvent,
                         anthropic_streaming.ContentBlockStopEvent,
+                        anthropic_streaming.InputJsonEvent,
                     ),
                 ):
                     # event types emitted by the stream that don't contain useful information
@@ -659,6 +765,139 @@ class AnthropicStreamingClient(PlaygroundStreamingClient):
         return anthropic_messages, system_prompt
 
 
+@register_llm_client(
+    provider_key=GenerativeProviderKey.GEMINI,
+    model_names=[
+        PROVIDER_DEFAULT,
+        "gemini-1.5-flash",
+        "gemini-1.5-flash-8b",
+        "gemini-1.5-pro",
+        "gemini-1.0-pro",
+    ],
+)
+class GeminiStreamingClient(PlaygroundStreamingClient):
+    def __init__(
+        self,
+        model: GenerativeModelInput,
+        api_key: Optional[str] = None,
+    ) -> None:
+        import google.generativeai as google_genai
+
+        super().__init__(model=model, api_key=api_key)
+        google_genai.configure(api_key=api_key)
+        self.model_name = model.name
+
+    @classmethod
+    def dependencies(cls) -> list[Dependency]:
+        return [Dependency(name="google-generativeai", module_name="google.generativeai")]
+
+    @classmethod
+    def supported_invocation_parameters(cls) -> list[InvocationParameter]:
+        return [
+            BoundedFloatInvocationParameter(
+                invocation_name="temperature",
+                canonical_name=CanonicalParameterName.TEMPERATURE,
+                label="Temperature",
+                default_value=0.0,
+                min_value=0.0,
+                max_value=2.0,
+            ),
+            IntInvocationParameter(
+                invocation_name="max_output_tokens",
+                canonical_name=CanonicalParameterName.MAX_COMPLETION_TOKENS,
+                label="Max Output Tokens",
+            ),
+            StringListInvocationParameter(
+                invocation_name="stop",
+                canonical_name=CanonicalParameterName.STOP_SEQUENCES,
+                label="Stop Sequences",
+            ),
+            FloatInvocationParameter(
+                invocation_name="presence_penalty",
+                label="Presence Penalty",
+            ),
+            FloatInvocationParameter(
+                invocation_name="frequency_penalty",
+                label="Frequency Penalty",
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="top_p",
+                canonical_name=CanonicalParameterName.TOP_P,
+                label="Top P",
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            BoundedFloatInvocationParameter(
+                invocation_name="top_k",
+                label="Top K",
+                min_value=0.0,
+                max_value=1.0,
+            ),
+            IntInvocationParameter(
+                invocation_name="seed",
+                canonical_name=CanonicalParameterName.RANDOM_SEED,
+                label="Seed",
+            ),
+        ]
+
+    async def chat_completion_create(
+        self,
+        messages: list[
+            tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[JSONScalarType]]]
+        ],
+        tools: list[JSONScalarType],
+        **invocation_parameters: Any,
+    ) -> AsyncIterator[ChatCompletionChunk]:
+        import google.generativeai as google_genai
+
+        gemini_message_history, current_message, system_prompt = self._build_gemini_messages(
+            messages
+        )
+
+        model_args = {"model_name": self.model_name}
+        if system_prompt:
+            model_args["system_instruction"] = system_prompt
+        client = google_genai.GenerativeModel(**model_args)
+
+        gemini_config = google_genai.GenerationConfig(
+            **invocation_parameters,
+        )
+        gemini_params = {
+            "content": current_message,
+            "generation_config": gemini_config,
+            "stream": True,
+        }
+
+        chat = client.start_chat(history=gemini_message_history)
+        stream = await chat.send_message_async(**gemini_params)
+        async for event in stream:
+            yield TextChunk(content=event.text)
+
+    def _build_gemini_messages(
+        self,
+        messages: list[tuple[ChatCompletionMessageRole, str, Optional[str], Optional[list[str]]]],
+    ) -> tuple[list["ContentType"], str, str]:
+        gemini_message_history: list["ContentType"] = []
+        system_prompts = []
+        for role, content, _tool_call_id, _tool_calls in messages:
+            if role == ChatCompletionMessageRole.USER:
+                gemini_message_history.append({"role": "user", "parts": content})
+            elif role == ChatCompletionMessageRole.AI:
+                gemini_message_history.append({"role": "model", "parts": content})
+            elif role == ChatCompletionMessageRole.SYSTEM:
+                system_prompts.append(content)
+            elif role == ChatCompletionMessageRole.TOOL:
+                raise NotImplementedError
+            else:
+                assert_never(role)
+        if gemini_message_history:
+            prompt = gemini_message_history.pop()["parts"]
+        else:
+            prompt = ""
+
+        return gemini_message_history, prompt, "\n".join(system_prompts)
+
+
 def initialize_playground_clients() -> None:
     """
     Ensure that all playground clients are registered at import time.