pydantic-ai-slim 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

pydantic_ai/models/anthropic.py
@@ -1,14 +1,16 @@
  from __future__ import annotations as _annotations

- from collections.abc import AsyncIterator
+ from collections.abc import AsyncIterable, AsyncIterator
  from contextlib import asynccontextmanager
  from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from json import JSONDecodeError, loads as json_loads
  from typing import Any, Literal, Union, cast, overload

  from httpx import AsyncClient as AsyncHTTPClient
  from typing_extensions import assert_never

- from .. import result
+ from .. import UnexpectedModelBehavior, _utils, usage
  from .._utils import guard_tool_call_id as _guard_tool_call_id
  from ..messages import (
      ArgsDict,
@@ -16,6 +18,7 @@ from ..messages import (
      ModelRequest,
      ModelResponse,
      ModelResponsePart,
+     ModelResponseStreamEvent,
      RetryPromptPart,
      SystemPromptPart,
      TextPart,
@@ -27,8 +30,8 @@ from ..settings import ModelSettings
  from ..tools import ToolDefinition
  from . import (
      AgentModel,
-     EitherStreamedResponse,
      Model,
+     StreamedResponse,
      cached_async_http_client,
      check_allow_model_requests,
  )
@@ -38,11 +41,16 @@ try:
      from anthropic.types import (
          Message as AnthropicMessage,
          MessageParam,
+         RawContentBlockDeltaEvent,
+         RawContentBlockStartEvent,
+         RawContentBlockStopEvent,
          RawMessageDeltaEvent,
          RawMessageStartEvent,
+         RawMessageStopEvent,
          RawMessageStreamEvent,
          TextBlock,
          TextBlockParam,
+         TextDelta,
          ToolChoiceParam,
          ToolParam,
          ToolResultBlockParam,
@@ -152,20 +160,20 @@ class AnthropicAgentModel(AgentModel):
      """Implementation of `AgentModel` for Anthropic models."""

      client: AsyncAnthropic
-     model_name: str
+     model_name: AnthropicModelName
      allow_text_result: bool
      tools: list[ToolParam]

      async def request(
          self, messages: list[ModelMessage], model_settings: ModelSettings | None
-     ) -> tuple[ModelResponse, result.Usage]:
+     ) -> tuple[ModelResponse, usage.Usage]:
          response = await self._messages_create(messages, False, model_settings)
          return self._process_response(response), _map_usage(response)

      @asynccontextmanager
      async def request_stream(
          self, messages: list[ModelMessage], model_settings: ModelSettings | None
-     ) -> AsyncIterator[EitherStreamedResponse]:
+     ) -> AsyncIterator[StreamedResponse]:
          response = await self._messages_create(messages, True, model_settings)
          async with response:
              yield await self._process_streamed_response(response)
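
With this change, `request_stream` yields the new unified `StreamedResponse` instead of the old `EitherStreamedResponse` union. A minimal consumption sketch (illustrative only: the `StreamedResponse` base class is not shown in this diff, and it is assumed here to be async-iterable over `ModelResponseStreamEvent` items, matching `_get_event_iterator` further down):

    # Hypothetical driver; `agent_model` and `messages` are supplied by the caller.
    async def print_stream(agent_model: AgentModel, messages: list[ModelMessage]) -> None:
        # request_stream is an async context manager (note @asynccontextmanager above)
        async with agent_model.request_stream(messages, None) as streamed:
            async for event in streamed:  # assumed: yields ModelResponseStreamEvent items
                print(event)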
@@ -186,16 +194,22 @@ class AnthropicAgentModel(AgentModel):
          self, messages: list[ModelMessage], stream: bool, model_settings: ModelSettings | None
      ) -> AnthropicMessage | AsyncStream[RawMessageStreamEvent]:
          # standalone function to make it easier to override
+         model_settings = model_settings or {}
+
+         tool_choice: ToolChoiceParam | None
+
          if not self.tools:
-             tool_choice: ToolChoiceParam | None = None
-         elif not self.allow_text_result:
-             tool_choice = {'type': 'any'}
+             tool_choice = None
          else:
-             tool_choice = {'type': 'auto'}
+             if not self.allow_text_result:
+                 tool_choice = {'type': 'any'}
+             else:
+                 tool_choice = {'type': 'auto'}

-         system_prompt, anthropic_messages = self._map_message(messages)
+             if (allow_parallel_tool_calls := model_settings.get('parallel_tool_calls')) is not None:
+                 tool_choice['disable_parallel_tool_use'] = not allow_parallel_tool_calls

-         model_settings = model_settings or {}
+         system_prompt, anthropic_messages = self._map_message(messages)

          return await self.client.messages.create(
              max_tokens=model_settings.get('max_tokens', 1024),
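
The rewritten `_messages_create` also threads the `parallel_tool_calls` model setting through to Anthropic's `disable_parallel_tool_use` flag, inverting the boolean and leaving the provider default untouched when the setting is absent. A standalone sketch of that tri-state mapping (the helper name is illustrative, not part of the diff):

    from typing import Any

    def apply_parallel_tool_calls(settings: dict[str, Any], tool_choice: dict[str, Any]) -> dict[str, Any]:
        # Absent/None -> provider default; True -> allow parallel calls; False -> disable them.
        if (allow := settings.get('parallel_tool_calls')) is not None:
            tool_choice['disable_parallel_tool_use'] = not allow
        return tool_choice

    assert apply_parallel_tool_calls({}, {'type': 'auto'}) == {'type': 'auto'}
    assert apply_parallel_tool_calls({'parallel_tool_calls': False}, {'type': 'auto'}) == {
        'type': 'auto',
        'disable_parallel_tool_use': True,
    }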
@@ -210,43 +224,33 @@
              timeout=model_settings.get('timeout', NOT_GIVEN),
          )

-     @staticmethod
-     def _process_response(response: AnthropicMessage) -> ModelResponse:
+     def _process_response(self, response: AnthropicMessage) -> ModelResponse:
          """Process a non-streamed response, and prepare a message to return."""
          items: list[ModelResponsePart] = []
          for item in response.content:
              if isinstance(item, TextBlock):
-                 items.append(TextPart(item.text))
+                 items.append(TextPart(content=item.text))
              else:
                  assert isinstance(item, ToolUseBlock), 'unexpected item type'
                  items.append(
                      ToolCallPart.from_raw_args(
-                         item.name,
-                         cast(dict[str, Any], item.input),
-                         item.id,
+                         tool_name=item.name,
+                         args=cast(dict[str, Any], item.input),
+                         tool_call_id=item.id,
                      )
                  )

-         return ModelResponse(items)
+         return ModelResponse(items, model_name=self.model_name)

-     @staticmethod
-     async def _process_streamed_response(response: AsyncStream[RawMessageStreamEvent]) -> EitherStreamedResponse:
-         """TODO: Process a streamed response, and prepare a streaming response to return."""
-         # We don't yet support streamed responses from Anthropic, so we raise an error here for now.
-         # Streamed responses will be supported in a future release.
-
-         raise RuntimeError('Streamed responses are not yet supported for Anthropic models.')
-
-         # Should be returning some sort of AnthropicStreamTextResponse or AnthropicStreamStructuredResponse
-         # depending on the type of chunk we get, but we need to establish how we handle (and when we get) the following:
-         #   RawMessageStartEvent
-         #   RawMessageDeltaEvent
-         #   RawMessageStopEvent
-         #   RawContentBlockStartEvent
-         #   RawContentBlockDeltaEvent
-         #   RawContentBlockStopEvent
-         #
-         # We might refactor streaming internally before we implement this...
+     async def _process_streamed_response(self, response: AsyncStream[RawMessageStreamEvent]) -> StreamedResponse:
+         peekable_response = _utils.PeekableAsyncStream(response)
+         first_chunk = await peekable_response.peek()
+         if isinstance(first_chunk, _utils.Unset):
+             raise UnexpectedModelBehavior('Streamed response ended without content or tool calls')
+
+         # Since Anthropic doesn't provide a timestamp in the message, we'll use the current time
+         timestamp = datetime.now(tz=timezone.utc)
+         return AnthropicStreamedResponse(_model_name=self.model_name, _response=peekable_response, _timestamp=timestamp)

      @staticmethod
      def _map_message(messages: list[ModelMessage]) -> tuple[str, list[MessageParam]]:
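
`_process_streamed_response` now peeks at the first event before constructing the response, so an empty stream surfaces as `UnexpectedModelBehavior` rather than an empty result. `_utils.PeekableAsyncStream` itself is not part of this diff; the sketch below is an illustrative equivalent of the peek-without-consuming idea, not the library's implementation:

    from collections.abc import AsyncIterator
    from typing import Any

    class Unset:
        """Sentinel type: distinguishes 'nothing buffered yet' from a buffered None."""

    UNSET = Unset()

    class PeekableStream:
        def __init__(self, source: AsyncIterator[Any]) -> None:
            self._source = source
            self._buffered: Any = UNSET

        async def peek(self) -> Any:
            # Pull one item into the buffer without consuming it for iteration.
            if isinstance(self._buffered, Unset):
                try:
                    self._buffered = await self._source.__anext__()
                except StopAsyncIteration:
                    return UNSET  # the stream ended before yielding anything
            return self._buffered

        def __aiter__(self) -> 'PeekableStream':
            return self

        async def __anext__(self) -> Any:
            if not isinstance(self._buffered, Unset):
                item, self._buffered = self._buffered, UNSET
                return item
            return await self._source.__anext__()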
@@ -315,30 +319,90 @@ def _map_tool_call(t: ToolCallPart) -> ToolUseBlockParam:
      )


- def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> result.Usage:
+ def _map_usage(message: AnthropicMessage | RawMessageStreamEvent) -> usage.Usage:
      if isinstance(message, AnthropicMessage):
-         usage = message.usage
+         response_usage = message.usage
      else:
          if isinstance(message, RawMessageStartEvent):
-             usage = message.message.usage
+             response_usage = message.message.usage
          elif isinstance(message, RawMessageDeltaEvent):
-             usage = message.usage
+             response_usage = message.usage
          else:
              # No usage information provided in:
              # - RawMessageStopEvent
              # - RawContentBlockStartEvent
              # - RawContentBlockDeltaEvent
              # - RawContentBlockStopEvent
-             usage = None
+             response_usage = None

-     if usage is None:
-         return result.Usage()
+     if response_usage is None:
+         return usage.Usage()

-     request_tokens = getattr(usage, 'input_tokens', None)
+     request_tokens = getattr(response_usage, 'input_tokens', None)

-     return result.Usage(
+     return usage.Usage(
          # Usage coming from the RawMessageDeltaEvent doesn't have input token data, hence this getattr
          request_tokens=request_tokens,
-         response_tokens=usage.output_tokens,
-         total_tokens=(request_tokens or 0) + usage.output_tokens,
+         response_tokens=response_usage.output_tokens,
+         total_tokens=(request_tokens or 0) + response_usage.output_tokens,
      )
+
+
+ @dataclass
+ class AnthropicStreamedResponse(StreamedResponse):
+     """Implementation of `StreamedResponse` for Anthropic models."""
+
+     _response: AsyncIterable[RawMessageStreamEvent]
+     _timestamp: datetime
+
+     async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
+         current_block: TextBlock | ToolUseBlock | None = None
+         current_json: str = ''
+
+         async for event in self._response:
+             self._usage += _map_usage(event)
+
+             if isinstance(event, RawContentBlockStartEvent):
+                 current_block = event.content_block
+                 if isinstance(current_block, TextBlock) and current_block.text:
+                     yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=current_block.text)
+                 elif isinstance(current_block, ToolUseBlock):
+                     maybe_event = self._parts_manager.handle_tool_call_delta(
+                         vendor_part_id=current_block.id,
+                         tool_name=current_block.name,
+                         args=cast(dict[str, Any], current_block.input),
+                         tool_call_id=current_block.id,
+                     )
+                     if maybe_event is not None:
+                         yield maybe_event
+
+             elif isinstance(event, RawContentBlockDeltaEvent):
+                 if isinstance(event.delta, TextDelta):
+                     yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=event.delta.text)
+                 elif (
+                     current_block and event.delta.type == 'input_json_delta' and isinstance(current_block, ToolUseBlock)
+                 ):
+                     # Try to parse the JSON immediately, otherwise cache the value for later. This handles
+                     # cases where the JSON is not currently valid but will be valid once we stream more tokens.
+                     try:
+                         parsed_args = json_loads(current_json + event.delta.partial_json)
+                         current_json = ''
+                     except JSONDecodeError:
+                         current_json += event.delta.partial_json
+                         continue
+
+                     # For tool calls, we need to handle partial JSON updates
+                     maybe_event = self._parts_manager.handle_tool_call_delta(
+                         vendor_part_id=current_block.id,
+                         tool_name='',
+                         args=parsed_args,
+                         tool_call_id=current_block.id,
+                     )
+                     if maybe_event is not None:
+                         yield maybe_event
+
+             elif isinstance(event, (RawContentBlockStopEvent, RawMessageStopEvent)):
+                 current_block = None
+
+     def timestamp(self) -> datetime:
+         return self._timestamp
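
The subtlest part of `_get_event_iterator` above is the `input_json_delta` handling: Anthropic streams tool-call arguments as JSON fragments that are usually invalid on their own, so the iterator buffers fragments and only emits a tool-call delta once the accumulated string parses. An isolated, runnable reduction of that buffering loop (the function name and sample fragments are illustrative):

    from json import JSONDecodeError, loads as json_loads

    def parse_streamed_json(fragments: list[str]) -> list[dict]:
        emitted: list[dict] = []
        buffered = ''
        for fragment in fragments:
            try:
                emitted.append(json_loads(buffered + fragment))
                buffered = ''  # parsed successfully, so reset the buffer
            except JSONDecodeError:
                buffered += fragment  # not yet valid JSON, keep accumulating
        return emitted

    # '{"city": "Par' alone fails to parse, so nothing is emitted until the second fragment:
    assert parse_streamed_json(['{"city": "Par', 'is"}']) == [{'city': 'Paris'}]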
pydantic_ai/models/cohere.py (new file)
@@ -0,0 +1,278 @@
+ from __future__ import annotations as _annotations
+
+ from collections.abc import Iterable
+ from dataclasses import dataclass, field
+ from itertools import chain
+ from typing import Literal, TypeAlias, Union
+
+ from cohere import TextAssistantMessageContentItem
+ from typing_extensions import assert_never
+
+ from .. import result
+ from .._utils import guard_tool_call_id as _guard_tool_call_id
+ from ..messages import (
+     ModelMessage,
+     ModelRequest,
+     ModelResponse,
+     ModelResponsePart,
+     RetryPromptPart,
+     SystemPromptPart,
+     TextPart,
+     ToolCallPart,
+     ToolReturnPart,
+     UserPromptPart,
+ )
+ from ..settings import ModelSettings
+ from ..tools import ToolDefinition
+ from . import (
+     AgentModel,
+     Model,
+     check_allow_model_requests,
+ )
+
+ try:
+     from cohere import (
+         AssistantChatMessageV2,
+         AsyncClientV2,
+         ChatMessageV2,
+         ChatResponse,
+         SystemChatMessageV2,
+         ToolCallV2,
+         ToolCallV2Function,
+         ToolChatMessageV2,
+         ToolV2,
+         ToolV2Function,
+         UserChatMessageV2,
+     )
+     from cohere.v2.client import OMIT
+ except ImportError as _import_error:
+     raise ImportError(
+         'Please install `cohere` to use the Cohere model, '
+         "you can use the `cohere` optional group — `pip install 'pydantic-ai-slim[cohere]'`"
+     ) from _import_error
+
+ CohereModelName: TypeAlias = Union[
+     str,
+     Literal[
+         'c4ai-aya-expanse-32b',
+         'c4ai-aya-expanse-8b',
+         'command',
+         'command-light',
+         'command-light-nightly',
+         'command-nightly',
+         'command-r',
+         'command-r-03-2024',
+         'command-r-08-2024',
+         'command-r-plus',
+         'command-r-plus-04-2024',
+         'command-r-plus-08-2024',
+         'command-r7b-12-2024',
+     ],
+ ]
+
+
+ @dataclass(init=False)
+ class CohereModel(Model):
+     """A model that uses the Cohere API.
+
+     Internally, this uses the [Cohere Python client](
+     https://github.com/cohere-ai/cohere-python) to interact with the API.
+
+     Apart from `__init__`, all methods are private or match those of the base class.
+     """
+
+     model_name: CohereModelName
+     client: AsyncClientV2 = field(repr=False)
+
+     def __init__(
+         self,
+         model_name: CohereModelName,
+         *,
+         api_key: str | None = None,
+         cohere_client: AsyncClientV2 | None = None,
+     ):
+         """Initialize a Cohere model.
+
+         Args:
+             model_name: The name of the Cohere model to use. List of model names
+                 available [here](https://docs.cohere.com/docs/models#command).
+             api_key: The API key to use for authentication, if not provided, the
+                 `COHERE_API_KEY` environment variable will be used if available.
+             cohere_client: An existing Cohere async client to use. If provided,
+                 `api_key` must be `None`.
+         """
+         self.model_name: CohereModelName = model_name
+         if cohere_client is not None:
+             assert api_key is None, 'Cannot provide both `cohere_client` and `api_key`'
+             self.client = cohere_client
+         else:
+             self.client = AsyncClientV2(api_key=api_key)  # type: ignore
+
+     async def agent_model(
+         self,
+         *,
+         function_tools: list[ToolDefinition],
+         allow_text_result: bool,
+         result_tools: list[ToolDefinition],
+     ) -> AgentModel:
+         check_allow_model_requests()
+         tools = [self._map_tool_definition(r) for r in function_tools]
+         if result_tools:
+             tools += [self._map_tool_definition(r) for r in result_tools]
+         return CohereAgentModel(
+             self.client,
+             self.model_name,
+             allow_text_result,
+             tools,
+         )
+
+     def name(self) -> str:
+         return f'cohere:{self.model_name}'
+
+     @staticmethod
+     def _map_tool_definition(f: ToolDefinition) -> ToolV2:
+         return ToolV2(
+             type='function',
+             function=ToolV2Function(
+                 name=f.name,
+                 description=f.description,
+                 parameters=f.parameters_json_schema,
+             ),
+         )
+
+
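`CohereModel` follows the same constructor pattern as the other providers in this package. A hedged usage sketch (the module path `pydantic_ai.models.cohere` is inferred from the relative imports above, and `COHERE_API_KEY` is assumed to be set in the environment):

    from pydantic_ai.models.cohere import CohereModel

    model = CohereModel('command-r-plus')  # reads COHERE_API_KEY from the environment
    print(model.name())  # -> 'cohere:command-r-plus'

    # Or with a preconfigured client (api_key must then be omitted):
    # from cohere import AsyncClientV2
    # model = CohereModel('command-r-plus', cohere_client=AsyncClientV2(api_key='...'))
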
+ @dataclass
+ class CohereAgentModel(AgentModel):
+     """Implementation of `AgentModel` for Cohere models."""
+
+     client: AsyncClientV2
+     model_name: CohereModelName
+     allow_text_result: bool
+     tools: list[ToolV2]
+
+     async def request(
+         self, messages: list[ModelMessage], model_settings: ModelSettings | None
+     ) -> tuple[ModelResponse, result.Usage]:
+         response = await self._chat(messages, model_settings)
+         return self._process_response(response), _map_usage(response)
+
+     async def _chat(
+         self,
+         messages: list[ModelMessage],
+         model_settings: ModelSettings | None,
+     ) -> ChatResponse:
+         cohere_messages = list(chain(*(self._map_message(m) for m in messages)))
+         model_settings = model_settings or {}
+         return await self.client.chat(
+             model=self.model_name,
+             messages=cohere_messages,
+             tools=self.tools or OMIT,
+             max_tokens=model_settings.get('max_tokens', OMIT),
+             temperature=model_settings.get('temperature', OMIT),
+             p=model_settings.get('top_p', OMIT),
+         )
+
+     def _process_response(self, response: ChatResponse) -> ModelResponse:
+         """Process a non-streamed response, and prepare a message to return."""
+         parts: list[ModelResponsePart] = []
+         if response.message.content is not None and len(response.message.content) > 0:
+             # While Cohere's API returns a list, it only does that for future proofing
+             # and currently only one item is being returned.
+             choice = response.message.content[0]
+             parts.append(TextPart(choice.text))
+         for c in response.message.tool_calls or []:
+             if c.function and c.function.name and c.function.arguments:
+                 parts.append(
+                     ToolCallPart.from_raw_args(
+                         tool_name=c.function.name,
+                         args=c.function.arguments,
+                         tool_call_id=c.id,
+                     )
+                 )
+         return ModelResponse(parts=parts, model_name=self.model_name)
+
+     @classmethod
+     def _map_message(cls, message: ModelMessage) -> Iterable[ChatMessageV2]:
+         """Just maps a `pydantic_ai.Message` to a `cohere.ChatMessageV2`."""
+         if isinstance(message, ModelRequest):
+             yield from cls._map_user_message(message)
+         elif isinstance(message, ModelResponse):
+             texts: list[str] = []
+             tool_calls: list[ToolCallV2] = []
+             for item in message.parts:
+                 if isinstance(item, TextPart):
+                     texts.append(item.content)
+                 elif isinstance(item, ToolCallPart):
+                     tool_calls.append(_map_tool_call(item))
+                 else:
+                     assert_never(item)
+             message_param = AssistantChatMessageV2(role='assistant')
+             if texts:
+                 message_param.content = [TextAssistantMessageContentItem(text='\n\n'.join(texts))]
+             if tool_calls:
+                 message_param.tool_calls = tool_calls
+             yield message_param
+         else:
+             assert_never(message)
+
+     @classmethod
+     def _map_user_message(cls, message: ModelRequest) -> Iterable[ChatMessageV2]:
+         for part in message.parts:
+             if isinstance(part, SystemPromptPart):
+                 yield SystemChatMessageV2(role='system', content=part.content)
+             elif isinstance(part, UserPromptPart):
+                 yield UserChatMessageV2(role='user', content=part.content)
+             elif isinstance(part, ToolReturnPart):
+                 yield ToolChatMessageV2(
+                     role='tool',
+                     tool_call_id=_guard_tool_call_id(t=part, model_source='Cohere'),
+                     content=part.model_response_str(),
+                 )
+             elif isinstance(part, RetryPromptPart):
+                 if part.tool_name is None:
+                     yield UserChatMessageV2(role='user', content=part.model_response())
+                 else:
+                     yield ToolChatMessageV2(
+                         role='tool',
+                         tool_call_id=_guard_tool_call_id(t=part, model_source='Cohere'),
+                         content=part.model_response(),
+                     )
+             else:
+                 assert_never(part)
+
+
+ def _map_tool_call(t: ToolCallPart) -> ToolCallV2:
+     return ToolCallV2(
+         id=_guard_tool_call_id(t=t, model_source='Cohere'),
+         type='function',
+         function=ToolCallV2Function(
+             name=t.tool_name,
+             arguments=t.args_as_json_str(),
+         ),
+     )
+
+
+ def _map_usage(response: ChatResponse) -> result.Usage:
+     usage = response.usage
+     if usage is None:
+         return result.Usage()
+     else:
+         details: dict[str, int] = {}
+         if usage.billed_units is not None:
+             if usage.billed_units.input_tokens:
+                 details['input_tokens'] = int(usage.billed_units.input_tokens)
+             if usage.billed_units.output_tokens:
+                 details['output_tokens'] = int(usage.billed_units.output_tokens)
+             if usage.billed_units.search_units:
+                 details['search_units'] = int(usage.billed_units.search_units)
+             if usage.billed_units.classifications:
+                 details['classifications'] = int(usage.billed_units.classifications)
+
+         request_tokens = int(usage.tokens.input_tokens) if usage.tokens and usage.tokens.input_tokens else None
+         response_tokens = int(usage.tokens.output_tokens) if usage.tokens and usage.tokens.output_tokens else None
+         return result.Usage(
+             request_tokens=request_tokens,
+             response_tokens=response_tokens,
+             total_tokens=(request_tokens or 0) + (response_tokens or 0),
+             details=details,
+         )
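
`_map_usage` keeps two accountings separate: raw token counts populate `request_tokens`, `response_tokens`, and `total_tokens`, while billed units (which can differ from raw counts) go into `details`. A worked example with illustrative numbers, assuming `Usage` is importable from `pydantic_ai.result` as the `from .. import result` above suggests:

    from pydantic_ai.result import Usage

    request_tokens, response_tokens = 120, 80  # illustrative raw token counts, not real API output
    u = Usage(
        request_tokens=request_tokens,
        response_tokens=response_tokens,
        total_tokens=(request_tokens or 0) + (response_tokens or 0),
        details={'input_tokens': 100, 'output_tokens': 80},  # billed units per the mapping above
    )
    assert u.total_tokens == 200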