llama-index-llms-openai 0.3.28__py3-none-any.whl → 0.3.29__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- llama_index/llms/openai/__init__.py +2 -1
- llama_index/llms/openai/responses.py +952 -0
- llama_index/llms/openai/utils.py +130 -8
- {llama_index_llms_openai-0.3.28.dist-info → llama_index_llms_openai-0.3.29.dist-info}/METADATA +2 -2
- llama_index_llms_openai-0.3.29.dist-info/RECORD +9 -0
- llama_index_llms_openai-0.3.28.dist-info/RECORD +0 -8
- {llama_index_llms_openai-0.3.28.dist-info → llama_index_llms_openai-0.3.29.dist-info}/LICENSE +0 -0
- {llama_index_llms_openai-0.3.28.dist-info → llama_index_llms_openai-0.3.29.dist-info}/WHEEL +0 -0
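
The headline change in 0.3.29 is a new `OpenAIResponses` class built on OpenAI's Responses API, exported from the package root alongside the existing `OpenAI` class, plus Responses-API message-conversion helpers in `utils.py`. For orientation before the full diff, here is a minimal usage sketch lifted from the docstring added in `responses.py`; the model name and API key are placeholder values.

```python
# Minimal sketch of the new class added in 0.3.29, taken from the docstring
# in responses.py below; "gpt-4o-mini" and "sk-..." are placeholders.
from llama_index.llms.openai import OpenAIResponses

llm = OpenAIResponses(model="gpt-4o-mini", api_key="sk-...")

response = llm.complete("Hi, write a short story")
print(response.text)
```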
llama_index/llms/openai/__init__.py
CHANGED
@@ -1,3 +1,4 @@
 from llama_index.llms.openai.base import AsyncOpenAI, OpenAI, SyncOpenAI, Tokenizer
+from llama_index.llms.openai.responses import OpenAIResponses

-__all__ = ["OpenAI", "Tokenizer", "SyncOpenAI", "AsyncOpenAI"]
+__all__ = ["OpenAI", "OpenAIResponses", "Tokenizer", "SyncOpenAI", "AsyncOpenAI"]
llama_index/llms/openai/responses.py
ADDED
@@ -0,0 +1,952 @@
+import functools
+import httpx
+import tiktoken
+from openai import AsyncOpenAI, AzureOpenAI
+from openai import OpenAI as SyncOpenAI
+from openai.types.responses import (
+    Response,
+    ResponseStreamEvent,
+    ResponseCompletedEvent,
+    ResponseCreatedEvent,
+    ResponseFileSearchCallCompletedEvent,
+    ResponseFunctionCallArgumentsDeltaEvent,
+    ResponseFunctionCallArgumentsDoneEvent,
+    ResponseInProgressEvent,
+    ResponseOutputItemAddedEvent,
+    ResponseTextAnnotationDeltaEvent,
+    ResponseTextDeltaEvent,
+    ResponseWebSearchCallCompletedEvent,
+    ResponseOutputItem,
+    ResponseOutputMessage,
+    ResponseFileSearchToolCall,
+    ResponseFunctionToolCall,
+    ResponseFunctionWebSearch,
+    ResponseComputerToolCall,
+    ResponseReasoningItem,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncGenerator,
+    Callable,
+    Dict,
+    Generator,
+    List,
+    Literal,
+    Optional,
+    Protocol,
+    Sequence,
+    Tuple,
+    Type,
+    Union,
+    runtime_checkable,
+)
+
+import llama_index.core.instrumentation as instrument
+from llama_index.core.base.llms.generic_utils import (
+    achat_to_completion_decorator,
+    astream_chat_to_completion_decorator,
+    chat_to_completion_decorator,
+    stream_chat_to_completion_decorator,
+)
+from llama_index.core.base.llms.types import (
+    ChatMessage,
+    ChatResponse,
+    ChatResponseAsyncGen,
+    ChatResponseGen,
+    CompletionResponse,
+    CompletionResponseAsyncGen,
+    CompletionResponseGen,
+    LLMMetadata,
+    MessageRole,
+    TextBlock,
+)
+from llama_index.core.bridge.pydantic import (
+    Field,
+    PrivateAttr,
+)
+from llama_index.core.constants import (
+    DEFAULT_TEMPERATURE,
+)
+from llama_index.core.llms.callbacks import (
+    llm_chat_callback,
+    llm_completion_callback,
+)
+from llama_index.core.llms.function_calling import FunctionCallingLLM
+from llama_index.core.llms.llm import ToolSelection, Model
+from llama_index.core.llms.utils import parse_partial_json
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.program.utils import FlexibleModel
+from llama_index.llms.openai.utils import (
+    O1_MODELS,
+    create_retry_decorator,
+    is_function_calling_model,
+    openai_modelname_to_contextsize,
+    resolve_openai_credentials,
+    resolve_tool_choice,
+    to_openai_message_dicts,
+)
+
+
+dispatcher = instrument.get_dispatcher(__name__)
+
+if TYPE_CHECKING:
+    from llama_index.core.tools.types import BaseTool
+
+DEFAULT_OPENAI_MODEL = "gpt-4o-mini"
+
+
+def llm_retry_decorator(f: Callable[..., Any]) -> Callable[..., Any]:
+    @functools.wraps(f)
+    def wrapper(self, *args: Any, **kwargs: Any) -> Any:
+        max_retries = getattr(self, "max_retries", 0)
+        if max_retries <= 0:
+            return f(self, *args, **kwargs)
+
+        retry = create_retry_decorator(
+            max_retries=max_retries,
+            random_exponential=True,
+            stop_after_delay_seconds=60,
+            min_seconds=1,
+            max_seconds=20,
+        )
+        return retry(f)(self, *args, **kwargs)
+
+    return wrapper
+
+
+@runtime_checkable
+class Tokenizer(Protocol):
+    """Tokenizers support an encode function that returns a list of ints."""
+
+    def encode(self, text: str) -> List[int]:  # fmt: skip
+        ...
+
+
+def force_single_tool_call(response: ChatResponse) -> None:
+    tool_calls = response.message.additional_kwargs.get("tool_calls", [])
+    if len(tool_calls) > 1:
+        response.message.additional_kwargs["tool_calls"] = [tool_calls[0]]
+
+
+class OpenAIResponses(FunctionCallingLLM):
+    """
+    OpenAI Responses LLM.
+
+    Args:
+        model: name of the OpenAI model to use.
+        temperature: a float from 0 to 1 controlling randomness in generation; higher will lead to more creative, less deterministic responses.
+        max_output_tokens: the maximum number of tokens to generate.
+        include: Additional output data to include in the model response.
+        instructions: Instructions for the model to follow.
+        track_previous_responses: Whether to track previous responses. If true, the LLM class will statefully track previous responses.
+        store: Whether to store previous responses in OpenAI's storage.
+        built_in_tools: The built-in tools to use for the model to augment responses.
+        truncation: Whether to auto-truncate the input if it exceeds the model's context window.
+        user: An optional identifier to help track the user's requests for abuse.
+        strict: Whether to enforce strict validation of the structured output.
+        additional_kwargs: Add additional parameters to OpenAI request body.
+        max_retries: How many times to retry the API call if it fails.
+        timeout: How long to wait, in seconds, for an API call before failing.
+        api_key: Your OpenAI api key
+        api_base: The base URL of the API to call
+        api_version: the version of the API to call
+        default_headers: override the default headers for API requests.
+        http_client: pass in your own httpx.Client instance.
+        async_http_client: pass in your own httpx.AsyncClient instance.
+
+    Examples:
+        `pip install llama-index-llms-openai`
+
+        ```python
+        from llama_index.llms.openai import OpenAIResponses
+
+        llm = OpenAIResponses(model="gpt-4o-mini", api_key="sk-...")
+
+        response = llm.complete("Hi, write a short story")
+        print(response.text)
+        ```
+    """
+
+    model: str = Field(
+        default=DEFAULT_OPENAI_MODEL, description="The OpenAI model to use."
+    )
+    temperature: float = Field(
+        default=DEFAULT_TEMPERATURE,
+        description="The temperature to use during generation.",
+        ge=0.0,
+        le=2.0,
+    )
+    top_p: float = Field(
+        default=1.0,
+        description="The top-p value to use during generation.",
+        ge=0.0,
+        le=1.0,
+    )
+    max_output_tokens: Optional[int] = Field(
+        description="The maximum number of tokens to generate.",
+        gt=0,
+    )
+    include: Optional[List[str]] = Field(
+        default=None,
+        description="Additional output data to include in the model response.",
+    )
+    instructions: Optional[str] = Field(
+        default=None,
+        description="Instructions for the model to follow.",
+    )
+    track_previous_responses: bool = Field(
+        default=False,
+        description="Whether to track previous responses. If true, the LLM class will statefully track previous responses.",
+    )
+    store: bool = Field(
+        default=False,
+        description="Whether to store previous responses in OpenAI's storage.",
+    )
+    built_in_tools: Optional[List[dict]] = Field(
+        default=None,
+        description="The built-in tools to use for the model to augment responses.",
+    )
+    truncation: str = Field(
+        default="disabled",
+        description="Whether to auto-truncate the input if it exceeds the model's context window.",
+    )
+    user: Optional[str] = Field(
+        default=None,
+        description="An optional identifier to help track the user's requests for abuse.",
+    )
+    call_metadata: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Metadata to include in the API call.",
+    )
+    additional_kwargs: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Additional kwargs for the OpenAI API at inference time.",
+    )
+    max_retries: int = Field(
+        default=3,
+        description="The maximum number of API retries.",
+        ge=0,
+    )
+    timeout: float = Field(
+        default=60.0,
+        description="The timeout, in seconds, for API requests.",
+        ge=0,
+    )
+    strict: bool = Field(
+        default=False,
+        description="Whether to enforce strict validation of the structured output.",
+    )
+    default_headers: Optional[Dict[str, str]] = Field(
+        default=None, description="The default headers for API requests."
+    )
+    api_key: str = Field(default=None, description="The OpenAI API key.")
+    api_base: str = Field(description="The base URL for OpenAI API.")
+    api_version: str = Field(description="The API version for OpenAI API.")
+    reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+        default=None,
+        description="The effort to use for reasoning models.",
+    )
+
+    _client: SyncOpenAI = PrivateAttr()
+    _aclient: AsyncOpenAI = PrivateAttr()
+    _http_client: Optional[httpx.Client] = PrivateAttr()
+    _async_http_client: Optional[httpx.AsyncClient] = PrivateAttr()
+    _previous_response_id: Optional[str] = PrivateAttr()
+
+    def __init__(
+        self,
+        model: str = DEFAULT_OPENAI_MODEL,
+        temperature: float = DEFAULT_TEMPERATURE,
+        max_output_tokens: Optional[int] = None,
+        reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
+        include: Optional[List[str]] = None,
+        instructions: Optional[str] = None,
+        track_previous_responses: bool = False,
+        store: bool = False,
+        built_in_tools: Optional[List[dict]] = None,
+        truncation: str = "disabled",
+        user: Optional[str] = None,
+        previous_response_id: Optional[str] = None,
+        call_metadata: Optional[Dict[str, Any]] = None,
+        strict: bool = False,
+        additional_kwargs: Optional[Dict[str, Any]] = None,
+        max_retries: int = 3,
+        timeout: float = 60.0,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_version: Optional[str] = None,
+        default_headers: Optional[Dict[str, str]] = None,
+        http_client: Optional[httpx.Client] = None,
+        async_http_client: Optional[httpx.AsyncClient] = None,
+        openai_client: Optional[SyncOpenAI] = None,
+        async_openai_client: Optional[AsyncOpenAI] = None,
+        **kwargs: Any,
+    ) -> None:
+        additional_kwargs = additional_kwargs or {}
+
+        api_key, api_base, api_version = resolve_openai_credentials(
+            api_key=api_key,
+            api_base=api_base,
+            api_version=api_version,
+        )
+
+        # TODO: Temp forced to 1.0 for o1
+        if model in O1_MODELS:
+            temperature = 1.0
+
+        super().__init__(
+            model=model,
+            temperature=temperature,
+            max_output_tokens=max_output_tokens,
+            reasoning_effort=reasoning_effort,
+            include=include,
+            instructions=instructions,
+            track_previous_responses=track_previous_responses,
+            store=store,
+            built_in_tools=built_in_tools,
+            truncation=truncation,
+            user=user,
+            additional_kwargs=additional_kwargs,
+            max_retries=max_retries,
+            api_key=api_key,
+            api_version=api_version,
+            api_base=api_base,
+            timeout=timeout,
+            default_headers=default_headers,
+            call_metadata=call_metadata,
+            strict=strict,
+            **kwargs,
+        )
+
+        self._previous_response_id = previous_response_id
+
+        # store is set to true if track_previous_responses is true
+        if self.track_previous_responses:
+            self.store = True
+
+        self._http_client = http_client
+        self._async_http_client = async_http_client
+        self._client = openai_client or SyncOpenAI(**self._get_credential_kwargs())
+        self._aclient = async_openai_client or AsyncOpenAI(
+            **self._get_credential_kwargs(is_async=True)
+        )
+
+    @classmethod
+    def class_name(cls) -> str:
+        return "openai_responses_llm"
+
+    @property
+    def metadata(self) -> LLMMetadata:
+        return LLMMetadata(
+            context_window=openai_modelname_to_contextsize(self._get_model_name()),
+            num_output=self.max_output_tokens or -1,
+            is_chat_model=True,
+            is_function_calling_model=is_function_calling_model(
+                model=self._get_model_name()
+            ),
+            model_name=self.model,
+        )
+
+    @property
+    def _tokenizer(self) -> Optional[Tokenizer]:
+        """
+        Get a tokenizer for this model, or None if a tokenizing method is unknown.
+
+        OpenAI can do this using the tiktoken package, subclasses may not have
+        this convenience.
+        """
+        return tiktoken.encoding_for_model(self._get_model_name())
+
+    def _get_model_name(self) -> str:
+        model_name = self.model
+        if "ft-" in model_name:  # legacy fine-tuning
+            model_name = model_name.split(":")[0]
+        elif model_name.startswith("ft:"):
+            model_name = model_name.split(":")[1]
+        return model_name
+
+    def _is_azure_client(self) -> bool:
+        return isinstance(self._get_client(), AzureOpenAI)
+
+    def _get_credential_kwargs(self, is_async: bool = False) -> Dict[str, Any]:
+        return {
+            "api_key": self.api_key,
+            "base_url": self.api_base,
+            "max_retries": self.max_retries,
+            "timeout": self.timeout,
+            "default_headers": self.default_headers,
+            "http_client": self._async_http_client if is_async else self._http_client,
+        }
+
+    def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
+        model_kwargs = {
+            "model": self.model,
+            "include": self.include,
+            "instructions": self.instructions,
+            "max_output_tokens": self.max_output_tokens,
+            "metadata": self.call_metadata,
+            "previous_response_id": self._previous_response_id,
+            "store": self.store,
+            "temperature": self.temperature,
+            "tools": self.built_in_tools,
+            "top_p": self.top_p,
+            "truncation": self.truncation,
+            "user": self.user,
+        }
+
+        if self.model in O1_MODELS and self.reasoning_effort is not None:
+            # O1 models support reasoning_effort of low, medium, high
+            model_kwargs["reasoning_effort"] = {"effort": self.reasoning_effort}
+
+        # add tools or extend openai tools
+        if "tools" in kwargs:
+            if isinstance(model_kwargs["tools"], list):
+                model_kwargs["tools"].extend(kwargs.pop("tools"))
+            else:
+                model_kwargs["tools"] = kwargs.pop("tools")
+
+        # priority is class args > additional_kwargs > runtime args
+        model_kwargs.update(self.additional_kwargs)
+
+        kwargs = kwargs or {}
+        model_kwargs.update(kwargs)
+
+        return model_kwargs
+
+    @llm_chat_callback()
+    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        return self._chat(messages, **kwargs)
+
+    @llm_chat_callback()
+    def stream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseGen:
+        return self._stream_chat(messages, **kwargs)
+
+    @llm_completion_callback()
+    def complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponse:
+        complete_fn = chat_to_completion_decorator(self._chat)
+
+        return complete_fn(prompt, **kwargs)
+
+    @llm_completion_callback()
+    def stream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseGen:
+        stream_complete_fn = stream_chat_to_completion_decorator(self._stream_chat)
+
+        return stream_complete_fn(prompt, **kwargs)
+
+    def _parse_response_output(self, output: List[ResponseOutputItem]) -> ChatResponse:
+        message = ChatMessage(role=MessageRole.ASSISTANT, blocks=[])
+        additional_kwargs = {"built_in_tool_calls": []}
+        tool_calls = []
+        for item in output:
+            if isinstance(item, ResponseOutputMessage):
+                blocks = []
+                for part in item.content:
+                    if hasattr(part, "text"):
+                        blocks.append(TextBlock(text=part.text))
+                    if hasattr(part, "annotations"):
+                        additional_kwargs["annotations"] = part.annotations
+                    if hasattr(part, "refusal"):
+                        additional_kwargs["refusal"] = part.refusal
+
+                message.blocks.extend(blocks)
+            elif isinstance(item, ResponseFileSearchToolCall):
+                additional_kwargs["built_in_tool_calls"].append(item)
+            elif isinstance(item, ResponseFunctionToolCall):
+                tool_calls.append(item)
+            elif isinstance(item, ResponseFunctionWebSearch):
+                additional_kwargs["built_in_tool_calls"].append(item)
+            elif isinstance(item, ResponseComputerToolCall):
+                additional_kwargs["built_in_tool_calls"].append(item)
+            elif isinstance(item, ResponseReasoningItem):
+                additional_kwargs["reasoning"] = item
+
+        if tool_calls and message:
+            message.additional_kwargs["tool_calls"] = tool_calls
+
+        return ChatResponse(message=message, additional_kwargs=additional_kwargs)
+
+    @llm_retry_decorator
+    def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        message_dicts = to_openai_message_dicts(
+            messages,
+            model=self.model,
+            is_responses_api=True,
+        )
+
+        response: Response = self._client.responses.create(
+            input=message_dicts,
+            stream=False,
+            **self._get_model_kwargs(**kwargs),
+        )
+
+        if self.track_previous_responses:
+            self._previous_response_id = response.id
+
+        chat_response = self._parse_response_output(response.output)
+        chat_response.raw = response
+        chat_response.additional_kwargs["usage"] = response.usage
+
+        return chat_response
+
+    @staticmethod
+    def process_response_event(
+        event: ResponseStreamEvent,
+        content: str,
+        tool_calls: List[ResponseFunctionToolCall],
+        built_in_tool_calls: List[Any],
+        additional_kwargs: Dict[str, Any],
+        current_tool_call: Optional[ResponseFunctionToolCall],
+        track_previous_responses: bool,
+        previous_response_id: Optional[str] = None,
+    ) -> Tuple[
+        str,
+        List[ResponseFunctionToolCall],
+        List[Any],
+        Dict[str, Any],
+        Optional[ResponseFunctionToolCall],
+        Optional[str],
+        str,
+    ]:
+        """
+        Process a ResponseStreamEvent and update the state accordingly.
+
+        Args:
+            event: The response stream event to process
+            content: Current accumulated content string
+            tool_calls: List of completed tool calls
+            built_in_tool_calls: List of built-in tool calls
+            additional_kwargs: Additional keyword arguments to include in ChatResponse
+            current_tool_call: The currently in-progress tool call, if any
+            track_previous_responses: Whether to track previous response IDs
+            previous_response_id: Previous response ID if tracking
+
+        Returns:
+            A tuple containing the updated state:
+            (content, tool_calls, built_in_tool_calls, additional_kwargs, current_tool_call, updated_previous_response_id, delta)
+        """
+        delta = ""
+        updated_previous_response_id = previous_response_id
+
+        if isinstance(event, ResponseCreatedEvent) or isinstance(
+            event, ResponseInProgressEvent
+        ):
+            # Initial events, track the response id
+            if track_previous_responses:
+                updated_previous_response_id = event.response.id
+        elif isinstance(event, ResponseOutputItemAddedEvent):
+            # New output item (message, tool call, etc.)
+            if isinstance(event.item, ResponseFunctionToolCall):
+                current_tool_call = event.item
+        elif isinstance(event, ResponseTextDeltaEvent):
+            # Text content is being added
+            delta = event.delta
+            content += delta
+        elif isinstance(event, ResponseFunctionCallArgumentsDeltaEvent):
+            # Function call arguments are being streamed
+            if current_tool_call is not None:
+                current_tool_call.arguments += event.delta
+        elif isinstance(event, ResponseFunctionCallArgumentsDoneEvent):
+            # Function call arguments are complete
+            if current_tool_call is not None:
+                current_tool_call.arguments = event.arguments
+                current_tool_call.status = "completed"
+
+                # append a copy of the tool call to the list
+                tool_calls.append(
+                    ResponseFunctionToolCall(**current_tool_call.model_dump())
+                )
+
+                # clear the current tool call
+                current_tool_call = None
+        elif isinstance(event, ResponseTextAnnotationDeltaEvent):
+            # Annotations for the text
+            annotations = additional_kwargs.get("annotations", [])
+            annotations.append(event.annotation)
+            additional_kwargs["annotations"] = annotations
+        elif isinstance(event, ResponseFileSearchCallCompletedEvent):
+            # File search tool call completed
+            built_in_tool_calls.append(event)
+        elif isinstance(event, ResponseWebSearchCallCompletedEvent):
+            # Web search tool call completed
+            built_in_tool_calls.append(event)
+        elif isinstance(event, ResponseReasoningItem):
+            # Reasoning information
+            additional_kwargs["reasoning"] = event
+        elif isinstance(event, ResponseCompletedEvent):
+            # Response is complete
+            if hasattr(event, "response") and hasattr(event.response, "usage"):
+                additional_kwargs["usage"] = event.response.usage
+
+        return (
+            content,
+            tool_calls,
+            built_in_tool_calls,
+            additional_kwargs,
+            current_tool_call,
+            updated_previous_response_id,
+            delta,
+        )
+
+    @llm_retry_decorator
+    def _stream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseGen:
+        message_dicts = to_openai_message_dicts(
+            messages,
+            model=self.model,
+            is_responses_api=True,
+        )
+
+        def gen() -> ChatResponseGen:
+            content = ""
+            tool_calls = []
+            built_in_tool_calls = []
+            additional_kwargs = {"built_in_tool_calls": []}
+            current_tool_call: Optional[ResponseFunctionToolCall] = None
+            local_previous_response_id = self._previous_response_id
+
+            for event in self._client.responses.create(
+                input=message_dicts,
+                stream=True,
+                **self._get_model_kwargs(**kwargs),
+            ):
+                # Process the event and update state
+                (
+                    content,
+                    tool_calls,
+                    built_in_tool_calls,
+                    additional_kwargs,
+                    current_tool_call,
+                    local_previous_response_id,
+                    delta,
+                ) = OpenAIResponses.process_response_event(
+                    event=event,
+                    content=content,
+                    tool_calls=tool_calls,
+                    built_in_tool_calls=built_in_tool_calls,
+                    additional_kwargs=additional_kwargs,
+                    current_tool_call=current_tool_call,
+                    track_previous_responses=self.track_previous_responses,
+                    previous_response_id=local_previous_response_id,
+                )
+
+                if (
+                    self.track_previous_responses
+                    and local_previous_response_id != self._previous_response_id
+                ):
+                    self._previous_response_id = local_previous_response_id
+
+                if built_in_tool_calls:
+                    additional_kwargs["built_in_tool_calls"] = built_in_tool_calls
+
+                # For any event, yield a ChatResponse with the current state
+                yield ChatResponse(
+                    message=ChatMessage(
+                        role=MessageRole.ASSISTANT,
+                        content=content,
+                        additional_kwargs={"tool_calls": tool_calls}
+                        if tool_calls
+                        else {},
+                    ),
+                    delta=delta,
+                    raw=event,
+                    additional_kwargs=additional_kwargs,
+                )
+
+        return gen()
+
+    # ===== Async Endpoints =====
+    @llm_chat_callback()
+    async def achat(
+        self,
+        messages: Sequence[ChatMessage],
+        **kwargs: Any,
+    ) -> ChatResponse:
+        return await self._achat(messages, **kwargs)
+
+    @llm_chat_callback()
+    async def astream_chat(
+        self,
+        messages: Sequence[ChatMessage],
+        **kwargs: Any,
+    ) -> ChatResponseAsyncGen:
+        return await self._astream_chat(messages, **kwargs)
+
+    @llm_completion_callback()
+    async def acomplete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponse:
+        acomplete_fn = achat_to_completion_decorator(self._achat)
+
+        return await acomplete_fn(prompt, **kwargs)
+
+    @llm_completion_callback()
+    async def astream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseAsyncGen:
+        astream_complete_fn = astream_chat_to_completion_decorator(self._astream_chat)
+
+        return await astream_complete_fn(prompt, **kwargs)
+
+    @llm_retry_decorator
+    async def _achat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponse:
+        message_dicts = to_openai_message_dicts(
+            messages,
+            model=self.model,
+            is_responses_api=True,
+        )
+
+        response: Response = await self._aclient.responses.create(
+            input=message_dicts,
+            stream=False,
+            **self._get_model_kwargs(**kwargs),
+        )
+
+        if self.track_previous_responses:
+            self._previous_response_id = response.id
+
+        chat_response = self._parse_response_output(response.output)
+        chat_response.raw = response
+        chat_response.additional_kwargs["usage"] = response.usage
+
+        return chat_response
+
+    @llm_retry_decorator
+    async def _astream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseAsyncGen:
+        message_dicts = to_openai_message_dicts(
+            messages,
+            model=self.model,
+            is_responses_api=True,
+        )
+
+        async def gen() -> ChatResponseAsyncGen:
+            content = ""
+            tool_calls = []
+            built_in_tool_calls = []
+            additional_kwargs = {"built_in_tool_calls": []}
+            current_tool_call: Optional[ResponseFunctionToolCall] = None
+            local_previous_response_id = self._previous_response_id
+
+            response_stream = await self._aclient.responses.create(
+                input=message_dicts,
+                stream=True,
+                **self._get_model_kwargs(**kwargs),
+            )
+
+            async for event in response_stream:
+                # Process the event and update state
+                (
+                    content,
+                    tool_calls,
+                    built_in_tool_calls,
+                    additional_kwargs,
+                    current_tool_call,
+                    local_previous_response_id,
+                    delta,
+                ) = OpenAIResponses.process_response_event(
+                    event=event,
+                    content=content,
+                    tool_calls=tool_calls,
+                    built_in_tool_calls=built_in_tool_calls,
+                    additional_kwargs=additional_kwargs,
+                    current_tool_call=current_tool_call,
+                    track_previous_responses=self.track_previous_responses,
+                    previous_response_id=local_previous_response_id,
+                )
+
+                if (
+                    self.track_previous_responses
+                    and local_previous_response_id != self._previous_response_id
+                ):
+                    self._previous_response_id = local_previous_response_id
+
+                if built_in_tool_calls:
+                    additional_kwargs["built_in_tool_calls"] = built_in_tool_calls
+
+                # For any event, yield a ChatResponse with the current state
+                yield ChatResponse(
+                    message=ChatMessage(
+                        role=MessageRole.ASSISTANT,
+                        content=content,
+                        additional_kwargs={"tool_calls": tool_calls}
+                        if tool_calls
+                        else {},
+                    ),
+                    delta=delta,
+                    raw=event,
+                    additional_kwargs=additional_kwargs,
+                )
+
+        return gen()
+
+    def _prepare_chat_with_tools(
+        self,
+        tools: Sequence["BaseTool"],
+        user_msg: Optional[Union[str, ChatMessage]] = None,
+        chat_history: Optional[List[ChatMessage]] = None,
+        allow_parallel_tool_calls: bool = True,
+        tool_choice: Union[str, dict] = "auto",
+        verbose: bool = False,
+        strict: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Predict and call the tool."""
+
+        # openai responses api has a slightly different tool spec format
+        tool_specs = [
+            {"type": "function", **tool.metadata.to_openai_tool()["function"]}
+            for tool in tools
+        ]
+
+        if strict is not None:
+            strict = strict
+        else:
+            strict = self.strict
+
+        if strict:
+            for tool_spec in tool_specs:
+                tool_spec["strict"] = True
+                tool_spec["parameters"]["additionalProperties"] = False
+
+        if isinstance(user_msg, str):
+            user_msg = ChatMessage(role=MessageRole.USER, content=user_msg)
+
+        messages = chat_history or []
+        if user_msg:
+            messages.append(user_msg)
+
+        return {
+            "messages": messages,
+            "tools": tool_specs or None,
+            "tool_choice": resolve_tool_choice(tool_choice) if tool_specs else None,
+            "parallel_tool_calls": allow_parallel_tool_calls,
+            **kwargs,
+        }
+
+    def get_tool_calls_from_response(
+        self,
+        response: "ChatResponse",
+        error_on_no_tool_call: bool = True,
+        **kwargs: Any,
+    ) -> List[ToolSelection]:
+        """Predict and call the tool."""
+        tool_calls: List[
+            ResponseFunctionToolCall
+        ] = response.message.additional_kwargs.get("tool_calls", [])
+
+        if len(tool_calls) < 1:
+            if error_on_no_tool_call:
+                raise ValueError(
+                    f"Expected at least one tool call, but got {len(tool_calls)} tool calls."
+                )
+            else:
+                return []
+
+        tool_selections = []
+        for tool_call in tool_calls:
+            # this should handle both complete and partial jsons
+            try:
+                argument_dict = parse_partial_json(tool_call.arguments)
+            except ValueError:
+                argument_dict = {}
+
+            tool_selections.append(
+                ToolSelection(
+                    tool_id=tool_call.call_id,
+                    tool_name=tool_call.name,
+                    tool_kwargs=argument_dict,
+                )
+            )
+
+        return tool_selections
+
+    @dispatcher.span
+    def structured_predict(
+        self,
+        output_cls: Type[Model],
+        prompt: PromptTemplate,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+        **prompt_args: Any,
+    ) -> Model:
+        """Structured predict."""
+        llm_kwargs = llm_kwargs or {}
+
+        llm_kwargs["tool_choice"] = (
+            "required" if "tool_choice" not in llm_kwargs else llm_kwargs["tool_choice"]
+        )
+        # by default structured prediction uses function calling to extract structured outputs
+        # here we force tool_choice to be required
+        return super().structured_predict(
+            output_cls, prompt, llm_kwargs=llm_kwargs, **prompt_args
+        )
+
+    @dispatcher.span
+    async def astructured_predict(
+        self,
+        output_cls: Type[Model],
+        prompt: PromptTemplate,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+        **prompt_args: Any,
+    ) -> Model:
+        """Structured predict."""
+        llm_kwargs = llm_kwargs or {}
+
+        llm_kwargs["tool_choice"] = (
+            "required" if "tool_choice" not in llm_kwargs else llm_kwargs["tool_choice"]
+        )
+        # by default structured prediction uses function calling to extract structured outputs
+        # here we force tool_choice to be required
+        return await super().astructured_predict(
+            output_cls, prompt, llm_kwargs=llm_kwargs, **prompt_args
+        )
+
+    @dispatcher.span
+    def stream_structured_predict(
+        self,
+        output_cls: Type[Model],
+        prompt: PromptTemplate,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+        **prompt_args: Any,
+    ) -> Generator[Union[Model, FlexibleModel], None, None]:
+        """Stream structured predict."""
+        llm_kwargs = llm_kwargs or {}
+
+        llm_kwargs["tool_choice"] = (
+            "required" if "tool_choice" not in llm_kwargs else llm_kwargs["tool_choice"]
+        )
+        # by default structured prediction uses function calling to extract structured outputs
+        # here we force tool_choice to be required
+        return super().stream_structured_predict(
+            output_cls, prompt, llm_kwargs=llm_kwargs, **prompt_args
+        )
+
+    @dispatcher.span
+    async def astream_structured_predict(
+        self,
+        output_cls: Type[Model],
+        prompt: PromptTemplate,
+        llm_kwargs: Optional[Dict[str, Any]] = None,
+        **prompt_args: Any,
+    ) -> AsyncGenerator[Union[Model, FlexibleModel], None]:
+        """Stream structured predict."""
+        llm_kwargs = llm_kwargs or {}
+
+        llm_kwargs["tool_choice"] = (
+            "required" if "tool_choice" not in llm_kwargs else llm_kwargs["tool_choice"]
+        )
+        # by default structured prediction uses function calling to extract structured outputs
+        # here we force tool_choice to be required
+        return await super().astream_structured_predict(
+            output_cls, prompt, llm_kwargs=llm_kwargs, **prompt_args
+        )
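
Beyond plain completion, the class above exposes streaming and tool calling. The sketch below is a hedged illustration: `stream_complete` and `get_tool_calls_from_response` appear verbatim in the diff, while `FunctionTool` and the `chat_with_tools` helper are assumed to come from `llama-index-core` and to behave with this class as they do with the existing chat-completions `OpenAI` LLM.

```python
# Hedged sketch: streaming and tool calling with OpenAIResponses.
# Assumption: FunctionTool and FunctionCallingLLM.chat_with_tools() from
# llama-index-core work here as they do for the existing OpenAI LLM class.
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAIResponses


def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


llm = OpenAIResponses(model="gpt-4o-mini", api_key="sk-...")

# stream_complete() wraps the streaming chat path (_stream_chat) shown above.
for chunk in llm.stream_complete("Write a haiku about version bumps"):
    print(chunk.delta, end="")

# Tool calls come back as ResponseFunctionToolCall objects in
# message.additional_kwargs["tool_calls"]; get_tool_calls_from_response()
# converts them into ToolSelection objects.
tool = FunctionTool.from_defaults(fn=multiply)
response = llm.chat_with_tools([tool], user_msg="What is 3 times 7?")
for selection in llm.get_tool_calls_from_response(
    response, error_on_no_tool_call=False
):
    print(selection.tool_name, selection.tool_kwargs)
```

Setting `track_previous_responses=True` makes the class remember the last response id between calls (and forces `store=True`), which is how the Responses API keeps conversation state on the server side rather than in the message list.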
llama_index/llms/openai/utils.py
CHANGED
@@ -391,20 +391,142 @@ def to_openai_message_dict(
     return message_dict  # type: ignore


+def to_openai_responses_message_dict(
+    message: ChatMessage,
+    drop_none: bool = False,
+    model: Optional[str] = None,
+) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
+    """Convert a ChatMessage to an OpenAI message dict."""
+    content = []
+    content_txt = ""
+
+    for block in message.blocks:
+        if isinstance(block, TextBlock):
+            content.append({"type": "input_text", "text": block.text})
+            content_txt += block.text
+        elif isinstance(block, ImageBlock):
+            if block.url:
+                content.append(
+                    {
+                        "type": "input_image",
+                        "image_url": str(block.url),
+                        "detail": block.detail or "auto",
+                    }
+                )
+            else:
+                img_bytes = block.resolve_image(as_base64=True).read()
+                img_str = img_bytes.decode("utf-8")
+                content.append(
+                    {
+                        "type": "input_image",
+                        "image_url": f"data:{block.image_mimetype};base64,{img_str}",
+                        "detail": block.detail or "auto",
+                    }
+                )
+        else:
+            msg = f"Unsupported content block type: {type(block).__name__}"
+            raise ValueError(msg)
+
+    # NOTE: Sending a null value (None) for Tool Message to OpenAI will cause error
+    # It's only Allowed to send None if it's an Assistant Message and either a function call or tool calls were performed
+    # Reference: https://platform.openai.com/docs/api-reference/chat/create
+    content_txt = (
+        None
+        if content_txt == ""
+        and message.role == MessageRole.ASSISTANT
+        and (
+            "function_call" in message.additional_kwargs
+            or "tool_calls" in message.additional_kwargs
+        )
+        else content_txt
+    )
+
+    # NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
+    # or TOOL, 'content' cannot be a list and must be string instead.
+    # Furthermore, if all blocks are text blocks, we can use the content_txt
+    # as the content. This will avoid breaking openai-like APIs.
+    if message.role.value == "tool":
+        call_id = message.additional_kwargs.get(
+            "tool_call_id", message.additional_kwargs.get("call_id")
+        )
+        if call_id is None:
+            raise ValueError(
+                "tool_call_id or call_id is required in additional_kwargs for tool messages"
+            )
+
+        message_dict = {
+            "type": "function_call_output",
+            "output": content_txt,
+            "call_id": call_id,
+        }
+
+        return message_dict
+    elif "tool_calls" in message.additional_kwargs:
+        message_dicts = [
+            tool_call if isinstance(tool_call, dict) else tool_call.model_dump()
+            for tool_call in message.additional_kwargs["tool_calls"]
+        ]
+
+        return message_dicts
+    else:
+        message_dict = {
+            "role": message.role.value,
+            "content": (
+                content_txt
+                if message.role.value in ("assistant", "system", "developer")
+                or all(isinstance(block, TextBlock) for block in message.blocks)
+                else content
+            ),
+        }
+
+        # TODO: O1 models do not support system prompts
+        if (
+            model is not None
+            and model in O1_MODELS
+            and model not in O1_MODELS_WITHOUT_FUNCTION_CALLING
+        ):
+            if message_dict["role"] == "system":
+                message_dict["role"] = "developer"
+
+        null_keys = [key for key, value in message_dict.items() if value is None]
+        # if drop_none is True, remove keys with None values
+        if drop_none:
+            for key in null_keys:
+                message_dict.pop(key)
+
+        return message_dict  # type: ignore
+
+
 def to_openai_message_dicts(
     messages: Sequence[ChatMessage],
     drop_none: bool = False,
     model: Optional[str] = None,
+    is_responses_api: bool = False,
 ) -> List[ChatCompletionMessageParam]:
     """Convert generic messages to OpenAI message dicts."""
-    return [
-        to_openai_message_dict(
-            message,
-            drop_none=drop_none,
-            model=model,
-        )
-        for message in messages
-    ]
+    if is_responses_api:
+        final_message_dicts = []
+        for message in messages:
+            message_dicts = to_openai_responses_message_dict(
+                message,
+                drop_none=drop_none,
+                model=model,
+            )
+            if isinstance(message_dicts, list):
+                final_message_dicts.extend(message_dicts)
+            else:
+                final_message_dicts.append(message_dicts)
+
+        return final_message_dicts
+    else:
+        return [
+            to_openai_message_dict(
+                message,
+                drop_none=drop_none,
+                model=model,
+            )
+            for message in messages
+        ]


 def from_openai_message(
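
The `is_responses_api` flag added here routes conversion through the new `to_openai_responses_message_dict`, which emits Responses-API input items (`input_text`/`input_image` content blocks, `function_call_output` items for tool results) instead of chat-completions message dicts. A small sketch of the plain-text path, based only on the code above:

```python
# Hedged sketch of the new Responses-API message conversion; the commented
# output follows the TextBlock branch of to_openai_responses_message_dict above.
from llama_index.core.base.llms.types import ChatMessage, MessageRole
from llama_index.llms.openai.utils import to_openai_message_dicts

messages = [
    ChatMessage(role=MessageRole.SYSTEM, content="You are terse."),
    ChatMessage(role=MessageRole.USER, content="Summarize this diff."),
]

# is_responses_api=False (the default) keeps the old chat-completions format;
# is_responses_api=True is what OpenAIResponses passes internally.
payload = to_openai_message_dicts(messages, is_responses_api=True)
# payload ->
# [{"role": "system", "content": "You are terse."},
#  {"role": "user", "content": "Summarize this diff."}]
```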
{llama_index_llms_openai-0.3.28.dist-info → llama_index_llms_openai-0.3.29.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama-index-llms-openai
-Version: 0.3.28
+Version: 0.3.29
 Summary: llama-index llms openai integration
 License: MIT
 Author: llama-index
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: llama-index-core (>=0.12.17,<0.13.0)
-Requires-Dist: openai (>=1.
+Requires-Dist: openai (>=1.66.3,<2.0.0)
 Description-Content-Type: text/markdown

 # LlamaIndex Llms Integration: Openai
llama_index_llms_openai-0.3.29.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+llama_index/llms/openai/__init__.py,sha256=8nmgixeXifQ4eVSgtCic54WxXqrrpXQPL4rhACWCSFs,229
+llama_index/llms/openai/base.py,sha256=RjkISrh-RvbrQWOfdNdH4nimDQN0byUFm_n6r703jdM,38609
+llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+llama_index/llms/openai/responses.py,sha256=TikqnpW-UQmgjmMYGznsBx7eEo5prNIaxE81RO1ZZjE,34465
+llama_index/llms/openai/utils.py,sha256=qp9qpXY7HbUnUsVDx6TgK98feibzTRi-bLdq_F3S0fo,26017
+llama_index_llms_openai-0.3.29.dist-info/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_openai-0.3.29.dist-info/METADATA,sha256=g0FtAtfto485JxdoqCDwulLfoKYoPcglr1ETMZ_lFjg,3322
+llama_index_llms_openai-0.3.29.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+llama_index_llms_openai-0.3.29.dist-info/RECORD,,
llama_index_llms_openai-0.3.28.dist-info/RECORD
REMOVED
@@ -1,8 +0,0 @@
-llama_index/llms/openai/__init__.py,sha256=vm3cIBSGkBFlE77GyfyN0EhpJcnJZN95QMhPN53EkbE,148
-llama_index/llms/openai/base.py,sha256=RjkISrh-RvbrQWOfdNdH4nimDQN0byUFm_n6r703jdM,38609
-llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-llama_index/llms/openai/utils.py,sha256=n7GEv864j34idRUR2ouu0McSBqBTVX2Tko9vf1YOl-k,21624
-llama_index_llms_openai-0.3.28.dist-info/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_openai-0.3.28.dist-info/METADATA,sha256=Bmx7FvGOcpdMpwP5gx-Wx2Dlbkp_42N7QC-zVodKXuw,3322
-llama_index_llms_openai-0.3.28.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-llama_index_llms_openai-0.3.28.dist-info/RECORD,,
{llama_index_llms_openai-0.3.28.dist-info → llama_index_llms_openai-0.3.29.dist-info}/LICENSE
RENAMED
File without changes
{llama_index_llms_openai-0.3.28.dist-info → llama_index_llms_openai-0.3.29.dist-info}/WHEEL
RENAMED
File without changes