llama-index-llms-openai 0.3.17__tar.gz → 0.3.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/PKG-INFO +2 -2
- {llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/base.py +43 -6
- {llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/utils.py +64 -20
- {llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/pyproject.toml +2 -2
- {llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/README.md +0 -0
- {llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/__init__.py +0 -0
- {llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/py.typed +0 -0
{llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama-index-llms-openai
-Version: 0.3.17
+Version: 0.3.19
 Summary: llama-index llms openai integration
 License: MIT
 Author: llama-index
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: llama-index-core (>=0.12.
+Requires-Dist: llama-index-core (>=0.12.17,<0.13.0)
 Requires-Dist: openai (>=1.58.1,<2.0.0)
 Description-Content-Type: text/markdown
{llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/base.py RENAMED

@@ -218,10 +218,18 @@ class OpenAI(FunctionCallingLLM):
         default=False,
         description="Whether to use strict mode for invoking tools/using schemas.",
     )
-    reasoning_effort: Literal["low", "medium", "high"] = Field(
-        default=
+    reasoning_effort: Optional[Literal["low", "medium", "high"]] = Field(
+        default=None,
         description="The effort to use for reasoning models.",
     )
+    modalities: Optional[List[str]] = Field(
+        default=None,
+        description="The output modalities to use for the model.",
+    )
+    audio_config: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="The audio configuration to use for the model.",
+    )

     _client: Optional[SyncOpenAI] = PrivateAttr()
     _aclient: Optional[AsyncOpenAI] = PrivateAttr()
@@ -253,7 +261,9 @@ class OpenAI(FunctionCallingLLM):
         pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
         output_parser: Optional[BaseOutputParser] = None,
         strict: bool = False,
-        reasoning_effort: Literal["low", "medium", "high"] =
+        reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
+        modalities: Optional[List[str]] = None,
+        audio_config: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
         additional_kwargs = additional_kwargs or {}
@@ -288,6 +298,8 @@ class OpenAI(FunctionCallingLLM):
             output_parser=output_parser,
             strict=strict,
             reasoning_effort=reasoning_effort,
+            modalities=modalities,
+            audio_config=audio_config,
             **kwargs,
         )

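Taken together, the two hunks above wire audio output support through the constructor via two new keyword arguments. A minimal usage sketch; the `voice` and `format` values inside `audio_config` are illustrative assumptions, not taken from this diff:

```python
from llama_index.llms.openai import OpenAI

# Sketch: enable audio output via the new 0.3.19 fields shown above.
# audio_config is passed through to the OpenAI API's `audio` parameter;
# the voice/format values here are assumptions for illustration.
llm = OpenAI(
    model="gpt-4o-audio-preview",
    modalities=["text", "audio"],
    audio_config={"voice": "alloy", "format": "wav"},
)
```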
@@ -375,6 +387,11 @@ class OpenAI(FunctionCallingLLM):
     def complete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
     ) -> CompletionResponse:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError(
+                "Audio is not supported for completion. Use chat/achat instead."
+            )
+
         if self._use_chat_completions(kwargs):
             complete_fn = chat_to_completion_decorator(self._chat)
         else:
@@ -430,10 +447,15 @@ class OpenAI(FunctionCallingLLM):
                 "max_completion_tokens", all_kwargs["max_tokens"]
             )
             all_kwargs.pop("max_tokens", None)
-        if self.model in O1_MODELS:
+        if self.model in O1_MODELS and self.reasoning_effort is not None:
             # O1 models support reasoning_effort of low, medium, high
             all_kwargs["reasoning_effort"] = self.reasoning_effort

+        if self.modalities is not None:
+            all_kwargs["modalities"] = self.modalities
+        if self.audio_config is not None:
+            all_kwargs["audio"] = self.audio_config
+
         return all_kwargs

     @llm_retry_decorator
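Two details in the hunk above are easy to miss: `reasoning_effort` is now forwarded only when explicitly set, and `audio_config` is forwarded under the OpenAI API's `audio` key while `modalities` keeps its name. A sketch of the extra request kwargs this produces, with illustrative values:

```python
# Sketch: what the kwargs builder above contributes to the client call.
# Key names follow the diff; the values are illustrative assumptions.
all_kwargs = {
    "model": "gpt-4o-audio-preview",
    "modalities": ["text", "audio"],               # from self.modalities
    "audio": {"voice": "alloy", "format": "wav"},  # from self.audio_config (renamed)
}
```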
@@ -459,7 +481,9 @@ class OpenAI(FunctionCallingLLM):
         )

         openai_message = response.choices[0].message
-        message = from_openai_message(openai_message)
+        message = from_openai_message(
+            openai_message, modalities=self.modalities or ["text"]
+        )
         openai_token_logprobs = response.choices[0].logprobs
         logprobs = None
         if openai_token_logprobs and openai_token_logprobs.content:
@@ -476,6 +500,9 @@ class OpenAI(FunctionCallingLLM):
     def _stream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError("Audio is not supported for chat streaming")
+
         client = self._get_client()
         message_dicts = to_openai_message_dicts(
             messages,
@@ -667,6 +694,11 @@ class OpenAI(FunctionCallingLLM):
     async def acomplete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
     ) -> CompletionResponse:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError(
+                "Audio is not supported for completion. Use chat/achat instead."
+            )
+
         if self._use_chat_completions(kwargs):
             acomplete_fn = achat_to_completion_decorator(self._achat)
         else:
@@ -708,7 +740,9 @@ class OpenAI(FunctionCallingLLM):
         )

         openai_message = response.choices[0].message
-        message = from_openai_message(openai_message)
+        message = from_openai_message(
+            openai_message, modalities=self.modalities or ["text"]
+        )
         openai_token_logprobs = response.choices[0].logprobs
         logprobs = None
         if openai_token_logprobs and openai_token_logprobs.content:
@@ -725,6 +759,9 @@ class OpenAI(FunctionCallingLLM):
     async def _astream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError("Audio is not supported for chat streaming")
+
         aclient = self._get_aclient()
         message_dicts = to_openai_message_dicts(
             messages,
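The four guards above (`complete`/`acomplete` and both streaming chat paths) leave non-streaming chat as the only route to audio output. A usage sketch, assuming the `llm` configured earlier:

```python
from llama_index.core.llms import ChatMessage

# Non-streaming chat is the supported path when "audio" is in modalities.
response = llm.chat([ChatMessage(role="user", content="Say hello out loud.")])

# These now raise ValueError when "audio" is in llm.modalities:
#   llm.complete("Say hello")  -> "Audio is not supported for completion. ..."
#   llm.stream_chat([...])     -> "Audio is not supported for chat streaming"
```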
{llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/utils.py RENAMED

@@ -27,6 +27,7 @@ from llama_index.core.base.llms.types import (
     LogProb,
     MessageRole,
     TextBlock,
+    AudioBlock,
 )
 from llama_index.core.bridge.pydantic import BaseModel

@@ -68,6 +69,11 @@ GPT4_MODELS: Dict[str, int] = {
     "gpt-4-turbo-2024-04-09": 128000,
     "gpt-4-turbo": 128000,
     "gpt-4o": 128000,
+    "gpt-4o-audio-preview": 128000,
+    "gpt-4o-audio-preview-2024-12-17": 128000,
+    "gpt-4o-audio-preview-2024-10-01": 128000,
+    "gpt-4o-mini-audio-preview": 128000,
+    "gpt-4o-mini-audio-preview-2024-12-17": 128000,
     "gpt-4o-2024-05-13": 128000,
     "gpt-4o-2024-08-06": 128000,
     "gpt-4o-2024-11-20": 128000,
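Registering the audio-preview models in `GPT4_MODELS` gives them a 128k-token context window in the package's model metadata. A quick check, using the module's existing `openai_modelname_to_contextsize` helper:

```python
from llama_index.llms.openai.utils import openai_modelname_to_contextsize

# The new entries above make the context size resolvable for audio models:
assert openai_modelname_to_contextsize("gpt-4o-audio-preview") == 128000
```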
@@ -270,7 +276,16 @@ def to_openai_message_dict(
     """Convert a ChatMessage to an OpenAI message dict."""
     content = []
     content_txt = ""
+    reference_audio_id = None
     for block in message.blocks:
+        if message.role == MessageRole.ASSISTANT:
+            reference_audio_id = message.additional_kwargs.get(
+                "reference_audio_id", None
+            )
+        # if reference audio id is provided, we don't need to send the audio
+        if reference_audio_id:
+            continue
+
         if isinstance(block, TextBlock):
             content.append({"type": "text", "text": block.text})
             content_txt += block.text
@@ -291,6 +306,18 @@ def to_openai_message_dict(
                 },
             }
         )
+        elif isinstance(block, AudioBlock):
+            audio_bytes = block.resolve_audio(as_base64=True).read()
+            audio_str = audio_bytes.decode("utf-8")
+            content.append(
+                {
+                    "type": "input_audio",
+                    "input_audio": {
+                        "data": audio_str,
+                        "format": block.format,
+                    },
+                }
+            )
         else:
             msg = f"Unsupported content block type: {type(block).__name__}"
             raise ValueError(msg)
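The new `AudioBlock` branch base64-encodes the audio and ships it as an OpenAI `input_audio` content part. A sketch of sending audio input; the `AudioBlock` constructor arguments and the file path are assumptions for illustration:

```python
from llama_index.core.base.llms.types import AudioBlock, TextBlock
from llama_index.core.llms import ChatMessage

# Sketch: a user turn mixing text and audio. to_openai_message_dict (above)
# serializes the AudioBlock into {"type": "input_audio", ...}.
message = ChatMessage(
    role="user",
    blocks=[
        TextBlock(text="Please transcribe this clip:"),
        AudioBlock(path="clip.wav", format="wav"),  # hypothetical local file
    ],
)
response = llm.chat([message])
```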
@@ -304,19 +331,27 @@ def to_openai_message_dict(
         else content_txt
     )

-    # NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
-    # or TOOL, 'content' cannot be a list and must be string instead.
-    # Furthermore, if all blocks are text blocks, we can use the content_txt
-    # as the content. This will avoid breaking openai-like APIs.
-    message_dict = {
-        "role": message.role.value,
-        "content": (
-            content_txt
-            if message.role.value in ("assistant", "tool", "system")
-            or all(isinstance(block, TextBlock) for block in message.blocks)
-            else content
-        ),
-    }
+    # If reference audio id is provided, we don't need to send the audio
+    # NOTE: this is only a thing for assistant messages
+    if reference_audio_id:
+        message_dict = {
+            "role": message.role.value,
+            "audio": {"id": reference_audio_id},
+        }
+    else:
+        # NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
+        # or TOOL, 'content' cannot be a list and must be string instead.
+        # Furthermore, if all blocks are text blocks, we can use the content_txt
+        # as the content. This will avoid breaking openai-like APIs.
+        message_dict = {
+            "role": message.role.value,
+            "content": (
+                content_txt
+                if message.role.value in ("assistant", "tool", "system")
+                or all(isinstance(block, TextBlock) for block in message.blocks)
+                else content
+            ),
+        }

     # TODO: O1 models do not support system prompts
     if model is not None and model in O1_MODELS:
@@ -353,20 +388,29 @@ def to_openai_message_dicts(
     ]


-def from_openai_message(openai_message: ChatCompletionMessage) -> ChatMessage:
+def from_openai_message(
+    openai_message: ChatCompletionMessage, modalities: List[str]
+) -> ChatMessage:
     """Convert openai message dict to generic message."""
     role = openai_message.role
     # NOTE: Azure OpenAI returns function calling messages without a content key
-
-
-
+    if "text" in modalities and openai_message.content:
+        blocks = [TextBlock(text=openai_message.content or "")]
+    else:
+        blocks = []

     additional_kwargs: Dict[str, Any] = {}
     if openai_message.tool_calls:
         tool_calls: List[ChatCompletionMessageToolCall] = openai_message.tool_calls
         additional_kwargs.update(tool_calls=tool_calls)

-
+    if openai_message.audio and "audio" in modalities:
+        reference_audio_id = openai_message.audio.id
+        audio_data = openai_message.audio.data
+        additional_kwargs["reference_audio_id"] = reference_audio_id
+        blocks.append(AudioBlock(audio=audio_data, format="mp3"))
+
+    return ChatMessage(role=role, blocks=blocks, additional_kwargs=additional_kwargs)


 def from_openai_token_logprob(
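On the response side, `from_openai_message` now emits `AudioBlock`s and stashes the provider's audio id under `additional_kwargs["reference_audio_id"]`, which `to_openai_message_dict` (above) uses to avoid re-uploading audio on later turns. A round-trip sketch, assuming the `llm` from earlier and that `resolve_audio()` with default arguments returns the decoded bytes (the diff only shows the `as_base64=True` form):

```python
from llama_index.core.base.llms.types import AudioBlock
from llama_index.core.llms import ChatMessage

response = llm.chat([ChatMessage(role="user", content="Tell me a joke out loud.")])

# Audio replies arrive as AudioBlocks; the id enables cheap follow-up turns.
audio_id = response.message.additional_kwargs.get("reference_audio_id")
audio_bytes = b"".join(
    block.resolve_audio().read()  # assumed: decoded audio bytes
    for block in response.message.blocks
    if isinstance(block, AudioBlock)
)

# Re-sending response.message on the next turn serializes to
# {"role": "assistant", "audio": {"id": audio_id}} rather than raw audio.
```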
@@ -421,10 +465,10 @@ def from_openai_completion_logprobs(


 def from_openai_messages(
-    openai_messages: Sequence[ChatCompletionMessage],
+    openai_messages: Sequence[ChatCompletionMessage], modalities: List[str]
 ) -> List[ChatMessage]:
     """Convert openai message dicts to generic messages."""
-    return [from_openai_message(message) for message in openai_messages]
+    return [from_openai_message(message, modalities) for message in openai_messages]


 def from_openai_message_dict(message_dict: dict) -> ChatMessage:
{llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/pyproject.toml RENAMED

@@ -29,12 +29,12 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-openai"
 readme = "README.md"
-version = "0.3.17"
+version = "0.3.19"

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
 openai = "^1.58.1"
-llama-index-core = "^0.12.
+llama-index-core = "^0.12.17"

 [tool.poetry.group.dev.dependencies]
 ipython = "8.10.0"
{llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/README.md RENAMED
File without changes

{llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/__init__.py RENAMED
File without changes

{llama_index_llms_openai-0.3.17 → llama_index_llms_openai-0.3.19}/llama_index/llms/openai/py.typed RENAMED
File without changes