PyPI - llama-index-llms-openai - Versions diffs - 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl - Mend

llama-index-llms-openai 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

llama_index/llms/openai/base.py CHANGED Viewed

@@ -222,6 +222,14 @@ class OpenAI(FunctionCallingLLM):
         default=None,
         description="The effort to use for reasoning models.",
     )
+    modalities: Optional[List[str]] = Field(
+        default=None,
+        description="The output modalities to use for the model.",
+    )
+    audio_config: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="The audio configuration to use for the model.",
+    )
     _client: Optional[SyncOpenAI] = PrivateAttr()
     _aclient: Optional[AsyncOpenAI] = PrivateAttr()
@@ -254,6 +262,8 @@ class OpenAI(FunctionCallingLLM):
         output_parser: Optional[BaseOutputParser] = None,
         strict: bool = False,
         reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
+        modalities: Optional[List[str]] = None,
+        audio_config: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> None:
         additional_kwargs = additional_kwargs or {}
@@ -288,6 +298,8 @@ class OpenAI(FunctionCallingLLM):
             output_parser=output_parser,
             strict=strict,
             reasoning_effort=reasoning_effort,
+            modalities=modalities,
+            audio_config=audio_config,
             **kwargs,
         )
@@ -375,6 +387,11 @@ class OpenAI(FunctionCallingLLM):
     def complete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
     ) -> CompletionResponse:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError(
+                "Audio is not supported for completion. Use chat/achat instead."
+            )
         if self._use_chat_completions(kwargs):
             complete_fn = chat_to_completion_decorator(self._chat)
         else:
@@ -434,6 +451,11 @@ class OpenAI(FunctionCallingLLM):
             # O1 models support reasoning_effort of low, medium, high
             all_kwargs["reasoning_effort"] = self.reasoning_effort
+        if self.modalities is not None:
+            all_kwargs["modalities"] = self.modalities
+        if self.audio_config is not None:
+            all_kwargs["audio"] = self.audio_config
         return all_kwargs
     @llm_retry_decorator
@@ -459,7 +481,9 @@ class OpenAI(FunctionCallingLLM):
                 )
         openai_message = response.choices[0].message
-        message = from_openai_message(openai_message)
+        message = from_openai_message(
+            openai_message, modalities=self.modalities or ["text"]
+        )
         openai_token_logprobs = response.choices[0].logprobs
         logprobs = None
         if openai_token_logprobs and openai_token_logprobs.content:
@@ -476,6 +500,9 @@ class OpenAI(FunctionCallingLLM):
     def _stream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError("Audio is not supported for chat streaming")
         client = self._get_client()
         message_dicts = to_openai_message_dicts(
             messages,
@@ -667,6 +694,11 @@ class OpenAI(FunctionCallingLLM):
     async def acomplete(
         self, prompt: str, formatted: bool = False, **kwargs: Any
     ) -> CompletionResponse:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError(
+                "Audio is not supported for completion. Use chat/achat instead."
+            )
         if self._use_chat_completions(kwargs):
             acomplete_fn = achat_to_completion_decorator(self._achat)
         else:
@@ -708,7 +740,9 @@ class OpenAI(FunctionCallingLLM):
                 )
         openai_message = response.choices[0].message
-        message = from_openai_message(openai_message)
+        message = from_openai_message(
+            openai_message, modalities=self.modalities or ["text"]
+        )
         openai_token_logprobs = response.choices[0].logprobs
         logprobs = None
         if openai_token_logprobs and openai_token_logprobs.content:
@@ -725,6 +759,9 @@ class OpenAI(FunctionCallingLLM):
     async def _astream_chat(
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
+        if self.modalities and "audio" in self.modalities:
+            raise ValueError("Audio is not supported for chat streaming")
         aclient = self._get_aclient()
         message_dicts = to_openai_message_dicts(
             messages,

llama_index/llms/openai/utils.py CHANGED Viewed

@@ -27,6 +27,7 @@ from llama_index.core.base.llms.types import (
     LogProb,
     MessageRole,
     TextBlock,
+    AudioBlock,
 )
 from llama_index.core.bridge.pydantic import BaseModel
@@ -68,6 +69,11 @@ GPT4_MODELS: Dict[str, int] = {
     "gpt-4-turbo-2024-04-09": 128000,
     "gpt-4-turbo": 128000,
     "gpt-4o": 128000,
+    "gpt-4o-audio-preview": 128000,
+    "gpt-4o-audio-preview-2024-12-17": 128000,
+    "gpt-4o-audio-preview-2024-10-01": 128000,
+    "gpt-4o-mini-audio-preview": 128000,
+    "gpt-4o-mini-audio-preview-2024-12-17": 128000,
     "gpt-4o-2024-05-13": 128000,
     "gpt-4o-2024-08-06": 128000,
     "gpt-4o-2024-11-20": 128000,
@@ -270,7 +276,16 @@ def to_openai_message_dict(
     """Convert a ChatMessage to an OpenAI message dict."""
     content = []
     content_txt = ""
+    reference_audio_id = None
     for block in message.blocks:
+        if message.role == MessageRole.ASSISTANT:
+            reference_audio_id = message.additional_kwargs.get(
+                "reference_audio_id", None
+            )
+            # if reference audio id is provided, we don't need to send the audio
+            if reference_audio_id:
+                continue
         if isinstance(block, TextBlock):
             content.append({"type": "text", "text": block.text})
             content_txt += block.text
@@ -291,6 +306,18 @@ def to_openai_message_dict(
                         },
                     }
                 )
+        elif isinstance(block, AudioBlock):
+            audio_bytes = block.resolve_audio(as_base64=True).read()
+            audio_str = audio_bytes.decode("utf-8")
+            content.append(
+                {
+                    "type": "input_audio",
+                    "input_audio": {
+                        "data": audio_str,
+                        "format": block.format,
+                    },
+                }
+            )
         else:
             msg = f"Unsupported content block type: {type(block).__name__}"
             raise ValueError(msg)
@@ -304,22 +331,34 @@ def to_openai_message_dict(
         else content_txt
     )
-    # NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
-    # or TOOL, 'content' cannot be a list and must be string instead.
-    # Furthermore, if all blocks are text blocks, we can use the content_txt
-    # as the content. This will avoid breaking openai-like APIs.
-    message_dict = {
-        "role": message.role.value,
-        "content": (
-            content_txt
-            if message.role.value in ("assistant", "tool", "system")
-            or all(isinstance(block, TextBlock) for block in message.blocks)
-            else content
-        ),
-    }
+    # If reference audio id is provided, we don't need to send the audio
+    # NOTE: this is only a thing for assistant messages
+    if reference_audio_id:
+        message_dict = {
+            "role": message.role.value,
+            "audio": {"id": reference_audio_id},
+        }
+    else:
+        # NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
+        # or TOOL, 'content' cannot be a list and must be string instead.
+        # Furthermore, if all blocks are text blocks, we can use the content_txt
+        # as the content. This will avoid breaking openai-like APIs.
+        message_dict = {
+            "role": message.role.value,
+            "content": (
+                content_txt
+                if message.role.value in ("assistant", "tool", "system")
+                or all(isinstance(block, TextBlock) for block in message.blocks)
+                else content
+            ),
+        }
     # TODO: O1 models do not support system prompts
-    if model is not None and model in O1_MODELS:
+    if (
+        model is not None
+        and model in O1_MODELS
+        and model not in O1_MODELS_WITHOUT_FUNCTION_CALLING
+    ):
         if message_dict["role"] == "system":
             message_dict["role"] = "developer"
@@ -353,20 +392,29 @@ def to_openai_message_dicts(
     ]
-def from_openai_message(openai_message: ChatCompletionMessage) -> ChatMessage:
+def from_openai_message(
+    openai_message: ChatCompletionMessage, modalities: List[str]
+) -> ChatMessage:
     """Convert openai message dict to generic message."""
     role = openai_message.role
     # NOTE: Azure OpenAI returns function calling messages without a content key
-    content = openai_message.content
-    # function_call = None  # deprecated in OpenAI v 1.1.0
+    if "text" in modalities and openai_message.content:
+        blocks = [TextBlock(text=openai_message.content or "")]
+    else:
+        blocks = []
     additional_kwargs: Dict[str, Any] = {}
     if openai_message.tool_calls:
         tool_calls: List[ChatCompletionMessageToolCall] = openai_message.tool_calls
         additional_kwargs.update(tool_calls=tool_calls)
-    return ChatMessage(role=role, content=content, additional_kwargs=additional_kwargs)
+    if openai_message.audio and "audio" in modalities:
+        reference_audio_id = openai_message.audio.id
+        audio_data = openai_message.audio.data
+        additional_kwargs["reference_audio_id"] = reference_audio_id
+        blocks.append(AudioBlock(audio=audio_data, format="mp3"))
+    return ChatMessage(role=role, blocks=blocks, additional_kwargs=additional_kwargs)
 def from_openai_token_logprob(
@@ -421,10 +469,10 @@ def from_openai_completion_logprobs(
 def from_openai_messages(
-    openai_messages: Sequence[ChatCompletionMessage],
+    openai_messages: Sequence[ChatCompletionMessage], modalities: List[str]
 ) -> List[ChatMessage]:
     """Convert openai message dicts to generic messages."""
-    return [from_openai_message(message) for message in openai_messages]
+    return [from_openai_message(message, modalities) for message in openai_messages]
 def from_openai_message_dict(message_dict: dict) -> ChatMessage:

{llama_index_llms_openai-0.3.18.dist-info → llama_index_llms_openai-0.3.20.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama-index-llms-openai
-Version: 0.3.18
+Version: 0.3.20
 Summary: llama-index llms openai integration
 License: MIT
 Author: llama-index
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: llama-index-core (>=0.12.4,<0.13.0)
+Requires-Dist: llama-index-core (>=0.12.17,<0.13.0)
 Requires-Dist: openai (>=1.58.1,<2.0.0)
 Description-Content-Type: text/markdown

llama_index_llms_openai-0.3.20.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+llama_index/llms/openai/__init__.py,sha256=vm3cIBSGkBFlE77GyfyN0EhpJcnJZN95QMhPN53EkbE,148
+llama_index/llms/openai/base.py,sha256=9HBszflpKp81gRSMaWKVa4PnG6sLfV9mWOyAkTM7MUI,38055
+llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+llama_index/llms/openai/utils.py,sha256=rQJ5B_griANqC4oNbkhgtjWijytLfG0HLlvYh9qCjv4,20708
+llama_index_llms_openai-0.3.20.dist-info/METADATA,sha256=V4PGur14J6RiUC8S1YDDfuVTav3smCXx6dN-baidtng,3322
+llama_index_llms_openai-0.3.20.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+llama_index_llms_openai-0.3.20.dist-info/RECORD,,

llama_index_llms_openai-0.3.18.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-llama_index/llms/openai/__init__.py,sha256=vm3cIBSGkBFlE77GyfyN0EhpJcnJZN95QMhPN53EkbE,148
-llama_index/llms/openai/base.py,sha256=Bj7o-NCrUSWK3cES3anFgANMLRbmdLG8AkxC9QrVKqw,36637
-llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-llama_index/llms/openai/utils.py,sha256=se_tHHLsNW4u2Ei_25HRPAm1lmzv-kFp2r2WqqL_jfE,18858
-llama_index_llms_openai-0.3.18.dist-info/METADATA,sha256=4qvbR9QPW-vHG6tsGLEedOUP6Sf15LzW5jESGO64jdk,3321
-llama_index_llms_openai-0.3.18.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-llama_index_llms_openai-0.3.18.dist-info/RECORD,,

{llama_index_llms_openai-0.3.18.dist-info → llama_index_llms_openai-0.3.20.dist-info}/WHEEL RENAMED Viewed

File without changes

llama-index-llms-openai 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl

llama-index-llms-openai 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl