llama-index-llms-openai 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -222,6 +222,14 @@ class OpenAI(FunctionCallingLLM):
222
222
  default=None,
223
223
  description="The effort to use for reasoning models.",
224
224
  )
225
+ modalities: Optional[List[str]] = Field(
226
+ default=None,
227
+ description="The output modalities to use for the model.",
228
+ )
229
+ audio_config: Optional[Dict[str, Any]] = Field(
230
+ default=None,
231
+ description="The audio configuration to use for the model.",
232
+ )
225
233
 
226
234
  _client: Optional[SyncOpenAI] = PrivateAttr()
227
235
  _aclient: Optional[AsyncOpenAI] = PrivateAttr()
@@ -254,6 +262,8 @@ class OpenAI(FunctionCallingLLM):
254
262
  output_parser: Optional[BaseOutputParser] = None,
255
263
  strict: bool = False,
256
264
  reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
265
+ modalities: Optional[List[str]] = None,
266
+ audio_config: Optional[Dict[str, Any]] = None,
257
267
  **kwargs: Any,
258
268
  ) -> None:
259
269
  additional_kwargs = additional_kwargs or {}
@@ -288,6 +298,8 @@ class OpenAI(FunctionCallingLLM):
288
298
  output_parser=output_parser,
289
299
  strict=strict,
290
300
  reasoning_effort=reasoning_effort,
301
+ modalities=modalities,
302
+ audio_config=audio_config,
291
303
  **kwargs,
292
304
  )
293
305
 
@@ -375,6 +387,11 @@ class OpenAI(FunctionCallingLLM):
375
387
  def complete(
376
388
  self, prompt: str, formatted: bool = False, **kwargs: Any
377
389
  ) -> CompletionResponse:
390
+ if self.modalities and "audio" in self.modalities:
391
+ raise ValueError(
392
+ "Audio is not supported for completion. Use chat/achat instead."
393
+ )
394
+
378
395
  if self._use_chat_completions(kwargs):
379
396
  complete_fn = chat_to_completion_decorator(self._chat)
380
397
  else:
@@ -434,6 +451,11 @@ class OpenAI(FunctionCallingLLM):
434
451
  # O1 models support reasoning_effort of low, medium, high
435
452
  all_kwargs["reasoning_effort"] = self.reasoning_effort
436
453
 
454
+ if self.modalities is not None:
455
+ all_kwargs["modalities"] = self.modalities
456
+ if self.audio_config is not None:
457
+ all_kwargs["audio"] = self.audio_config
458
+
437
459
  return all_kwargs
438
460
 
439
461
  @llm_retry_decorator
@@ -459,7 +481,9 @@ class OpenAI(FunctionCallingLLM):
459
481
  )
460
482
 
461
483
  openai_message = response.choices[0].message
462
- message = from_openai_message(openai_message)
484
+ message = from_openai_message(
485
+ openai_message, modalities=self.modalities or ["text"]
486
+ )
463
487
  openai_token_logprobs = response.choices[0].logprobs
464
488
  logprobs = None
465
489
  if openai_token_logprobs and openai_token_logprobs.content:
@@ -476,6 +500,9 @@ class OpenAI(FunctionCallingLLM):
476
500
  def _stream_chat(
477
501
  self, messages: Sequence[ChatMessage], **kwargs: Any
478
502
  ) -> ChatResponseGen:
503
+ if self.modalities and "audio" in self.modalities:
504
+ raise ValueError("Audio is not supported for chat streaming")
505
+
479
506
  client = self._get_client()
480
507
  message_dicts = to_openai_message_dicts(
481
508
  messages,
@@ -667,6 +694,11 @@ class OpenAI(FunctionCallingLLM):
667
694
  async def acomplete(
668
695
  self, prompt: str, formatted: bool = False, **kwargs: Any
669
696
  ) -> CompletionResponse:
697
+ if self.modalities and "audio" in self.modalities:
698
+ raise ValueError(
699
+ "Audio is not supported for completion. Use chat/achat instead."
700
+ )
701
+
670
702
  if self._use_chat_completions(kwargs):
671
703
  acomplete_fn = achat_to_completion_decorator(self._achat)
672
704
  else:
@@ -708,7 +740,9 @@ class OpenAI(FunctionCallingLLM):
708
740
  )
709
741
 
710
742
  openai_message = response.choices[0].message
711
- message = from_openai_message(openai_message)
743
+ message = from_openai_message(
744
+ openai_message, modalities=self.modalities or ["text"]
745
+ )
712
746
  openai_token_logprobs = response.choices[0].logprobs
713
747
  logprobs = None
714
748
  if openai_token_logprobs and openai_token_logprobs.content:
@@ -725,6 +759,9 @@ class OpenAI(FunctionCallingLLM):
725
759
  async def _astream_chat(
726
760
  self, messages: Sequence[ChatMessage], **kwargs: Any
727
761
  ) -> ChatResponseAsyncGen:
762
+ if self.modalities and "audio" in self.modalities:
763
+ raise ValueError("Audio is not supported for chat streaming")
764
+
728
765
  aclient = self._get_aclient()
729
766
  message_dicts = to_openai_message_dicts(
730
767
  messages,
@@ -27,6 +27,7 @@ from llama_index.core.base.llms.types import (
27
27
  LogProb,
28
28
  MessageRole,
29
29
  TextBlock,
30
+ AudioBlock,
30
31
  )
31
32
  from llama_index.core.bridge.pydantic import BaseModel
32
33
 
@@ -68,6 +69,11 @@ GPT4_MODELS: Dict[str, int] = {
68
69
  "gpt-4-turbo-2024-04-09": 128000,
69
70
  "gpt-4-turbo": 128000,
70
71
  "gpt-4o": 128000,
72
+ "gpt-4o-audio-preview": 128000,
73
+ "gpt-4o-audio-preview-2024-12-17": 128000,
74
+ "gpt-4o-audio-preview-2024-10-01": 128000,
75
+ "gpt-4o-mini-audio-preview": 128000,
76
+ "gpt-4o-mini-audio-preview-2024-12-17": 128000,
71
77
  "gpt-4o-2024-05-13": 128000,
72
78
  "gpt-4o-2024-08-06": 128000,
73
79
  "gpt-4o-2024-11-20": 128000,
@@ -270,7 +276,16 @@ def to_openai_message_dict(
270
276
  """Convert a ChatMessage to an OpenAI message dict."""
271
277
  content = []
272
278
  content_txt = ""
279
+ reference_audio_id = None
273
280
  for block in message.blocks:
281
+ if message.role == MessageRole.ASSISTANT:
282
+ reference_audio_id = message.additional_kwargs.get(
283
+ "reference_audio_id", None
284
+ )
285
+ # if reference audio id is provided, we don't need to send the audio
286
+ if reference_audio_id:
287
+ continue
288
+
274
289
  if isinstance(block, TextBlock):
275
290
  content.append({"type": "text", "text": block.text})
276
291
  content_txt += block.text
@@ -291,6 +306,18 @@ def to_openai_message_dict(
291
306
  },
292
307
  }
293
308
  )
309
+ elif isinstance(block, AudioBlock):
310
+ audio_bytes = block.resolve_audio(as_base64=True).read()
311
+ audio_str = audio_bytes.decode("utf-8")
312
+ content.append(
313
+ {
314
+ "type": "input_audio",
315
+ "input_audio": {
316
+ "data": audio_str,
317
+ "format": block.format,
318
+ },
319
+ }
320
+ )
294
321
  else:
295
322
  msg = f"Unsupported content block type: {type(block).__name__}"
296
323
  raise ValueError(msg)
@@ -304,22 +331,34 @@ def to_openai_message_dict(
304
331
  else content_txt
305
332
  )
306
333
 
307
- # NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
308
- # or TOOL, 'content' cannot be a list and must be string instead.
309
- # Furthermore, if all blocks are text blocks, we can use the content_txt
310
- # as the content. This will avoid breaking openai-like APIs.
311
- message_dict = {
312
- "role": message.role.value,
313
- "content": (
314
- content_txt
315
- if message.role.value in ("assistant", "tool", "system")
316
- or all(isinstance(block, TextBlock) for block in message.blocks)
317
- else content
318
- ),
319
- }
334
+ # If reference audio id is provided, we don't need to send the audio
335
+ # NOTE: this is only a thing for assistant messages
336
+ if reference_audio_id:
337
+ message_dict = {
338
+ "role": message.role.value,
339
+ "audio": {"id": reference_audio_id},
340
+ }
341
+ else:
342
+ # NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
343
+ # or TOOL, 'content' cannot be a list and must be string instead.
344
+ # Furthermore, if all blocks are text blocks, we can use the content_txt
345
+ # as the content. This will avoid breaking openai-like APIs.
346
+ message_dict = {
347
+ "role": message.role.value,
348
+ "content": (
349
+ content_txt
350
+ if message.role.value in ("assistant", "tool", "system")
351
+ or all(isinstance(block, TextBlock) for block in message.blocks)
352
+ else content
353
+ ),
354
+ }
320
355
 
321
356
  # TODO: O1 models do not support system prompts
322
- if model is not None and model in O1_MODELS:
357
+ if (
358
+ model is not None
359
+ and model in O1_MODELS
360
+ and model not in O1_MODELS_WITHOUT_FUNCTION_CALLING
361
+ ):
323
362
  if message_dict["role"] == "system":
324
363
  message_dict["role"] = "developer"
325
364
 
@@ -353,20 +392,29 @@ def to_openai_message_dicts(
353
392
  ]
354
393
 
355
394
 
356
- def from_openai_message(openai_message: ChatCompletionMessage) -> ChatMessage:
395
+ def from_openai_message(
396
+ openai_message: ChatCompletionMessage, modalities: List[str]
397
+ ) -> ChatMessage:
357
398
  """Convert openai message dict to generic message."""
358
399
  role = openai_message.role
359
400
  # NOTE: Azure OpenAI returns function calling messages without a content key
360
- content = openai_message.content
361
-
362
- # function_call = None # deprecated in OpenAI v 1.1.0
401
+ if "text" in modalities and openai_message.content:
402
+ blocks = [TextBlock(text=openai_message.content or "")]
403
+ else:
404
+ blocks = []
363
405
 
364
406
  additional_kwargs: Dict[str, Any] = {}
365
407
  if openai_message.tool_calls:
366
408
  tool_calls: List[ChatCompletionMessageToolCall] = openai_message.tool_calls
367
409
  additional_kwargs.update(tool_calls=tool_calls)
368
410
 
369
- return ChatMessage(role=role, content=content, additional_kwargs=additional_kwargs)
411
+ if openai_message.audio and "audio" in modalities:
412
+ reference_audio_id = openai_message.audio.id
413
+ audio_data = openai_message.audio.data
414
+ additional_kwargs["reference_audio_id"] = reference_audio_id
415
+ blocks.append(AudioBlock(audio=audio_data, format="mp3"))
416
+
417
+ return ChatMessage(role=role, blocks=blocks, additional_kwargs=additional_kwargs)
370
418
 
371
419
 
372
420
  def from_openai_token_logprob(
@@ -421,10 +469,10 @@ def from_openai_completion_logprobs(
421
469
 
422
470
 
423
471
  def from_openai_messages(
424
- openai_messages: Sequence[ChatCompletionMessage],
472
+ openai_messages: Sequence[ChatCompletionMessage], modalities: List[str]
425
473
  ) -> List[ChatMessage]:
426
474
  """Convert openai message dicts to generic messages."""
427
- return [from_openai_message(message) for message in openai_messages]
475
+ return [from_openai_message(message, modalities) for message in openai_messages]
428
476
 
429
477
 
430
478
  def from_openai_message_dict(message_dict: dict) -> ChatMessage:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llama-index-llms-openai
3
- Version: 0.3.18
3
+ Version: 0.3.20
4
4
  Summary: llama-index llms openai integration
5
5
  License: MIT
6
6
  Author: llama-index
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
- Requires-Dist: llama-index-core (>=0.12.4,<0.13.0)
14
+ Requires-Dist: llama-index-core (>=0.12.17,<0.13.0)
15
15
  Requires-Dist: openai (>=1.58.1,<2.0.0)
16
16
  Description-Content-Type: text/markdown
17
17
 
@@ -0,0 +1,7 @@
1
+ llama_index/llms/openai/__init__.py,sha256=vm3cIBSGkBFlE77GyfyN0EhpJcnJZN95QMhPN53EkbE,148
2
+ llama_index/llms/openai/base.py,sha256=9HBszflpKp81gRSMaWKVa4PnG6sLfV9mWOyAkTM7MUI,38055
3
+ llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ llama_index/llms/openai/utils.py,sha256=rQJ5B_griANqC4oNbkhgtjWijytLfG0HLlvYh9qCjv4,20708
5
+ llama_index_llms_openai-0.3.20.dist-info/METADATA,sha256=V4PGur14J6RiUC8S1YDDfuVTav3smCXx6dN-baidtng,3322
6
+ llama_index_llms_openai-0.3.20.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
7
+ llama_index_llms_openai-0.3.20.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- llama_index/llms/openai/__init__.py,sha256=vm3cIBSGkBFlE77GyfyN0EhpJcnJZN95QMhPN53EkbE,148
2
- llama_index/llms/openai/base.py,sha256=Bj7o-NCrUSWK3cES3anFgANMLRbmdLG8AkxC9QrVKqw,36637
3
- llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- llama_index/llms/openai/utils.py,sha256=se_tHHLsNW4u2Ei_25HRPAm1lmzv-kFp2r2WqqL_jfE,18858
5
- llama_index_llms_openai-0.3.18.dist-info/METADATA,sha256=4qvbR9QPW-vHG6tsGLEedOUP6Sf15LzW5jESGO64jdk,3321
6
- llama_index_llms_openai-0.3.18.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
7
- llama_index_llms_openai-0.3.18.dist-info/RECORD,,