llama-index-llms-openai 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_index/llms/openai/base.py +39 -2
- llama_index/llms/openai/utils.py +69 -21
- {llama_index_llms_openai-0.3.18.dist-info → llama_index_llms_openai-0.3.20.dist-info}/METADATA +2 -2
- llama_index_llms_openai-0.3.20.dist-info/RECORD +7 -0
- llama_index_llms_openai-0.3.18.dist-info/RECORD +0 -7
- {llama_index_llms_openai-0.3.18.dist-info → llama_index_llms_openai-0.3.20.dist-info}/WHEEL +0 -0
llama_index/llms/openai/base.py
CHANGED
|
@@ -222,6 +222,14 @@ class OpenAI(FunctionCallingLLM):
|
|
|
222
222
|
default=None,
|
|
223
223
|
description="The effort to use for reasoning models.",
|
|
224
224
|
)
|
|
225
|
+
modalities: Optional[List[str]] = Field(
|
|
226
|
+
default=None,
|
|
227
|
+
description="The output modalities to use for the model.",
|
|
228
|
+
)
|
|
229
|
+
audio_config: Optional[Dict[str, Any]] = Field(
|
|
230
|
+
default=None,
|
|
231
|
+
description="The audio configuration to use for the model.",
|
|
232
|
+
)
|
|
225
233
|
|
|
226
234
|
_client: Optional[SyncOpenAI] = PrivateAttr()
|
|
227
235
|
_aclient: Optional[AsyncOpenAI] = PrivateAttr()
|
|
@@ -254,6 +262,8 @@ class OpenAI(FunctionCallingLLM):
|
|
|
254
262
|
output_parser: Optional[BaseOutputParser] = None,
|
|
255
263
|
strict: bool = False,
|
|
256
264
|
reasoning_effort: Optional[Literal["low", "medium", "high"]] = None,
|
|
265
|
+
modalities: Optional[List[str]] = None,
|
|
266
|
+
audio_config: Optional[Dict[str, Any]] = None,
|
|
257
267
|
**kwargs: Any,
|
|
258
268
|
) -> None:
|
|
259
269
|
additional_kwargs = additional_kwargs or {}
|
|
@@ -288,6 +298,8 @@ class OpenAI(FunctionCallingLLM):
|
|
|
288
298
|
output_parser=output_parser,
|
|
289
299
|
strict=strict,
|
|
290
300
|
reasoning_effort=reasoning_effort,
|
|
301
|
+
modalities=modalities,
|
|
302
|
+
audio_config=audio_config,
|
|
291
303
|
**kwargs,
|
|
292
304
|
)
|
|
293
305
|
|
|
@@ -375,6 +387,11 @@ class OpenAI(FunctionCallingLLM):
|
|
|
375
387
|
def complete(
|
|
376
388
|
self, prompt: str, formatted: bool = False, **kwargs: Any
|
|
377
389
|
) -> CompletionResponse:
|
|
390
|
+
if self.modalities and "audio" in self.modalities:
|
|
391
|
+
raise ValueError(
|
|
392
|
+
"Audio is not supported for completion. Use chat/achat instead."
|
|
393
|
+
)
|
|
394
|
+
|
|
378
395
|
if self._use_chat_completions(kwargs):
|
|
379
396
|
complete_fn = chat_to_completion_decorator(self._chat)
|
|
380
397
|
else:
|
|
@@ -434,6 +451,11 @@ class OpenAI(FunctionCallingLLM):
|
|
|
434
451
|
# O1 models support reasoning_effort of low, medium, high
|
|
435
452
|
all_kwargs["reasoning_effort"] = self.reasoning_effort
|
|
436
453
|
|
|
454
|
+
if self.modalities is not None:
|
|
455
|
+
all_kwargs["modalities"] = self.modalities
|
|
456
|
+
if self.audio_config is not None:
|
|
457
|
+
all_kwargs["audio"] = self.audio_config
|
|
458
|
+
|
|
437
459
|
return all_kwargs
|
|
438
460
|
|
|
439
461
|
@llm_retry_decorator
|
|
@@ -459,7 +481,9 @@ class OpenAI(FunctionCallingLLM):
|
|
|
459
481
|
)
|
|
460
482
|
|
|
461
483
|
openai_message = response.choices[0].message
|
|
462
|
-
message = from_openai_message(openai_message)
|
|
484
|
+
message = from_openai_message(
|
|
485
|
+
openai_message, modalities=self.modalities or ["text"]
|
|
486
|
+
)
|
|
463
487
|
openai_token_logprobs = response.choices[0].logprobs
|
|
464
488
|
logprobs = None
|
|
465
489
|
if openai_token_logprobs and openai_token_logprobs.content:
|
|
@@ -476,6 +500,9 @@ class OpenAI(FunctionCallingLLM):
|
|
|
476
500
|
def _stream_chat(
|
|
477
501
|
self, messages: Sequence[ChatMessage], **kwargs: Any
|
|
478
502
|
) -> ChatResponseGen:
|
|
503
|
+
if self.modalities and "audio" in self.modalities:
|
|
504
|
+
raise ValueError("Audio is not supported for chat streaming")
|
|
505
|
+
|
|
479
506
|
client = self._get_client()
|
|
480
507
|
message_dicts = to_openai_message_dicts(
|
|
481
508
|
messages,
|
|
@@ -667,6 +694,11 @@ class OpenAI(FunctionCallingLLM):
|
|
|
667
694
|
async def acomplete(
|
|
668
695
|
self, prompt: str, formatted: bool = False, **kwargs: Any
|
|
669
696
|
) -> CompletionResponse:
|
|
697
|
+
if self.modalities and "audio" in self.modalities:
|
|
698
|
+
raise ValueError(
|
|
699
|
+
"Audio is not supported for completion. Use chat/achat instead."
|
|
700
|
+
)
|
|
701
|
+
|
|
670
702
|
if self._use_chat_completions(kwargs):
|
|
671
703
|
acomplete_fn = achat_to_completion_decorator(self._achat)
|
|
672
704
|
else:
|
|
@@ -708,7 +740,9 @@ class OpenAI(FunctionCallingLLM):
|
|
|
708
740
|
)
|
|
709
741
|
|
|
710
742
|
openai_message = response.choices[0].message
|
|
711
|
-
message = from_openai_message(openai_message)
|
|
743
|
+
message = from_openai_message(
|
|
744
|
+
openai_message, modalities=self.modalities or ["text"]
|
|
745
|
+
)
|
|
712
746
|
openai_token_logprobs = response.choices[0].logprobs
|
|
713
747
|
logprobs = None
|
|
714
748
|
if openai_token_logprobs and openai_token_logprobs.content:
|
|
@@ -725,6 +759,9 @@ class OpenAI(FunctionCallingLLM):
|
|
|
725
759
|
async def _astream_chat(
|
|
726
760
|
self, messages: Sequence[ChatMessage], **kwargs: Any
|
|
727
761
|
) -> ChatResponseAsyncGen:
|
|
762
|
+
if self.modalities and "audio" in self.modalities:
|
|
763
|
+
raise ValueError("Audio is not supported for chat streaming")
|
|
764
|
+
|
|
728
765
|
aclient = self._get_aclient()
|
|
729
766
|
message_dicts = to_openai_message_dicts(
|
|
730
767
|
messages,
|
llama_index/llms/openai/utils.py
CHANGED
|
@@ -27,6 +27,7 @@ from llama_index.core.base.llms.types import (
|
|
|
27
27
|
LogProb,
|
|
28
28
|
MessageRole,
|
|
29
29
|
TextBlock,
|
|
30
|
+
AudioBlock,
|
|
30
31
|
)
|
|
31
32
|
from llama_index.core.bridge.pydantic import BaseModel
|
|
32
33
|
|
|
@@ -68,6 +69,11 @@ GPT4_MODELS: Dict[str, int] = {
|
|
|
68
69
|
"gpt-4-turbo-2024-04-09": 128000,
|
|
69
70
|
"gpt-4-turbo": 128000,
|
|
70
71
|
"gpt-4o": 128000,
|
|
72
|
+
"gpt-4o-audio-preview": 128000,
|
|
73
|
+
"gpt-4o-audio-preview-2024-12-17": 128000,
|
|
74
|
+
"gpt-4o-audio-preview-2024-10-01": 128000,
|
|
75
|
+
"gpt-4o-mini-audio-preview": 128000,
|
|
76
|
+
"gpt-4o-mini-audio-preview-2024-12-17": 128000,
|
|
71
77
|
"gpt-4o-2024-05-13": 128000,
|
|
72
78
|
"gpt-4o-2024-08-06": 128000,
|
|
73
79
|
"gpt-4o-2024-11-20": 128000,
|
|
@@ -270,7 +276,16 @@ def to_openai_message_dict(
|
|
|
270
276
|
"""Convert a ChatMessage to an OpenAI message dict."""
|
|
271
277
|
content = []
|
|
272
278
|
content_txt = ""
|
|
279
|
+
reference_audio_id = None
|
|
273
280
|
for block in message.blocks:
|
|
281
|
+
if message.role == MessageRole.ASSISTANT:
|
|
282
|
+
reference_audio_id = message.additional_kwargs.get(
|
|
283
|
+
"reference_audio_id", None
|
|
284
|
+
)
|
|
285
|
+
# if reference audio id is provided, we don't need to send the audio
|
|
286
|
+
if reference_audio_id:
|
|
287
|
+
continue
|
|
288
|
+
|
|
274
289
|
if isinstance(block, TextBlock):
|
|
275
290
|
content.append({"type": "text", "text": block.text})
|
|
276
291
|
content_txt += block.text
|
|
@@ -291,6 +306,18 @@ def to_openai_message_dict(
|
|
|
291
306
|
},
|
|
292
307
|
}
|
|
293
308
|
)
|
|
309
|
+
elif isinstance(block, AudioBlock):
|
|
310
|
+
audio_bytes = block.resolve_audio(as_base64=True).read()
|
|
311
|
+
audio_str = audio_bytes.decode("utf-8")
|
|
312
|
+
content.append(
|
|
313
|
+
{
|
|
314
|
+
"type": "input_audio",
|
|
315
|
+
"input_audio": {
|
|
316
|
+
"data": audio_str,
|
|
317
|
+
"format": block.format,
|
|
318
|
+
},
|
|
319
|
+
}
|
|
320
|
+
)
|
|
294
321
|
else:
|
|
295
322
|
msg = f"Unsupported content block type: {type(block).__name__}"
|
|
296
323
|
raise ValueError(msg)
|
|
@@ -304,22 +331,34 @@ def to_openai_message_dict(
|
|
|
304
331
|
else content_txt
|
|
305
332
|
)
|
|
306
333
|
|
|
307
|
-
#
|
|
308
|
-
#
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
334
|
+
# If reference audio id is provided, we don't need to send the audio
|
|
335
|
+
# NOTE: this is only a thing for assistant messages
|
|
336
|
+
if reference_audio_id:
|
|
337
|
+
message_dict = {
|
|
338
|
+
"role": message.role.value,
|
|
339
|
+
"audio": {"id": reference_audio_id},
|
|
340
|
+
}
|
|
341
|
+
else:
|
|
342
|
+
# NOTE: Despite what the openai docs say, if the role is ASSISTANT, SYSTEM
|
|
343
|
+
# or TOOL, 'content' cannot be a list and must be string instead.
|
|
344
|
+
# Furthermore, if all blocks are text blocks, we can use the content_txt
|
|
345
|
+
# as the content. This will avoid breaking openai-like APIs.
|
|
346
|
+
message_dict = {
|
|
347
|
+
"role": message.role.value,
|
|
348
|
+
"content": (
|
|
349
|
+
content_txt
|
|
350
|
+
if message.role.value in ("assistant", "tool", "system")
|
|
351
|
+
or all(isinstance(block, TextBlock) for block in message.blocks)
|
|
352
|
+
else content
|
|
353
|
+
),
|
|
354
|
+
}
|
|
320
355
|
|
|
321
356
|
# TODO: O1 models do not support system prompts
|
|
322
|
-
if
|
|
357
|
+
if (
|
|
358
|
+
model is not None
|
|
359
|
+
and model in O1_MODELS
|
|
360
|
+
and model not in O1_MODELS_WITHOUT_FUNCTION_CALLING
|
|
361
|
+
):
|
|
323
362
|
if message_dict["role"] == "system":
|
|
324
363
|
message_dict["role"] = "developer"
|
|
325
364
|
|
|
@@ -353,20 +392,29 @@ def to_openai_message_dicts(
|
|
|
353
392
|
]
|
|
354
393
|
|
|
355
394
|
|
|
356
|
-
def from_openai_message(openai_message: ChatCompletionMessage) -> ChatMessage:
|
|
395
|
+
def from_openai_message(
|
|
396
|
+
openai_message: ChatCompletionMessage, modalities: List[str]
|
|
397
|
+
) -> ChatMessage:
|
|
357
398
|
"""Convert openai message dict to generic message."""
|
|
358
399
|
role = openai_message.role
|
|
359
400
|
# NOTE: Azure OpenAI returns function calling messages without a content key
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
401
|
+
if "text" in modalities and openai_message.content:
|
|
402
|
+
blocks = [TextBlock(text=openai_message.content or "")]
|
|
403
|
+
else:
|
|
404
|
+
blocks = []
|
|
363
405
|
|
|
364
406
|
additional_kwargs: Dict[str, Any] = {}
|
|
365
407
|
if openai_message.tool_calls:
|
|
366
408
|
tool_calls: List[ChatCompletionMessageToolCall] = openai_message.tool_calls
|
|
367
409
|
additional_kwargs.update(tool_calls=tool_calls)
|
|
368
410
|
|
|
369
|
-
|
|
411
|
+
if openai_message.audio and "audio" in modalities:
|
|
412
|
+
reference_audio_id = openai_message.audio.id
|
|
413
|
+
audio_data = openai_message.audio.data
|
|
414
|
+
additional_kwargs["reference_audio_id"] = reference_audio_id
|
|
415
|
+
blocks.append(AudioBlock(audio=audio_data, format="mp3"))
|
|
416
|
+
|
|
417
|
+
return ChatMessage(role=role, blocks=blocks, additional_kwargs=additional_kwargs)
|
|
370
418
|
|
|
371
419
|
|
|
372
420
|
def from_openai_token_logprob(
|
|
@@ -421,10 +469,10 @@ def from_openai_completion_logprobs(
|
|
|
421
469
|
|
|
422
470
|
|
|
423
471
|
def from_openai_messages(
|
|
424
|
-
openai_messages: Sequence[ChatCompletionMessage],
|
|
472
|
+
openai_messages: Sequence[ChatCompletionMessage], modalities: List[str]
|
|
425
473
|
) -> List[ChatMessage]:
|
|
426
474
|
"""Convert openai message dicts to generic messages."""
|
|
427
|
-
return [from_openai_message(message) for message in openai_messages]
|
|
475
|
+
return [from_openai_message(message, modalities) for message in openai_messages]
|
|
428
476
|
|
|
429
477
|
|
|
430
478
|
def from_openai_message_dict(message_dict: dict) -> ChatMessage:
|
{llama_index_llms_openai-0.3.18.dist-info → llama_index_llms_openai-0.3.20.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llama-index-llms-openai
|
|
3
|
-
Version: 0.3.18
|
|
3
|
+
Version: 0.3.20
|
|
4
4
|
Summary: llama-index llms openai integration
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: llama-index
|
|
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
-
Requires-Dist: llama-index-core (>=0.12.
|
|
14
|
+
Requires-Dist: llama-index-core (>=0.12.17,<0.13.0)
|
|
15
15
|
Requires-Dist: openai (>=1.58.1,<2.0.0)
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
llama_index/llms/openai/__init__.py,sha256=vm3cIBSGkBFlE77GyfyN0EhpJcnJZN95QMhPN53EkbE,148
|
|
2
|
+
llama_index/llms/openai/base.py,sha256=9HBszflpKp81gRSMaWKVa4PnG6sLfV9mWOyAkTM7MUI,38055
|
|
3
|
+
llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
llama_index/llms/openai/utils.py,sha256=rQJ5B_griANqC4oNbkhgtjWijytLfG0HLlvYh9qCjv4,20708
|
|
5
|
+
llama_index_llms_openai-0.3.20.dist-info/METADATA,sha256=V4PGur14J6RiUC8S1YDDfuVTav3smCXx6dN-baidtng,3322
|
|
6
|
+
llama_index_llms_openai-0.3.20.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
7
|
+
llama_index_llms_openai-0.3.20.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
llama_index/llms/openai/__init__.py,sha256=vm3cIBSGkBFlE77GyfyN0EhpJcnJZN95QMhPN53EkbE,148
|
|
2
|
-
llama_index/llms/openai/base.py,sha256=Bj7o-NCrUSWK3cES3anFgANMLRbmdLG8AkxC9QrVKqw,36637
|
|
3
|
-
llama_index/llms/openai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
llama_index/llms/openai/utils.py,sha256=se_tHHLsNW4u2Ei_25HRPAm1lmzv-kFp2r2WqqL_jfE,18858
|
|
5
|
-
llama_index_llms_openai-0.3.18.dist-info/METADATA,sha256=4qvbR9QPW-vHG6tsGLEedOUP6Sf15LzW5jESGO64jdk,3321
|
|
6
|
-
llama_index_llms_openai-0.3.18.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
7
|
-
llama_index_llms_openai-0.3.18.dist-info/RECORD,,
|
|
File without changes
|