llama-index-llms-openai 0.3.42.tar.gz → 0.3.44.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
{llama_index_llms_openai-0.3.42 → llama_index_llms_openai-0.3.44}/PKG-INFO

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-openai
-Version: 0.3.42
+Version: 0.3.44
 Summary: llama-index llms openai integration
 Author: llama-index
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: llama-index-core<0.13,>=0.12.36
-Requires-Dist: openai<2,>=1.66.3
+Requires-Dist: llama-index-core<0.13,>=0.12.37
+Requires-Dist: openai<2,>=1.81.0
 Description-Content-Type: text/markdown
 
 # LlamaIndex Llms Integration: Openai
{llama_index_llms_openai-0.3.42 → llama_index_llms_openai-0.3.44}/llama_index/llms/openai/responses.py

@@ -1,6 +1,7 @@
 import functools
 import httpx
 import tiktoken
+import base64
 from openai import AsyncOpenAI, AzureOpenAI
 from openai import OpenAI as SyncOpenAI
 from openai.types.responses import (
@@ -13,7 +14,7 @@ from openai.types.responses import (
     ResponseFunctionCallArgumentsDoneEvent,
     ResponseInProgressEvent,
     ResponseOutputItemAddedEvent,
-    ResponseTextAnnotationDeltaEvent,
+    ResponseOutputTextAnnotationAddedEvent,
     ResponseTextDeltaEvent,
     ResponseWebSearchCallCompletedEvent,
     ResponseOutputItem,
@@ -23,7 +24,10 @@ from openai.types.responses import (
     ResponseFunctionWebSearch,
     ResponseComputerToolCall,
     ResponseReasoningItem,
+    ResponseCodeInterpreterToolCall,
+    ResponseImageGenCallPartialImageEvent,
 )
+from openai.types.responses.response_output_item import ImageGenerationCall, McpCall
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -59,7 +63,9 @@ from llama_index.core.base.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
     MessageRole,
+    ContentBlock,
     TextBlock,
+    ImageBlock,
 )
 from llama_index.core.bridge.pydantic import (
     Field,
@@ -445,9 +451,9 @@ class OpenAIResponses(FunctionCallingLLM):
         message = ChatMessage(role=MessageRole.ASSISTANT, blocks=[])
         additional_kwargs = {"built_in_tool_calls": []}
         tool_calls = []
+        blocks: List[ContentBlock] = []
         for item in output:
             if isinstance(item, ResponseOutputMessage):
-                blocks = []
                 for part in item.content:
                     if hasattr(part, "text"):
                         blocks.append(TextBlock(text=part.text))
@@ -457,6 +463,17 @@ class OpenAIResponses(FunctionCallingLLM):
                         additional_kwargs["refusal"] = part.refusal
 
                 message.blocks.extend(blocks)
+            elif isinstance(item, ImageGenerationCall):
+                # return an ImageBlock if there is image generation
+                if item.status != "failed":
+                    additional_kwargs["built_in_tool_calls"].append(item)
+                    if item.result is not None:
+                        image_bytes = base64.b64decode(item.result)
+                        blocks.append(ImageBlock(image=image_bytes))
+            elif isinstance(item, ResponseCodeInterpreterToolCall):
+                additional_kwargs["built_in_tool_calls"].append(item)
+            elif isinstance(item, McpCall):
+                additional_kwargs["built_in_tool_calls"].append(item)
             elif isinstance(item, ResponseFileSearchToolCall):
                 additional_kwargs["built_in_tool_calls"].append(item)
             elif isinstance(item, ResponseFunctionToolCall):
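With the parsing change above, a non-streaming chat response can now surface generated images as ImageBlock content: a successful ImageGenerationCall is recorded under built_in_tool_calls and its base64 result is decoded into image bytes. A minimal usage sketch (the model name and the built_in_tools option are illustrative assumptions, not part of this diff):

    from llama_index.core.base.llms.types import ChatMessage, ImageBlock
    from llama_index.llms.openai import OpenAIResponses

    # Assumption: built_in_tools forwards Responses API built-in tool specs;
    # the tool spec itself follows OpenAI's image_generation tool.
    llm = OpenAIResponses(model="gpt-4.1-mini", built_in_tools=[{"type": "image_generation"}])
    resp = llm.chat([ChatMessage(role="user", content="Draw a red circle on white.")])

    for block in resp.message.blocks:
        if isinstance(block, ImageBlock):
            # resolve_image() yields the raw decoded bytes
            with open("generated.png", "wb") as f:
                f.write(block.resolve_image().read())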
@@ -475,6 +492,7 @@ class OpenAIResponses(FunctionCallingLLM):
 
     @llm_retry_decorator
     def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        kwargs_dict = self._get_model_kwargs(**kwargs)
         message_dicts = to_openai_message_dicts(
             messages,
             model=self.model,
@@ -484,7 +502,7 @@ class OpenAIResponses(FunctionCallingLLM):
         response: Response = self._client.responses.create(
             input=message_dicts,
             stream=False,
-            **self._get_model_kwargs(**kwargs),
+            **kwargs_dict,
         )
 
         if self.track_previous_responses:
@@ -499,7 +517,6 @@ class OpenAIResponses(FunctionCallingLLM):
     @staticmethod
     def process_response_event(
         event: ResponseStreamEvent,
-        content: str,
         tool_calls: List[ResponseFunctionToolCall],
         built_in_tool_calls: List[Any],
         additional_kwargs: Dict[str, Any],
@@ -507,7 +524,7 @@ class OpenAIResponses(FunctionCallingLLM):
         track_previous_responses: bool,
         previous_response_id: Optional[str] = None,
     ) -> Tuple[
-        str,
+        List[ContentBlock],
         List[ResponseFunctionToolCall],
         List[Any],
         Dict[str, Any],
@@ -534,7 +551,8 @@ class OpenAIResponses(FunctionCallingLLM):
         """
         delta = ""
         updated_previous_response_id = previous_response_id
-
+        # we use blocks instead of content, since we now also support images
+        blocks: List[ContentBlock] = []
         if isinstance(event, ResponseCreatedEvent) or isinstance(
             event, ResponseInProgressEvent
         ):
@@ -548,7 +566,11 @@ class OpenAIResponses(FunctionCallingLLM):
         elif isinstance(event, ResponseTextDeltaEvent):
             # Text content is being added
             delta = event.delta
-            content += delta
+            blocks.append(TextBlock(text=delta))
+        elif isinstance(event, ResponseImageGenCallPartialImageEvent):
+            # Partial image
+            if event.partial_image_b64:
+                blocks.append(ImageBlock(image=base64.b64decode(event.partial_image_b64), detail=f"id_{event.partial_image_index}"))
         elif isinstance(event, ResponseFunctionCallArgumentsDeltaEvent):
             # Function call arguments are being streamed
             if current_tool_call is not None:
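Since process_response_event now returns per-event content blocks rather than an accumulated string, a streaming consumer can tell text deltas apart from partial image frames. A sketch under the same assumptions as above (the partial_images option follows OpenAI's image_generation tool spec; the "id_<partial_image_index>" detail tag comes from this diff):

    from llama_index.core.base.llms.types import ChatMessage, ImageBlock, TextBlock
    from llama_index.llms.openai import OpenAIResponses

    llm = OpenAIResponses(
        model="gpt-4.1-mini",  # illustrative model name
        built_in_tools=[{"type": "image_generation", "partial_images": 2}],  # assumed option
    )

    for chunk in llm.stream_chat([ChatMessage(role="user", content="Draw a red circle.")]):
        for block in chunk.message.blocks:
            if isinstance(block, TextBlock):
                print(block.text, end="", flush=True)
            elif isinstance(block, ImageBlock):
                # detail is "id_<partial_image_index>" for partial frames, per this diff
                with open(f"partial_{block.detail}.png", "wb") as f:
                    f.write(block.resolve_image().read())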
@@ -566,7 +588,7 @@ class OpenAIResponses(FunctionCallingLLM):
 
             # clear the current tool call
             current_tool_call = None
-        elif isinstance(event, ResponseTextAnnotationDeltaEvent):
+        elif isinstance(event, ResponseOutputTextAnnotationAddedEvent):
             # Annotations for the text
             annotations = additional_kwargs.get("annotations", [])
             annotations.append(event.annotation)
@@ -586,7 +608,7 @@ class OpenAIResponses(FunctionCallingLLM):
             additional_kwargs["usage"] = event.response.usage
 
         return (
-            content,
+            blocks,
             tool_calls,
             built_in_tool_calls,
             additional_kwargs,
@@ -606,7 +628,6 @@ class OpenAIResponses(FunctionCallingLLM):
         )
 
         def gen() -> ChatResponseGen:
-            content = ""
             tool_calls = []
             built_in_tool_calls = []
             additional_kwargs = {"built_in_tool_calls": []}
@@ -620,7 +641,7 @@ class OpenAIResponses(FunctionCallingLLM):
             ):
                 # Process the event and update state
                 (
-                    content,
+                    blocks,
                     tool_calls,
                     built_in_tool_calls,
                     additional_kwargs,
@@ -629,7 +650,6 @@ class OpenAIResponses(FunctionCallingLLM):
                     delta,
                 ) = OpenAIResponses.process_response_event(
                     event=event,
-                    content=content,
                     tool_calls=tool_calls,
                     built_in_tool_calls=built_in_tool_calls,
                     additional_kwargs=additional_kwargs,
@@ -651,7 +671,7 @@ class OpenAIResponses(FunctionCallingLLM):
                 yield ChatResponse(
                     message=ChatMessage(
                         role=MessageRole.ASSISTANT,
-                        content=content,
+                        blocks=blocks,
                         additional_kwargs={"tool_calls": tool_calls}
                         if tool_calls
                         else {},
@@ -732,7 +752,6 @@ class OpenAIResponses(FunctionCallingLLM):
         )
 
         async def gen() -> ChatResponseAsyncGen:
-            content = ""
             tool_calls = []
             built_in_tool_calls = []
            additional_kwargs = {"built_in_tool_calls": []}
@@ -748,7 +767,7 @@ class OpenAIResponses(FunctionCallingLLM):
             async for event in response_stream:
                 # Process the event and update state
                 (
-                    content,
+                    blocks,
                     tool_calls,
                     built_in_tool_calls,
                     additional_kwargs,
@@ -757,7 +776,6 @@ class OpenAIResponses(FunctionCallingLLM):
                     delta,
                 ) = OpenAIResponses.process_response_event(
                     event=event,
-                    content=content,
                     tool_calls=tool_calls,
                     built_in_tool_calls=built_in_tool_calls,
                     additional_kwargs=additional_kwargs,
@@ -779,7 +797,7 @@ class OpenAIResponses(FunctionCallingLLM):
                 yield ChatResponse(
                     message=ChatMessage(
                         role=MessageRole.ASSISTANT,
-                        content=content,
+                        blocks=blocks,
                         additional_kwargs={"tool_calls": tool_calls}
                         if tool_calls
                         else {},
{llama_index_llms_openai-0.3.42 → llama_index_llms_openai-0.3.44}/llama_index/llms/openai/utils.py

@@ -411,7 +411,7 @@ def to_openai_responses_message_dict(
     message: ChatMessage,
     drop_none: bool = False,
     model: Optional[str] = None,
-) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
+) -> Union[str, Dict[str, Any], List[Dict[str, Any]]]:
     """Convert a ChatMessage to an OpenAI message dict."""
     content = []
     content_txt = ""
@@ -427,7 +427,13 @@ def to_openai_responses_message_dict(
                 mimetype = block._guess_mimetype()
             else:
                 b64_string = block.data.decode("utf-8")
-            content.append({"type": "input_file", "filename": block.title, "file_data": f"data:{mimetype};base64,{b64_string}"})
+            content.append(
+                {
+                    "type": "input_file",
+                    "filename": block.title,
+                    "file_data": f"data:{mimetype};base64,{b64_string}",
+                }
+            )
         elif isinstance(block, ImageBlock):
             if block.url:
                 content.append(
@@ -492,6 +498,16 @@ def to_openai_responses_message_dict(
         ]
 
         return message_dicts
+
+    # some cases (like image generation or MCP tool calls) only support plain string input;
+    # if content_txt is non-empty, all content blocks are input_text, and the role is user, return content_txt directly
+    elif (
+        isinstance(content_txt, str)
+        and len(content_txt) != 0
+        and all(item["type"] == "input_text" for item in content)
+        and message.role.value == "user"
+    ):
+        return content_txt
     else:
         message_dict = {
             "role": message.role.value,
@@ -526,10 +542,11 @@ def to_openai_message_dicts(
     drop_none: bool = False,
     model: Optional[str] = None,
     is_responses_api: bool = False,
-) -> List[ChatCompletionMessageParam]:
+) -> Union[List[ChatCompletionMessageParam], str]:
     """Convert generic messages to OpenAI message dicts."""
     if is_responses_api:
         final_message_dicts = []
+        final_message_txt = ""
         for message in messages:
             message_dicts = to_openai_responses_message_dict(
                 message,
@@ -538,9 +555,13 @@ def to_openai_message_dicts(
             )
             if isinstance(message_dicts, list):
                 final_message_dicts.extend(message_dicts)
+            elif isinstance(message_dicts, str):
+                final_message_txt += message_dicts
             else:
                 final_message_dicts.append(message_dicts)
-
+        # this follows the string-only input logic of to_openai_responses_message_dict
+        if final_message_txt:
+            return final_message_txt
         return final_message_dicts
     else:
         return [
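Taken together, the two utils changes mean a conversation consisting solely of user text can collapse to a single string: to_openai_responses_message_dict returns content_txt for such messages, and to_openai_message_dicts propagates it when is_responses_api=True. This string-only shape is what some Responses API features (image generation, MCP tool calls) expect. A small sketch, assuming these helpers keep their public import path:

    from llama_index.core.base.llms.types import ChatMessage, MessageRole
    from llama_index.llms.openai.utils import to_openai_message_dicts

    msgs = [ChatMessage(role=MessageRole.USER, content="Generate an image of a cat")]
    payload = to_openai_message_dicts(msgs, is_responses_api=True)
    # with only user text blocks, the new fast path returns a plain string
    assert payload == "Generate an image of a cat"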
{llama_index_llms_openai-0.3.42 → llama_index_llms_openai-0.3.44}/pyproject.toml

@@ -27,13 +27,13 @@ dev = [
 
 [project]
 name = "llama-index-llms-openai"
-version = "0.3.42"
+version = "0.3.44"
 description = "llama-index llms openai integration"
 authors = [{name = "llama-index"}]
 requires-python = ">=3.9,<4.0"
 readme = "README.md"
 license = "MIT"
-dependencies = ["openai>=1.66.3,<2", "llama-index-core>=0.12.36,<0.13"]
+dependencies = ["openai>=1.81.0,<2", "llama-index-core>=0.12.37,<0.13"]
 
 [tool.codespell]
 check-filenames = true