llama-index-llms-bedrock-converse 0.9.4__tar.gz → 0.10.0__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.9.4
+Version: 0.10.0
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
@@ -8,7 +8,7 @@ License-File: LICENSE
 Requires-Python: <4.0,>=3.9
 Requires-Dist: aioboto3<16,>=15.0.0
 Requires-Dist: boto3<2,>=1.38.27
-Requires-Dist: llama-index-core<0.15,>=0.13.0
+Requires-Dist: llama-index-core<0.15,>=0.14.3
 Description-Content-Type: text/markdown
 
 # LlamaIndex Llms Integration: Bedrock Converse
--- a/llama_index/llms/bedrock_converse/base.py
+++ b/llama_index/llms/bedrock_converse/base.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import (
     Any,
     Callable,
@@ -20,6 +21,8 @@ from llama_index.core.base.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
     MessageRole,
+    TextBlock,
+    ThinkingBlock,
 )
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
@@ -46,6 +49,8 @@ from llama_index.llms.bedrock_converse.utils import (
     join_two_dicts,
     messages_to_converse_messages,
     tools_to_converse_tools,
+    is_reasoning,
+    ThinkingDict,
 )
 
 if TYPE_CHECKING:
@@ -158,6 +163,10 @@ class BedrockConverse(FunctionCallingLLM):
     trace: Optional[str] = Field(
         description="Specifies whether to enable or disable the Bedrock trace. If enabled, you can see the full Bedrock trace."
     )
+    thinking: Optional[ThinkingDict] = Field(
+        description="Specifies the thinking configuration of a reasoning model. Only applicable to Anthropic and DeepSeek models",
+        default=None,
+    )
     additional_kwargs: Dict[str, Any] = Field(
         default_factory=dict,
         description="Additional kwargs for the bedrock invokeModel request.",
@@ -200,6 +209,7 @@ class BedrockConverse(FunctionCallingLLM):
         guardrail_version: Optional[str] = None,
         application_inference_profile_arn: Optional[str] = None,
         trace: Optional[str] = None,
+        thinking: Optional[ThinkingDict] = None,
     ) -> None:
         additional_kwargs = additional_kwargs or {}
         callback_manager = callback_manager or CallbackManager([])
@@ -213,6 +223,13 @@ class BedrockConverse(FunctionCallingLLM):
             "botocore_session": botocore_session,
         }
 
+        if not is_reasoning(model) and thinking is not None:
+            thinking = None
+            warnings.warn(
+                "You set thinking parameters for a non-reasoning model; they will be ignored",
+                UserWarning,
+            )
+
         super().__init__(
             temperature=temperature,
             max_tokens=max_tokens,
@@ -243,6 +260,7 @@ class BedrockConverse(FunctionCallingLLM):
             guardrail_version=guardrail_version,
             application_inference_profile_arn=application_inference_profile_arn,
             trace=trace,
+            thinking=thinking,
         )
 
         self._config = None
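Per the guard above, passing `thinking` for a model outside `BEDROCK_REASONING_MODELS` resets the field to `None` and emits a `UserWarning`. A sketch of that behavior (the model ID is taken from the function-calling table in `utils.py`):

```python
import warnings

from llama_index.llms.bedrock_converse import BedrockConverse

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    llm = BedrockConverse(
        model="mistral.mistral-large-2402-v1:0",  # not a reasoning model
        thinking={"type": "enabled", "budget_tokens": 1024},
    )

assert llm.thinking is None
assert any(issubclass(w.category, UserWarning) for w in caught)
```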
@@ -330,7 +348,9 @@ class BedrockConverse(FunctionCallingLLM):
 
     def _get_content_and_tool_calls(
         self, response: Optional[Dict[str, Any]] = None, content: Dict[str, Any] = None
-    ) -> Tuple[str, Dict[str, Any], List[str], List[str]]:
+    ) -> Tuple[
+        List[Union[TextBlock, ThinkingBlock]], Dict[str, Any], List[str], List[str]
+    ]:
         assert response is not None or content is not None, (
             f"Either response or content must be provided. Got response: {response}, content: {content}"
         )
@@ -340,14 +360,26 @@ class BedrockConverse(FunctionCallingLLM):
         tool_calls = []
         tool_call_ids = []
         status = []
-        text_content = ""
+        blocks = []
         if content is not None:
             content_list = [content]
         else:
             content_list = response["output"]["message"]["content"]
+
         for content_block in content_list:
             if text := content_block.get("text", None):
-                text_content += text
+                blocks.append(TextBlock(text=text))
+            if thinking := content_block.get("reasoningContent", None):
+                blocks.append(
+                    ThinkingBlock(
+                        content=thinking.get("reasoningText", {}).get("text", None),
+                        additional_information={
+                            "signature": thinking.get("reasoningText", {}).get(
+                                "signature", None
+                            )
+                        },
+                    )
+                )
             if tool_usage := content_block.get("toolUse", None):
                 if "toolUseId" not in tool_usage:
                     tool_usage["toolUseId"] = content_block["toolUseId"]
@@ -361,7 +393,7 @@ class BedrockConverse(FunctionCallingLLM):
                 tool_call_ids.append(tool_result_content.get("toolUseId", ""))
                 status.append(tool_result.get("status", ""))
 
-        return text_content, tool_calls, tool_call_ids, status
+        return blocks, tool_calls, tool_call_ids, status
 
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
@@ -370,6 +402,8 @@ class BedrockConverse(FunctionCallingLLM):
             messages, self.model
         )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
@@ -386,14 +420,14 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )
 
-        content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
+        blocks, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
             response
         )
 
         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                content=content,
+                blocks=blocks,
                 additional_kwargs={
                     "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
@@ -420,6 +454,8 @@ class BedrockConverse(FunctionCallingLLM):
             messages, self.model
         )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
@@ -441,12 +477,22 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""
 
             for chunk in response["stream"]:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)
 
+                    if "reasoningContent" in content_delta:
+                        thinking += content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -477,10 +523,24 @@ class BedrockConverse(FunctionCallingLLM):
                                 current_tool_call, tool_use_delta
                             )
 
+                    blocks: List[Union[TextBlock, ThinkingBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
                                 "tool_calls": tool_calls,
                                 "tool_call_id": [
@@ -502,10 +562,24 @@ class BedrockConverse(FunctionCallingLLM):
                         # Add to our list of tool calls
                         tool_calls.append(current_tool_call)
 
+                    blocks: List[Union[TextBlock, ThinkingBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
                                 "tool_calls": tool_calls,
                                 "tool_call_id": [
@@ -524,10 +598,24 @@ class BedrockConverse(FunctionCallingLLM):
                 # Handle metadata event - this contains the final token usage
                 if usage := metadata.get("usage"):
                     # Yield a final response with correct token usage
+                    blocks: List[Union[TextBlock, ThinkingBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
                                 "tool_calls": tool_calls,
                                 "tool_call_id": [
@@ -559,6 +647,8 @@ class BedrockConverse(FunctionCallingLLM):
             messages, self.model
         )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response = await converse_with_retry_async(
@@ -577,14 +667,14 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )
 
-        content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
+        blocks, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
             response
         )
 
         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                content=content,
+                blocks=blocks,
                 additional_kwargs={
                     "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
@@ -611,6 +701,8 @@ class BedrockConverse(FunctionCallingLLM):
             messages, self.model
         )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response_gen = await converse_with_retry_async(
@@ -634,12 +726,22 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""
 
             async for chunk in response_gen:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)
 
+                    if "reasoningContent" in content_delta:
+                        thinking += content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -669,11 +771,24 @@ class BedrockConverse(FunctionCallingLLM):
                             current_tool_call = join_two_dicts(
                                 current_tool_call, tool_use_delta
                             )
+                    blocks: List[Union[TextBlock, ThinkingBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
 
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
                                 "tool_calls": tool_calls,
                                 "tool_call_id": [
@@ -695,10 +810,24 @@ class BedrockConverse(FunctionCallingLLM):
                         # Add to our list of tool calls
                         tool_calls.append(current_tool_call)
 
+                    blocks: List[Union[TextBlock, ThinkingBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
                                 "tool_calls": tool_calls,
                                 "tool_call_id": [
@@ -717,10 +846,24 @@ class BedrockConverse(FunctionCallingLLM):
                 # Handle metadata event - this contains the final token usage
                 if usage := metadata.get("usage"):
                     # Yield a final response with correct token usage
+                    blocks: List[Union[TextBlock, ThinkingBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
                                 "tool_calls": tool_calls,
                                 "tool_call_id": [
--- a/llama_index/llms/bedrock_converse/utils.py
+++ b/llama_index/llms/bedrock_converse/utils.py
@@ -1,7 +1,18 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Literal,
+    Union,
+)
+from typing_extensions import TypedDict
 from tenacity import (
     before_sleep_log,
     retry,
@@ -20,6 +31,7 @@ from llama_index.core.base.llms.types import (
     AudioBlock,
     DocumentBlock,
     CachePoint,
+    ThinkingBlock,
 )
 
 
@@ -95,6 +107,7 @@ BEDROCK_FUNCTION_CALLING_MODELS = (
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
     "cohere.command-r-v1:0",
     "cohere.command-r-plus-v1:0",
     "mistral.mistral-large-2402-v1:0",
@@ -124,6 +137,7 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
     "meta.llama3-1-8b-instruct-v1:0",
     "meta.llama3-1-70b-instruct-v1:0",
     "meta.llama3-2-1b-instruct-v1:0",
@@ -140,14 +154,27 @@ BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
     "anthropic.claude-3-5-haiku-20241022-v1:0",
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
-    "anthropic.claude-sonnet-4-20250514-v1:0",
     "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
     "amazon.nova-premier-v1:0",
     "amazon.nova-pro-v1:0",
     "amazon.nova-lite-v1:0",
     "amazon.nova-micro-v1:0",
 )
 
+BEDROCK_REASONING_MODELS = (
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "deepseek.r1-v1:0",
+)
+
+
+def is_reasoning(model_name: str) -> bool:
+    model_name = get_model_name(model_name)
+    return model_name in BEDROCK_REASONING_MODELS
+
 
 def get_model_name(model_name: str) -> str:
     """Extract base model name from region-prefixed model identifier."""
@@ -217,6 +244,22 @@ def _content_block_to_bedrock_format(
         return {
             "text": block.text,
         }
+    elif isinstance(block, ThinkingBlock):
+        if block.content:
+            thinking_data = {
+                "reasoningContent": {"reasoningText": {"text": block.content}}
+            }
+            if (
+                "signature" in block.additional_information
+                and block.additional_information["signature"]
+            ):
+                thinking_data["reasoningContent"]["reasoningText"]["signature"] = (
+                    block.additional_information["signature"]
+                )
+
+            return thinking_data
+        else:
+            return None
     elif isinstance(block, DocumentBlock):
         if not block.data:
             file_buffer = block.resolve_document()
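This makes a `ThinkingBlock` round-trip into the same `reasoningContent` shape the response parser reads. A sketch of the mapping (illustrative values, shown without calling the private helper):

```python
from llama_index.core.base.llms.types import ThinkingBlock

block = ThinkingBlock(
    content="step 1: …",
    additional_information={"signature": "abc123"},
)
# Per the branch above, the block converts to:
expected = {
    "reasoningContent": {
        "reasoningText": {"text": "step 1: …", "signature": "abc123"}
    }
}
```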
@@ -515,6 +558,10 @@ def converse_with_retry(
             "temperature": temperature,
         },
     }
+    if "thinking" in kwargs:
+        converse_kwargs["additionalModelRequestFields"] = {
+            "thinking": kwargs["thinking"]
+        }
     if system_prompt:
         if isinstance(system_prompt, str):
             # if the system prompt is a simple text (for retro compatibility)
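With `thinking` present, the outgoing Converse request gains an `additionalModelRequestFields` entry alongside the keys built above; roughly (abridged sketch, other request keys omitted and values illustrative):

```python
converse_kwargs = {
    "modelId": "anthropic.claude-3-7-sonnet-20250219-v1:0",
    "inferenceConfig": {"temperature": 1.0},
    "additionalModelRequestFields": {
        "thinking": {"type": "enabled", "budget_tokens": 1024}
    },
}
```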
@@ -544,7 +591,14 @@
         {
             k: v
             for k, v in kwargs.items()
-            if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
+            if k
+            not in [
+                "tools",
+                "guardrail_identifier",
+                "guardrail_version",
+                "trace",
+                "thinking",
+            ]
         },
     )
 
@@ -586,6 +640,10 @@ async def converse_with_retry_async(
             "temperature": temperature,
         },
     }
+    if "thinking" in kwargs:
+        converse_kwargs["additionalModelRequestFields"] = {
+            "thinking": kwargs["thinking"]
+        }
 
     if system_prompt:
         if isinstance(system_prompt, str):
@@ -619,7 +677,14 @@
         {
             k: v
             for k, v in kwargs.items()
-            if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
+            if k
+            not in [
+                "tools",
+                "guardrail_identifier",
+                "guardrail_version",
+                "trace",
+                "thinking",
+            ]
         },
     )
     _boto_client_kwargs = {}
@@ -685,3 +750,8 @@ def join_two_dicts(dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, Any]:
         else:
             new_dict[key] += value
     return new_dict
+
+
+class ThinkingDict(TypedDict):
+    type: Literal["enabled"]
+    budget_tokens: int
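`ThinkingDict` mirrors the extended-thinking request field: `type` must be the literal string `"enabled"` and `budget_tokens` caps how many tokens the model may spend reasoning. A type-checked sketch with an illustrative budget:

```python
from llama_index.llms.bedrock_converse.utils import ThinkingDict

cfg: ThinkingDict = {"type": "enabled", "budget_tokens": 2048}
```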
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ dev = [
 
 [project]
 name = "llama-index-llms-bedrock-converse"
-version = "0.9.4"
+version = "0.10.0"
 description = "llama-index llms bedrock converse integration"
 authors = [{name = "Your Name", email = "you@example.com"}]
 requires-python = ">=3.9,<4.0"
@@ -38,7 +38,7 @@ license = "MIT"
 dependencies = [
     "boto3>=1.38.27,<2",
     "aioboto3>=15.0.0,<16",
-    "llama-index-core>=0.13.0,<0.15",
+    "llama-index-core>=0.14.3,<0.15",
 ]
 
 [tool.codespell]