llama-index-llms-bedrock-converse 0.9.5__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import warnings
1
2
  from typing import (
2
3
  Any,
3
4
  Callable,
@@ -20,6 +21,8 @@ from llama_index.core.base.llms.types import (
20
21
  CompletionResponseGen,
21
22
  LLMMetadata,
22
23
  MessageRole,
24
+ TextBlock,
25
+ ThinkingBlock,
23
26
  )
24
27
  from llama_index.core.bridge.pydantic import Field, PrivateAttr
25
28
  from llama_index.core.callbacks import CallbackManager
@@ -46,6 +49,8 @@ from llama_index.llms.bedrock_converse.utils import (
46
49
  join_two_dicts,
47
50
  messages_to_converse_messages,
48
51
  tools_to_converse_tools,
52
+ is_reasoning,
53
+ ThinkingDict,
49
54
  )
50
55
 
51
56
  if TYPE_CHECKING:
@@ -158,6 +163,10 @@ class BedrockConverse(FunctionCallingLLM):
158
163
  trace: Optional[str] = Field(
159
164
  description="Specifies whether to enable or disable the Bedrock trace. If enabled, you can see the full Bedrock trace."
160
165
  )
166
+ thinking: Optional[ThinkingDict] = Field(
167
+ description="Specifies the thinking configuration of a reasoning model. Only applicable to Anthropic and DeepSeek models",
168
+ default=None,
169
+ )
161
170
  additional_kwargs: Dict[str, Any] = Field(
162
171
  default_factory=dict,
163
172
  description="Additional kwargs for the bedrock invokeModel request.",
@@ -200,6 +209,7 @@ class BedrockConverse(FunctionCallingLLM):
200
209
  guardrail_version: Optional[str] = None,
201
210
  application_inference_profile_arn: Optional[str] = None,
202
211
  trace: Optional[str] = None,
212
+ thinking: Optional[ThinkingDict] = None,
203
213
  ) -> None:
204
214
  additional_kwargs = additional_kwargs or {}
205
215
  callback_manager = callback_manager or CallbackManager([])
@@ -213,6 +223,13 @@ class BedrockConverse(FunctionCallingLLM):
213
223
  "botocore_session": botocore_session,
214
224
  }
215
225
 
226
+ if not is_reasoning(model) and thinking is not None:
227
+ thinking = None
228
+ warnings.warn(
229
+ "You set thinking parameters for a non-reasoning models, they will be ignored",
230
+ UserWarning,
231
+ )
232
+
216
233
  super().__init__(
217
234
  temperature=temperature,
218
235
  max_tokens=max_tokens,
@@ -243,6 +260,7 @@ class BedrockConverse(FunctionCallingLLM):
243
260
  guardrail_version=guardrail_version,
244
261
  application_inference_profile_arn=application_inference_profile_arn,
245
262
  trace=trace,
263
+ thinking=thinking,
246
264
  )
247
265
 
248
266
  self._config = None
@@ -330,7 +348,9 @@ class BedrockConverse(FunctionCallingLLM):
330
348
 
331
349
  def _get_content_and_tool_calls(
332
350
  self, response: Optional[Dict[str, Any]] = None, content: Dict[str, Any] = None
333
- ) -> Tuple[str, Dict[str, Any], List[str], List[str]]:
351
+ ) -> Tuple[
352
+ List[Union[TextBlock, ThinkingBlock]], Dict[str, Any], List[str], List[str]
353
+ ]:
334
354
  assert response is not None or content is not None, (
335
355
  f"Either response or content must be provided. Got response: {response}, content: {content}"
336
356
  )
@@ -340,14 +360,26 @@ class BedrockConverse(FunctionCallingLLM):
340
360
  tool_calls = []
341
361
  tool_call_ids = []
342
362
  status = []
343
- text_content = ""
363
+ blocks = []
344
364
  if content is not None:
345
365
  content_list = [content]
346
366
  else:
347
367
  content_list = response["output"]["message"]["content"]
368
+
348
369
  for content_block in content_list:
349
370
  if text := content_block.get("text", None):
350
- text_content += text
371
+ blocks.append(TextBlock(text=text))
372
+ if thinking := content_block.get("reasoningContent", None):
373
+ blocks.append(
374
+ ThinkingBlock(
375
+ content=thinking.get("reasoningText", {}).get("text", None),
376
+ additional_information={
377
+ "signature": thinking.get("reasoningText", {}).get(
378
+ "signature", None
379
+ )
380
+ },
381
+ )
382
+ )
351
383
  if tool_usage := content_block.get("toolUse", None):
352
384
  if "toolUseId" not in tool_usage:
353
385
  tool_usage["toolUseId"] = content_block["toolUseId"]
@@ -361,7 +393,7 @@ class BedrockConverse(FunctionCallingLLM):
361
393
  tool_call_ids.append(tool_result_content.get("toolUseId", ""))
362
394
  status.append(tool_result.get("status", ""))
363
395
 
364
- return text_content, tool_calls, tool_call_ids, status
396
+ return blocks, tool_calls, tool_call_ids, status
365
397
 
366
398
  @llm_chat_callback()
367
399
  def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
@@ -370,6 +402,8 @@ class BedrockConverse(FunctionCallingLLM):
370
402
  messages, self.model
371
403
  )
372
404
  all_kwargs = self._get_all_kwargs(**kwargs)
405
+ if self.thinking is not None:
406
+ all_kwargs["thinking"] = self.thinking
373
407
 
374
408
  # invoke LLM in AWS Bedrock Converse with retry
375
409
  response = converse_with_retry(
@@ -386,14 +420,14 @@ class BedrockConverse(FunctionCallingLLM):
386
420
  **all_kwargs,
387
421
  )
388
422
 
389
- content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
423
+ blocks, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
390
424
  response
391
425
  )
392
426
 
393
427
  return ChatResponse(
394
428
  message=ChatMessage(
395
429
  role=MessageRole.ASSISTANT,
396
- content=content,
430
+ blocks=blocks,
397
431
  additional_kwargs={
398
432
  "tool_calls": tool_calls,
399
433
  "tool_call_id": tool_call_ids,
@@ -420,6 +454,8 @@ class BedrockConverse(FunctionCallingLLM):
420
454
  messages, self.model
421
455
  )
422
456
  all_kwargs = self._get_all_kwargs(**kwargs)
457
+ if self.thinking is not None:
458
+ all_kwargs["thinking"] = self.thinking
423
459
 
424
460
  # invoke LLM in AWS Bedrock Converse with retry
425
461
  response = converse_with_retry(
@@ -441,12 +477,22 @@ class BedrockConverse(FunctionCallingLLM):
441
477
  tool_calls = [] # Track tool calls separately
442
478
  current_tool_call = None # Track the current tool call being built
443
479
  role = MessageRole.ASSISTANT
480
+ thinking = ""
481
+ thinking_signature = ""
444
482
 
445
483
  for chunk in response["stream"]:
446
484
  if content_block_delta := chunk.get("contentBlockDelta"):
447
485
  content_delta = content_block_delta["delta"]
448
486
  content = join_two_dicts(content, content_delta)
449
487
 
488
+ if "reasoningContent" in content_delta:
489
+ thinking += content_delta.get("reasoningContent", {}).get(
490
+ "text", ""
491
+ )
492
+ thinking_signature += content_delta.get(
493
+ "reasoningContent", {}
494
+ ).get("signature", "")
495
+
450
496
  # If this delta contains tool call info, update current tool call
451
497
  if "toolUse" in content_delta:
452
498
  tool_use_delta = content_delta["toolUse"]
@@ -477,10 +523,24 @@ class BedrockConverse(FunctionCallingLLM):
477
523
  current_tool_call, tool_use_delta
478
524
  )
479
525
 
526
+ blocks: List[Union[TextBlock, ThinkingBlock]] = [
527
+ TextBlock(text=content.get("text", ""))
528
+ ]
529
+ if thinking != "":
530
+ blocks.insert(
531
+ 0,
532
+ ThinkingBlock(
533
+ content=thinking,
534
+ additional_information={
535
+ "signature": thinking_signature
536
+ },
537
+ ),
538
+ )
539
+
480
540
  yield ChatResponse(
481
541
  message=ChatMessage(
482
542
  role=role,
483
- content=content.get("text", ""),
543
+ blocks=blocks,
484
544
  additional_kwargs={
485
545
  "tool_calls": tool_calls,
486
546
  "tool_call_id": [
@@ -502,10 +562,24 @@ class BedrockConverse(FunctionCallingLLM):
502
562
  # Add to our list of tool calls
503
563
  tool_calls.append(current_tool_call)
504
564
 
565
+ blocks: List[Union[TextBlock, ThinkingBlock]] = [
566
+ TextBlock(text=content.get("text", ""))
567
+ ]
568
+ if thinking != "":
569
+ blocks.insert(
570
+ 0,
571
+ ThinkingBlock(
572
+ content=thinking,
573
+ additional_information={
574
+ "signature": thinking_signature
575
+ },
576
+ ),
577
+ )
578
+
505
579
  yield ChatResponse(
506
580
  message=ChatMessage(
507
581
  role=role,
508
- content=content.get("text", ""),
582
+ blocks=blocks,
509
583
  additional_kwargs={
510
584
  "tool_calls": tool_calls,
511
585
  "tool_call_id": [
@@ -524,10 +598,24 @@ class BedrockConverse(FunctionCallingLLM):
524
598
  # Handle metadata event - this contains the final token usage
525
599
  if usage := metadata.get("usage"):
526
600
  # Yield a final response with correct token usage
601
+ blocks: List[Union[TextBlock, ThinkingBlock]] = [
602
+ TextBlock(text=content.get("text", ""))
603
+ ]
604
+ if thinking != "":
605
+ blocks.insert(
606
+ 0,
607
+ ThinkingBlock(
608
+ content=thinking,
609
+ additional_information={
610
+ "signature": thinking_signature
611
+ },
612
+ ),
613
+ )
614
+
527
615
  yield ChatResponse(
528
616
  message=ChatMessage(
529
617
  role=role,
530
- content=content.get("text", ""),
618
+ blocks=blocks,
531
619
  additional_kwargs={
532
620
  "tool_calls": tool_calls,
533
621
  "tool_call_id": [
@@ -559,6 +647,8 @@ class BedrockConverse(FunctionCallingLLM):
559
647
  messages, self.model
560
648
  )
561
649
  all_kwargs = self._get_all_kwargs(**kwargs)
650
+ if self.thinking is not None:
651
+ all_kwargs["thinking"] = self.thinking
562
652
 
563
653
  # invoke LLM in AWS Bedrock Converse with retry
564
654
  response = await converse_with_retry_async(
@@ -577,14 +667,14 @@ class BedrockConverse(FunctionCallingLLM):
577
667
  **all_kwargs,
578
668
  )
579
669
 
580
- content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
670
+ blocks, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
581
671
  response
582
672
  )
583
673
 
584
674
  return ChatResponse(
585
675
  message=ChatMessage(
586
676
  role=MessageRole.ASSISTANT,
587
- content=content,
677
+ blocks=blocks,
588
678
  additional_kwargs={
589
679
  "tool_calls": tool_calls,
590
680
  "tool_call_id": tool_call_ids,
@@ -611,6 +701,8 @@ class BedrockConverse(FunctionCallingLLM):
611
701
  messages, self.model
612
702
  )
613
703
  all_kwargs = self._get_all_kwargs(**kwargs)
704
+ if self.thinking is not None:
705
+ all_kwargs["thinking"] = self.thinking
614
706
 
615
707
  # invoke LLM in AWS Bedrock Converse with retry
616
708
  response_gen = await converse_with_retry_async(
@@ -634,12 +726,22 @@ class BedrockConverse(FunctionCallingLLM):
634
726
  tool_calls = [] # Track tool calls separately
635
727
  current_tool_call = None # Track the current tool call being built
636
728
  role = MessageRole.ASSISTANT
729
+ thinking = ""
730
+ thinking_signature = ""
637
731
 
638
732
  async for chunk in response_gen:
639
733
  if content_block_delta := chunk.get("contentBlockDelta"):
640
734
  content_delta = content_block_delta["delta"]
641
735
  content = join_two_dicts(content, content_delta)
642
736
 
737
+ if "reasoningContent" in content_delta:
738
+ thinking += content_delta.get("reasoningContent", {}).get(
739
+ "text", ""
740
+ )
741
+ thinking_signature += content_delta.get(
742
+ "reasoningContent", {}
743
+ ).get("signature", "")
744
+
643
745
  # If this delta contains tool call info, update current tool call
644
746
  if "toolUse" in content_delta:
645
747
  tool_use_delta = content_delta["toolUse"]
@@ -669,11 +771,24 @@ class BedrockConverse(FunctionCallingLLM):
669
771
  current_tool_call = join_two_dicts(
670
772
  current_tool_call, tool_use_delta
671
773
  )
774
+ blocks: List[Union[TextBlock, ThinkingBlock]] = [
775
+ TextBlock(text=content.get("text", ""))
776
+ ]
777
+ if thinking != "":
778
+ blocks.insert(
779
+ 0,
780
+ ThinkingBlock(
781
+ content=thinking,
782
+ additional_information={
783
+ "signature": thinking_signature
784
+ },
785
+ ),
786
+ )
672
787
 
673
788
  yield ChatResponse(
674
789
  message=ChatMessage(
675
790
  role=role,
676
- content=content.get("text", ""),
791
+ blocks=blocks,
677
792
  additional_kwargs={
678
793
  "tool_calls": tool_calls,
679
794
  "tool_call_id": [
@@ -695,10 +810,24 @@ class BedrockConverse(FunctionCallingLLM):
695
810
  # Add to our list of tool calls
696
811
  tool_calls.append(current_tool_call)
697
812
 
813
+ blocks: List[Union[TextBlock, ThinkingBlock]] = [
814
+ TextBlock(text=content.get("text", ""))
815
+ ]
816
+ if thinking != "":
817
+ blocks.insert(
818
+ 0,
819
+ ThinkingBlock(
820
+ content=thinking,
821
+ additional_information={
822
+ "signature": thinking_signature
823
+ },
824
+ ),
825
+ )
826
+
698
827
  yield ChatResponse(
699
828
  message=ChatMessage(
700
829
  role=role,
701
- content=content.get("text", ""),
830
+ blocks=blocks,
702
831
  additional_kwargs={
703
832
  "tool_calls": tool_calls,
704
833
  "tool_call_id": [
@@ -717,10 +846,24 @@ class BedrockConverse(FunctionCallingLLM):
717
846
  # Handle metadata event - this contains the final token usage
718
847
  if usage := metadata.get("usage"):
719
848
  # Yield a final response with correct token usage
849
+ blocks: List[Union[TextBlock, ThinkingBlock]] = [
850
+ TextBlock(text=content.get("text", ""))
851
+ ]
852
+ if thinking != "":
853
+ blocks.insert(
854
+ 0,
855
+ ThinkingBlock(
856
+ content=thinking,
857
+ additional_information={
858
+ "signature": thinking_signature
859
+ },
860
+ ),
861
+ )
862
+
720
863
  yield ChatResponse(
721
864
  message=ChatMessage(
722
865
  role=role,
723
- content=content.get("text", ""),
866
+ blocks=blocks,
724
867
  additional_kwargs={
725
868
  "tool_calls": tool_calls,
726
869
  "tool_call_id": [
@@ -1,7 +1,18 @@
1
1
  import base64
2
2
  import json
3
3
  import logging
4
- from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
4
+ from typing import (
5
+ Any,
6
+ Callable,
7
+ Dict,
8
+ List,
9
+ Optional,
10
+ Sequence,
11
+ Tuple,
12
+ Literal,
13
+ Union,
14
+ )
15
+ from typing_extensions import TypedDict
5
16
  from tenacity import (
6
17
  before_sleep_log,
7
18
  retry,
@@ -20,6 +31,7 @@ from llama_index.core.base.llms.types import (
20
31
  AudioBlock,
21
32
  DocumentBlock,
22
33
  CachePoint,
34
+ ThinkingBlock,
23
35
  )
24
36
 
25
37
 
@@ -151,6 +163,19 @@ BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
151
163
  "amazon.nova-micro-v1:0",
152
164
  )
153
165
 
166
+ BEDROCK_REASONING_MODELS = (
167
+ "anthropic.claude-3-7-sonnet-20250219-v1:0",
168
+ "anthropic.claude-opus-4-20250514-v1:0",
169
+ "anthropic.claude-sonnet-4-20250514-v1:0",
170
+ "anthropic.claude-sonnet-4-5-20250929-v1:0",
171
+ "deepseek.r1-v1:0",
172
+ )
173
+
174
+
175
+ def is_reasoning(model_name: str) -> bool:
176
+ model_name = get_model_name(model_name)
177
+ return model_name in BEDROCK_REASONING_MODELS
178
+
154
179
 
155
180
  def get_model_name(model_name: str) -> str:
156
181
  """Extract base model name from region-prefixed model identifier."""
@@ -220,6 +245,22 @@ def _content_block_to_bedrock_format(
220
245
  return {
221
246
  "text": block.text,
222
247
  }
248
+ elif isinstance(block, ThinkingBlock):
249
+ if block.content:
250
+ thinking_data = {
251
+ "reasoningContent": {"reasoningText": {"text": block.content}}
252
+ }
253
+ if (
254
+ "signature" in block.additional_information
255
+ and block.additional_information["signature"]
256
+ ):
257
+ thinking_data["reasoningContent"]["reasoningText"]["signature"] = (
258
+ block.additional_information["signature"]
259
+ )
260
+
261
+ return thinking_data
262
+ else:
263
+ return None
223
264
  elif isinstance(block, DocumentBlock):
224
265
  if not block.data:
225
266
  file_buffer = block.resolve_document()
@@ -518,6 +559,10 @@ def converse_with_retry(
518
559
  "temperature": temperature,
519
560
  },
520
561
  }
562
+ if "thinking" in kwargs:
563
+ converse_kwargs["additionalModelRequestFields"] = {
564
+ "thinking": kwargs["thinking"]
565
+ }
521
566
  if system_prompt:
522
567
  if isinstance(system_prompt, str):
523
568
  # if the system prompt is a simple text (for retro compatibility)
@@ -547,7 +592,14 @@ def converse_with_retry(
547
592
  {
548
593
  k: v
549
594
  for k, v in kwargs.items()
550
- if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
595
+ if k
596
+ not in [
597
+ "tools",
598
+ "guardrail_identifier",
599
+ "guardrail_version",
600
+ "trace",
601
+ "thinking",
602
+ ]
551
603
  },
552
604
  )
553
605
 
@@ -589,6 +641,10 @@ async def converse_with_retry_async(
589
641
  "temperature": temperature,
590
642
  },
591
643
  }
644
+ if "thinking" in kwargs:
645
+ converse_kwargs["additionalModelRequestFields"] = {
646
+ "thinking": kwargs["thinking"]
647
+ }
592
648
 
593
649
  if system_prompt:
594
650
  if isinstance(system_prompt, str):
@@ -622,7 +678,14 @@ async def converse_with_retry_async(
622
678
  {
623
679
  k: v
624
680
  for k, v in kwargs.items()
625
- if k not in ["tools", "guardrail_identifier", "guardrail_version", "trace"]
681
+ if k
682
+ not in [
683
+ "tools",
684
+ "guardrail_identifier",
685
+ "guardrail_version",
686
+ "trace",
687
+ "thinking",
688
+ ]
626
689
  },
627
690
  )
628
691
  _boto_client_kwargs = {}
@@ -688,3 +751,8 @@ def join_two_dicts(dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, An
688
751
  else:
689
752
  new_dict[key] += value
690
753
  return new_dict
754
+
755
+
756
+ class ThinkingDict(TypedDict):
757
+ type: Literal["enabled"]
758
+ budget_tokens: int
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llama-index-llms-bedrock-converse
3
- Version: 0.9.5
3
+ Version: 0.10.1
4
4
  Summary: llama-index llms bedrock converse integration
5
5
  Author-email: Your Name <you@example.com>
6
6
  License-Expression: MIT
@@ -8,7 +8,7 @@ License-File: LICENSE
8
8
  Requires-Python: <4.0,>=3.9
9
9
  Requires-Dist: aioboto3<16,>=15.0.0
10
10
  Requires-Dist: boto3<2,>=1.38.27
11
- Requires-Dist: llama-index-core<0.15,>=0.13.0
11
+ Requires-Dist: llama-index-core<0.15,>=0.14.3
12
12
  Description-Content-Type: text/markdown
13
13
 
14
14
  # LlamaIndex Llms Integration: Bedrock Converse
@@ -0,0 +1,7 @@
1
+ llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
2
+ llama_index/llms/bedrock_converse/base.py,sha256=tXQMmgyYZmrudRsQ7uofvQyIKNUTQtTpcdny1EoMCB0,41080
3
+ llama_index/llms/bedrock_converse/utils.py,sha256=O0z1eJVjX_ZdghESiHfpx1KxmS3PQJIjSAUJtUsnH4c,27248
4
+ llama_index_llms_bedrock_converse-0.10.1.dist-info/METADATA,sha256=w6TjnF4jlxMVcXBLEJzSjEUfCWPZ2xkjS84VbF6hbA0,7834
5
+ llama_index_llms_bedrock_converse-0.10.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ llama_index_llms_bedrock_converse-0.10.1.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
7
+ llama_index_llms_bedrock_converse-0.10.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
2
- llama_index/llms/bedrock_converse/base.py,sha256=SHGRmAYcMUMAjmnHWFLYKPzvsAAjCF_A1Mvc7s9I7IM,35233
3
- llama_index/llms/bedrock_converse/utils.py,sha256=Ly-s3mROVreinvYmRcAJU7MksSHqeTEa1tnY3na17wg,25565
4
- llama_index_llms_bedrock_converse-0.9.5.dist-info/METADATA,sha256=ALn1SYaHR7aYuALhDShDhbWBDHOzw9RAHQI1iS0xhLM,7833
5
- llama_index_llms_bedrock_converse-0.9.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- llama_index_llms_bedrock_converse-0.9.5.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
7
- llama_index_llms_bedrock_converse-0.9.5.dist-info/RECORD,,