llama-index-llms-bedrock-converse 0.8.2__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
+import warnings
 from typing import (
     Any,
     Callable,
     Dict,
     List,
+    Literal,
     Optional,
     Sequence,
     Tuple,
@@ -20,6 +22,9 @@ from llama_index.core.base.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
     MessageRole,
+    TextBlock,
+    ThinkingBlock,
+    ToolCallBlock,
 )
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
@@ -46,6 +51,8 @@ from llama_index.llms.bedrock_converse.utils import (
     join_two_dicts,
     messages_to_converse_messages,
     tools_to_converse_tools,
+    is_reasoning,
+    ThinkingDict,
 )

 if TYPE_CHECKING:
@@ -138,18 +145,41 @@ class BedrockConverse(FunctionCallingLLM):
         default=60.0,
         description="The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts.",
     )
+    system_prompt_caching: bool = Field(
+        default=False,
+        description="Whether to cache the system prompt. If you are using a system prompt, you should set this to True.",
+    )
+    tool_caching: bool = Field(
+        default=False,
+        description="Whether to cache the tools. If you are using tools, you should set this to True.",
+    )
     guardrail_identifier: Optional[str] = Field(
         description="The unique identifier of the guardrail that you want to use. If you don't provide a value, no guardrail is applied to the invocation."
     )
     guardrail_version: Optional[str] = Field(
         description="The version number for the guardrail. The value can also be DRAFT"
     )
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = Field(
+        description=(
+            "The stream processing mode to use when leveraging a guardrail in a streaming request (ConverseStream). "
+            "If set, the specified mode will be included in the request's guardrail configuration object, altering the streaming response behavior. "
+            "If a value is not provided, no mode will be explicitly included in the request's guardrail configuration object, and thus Amazon Bedrock's default, Synchronous Mode, will be used."
+        )
+    )
     application_inference_profile_arn: Optional[str] = Field(
         description="The ARN of an application inference profile to invoke in place of the model. If provided, make sure the model argument refers to the same one underlying the application inference profile."
     )
     trace: Optional[str] = Field(
         description="Specifies whether to enable or disable the Bedrock trace. If enabled, you can see the full Bedrock trace."
     )
+    thinking: Optional[ThinkingDict] = Field(
+        description="Specifies the thinking configuration of a reasoning model. Only applicable to Anthropic and DeepSeek models",
+        default=None,
+    )
+    supports_forced_tool_calls: bool = Field(
+        default=True,
+        description="Whether the model supports forced tool calls. If True, the model can be forced to call at least 1 or more tools.",
+    )
     additional_kwargs: Dict[str, Any] = Field(
         default_factory=dict,
         description="Additional kwargs for the bedrock invokeModel request.",
@@ -182,14 +212,19 @@ class BedrockConverse(FunctionCallingLLM):
         additional_kwargs: Optional[Dict[str, Any]] = None,
         callback_manager: Optional[CallbackManager] = None,
         system_prompt: Optional[str] = None,
+        system_prompt_caching: Optional[bool] = False,
+        tool_caching: Optional[bool] = False,
         messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
         completion_to_prompt: Optional[Callable[[str], str]] = None,
         pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
         output_parser: Optional[BaseOutputParser] = None,
         guardrail_identifier: Optional[str] = None,
         guardrail_version: Optional[str] = None,
+        guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
         application_inference_profile_arn: Optional[str] = None,
         trace: Optional[str] = None,
+        thinking: Optional[ThinkingDict] = None,
+        supports_forced_tool_calls: bool = True,
     ) -> None:
         additional_kwargs = additional_kwargs or {}
         callback_manager = callback_manager or CallbackManager([])
@@ -203,6 +238,13 @@ class BedrockConverse(FunctionCallingLLM):
             "botocore_session": botocore_session,
         }

+        if not is_reasoning(model) and thinking is not None:
+            thinking = None
+            warnings.warn(
+                "You set thinking parameters for a non-reasoning model; they will be ignored.",
+                UserWarning,
+            )
+
         super().__init__(
             temperature=temperature,
             max_tokens=max_tokens,
@@ -212,6 +254,8 @@ class BedrockConverse(FunctionCallingLLM):
             model=model,
             callback_manager=callback_manager,
             system_prompt=system_prompt,
+            system_prompt_caching=system_prompt_caching,
+            tool_caching=tool_caching,
             messages_to_prompt=messages_to_prompt,
             completion_to_prompt=completion_to_prompt,
             pydantic_program_mode=pydantic_program_mode,
@@ -229,8 +273,11 @@ class BedrockConverse(FunctionCallingLLM):
             botocore_config=botocore_config,
             guardrail_identifier=guardrail_identifier,
             guardrail_version=guardrail_version,
+            guardrail_stream_processing_mode=guardrail_stream_processing_mode,
             application_inference_profile_arn=application_inference_profile_arn,
             trace=trace,
+            thinking=thinking,
+            supports_forced_tool_calls=supports_forced_tool_calls,
         )

         self._config = None
@@ -318,30 +365,49 @@ class BedrockConverse(FunctionCallingLLM):

     def _get_content_and_tool_calls(
         self, response: Optional[Dict[str, Any]] = None, content: Dict[str, Any] = None
-    ) -> Tuple[str, Dict[str, Any], List[str], List[str]]:
+    ) -> Tuple[
+        List[Union[TextBlock, ThinkingBlock, ToolCallBlock]], List[str], List[str]
+    ]:
         assert response is not None or content is not None, (
             f"Either response or content must be provided. Got response: {response}, content: {content}"
         )
         assert response is None or content is None, (
             f"Only one of response or content should be provided. Got response: {response}, content: {content}"
         )
-        tool_calls = []
         tool_call_ids = []
         status = []
-        text_content = ""
+        blocks: List[TextBlock | ThinkingBlock | ToolCallBlock] = []
         if content is not None:
             content_list = [content]
         else:
             content_list = response["output"]["message"]["content"]
+
         for content_block in content_list:
             if text := content_block.get("text", None):
-                text_content += text
+                blocks.append(TextBlock(text=text))
+            if thinking := content_block.get("reasoningContent", None):
+                blocks.append(
+                    ThinkingBlock(
+                        content=thinking.get("reasoningText", {}).get("text", None),
+                        additional_information={
+                            "signature": thinking.get("reasoningText", {}).get(
+                                "signature", None
+                            )
+                        },
+                    )
+                )
             if tool_usage := content_block.get("toolUse", None):
                 if "toolUseId" not in tool_usage:
                     tool_usage["toolUseId"] = content_block["toolUseId"]
                 if "name" not in tool_usage:
                     tool_usage["name"] = content_block["name"]
-                tool_calls.append(tool_usage)
+                blocks.append(
+                    ToolCallBlock(
+                        tool_name=tool_usage.get("name", ""),
+                        tool_call_id=tool_usage.get("toolUseId"),
+                        tool_kwargs=tool_usage.get("input", {}),
+                    )
+                )
             if tool_result := content_block.get("toolResult", None):
                 for tool_result_content in tool_result["content"]:
                     if text := tool_result_content.get("text", None):
@@ -349,19 +415,25 @@ class BedrockConverse(FunctionCallingLLM):
                         tool_call_ids.append(tool_result_content.get("toolUseId", ""))
                         status.append(tool_result.get("status", ""))

-        return text_content, tool_calls, tool_call_ids, status
+        return blocks, tool_call_ids, status

     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -370,16 +442,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )

-        content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)

         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                content=content,
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
@@ -400,18 +469,25 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             **all_kwargs,
         )
@@ -421,12 +497,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""

             for chunk in response["stream"]:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)

+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -457,12 +546,42 @@ class BedrockConverse(FunctionCallingLLM):
                             current_tool_call, tool_use_delta
                         )

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -471,7 +590,7 @@ class BedrockConverse(FunctionCallingLLM):
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=self._get_response_token_counts(dict(chunk)),
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
@@ -482,12 +601,35 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -504,12 +646,34 @@ class BedrockConverse(FunctionCallingLLM):
                 # Handle metadata event - this contains the final token usage
                 if usage := metadata.get("usage"):
                     # Yield a final response with correct token usage
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -517,6 +681,7 @@ class BedrockConverse(FunctionCallingLLM):
                             },
                         ),
                         delta="",
+                        thinking_delta=None,
                         raw=chunk,
                         additional_kwargs=self._get_response_token_counts(metadata),
                     )
@@ -535,8 +700,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = await converse_with_retry_async(
@@ -544,6 +713,8 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -553,16 +724,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )

-        content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)

         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                content=content,
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
@@ -583,8 +751,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response_gen = await converse_with_retry_async(
@@ -592,10 +764,13 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             boto_client_kwargs=self._boto_client_kwargs,
             **all_kwargs,
@@ -606,12 +781,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""

             async for chunk in response_gen:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)

+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -641,13 +829,43 @@ class BedrockConverse(FunctionCallingLLM):
                         current_tool_call = join_two_dicts(
                             current_tool_call, tool_use_delta
                         )
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )

                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -656,7 +874,7 @@ class BedrockConverse(FunctionCallingLLM):
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=self._get_response_token_counts(dict(chunk)),
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
@@ -667,12 +885,35 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -689,12 +930,35 @@ class BedrockConverse(FunctionCallingLLM):
                 # Handle metadata event - this contains the final token usage
                 if usage := metadata.get("usage"):
                     # Yield a final response with correct token usage
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -702,6 +966,7 @@ class BedrockConverse(FunctionCallingLLM):
                             },
                         ),
                         delta="",
+                        thinking_delta=None,
                         raw=chunk,
                         additional_kwargs=self._get_response_token_counts(metadata),
                     )
@@ -723,6 +988,7 @@ class BedrockConverse(FunctionCallingLLM):
         verbose: bool = False,
         allow_parallel_tool_calls: bool = False,
         tool_required: bool = False,
+        tool_caching: bool = False,
        tool_choice: Optional[dict] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
@@ -737,7 +1003,11 @@ class BedrockConverse(FunctionCallingLLM):

         # convert Llama Index tools to AWS Bedrock Converse tools
         tool_config = tools_to_converse_tools(
-            tools, tool_choice=tool_choice, tool_required=tool_required
+            tools,
+            tool_choice=tool_choice,
+            tool_required=tool_required,
+            tool_caching=tool_caching,
+            supports_forced_tool_calls=self.supports_forced_tool_calls,
         )

         return {
@@ -766,7 +1036,11 @@ class BedrockConverse(FunctionCallingLLM):
         **kwargs: Any,
     ) -> List[ToolSelection]:
         """Predict and call the tool."""
-        tool_calls = response.message.additional_kwargs.get("tool_calls", [])
+        tool_calls = [
+            block
+            for block in response.message.blocks
+            if isinstance(block, ToolCallBlock)
+        ]

         if len(tool_calls) < 1:
             if error_on_no_tool_call:
@@ -778,26 +1052,23 @@ class BedrockConverse(FunctionCallingLLM):

         tool_selections = []
         for tool_call in tool_calls:
-            if "toolUseId" not in tool_call or "name" not in tool_call:
-                raise ValueError("Invalid tool call.")
-
             # handle empty inputs
             argument_dict = {}
-            if "input" in tool_call and isinstance(tool_call["input"], str):
+            if isinstance(tool_call.tool_kwargs, str):
                 # TODO parse_partial_json is not perfect
                 try:
-                    argument_dict = parse_partial_json(tool_call["input"])
+                    argument_dict = parse_partial_json(tool_call.tool_kwargs)
                 except ValueError:
                     argument_dict = {}
-            elif "input" in tool_call and isinstance(tool_call["input"], dict):
-                argument_dict = tool_call["input"]
+            elif isinstance(tool_call.tool_kwargs, dict):
+                argument_dict = tool_call.tool_kwargs
             else:
                 continue

             tool_selections.append(
                 ToolSelection(
-                    tool_id=tool_call["toolUseId"],
-                    tool_name=tool_call["name"],
+                    tool_id=tool_call.tool_call_id or "",
+                    tool_name=tool_call.tool_name,
                     tool_kwargs=argument_dict,
                 )
             )
@@ -816,8 +1087,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}

         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token formats respecting Anthropic format
         return {
             "prompt_tokens": usage.get("inputTokens", 0),
             "completion_tokens": usage.get("outputTokens", 0),
             "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
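
Taken together, 0.12.3 adds prompt and tool caching, a guardrail stream-processing mode, reasoning ("thinking") support, and a switch from flat string content plus additional_kwargs["tool_calls"] to typed content blocks. Below is a minimal usage sketch of that new surface. It is illustrative only: the model ID is an assumption, and the ThinkingDict payload shape is inferred from Anthropic's reasoning configuration, since the diff imports ThinkingDict from utils without showing its definition. guardrail_stream_processing_mode is omitted from the sketch because it only takes effect on streaming requests alongside a configured guardrail_identifier.

# Minimal usage sketch of the 0.12.x surface shown in this diff; assumptions noted inline.
from llama_index.core.base.llms.types import ChatMessage, ThinkingBlock, ToolCallBlock
from llama_index.llms.bedrock_converse import BedrockConverse

llm = BedrockConverse(
    model="us.anthropic.claude-sonnet-4-20250514-v1:0",  # assumed reasoning-capable model ID
    region_name="us-east-1",
    system_prompt="You are a terse assistant.",
    system_prompt_caching=True,  # new: cache the system prompt across calls
    tool_caching=True,  # new: cache tool definitions across calls
    # Assumed ThinkingDict shape; non-reasoning models ignore this with a UserWarning.
    thinking={"type": "enabled", "budget_tokens": 1024},
)

response = llm.chat([ChatMessage(role="user", content="What is 17 * 23?")])

# 0.12.x exposes typed content blocks instead of additional_kwargs["tool_calls"];
# filter response.message.blocks by type to consume them.
for block in response.message.blocks:
    if isinstance(block, ThinkingBlock):
        print("thinking:", block.content)
    elif isinstance(block, ToolCallBlock):
        print("tool call:", block.tool_name, block.tool_kwargs)

# While streaming, incremental reasoning text arrives on the chunks that carry it
# as chunk.additional_kwargs["thinking_delta"].
for chunk in llm.stream_chat([ChatMessage(role="user", content="17 * 23?")]):
    if delta := chunk.additional_kwargs.get("thinking_delta"):
        print(delta, end="")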