llama-index-llms-bedrock-converse 0.5.4__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
+import warnings
 from typing import (
     Any,
     Callable,
     Dict,
     List,
+    Literal,
     Optional,
     Sequence,
     Tuple,
@@ -20,6 +22,9 @@ from llama_index.core.base.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
     MessageRole,
+    TextBlock,
+    ThinkingBlock,
+    ToolCallBlock,
 )
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
@@ -46,6 +51,8 @@ from llama_index.llms.bedrock_converse.utils import (
     join_two_dicts,
     messages_to_converse_messages,
     tools_to_converse_tools,
+    is_reasoning,
+    ThinkingDict,
 )
 
 if TYPE_CHECKING:
@@ -138,18 +145,41 @@ class BedrockConverse(FunctionCallingLLM):
         default=60.0,
         description="The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts.",
     )
+    system_prompt_caching: bool = Field(
+        default=False,
+        description="Whether to cache the system prompt. If you are using a system prompt, you should set this to True.",
+    )
+    tool_caching: bool = Field(
+        default=False,
+        description="Whether to cache the tools. If you are using tools, you should set this to True.",
+    )
     guardrail_identifier: Optional[str] = Field(
         description="The unique identifier of the guardrail that you want to use. If you don't provide a value, no guardrail is applied to the invocation."
     )
     guardrail_version: Optional[str] = Field(
         description="The version number for the guardrail. The value can also be DRAFT"
     )
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = Field(
+        description=(
+            "The stream processing mode to use when leveraging a guardrail in a streaming request (ConverseStream). "
+            "If set, the specified mode will be included in the request's guardrail configuration object, altering the streaming response behavior. "
+            "If a value is not provided, no mode will be explicitly included in the request's guardrail configuration object, and thus Amazon Bedrock's default, Synchronous Mode, will be used."
+        )
+    )
     application_inference_profile_arn: Optional[str] = Field(
         description="The ARN of an application inference profile to invoke in place of the model. If provided, make sure the model argument refers to the same one underlying the application inference profile."
     )
     trace: Optional[str] = Field(
         description="Specifies whether to enable or disable the Bedrock trace. If enabled, you can see the full Bedrock trace."
     )
+    thinking: Optional[ThinkingDict] = Field(
+        description="Specifies the thinking configuration of a reasoning model. Only applicable to Anthropic and DeepSeek models.",
+        default=None,
+    )
+    supports_forced_tool_calls: bool = Field(
+        default=True,
+        description="Whether the model supports forced tool calls. If True, the model can be forced to call at least one tool.",
+    )
     additional_kwargs: Dict[str, Any] = Field(
         default_factory=dict,
         description="Additional kwargs for the bedrock invokeModel request.",
@@ -182,14 +212,19 @@ class BedrockConverse(FunctionCallingLLM):
         additional_kwargs: Optional[Dict[str, Any]] = None,
         callback_manager: Optional[CallbackManager] = None,
         system_prompt: Optional[str] = None,
+        system_prompt_caching: Optional[bool] = False,
+        tool_caching: Optional[bool] = False,
         messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
         completion_to_prompt: Optional[Callable[[str], str]] = None,
         pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
         output_parser: Optional[BaseOutputParser] = None,
         guardrail_identifier: Optional[str] = None,
         guardrail_version: Optional[str] = None,
+        guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
         application_inference_profile_arn: Optional[str] = None,
         trace: Optional[str] = None,
+        thinking: Optional[ThinkingDict] = None,
+        supports_forced_tool_calls: bool = True,
     ) -> None:
         additional_kwargs = additional_kwargs or {}
         callback_manager = callback_manager or CallbackManager([])
@@ -203,6 +238,13 @@ class BedrockConverse(FunctionCallingLLM):
             "botocore_session": botocore_session,
         }
 
+        if not is_reasoning(model) and thinking is not None:
+            thinking = None
+            warnings.warn(
+                "You set thinking parameters for a non-reasoning model; they will be ignored.",
+                UserWarning,
+            )
+
         super().__init__(
             temperature=temperature,
             max_tokens=max_tokens,
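A small sketch of the guard's intended behavior, assuming `is_reasoning` classifies the (hypothetical) Titan id below as non-reasoning; no Bedrock call is made:

```python
import warnings

from llama_index.llms.bedrock_converse import BedrockConverse

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    llm = BedrockConverse(
        model="amazon.titan-text-express-v1",  # assumed non-reasoning model
        thinking={"type": "enabled", "budget_tokens": 1024},
    )

# The thinking config is dropped before super().__init__ runs.
assert llm.thinking is None
assert any(issubclass(w.category, UserWarning) for w in caught)
```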
@@ -212,6 +254,8 @@ class BedrockConverse(FunctionCallingLLM):
             model=model,
             callback_manager=callback_manager,
             system_prompt=system_prompt,
+            system_prompt_caching=system_prompt_caching,
+            tool_caching=tool_caching,
             messages_to_prompt=messages_to_prompt,
             completion_to_prompt=completion_to_prompt,
             pydantic_program_mode=pydantic_program_mode,
@@ -229,8 +273,11 @@ class BedrockConverse(FunctionCallingLLM):
             botocore_config=botocore_config,
             guardrail_identifier=guardrail_identifier,
             guardrail_version=guardrail_version,
+            guardrail_stream_processing_mode=guardrail_stream_processing_mode,
             application_inference_profile_arn=application_inference_profile_arn,
             trace=trace,
+            thinking=thinking,
+            supports_forced_tool_calls=supports_forced_tool_calls,
         )
 
         self._config = None
@@ -252,6 +299,7 @@ class BedrockConverse(FunctionCallingLLM):
                 retries={"max_attempts": max_retries, "mode": "standard"},
                 connect_timeout=timeout,
                 read_timeout=timeout,
+                user_agent_extra="x-client-framework:llama_index",
             )
             if botocore_config is None
             else botocore_config
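The `user_agent_extra` default is ordinary `botocore` configuration; a sketch of the equivalent `Config` built by hand (the timeout mirrors the 60.0 default above; the retry count is illustrative):

```python
from botocore.config import Config

# Equivalent to the default Config the integration now builds when the
# caller passes no botocore_config; the extra user-agent token lets AWS
# attribute the traffic to llama_index.
config = Config(
    retries={"max_attempts": 10, "mode": "standard"},
    connect_timeout=60.0,
    read_timeout=60.0,
    user_agent_extra="x-client-framework:llama_index",
)
```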
@@ -317,30 +365,49 @@ class BedrockConverse(FunctionCallingLLM):
 
     def _get_content_and_tool_calls(
         self, response: Optional[Dict[str, Any]] = None, content: Dict[str, Any] = None
-    ) -> Tuple[str, Dict[str, Any], List[str], List[str]]:
-        assert (
-            response is not None or content is not None
-        ), f"Either response or content must be provided. Got response: {response}, content: {content}"
-        assert (
-            response is None or content is None
-        ), f"Only one of response or content should be provided. Got response: {response}, content: {content}"
-        tool_calls = []
+    ) -> Tuple[
+        List[Union[TextBlock, ThinkingBlock, ToolCallBlock]], List[str], List[str]
+    ]:
+        assert response is not None or content is not None, (
+            f"Either response or content must be provided. Got response: {response}, content: {content}"
+        )
+        assert response is None or content is None, (
+            f"Only one of response or content should be provided. Got response: {response}, content: {content}"
+        )
         tool_call_ids = []
         status = []
-        text_content = ""
+        blocks: List[TextBlock | ThinkingBlock | ToolCallBlock] = []
         if content is not None:
            content_list = [content]
         else:
             content_list = response["output"]["message"]["content"]
+
         for content_block in content_list:
             if text := content_block.get("text", None):
-                text_content += text
+                blocks.append(TextBlock(text=text))
+            if thinking := content_block.get("reasoningContent", None):
+                blocks.append(
+                    ThinkingBlock(
+                        content=thinking.get("reasoningText", {}).get("text", None),
+                        additional_information={
+                            "signature": thinking.get("reasoningText", {}).get(
+                                "signature", None
+                            )
+                        },
+                    )
+                )
             if tool_usage := content_block.get("toolUse", None):
                 if "toolUseId" not in tool_usage:
                     tool_usage["toolUseId"] = content_block["toolUseId"]
                 if "name" not in tool_usage:
                     tool_usage["name"] = content_block["name"]
-                tool_calls.append(tool_usage)
+                blocks.append(
+                    ToolCallBlock(
+                        tool_name=tool_usage.get("name", ""),
+                        tool_call_id=tool_usage.get("toolUseId"),
+                        tool_kwargs=tool_usage.get("input", {}),
+                    )
+                )
             if tool_result := content_block.get("toolResult", None):
                 for tool_result_content in tool_result["content"]:
                     if text := tool_result_content.get("text", None):
@@ -348,19 +415,25 @@ class BedrockConverse(FunctionCallingLLM):
                         tool_call_ids.append(tool_result_content.get("toolUseId", ""))
                         status.append(tool_result.get("status", ""))
 
-        return text_content, tool_calls, tool_call_ids, status
+        return blocks, tool_call_ids, status
 
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -369,16 +442,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )
 
-        content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)
 
         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                content=content,
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
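Since `tool_calls` no longer lives in `additional_kwargs`, consumers should read the typed blocks instead; a sketch, reusing the `llm` from the earlier constructor example:

```python
from llama_index.core.base.llms.types import (
    ChatMessage,
    TextBlock,
    ThinkingBlock,
    ToolCallBlock,
)

response = llm.chat([ChatMessage(role="user", content="What is 2 + 2?")])

for block in response.message.blocks:
    if isinstance(block, ThinkingBlock):
        print("thinking:", block.content)
    elif isinstance(block, ToolCallBlock):
        print("tool call:", block.tool_name, block.tool_kwargs)
    elif isinstance(block, TextBlock):
        print("text:", block.text)
```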
@@ -399,18 +469,25 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             **all_kwargs,
         )
@@ -420,11 +497,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""
+
             for chunk in response["stream"]:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)
 
+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -433,31 +524,73 @@ class BedrockConverse(FunctionCallingLLM):
                             # Handle the input field specially - concatenate partial JSON strings
                             if "input" in tool_use_delta:
                                 if "input" in current_tool_call:
-                                    current_tool_call["input"] += tool_use_delta["input"]
+                                    current_tool_call["input"] += tool_use_delta[
+                                        "input"
+                                    ]
                                 else:
                                     current_tool_call["input"] = tool_use_delta["input"]
 
                                 # Remove input from the delta to prevent it from being processed again
-                                tool_use_without_input = {k: v for k, v in tool_use_delta.items() if k != "input"}
+                                tool_use_without_input = {
+                                    k: v
+                                    for k, v in tool_use_delta.items()
+                                    if k != "input"
+                                }
                                 if tool_use_without_input:
-                                    current_tool_call = join_two_dicts(current_tool_call, tool_use_without_input)
+                                    current_tool_call = join_two_dicts(
+                                        current_tool_call, tool_use_without_input
+                                    )
                             else:
                                 # For other fields, use the normal joining
-                                current_tool_call = join_two_dicts(current_tool_call, tool_use_delta)
+                                current_tool_call = join_two_dicts(
+                                    current_tool_call, tool_use_delta
+                                )
+
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )
 
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
-                                "tool_call_id": [tc.get("toolUseId", "") for tc in tool_calls],
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=self._get_response_token_counts(dict(chunk)),
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
@@ -468,18 +601,90 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)
 
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
-                                "tool_call_id": [tc.get("toolUseId", "") for tc in tool_calls],
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         raw=chunk,
                     )
+                elif message_stop := chunk.get("messageStop"):
+                    # Handle messageStop event - this contains the stop reason
+                    # We don't yield here, just track the event
+                    pass
+                elif metadata := chunk.get("metadata"):
+                    # Handle metadata event - this contains the final token usage
+                    if usage := metadata.get("usage"):
+                        # Yield a final response with correct token usage
+                        blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                            TextBlock(text=content.get("text", ""))
+                        ]
+                        if thinking != "":
+                            blocks.insert(
+                                0,
+                                ThinkingBlock(
+                                    content=thinking,
+                                    additional_information={
+                                        "signature": thinking_signature
+                                    },
+                                ),
+                            )
+                        if tool_calls:
+                            for tool_call in tool_calls:
+                                blocks.append(
+                                    ToolCallBlock(
+                                        tool_kwargs=tool_call.get("input", {}),
+                                        tool_name=tool_call.get("name", ""),
+                                        tool_call_id=tool_call.get("toolUseId"),
+                                    )
+                                )
+
+                        yield ChatResponse(
+                            message=ChatMessage(
+                                role=role,
+                                blocks=blocks,
+                                additional_kwargs={
+                                    "tool_call_id": [
+                                        tc.get("toolUseId", "") for tc in tool_calls
+                                    ],
+                                    "status": [],
+                                },
+                            ),
+                            delta="",
+                            thinking_delta=None,
+                            raw=chunk,
+                            additional_kwargs=self._get_response_token_counts(metadata),
+                        )
 
         return gen()
 
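Callers that want to render the reasoning stream as it arrives can watch the new `thinking_delta` key; a sketch, assuming the `llm` from earlier with `thinking` enabled:

```python
from llama_index.core.base.llms.types import ChatMessage

for partial in llm.stream_chat(
    [ChatMessage(role="user", content="Think step by step: what is 17 * 23?")]
):
    # Present only on chunks that carried reasoning content.
    if thinking_delta := partial.additional_kwargs.get("thinking_delta"):
        print(f"[thinking] {thinking_delta}", end="", flush=True)
    if partial.delta:
        print(partial.delta, end="", flush=True)
```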
@@ -495,8 +700,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response = await converse_with_retry_async(
@@ -504,6 +713,8 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -513,16 +724,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )
 
-        content, tool_calls, tool_call_ids, status = self._get_content_and_tool_calls(
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)
 
         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                content=content,
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
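The async path mirrors the sync one; a usage sketch, again reusing the earlier `llm`:

```python
import asyncio

from llama_index.core.base.llms.types import ChatMessage


async def main() -> None:
    response = await llm.achat([ChatMessage(role="user", content="Hello!")])
    # .content still joins the text blocks, so plain-text callers keep working.
    print(response.message.content)


asyncio.run(main())
```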
@@ -543,8 +751,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking
 
         # invoke LLM in AWS Bedrock Converse with retry
         response_gen = await converse_with_retry_async(
@@ -552,10 +764,13 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             boto_client_kwargs=self._boto_client_kwargs,
             **all_kwargs,
@@ -566,11 +781,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""
+
             async for chunk in response_gen:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)
 
+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -579,31 +808,73 @@ class BedrockConverse(FunctionCallingLLM):
                             # Handle the input field specially - concatenate partial JSON strings
                             if "input" in tool_use_delta:
                                 if "input" in current_tool_call:
-                                    current_tool_call["input"] += tool_use_delta["input"]
+                                    current_tool_call["input"] += tool_use_delta[
+                                        "input"
+                                    ]
                                 else:
                                     current_tool_call["input"] = tool_use_delta["input"]
 
                                 # Remove input from the delta to prevent it from being processed again
-                                tool_use_without_input = {k: v for k, v in tool_use_delta.items() if k != "input"}
+                                tool_use_without_input = {
+                                    k: v
+                                    for k, v in tool_use_delta.items()
+                                    if k != "input"
+                                }
                                 if tool_use_without_input:
-                                    current_tool_call = join_two_dicts(current_tool_call, tool_use_without_input)
+                                    current_tool_call = join_two_dicts(
+                                        current_tool_call, tool_use_without_input
+                                    )
                             else:
                                 # For other fields, use the normal joining
-                                current_tool_call = join_two_dicts(current_tool_call, tool_use_delta)
+                                current_tool_call = join_two_dicts(
+                                    current_tool_call, tool_use_delta
+                                )
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )
 
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
-                                "tool_call_id": [tc.get("toolUseId", "") for tc in tool_calls],
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=self._get_response_token_counts(dict(chunk)),
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
@@ -614,18 +885,91 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)
 
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            content=content.get("text", ""),
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
-                                "tool_call_id": [tc.get("toolUseId", "") for tc in tool_calls],
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         raw=chunk,
                     )
+                elif chunk.get("messageStop"):
+                    # Handle messageStop event - this contains the stop reason
+                    # We don't yield here, just track the event
+                    pass
+                elif metadata := chunk.get("metadata"):
+                    # Handle metadata event - this contains the final token usage
+                    if usage := metadata.get("usage"):
+                        # Yield a final response with correct token usage
+                        blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                            TextBlock(text=content.get("text", ""))
+                        ]
+                        if thinking != "":
+                            blocks.insert(
+                                0,
+                                ThinkingBlock(
+                                    content=thinking,
+                                    additional_information={
+                                        "signature": thinking_signature
+                                    },
+                                ),
+                            )
+
+                        if tool_calls:
+                            for tool_call in tool_calls:
+                                blocks.append(
+                                    ToolCallBlock(
+                                        tool_kwargs=tool_call.get("input", {}),
+                                        tool_name=tool_call.get("name", ""),
+                                        tool_call_id=tool_call.get("toolUseId"),
+                                    )
+                                )
+
+                        yield ChatResponse(
+                            message=ChatMessage(
+                                role=role,
+                                blocks=blocks,
+                                additional_kwargs={
+                                    "tool_call_id": [
+                                        tc.get("toolUseId", "") for tc in tool_calls
+                                    ],
+                                    "status": [],
+                                },
+                            ),
+                            delta="",
+                            thinking_delta=None,
+                            raw=chunk,
+                            additional_kwargs=self._get_response_token_counts(metadata),
+                        )
 
         return gen()
 
@@ -643,6 +987,8 @@ class BedrockConverse(FunctionCallingLLM):
         chat_history: Optional[List[ChatMessage]] = None,
         verbose: bool = False,
         allow_parallel_tool_calls: bool = False,
+        tool_required: bool = False,
+        tool_caching: bool = False,
         tool_choice: Optional[dict] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
@@ -656,11 +1002,13 @@ class BedrockConverse(FunctionCallingLLM):
             chat_history.append(user_msg)
 
         # convert Llama Index tools to AWS Bedrock Converse tools
-        tool_config = tools_to_converse_tools(tools)
-        if tool_choice:
-            # https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
-            # e.g. { "auto": {} }
-            tool_config["toolChoice"] = tool_choice
+        tool_config = tools_to_converse_tools(
+            tools,
+            tool_choice=tool_choice,
+            tool_required=tool_required,
+            tool_caching=tool_caching,
+            supports_forced_tool_calls=self.supports_forced_tool_calls,
+        )
 
         return {
             "messages": chat_history,
@@ -688,7 +1036,11 @@ class BedrockConverse(FunctionCallingLLM):
         **kwargs: Any,
     ) -> List[ToolSelection]:
         """Predict and call the tool."""
-        tool_calls = response.message.additional_kwargs.get("tool_calls", [])
+        tool_calls = [
+            block
+            for block in response.message.blocks
+            if isinstance(block, ToolCallBlock)
+        ]
 
         if len(tool_calls) < 1:
             if error_on_no_tool_call:
@@ -700,29 +1052,23 @@ class BedrockConverse(FunctionCallingLLM):
 
         tool_selections = []
         for tool_call in tool_calls:
-            if (
-                "toolUseId" not in tool_call
-                or "name" not in tool_call
-            ):
-                raise ValueError("Invalid tool call.")
-
             # handle empty inputs
             argument_dict = {}
-            if tool_call.get("input", False) and isinstance(tool_call["input"], str):
+            if isinstance(tool_call.tool_kwargs, str):
                 # TODO parse_partial_json is not perfect
                 try:
-                    argument_dict = parse_partial_json(tool_call["input"])
+                    argument_dict = parse_partial_json(tool_call.tool_kwargs)
                 except ValueError:
                     argument_dict = {}
-            elif tool_call.get("input", False) and isinstance(tool_call["input"], dict):
-                argument_dict = tool_call["input"]
+            elif isinstance(tool_call.tool_kwargs, dict):
+                argument_dict = tool_call.tool_kwargs
             else:
                 continue
 
             tool_selections.append(
                 ToolSelection(
-                    tool_id=tool_call["toolUseId"],
-                    tool_name=tool_call["name"],
+                    tool_id=tool_call.tool_call_id or "",
+                    tool_name=tool_call.tool_name,
                     tool_kwargs=argument_dict,
                 )
             )
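Extracting the parsed selections is unchanged at the call site; continuing from the `chat_with_tools` sketch above:

```python
tool_selections = llm.get_tool_calls_from_response(
    response, error_on_no_tool_call=False
)
for selection in tool_selections:
    # e.g. multiply {'a': 6, 'b': 7}
    print(selection.tool_name, selection.tool_kwargs)
```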
@@ -741,8 +1087,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}
 
         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token counts follow Anthropic's key naming
         return {
             "prompt_tokens": usage.get("inputTokens", 0),
             "completion_tokens": usage.get("outputTokens", 0),
             "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
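The cache counters ride along with the OpenAI-style keys; a sketch reading them off a response (the cache fields stay zero unless prompt caching was active, and this assumes the response-level `additional_kwargs` are populated via `_get_response_token_counts` as elsewhere in this class):

```python
usage = response.additional_kwargs
print("prompt:", usage.get("prompt_tokens"), "completion:", usage.get("completion_tokens"))
print(
    "cache read:", usage.get("cache_read_input_tokens", 0),
    "cache write:", usage.get("cache_creation_input_tokens", 0),
)
```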