llama-index-llms-bedrock-converse 0.8.2__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_index/llms/bedrock_converse/base.py +319 -45
- llama_index/llms/bedrock_converse/utils.py +275 -40
- {llama_index_llms_bedrock_converse-0.8.2.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/METADATA +53 -4
- llama_index_llms_bedrock_converse-0.12.3.dist-info/RECORD +7 -0
- {llama_index_llms_bedrock_converse-0.8.2.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/WHEEL +1 -1
- llama_index_llms_bedrock_converse-0.8.2.dist-info/RECORD +0 -7
- {llama_index_llms_bedrock_converse-0.8.2.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/licenses/LICENSE +0 -0
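The hunks below expand only `llama_index/llms/bedrock_converse/base.py`; the `utils.py`, METADATA, and RECORD changes listed above are not shown here. The headline changes are system-prompt and tool caching, a guardrail stream-processing mode, a `thinking` configuration for reasoning models, and a move from plain-text responses to typed content blocks. A minimal usage sketch of the new constructor surface (the model id is illustrative, and the exact `ThinkingDict` shape is defined in `utils.py`, outside this diff):

```python
# Sketch only: assumes AWS credentials are configured and that ThinkingDict
# accepts an Anthropic-style reasoning config; the real shape lives in
# llama_index/llms/bedrock_converse/utils.py, which this diff does not show.
from llama_index.llms.bedrock_converse import BedrockConverse

llm = BedrockConverse(
    model="anthropic.claude-3-7-sonnet-20250219-v1:0",  # illustrative model id
    system_prompt="You are a helpful assistant.",
    system_prompt_caching=True,  # new in 0.12.x: cache the system prompt
    tool_caching=True,           # new in 0.12.x: cache tool definitions
    guardrail_stream_processing_mode=None,  # new: "sync" | "async" | None
    thinking={"type": "enabled", "budget_tokens": 1024},  # assumed shape
)
```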
--- llama_index/llms/bedrock_converse/base.py (0.8.2)
+++ llama_index/llms/bedrock_converse/base.py (0.12.3)
@@ -1,8 +1,10 @@
+import warnings
 from typing import (
     Any,
     Callable,
     Dict,
     List,
+    Literal,
     Optional,
     Sequence,
     Tuple,
@@ -20,6 +22,9 @@ from llama_index.core.base.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
     MessageRole,
+    TextBlock,
+    ThinkingBlock,
+    ToolCallBlock,
 )
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
@@ -46,6 +51,8 @@ from llama_index.llms.bedrock_converse.utils import (
     join_two_dicts,
     messages_to_converse_messages,
     tools_to_converse_tools,
+    is_reasoning,
+    ThinkingDict,
 )

 if TYPE_CHECKING:
@@ -138,18 +145,41 @@ class BedrockConverse(FunctionCallingLLM):
         default=60.0,
         description="The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts.",
     )
+    system_prompt_caching: bool = Field(
+        default=False,
+        description="Whether to cache the system prompt. If you are using a system prompt, you should set this to True.",
+    )
+    tool_caching: bool = Field(
+        default=False,
+        description="Whether to cache the tools. If you are using tools, you should set this to True.",
+    )
     guardrail_identifier: Optional[str] = Field(
         description="The unique identifier of the guardrail that you want to use. If you don't provide a value, no guardrail is applied to the invocation."
     )
     guardrail_version: Optional[str] = Field(
         description="The version number for the guardrail. The value can also be DRAFT"
     )
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = Field(
+        description=(
+            "The stream processing mode to use when leveraging a guardrail in a streaming request (ConverseStream). "
+            "If set, the specified mode will be included in the request's guardrail configuration object, altering the streaming response behavior. "
+            "If a value is not provided, no mode will be explicitly included in the request's guardrail configuration object, and thus Amazon Bedrock's default, Synchronous Mode, will be used."
+        )
+    )
     application_inference_profile_arn: Optional[str] = Field(
         description="The ARN of an application inference profile to invoke in place of the model. If provided, make sure the model argument refers to the same one underlying the application inference profile."
     )
     trace: Optional[str] = Field(
         description="Specifies whether to enable or disable the Bedrock trace. If enabled, you can see the full Bedrock trace."
     )
+    thinking: Optional[ThinkingDict] = Field(
+        description="Specifies the thinking configuration of a reasoning model. Only applicable to Anthropic and DeepSeek models",
+        default=None,
+    )
+    supports_forced_tool_calls: bool = Field(
+        default=True,
+        description="Whether the model supports forced tool calls. If True, the model can be forced to call at least 1 or more tools.",
+    )
     additional_kwargs: Dict[str, Any] = Field(
         default_factory=dict,
         description="Additional kwargs for the bedrock invokeModel request.",
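Note: `ThinkingDict` is imported from `utils.py`, whose diff is not included in this section. A plausible sketch of the type, assuming it mirrors Bedrock's Anthropic-style reasoning request field; the real definition may differ:

```python
from typing import TypedDict

class ThinkingDict(TypedDict, total=False):
    """Hypothetical sketch; see utils.py in the package for the real type."""
    type: str           # e.g. "enabled"
    budget_tokens: int  # token budget the model may spend on reasoning
```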
@@ -182,14 +212,19 @@ class BedrockConverse(FunctionCallingLLM):
         additional_kwargs: Optional[Dict[str, Any]] = None,
         callback_manager: Optional[CallbackManager] = None,
         system_prompt: Optional[str] = None,
+        system_prompt_caching: Optional[bool] = False,
+        tool_caching: Optional[bool] = False,
         messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
         completion_to_prompt: Optional[Callable[[str], str]] = None,
         pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
         output_parser: Optional[BaseOutputParser] = None,
         guardrail_identifier: Optional[str] = None,
         guardrail_version: Optional[str] = None,
+        guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
         application_inference_profile_arn: Optional[str] = None,
         trace: Optional[str] = None,
+        thinking: Optional[ThinkingDict] = None,
+        supports_forced_tool_calls: bool = True,
     ) -> None:
         additional_kwargs = additional_kwargs or {}
         callback_manager = callback_manager or CallbackManager([])
@@ -203,6 +238,13 @@ class BedrockConverse(FunctionCallingLLM):
             "botocore_session": botocore_session,
         }

+        if not is_reasoning(model) and thinking is not None:
+            thinking = None
+            warnings.warn(
+                "You set thinking parameters for a non-reasoning models, they will be ignored",
+                UserWarning,
+            )
+
         super().__init__(
             temperature=temperature,
             max_tokens=max_tokens,
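Note: with this guard, a `thinking` config passed for a model that `is_reasoning` rejects is silently dropped with a `UserWarning` rather than raising. A small sketch of observing that behavior (the model id is illustrative and assumed non-reasoning; constructing the client requires AWS credentials):

```python
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    llm = BedrockConverse(
        model="amazon.titan-text-express-v1",  # assumed non-reasoning model
        thinking={"type": "enabled", "budget_tokens": 1024},
    )

assert llm.thinking is None  # the config was discarded, not applied
print([str(w.message) for w in caught])
```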
@@ -212,6 +254,8 @@ class BedrockConverse(FunctionCallingLLM):
             model=model,
             callback_manager=callback_manager,
             system_prompt=system_prompt,
+            system_prompt_caching=system_prompt_caching,
+            tool_caching=tool_caching,
             messages_to_prompt=messages_to_prompt,
             completion_to_prompt=completion_to_prompt,
             pydantic_program_mode=pydantic_program_mode,
@@ -229,8 +273,11 @@ class BedrockConverse(FunctionCallingLLM):
             botocore_config=botocore_config,
             guardrail_identifier=guardrail_identifier,
             guardrail_version=guardrail_version,
+            guardrail_stream_processing_mode=guardrail_stream_processing_mode,
             application_inference_profile_arn=application_inference_profile_arn,
             trace=trace,
+            thinking=thinking,
+            supports_forced_tool_calls=supports_forced_tool_calls,
         )

         self._config = None
@@ -318,30 +365,49 @@ class BedrockConverse(FunctionCallingLLM):

     def _get_content_and_tool_calls(
         self, response: Optional[Dict[str, Any]] = None, content: Dict[str, Any] = None
-    ) -> Tuple[
+    ) -> Tuple[
+        List[Union[TextBlock, ThinkingBlock, ToolCallBlock]], List[str], List[str]
+    ]:
         assert response is not None or content is not None, (
             f"Either response or content must be provided. Got response: {response}, content: {content}"
         )
         assert response is None or content is None, (
             f"Only one of response or content should be provided. Got response: {response}, content: {content}"
         )
-        tool_calls = []
         tool_call_ids = []
         status = []
-
+        blocks: List[TextBlock | ThinkingBlock | ToolCallBlock] = []
         if content is not None:
             content_list = [content]
         else:
             content_list = response["output"]["message"]["content"]
+
         for content_block in content_list:
             if text := content_block.get("text", None):
-
+                blocks.append(TextBlock(text=text))
+            if thinking := content_block.get("reasoningContent", None):
+                blocks.append(
+                    ThinkingBlock(
+                        content=thinking.get("reasoningText", {}).get("text", None),
+                        additional_information={
+                            "signature": thinking.get("reasoningText", {}).get(
+                                "signature", None
+                            )
+                        },
+                    )
+                )
             if tool_usage := content_block.get("toolUse", None):
                 if "toolUseId" not in tool_usage:
                     tool_usage["toolUseId"] = content_block["toolUseId"]
                 if "name" not in tool_usage:
                     tool_usage["name"] = content_block["name"]
-
+                blocks.append(
+                    ToolCallBlock(
+                        tool_name=tool_usage.get("name", ""),
+                        tool_call_id=tool_usage.get("toolUseId"),
+                        tool_kwargs=tool_usage.get("input", {}),
+                    )
+                )
             if tool_result := content_block.get("toolResult", None):
                 for tool_result_content in tool_result["content"]:
                     if text := tool_result_content.get("text", None):
@@ -349,19 +415,25 @@ class BedrockConverse(FunctionCallingLLM):
                         tool_call_ids.append(tool_result_content.get("toolUseId", ""))
                 status.append(tool_result.get("status", ""))

-        return
+        return blocks, tool_call_ids, status

     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -370,16 +442,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )

-
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)

         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
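Note: as the hunk above shows, `chat()` now returns a block-based message instead of a text `content` plus a `"tool_calls"` entry in `additional_kwargs`. A sketch of consuming the blocks:

```python
from llama_index.core.base.llms.types import (
    ChatMessage,
    TextBlock,
    ThinkingBlock,
    ToolCallBlock,
)

# llm: a BedrockConverse instance as in the earlier sketch
response = llm.chat([ChatMessage(role="user", content="What is 2 + 2?")])
for block in response.message.blocks:
    if isinstance(block, ThinkingBlock):
        print("thinking:", block.content)
    elif isinstance(block, TextBlock):
        print("text:", block.text)
    elif isinstance(block, ToolCallBlock):
        print("tool call:", block.tool_name, block.tool_kwargs)
```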
@@ -400,18 +469,25 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             **all_kwargs,
         )
@@ -421,12 +497,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""

             for chunk in response["stream"]:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)

+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -457,12 +546,42 @@ class BedrockConverse(FunctionCallingLLM):
                                 current_tool_call, tool_use_delta
                             )

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -471,7 +590,7 @@ class BedrockConverse(FunctionCallingLLM):
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
@@ -482,12 +601,35 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -504,12 +646,34 @@ class BedrockConverse(FunctionCallingLLM):
                     # Handle metadata event - this contains the final token usage
                     if usage := metadata.get("usage"):
                         # Yield a final response with correct token usage
+                        blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                            TextBlock(text=content.get("text", ""))
+                        ]
+                        if thinking != "":
+                            blocks.insert(
+                                0,
+                                ThinkingBlock(
+                                    content=thinking,
+                                    additional_information={
+                                        "signature": thinking_signature
+                                    },
+                                ),
+                            )
+                        if tool_calls:
+                            for tool_call in tool_calls:
+                                blocks.append(
+                                    ToolCallBlock(
+                                        tool_kwargs=tool_call.get("input", {}),
+                                        tool_name=tool_call.get("name", ""),
+                                        tool_call_id=tool_call.get("toolUseId"),
+                                    )
+                                )
+
                         yield ChatResponse(
                             message=ChatMessage(
                                 role=role,
-
+                                blocks=blocks,
                                 additional_kwargs={
-                                    "tool_calls": tool_calls,
                                     "tool_call_id": [
                                         tc.get("toolUseId", "") for tc in tool_calls
                                     ],
@@ -517,6 +681,7 @@ class BedrockConverse(FunctionCallingLLM):
                                 },
                             ),
                             delta="",
+                            thinking_delta=None,
                             raw=chunk,
                             additional_kwargs=self._get_response_token_counts(metadata),
                         )
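Note: during streaming, reasoning text is accumulated into a `ThinkingBlock` and each chunk also surfaces the raw delta under `additional_kwargs["thinking_delta"]` (see the hunks above). A sketch of consuming both streams of text:

```python
# llm: a BedrockConverse instance with a thinking config, as sketched earlier
gen = llm.stream_chat(
    [ChatMessage(role="user", content="Think it through: what is 17 * 23?")]
)
for chunk in gen:
    if thinking_delta := chunk.additional_kwargs.get("thinking_delta"):
        print(f"[thinking] {thinking_delta}", flush=True)
    if chunk.delta:
        print(chunk.delta, end="", flush=True)
```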
@@ -535,8 +700,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = await converse_with_retry_async(
@@ -544,6 +713,8 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -553,16 +724,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )

-
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)

         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
@@ -583,8 +751,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response_gen = await converse_with_retry_async(
@@ -592,10 +764,13 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             boto_client_kwargs=self._boto_client_kwargs,
             **all_kwargs,
@@ -606,12 +781,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""

             async for chunk in response_gen:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)

+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -641,13 +829,43 @@ class BedrockConverse(FunctionCallingLLM):
                         current_tool_call = join_two_dicts(
                             current_tool_call, tool_use_delta
                         )
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )

                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -656,7 +874,7 @@ class BedrockConverse(FunctionCallingLLM):
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
@@ -667,12 +885,35 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-
+                            blocks=blocks,
                             additional_kwargs={
-                                "tool_calls": tool_calls,
                                 "tool_call_id": [
                                     tc.get("toolUseId", "") for tc in tool_calls
                                 ],
@@ -689,12 +930,35 @@ class BedrockConverse(FunctionCallingLLM):
                     # Handle metadata event - this contains the final token usage
                     if usage := metadata.get("usage"):
                         # Yield a final response with correct token usage
+                        blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                            TextBlock(text=content.get("text", ""))
+                        ]
+                        if thinking != "":
+                            blocks.insert(
+                                0,
+                                ThinkingBlock(
+                                    content=thinking,
+                                    additional_information={
+                                        "signature": thinking_signature
+                                    },
+                                ),
+                            )
+
+                        if tool_calls:
+                            for tool_call in tool_calls:
+                                blocks.append(
+                                    ToolCallBlock(
+                                        tool_kwargs=tool_call.get("input", {}),
+                                        tool_name=tool_call.get("name", ""),
+                                        tool_call_id=tool_call.get("toolUseId"),
+                                    )
+                                )
+
                         yield ChatResponse(
                             message=ChatMessage(
                                 role=role,
-
+                                blocks=blocks,
                                 additional_kwargs={
-                                    "tool_calls": tool_calls,
                                     "tool_call_id": [
                                         tc.get("toolUseId", "") for tc in tool_calls
                                     ],
@@ -702,6 +966,7 @@ class BedrockConverse(FunctionCallingLLM):
                                 },
                             ),
                             delta="",
+                            thinking_delta=None,
                             raw=chunk,
                             additional_kwargs=self._get_response_token_counts(metadata),
                         )
@@ -723,6 +988,7 @@ class BedrockConverse(FunctionCallingLLM):
         verbose: bool = False,
         allow_parallel_tool_calls: bool = False,
         tool_required: bool = False,
+        tool_caching: bool = False,
         tool_choice: Optional[dict] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
@@ -737,7 +1003,11 @@ class BedrockConverse(FunctionCallingLLM):

         # convert Llama Index tools to AWS Bedrock Converse tools
         tool_config = tools_to_converse_tools(
-            tools,
+            tools,
+            tool_choice=tool_choice,
+            tool_required=tool_required,
+            tool_caching=tool_caching,
+            supports_forced_tool_calls=self.supports_forced_tool_calls,
         )

         return {
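Note: the tool path now threads `tool_required`, `tool_caching`, and the model-level `supports_forced_tool_calls` flag down to `tools_to_converse_tools`. A sketch using the standard `FunctionCallingLLM` helper, assuming `tool_caching` is forwarded through `**kwargs` to the prepare step shown above:

```python
from llama_index.core.tools import FunctionTool

def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

tool = FunctionTool.from_defaults(fn=multiply)
response = llm.chat_with_tools(
    [tool],
    user_msg="What is 6 times 7?",
    tool_required=True,  # honored only when supports_forced_tool_calls=True
    tool_caching=True,   # new: adds tool caching to the Converse tool config
)
```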
@@ -766,7 +1036,11 @@ class BedrockConverse(FunctionCallingLLM):
         **kwargs: Any,
     ) -> List[ToolSelection]:
         """Predict and call the tool."""
-        tool_calls =
+        tool_calls = [
+            block
+            for block in response.message.blocks
+            if isinstance(block, ToolCallBlock)
+        ]

         if len(tool_calls) < 1:
             if error_on_no_tool_call:
@@ -778,26 +1052,23 @@ class BedrockConverse(FunctionCallingLLM):

         tool_selections = []
         for tool_call in tool_calls:
-            if "toolUseId" not in tool_call or "name" not in tool_call:
-                raise ValueError("Invalid tool call.")
-
             # handle empty inputs
             argument_dict = {}
-            if
+            if isinstance(tool_call.tool_kwargs, str):
                 # TODO parse_partial_json is not perfect
                 try:
-                    argument_dict = parse_partial_json(tool_call
+                    argument_dict = parse_partial_json(tool_call.tool_kwargs)
                 except ValueError:
                     argument_dict = {}
-            elif
-                argument_dict = tool_call
+            elif isinstance(tool_call.tool_kwargs, dict):
+                argument_dict = tool_call.tool_kwargs
             else:
                 continue

             tool_selections.append(
                 ToolSelection(
-                    tool_id=tool_call
-                    tool_name=tool_call
+                    tool_id=tool_call.tool_call_id or "",
+                    tool_name=tool_call.tool_name,
                     tool_kwargs=argument_dict,
                 )
             )
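Note: tool calls are now read back as `ToolCallBlock` objects (attribute access: `tool_call_id`, `tool_name`, `tool_kwargs`) rather than raw dicts, which is why the old `"toolUseId"`/`"name"` key checks are removed. Continuing the sketch above:

```python
tool_selections = llm.get_tool_calls_from_response(
    response, error_on_no_tool_call=False
)
for selection in tool_selections:
    print(selection.tool_id, selection.tool_name, selection.tool_kwargs)
```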
@@ -816,8 +1087,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}

         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token formats respecting Anthropic format
         return {
             "prompt_tokens": usage.get("inputTokens", 0),
             "completion_tokens": usage.get("outputTokens", 0),
             "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
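Note: the usage mapping now exposes prompt-caching counters alongside the OpenAI-style keys. A sketch of reading the underlying fields from a non-streaming response's raw Converse payload (key names per the hunk above; the `raw` structure is an assumption about the non-streaming path):

```python
# response: a ChatResponse from llm.chat(...); raw carries the Converse payload
usage = response.raw["usage"]
print(
    usage.get("inputTokens", 0),
    usage.get("outputTokens", 0),
    usage.get("cacheReadInputTokens", 0),   # surfaced as cache_read_input_tokens
    usage.get("cacheWriteInputTokens", 0),  # surfaced as cache_creation_input_tokens
)
```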