llama-index-llms-bedrock-converse 0.5.4__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_index/llms/bedrock_converse/base.py +415 -66
- llama_index/llms/bedrock_converse/utils.py +320 -52
- {llama_index_llms_bedrock_converse-0.5.4.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/METADATA +53 -4
- llama_index_llms_bedrock_converse-0.12.3.dist-info/RECORD +7 -0
- {llama_index_llms_bedrock_converse-0.5.4.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/WHEEL +1 -1
- llama_index_llms_bedrock_converse-0.5.4.dist-info/RECORD +0 -7
- {llama_index_llms_bedrock_converse-0.5.4.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,10 @@
+import warnings
 from typing import (
     Any,
     Callable,
     Dict,
     List,
+    Literal,
     Optional,
     Sequence,
     Tuple,
@@ -20,6 +22,9 @@ from llama_index.core.base.llms.types import (
     CompletionResponseGen,
     LLMMetadata,
     MessageRole,
+    TextBlock,
+    ThinkingBlock,
+    ToolCallBlock,
 )
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
@@ -46,6 +51,8 @@ from llama_index.llms.bedrock_converse.utils import (
     join_two_dicts,
     messages_to_converse_messages,
     tools_to_converse_tools,
+    is_reasoning,
+    ThinkingDict,
 )

 if TYPE_CHECKING:
@@ -138,18 +145,41 @@ class BedrockConverse(FunctionCallingLLM):
         default=60.0,
         description="The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts.",
     )
+    system_prompt_caching: bool = Field(
+        default=False,
+        description="Whether to cache the system prompt. If you are using a system prompt, you should set this to True.",
+    )
+    tool_caching: bool = Field(
+        default=False,
+        description="Whether to cache the tools. If you are using tools, you should set this to True.",
+    )
     guardrail_identifier: Optional[str] = Field(
         description="The unique identifier of the guardrail that you want to use. If you don't provide a value, no guardrail is applied to the invocation."
     )
     guardrail_version: Optional[str] = Field(
         description="The version number for the guardrail. The value can also be DRAFT"
     )
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = Field(
+        description=(
+            "The stream processing mode to use when leveraging a guardrail in a streaming request (ConverseStream). "
+            "If set, the specified mode will be included in the request's guardrail configuration object, altering the streaming response behavior. "
+            "If a value is not provided, no mode will be explicitly included in the request's guardrail configuration object, and thus Amazon Bedrock's default, Synchronous Mode, will be used."
+        )
+    )
     application_inference_profile_arn: Optional[str] = Field(
         description="The ARN of an application inference profile to invoke in place of the model. If provided, make sure the model argument refers to the same one underlying the application inference profile."
     )
     trace: Optional[str] = Field(
         description="Specifies whether to enable or disable the Bedrock trace. If enabled, you can see the full Bedrock trace."
     )
+    thinking: Optional[ThinkingDict] = Field(
+        description="Specifies the thinking configuration of a reasoning model. Only applicable to Anthropic and DeepSeek models",
+        default=None,
+    )
+    supports_forced_tool_calls: bool = Field(
+        default=True,
+        description="Whether the model supports forced tool calls. If True, the model can be forced to call at least 1 or more tools.",
+    )
     additional_kwargs: Dict[str, Any] = Field(
         default_factory=dict,
         description="Additional kwargs for the bedrock invokeModel request.",
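
The fields added above are the surface of three new features: prompt and tool caching, a guardrail stream processing mode, and extended thinking. A minimal sketch of wiring them together at construction time follows; the model ID and the {"type": "enabled", "budget_tokens": ...} shape for ThinkingDict follow Bedrock's Anthropic reasoning convention and are assumptions, not something this diff pins down.

from llama_index.llms.bedrock_converse import BedrockConverse

# Hypothetical setup; requires AWS credentials and Bedrock model access.
llm = BedrockConverse(
    model="us.anthropic.claude-sonnet-4-20250514-v1:0",  # assumed model ID
    region_name="us-east-1",
    system_prompt="You are a terse assistant.",
    system_prompt_caching=True,  # cache the system prompt across requests
    tool_caching=True,  # cache tool definitions across requests
    thinking={"type": "enabled", "budget_tokens": 1024},  # assumed ThinkingDict shape
)
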
@@ -182,14 +212,19 @@ class BedrockConverse(FunctionCallingLLM):
         additional_kwargs: Optional[Dict[str, Any]] = None,
         callback_manager: Optional[CallbackManager] = None,
         system_prompt: Optional[str] = None,
+        system_prompt_caching: Optional[bool] = False,
+        tool_caching: Optional[bool] = False,
         messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
         completion_to_prompt: Optional[Callable[[str], str]] = None,
         pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
         output_parser: Optional[BaseOutputParser] = None,
         guardrail_identifier: Optional[str] = None,
         guardrail_version: Optional[str] = None,
+        guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
         application_inference_profile_arn: Optional[str] = None,
         trace: Optional[str] = None,
+        thinking: Optional[ThinkingDict] = None,
+        supports_forced_tool_calls: bool = True,
     ) -> None:
         additional_kwargs = additional_kwargs or {}
         callback_manager = callback_manager or CallbackManager([])
@@ -203,6 +238,13 @@ class BedrockConverse(FunctionCallingLLM):
             "botocore_session": botocore_session,
         }

+        if not is_reasoning(model) and thinking is not None:
+            thinking = None
+            warnings.warn(
+                "You set thinking parameters for a non-reasoning models, they will be ignored",
+                UserWarning,
+            )
+
         super().__init__(
             temperature=temperature,
             max_tokens=max_tokens,
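
The guard above silently drops thinking settings when is_reasoning(model) is False, warning instead of raising. A sketch of observing that behavior in a test; the model ID is an assumed non-reasoning one.

import warnings

from llama_index.llms.bedrock_converse import BedrockConverse

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # A non-reasoning model plus a thinking config triggers the UserWarning above.
    llm = BedrockConverse(
        model="amazon.titan-text-express-v1",  # assumed non-reasoning model ID
        region_name="us-east-1",
        thinking={"type": "enabled", "budget_tokens": 1024},  # assumed shape
    )

assert any(issubclass(w.category, UserWarning) for w in caught)
assert llm.thinking is None  # the config was discarded, not forwarded
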
@@ -212,6 +254,8 @@ class BedrockConverse(FunctionCallingLLM):
             model=model,
             callback_manager=callback_manager,
             system_prompt=system_prompt,
+            system_prompt_caching=system_prompt_caching,
+            tool_caching=tool_caching,
             messages_to_prompt=messages_to_prompt,
             completion_to_prompt=completion_to_prompt,
             pydantic_program_mode=pydantic_program_mode,
@@ -229,8 +273,11 @@ class BedrockConverse(FunctionCallingLLM):
             botocore_config=botocore_config,
             guardrail_identifier=guardrail_identifier,
             guardrail_version=guardrail_version,
+            guardrail_stream_processing_mode=guardrail_stream_processing_mode,
             application_inference_profile_arn=application_inference_profile_arn,
             trace=trace,
+            thinking=thinking,
+            supports_forced_tool_calls=supports_forced_tool_calls,
         )

         self._config = None
@@ -252,6 +299,7 @@ class BedrockConverse(FunctionCallingLLM):
                 retries={"max_attempts": max_retries, "mode": "standard"},
                 connect_timeout=timeout,
                 read_timeout=timeout,
+                user_agent_extra="x-client-framework:llama_index",
             )
             if botocore_config is None
             else botocore_config
@@ -317,30 +365,49 @@ class BedrockConverse(FunctionCallingLLM):

     def _get_content_and_tool_calls(
         self, response: Optional[Dict[str, Any]] = None, content: Dict[str, Any] = None
-    ) -> Tuple[
-        …
+    ) -> Tuple[
+        List[Union[TextBlock, ThinkingBlock, ToolCallBlock]], List[str], List[str]
+    ]:
+        assert response is not None or content is not None, (
+            f"Either response or content must be provided. Got response: {response}, content: {content}"
+        )
+        assert response is None or content is None, (
+            f"Only one of response or content should be provided. Got response: {response}, content: {content}"
+        )
         tool_call_ids = []
         status = []
-        …
+        blocks: List[TextBlock | ThinkingBlock | ToolCallBlock] = []
         if content is not None:
             content_list = [content]
         else:
             content_list = response["output"]["message"]["content"]
+
         for content_block in content_list:
             if text := content_block.get("text", None):
-                …
+                blocks.append(TextBlock(text=text))
+            if thinking := content_block.get("reasoningContent", None):
+                blocks.append(
+                    ThinkingBlock(
+                        content=thinking.get("reasoningText", {}).get("text", None),
+                        additional_information={
+                            "signature": thinking.get("reasoningText", {}).get(
+                                "signature", None
+                            )
+                        },
+                    )
+                )
             if tool_usage := content_block.get("toolUse", None):
                 if "toolUseId" not in tool_usage:
                     tool_usage["toolUseId"] = content_block["toolUseId"]
                 if "name" not in tool_usage:
                     tool_usage["name"] = content_block["name"]
-                …
+                blocks.append(
+                    ToolCallBlock(
+                        tool_name=tool_usage.get("name", ""),
+                        tool_call_id=tool_usage.get("toolUseId"),
+                        tool_kwargs=tool_usage.get("input", {}),
+                    )
+                )
             if tool_result := content_block.get("toolResult", None):
                 for tool_result_content in tool_result["content"]:
                     if text := tool_result_content.get("text", None):
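
The rewritten _get_content_and_tool_calls folds Converse content blocks (text, reasoningContent, toolUse) into typed llama-index blocks instead of a flat string plus tool-call dicts. A standalone sketch of the same mapping over an invented Converse-style payload, using only the keys the method reads:

from llama_index.core.base.llms.types import TextBlock, ThinkingBlock, ToolCallBlock

# Invented Converse-style content blocks, mirroring the shapes handled above.
content_list = [
    {"reasoningContent": {"reasoningText": {"text": "Let me check...", "signature": "abc"}}},
    {"text": "The weather is sunny."},
    {"toolUse": {"toolUseId": "tu_1", "name": "get_weather", "input": {"city": "Paris"}}},
]

blocks = []
for block in content_list:
    if text := block.get("text"):
        blocks.append(TextBlock(text=text))
    if thinking := block.get("reasoningContent"):
        reasoning_text = thinking.get("reasoningText", {})
        blocks.append(
            ThinkingBlock(
                content=reasoning_text.get("text"),
                additional_information={"signature": reasoning_text.get("signature")},
            )
        )
    if tool_use := block.get("toolUse"):
        blocks.append(
            ToolCallBlock(
                tool_name=tool_use.get("name", ""),
                tool_call_id=tool_use.get("toolUseId"),
                tool_kwargs=tool_use.get("input", {}),
            )
        )

print([type(b).__name__ for b in blocks])  # ['ThinkingBlock', 'TextBlock', 'ToolCallBlock']
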
@@ -348,19 +415,25 @@ class BedrockConverse(FunctionCallingLLM):
                         tool_call_ids.append(tool_result_content.get("toolUseId", ""))
                         status.append(tool_result.get("status", ""))

-        return …
+        return blocks, tool_call_ids, status

     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -369,16 +442,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )

-        …
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)

         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                …
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
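
Since chat() now builds the assistant message from blocks rather than a content string, callers can pattern-match on block types. A sketch; the model ID is an assumption and AWS credentials are required.

from llama_index.core.base.llms.types import (
    ChatMessage,
    TextBlock,
    ThinkingBlock,
    ToolCallBlock,
)
from llama_index.llms.bedrock_converse import BedrockConverse

llm = BedrockConverse(model="us.anthropic.claude-sonnet-4-20250514-v1:0")  # assumed model ID
resp = llm.chat([ChatMessage(role="user", content="Say hi.")])

for block in resp.message.blocks:
    if isinstance(block, ThinkingBlock):
        print("thinking:", block.content)
    elif isinstance(block, TextBlock):
        print("text:", block.text)
    elif isinstance(block, ToolCallBlock):
        print("tool call:", block.tool_name, block.tool_kwargs)
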
@@ -399,18 +469,25 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = converse_with_retry(
             client=self._client,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             **all_kwargs,
         )
@@ -420,11 +497,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""
+
             for chunk in response["stream"]:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)

+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -433,31 +524,73 @@ class BedrockConverse(FunctionCallingLLM):
                         # Handle the input field specially - concatenate partial JSON strings
                         if "input" in tool_use_delta:
                             if "input" in current_tool_call:
-                                current_tool_call["input"] += tool_use_delta["input"]
+                                current_tool_call["input"] += tool_use_delta[
+                                    "input"
+                                ]
                             else:
                                 current_tool_call["input"] = tool_use_delta["input"]

                             # Remove input from the delta to prevent it from being processed again
-                            tool_use_without_input = {k: v for k, v in tool_use_delta.items() if k != "input"}
+                            tool_use_without_input = {
+                                k: v
+                                for k, v in tool_use_delta.items()
+                                if k != "input"
+                            }
                             if tool_use_without_input:
-                                current_tool_call = join_two_dicts(current_tool_call, tool_use_without_input)
+                                current_tool_call = join_two_dicts(
+                                    current_tool_call, tool_use_without_input
+                                )
                         else:
                             # For other fields, use the normal joining
-                            current_tool_call = join_two_dicts(current_tool_call, tool_use_delta)
+                            current_tool_call = join_two_dicts(
+                                current_tool_call, tool_use_delta
+                            )
+
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )

                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            …
+                            blocks=blocks,
                             additional_kwargs={
-                                "…
-                                …
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=self._get_response_token_counts(dict(chunk)),
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
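
During streaming, reasoning text accumulates into a ThinkingBlock while answer text goes out via delta; the new thinking_delta key in each chunk's additional_kwargs exposes the incremental reasoning text. A sketch of consuming both; model ID and ThinkingDict shape are assumptions.

from llama_index.core.base.llms.types import ChatMessage
from llama_index.llms.bedrock_converse import BedrockConverse

llm = BedrockConverse(
    model="us.anthropic.claude-sonnet-4-20250514-v1:0",  # assumed model ID
    thinking={"type": "enabled", "budget_tokens": 1024},  # assumed ThinkingDict shape
)

for chunk in llm.stream_chat([ChatMessage(role="user", content="Why is the sky blue?")]):
    # Reasoning tokens arrive separately from answer tokens.
    if thinking_delta := chunk.additional_kwargs.get("thinking_delta"):
        print(f"[thinking] {thinking_delta}", end="", flush=True)
    if chunk.delta:
        print(chunk.delta, end="", flush=True)
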
@@ -468,18 +601,90 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            …
+                            blocks=blocks,
                             additional_kwargs={
-                                "…
-                                …
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         raw=chunk,
                     )
+                elif message_stop := chunk.get("messageStop"):
+                    # Handle messageStop event - this contains the stop reason
+                    # We don't yield here, just track the event
+                    pass
+                elif metadata := chunk.get("metadata"):
+                    # Handle metadata event - this contains the final token usage
+                    if usage := metadata.get("usage"):
+                        # Yield a final response with correct token usage
+                        blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                            TextBlock(text=content.get("text", ""))
+                        ]
+                        if thinking != "":
+                            blocks.insert(
+                                0,
+                                ThinkingBlock(
+                                    content=thinking,
+                                    additional_information={
+                                        "signature": thinking_signature
+                                    },
+                                ),
+                            )
+                        if tool_calls:
+                            for tool_call in tool_calls:
+                                blocks.append(
+                                    ToolCallBlock(
+                                        tool_kwargs=tool_call.get("input", {}),
+                                        tool_name=tool_call.get("name", ""),
+                                        tool_call_id=tool_call.get("toolUseId"),
+                                    )
+                                )
+
+                        yield ChatResponse(
+                            message=ChatMessage(
+                                role=role,
+                                blocks=blocks,
+                                additional_kwargs={
+                                    "tool_call_id": [
+                                        tc.get("toolUseId", "") for tc in tool_calls
+                                    ],
+                                    "status": [],
+                                },
+                            ),
+                            delta="",
+                            thinking_delta=None,
+                            raw=chunk,
+                            additional_kwargs=self._get_response_token_counts(metadata),
+                        )

         return gen()

@@ -495,8 +700,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response = await converse_with_retry_async(
@@ -504,6 +713,8 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=False,
             guardrail_identifier=self.guardrail_identifier,
@@ -513,16 +724,13 @@ class BedrockConverse(FunctionCallingLLM):
             **all_kwargs,
         )

-        …
-            response
-        )
+        blocks, tool_call_ids, status = self._get_content_and_tool_calls(response)

         return ChatResponse(
             message=ChatMessage(
                 role=MessageRole.ASSISTANT,
-                …
+                blocks=blocks,
                 additional_kwargs={
-                    "tool_calls": tool_calls,
                     "tool_call_id": tool_call_ids,
                     "status": status,
                 },
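
The async path mirrors the synchronous one, including the caching kwargs and the block-based response. A sketch, with an assumed model ID:

import asyncio

from llama_index.core.base.llms.types import ChatMessage
from llama_index.llms.bedrock_converse import BedrockConverse

async def main() -> None:
    llm = BedrockConverse(model="us.anthropic.claude-sonnet-4-20250514-v1:0")  # assumed model ID
    resp = await llm.achat([ChatMessage(role="user", content="Say hi.")])
    # ChatMessage.content concatenates the text blocks of the response.
    print(resp.message.content)

asyncio.run(main())
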
@@ -543,8 +751,12 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
+        if self.thinking is not None:
+            all_kwargs["thinking"] = self.thinking

         # invoke LLM in AWS Bedrock Converse with retry
         response_gen = await converse_with_retry_async(
@@ -552,10 +764,13 @@ class BedrockConverse(FunctionCallingLLM):
             config=self._config,
             messages=converse_messages,
             system_prompt=system_prompt,
+            system_prompt_caching=self.system_prompt_caching,
+            tool_caching=self.tool_caching,
             max_retries=self.max_retries,
             stream=True,
             guardrail_identifier=self.guardrail_identifier,
             guardrail_version=self.guardrail_version,
+            guardrail_stream_processing_mode=self.guardrail_stream_processing_mode,
             trace=self.trace,
             boto_client_kwargs=self._boto_client_kwargs,
             **all_kwargs,
@@ -566,11 +781,25 @@ class BedrockConverse(FunctionCallingLLM):
             tool_calls = []  # Track tool calls separately
             current_tool_call = None  # Track the current tool call being built
             role = MessageRole.ASSISTANT
+            thinking = ""
+            thinking_signature = ""
+
             async for chunk in response_gen:
                 if content_block_delta := chunk.get("contentBlockDelta"):
                     content_delta = content_block_delta["delta"]
                     content = join_two_dicts(content, content_delta)

+                    thinking_delta_value = None
+                    if "reasoningContent" in content_delta:
+                        reasoning_text = content_delta.get("reasoningContent", {}).get(
+                            "text", ""
+                        )
+                        thinking += reasoning_text
+                        thinking_delta_value = reasoning_text
+                        thinking_signature += content_delta.get(
+                            "reasoningContent", {}
+                        ).get("signature", "")
+
                     # If this delta contains tool call info, update current tool call
                     if "toolUse" in content_delta:
                         tool_use_delta = content_delta["toolUse"]
@@ -579,31 +808,73 @@ class BedrockConverse(FunctionCallingLLM):
                         # Handle the input field specially - concatenate partial JSON strings
                         if "input" in tool_use_delta:
                             if "input" in current_tool_call:
-                                current_tool_call["input"] += tool_use_delta["input"]
+                                current_tool_call["input"] += tool_use_delta[
+                                    "input"
+                                ]
                             else:
                                 current_tool_call["input"] = tool_use_delta["input"]

                             # Remove input from the delta to prevent it from being processed again
-                            tool_use_without_input = {k: v for k, v in tool_use_delta.items() if k != "input"}
+                            tool_use_without_input = {
+                                k: v
+                                for k, v in tool_use_delta.items()
+                                if k != "input"
+                            }
                             if tool_use_without_input:
-                                current_tool_call = join_two_dicts(current_tool_call, tool_use_without_input)
+                                current_tool_call = join_two_dicts(
+                                    current_tool_call, tool_use_without_input
+                                )
                         else:
                             # For other fields, use the normal joining
-                            current_tool_call = join_two_dicts(current_tool_call, tool_use_delta)
+                            current_tool_call = join_two_dicts(
+                                current_tool_call, tool_use_delta
+                            )
+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
+                    response_additional_kwargs = self._get_response_token_counts(
+                        dict(chunk)
+                    )
+                    if thinking_delta_value is not None:
+                        response_additional_kwargs["thinking_delta"] = (
+                            thinking_delta_value
+                        )

                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            …
+                            blocks=blocks,
                             additional_kwargs={
-                                "…
-                                …
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         delta=content_delta.get("text", ""),
                         raw=chunk,
-                        additional_kwargs=self._get_response_token_counts(dict(chunk)),
+                        additional_kwargs=response_additional_kwargs,
                     )
                 elif content_block_start := chunk.get("contentBlockStart"):
                     # New tool call starting
@@ -614,18 +885,91 @@ class BedrockConverse(FunctionCallingLLM):
                     # Add to our list of tool calls
                     tool_calls.append(current_tool_call)

+                    blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                        TextBlock(text=content.get("text", ""))
+                    ]
+                    if thinking != "":
+                        blocks.insert(
+                            0,
+                            ThinkingBlock(
+                                content=thinking,
+                                additional_information={
+                                    "signature": thinking_signature
+                                },
+                            ),
+                        )
+
+                    if tool_calls:
+                        for tool_call in tool_calls:
+                            blocks.append(
+                                ToolCallBlock(
+                                    tool_kwargs=tool_call.get("input", {}),
+                                    tool_name=tool_call.get("name", ""),
+                                    tool_call_id=tool_call.get("toolUseId"),
+                                )
+                            )
+
                     yield ChatResponse(
                         message=ChatMessage(
                             role=role,
-                            …
+                            blocks=blocks,
                             additional_kwargs={
-                                "…
-                                …
+                                "tool_call_id": [
+                                    tc.get("toolUseId", "") for tc in tool_calls
+                                ],
                                 "status": [],  # Will be populated when tool results come in
                             },
                         ),
                         raw=chunk,
                     )
+                elif chunk.get("messageStop"):
+                    # Handle messageStop event - this contains the stop reason
+                    # We don't yield here, just track the event
+                    pass
+                elif metadata := chunk.get("metadata"):
+                    # Handle metadata event - this contains the final token usage
+                    if usage := metadata.get("usage"):
+                        # Yield a final response with correct token usage
+                        blocks: List[Union[TextBlock, ThinkingBlock, ToolCallBlock]] = [
+                            TextBlock(text=content.get("text", ""))
+                        ]
+                        if thinking != "":
+                            blocks.insert(
+                                0,
+                                ThinkingBlock(
+                                    content=thinking,
+                                    additional_information={
+                                        "signature": thinking_signature
+                                    },
+                                ),
+                            )
+
+                        if tool_calls:
+                            for tool_call in tool_calls:
+                                blocks.append(
+                                    ToolCallBlock(
+                                        tool_kwargs=tool_call.get("input", {}),
+                                        tool_name=tool_call.get("name", ""),
+                                        tool_call_id=tool_call.get("toolUseId"),
+                                    )
+                                )
+
+                        yield ChatResponse(
+                            message=ChatMessage(
+                                role=role,
+                                blocks=blocks,
+                                additional_kwargs={
+                                    "tool_call_id": [
+                                        tc.get("toolUseId", "") for tc in tool_calls
+                                    ],
+                                    "status": [],
+                                },
+                            ),
+                            delta="",
+                            thinking_delta=None,
+                            raw=chunk,
+                            additional_kwargs=self._get_response_token_counts(metadata),
+                        )

         return gen()

@@ -643,6 +987,8 @@ class BedrockConverse(FunctionCallingLLM):
         chat_history: Optional[List[ChatMessage]] = None,
         verbose: bool = False,
         allow_parallel_tool_calls: bool = False,
+        tool_required: bool = False,
+        tool_caching: bool = False,
         tool_choice: Optional[dict] = None,
         **kwargs: Any,
     ) -> Dict[str, Any]:
@@ -656,11 +1002,13 @@ class BedrockConverse(FunctionCallingLLM):
             chat_history.append(user_msg)

         # convert Llama Index tools to AWS Bedrock Converse tools
-        tool_config = tools_to_converse_tools(
-            …
-        )
+        tool_config = tools_to_converse_tools(
+            tools,
+            tool_choice=tool_choice,
+            tool_required=tool_required,
+            tool_caching=tool_caching,
+            supports_forced_tool_calls=self.supports_forced_tool_calls,
+        )

         return {
             "messages": chat_history,
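
With tool_required and tool_caching threaded through to tools_to_converse_tools, a forced tool call can be requested from the standard function-calling API. A sketch; the weather tool is invented, and the keyword arguments are forwarded to _prepare_chat_with_tools:

from llama_index.core.tools import FunctionTool
from llama_index.llms.bedrock_converse import BedrockConverse

def get_weather(city: str) -> str:
    """Return a fake weather report for a city."""
    return f"It is sunny in {city}."

weather_tool = FunctionTool.from_defaults(fn=get_weather)

llm = BedrockConverse(model="us.anthropic.claude-sonnet-4-20250514-v1:0")  # assumed model ID

# tool_required forces at least one tool call when supports_forced_tool_calls=True.
resp = llm.chat_with_tools(
    tools=[weather_tool],
    user_msg="What's the weather in Paris?",
    tool_required=True,
    tool_caching=True,
)
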
@@ -688,7 +1036,11 @@ class BedrockConverse(FunctionCallingLLM):
         **kwargs: Any,
     ) -> List[ToolSelection]:
         """Predict and call the tool."""
-        tool_calls = response.message.additional_kwargs.get("tool_calls", [])
+        tool_calls = [
+            block
+            for block in response.message.blocks
+            if isinstance(block, ToolCallBlock)
+        ]

         if len(tool_calls) < 1:
             if error_on_no_tool_call:
@@ -700,29 +1052,23 @@ class BedrockConverse(FunctionCallingLLM):

         tool_selections = []
         for tool_call in tool_calls:
-            if (
-                "toolUseId" not in tool_call
-                or "name" not in tool_call
-            ):
-                raise ValueError("Invalid tool call.")
-
             # handle empty inputs
             argument_dict = {}
-            if isinstance(tool_call["input"], str):
+            if isinstance(tool_call.tool_kwargs, str):
                 # TODO parse_partial_json is not perfect
                 try:
-                    argument_dict = parse_partial_json(tool_call["input"])
+                    argument_dict = parse_partial_json(tool_call.tool_kwargs)
                 except ValueError:
                     argument_dict = {}
-            elif isinstance(tool_call["input"], dict):
-                argument_dict = tool_call["input"]
+            elif isinstance(tool_call.tool_kwargs, dict):
+                argument_dict = tool_call.tool_kwargs
             else:
                 continue

             tool_selections.append(
                 ToolSelection(
-                    tool_id=tool_call["toolUseId"],
-                    tool_name=tool_call["name"],
+                    tool_id=tool_call.tool_call_id or "",
+                    tool_name=tool_call.tool_name,
                     tool_kwargs=argument_dict,
                 )
             )
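
get_tool_calls_from_response now pulls typed ToolCallBlocks off message.blocks rather than dicts out of additional_kwargs, so ToolSelection fields come from block attributes. Continuing the chat_with_tools sketch above:

# Continuing the chat_with_tools sketch above.
tool_selections = llm.get_tool_calls_from_response(resp, error_on_no_tool_call=False)
for selection in tool_selections:
    print(selection.tool_name, selection.tool_kwargs)  # e.g. get_weather {'city': 'Paris'}
    print(get_weather(**selection.tool_kwargs))
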
@@ -741,8 +1087,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}

         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token formats respecting Anthropic format
         return {
             "prompt_tokens": usage.get("inputTokens", 0),
             "completion_tokens": usage.get("outputTokens", 0),
             "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
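
The usage mapping above adds Anthropic-style cache counters alongside the OpenAI-style token counts, and chat()/achat() attach the result to ChatResponse.additional_kwargs. A sketch of reading it, continuing the earlier setup:

from llama_index.core.base.llms.types import ChatMessage

resp = llm.chat([ChatMessage(role="user", content="Hello again.")])
usage = resp.additional_kwargs  # populated via _get_response_token_counts
print(usage.get("prompt_tokens"), usage.get("completion_tokens"))
print(usage.get("cache_read_input_tokens"))      # tokens read from the prompt cache
print(usage.get("cache_creation_input_tokens"))  # tokens written to the prompt cache
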