llama-index-llms-bedrock-converse 0.8.2__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_index/llms/bedrock_converse/base.py +319 -45
- llama_index/llms/bedrock_converse/utils.py +275 -40
- {llama_index_llms_bedrock_converse-0.8.2.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/METADATA +53 -4
- llama_index_llms_bedrock_converse-0.12.3.dist-info/RECORD +7 -0
- {llama_index_llms_bedrock_converse-0.8.2.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/WHEEL +1 -1
- llama_index_llms_bedrock_converse-0.8.2.dist-info/RECORD +0 -7
- {llama_index_llms_bedrock_converse-0.8.2.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/licenses/LICENSE +0 -0
llama_index/llms/bedrock_converse/utils.py

@@ -1,7 +1,18 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Literal,
+    Union,
+)
+from typing_extensions import TypedDict
 from tenacity import (
     before_sleep_log,
     retry,
@@ -20,6 +31,8 @@ from llama_index.core.base.llms.types import (
     AudioBlock,
     DocumentBlock,
     CachePoint,
+    ThinkingBlock,
+    ToolCallBlock,
 )


@@ -29,6 +42,7 @@ HUMAN_PREFIX = "\n\nHuman:"
 ASSISTANT_PREFIX = "\n\nAssistant:"

 BEDROCK_MODELS = {
+    "amazon.nova-premier-v1:0": 1000000,
     "amazon.nova-pro-v1:0": 300000,
     "amazon.nova-lite-v1:0": 300000,
     "amazon.nova-micro-v1:0": 128000,
@@ -47,7 +61,11 @@ BEDROCK_MODELS = {
     "anthropic.claude-3-5-haiku-20241022-v1:0": 200000,
     "anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
     "anthropic.claude-opus-4-20250514-v1:0": 200000,
+    "anthropic.claude-opus-4-1-20250805-v1:0": 200000,
+    "anthropic.claude-opus-4-5-20251101-v1:0": 200000,
     "anthropic.claude-sonnet-4-20250514-v1:0": 200000,
+    "anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
+    "anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
     "ai21.j2-mid-v1": 8192,
     "ai21.j2-ultra-v1": 8192,
     "cohere.command-text-v14": 4096,
@@ -80,6 +98,7 @@ BEDROCK_MODELS = {
 }

 BEDROCK_FUNCTION_CALLING_MODELS = (
+    "amazon.nova-premier-v1:0",
     "amazon.nova-pro-v1:0",
     "amazon.nova-lite-v1:0",
     "amazon.nova-micro-v1:0",
@@ -91,7 +110,11 @@ BEDROCK_FUNCTION_CALLING_MODELS = (
     "anthropic.claude-3-5-haiku-20241022-v1:0",
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
     "cohere.command-r-v1:0",
     "cohere.command-r-plus-v1:0",
     "mistral.mistral-large-2402-v1:0",
@@ -108,6 +131,7 @@ BEDROCK_FUNCTION_CALLING_MODELS = (
 )

 BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
+    "amazon.nova-premier-v1:0",
     "amazon.nova-pro-v1:0",
     "amazon.nova-lite-v1:0",
     "amazon.nova-micro-v1:0",
@@ -119,7 +143,11 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "anthropic.claude-3-5-haiku-20241022-v1:0",
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
     "meta.llama3-1-8b-instruct-v1:0",
     "meta.llama3-1-70b-instruct-v1:0",
     "meta.llama3-2-1b-instruct-v1:0",
@@ -131,12 +159,43 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "meta.llama4-scout-17b-instruct-v1:0",
     "deepseek.r1-v1:0",
 )
+BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
+    "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
+    "amazon.nova-premier-v1:0",
+    "amazon.nova-pro-v1:0",
+    "amazon.nova-lite-v1:0",
+    "amazon.nova-micro-v1:0",
+)
+
+BEDROCK_REASONING_MODELS = (
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-opus-4-5-20251101-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-haiku-4-5-20251001-v1:0",
+    "deepseek.r1-v1:0",
+)
+
+
+def is_reasoning(model_name: str) -> bool:
+    model_name = get_model_name(model_name)
+    return model_name in BEDROCK_REASONING_MODELS


 def get_model_name(model_name: str) -> str:
     """Extract base model name from region-prefixed model identifier."""
-    # Check for region prefixes (us, eu, apac)
-    REGION_PREFIXES = ["us.", "eu.", "apac."]
+    # Check for region prefixes (us, eu, apac, jp, global)
+    REGION_PREFIXES = ["us.", "eu.", "apac.", "jp.", "global."]

     # If no region prefix, return the original model name
     if not any(prefix in model_name for prefix in REGION_PREFIXES):
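The widened `REGION_PREFIXES` list means `jp.`- and `global.`-prefixed inference-profile IDs now normalize the same way as `us.`/`eu.`/`apac.` ones. The stripping logic itself sits in lines this hunk does not show; below is a minimal sketch of the behavior the shown lines imply, assuming the function drops the single leading region segment before the first dot:

```py
# Hypothetical re-implementation sketch of get_model_name, based only on what
# this hunk shows (docstring, REGION_PREFIXES, and the early return); the real
# stripping logic lives in the unshown lines that follow.
REGION_PREFIXES = ["us.", "eu.", "apac.", "jp.", "global."]


def get_model_name_sketch(model_name: str) -> str:
    # No region prefix: return the original model name (as in the diff).
    if not any(prefix in model_name for prefix in REGION_PREFIXES):
        return model_name
    # Assumption: drop the single leading region segment.
    return model_name.split(".", 1)[1]


assert get_model_name_sketch("us.anthropic.claude-sonnet-4-20250514-v1:0") == (
    "anthropic.claude-sonnet-4-20250514-v1:0"
)
assert get_model_name_sketch("mistral.mistral-large-2402-v1:0") == (
    "mistral.mistral-large-2402-v1:0"
)
```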
@@ -159,6 +218,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
     return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS


+def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
+    return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
+
+
 def bedrock_modelname_to_context_size(model_name: str) -> int:
     translated_model_name = get_model_name(model_name)

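Because the new check routes through `get_model_name`, a region-prefixed inference-profile ID and the bare model ID give the same answer. A usage sketch, assuming this wheel version is installed:

```py
from llama_index.llms.bedrock_converse.utils import (
    is_bedrock_prompt_caching_supported_model,
)

# The region prefix is stripped before the membership test, so both spellings
# of the same model resolve identically.
assert is_bedrock_prompt_caching_supported_model(
    "us.anthropic.claude-3-7-sonnet-20250219-v1:0"
)
# deepseek.r1 is in the reasoning list but not the prompt-caching list.
assert not is_bedrock_prompt_caching_supported_model("deepseek.r1-v1:0")
```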
@@ -197,6 +260,22 @@ def _content_block_to_bedrock_format(
         return {
             "text": block.text,
         }
+    elif isinstance(block, ThinkingBlock):
+        if block.content:
+            thinking_data = {
+                "reasoningContent": {"reasoningText": {"text": block.content}}
+            }
+            if (
+                "signature" in block.additional_information
+                and block.additional_information["signature"]
+            ):
+                thinking_data["reasoningContent"]["reasoningText"]["signature"] = (
+                    block.additional_information["signature"]
+                )
+
+            return thinking_data
+        else:
+            return None
     elif isinstance(block, DocumentBlock):
         if not block.data:
             file_buffer = block.resolve_document()
@@ -230,6 +309,23 @@ def _content_block_to_bedrock_format(
     elif isinstance(block, AudioBlock):
         logger.warning("Audio blocks are not supported in Bedrock Converse API.")
         return None
+    elif isinstance(block, ToolCallBlock):
+        if isinstance(block.tool_kwargs, str):
+            try:
+                tool_input = json.loads(block.tool_kwargs or "{}")
+            except json.JSONDecodeError:
+                tool_input = {}
+        else:
+            tool_input = block.tool_kwargs
+
+        return {
+            "toolUse": {
+                "input": tool_input,
+                "toolUseId": block.tool_call_id or "",
+                "name": block.tool_name,
+            }
+        }
+
     else:
         logger.warning(f"Unsupported block type: {type(block)}")
         return None
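The new `ToolCallBlock` branch tolerates `tool_kwargs` arriving either as a dict or as a (possibly malformed) JSON string. A self-contained sketch that mirrors just the normalization step above:

```py
import json
from typing import Any, Dict, Union


def normalize_tool_kwargs(tool_kwargs: Union[str, Dict[str, Any]]) -> Any:
    # Mirrors the ToolCallBlock branch: string kwargs are parsed as JSON,
    # and malformed or empty payloads fall back to an empty dict.
    if isinstance(tool_kwargs, str):
        try:
            return json.loads(tool_kwargs or "{}")
        except json.JSONDecodeError:
            return {}
    return tool_kwargs


assert normalize_tool_kwargs('{"city": "Paris"}') == {"city": "Paris"}
assert normalize_tool_kwargs("not valid json") == {}
assert normalize_tool_kwargs("") == {}
assert normalize_tool_kwargs({"city": "Paris"}) == {"city": "Paris"}
```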
@@ -254,12 +350,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:

 def messages_to_converse_messages(
     messages: Sequence[ChatMessage],
-
+    model: Optional[str] = None,
+) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
     """
     Converts a list of generic ChatMessages to AWS Bedrock Converse messages.

     Args:
         messages: List of ChatMessages
+        model: optional model name used to omit cache point if the model does not support it

     Returns:
         Tuple of:
@@ -268,10 +366,42 @@ def messages_to_converse_messages(

     """
     converse_messages = []
-    system_prompt =
+    system_prompt = []
+    current_system_prompt = ""
+
     for message in messages:
-
-
+        unique_tool_calls = []
+        if message.role == MessageRole.SYSTEM:
+            # we iterate over blocks, if content was used, the blocks are added anyway
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    if block.text:  # Only add non-empty text
+                        current_system_prompt += block.text + "\n"
+
+                elif isinstance(block, CachePoint):
+                    # when we find a cache point we push the current system prompt as a message
+                    if current_system_prompt != "":
+                        system_prompt.append({"text": current_system_prompt.strip()})
+                        current_system_prompt = ""
+                    # we add the cache point
+                    if (
+                        model is None
+                        or model is not None
+                        and is_bedrock_prompt_caching_supported_model(model)
+                    ):
+                        if block.cache_control.type != "default":
+                            logger.warning(
+                                "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
+                            )
+                            block.cache_control.type = "default"
+                        system_prompt.append(
+                            {"cachePoint": {"type": block.cache_control.type}}
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model} does not support prompt caching, cache point will be ignored..."
+                        )
+
         elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
             # convert tool output to the AWS Bedrock Converse format
             content = {
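A SYSTEM message is now segmented on `CachePoint` blocks into a list of Converse `system` entries instead of a single concatenated string. As an illustrative sketch, a system message whose blocks are two text blocks separated by a default cache point would, for a caching-capable model, yield entries shaped like:

```py
# Hypothetical output sketch for blocks
# [TextBlock("You are terse."), CachePoint(default), TextBlock("Be polite.")],
# following the dict shapes built in the hunk above.
system_prompt = [
    {"text": "You are terse."},
    {"cachePoint": {"type": "default"}},
    {"text": "Be polite."},
]
```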
@@ -297,6 +427,13 @@ def messages_to_converse_messages(
             )
             if bedrock_format_block:
                 content.append(bedrock_format_block)
+                if "toolUse" in bedrock_format_block:
+                    unique_tool_calls.append(
+                        (
+                            bedrock_format_block["toolUse"]["toolUseId"],
+                            bedrock_format_block["toolUse"]["name"],
+                        )
+                    )

             if content:
                 converse_messages.append(
@@ -306,6 +443,7 @@ def messages_to_converse_messages(
                 }
             )

+        # keep this code here for compatibility with older chat histories
         # convert tool calls to the AWS Bedrock Converse format
         # NOTE tool calls might show up within any message,
         # e.g. within assistant message or in consecutive tool calls,
@@ -313,25 +451,28 @@ def messages_to_converse_messages(
         tool_calls = message.additional_kwargs.get("tool_calls", [])
         content = []
         for tool_call in tool_calls:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                assert "toolUseId" in tool_call
+                assert "input" in tool_call
+                assert "name" in tool_call
+                if (tool_call["toolUseId"], tool_call["name"]) not in unique_tool_calls:
+                    tool_input = tool_call["input"] if tool_call["input"] else {}
+                    if isinstance(tool_input, str):
+                        try:
+                            tool_input = json.loads(tool_input or "{}")
+                        except json.JSONDecodeError:
+                            tool_input = {}
+                    content.append(
+                        {
+                            "toolUse": {
+                                "input": tool_input,
+                                "toolUseId": tool_call["toolUseId"],
+                                "name": tool_call["name"],
+                            }
+                        }
+                    )
+            except AssertionError:
+                continue
         if len(content) > 0:
             converse_messages.append(
                 {
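The legacy `additional_kwargs["tool_calls"]` path is kept for older chat histories, but calls already serialized from `ToolCallBlock` content are skipped via the `(toolUseId, name)` pairs collected earlier in `unique_tool_calls`. The dedup guard in isolation:

```py
# Sketch of the dedup guard: tool calls already emitted as ToolCallBlock
# content are not replayed from the legacy additional_kwargs entries.
unique_tool_calls = [("call-1", "search")]  # collected from ToolCallBlocks

legacy_tool_calls = [
    {"toolUseId": "call-1", "input": {}, "name": "search"},  # duplicate
    {"toolUseId": "call-2", "input": {}, "name": "lookup"},  # new
]
replayed = [
    tc
    for tc in legacy_tool_calls
    if (tc["toolUseId"], tc["name"]) not in unique_tool_calls
]
assert [tc["toolUseId"] for tc in replayed] == ["call-2"]
```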
@@ -339,14 +480,17 @@ def messages_to_converse_messages(
                     "content": content,
                 }
             )
-
-
+    if current_system_prompt != "":
+        system_prompt.append({"text": current_system_prompt.strip()})
+    return __merge_common_role_msgs(converse_messages), system_prompt


 def tools_to_converse_tools(
     tools: List["BaseTool"],
     tool_choice: Optional[dict] = None,
     tool_required: bool = False,
+    tool_caching: bool = False,
+    supports_forced_tool_calls: bool = True,
 ) -> Dict[str, Any]:
     """
     Converts a list of tools to AWS Bedrock Converse tools.
@@ -371,18 +515,35 @@ def tools_to_converse_tools(
             "inputSchema": {"json": tool.metadata.get_parameters_dict()},
         }
         converse_tools.append({"toolSpec": tool_dict})
+
+    if tool_caching:
+        converse_tools.append({"cachePoint": {"type": "default"}})
+
+    if tool_choice:
+        tool_choice = tool_choice
+    elif supports_forced_tool_calls and tool_required:
+        tool_choice = {"any": {}}
+    else:
+        tool_choice = {"auto": {}}
+
     return {
         "tools": converse_tools,
         # https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
         # e.g. { "auto": {} }
-        "toolChoice": tool_choice
+        "toolChoice": tool_choice,
     }


 def force_single_tool_call(response: ChatResponse) -> None:
-    tool_calls =
+    tool_calls = [
+        block for block in response.message.blocks if isinstance(block, ToolCallBlock)
+    ]
     if len(tool_calls) > 1:
-        response.message.
+        response.message.blocks = [
+            block
+            for block in response.message.blocks
+            if not isinstance(block, ToolCallBlock)
+        ] + [tool_calls[0]]


 def _create_retry_decorator(client: Any, max_retries: int) -> Callable[[Any], Any]:
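`toolChoice` is now resolved with explicit precedence: a caller-supplied dict wins, then `{"any": {}}` when `tool_required` is set and the model supports forced tool calls, otherwise `{"auto": {}}`. The resolution logic in isolation (the `{"tool": {"name": ...}}` form below follows the AWS ToolChoice docs linked in the hunk):

```py
from typing import Any, Dict, Optional


def resolve_tool_choice(
    tool_choice: Optional[dict] = None,
    tool_required: bool = False,
    supports_forced_tool_calls: bool = True,
) -> Dict[str, Any]:
    # Mirrors the new precedence: explicit choice > forced ("any") > "auto".
    if tool_choice:
        return tool_choice
    if supports_forced_tool_calls and tool_required:
        return {"any": {}}
    return {"auto": {}}


assert resolve_tool_choice(tool_required=True) == {"any": {}}
assert resolve_tool_choice(
    tool_required=True, supports_forced_tool_calls=False
) == {"auto": {}}
assert resolve_tool_choice({"tool": {"name": "search"}}) == {"tool": {"name": "search"}}
```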
@@ -436,12 +597,15 @@ def converse_with_retry(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
+    system_prompt_caching: bool = False,
+    tool_caching: bool = False,
     max_tokens: int = 1000,
     temperature: float = 0.1,
     stream: bool = False,
     guardrail_identifier: Optional[str] = None,
     guardrail_version: Optional[str] = None,
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
     trace: Optional[str] = None,
     **kwargs: Any,
 ) -> Any:
@@ -455,32 +619,62 @@ def converse_with_retry(
             "temperature": temperature,
         },
     }
+    if "thinking" in kwargs:
+        converse_kwargs["additionalModelRequestFields"] = {
+            "thinking": kwargs["thinking"]
+        }
     if system_prompt:
-
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
+            system_messages.append({"cachePoint": {"type": "default"}})
+        converse_kwargs["system"] = system_messages
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
+
     if guardrail_identifier and guardrail_version:
         converse_kwargs["guardrailConfig"] = {}
         converse_kwargs["guardrailConfig"]["guardrailIdentifier"] = guardrail_identifier
         converse_kwargs["guardrailConfig"]["guardrailVersion"] = guardrail_version
         if trace:
             converse_kwargs["guardrailConfig"]["trace"] = trace
+        if guardrail_stream_processing_mode and stream:
+            converse_kwargs["guardrailConfig"]["streamProcessingMode"] = (
+                guardrail_stream_processing_mode
+            )
+
     converse_kwargs = join_two_dicts(
         converse_kwargs,
         {
             k: v
             for k, v in kwargs.items()
-            if k
+            if k
+            not in [
+                "tools",
+                "guardrail_identifier",
+                "guardrail_version",
+                "trace",
+                "thinking",
+            ]
         },
     )

     @retry_decorator
-    def
+    def _converse_with_retry(**kwargs: Any) -> Any:
         if stream:
             return client.converse_stream(**kwargs)
-
+        else:
+            return client.converse(**kwargs)

-    return
+    return _converse_with_retry(**converse_kwargs)


 async def converse_with_retry_async(
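Two of the new pass-throughs are visible here: a `thinking` kwarg is lifted into `additionalModelRequestFields`, and a guardrail stream-processing mode is attached only when streaming. A sketch of the request-dict shapes these branches build (the guardrail identifier and version below are hypothetical placeholders; the boto3 `converse`/`converse_stream` call is unchanged apart from these keys):

```py
# Sketch of converse_kwargs after the new branches run with stream=True,
# thinking={"type": "enabled", "budget_tokens": 1024}, and a guardrail
# configured. The "thinking" payload matches the ThinkingDict added at the
# bottom of utils.py.
converse_kwargs = {
    "additionalModelRequestFields": {
        "thinking": {"type": "enabled", "budget_tokens": 1024},
    },
    "guardrailConfig": {
        "guardrailIdentifier": "my-guardrail",  # hypothetical ID
        "guardrailVersion": "1",  # hypothetical version
        "streamProcessingMode": "async",  # only set when stream=True
    },
}
```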
@@ -489,12 +683,15 @@ async def converse_with_retry_async(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
+    system_prompt_caching: bool = False,
+    tool_caching: bool = False,
     max_tokens: int = 1000,
     temperature: float = 0.1,
     stream: bool = False,
     guardrail_identifier: Optional[str] = None,
     guardrail_version: Optional[str] = None,
+    guardrail_stream_processing_mode: Optional[Literal["sync", "async"]] = None,
     trace: Optional[str] = None,
     boto_client_kwargs: Optional[Dict[str, Any]] = None,
     **kwargs: Any,
@@ -509,22 +706,55 @@ async def converse_with_retry_async(
             "temperature": temperature,
         },
     }
+    if "thinking" in kwargs:
+        converse_kwargs["additionalModelRequestFields"] = {
+            "thinking": kwargs["thinking"]
+        }
+
     if system_prompt:
-
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
+            system_messages.append({"cachePoint": {"type": "default"}})
+        converse_kwargs["system"] = system_messages
+
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
+        if tool_caching and "tools" in converse_kwargs["toolConfig"]:
+            converse_kwargs["toolConfig"]["tools"].append(
+                {"cachePoint": {"type": "default"}}
+            )
     if guardrail_identifier and guardrail_version:
         converse_kwargs["guardrailConfig"] = {}
         converse_kwargs["guardrailConfig"]["guardrailIdentifier"] = guardrail_identifier
         converse_kwargs["guardrailConfig"]["guardrailVersion"] = guardrail_version
         if trace:
             converse_kwargs["guardrailConfig"]["trace"] = trace
+        if guardrail_stream_processing_mode and stream:
+            converse_kwargs["guardrailConfig"]["streamProcessingMode"] = (
+                guardrail_stream_processing_mode
+            )
     converse_kwargs = join_two_dicts(
         converse_kwargs,
         {
             k: v
             for k, v in kwargs.items()
-            if k
+            if k
+            not in [
+                "tools",
+                "guardrail_identifier",
+                "guardrail_version",
+                "trace",
+                "thinking",
+            ]
         },
     )
     _boto_client_kwargs = {}
@@ -590,3 +820,8 @@ def join_two_dicts(dict1: Dict[str, Any], dict2: Dict[str, Any]) -> Dict[str, An
         else:
             new_dict[key] += value
     return new_dict
+
+
+class ThinkingDict(TypedDict):
+    type: Literal["enabled"]
+    budget_tokens: int
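`ThinkingDict` types the payload that `converse_with_retry` forwards under `additionalModelRequestFields`. A construction sketch; how the `thinking` kwarg reaches these helpers from `BedrockConverse` lives in `base.py`, which this section does not show:

```py
from llama_index.llms.bedrock_converse.utils import ThinkingDict

# Typed payload for extended thinking; converse_with_retry forwards it as
# additionalModelRequestFields={"thinking": ...} when a "thinking" kwarg
# is present.
thinking: ThinkingDict = {"type": "enabled", "budget_tokens": 2048}
```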
{llama_index_llms_bedrock_converse-0.8.2.dist-info → llama_index_llms_bedrock_converse-0.12.3.dist-info}/METADATA

@@ -1,14 +1,14 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.8.2
+Version: 0.12.3
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: aioboto3<16,>=
-Requires-Dist: boto3<2,>=1.
-Requires-Dist: llama-index-core<0.
+Requires-Dist: aioboto3<16,>=15.0.0
+Requires-Dist: boto3<2,>=1.38.27
+Requires-Dist: llama-index-core<0.15,>=0.14.5
 Description-Content-Type: text/markdown

 # LlamaIndex Llms Integration: Bedrock Converse
@@ -220,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
 print(resp)
 ```

+### Prompt Caching System and regular messages
+
+You can cache normal and system messages by placing cache points strategically:
+
+```py
+from llama_index.core.llms import ChatMessage
+from llama_index.core.base.llms.types import (
+    TextBlock,
+    CacheControl,
+    CachePoint,
+    MessageRole,
+)
+
+# Cache expensive context but keep dynamic instructions uncached
+cached_context = (
+    """[Large context about company policies, knowledge base, etc...]"""
+)
+dynamic_instructions = (
+    "Today's date is 2024-01-15. Focus on recent developments."
+)
+document_text = "[Long document]"
+messages = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        blocks=[
+            TextBlock(text=cached_context),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(text=dynamic_instructions),
+        ],
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        blocks=[
+            TextBlock(
+                text=f"{document_text}",
+                type="text",
+            ),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(
+                text="What's our current policy on remote work?",
+                type="text",
+            ),
+        ],
+    ),
+]
+
+response = llm.chat(messages)
+```
+
 ### LLM Implementation example

 https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
llama_index_llms_bedrock_converse-0.12.3.dist-info/RECORD

@@ -0,0 +1,7 @@
+llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
+llama_index/llms/bedrock_converse/base.py,sha256=xKveT_9f_O0LNq4eHl3KntdN2ADoXf7xIUAxWgg5ASc,46145
+llama_index/llms/bedrock_converse/utils.py,sha256=dP24P7SU9l6NiRC4jbBRjnsPoSwEBXDBMzqwSAgSMX0,30001
+llama_index_llms_bedrock_converse-0.12.3.dist-info/METADATA,sha256=249BYMoiTZFGMoBfcYWycS89IzF-WqbjcIN6hqlNKFY,7834
+llama_index_llms_bedrock_converse-0.12.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+llama_index_llms_bedrock_converse-0.12.3.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_bedrock_converse-0.12.3.dist-info/RECORD,,

llama_index_llms_bedrock_converse-0.8.2.dist-info/RECORD

@@ -1,7 +0,0 @@
-llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
-llama_index/llms/bedrock_converse/base.py,sha256=TVamvIWu2LVURhO8o7CDZfikuu9ulbJYLBGTV4ku-Es,33802
-llama_index/llms/bedrock_converse/utils.py,sha256=ZmYMUWJjW8Ln3CpQ7Gwvc0X0BCKPzK46LffME9BKg5g,21163
-llama_index_llms_bedrock_converse-0.8.2.dist-info/METADATA,sha256=KiEZO591tVTfiJpLSWMOd4Eqv3VLMXunE41nh7Nyt3I,6563
-llama_index_llms_bedrock_converse-0.8.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-llama_index_llms_bedrock_converse-0.8.2.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_bedrock_converse-0.8.2.dist-info/RECORD,,