llama-index-llms-bedrock-converse 0.9.3__py3-none-any.whl → 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_index/llms/bedrock_converse/base.py +15 -4
- llama_index/llms/bedrock_converse/utils.py +89 -17
- {llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.5.dist-info}/METADATA +52 -3
- llama_index_llms_bedrock_converse-0.9.5.dist-info/RECORD +7 -0
- llama_index_llms_bedrock_converse-0.9.3.dist-info/RECORD +0 -7
- {llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.5.dist-info}/WHEEL +0 -0
- {llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.5.dist-info}/licenses/LICENSE +0 -0

llama_index/llms/bedrock_converse/base.py

@@ -366,7 +366,9 @@ class BedrockConverse(FunctionCallingLLM):
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -414,7 +416,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -551,7 +555,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -601,7 +607,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
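
All four chat entry points (`chat`, `stream_chat`, `achat`, `astream_chat`) now forward `self.model` into `messages_to_converse_messages`, so cache points are only emitted for models that support them. A minimal usage sketch, assuming AWS credentials and region come from the environment and using an arbitrary example model ID:

```py
from llama_index.core.llms import ChatMessage
from llama_index.llms.bedrock_converse import BedrockConverse

# Example model ID; any Converse-supported model is called the same way.
llm = BedrockConverse(model="us.anthropic.claude-sonnet-4-5-20250929-v1:0")

resp = llm.chat([ChatMessage(role="user", content="Say hi in five words.")])
print(resp.message.content)
```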
@@ -840,8 +848,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}
 
         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token formats respecting Anthropic format
         return {
             "prompt_tokens": usage.get("inputTokens", 0),
             "completion_tokens": usage.get("outputTokens", 0),
             "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
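
With the two extra keys, a Bedrock usage payload that reports cache activity maps onto the OpenAI-style names as in this sketch (the numbers are invented; the key names follow the hunk above):

```py
# Hypothetical usage block, as Bedrock Converse returns it under response["usage"].
usage = {
    "inputTokens": 1200,
    "outputTokens": 250,
    "totalTokens": 1450,
    "cacheReadInputTokens": 900,   # prompt tokens served from the cache
    "cacheWriteInputTokens": 300,  # prompt tokens written to the cache
}

# Same mapping as the diff above: OpenAI-style names plus the cache counters.
token_counts = {
    "prompt_tokens": usage.get("inputTokens", 0),
    "completion_tokens": usage.get("outputTokens", 0),
    "total_tokens": usage.get("totalTokens", 0),
    "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
    "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
}
print(token_counts)
```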

llama_index/llms/bedrock_converse/utils.py

@@ -1,7 +1,7 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from tenacity import (
     before_sleep_log,
     retry,
@@ -95,6 +95,7 @@ BEDROCK_FUNCTION_CALLING_MODELS = (
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
     "cohere.command-r-v1:0",
     "cohere.command-r-plus-v1:0",
     "mistral.mistral-large-2402-v1:0",
@@ -124,6 +125,7 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
     "meta.llama3-1-8b-instruct-v1:0",
     "meta.llama3-1-70b-instruct-v1:0",
     "meta.llama3-2-1b-instruct-v1:0",
@@ -135,6 +137,19 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "meta.llama4-scout-17b-instruct-v1:0",
     "deepseek.r1-v1:0",
 )
+BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
+    "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "amazon.nova-premier-v1:0",
+    "amazon.nova-pro-v1:0",
+    "amazon.nova-lite-v1:0",
+    "amazon.nova-micro-v1:0",
+)
 
 
 def get_model_name(model_name: str) -> str:
@@ -163,6 +178,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
     return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS
 
 
+def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
+    return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
+
+
 def bedrock_modelname_to_context_size(model_name: str) -> int:
     translated_model_name = get_model_name(model_name)
 
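
The new helper is a plain membership test against the tuple above, applied after `get_model_name` normalizes the incoming ID. A standalone sketch of the same check, with a trimmed model list and a hypothetical function name:

```py
# Trimmed copy of the supported-models tuple, for illustration only.
PROMPT_CACHING_MODELS = (
    "anthropic.claude-sonnet-4-5-20250929-v1:0",
    "amazon.nova-pro-v1:0",
)


def supports_prompt_caching(model_name: str) -> bool:
    # The real helper first runs the name through get_model_name().
    return model_name in PROMPT_CACHING_MODELS


print(supports_prompt_caching("amazon.nova-pro-v1:0"))             # True
print(supports_prompt_caching("mistral.mistral-large-2402-v1:0"))  # False
```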
@@ -258,12 +277,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:
 
 def messages_to_converse_messages(
     messages: Sequence[ChatMessage],
-) -> Tuple[Sequence[Dict[str, Any]], str]:
+    model: Optional[str] = None,
+) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
     """
     Converts a list of generic ChatMessages to AWS Bedrock Converse messages.
 
     Args:
         messages: List of ChatMessages
+        model: optional model name used to omit cache point if the model does not support it
 
     Returns:
         Tuple of:
@@ -272,10 +293,40 @@ def messages_to_converse_messages(
 
     """
     converse_messages = []
-    system_prompt = ""
+    system_prompt = []
+    current_system_prompt = ""
     for message in messages:
-        if message.role == MessageRole.SYSTEM:
-
+        if message.role == MessageRole.SYSTEM:
+            # we iterate over blocks, if content was used, the blocks are added anyway
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    if block.text:  # Only add non-empty text
+                        current_system_prompt += block.text + "\n"
+
+                elif isinstance(block, CachePoint):
+                    # when we find a cache point we push the current system prompt as a message
+                    if current_system_prompt != "":
+                        system_prompt.append({"text": current_system_prompt.strip()})
+                        current_system_prompt = ""
+                    # we add the cache point
+                    if (
+                        model is None
+                        or model is not None
+                        and is_bedrock_prompt_caching_supported_model(model)
+                    ):
+                        if block.cache_control.type != "default":
+                            logger.warning(
+                                "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
+                            )
+                            block.cache_control.type = "default"
+                        system_prompt.append(
+                            {"cachePoint": {"type": block.cache_control.type}}
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model} does not support prompt caching, cache point will be ignored..."
+                        )
+
         elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
             # convert tool output to the AWS Bedrock Converse format
             content = {
@@ -343,8 +394,9 @@ def messages_to_converse_messages(
                 "content": content,
             }
         )
-
-
+    if current_system_prompt != "":
+        system_prompt.append({"text": current_system_prompt.strip()})
+    return __merge_common_role_msgs(converse_messages), system_prompt
 
 
 def tools_to_converse_tools(
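
Taken together, the SYSTEM handling above means a system message built from text blocks and a `CachePoint` now comes back as a list of Converse `system` entries instead of a single string. A sketch of the expected call and output shape, using the block types the README imports (the printed shape is approximate):

```py
from llama_index.core.llms import ChatMessage
from llama_index.core.base.llms.types import (
    CacheControl,
    CachePoint,
    MessageRole,
    TextBlock,
)
from llama_index.llms.bedrock_converse.utils import messages_to_converse_messages

messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        blocks=[
            TextBlock(text="Long, reusable company context..."),
            CachePoint(cache_control=CacheControl(type="default")),
        ],
    ),
    ChatMessage(role=MessageRole.USER, content="Hello"),
]

converse_messages, system = messages_to_converse_messages(
    messages, "anthropic.claude-sonnet-4-5-20250929-v1:0"
)
# Roughly: [{"text": "Long, reusable company context..."},
#           {"cachePoint": {"type": "default"}}]
print(system)
```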
@@ -445,7 +497,7 @@ def converse_with_retry(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -467,11 +519,19 @@ def converse_with_retry(
         },
     }
     if system_prompt:
-
-
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
-
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
 
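
The normalization above can be read in isolation: a plain string becomes a one-element `system` list (for backwards compatibility), a list passes through unchanged, and a trailing `cachePoint` is appended only when system-prompt caching is enabled and none is already present. A standalone sketch of that logic, not the library function itself:

```py
from typing import Any, Dict, List, Sequence, Union


def normalize_system_prompt(
    system_prompt: Union[str, Sequence[Dict[str, Any]]],
    system_prompt_caching: bool = False,
) -> List[Dict[str, Any]]:
    # A string is wrapped as a single text entry; a list is used as provided.
    if isinstance(system_prompt, str):
        system_messages: List[Dict[str, Any]] = [{"text": system_prompt}]
    else:
        system_messages = list(system_prompt)
    # Append a cache point only if caching is on and the list does not end with one.
    if (
        system_prompt_caching
        and system_messages
        and system_messages[-1].get("cachePoint") is None
    ):
        system_messages.append({"cachePoint": {"type": "default"}})
    return system_messages


print(normalize_system_prompt("You are terse.", system_prompt_caching=True))
# [{'text': 'You are terse.'}, {'cachePoint': {'type': 'default'}}]
```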
@@ -492,12 +552,13 @@ def converse_with_retry(
     )
 
     @retry_decorator
-    def
+    def _converse_with_retry(**kwargs: Any) -> Any:
         if stream:
             return client.converse_stream(**kwargs)
-
+        else:
+            return client.converse(**kwargs)
 
-    return
+    return _converse_with_retry(**converse_kwargs)
 
 
 async def converse_with_retry_async(
@@ -506,7 +567,7 @@ async def converse_with_retry_async(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -528,11 +589,22 @@ async def converse_with_retry_async(
             "temperature": temperature,
         },
     }
+
     if system_prompt:
-
-
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
+
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
     if tool_caching and "tools" in converse_kwargs["toolConfig"]:

{llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.5.dist-info}/METADATA

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.9.3
+Version: 0.9.5
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: aioboto3<16,>=
-Requires-Dist: boto3<2,>=1.
+Requires-Dist: aioboto3<16,>=15.0.0
+Requires-Dist: boto3<2,>=1.38.27
 Requires-Dist: llama-index-core<0.15,>=0.13.0
 Description-Content-Type: text/markdown
 
@@ -220,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
 print(resp)
 ```
 
+### Prompt Caching System and regular messages
+
+You can cache normal and system messages by placing cache points strategically:
+
+```py
+from llama_index.core.llms import ChatMessage
+from llama_index.core.base.llms.types import (
+    TextBlock,
+    CacheControl,
+    CachePoint,
+    MessageRole,
+)
+
+# Cache expensive context but keep dynamic instructions uncached
+cached_context = (
+    """[Large context about company policies, knowledge base, etc...]"""
+)
+dynamic_instructions = (
+    "Today's date is 2024-01-15. Focus on recent developments."
+)
+document_text = "[Long document]"
+messages = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        blocks=[
+            TextBlock(text=cached_context),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(text=dynamic_instructions),
+        ],
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        blocks=[
+            TextBlock(
+                text=f"{document_text}",
+                type="text",
+            ),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(
+                text="What's our current policy on remote work?",
+                type="text",
+            ),
+        ],
+    ),
+]
+
+response = llm.chat(messages)
+```
+
 ### LLM Implementation example
 
 https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/

llama_index_llms_bedrock_converse-0.9.5.dist-info/RECORD

@@ -0,0 +1,7 @@
+llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
+llama_index/llms/bedrock_converse/base.py,sha256=SHGRmAYcMUMAjmnHWFLYKPzvsAAjCF_A1Mvc7s9I7IM,35233
+llama_index/llms/bedrock_converse/utils.py,sha256=Ly-s3mROVreinvYmRcAJU7MksSHqeTEa1tnY3na17wg,25565
+llama_index_llms_bedrock_converse-0.9.5.dist-info/METADATA,sha256=ALn1SYaHR7aYuALhDShDhbWBDHOzw9RAHQI1iS0xhLM,7833
+llama_index_llms_bedrock_converse-0.9.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+llama_index_llms_bedrock_converse-0.9.5.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_bedrock_converse-0.9.5.dist-info/RECORD,,

llama_index_llms_bedrock_converse-0.9.3.dist-info/RECORD

@@ -1,7 +0,0 @@
-llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
-llama_index/llms/bedrock_converse/base.py,sha256=0rmV73HRrZK6lb1AX_sCFrqwwivMegJ9X1GHuECJbVQ,34880
-llama_index/llms/bedrock_converse/utils.py,sha256=kDOs_h7NgvNTHIUU36Gzp0zUNuq5QGeLOAp0ttxtocg,22131
-llama_index_llms_bedrock_converse-0.9.3.dist-info/METADATA,sha256=gfCs7mFvOsuCjCjKNm-IQxQaEEAEqvJTjStqsr_C52k,6563
-llama_index_llms_bedrock_converse-0.9.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-llama_index_llms_bedrock_converse-0.9.3.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_bedrock_converse-0.9.3.dist-info/RECORD,,

{llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.5.dist-info}/WHEEL
{llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.5.dist-info}/licenses/LICENSE

File contents without changes; only the dist-info directory name changes.