llama-index-llms-bedrock-converse 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_index/llms/bedrock_converse/base.py +15 -4
- llama_index/llms/bedrock_converse/utils.py +87 -17
- {llama_index_llms_bedrock_converse-0.9.2.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/METADATA +52 -3
- llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD +7 -0
- llama_index_llms_bedrock_converse-0.9.2.dist-info/RECORD +0 -7
- {llama_index_llms_bedrock_converse-0.9.2.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/WHEEL +0 -0
- {llama_index_llms_bedrock_converse-0.9.2.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/licenses/LICENSE +0 -0
llama_index/llms/bedrock_converse/base.py

@@ -366,7 +366,9 @@ class BedrockConverse(FunctionCallingLLM):
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)

         # invoke LLM in AWS Bedrock Converse with retry

@@ -414,7 +416,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)

         # invoke LLM in AWS Bedrock Converse with retry

@@ -551,7 +555,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)

         # invoke LLM in AWS Bedrock Converse with retry

@@ -601,7 +607,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)

         # invoke LLM in AWS Bedrock Converse with retry

@@ -840,8 +848,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}

         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token formats respecting Anthropic format
         return {
             "prompt_tokens": usage.get("inputTokens", 0),
             "completion_tokens": usage.get("outputTokens", 0),
             "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
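As a worked illustration of the last hunk above, here is a minimal sketch of consuming the extended token-count dict. The sample `usage` payload is hypothetical; only the key names are taken from the diff.

```py
# Hypothetical Bedrock Converse usage payload; key names match the hunk above.
usage = {
    "inputTokens": 1200,
    "outputTokens": 85,
    "totalTokens": 1285,
    "cacheReadInputTokens": 1024,
    "cacheWriteInputTokens": 0,
}

# Same mapping as the updated _get_response_token_counts body.
token_counts = {
    "prompt_tokens": usage.get("inputTokens", 0),
    "completion_tokens": usage.get("outputTokens", 0),
    "total_tokens": usage.get("totalTokens", 0),
    "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
    "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
}

# A large cache_read_input_tokens share means the cache point is paying off.
print(
    f"{token_counts['cache_read_input_tokens']}/{token_counts['prompt_tokens']} "
    "prompt tokens served from cache"
)
```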
llama_index/llms/bedrock_converse/utils.py

@@ -1,7 +1,7 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from tenacity import (
     before_sleep_log,
     retry,
@@ -49,6 +49,7 @@ BEDROCK_MODELS = {
     "anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
     "anthropic.claude-opus-4-20250514-v1:0": 200000,
     "anthropic.claude-sonnet-4-20250514-v1:0": 200000,
+    "anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
     "ai21.j2-mid-v1": 8192,
     "ai21.j2-ultra-v1": 8192,
     "cohere.command-text-v14": 4096,
@@ -134,6 +135,18 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "meta.llama4-scout-17b-instruct-v1:0",
     "deepseek.r1-v1:0",
 )
+BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
+    "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "amazon.nova-premier-v1:0",
+    "amazon.nova-pro-v1:0",
+    "amazon.nova-lite-v1:0",
+    "amazon.nova-micro-v1:0",
+)


 def get_model_name(model_name: str) -> str:
@@ -162,6 +175,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
     return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS


+def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
+    return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
+
+
 def bedrock_modelname_to_context_size(model_name: str) -> int:
     translated_model_name = get_model_name(model_name)

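A minimal usage sketch of the new helper. The assertions are derived from the tuple added above; this also assumes `get_model_name` resolves region-prefixed inference-profile IDs (e.g. `us.anthropic...`) to bare model names, which this diff does not show.

```py
from llama_index.llms.bedrock_converse.utils import (
    is_bedrock_prompt_caching_supported_model,
)

# Listed in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS above.
assert is_bedrock_prompt_caching_supported_model(
    "anthropic.claude-3-7-sonnet-20250219-v1:0"
)
# Not listed, so cache points will be ignored for this model.
assert not is_bedrock_prompt_caching_supported_model("deepseek.r1-v1:0")
```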
@@ -257,12 +274,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:

 def messages_to_converse_messages(
     messages: Sequence[ChatMessage],
-) -> Tuple[Sequence[Dict[str, Any]], str]:
+    model: Optional[str] = None,
+) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
     """
     Converts a list of generic ChatMessages to AWS Bedrock Converse messages.

     Args:
         messages: List of ChatMessages
+        model: optional model name used to omit cache point if the model does not support it

     Returns:
         Tuple of:

@@ -271,10 +290,40 @@ def messages_to_converse_messages(

     """
     converse_messages = []
-    system_prompt = ""
+    system_prompt = []
+    current_system_prompt = ""
     for message in messages:
-        if message.role == MessageRole.SYSTEM:
-
+        if message.role == MessageRole.SYSTEM:
+            # we iterate over blocks, if content was used, the blocks are added anyway
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    if block.text:  # Only add non-empty text
+                        current_system_prompt += block.text + "\n"
+
+                elif isinstance(block, CachePoint):
+                    # when we find a cache point we push the current system prompt as a message
+                    if current_system_prompt != "":
+                        system_prompt.append({"text": current_system_prompt.strip()})
+                        current_system_prompt = ""
+                    # we add the cache point
+                    if (
+                        model is None
+                        or model is not None
+                        and is_bedrock_prompt_caching_supported_model(model)
+                    ):
+                        if block.cache_control.type != "default":
+                            logger.warning(
+                                "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
+                            )
+                            block.cache_control.type = "default"
+                        system_prompt.append(
+                            {"cachePoint": {"type": block.cache_control.type}}
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model} does not support prompt caching, cache point will be ignored..."
+                        )
+
         elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
             # convert tool output to the AWS Bedrock Converse format
             content = {
@@ -342,8 +391,9 @@ def messages_to_converse_messages(
                     "content": content,
                 }
             )
-
-    return __merge_common_role_msgs(converse_messages), system_prompt.strip()
+    if current_system_prompt != "":
+        system_prompt.append({"text": current_system_prompt.strip()})
+    return __merge_common_role_msgs(converse_messages), system_prompt


 def tools_to_converse_tools(
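Tracing the new block-walking logic by hand, a system message containing one cache point should now yield a structured `system` list rather than a single string. The expected output below is read off the hunks above, not verified against the package.

```py
from llama_index.core.llms import ChatMessage
from llama_index.core.base.llms.types import (
    CacheControl,
    CachePoint,
    MessageRole,
    TextBlock,
)
from llama_index.llms.bedrock_converse.utils import messages_to_converse_messages

messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        blocks=[
            TextBlock(text="Static corpus worth caching."),
            CachePoint(cache_control=CacheControl(type="default")),
            TextBlock(text="Volatile instructions."),
        ],
    ),
    ChatMessage(role=MessageRole.USER, content="Hi"),
]

converse_messages, system = messages_to_converse_messages(
    messages, "anthropic.claude-3-7-sonnet-20250219-v1:0"
)
# Expected, per the control flow above:
# system == [
#     {"text": "Static corpus worth caching."},
#     {"cachePoint": {"type": "default"}},
#     {"text": "Volatile instructions."},
# ]
```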
@@ -444,7 +494,7 @@ def converse_with_retry(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -466,11 +516,19 @@ def converse_with_retry(
         },
     }
     if system_prompt:
-        system_messages = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
-
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config

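The widened `system_prompt` parameter keeps old call sites working; both of the forms below should normalize to the same request payload. This is a sketch of the data shapes only, with placeholder values.

```py
# Legacy form: a plain string is wrapped as [{"text": ...}].
system_prompt = "You are a helpful assistant."

# New form: a pre-built block list (e.g. from messages_to_converse_messages)
# passes through unchanged.
system_prompt = [
    {"text": "You are a helpful assistant."},
    {"cachePoint": {"type": "default"}},
]

# With system_prompt_caching=True and no trailing cachePoint, the hunk above
# appends {"cachePoint": {"type": "default"}} before calling Bedrock.
```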
@@ -491,12 +549,13 @@ def converse_with_retry(
     )

     @retry_decorator
-    def _conversion_with_retry(**kwargs: Any) -> Any:
+    def _converse_with_retry(**kwargs: Any) -> Any:
         if stream:
             return client.converse_stream(**kwargs)
-        return client.converse(**kwargs)
+        else:
+            return client.converse(**kwargs)

-    return _conversion_with_retry(**converse_kwargs)
+    return _converse_with_retry(**converse_kwargs)


 async def converse_with_retry_async(
@@ -505,7 +564,7 @@ async def converse_with_retry_async(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -527,11 +586,22 @@ async def converse_with_retry_async(
             "temperature": temperature,
         },
     }
+
     if system_prompt:
-        system_messages = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
+
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
     if tool_caching and "tools" in converse_kwargs["toolConfig"]:
{llama_index_llms_bedrock_converse-0.9.2.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/METADATA

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.9.2
+Version: 0.9.4
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: aioboto3<16,>=
-Requires-Dist: boto3<2,>=1.
+Requires-Dist: aioboto3<16,>=15.0.0
+Requires-Dist: boto3<2,>=1.38.27
 Requires-Dist: llama-index-core<0.15,>=0.13.0
 Description-Content-Type: text/markdown

@@ -220,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
 print(resp)
 ```

+### Prompt Caching System and regular messages
+
+You can cache normal and system messages by placing cache points strategically:
+
+```py
+from llama_index.core.llms import ChatMessage
+from llama_index.core.base.llms.types import (
+    TextBlock,
+    CacheControl,
+    CachePoint,
+    MessageRole,
+)
+
+# Cache expensive context but keep dynamic instructions uncached
+cached_context = (
+    """[Large context about company policies, knowledge base, etc...]"""
+)
+dynamic_instructions = (
+    "Today's date is 2024-01-15. Focus on recent developments."
+)
+document_text = "[Long document]"
+messages = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        blocks=[
+            TextBlock(text=cached_context),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(text=dynamic_instructions),
+        ],
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        blocks=[
+            TextBlock(
+                text=f"{document_text}",
+                type="text",
+            ),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(
+                text="What's our current policy on remote work?",
+                type="text",
+            ),
+        ],
+    ),
+]
+
+response = llm.chat(messages)
+```
+
 ### LLM Implementation example

 https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
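Following on from the README's caching example, cache effectiveness can be checked against the usage fields added in base.py. This sketch assumes the raw Converse payload is exposed as a dict on `response.raw`, which this diff does not show.

```py
# Assumption: the raw Converse response dict is exposed as response.raw;
# the usage key names come from the base.py hunk above.
response = llm.chat(messages)
usage = response.raw.get("usage", {})
print("cache writes:", usage.get("cacheWriteInputTokens", 0))
print("cache reads:", usage.get("cacheReadInputTokens", 0))
```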
llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD

@@ -0,0 +1,7 @@
+llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
+llama_index/llms/bedrock_converse/base.py,sha256=SHGRmAYcMUMAjmnHWFLYKPzvsAAjCF_A1Mvc7s9I7IM,35233
+llama_index/llms/bedrock_converse/utils.py,sha256=oX1ksJsUccNcCUURB9FZDSmxGGg1Q5EbpaQ4oRtlGXY,25418
+llama_index_llms_bedrock_converse-0.9.4.dist-info/METADATA,sha256=qqNYHGddynWmeGnW3bv75HSdwF5NVxQwxF5QdXNRW7I,7833
+llama_index_llms_bedrock_converse-0.9.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+llama_index_llms_bedrock_converse-0.9.4.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD,,
llama_index_llms_bedrock_converse-0.9.2.dist-info/RECORD

@@ -1,7 +0,0 @@
-llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
-llama_index/llms/bedrock_converse/base.py,sha256=0rmV73HRrZK6lb1AX_sCFrqwwivMegJ9X1GHuECJbVQ,34880
-llama_index/llms/bedrock_converse/utils.py,sha256=69-NGsEV5GpECvuLEAjz5tQu6OMxwcOvJKBnfXvCVNM,22074
-llama_index_llms_bedrock_converse-0.9.2.dist-info/METADATA,sha256=xsgEpces2jzNUwCcOD3vcB0XIJajlzXggbsaHqLHl4k,6563
-llama_index_llms_bedrock_converse-0.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-llama_index_llms_bedrock_converse-0.9.2.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_bedrock_converse-0.9.2.dist-info/RECORD,,

{llama_index_llms_bedrock_converse-0.9.2.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/WHEEL
File without changes

{llama_index_llms_bedrock_converse-0.9.2.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/licenses/LICENSE
File without changes