llama-index-llms-bedrock-converse 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_index/llms/bedrock_converse/base.py +15 -4
- llama_index/llms/bedrock_converse/utils.py +86 -17
- {llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/METADATA +52 -3
- llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD +7 -0
- llama_index_llms_bedrock_converse-0.9.3.dist-info/RECORD +0 -7
- {llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/WHEEL +0 -0
- {llama_index_llms_bedrock_converse-0.9.3.dist-info → llama_index_llms_bedrock_converse-0.9.4.dist-info}/licenses/LICENSE +0 -0
--- a/llama_index/llms/bedrock_converse/base.py
+++ b/llama_index/llms/bedrock_converse/base.py
@@ -366,7 +366,9 @@ class BedrockConverse(FunctionCallingLLM):
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -414,7 +416,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -551,7 +555,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -601,7 +607,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -840,8 +848,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}
 
         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token formats respecting Anthropic format
        return {
             "prompt_tokens": usage.get("inputTokens", 0),
             "completion_tokens": usage.get("outputTokens", 0),
             "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }
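
This is the whole of the cache-tokens change in base.py: Bedrock's native usage keys are mapped to OpenAI-style names, with the two cache counters following Anthropic's naming. A minimal standalone sketch of that mapping (the helper name here is hypothetical; only the key names come from the hunk above):

```py
from typing import Any, Dict


def usage_to_openai_format(usage: Dict[str, Any]) -> Dict[str, int]:
    # Map a Bedrock Converse "usage" block to OpenAI-style token counts,
    # including the cache counters added in 0.9.4.
    return {
        "prompt_tokens": usage.get("inputTokens", 0),
        "completion_tokens": usage.get("outputTokens", 0),
        "total_tokens": usage.get("totalTokens", 0),
        "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
        "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
    }


# Illustrative payload (values are made up):
print(
    usage_to_openai_format(
        {"inputTokens": 1200, "outputTokens": 80, "totalTokens": 1280, "cacheReadInputTokens": 1024}
    )
)
# {'prompt_tokens': 1200, 'completion_tokens': 80, 'total_tokens': 1280,
#  'cache_read_input_tokens': 1024, 'cache_creation_input_tokens': 0}
```
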
--- a/llama_index/llms/bedrock_converse/utils.py
+++ b/llama_index/llms/bedrock_converse/utils.py
@@ -1,7 +1,7 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from tenacity import (
     before_sleep_log,
     retry,
@@ -135,6 +135,18 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "meta.llama4-scout-17b-instruct-v1:0",
     "deepseek.r1-v1:0",
 )
+BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
+    "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "amazon.nova-premier-v1:0",
+    "amazon.nova-pro-v1:0",
+    "amazon.nova-lite-v1:0",
+    "amazon.nova-micro-v1:0",
+)
 
 
 def get_model_name(model_name: str) -> str:
@@ -163,6 +175,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
     return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS
 
 
+def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
+    return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
+
+
 def bedrock_modelname_to_context_size(model_name: str) -> int:
     translated_model_name = get_model_name(model_name)
 
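
A hedged usage sketch of the new helper; the model ids below are either taken from, or absent from, the tuple introduced above, and the behaviour for unsupported ids follows the cache-point handling later in this diff:

```py
from llama_index.llms.bedrock_converse.utils import (
    is_bedrock_prompt_caching_supported_model,
)

print(is_bedrock_prompt_caching_supported_model("amazon.nova-pro-v1:0"))  # True
print(is_bedrock_prompt_caching_supported_model("anthropic.claude-3-7-sonnet-20250219-v1:0"))  # True
print(is_bedrock_prompt_caching_supported_model("mistral.mistral-large-2402-v1:0"))  # False: cache points are ignored with a warning
```
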
@@ -258,12 +274,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:
 
 def messages_to_converse_messages(
     messages: Sequence[ChatMessage],
-) -> Tuple[Sequence[Dict[str, Any]], str]:
+    model: Optional[str] = None,
+) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
     """
     Converts a list of generic ChatMessages to AWS Bedrock Converse messages.
 
     Args:
         messages: List of ChatMessages
+        model: optional model name used to omit cache point if the model does not support it
 
     Returns:
         Tuple of:
@@ -272,10 +290,40 @@ def messages_to_converse_messages(
 
     """
     converse_messages = []
-    system_prompt = ""
+    system_prompt = []
+    current_system_prompt = ""
     for message in messages:
-        if message.role == MessageRole.SYSTEM:
-            system_prompt += message.content + "\n"
+        if message.role == MessageRole.SYSTEM:
+            # we iterate over blocks, if content was used, the blocks are added anyway
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    if block.text:  # Only add non-empty text
+                        current_system_prompt += block.text + "\n"
+
+                elif isinstance(block, CachePoint):
+                    # when we find a cache point we push the current system prompt as a message
+                    if current_system_prompt != "":
+                        system_prompt.append({"text": current_system_prompt.strip()})
+                        current_system_prompt = ""
+                    # we add the cache point
+                    if (
+                        model is None
+                        or model is not None
+                        and is_bedrock_prompt_caching_supported_model(model)
+                    ):
+                        if block.cache_control.type != "default":
+                            logger.warning(
+                                "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
+                            )
+                            block.cache_control.type = "default"
+                        system_prompt.append(
+                            {"cachePoint": {"type": block.cache_control.type}}
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model} does not support prompt caching, cache point will be ignored..."
+                        )
+
         elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
             # convert tool output to the AWS Bedrock Converse format
             content = {
@@ -343,8 +391,9 @@ def messages_to_converse_messages(
                 "content": content,
             }
         )
-
-    return __merge_common_role_msgs(converse_messages), system_prompt.strip()
+    if current_system_prompt != "":
+        system_prompt.append({"text": current_system_prompt.strip()})
+    return __merge_common_role_msgs(converse_messages), system_prompt
 
 
 def tools_to_converse_tools(
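
To make the new conversion concrete, here is a hedged usage sketch of how a system message containing a `CachePoint` comes out of `messages_to_converse_messages`; the texts and model id are placeholders, and the expected output shape follows the hunks above:

```py
from llama_index.core.llms import ChatMessage
from llama_index.core.base.llms.types import (
    TextBlock,
    CacheControl,
    CachePoint,
    MessageRole,
)
from llama_index.llms.bedrock_converse.utils import messages_to_converse_messages

messages = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        blocks=[
            TextBlock(text="Long, reusable context..."),
            CachePoint(cache_control=CacheControl(type="default")),
            TextBlock(text="Short, per-request instructions."),
        ],
    ),
    ChatMessage(role=MessageRole.USER, content="Hello"),
]

_, system = messages_to_converse_messages(
    messages, "anthropic.claude-3-7-sonnet-20250219-v1:0"
)
# Expected shape, per the logic above:
# [{"text": "Long, reusable context..."},
#  {"cachePoint": {"type": "default"}},
#  {"text": "Short, per-request instructions."}]
```
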
@@ -445,7 +494,7 @@ def converse_with_retry(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -467,11 +516,19 @@
         },
     }
     if system_prompt:
-        system_messages = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
-
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
 
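
The same normalization appears in both the sync and async call paths. A standalone sketch of what it does (the function name is hypothetical; the logic mirrors the hunk above):

```py
from typing import Any, Dict, List, Sequence, Union


def normalize_system(
    system_prompt: Union[str, Sequence[Dict[str, Any]]],
    system_prompt_caching: bool = False,
) -> List[Dict[str, Any]]:
    # Accept either a plain string (pre-0.9.4 behaviour) or a pre-built list of
    # Converse system blocks, and append a trailing cachePoint if requested.
    if isinstance(system_prompt, str):
        system_messages: List[Dict[str, Any]] = [{"text": system_prompt}]
    else:
        system_messages = list(system_prompt)
    if (
        system_prompt_caching
        and system_messages
        and system_messages[-1].get("cachePoint") is None
    ):
        system_messages.append({"cachePoint": {"type": "default"}})
    return system_messages


print(normalize_system("You are terse.", system_prompt_caching=True))
# [{'text': 'You are terse.'}, {'cachePoint': {'type': 'default'}}]
```
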
@@ -492,12 +549,13 @@
     )
 
     @retry_decorator
-    def
+    def _converse_with_retry(**kwargs: Any) -> Any:
         if stream:
             return client.converse_stream(**kwargs)
-        return client.converse(**kwargs)
+        else:
+            return client.converse(**kwargs)
 
-    return
+    return _converse_with_retry(**converse_kwargs)
 
 
 async def converse_with_retry_async(
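
The closure above is the usual tenacity-wrapped boto3 call. A standalone sketch of that pattern, with illustrative retry parameters rather than the integration's actual configuration:

```py
import logging

import boto3
from tenacity import before_sleep_log, retry, stop_after_attempt, wait_exponential

logger = logging.getLogger(__name__)
client = boto3.client("bedrock-runtime")  # region/credentials resolved by boto3

retry_decorator = retry(
    reraise=True,
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    before_sleep=before_sleep_log(logger, logging.WARNING),
)


@retry_decorator
def _converse_with_retry(**kwargs):
    # Non-streaming call; the integration switches to client.converse_stream() when streaming.
    return client.converse(**kwargs)
```
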
@@ -506,7 +564,7 @@ async def converse_with_retry_async(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -528,11 +586,22 @@
             "temperature": temperature,
         },
     }
+
     if system_prompt:
-        system_messages = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
+
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
     if tool_caching and "tools" in converse_kwargs["toolConfig"]:
--- a/llama_index_llms_bedrock_converse-0.9.3.dist-info/METADATA
+++ b/llama_index_llms_bedrock_converse-0.9.4.dist-info/METADATA
@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.9.3
+Version: 0.9.4
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: aioboto3<16,>=
-Requires-Dist: boto3<2,>=1.
+Requires-Dist: aioboto3<16,>=15.0.0
+Requires-Dist: boto3<2,>=1.38.27
 Requires-Dist: llama-index-core<0.15,>=0.13.0
 Description-Content-Type: text/markdown
 
@@ -220,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
 print(resp)
 ```
 
+### Prompt Caching System and regular messages
+
+You can cache normal and system messages by placing cache points strategically:
+
+```py
+from llama_index.core.llms import ChatMessage
+from llama_index.core.base.llms.types import (
+    TextBlock,
+    CacheControl,
+    CachePoint,
+    MessageRole,
+)
+
+# Cache expensive context but keep dynamic instructions uncached
+cached_context = (
+    """[Large context about company policies, knowledge base, etc...]"""
+)
+dynamic_instructions = (
+    "Today's date is 2024-01-15. Focus on recent developments."
+)
+document_text = "[Long document]"
+messages = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        blocks=[
+            TextBlock(text=cached_context),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(text=dynamic_instructions),
+        ],
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        blocks=[
+            TextBlock(
+                text=f"{document_text}",
+                type="text",
+            ),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(
+                text="What's our current policy on remote work?",
+                type="text",
+            ),
+        ],
+    ),
+]
+
+response = llm.chat(messages)
+```
+
 ### LLM Implementation example
 
 https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
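
The README snippet above relies on an `llm` constructed earlier in the (unchanged) part of the file; a hedged sketch of what that construction looks like, with a placeholder model id and region:

```py
from llama_index.llms.bedrock_converse import BedrockConverse

llm = BedrockConverse(
    model="anthropic.claude-3-7-sonnet-20250219-v1:0",  # any id from BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
    region_name="us-east-1",
)

response = llm.chat(messages)
print(response)
```

On a cache hit, Bedrock reports `cacheReadInputTokens` / `cacheWriteInputTokens` in the Converse usage block, which this release surfaces as `cache_read_input_tokens` / `cache_creation_input_tokens` (see the base.py hunk above).
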
--- /dev/null
+++ b/llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD
@@ -0,0 +1,7 @@
+llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
+llama_index/llms/bedrock_converse/base.py,sha256=SHGRmAYcMUMAjmnHWFLYKPzvsAAjCF_A1Mvc7s9I7IM,35233
+llama_index/llms/bedrock_converse/utils.py,sha256=oX1ksJsUccNcCUURB9FZDSmxGGg1Q5EbpaQ4oRtlGXY,25418
+llama_index_llms_bedrock_converse-0.9.4.dist-info/METADATA,sha256=qqNYHGddynWmeGnW3bv75HSdwF5NVxQwxF5QdXNRW7I,7833
+llama_index_llms_bedrock_converse-0.9.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+llama_index_llms_bedrock_converse-0.9.4.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD,,
--- a/llama_index_llms_bedrock_converse-0.9.3.dist-info/RECORD
+++ /dev/null
@@ -1,7 +0,0 @@
-llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
-llama_index/llms/bedrock_converse/base.py,sha256=0rmV73HRrZK6lb1AX_sCFrqwwivMegJ9X1GHuECJbVQ,34880
-llama_index/llms/bedrock_converse/utils.py,sha256=kDOs_h7NgvNTHIUU36Gzp0zUNuq5QGeLOAp0ttxtocg,22131
-llama_index_llms_bedrock_converse-0.9.3.dist-info/METADATA,sha256=gfCs7mFvOsuCjCjKNm-IQxQaEEAEqvJTjStqsr_C52k,6563
-llama_index_llms_bedrock_converse-0.9.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-llama_index_llms_bedrock_converse-0.9.3.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_bedrock_converse-0.9.3.dist-info/RECORD,,
WHEEL and licenses/LICENSE: files without changes.