llama-index-llms-bedrock-converse 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llama_index/llms/bedrock_converse/base.py

@@ -366,7 +366,9 @@ class BedrockConverse(FunctionCallingLLM):
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -414,7 +416,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -551,7 +555,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -601,7 +607,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -840,8 +848,11 @@ class BedrockConverse(FunctionCallingLLM):
            return {}
 
        # Convert Bedrock's token count format to match OpenAI's format
+       # Cache token counts keep the Anthropic-style key names
        return {
            "prompt_tokens": usage.get("inputTokens", 0),
            "completion_tokens": usage.get("outputTokens", 0),
            "total_tokens": usage.get("totalTokens", 0),
+           "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+           "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
        }
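The mapping above normalizes Bedrock's Converse `usage` block into OpenAI-style keys while keeping Anthropic-style names for the two new cache counters. A minimal sketch of the mapping on a cache hit (values are illustrative; key names come straight from the hunk above):

```py
# Illustrative Bedrock usage block for a request that hit the prompt cache
usage = {
    "inputTokens": 1200,
    "outputTokens": 80,
    "totalTokens": 1280,
    "cacheReadInputTokens": 1024,  # tokens served from the cache
    "cacheWriteInputTokens": 0,    # tokens written to the cache on this call
}

# Normalized result, per the mapping above
token_counts = {
    "prompt_tokens": 1200,
    "completion_tokens": 80,
    "total_tokens": 1280,
    "cache_read_input_tokens": 1024,
    "cache_creation_input_tokens": 0,
}
```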
llama_index/llms/bedrock_converse/utils.py

@@ -1,7 +1,7 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from tenacity import (
     before_sleep_log,
     retry,
@@ -49,6 +49,7 @@ BEDROCK_MODELS = {
     "anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
     "anthropic.claude-opus-4-20250514-v1:0": 200000,
     "anthropic.claude-sonnet-4-20250514-v1:0": 200000,
+    "anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
     "ai21.j2-mid-v1": 8192,
     "ai21.j2-ultra-v1": 8192,
     "cohere.command-text-v14": 4096,
@@ -134,6 +135,18 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "meta.llama4-scout-17b-instruct-v1:0",
     "deepseek.r1-v1:0",
 )
+BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
+    "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "amazon.nova-premier-v1:0",
+    "amazon.nova-pro-v1:0",
+    "amazon.nova-lite-v1:0",
+    "amazon.nova-micro-v1:0",
+)
 
 
 def get_model_name(model_name: str) -> str:
@@ -162,6 +175,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
     return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS
 
 
+def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
+    return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
+
+
 def bedrock_modelname_to_context_size(model_name: str) -> int:
     translated_model_name = get_model_name(model_name)
 
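Based purely on the tuple and helper added above, the check is a simple membership lookup after `get_model_name` normalization; a quick sketch:

```py
# Model id taken verbatim from BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS above
assert is_bedrock_prompt_caching_supported_model(
    "anthropic.claude-sonnet-4-20250514-v1:0"
)
# Any model absent from the tuple (e.g. a Cohere id) is reported as unsupported
assert not is_bedrock_prompt_caching_supported_model("cohere.command-text-v14")
```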
@@ -257,12 +274,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:
 
 def messages_to_converse_messages(
     messages: Sequence[ChatMessage],
-) -> Tuple[Sequence[Dict[str, Any]], str]:
+    model: Optional[str] = None,
+) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
     """
     Converts a list of generic ChatMessages to AWS Bedrock Converse messages.
 
     Args:
         messages: List of ChatMessages
+        model: optional model name, used to omit cache points if the model does not support them
 
     Returns:
         Tuple of:
@@ -271,10 +290,40 @@ def messages_to_converse_messages(
 
     """
     converse_messages = []
-    system_prompt = ""
+    system_prompt = []
+    current_system_prompt = ""
     for message in messages:
-        if message.role == MessageRole.SYSTEM and message.content:
-            system_prompt += (message.content) + "\n"
+        if message.role == MessageRole.SYSTEM:
+            # iterate over blocks; if plain `content` was set, it is exposed as blocks anyway
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    if block.text:  # only add non-empty text
+                        current_system_prompt += block.text + "\n"
+
+                elif isinstance(block, CachePoint):
+                    # on a cache point, flush the accumulated system prompt as a message
+                    if current_system_prompt != "":
+                        system_prompt.append({"text": current_system_prompt.strip()})
+                        current_system_prompt = ""
+                    # then add the cache point itself
+                    if (
+                        model is None
+                        or model is not None
+                        and is_bedrock_prompt_caching_supported_model(model)
+                    ):
+                        if block.cache_control.type != "default":
+                            logger.warning(
+                                "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
+                            )
+                            block.cache_control.type = "default"
+                        system_prompt.append(
+                            {"cachePoint": {"type": block.cache_control.type}}
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model} does not support prompt caching, cache point will be ignored..."
+                        )
+
         elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
             # convert tool output to the AWS Bedrock Converse format
             content = {
@@ -342,8 +391,9 @@ def messages_to_converse_messages(
                 "content": content,
             }
         )
-
-    return __merge_common_role_msgs(converse_messages), system_prompt.strip()
+    if current_system_prompt != "":
+        system_prompt.append({"text": current_system_prompt.strip()})
+    return __merge_common_role_msgs(converse_messages), system_prompt
 
 
 def tools_to_converse_tools(
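To make the new return shape concrete, here is a small sketch of calling the converter with a cache point (import paths assumed from this package's README and RECORD; the expected result is shown as a comment):

```py
from llama_index.core.llms import ChatMessage
from llama_index.core.base.llms.types import (
    TextBlock,
    CacheControl,
    CachePoint,
    MessageRole,
)
from llama_index.llms.bedrock_converse.utils import messages_to_converse_messages

msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        blocks=[
            TextBlock(text="Stable, expensive-to-recompute instructions."),
            CachePoint(cache_control=CacheControl(type="default")),
        ],
    )
]

# With a caching-capable model id, the system prompt comes back structured
_, system = messages_to_converse_messages(
    msgs, "anthropic.claude-sonnet-4-20250514-v1:0"
)
# system == [
#     {"text": "Stable, expensive-to-recompute instructions."},
#     {"cachePoint": {"type": "default"}},
# ]
```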
@@ -444,7 +494,7 @@ def converse_with_retry(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -466,11 +516,19 @@ def converse_with_retry(
         },
     }
     if system_prompt:
-        system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # plain-text system prompt (kept for backward compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # add a cache point to the system prompt if one is not already present
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
-
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
 
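In short, `converse_with_retry` now accepts either form of `system_prompt`; a brief sketch of the two accepted values (the flat string is the pre-0.9.4 behavior):

```py
# Legacy flat string, still accepted
system_prompt = "You are a helpful assistant."

# Structured form, as produced by messages_to_converse_messages
system_prompt = [
    {"text": "You are a helpful assistant."},
    {"cachePoint": {"type": "default"}},
]
# With system_prompt_caching=True, a trailing cache point is appended only
# when the last entry is not already a cachePoint.
```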
@@ -491,12 +549,13 @@ def converse_with_retry(
     )
 
     @retry_decorator
-    def _conversion_with_retry(**kwargs: Any) -> Any:
+    def _converse_with_retry(**kwargs: Any) -> Any:
         if stream:
             return client.converse_stream(**kwargs)
-        return client.converse(**kwargs)
+        else:
+            return client.converse(**kwargs)
 
-    return _conversion_with_retry(**converse_kwargs)
+    return _converse_with_retry(**converse_kwargs)
 
 
 async def converse_with_retry_async(
@@ -505,7 +564,7 @@ async def converse_with_retry_async(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -527,11 +586,22 @@ async def converse_with_retry_async(
             "temperature": temperature,
         },
     }
+
     if system_prompt:
-        system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # plain-text system prompt (kept for backward compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # add a cache point to the system prompt if one is not already present
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
+
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
     if tool_caching and "tools" in converse_kwargs["toolConfig"]:
llama_index_llms_bedrock_converse-0.9.2.dist-info/METADATA → llama_index_llms_bedrock_converse-0.9.4.dist-info/METADATA

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.9.2
+Version: 0.9.4
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: aioboto3<16,>=13.1.1
-Requires-Dist: boto3<2,>=1.34.122
+Requires-Dist: aioboto3<16,>=15.0.0
+Requires-Dist: boto3<2,>=1.38.27
 Requires-Dist: llama-index-core<0.15,>=0.13.0
 Description-Content-Type: text/markdown
 
@@ -220,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
 print(resp)
 ```
 
+### Prompt caching for system and regular messages
+
+You can cache system and regular messages by placing cache points strategically:
+
+```py
+from llama_index.core.llms import ChatMessage
+from llama_index.core.base.llms.types import (
+    TextBlock,
+    CacheControl,
+    CachePoint,
+    MessageRole,
+)
+
+# Cache expensive context but keep dynamic instructions uncached
+cached_context = (
+    """[Large context about company policies, knowledge base, etc...]"""
+)
+dynamic_instructions = (
+    "Today's date is 2024-01-15. Focus on recent developments."
+)
+document_text = "[Long document]"
+messages = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        blocks=[
+            TextBlock(text=cached_context),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(text=dynamic_instructions),
+        ],
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        blocks=[
+            TextBlock(
+                text=f"{document_text}",
+                type="text",
+            ),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(
+                text="What's our current policy on remote work?",
+                type="text",
+            ),
+        ],
+    ),
+]
+
+response = llm.chat(messages)
+```
+
 ### LLM Implementation example
 
 https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
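To confirm caching is taking effect after a call like the README example above, one option is to inspect the raw Converse usage block. This is a hedged sketch: it assumes `response.raw` carries the Bedrock response as a dict, with field names per the token-count hunk earlier in this diff:

```py
# Hypothetical follow-up to the README example (response = llm.chat(messages))
usage = response.raw.get("usage", {})
print("cache reads: ", usage.get("cacheReadInputTokens", 0))
print("cache writes:", usage.get("cacheWriteInputTokens", 0))
```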
llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD (added)

@@ -0,0 +1,7 @@
+llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
+llama_index/llms/bedrock_converse/base.py,sha256=SHGRmAYcMUMAjmnHWFLYKPzvsAAjCF_A1Mvc7s9I7IM,35233
+llama_index/llms/bedrock_converse/utils.py,sha256=oX1ksJsUccNcCUURB9FZDSmxGGg1Q5EbpaQ4oRtlGXY,25418
+llama_index_llms_bedrock_converse-0.9.4.dist-info/METADATA,sha256=qqNYHGddynWmeGnW3bv75HSdwF5NVxQwxF5QdXNRW7I,7833
+llama_index_llms_bedrock_converse-0.9.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+llama_index_llms_bedrock_converse-0.9.4.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_bedrock_converse-0.9.4.dist-info/RECORD,,

llama_index_llms_bedrock_converse-0.9.2.dist-info/RECORD (removed)

@@ -1,7 +0,0 @@
-llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
-llama_index/llms/bedrock_converse/base.py,sha256=0rmV73HRrZK6lb1AX_sCFrqwwivMegJ9X1GHuECJbVQ,34880
-llama_index/llms/bedrock_converse/utils.py,sha256=69-NGsEV5GpECvuLEAjz5tQu6OMxwcOvJKBnfXvCVNM,22074
-llama_index_llms_bedrock_converse-0.9.2.dist-info/METADATA,sha256=xsgEpces2jzNUwCcOD3vcB0XIJajlzXggbsaHqLHl4k,6563
-llama_index_llms_bedrock_converse-0.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-llama_index_llms_bedrock_converse-0.9.2.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_bedrock_converse-0.9.2.dist-info/RECORD,,