llama-index-llms-bedrock-converse 0.9.3__py3-none-any.whl → 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -366,7 +366,9 @@ class BedrockConverse(FunctionCallingLLM):
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -414,7 +416,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -551,7 +555,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponse:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -601,7 +607,9 @@ class BedrockConverse(FunctionCallingLLM):
         self, messages: Sequence[ChatMessage], **kwargs: Any
     ) -> ChatResponseAsyncGen:
         # convert Llama Index messages to AWS Bedrock Converse messages
-        converse_messages, system_prompt = messages_to_converse_messages(messages)
+        converse_messages, system_prompt = messages_to_converse_messages(
+            messages, self.model
+        )
         all_kwargs = self._get_all_kwargs(**kwargs)
 
         # invoke LLM in AWS Bedrock Converse with retry
@@ -840,8 +848,11 @@ class BedrockConverse(FunctionCallingLLM):
             return {}
 
         # Convert Bedrock's token count format to match OpenAI's format
+        # Cache token formats respecting Anthropic format
        return {
            "prompt_tokens": usage.get("inputTokens", 0),
            "completion_tokens": usage.get("outputTokens", 0),
            "total_tokens": usage.get("totalTokens", 0),
+            "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+            "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
        }
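The counters added in this hunk simply mirror Bedrock's camelCase `usage` keys. A minimal, self-contained sketch of the mapping (the sample numbers are invented for illustration):

```py
# Illustrative only: a made-up Bedrock Converse "usage" block and the
# OpenAI-style dict the updated helper builds from it.
usage = {
    "inputTokens": 1200,
    "outputTokens": 85,
    "totalTokens": 1285,
    "cacheReadInputTokens": 1024,  # tokens served from the prompt cache
    "cacheWriteInputTokens": 0,    # tokens written to the cache on this call
}
token_counts = {
    "prompt_tokens": usage.get("inputTokens", 0),
    "completion_tokens": usage.get("outputTokens", 0),
    "total_tokens": usage.get("totalTokens", 0),
    "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
    "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
}
# A cache hit shows up as a non-zero cache_read_input_tokens value.
print(token_counts["cache_read_input_tokens"] > 0)
```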
@@ -1,7 +1,7 @@
 import base64
 import json
 import logging
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from tenacity import (
     before_sleep_log,
     retry,
@@ -95,6 +95,7 @@ BEDROCK_FUNCTION_CALLING_MODELS = (
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
     "cohere.command-r-v1:0",
     "cohere.command-r-plus-v1:0",
     "mistral.mistral-large-2402-v1:0",
@@ -124,6 +125,7 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "anthropic.claude-3-7-sonnet-20250219-v1:0",
     "anthropic.claude-opus-4-20250514-v1:0",
     "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
     "meta.llama3-1-8b-instruct-v1:0",
     "meta.llama3-1-70b-instruct-v1:0",
     "meta.llama3-2-1b-instruct-v1:0",
@@ -135,6 +137,19 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
     "meta.llama4-scout-17b-instruct-v1:0",
     "deepseek.r1-v1:0",
 )
+BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
+    "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-5-haiku-20241022-v1:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "anthropic.claude-opus-4-20250514-v1:0",
+    "anthropic.claude-opus-4-1-20250805-v1:0",
+    "anthropic.claude-sonnet-4-20250514-v1:0",
+    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "amazon.nova-premier-v1:0",
+    "amazon.nova-pro-v1:0",
+    "amazon.nova-lite-v1:0",
+    "amazon.nova-micro-v1:0",
+)
 
 
 def get_model_name(model_name: str) -> str:
@@ -163,6 +178,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
     return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS
 
 
+def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
+    return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
+
+
 def bedrock_modelname_to_context_size(model_name: str) -> int:
     translated_model_name = get_model_name(model_name)
 
@@ -258,12 +277,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:
 
 def messages_to_converse_messages(
     messages: Sequence[ChatMessage],
-) -> Tuple[Sequence[Dict[str, Any]], str]:
+    model: Optional[str] = None,
+) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
     """
     Converts a list of generic ChatMessages to AWS Bedrock Converse messages.
 
     Args:
         messages: List of ChatMessages
+        model: optional model name used to omit cache point if the model does not support it
 
     Returns:
         Tuple of:
@@ -272,10 +293,40 @@ def messages_to_converse_messages(
 
     """
     converse_messages = []
-    system_prompt = ""
+    system_prompt = []
+    current_system_prompt = ""
     for message in messages:
-        if message.role == MessageRole.SYSTEM and message.content:
-            system_prompt += (message.content) + "\n"
+        if message.role == MessageRole.SYSTEM:
+            # we iterate over blocks, if content was used, the blocks are added anyway
+            for block in message.blocks:
+                if isinstance(block, TextBlock):
+                    if block.text:  # Only add non-empty text
+                        current_system_prompt += block.text + "\n"
+
+                elif isinstance(block, CachePoint):
+                    # when we find a cache point we push the current system prompt as a message
+                    if current_system_prompt != "":
+                        system_prompt.append({"text": current_system_prompt.strip()})
+                        current_system_prompt = ""
+                    # we add the cache point
+                    if (
+                        model is None
+                        or model is not None
+                        and is_bedrock_prompt_caching_supported_model(model)
+                    ):
+                        if block.cache_control.type != "default":
+                            logger.warning(
+                                "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
+                            )
+                            block.cache_control.type = "default"
+                        system_prompt.append(
+                            {"cachePoint": {"type": block.cache_control.type}}
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model} does not support prompt caching, cache point will be ignored..."
+                        )
+
         elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
             # convert tool output to the AWS Bedrock Converse format
             content = {
@@ -343,8 +394,9 @@ def messages_to_converse_messages(
                 "content": content,
             }
         )
-
-    return __merge_common_role_msgs(converse_messages), system_prompt.strip()
+    if current_system_prompt != "":
+        system_prompt.append({"text": current_system_prompt.strip()})
+    return __merge_common_role_msgs(converse_messages), system_prompt
 
 
 def tools_to_converse_tools(
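With the rewritten loop, a system message containing a `CachePoint` now yields a list-valued system prompt rather than a single string. A minimal sketch of the expected shape, assuming the 0.9.5 wheel is installed (imports mirror the README example further below):

```py
from llama_index.core.llms import ChatMessage
from llama_index.core.base.llms.types import (
    CacheControl,
    CachePoint,
    MessageRole,
    TextBlock,
)
from llama_index.llms.bedrock_converse.utils import messages_to_converse_messages

msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        blocks=[
            TextBlock(text="Long, stable context..."),
            CachePoint(cache_control=CacheControl(type="default")),
            TextBlock(text="Short, per-request instructions."),
        ],
    ),
    ChatMessage(role=MessageRole.USER, content="Hello"),
]
converse_messages, system = messages_to_converse_messages(
    msgs, "anthropic.claude-sonnet-4-20250514-v1:0"
)
# Expected `system` value, per the block handling above:
# [{"text": "Long, stable context..."},
#  {"cachePoint": {"type": "default"}},
#  {"text": "Short, per-request instructions."}]
```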
@@ -445,7 +497,7 @@ def converse_with_retry(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -467,11 +519,19 @@
         },
     }
     if system_prompt:
-        system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
-
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
 
@@ -492,12 +552,13 @@
     )
 
     @retry_decorator
-    def _conversion_with_retry(**kwargs: Any) -> Any:
+    def _converse_with_retry(**kwargs: Any) -> Any:
         if stream:
             return client.converse_stream(**kwargs)
-        return client.converse(**kwargs)
+        else:
+            return client.converse(**kwargs)
 
-    return _conversion_with_retry(**converse_kwargs)
+    return _converse_with_retry(**converse_kwargs)
 
 
 async def converse_with_retry_async(
@@ -506,7 +567,7 @@ async def converse_with_retry_async(
     model: str,
     messages: Sequence[Dict[str, Any]],
     max_retries: int = 3,
-    system_prompt: Optional[str] = None,
+    system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
     system_prompt_caching: bool = False,
     tool_caching: bool = False,
     max_tokens: int = 1000,
@@ -528,11 +589,22 @@
             "temperature": temperature,
         },
     }
+
     if system_prompt:
-        system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
-        if system_prompt_caching:
+        if isinstance(system_prompt, str):
+            # if the system prompt is a simple text (for retro compatibility)
+            system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
+        else:
+            system_messages: list[dict[str, Any]] = system_prompt
+        if (
+            system_prompt_caching
+            and len(system_messages) > 0
+            and system_messages[-1].get("cachePoint", None) is None
+        ):
+            # "Adding cache point to system prompt if not present"
             system_messages.append({"cachePoint": {"type": "default"}})
         converse_kwargs["system"] = system_messages
+
     if tool_config := kwargs.get("tools"):
         converse_kwargs["toolConfig"] = tool_config
     if tool_caching and "tools" in converse_kwargs["toolConfig"]:
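The same system-prompt handling is applied in both the sync and async paths above: when `system_prompt_caching` is enabled and the (now possibly structured) system prompt does not already end with a cache point, a default one is appended. A small stand-alone mirror of that logic, for illustration only (the helper name `build_system_messages` is hypothetical, not part of the package):

```py
from typing import Any, Dict, List, Sequence, Union


def build_system_messages(
    system_prompt: Union[str, Sequence[Dict[str, Any]]],
    system_prompt_caching: bool,
) -> List[Dict[str, Any]]:
    # Plain strings keep the pre-0.9.5 behaviour; structured lists pass through.
    if isinstance(system_prompt, str):
        system_messages: List[Dict[str, Any]] = [{"text": system_prompt}]
    else:
        system_messages = list(system_prompt)
    # Append a default cache point only if one is not already trailing.
    if (
        system_prompt_caching
        and system_messages
        and system_messages[-1].get("cachePoint") is None
    ):
        system_messages.append({"cachePoint": {"type": "default"}})
    return system_messages


print(build_system_messages("You are terse.", True))
# [{'text': 'You are terse.'}, {'cachePoint': {'type': 'default'}}]
print(build_system_messages([{"text": "Ctx"}, {"cachePoint": {"type": "default"}}], True))
# unchanged; the existing trailing cache point is not duplicated
```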
@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: llama-index-llms-bedrock-converse
-Version: 0.9.3
+Version: 0.9.5
 Summary: llama-index llms bedrock converse integration
 Author-email: Your Name <you@example.com>
 License-Expression: MIT
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: aioboto3<16,>=13.1.1
-Requires-Dist: boto3<2,>=1.34.122
+Requires-Dist: aioboto3<16,>=15.0.0
+Requires-Dist: boto3<2,>=1.38.27
 Requires-Dist: llama-index-core<0.15,>=0.13.0
 Description-Content-Type: text/markdown
 
@@ -220,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
 print(resp)
 ```
 
+### Prompt Caching System and regular messages
+
+You can cache normal and system messages by placing cache points strategically:
+
+```py
+from llama_index.core.llms import ChatMessage
+from llama_index.core.base.llms.types import (
+    TextBlock,
+    CacheControl,
+    CachePoint,
+    MessageRole,
+)
+
+# Cache expensive context but keep dynamic instructions uncached
+cached_context = (
+    """[Large context about company policies, knowledge base, etc...]"""
+)
+dynamic_instructions = (
+    "Today's date is 2024-01-15. Focus on recent developments."
+)
+document_text = "[Long document]"
+messages = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        blocks=[
+            TextBlock(text=cached_context),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(text=dynamic_instructions),
+        ],
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        blocks=[
+            TextBlock(
+                text=f"{document_text}",
+                type="text",
+            ),
+            CachePoint(cache_control=CacheControl(type="default")),
+            TextBlock(
+                text="What's our current policy on remote work?",
+                type="text",
+            ),
+        ],
+    ),
+]
+
+response = llm.chat(messages)
+```
+
 ### LLM Implementation example
 
 https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
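The README snippet above assumes an existing `llm` instance; a minimal construction might look like the following sketch (model ID and region are illustrative, credentials resolved from the usual AWS configuration):

```py
from llama_index.llms.bedrock_converse import BedrockConverse

llm = BedrockConverse(
    model="anthropic.claude-3-7-sonnet-20250219-v1:0",  # in the prompt-caching support list
    region_name="us-east-1",
)
```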
@@ -0,0 +1,7 @@
+llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
+llama_index/llms/bedrock_converse/base.py,sha256=SHGRmAYcMUMAjmnHWFLYKPzvsAAjCF_A1Mvc7s9I7IM,35233
+llama_index/llms/bedrock_converse/utils.py,sha256=Ly-s3mROVreinvYmRcAJU7MksSHqeTEa1tnY3na17wg,25565
+llama_index_llms_bedrock_converse-0.9.5.dist-info/METADATA,sha256=ALn1SYaHR7aYuALhDShDhbWBDHOzw9RAHQI1iS0xhLM,7833
+llama_index_llms_bedrock_converse-0.9.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+llama_index_llms_bedrock_converse-0.9.5.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
+llama_index_llms_bedrock_converse-0.9.5.dist-info/RECORD,,
@@ -1,7 +0,0 @@
-llama_index/llms/bedrock_converse/__init__.py,sha256=xE3ZHLXqFr7TTTgQlYH9bLLPRZAV3dJyiz_iUFXBfak,98
-llama_index/llms/bedrock_converse/base.py,sha256=0rmV73HRrZK6lb1AX_sCFrqwwivMegJ9X1GHuECJbVQ,34880
-llama_index/llms/bedrock_converse/utils.py,sha256=kDOs_h7NgvNTHIUU36Gzp0zUNuq5QGeLOAp0ttxtocg,22131
-llama_index_llms_bedrock_converse-0.9.3.dist-info/METADATA,sha256=gfCs7mFvOsuCjCjKNm-IQxQaEEAEqvJTjStqsr_C52k,6563
-llama_index_llms_bedrock_converse-0.9.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-llama_index_llms_bedrock_converse-0.9.3.dist-info/licenses/LICENSE,sha256=JPQLUZD9rKvCTdu192Nk0V5PAwklIg6jANii3UmTyMs,1065
-llama_index_llms_bedrock_converse-0.9.3.dist-info/RECORD,,