llama-index-llms-bedrock-converse 0.9.3__tar.gz → 0.9.4__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,3 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: llama-index-llms-bedrock-converse
3
+ Version: 0.9.4
4
+ Summary: llama-index llms bedrock converse integration
5
+ Author-email: Your Name <you@example.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: <4.0,>=3.9
9
+ Requires-Dist: aioboto3<16,>=15.0.0
10
+ Requires-Dist: boto3<2,>=1.38.27
11
+ Requires-Dist: llama-index-core<0.15,>=0.13.0
12
+ Description-Content-Type: text/markdown
13
+
1
14
  # LlamaIndex Llms Integration: Bedrock Converse
2
15
 
3
16
  ### Installation
@@ -207,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
207
220
  print(resp)
208
221
  ```
209
222
 
223
+ ### Prompt caching for system and regular messages
224
+
225
+ You can cache both regular and system messages by placing cache points strategically:
226
+
227
+ ```py
228
+ from llama_index.core.llms import ChatMessage
229
+ from llama_index.core.base.llms.types import (
230
+ TextBlock,
231
+ CacheControl,
232
+ CachePoint,
233
+ MessageRole,
234
+ )
235
+
236
+ # Cache expensive context but keep dynamic instructions uncached
237
+ cached_context = (
238
+ """[Large context about company policies, knowledge base, etc...]"""
239
+ )
240
+ dynamic_instructions = (
241
+ "Today's date is 2024-01-15. Focus on recent developments."
242
+ )
243
+ document_text = "[Long document]"
244
+ messages = [
245
+ ChatMessage(
246
+ role=MessageRole.SYSTEM,
247
+ blocks=[
248
+ TextBlock(text=cached_context),
249
+ CachePoint(cache_control=CacheControl(type="default")),
250
+ TextBlock(text=dynamic_instructions),
251
+ ],
252
+ ),
253
+ ChatMessage(
254
+ role=MessageRole.USER,
255
+ blocks=[
256
+ TextBlock(
257
+ text=document_text,
258
+ type="text",
259
+ ),
260
+ CachePoint(cache_control=CacheControl(type="default")),
261
+ TextBlock(
262
+ text="What's our current policy on remote work?",
263
+ type="text",
264
+ ),
265
+ ],
266
+ ),
267
+ ]
268
+
269
+ response = llm.chat(messages)
270
+ ```
271
+
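For reference, the `llm` used in the snippet above is a regular `BedrockConverse` instance; the README constructs it earlier, but a minimal sketch is shown here for completeness (the model id is one of the caching-capable models, and `region_name`/credentials are illustrative assumptions that depend on your AWS setup):

```py
from llama_index.llms.bedrock_converse import BedrockConverse

# Illustrative setup only: pick any model that supports prompt caching on Bedrock
# and supply whatever AWS credentials/region configuration your environment needs.
llm = BedrockConverse(
    model="anthropic.claude-3-5-sonnet-20241022-v2:0",
    region_name="us-east-1",
)
```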
210
272
  ### LLM Implementation example
211
273
 
212
274
  https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
@@ -1,16 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: llama-index-llms-bedrock-converse
3
- Version: 0.9.3
4
- Summary: llama-index llms bedrock converse integration
5
- Author-email: Your Name <you@example.com>
6
- License-Expression: MIT
7
- License-File: LICENSE
8
- Requires-Python: <4.0,>=3.9
9
- Requires-Dist: aioboto3<16,>=13.1.1
10
- Requires-Dist: boto3<2,>=1.34.122
11
- Requires-Dist: llama-index-core<0.15,>=0.13.0
12
- Description-Content-Type: text/markdown
13
-
14
1
  # LlamaIndex Llms Integration: Bedrock Converse
15
2
 
16
3
  ### Installation
@@ -220,6 +207,55 @@ resp = await llm.acomplete("Paul Graham is ")
220
207
  print(resp)
221
208
  ```
222
209
 
210
+ ### Prompt caching for system and regular messages
211
+
212
+ You can cache both regular and system messages by placing cache points strategically:
213
+
214
+ ```py
215
+ from llama_index.core.llms import ChatMessage
216
+ from llama_index.core.base.llms.types import (
217
+ TextBlock,
218
+ CacheControl,
219
+ CachePoint,
220
+ MessageRole,
221
+ )
222
+
223
+ # Cache expensive context but keep dynamic instructions uncached
224
+ cached_context = (
225
+ """[Large context about company policies, knowledge base, etc...]"""
226
+ )
227
+ dynamic_instructions = (
228
+ "Today's date is 2024-01-15. Focus on recent developments."
229
+ )
230
+ document_text = "[Long document]"
231
+ messages = [
232
+ ChatMessage(
233
+ role=MessageRole.SYSTEM,
234
+ blocks=[
235
+ TextBlock(text=cached_context),
236
+ CachePoint(cache_control=CacheControl(type="default")),
237
+ TextBlock(text=dynamic_instructions),
238
+ ],
239
+ ),
240
+ ChatMessage(
241
+ role=MessageRole.USER,
242
+ blocks=[
243
+ TextBlock(
244
+ text=document_text,
245
+ type="text",
246
+ ),
247
+ CachePoint(cache_control=CacheControl(type="default")),
248
+ TextBlock(
249
+ text="What's our current policy on remote work?",
250
+ type="text",
251
+ ),
252
+ ],
253
+ ),
254
+ ]
255
+
256
+ response = llm.chat(messages)
257
+ ```
258
+
223
259
  ### LLM Implementation example
224
260
 
225
261
  https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
@@ -366,7 +366,9 @@ class BedrockConverse(FunctionCallingLLM):
366
366
  @llm_chat_callback()
367
367
  def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
368
368
  # convert Llama Index messages to AWS Bedrock Converse messages
369
- converse_messages, system_prompt = messages_to_converse_messages(messages)
369
+ converse_messages, system_prompt = messages_to_converse_messages(
370
+ messages, self.model
371
+ )
370
372
  all_kwargs = self._get_all_kwargs(**kwargs)
371
373
 
372
374
  # invoke LLM in AWS Bedrock Converse with retry
@@ -414,7 +416,9 @@ class BedrockConverse(FunctionCallingLLM):
414
416
  self, messages: Sequence[ChatMessage], **kwargs: Any
415
417
  ) -> ChatResponseGen:
416
418
  # convert Llama Index messages to AWS Bedrock Converse messages
417
- converse_messages, system_prompt = messages_to_converse_messages(messages)
419
+ converse_messages, system_prompt = messages_to_converse_messages(
420
+ messages, self.model
421
+ )
418
422
  all_kwargs = self._get_all_kwargs(**kwargs)
419
423
 
420
424
  # invoke LLM in AWS Bedrock Converse with retry
@@ -551,7 +555,9 @@ class BedrockConverse(FunctionCallingLLM):
551
555
  self, messages: Sequence[ChatMessage], **kwargs: Any
552
556
  ) -> ChatResponse:
553
557
  # convert Llama Index messages to AWS Bedrock Converse messages
554
- converse_messages, system_prompt = messages_to_converse_messages(messages)
558
+ converse_messages, system_prompt = messages_to_converse_messages(
559
+ messages, self.model
560
+ )
555
561
  all_kwargs = self._get_all_kwargs(**kwargs)
556
562
 
557
563
  # invoke LLM in AWS Bedrock Converse with retry
@@ -601,7 +607,9 @@ class BedrockConverse(FunctionCallingLLM):
601
607
  self, messages: Sequence[ChatMessage], **kwargs: Any
602
608
  ) -> ChatResponseAsyncGen:
603
609
  # convert Llama Index messages to AWS Bedrock Converse messages
604
- converse_messages, system_prompt = messages_to_converse_messages(messages)
610
+ converse_messages, system_prompt = messages_to_converse_messages(
611
+ messages, self.model
612
+ )
605
613
  all_kwargs = self._get_all_kwargs(**kwargs)
606
614
 
607
615
  # invoke LLM in AWS Bedrock Converse with retry
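All four chat entry points (`chat`, `stream_chat`, `achat`, `astream_chat`) now forward `self.model`, so cache points are validated against the configured model everywhere. A hedged usage sketch, reusing the cache-point `messages` from the README example above and the standard LlamaIndex streaming interface:

```py
# Streaming works with the same cache-point messages; if the configured model does
# not support prompt caching, the cache points are dropped with a warning instead
# of failing the request (see messages_to_converse_messages below).
for chunk in llm.stream_chat(messages):
    print(chunk.delta, end="")
```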
@@ -840,8 +848,11 @@ class BedrockConverse(FunctionCallingLLM):
840
848
  return {}
841
849
 
842
850
  # Convert Bedrock's token count format to match OpenAI's format
851
+ # Cache token counts are reported under Anthropic-style key names
843
852
  return {
844
853
  "prompt_tokens": usage.get("inputTokens", 0),
845
854
  "completion_tokens": usage.get("outputTokens", 0),
846
855
  "total_tokens": usage.get("totalTokens", 0),
856
+ "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
857
+ "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
847
858
  }
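The token-count dict therefore gains two cache-related keys mapped from the Converse `usage` block. A small sketch of the mapping, using an illustrative `usage` payload with the field names referenced above:

```py
usage = {
    "inputTokens": 42,
    "outputTokens": 180,
    "totalTokens": 222,
    "cacheReadInputTokens": 1024,  # tokens served from the prompt cache
    "cacheWriteInputTokens": 0,    # tokens written to the cache on this call
}

# Expected result of the conversion shown above
token_counts = {
    "prompt_tokens": usage.get("inputTokens", 0),
    "completion_tokens": usage.get("outputTokens", 0),
    "total_tokens": usage.get("totalTokens", 0),
    "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
    "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
}
```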
@@ -1,7 +1,7 @@
1
1
  import base64
2
2
  import json
3
3
  import logging
4
- from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
4
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
5
5
  from tenacity import (
6
6
  before_sleep_log,
7
7
  retry,
@@ -135,6 +135,18 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
135
135
  "meta.llama4-scout-17b-instruct-v1:0",
136
136
  "deepseek.r1-v1:0",
137
137
  )
138
+ BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
139
+ "anthropic.claude-3-5-sonnet-20241022-v2:0",
140
+ "anthropic.claude-3-5-haiku-20241022-v1:0",
141
+ "anthropic.claude-3-7-sonnet-20250219-v1:0",
142
+ "anthropic.claude-opus-4-20250514-v1:0",
143
+ "anthropic.claude-sonnet-4-20250514-v1:0",
144
+ "anthropic.claude-opus-4-1-20250805-v1:0",
145
+ "amazon.nova-premier-v1:0",
146
+ "amazon.nova-pro-v1:0",
147
+ "amazon.nova-lite-v1:0",
148
+ "amazon.nova-micro-v1:0",
149
+ )
138
150
 
139
151
 
140
152
  def get_model_name(model_name: str) -> str:
@@ -163,6 +175,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
163
175
  return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS
164
176
 
165
177
 
178
+ def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
179
+ return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
180
+
181
+
166
182
  def bedrock_modelname_to_context_size(model_name: str) -> int:
167
183
  translated_model_name = get_model_name(model_name)
168
184
 
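A quick sketch of how the new predicate behaves for model ids inside and outside the table (whether cross-region inference-profile ids such as `us.anthropic....` are normalized first depends on `get_model_name`, which is not shown in this hunk):

```py
# Listed in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS -> True
assert is_bedrock_prompt_caching_supported_model(
    "anthropic.claude-3-5-sonnet-20241022-v2:0"
)

# Not listed -> False, so cache points will be ignored for this model
assert not is_bedrock_prompt_caching_supported_model(
    "anthropic.claude-3-haiku-20240307-v1:0"
)
```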
@@ -258,12 +274,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:
258
274
 
259
275
  def messages_to_converse_messages(
260
276
  messages: Sequence[ChatMessage],
261
- ) -> Tuple[Sequence[Dict[str, Any]], str]:
277
+ model: Optional[str] = None,
278
+ ) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
262
279
  """
263
280
  Converts a list of generic ChatMessages to AWS Bedrock Converse messages.
264
281
 
265
282
  Args:
266
283
  messages: List of ChatMessages
284
+ model: optional model name, used to drop cache points when the model does not support prompt caching
267
285
 
268
286
  Returns:
269
287
  Tuple of:
@@ -272,10 +290,40 @@ def messages_to_converse_messages(
272
290
 
273
291
  """
274
292
  converse_messages = []
275
- system_prompt = ""
293
+ system_prompt = []
294
+ current_system_prompt = ""
276
295
  for message in messages:
277
- if message.role == MessageRole.SYSTEM and message.content:
278
- system_prompt += (message.content) + "\n"
296
+ if message.role == MessageRole.SYSTEM:
297
+ # iterate over the message blocks; if plain `content` was used, it is exposed as blocks anyway
298
+ for block in message.blocks:
299
+ if isinstance(block, TextBlock):
300
+ if block.text: # Only add non-empty text
301
+ current_system_prompt += block.text + "\n"
302
+
303
+ elif isinstance(block, CachePoint):
304
+ # when a cache point is reached, flush the accumulated system prompt as a text block
305
+ if current_system_prompt != "":
306
+ system_prompt.append({"text": current_system_prompt.strip()})
307
+ current_system_prompt = ""
308
+ # then add the cache point itself, provided the model supports prompt caching
309
+ if (
310
+ model is None
311
+ or model is not None
312
+ and is_bedrock_prompt_caching_supported_model(model)
313
+ ):
314
+ if block.cache_control.type != "default":
315
+ logger.warning(
316
+ "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
317
+ )
318
+ block.cache_control.type = "default"
319
+ system_prompt.append(
320
+ {"cachePoint": {"type": block.cache_control.type}}
321
+ )
322
+ else:
323
+ logger.warning(
324
+ f"Model {model} does not support prompt caching, cache point will be ignored..."
325
+ )
326
+
279
327
  elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
280
328
  # convert tool output to the AWS Bedrock Converse format
281
329
  content = {
@@ -343,8 +391,9 @@ def messages_to_converse_messages(
343
391
  "content": content,
344
392
  }
345
393
  )
346
-
347
- return __merge_common_role_msgs(converse_messages), system_prompt.strip()
394
+ if current_system_prompt != "":
395
+ system_prompt.append({"text": current_system_prompt.strip()})
396
+ return __merge_common_role_msgs(converse_messages), system_prompt
348
397
 
349
398
 
350
399
  def tools_to_converse_tools(
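With this change the second element of the returned tuple is no longer a single string but a list of Converse `system` blocks, with cache points preserved as `cachePoint` entries. A sketch of the expected shape for the system message from the README example (text values abbreviated):

```py
converse_messages, system_prompt = messages_to_converse_messages(
    messages, "anthropic.claude-3-5-sonnet-20241022-v2:0"
)
# system_prompt is expected to look like:
# [
#     {"text": "[Large context about company policies, knowledge base, etc...]"},
#     {"cachePoint": {"type": "default"}},
#     {"text": "Today's date is 2024-01-15. Focus on recent developments."},
# ]
```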
@@ -445,7 +494,7 @@ def converse_with_retry(
445
494
  model: str,
446
495
  messages: Sequence[Dict[str, Any]],
447
496
  max_retries: int = 3,
448
- system_prompt: Optional[str] = None,
497
+ system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
449
498
  system_prompt_caching: bool = False,
450
499
  tool_caching: bool = False,
451
500
  max_tokens: int = 1000,
@@ -467,11 +516,19 @@ def converse_with_retry(
467
516
  },
468
517
  }
469
518
  if system_prompt:
470
- system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
471
- if system_prompt_caching:
519
+ if isinstance(system_prompt, str):
520
+ # the system prompt is a plain string (kept for backward compatibility)
521
+ system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
522
+ else:
523
+ system_messages: list[dict[str, Any]] = system_prompt
524
+ if (
525
+ system_prompt_caching
526
+ and len(system_messages) > 0
527
+ and system_messages[-1].get("cachePoint", None) is None
528
+ ):
529
+ # add a cache point to the system prompt if one is not already present
472
530
  system_messages.append({"cachePoint": {"type": "default"}})
473
531
  converse_kwargs["system"] = system_messages
474
-
475
532
  if tool_config := kwargs.get("tools"):
476
533
  converse_kwargs["toolConfig"] = tool_config
477
534
 
@@ -492,12 +549,13 @@ def converse_with_retry(
492
549
  )
493
550
 
494
551
  @retry_decorator
495
- def _conversion_with_retry(**kwargs: Any) -> Any:
552
+ def _converse_with_retry(**kwargs: Any) -> Any:
496
553
  if stream:
497
554
  return client.converse_stream(**kwargs)
498
- return client.converse(**kwargs)
555
+ else:
556
+ return client.converse(**kwargs)
499
557
 
500
- return _conversion_with_retry(**converse_kwargs)
558
+ return _converse_with_retry(**converse_kwargs)
501
559
 
502
560
 
503
561
  async def converse_with_retry_async(
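The net effect on the request payload: a plain-string system prompt is wrapped exactly as before, a block list is passed through as-is, and `system_prompt_caching=True` only appends a trailing cache point when the list does not already end in one. A hypothetical standalone helper (not part of the package) that mirrors the guard added above:

```py
from typing import Any, Dict, List

def with_trailing_cache_point(system_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Append a default cache point only when the last block is not already one,
    # mirroring the system_prompt_caching branch in converse_with_retry.
    if system_messages and system_messages[-1].get("cachePoint") is None:
        system_messages.append({"cachePoint": {"type": "default"}})
    return system_messages

print(with_trailing_cache_point([{"text": "You are a helpful assistant."}]))
# [{'text': 'You are a helpful assistant.'}, {'cachePoint': {'type': 'default'}}]
```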
@@ -506,7 +564,7 @@ async def converse_with_retry_async(
506
564
  model: str,
507
565
  messages: Sequence[Dict[str, Any]],
508
566
  max_retries: int = 3,
509
- system_prompt: Optional[str] = None,
567
+ system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
510
568
  system_prompt_caching: bool = False,
511
569
  tool_caching: bool = False,
512
570
  max_tokens: int = 1000,
@@ -528,11 +586,22 @@ async def converse_with_retry_async(
528
586
  "temperature": temperature,
529
587
  },
530
588
  }
589
+
531
590
  if system_prompt:
532
- system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
533
- if system_prompt_caching:
591
+ if isinstance(system_prompt, str):
592
+ # if the system prompt is a simple text (for retro compatibility)
593
+ system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
594
+ else:
595
+ system_messages: list[dict[str, Any]] = system_prompt
596
+ if (
597
+ system_prompt_caching
598
+ and len(system_messages) > 0
599
+ and system_messages[-1].get("cachePoint", None) is None
600
+ ):
601
+ # "Adding cache point to system prompt if not present"
534
602
  system_messages.append({"cachePoint": {"type": "default"}})
535
603
  converse_kwargs["system"] = system_messages
604
+
536
605
  if tool_config := kwargs.get("tools"):
537
606
  converse_kwargs["toolConfig"] = tool_config
538
607
  if tool_caching and "tools" in converse_kwargs["toolConfig"]:
@@ -29,15 +29,15 @@ dev = [
29
29
 
30
30
  [project]
31
31
  name = "llama-index-llms-bedrock-converse"
32
- version = "0.9.3"
32
+ version = "0.9.4"
33
33
  description = "llama-index llms bedrock converse integration"
34
34
  authors = [{name = "Your Name", email = "you@example.com"}]
35
35
  requires-python = ">=3.9,<4.0"
36
36
  readme = "README.md"
37
37
  license = "MIT"
38
38
  dependencies = [
39
- "boto3>=1.34.122,<2",
40
- "aioboto3>=13.1.1,<16",
39
+ "boto3>=1.38.27,<2",
40
+ "aioboto3>=15.0.0,<16",
41
41
  "llama-index-core>=0.13.0,<0.15",
42
42
  ]
43
43
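Because the release also raises the minimum `boto3` and `aioboto3` versions, a quick way to confirm an upgraded environment satisfies the new constraints (a sketch using the standard library; package names as declared above):

```py
from importlib.metadata import version

print(version("llama-index-llms-bedrock-converse"))  # expect 0.9.4
print(version("boto3"))     # must satisfy >=1.38.27,<2
print(version("aioboto3"))  # must satisfy >=15.0.0,<16
```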