llama-index-llms-bedrock-converse 0.9.3__tar.gz → 0.9.4__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,3 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: llama-index-llms-bedrock-converse
3
+ Version: 0.9.4
4
+ Summary: llama-index llms bedrock converse integration
5
+ Author-email: Your Name <you@example.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: <4.0,>=3.9
9
+ Requires-Dist: aioboto3<16,>=15.0.0
10
+ Requires-Dist: boto3<2,>=1.38.27
11
+ Requires-Dist: llama-index-core<0.15,>=0.13.0
12
+ Description-Content-Type: text/markdown
13
+
1
14
  # LlamaIndex Llms Integration: Bedrock Converse
2
15
 
3
16
  ### Installation
@@ -207,6 +220,55 @@ resp = await llm.acomplete("Paul Graham is ")
207
220
  print(resp)
208
221
  ```
209
222
 
223
+ ### Prompt caching for system and regular messages
224
+
225
+ You can cache both regular and system messages by placing cache points strategically:
226
+
227
+ ```py
228
+ from llama_index.core.llms import ChatMessage
229
+ from llama_index.core.base.llms.types import (
230
+ TextBlock,
231
+ CacheControl,
232
+ CachePoint,
233
+ MessageRole,
234
+ )
235
+
236
+ # Cache expensive context but keep dynamic instructions uncached
237
+ cached_context = (
238
+ """[Large context about company policies, knowledge base, etc...]"""
239
+ )
240
+ dynamic_instructions = (
241
+ "Today's date is 2024-01-15. Focus on recent developments."
242
+ )
243
+ document_text = "[Long document]"
244
+ messages = [
245
+ ChatMessage(
246
+ role=MessageRole.SYSTEM,
247
+ blocks=[
248
+ TextBlock(text=cached_context),
249
+ CachePoint(cache_control=CacheControl(type="default")),
250
+ TextBlock(text=dynamic_instructions),
251
+ ],
252
+ ),
253
+ ChatMessage(
254
+ role=MessageRole.USER,
255
+ blocks=[
256
+ TextBlock(
257
+ text=document_text,
258
+ type="text",
259
+ ),
260
+ CachePoint(cache_control=CacheControl(type="default")),
261
+ TextBlock(
262
+ text="What's our current policy on remote work?",
263
+ type="text",
264
+ ),
265
+ ],
266
+ ),
267
+ ]
268
+
269
+ response = llm.chat(messages)
270
+ ```
271
+
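For reference, the `llm` used in the snippet above is a regular `BedrockConverse` instance; the README constructs it earlier, but a minimal sketch is shown here for completeness (the model id is one of the caching-capable models, and `region_name`/credentials are illustrative assumptions that depend on your AWS setup):

```py
from llama_index.llms.bedrock_converse import BedrockConverse

# Illustrative setup only: pick any model that supports prompt caching on Bedrock
# and supply whatever AWS credentials/region configuration your environment needs.
llm = BedrockConverse(
    model="anthropic.claude-3-5-sonnet-20241022-v2:0",
    region_name="us-east-1",
)
```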
210
272
  ### LLM Implementation example
211
273
 
212
274
  https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
@@ -1,16 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: llama-index-llms-bedrock-converse
3
- Version: 0.9.3
4
- Summary: llama-index llms bedrock converse integration
5
- Author-email: Your Name <you@example.com>
6
- License-Expression: MIT
7
- License-File: LICENSE
8
- Requires-Python: <4.0,>=3.9
9
- Requires-Dist: aioboto3<16,>=13.1.1
10
- Requires-Dist: boto3<2,>=1.34.122
11
- Requires-Dist: llama-index-core<0.15,>=0.13.0
12
- Description-Content-Type: text/markdown
13
-
14
1
  # LlamaIndex Llms Integration: Bedrock Converse
15
2
 
16
3
  ### Installation
@@ -220,6 +207,55 @@ resp = await llm.acomplete("Paul Graham is ")
220
207
  print(resp)
221
208
  ```
222
209
 
210
+ ### Prompt caching for system and regular messages
211
+
212
+ You can cache both regular and system messages by placing cache points strategically:
213
+
214
+ ```py
215
+ from llama_index.core.llms import ChatMessage
216
+ from llama_index.core.base.llms.types import (
217
+ TextBlock,
218
+ CacheControl,
219
+ CachePoint,
220
+ MessageRole,
221
+ )
222
+
223
+ # Cache expensive context but keep dynamic instructions uncached
224
+ cached_context = (
225
+ """[Large context about company policies, knowledge base, etc...]"""
226
+ )
227
+ dynamic_instructions = (
228
+ "Today's date is 2024-01-15. Focus on recent developments."
229
+ )
230
+ document_text = "[Long document]"
231
+ messages = [
232
+ ChatMessage(
233
+ role=MessageRole.SYSTEM,
234
+ blocks=[
235
+ TextBlock(text=cached_context),
236
+ CachePoint(cache_control=CacheControl(type="default")),
237
+ TextBlock(text=dynamic_instructions),
238
+ ],
239
+ ),
240
+ ChatMessage(
241
+ role=MessageRole.USER,
242
+ blocks=[
243
+ TextBlock(
244
+ text=document_text,
245
+ type="text",
246
+ ),
247
+ CachePoint(cache_control=CacheControl(type="default")),
248
+ TextBlock(
249
+ text="What's our current policy on remote work?",
250
+ type="text",
251
+ ),
252
+ ],
253
+ ),
254
+ ]
255
+
256
+ response = llm.chat(messages)
257
+ ```
258
+
223
259
  ### LLM Implementation example
224
260
 
225
261
  https://docs.llamaindex.ai/en/stable/examples/llm/bedrock_converse/
@@ -366,7 +366,9 @@ class BedrockConverse(FunctionCallingLLM):
366
366
  @llm_chat_callback()
367
367
  def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
368
368
  # convert Llama Index messages to AWS Bedrock Converse messages
369
- converse_messages, system_prompt = messages_to_converse_messages(messages)
369
+ converse_messages, system_prompt = messages_to_converse_messages(
370
+ messages, self.model
371
+ )
370
372
  all_kwargs = self._get_all_kwargs(**kwargs)
371
373
 
372
374
  # invoke LLM in AWS Bedrock Converse with retry
@@ -414,7 +416,9 @@ class BedrockConverse(FunctionCallingLLM):
414
416
  self, messages: Sequence[ChatMessage], **kwargs: Any
415
417
  ) -> ChatResponseGen:
416
418
  # convert Llama Index messages to AWS Bedrock Converse messages
417
- converse_messages, system_prompt = messages_to_converse_messages(messages)
419
+ converse_messages, system_prompt = messages_to_converse_messages(
420
+ messages, self.model
421
+ )
418
422
  all_kwargs = self._get_all_kwargs(**kwargs)
419
423
 
420
424
  # invoke LLM in AWS Bedrock Converse with retry
@@ -551,7 +555,9 @@ class BedrockConverse(FunctionCallingLLM):
551
555
  self, messages: Sequence[ChatMessage], **kwargs: Any
552
556
  ) -> ChatResponse:
553
557
  # convert Llama Index messages to AWS Bedrock Converse messages
554
- converse_messages, system_prompt = messages_to_converse_messages(messages)
558
+ converse_messages, system_prompt = messages_to_converse_messages(
559
+ messages, self.model
560
+ )
555
561
  all_kwargs = self._get_all_kwargs(**kwargs)
556
562
 
557
563
  # invoke LLM in AWS Bedrock Converse with retry
@@ -601,7 +607,9 @@ class BedrockConverse(FunctionCallingLLM):
601
607
  self, messages: Sequence[ChatMessage], **kwargs: Any
602
608
  ) -> ChatResponseAsyncGen:
603
609
  # convert Llama Index messages to AWS Bedrock Converse messages
604
- converse_messages, system_prompt = messages_to_converse_messages(messages)
610
+ converse_messages, system_prompt = messages_to_converse_messages(
611
+ messages, self.model
612
+ )
605
613
  all_kwargs = self._get_all_kwargs(**kwargs)
606
614
 
607
615
  # invoke LLM in AWS Bedrock Converse with retry
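All four chat entry points (`chat`, `stream_chat`, `achat`, `astream_chat`) now forward `self.model`, so cache points are validated against the configured model everywhere. A hedged usage sketch, reusing the cache-point `messages` from the README example above and the standard LlamaIndex streaming interface:

```py
# Streaming works with the same cache-point messages; if the configured model does
# not support prompt caching, the cache points are dropped with a warning instead
# of failing the request (see messages_to_converse_messages below).
for chunk in llm.stream_chat(messages):
    print(chunk.delta, end="")
```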
@@ -840,8 +848,11 @@ class BedrockConverse(FunctionCallingLLM):
840
848
  return {}
841
849
 
842
850
  # Convert Bedrock's token count format to match OpenAI's format
851
+ # Cache token counts are reported under Anthropic-style key names
843
852
  return {
844
853
  "prompt_tokens": usage.get("inputTokens", 0),
845
854
  "completion_tokens": usage.get("outputTokens", 0),
846
855
  "total_tokens": usage.get("totalTokens", 0),
856
+ "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
857
+ "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
847
858
  }
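The token-count dict therefore gains two cache-related keys mapped from the Converse `usage` block. A small sketch of the mapping, using an illustrative `usage` payload with the field names referenced above:

```py
usage = {
    "inputTokens": 42,
    "outputTokens": 180,
    "totalTokens": 222,
    "cacheReadInputTokens": 1024,  # tokens served from the prompt cache
    "cacheWriteInputTokens": 0,    # tokens written to the cache on this call
}

# Expected result of the conversion shown above
token_counts = {
    "prompt_tokens": usage.get("inputTokens", 0),
    "completion_tokens": usage.get("outputTokens", 0),
    "total_tokens": usage.get("totalTokens", 0),
    "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
    "cache_creation_input_tokens": usage.get("cacheWriteInputTokens", 0),
}
```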
@@ -1,7 +1,7 @@
1
1
  import base64
2
2
  import json
3
3
  import logging
4
- from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
4
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
5
5
  from tenacity import (
6
6
  before_sleep_log,
7
7
  retry,
@@ -135,6 +135,18 @@ BEDROCK_INFERENCE_PROFILE_SUPPORTED_MODELS = (
135
135
  "meta.llama4-scout-17b-instruct-v1:0",
136
136
  "deepseek.r1-v1:0",
137
137
  )
138
+ BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS = (
139
+ "anthropic.claude-3-5-sonnet-20241022-v2:0",
140
+ "anthropic.claude-3-5-haiku-20241022-v1:0",
141
+ "anthropic.claude-3-7-sonnet-20250219-v1:0",
142
+ "anthropic.claude-opus-4-20250514-v1:0",
143
+ "anthropic.claude-sonnet-4-20250514-v1:0",
144
+ "anthropic.claude-opus-4-1-20250805-v1:0",
145
+ "amazon.nova-premier-v1:0",
146
+ "amazon.nova-pro-v1:0",
147
+ "amazon.nova-lite-v1:0",
148
+ "amazon.nova-micro-v1:0",
149
+ )
138
150
 
139
151
 
140
152
  def get_model_name(model_name: str) -> str:
@@ -163,6 +175,10 @@ def is_bedrock_function_calling_model(model_name: str) -> bool:
163
175
  return get_model_name(model_name) in BEDROCK_FUNCTION_CALLING_MODELS
164
176
 
165
177
 
178
+ def is_bedrock_prompt_caching_supported_model(model_name: str) -> bool:
179
+ return get_model_name(model_name) in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS
180
+
181
+
166
182
  def bedrock_modelname_to_context_size(model_name: str) -> int:
167
183
  translated_model_name = get_model_name(model_name)
168
184
 
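A quick sketch of how the new predicate behaves for model ids inside and outside the table (whether cross-region inference-profile ids such as `us.anthropic....` are normalized first depends on `get_model_name`, which is not shown in this hunk):

```py
# Listed in BEDROCK_PROMPT_CACHING_SUPPORTED_MODELS -> True
assert is_bedrock_prompt_caching_supported_model(
    "anthropic.claude-3-5-sonnet-20241022-v2:0"
)

# Not listed -> False, so cache points will be ignored for this model
assert not is_bedrock_prompt_caching_supported_model(
    "anthropic.claude-3-haiku-20240307-v1:0"
)
```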
@@ -258,12 +274,14 @@ def __get_img_format_from_image_mimetype(image_mimetype: str) -> str:
258
274
 
259
275
  def messages_to_converse_messages(
260
276
  messages: Sequence[ChatMessage],
261
- ) -> Tuple[Sequence[Dict[str, Any]], str]:
277
+ model: Optional[str] = None,
278
+ ) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
262
279
  """
263
280
  Converts a list of generic ChatMessages to AWS Bedrock Converse messages.
264
281
 
265
282
  Args:
266
283
  messages: List of ChatMessages
284
+ model: optional model name, used to drop cache points when the model does not support prompt caching
267
285
 
268
286
  Returns:
269
287
  Tuple of:
@@ -272,10 +290,40 @@ def messages_to_converse_messages(
272
290
 
273
291
  """
274
292
  converse_messages = []
275
- system_prompt = ""
293
+ system_prompt = []
294
+ current_system_prompt = ""
276
295
  for message in messages:
277
- if message.role == MessageRole.SYSTEM and message.content:
278
- system_prompt += (message.content) + "\n"
296
+ if message.role == MessageRole.SYSTEM:
297
+ # iterate over the message blocks; if plain `content` was used, it is exposed as blocks anyway
298
+ for block in message.blocks:
299
+ if isinstance(block, TextBlock):
300
+ if block.text: # Only add non-empty text
301
+ current_system_prompt += block.text + "\n"
302
+
303
+ elif isinstance(block, CachePoint):
304
+ # when a cache point is reached, flush the accumulated system prompt as a text block
305
+ if current_system_prompt != "":
306
+ system_prompt.append({"text": current_system_prompt.strip()})
307
+ current_system_prompt = ""
308
+ # then add the cache point itself, provided the model supports prompt caching
309
+ if (
310
+ model is None
311
+ or model is not None
312
+ and is_bedrock_prompt_caching_supported_model(model)
313
+ ):
314
+ if block.cache_control.type != "default":
315
+ logger.warning(
316
+ "The only allowed caching strategy for Bedrock Converse is 'default', falling back to that..."
317
+ )
318
+ block.cache_control.type = "default"
319
+ system_prompt.append(
320
+ {"cachePoint": {"type": block.cache_control.type}}
321
+ )
322
+ else:
323
+ logger.warning(
324
+ f"Model {model} does not support prompt caching, cache point will be ignored..."
325
+ )
326
+
279
327
  elif message.role in [MessageRole.FUNCTION, MessageRole.TOOL]:
280
328
  # convert tool output to the AWS Bedrock Converse format
281
329
  content = {
@@ -343,8 +391,9 @@ def messages_to_converse_messages(
343
391
  "content": content,
344
392
  }
345
393
  )
346
-
347
- return __merge_common_role_msgs(converse_messages), system_prompt.strip()
394
+ if current_system_prompt != "":
395
+ system_prompt.append({"text": current_system_prompt.strip()})
396
+ return __merge_common_role_msgs(converse_messages), system_prompt
348
397
 
349
398
 
350
399
  def tools_to_converse_tools(
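With this change the second element of the returned tuple is no longer a single string but a list of Converse `system` blocks, with cache points preserved as `cachePoint` entries. A sketch of the expected shape for the system message from the README example (text values abbreviated):

```py
converse_messages, system_prompt = messages_to_converse_messages(
    messages, "anthropic.claude-3-5-sonnet-20241022-v2:0"
)
# system_prompt is expected to look like:
# [
#     {"text": "[Large context about company policies, knowledge base, etc...]"},
#     {"cachePoint": {"type": "default"}},
#     {"text": "Today's date is 2024-01-15. Focus on recent developments."},
# ]
```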
@@ -445,7 +494,7 @@ def converse_with_retry(
445
494
  model: str,
446
495
  messages: Sequence[Dict[str, Any]],
447
496
  max_retries: int = 3,
448
- system_prompt: Optional[str] = None,
497
+ system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
449
498
  system_prompt_caching: bool = False,
450
499
  tool_caching: bool = False,
451
500
  max_tokens: int = 1000,
@@ -467,11 +516,19 @@ def converse_with_retry(
467
516
  },
468
517
  }
469
518
  if system_prompt:
470
- system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
471
- if system_prompt_caching:
519
+ if isinstance(system_prompt, str):
520
+ # the system prompt is a plain string (kept for backward compatibility)
521
+ system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
522
+ else:
523
+ system_messages: list[dict[str, Any]] = system_prompt
524
+ if (
525
+ system_prompt_caching
526
+ and len(system_messages) > 0
527
+ and system_messages[-1].get("cachePoint", None) is None
528
+ ):
529
+ # add a cache point to the system prompt if one is not already present
472
530
  system_messages.append({"cachePoint": {"type": "default"}})
473
531
  converse_kwargs["system"] = system_messages
474
-
475
532
  if tool_config := kwargs.get("tools"):
476
533
  converse_kwargs["toolConfig"] = tool_config
477
534
 
@@ -492,12 +549,13 @@ def converse_with_retry(
492
549
  )
493
550
 
494
551
  @retry_decorator
495
- def _conversion_with_retry(**kwargs: Any) -> Any:
552
+ def _converse_with_retry(**kwargs: Any) -> Any:
496
553
  if stream:
497
554
  return client.converse_stream(**kwargs)
498
- return client.converse(**kwargs)
555
+ else:
556
+ return client.converse(**kwargs)
499
557
 
500
- return _conversion_with_retry(**converse_kwargs)
558
+ return _converse_with_retry(**converse_kwargs)
501
559
 
502
560
 
503
561
  async def converse_with_retry_async(
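The net effect on the request payload: a plain-string system prompt is wrapped exactly as before, a block list is passed through as-is, and `system_prompt_caching=True` only appends a trailing cache point when the list does not already end in one. A hypothetical standalone helper (not part of the package) that mirrors the guard added above:

```py
from typing import Any, Dict, List

def with_trailing_cache_point(system_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Append a default cache point only when the last block is not already one,
    # mirroring the system_prompt_caching branch in converse_with_retry.
    if system_messages and system_messages[-1].get("cachePoint") is None:
        system_messages.append({"cachePoint": {"type": "default"}})
    return system_messages

print(with_trailing_cache_point([{"text": "You are a helpful assistant."}]))
# [{'text': 'You are a helpful assistant.'}, {'cachePoint': {'type': 'default'}}]
```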
@@ -506,7 +564,7 @@ async def converse_with_retry_async(
506
564
  model: str,
507
565
  messages: Sequence[Dict[str, Any]],
508
566
  max_retries: int = 3,
509
- system_prompt: Optional[str] = None,
567
+ system_prompt: Optional[Union[str, Sequence[Dict[str, Any]]]] = None,
510
568
  system_prompt_caching: bool = False,
511
569
  tool_caching: bool = False,
512
570
  max_tokens: int = 1000,
@@ -528,11 +586,22 @@ async def converse_with_retry_async(
528
586
  "temperature": temperature,
529
587
  },
530
588
  }
589
+
531
590
  if system_prompt:
532
- system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
533
- if system_prompt_caching:
591
+ if isinstance(system_prompt, str):
592
+ # if the system prompt is a simple text (for retro compatibility)
593
+ system_messages: list[dict[str, Any]] = [{"text": system_prompt}]
594
+ else:
595
+ system_messages: list[dict[str, Any]] = system_prompt
596
+ if (
597
+ system_prompt_caching
598
+ and len(system_messages) > 0
599
+ and system_messages[-1].get("cachePoint", None) is None
600
+ ):
601
+ # "Adding cache point to system prompt if not present"
534
602
  system_messages.append({"cachePoint": {"type": "default"}})
535
603
  converse_kwargs["system"] = system_messages
604
+
536
605
  if tool_config := kwargs.get("tools"):
537
606
  converse_kwargs["toolConfig"] = tool_config
538
607
  if tool_caching and "tools" in converse_kwargs["toolConfig"]:
@@ -29,15 +29,15 @@ dev = [
29
29
 
30
30
  [project]
31
31
  name = "llama-index-llms-bedrock-converse"
32
- version = "0.9.3"
32
+ version = "0.9.4"
33
33
  description = "llama-index llms bedrock converse integration"
34
34
  authors = [{name = "Your Name", email = "you@example.com"}]
35
35
  requires-python = ">=3.9,<4.0"
36
36
  readme = "README.md"
37
37
  license = "MIT"
38
38
  dependencies = [
39
- "boto3>=1.34.122,<2",
40
- "aioboto3>=13.1.1,<16",
39
+ "boto3>=1.38.27,<2",
40
+ "aioboto3>=15.0.0,<16",
41
41
  "llama-index-core>=0.13.0,<0.15",
42
42
  ]
43
43
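Because the release also raises the minimum `boto3` and `aioboto3` versions, a quick way to confirm an upgraded environment satisfies the new constraints (a sketch using the standard library; package names as declared above):

```py
from importlib.metadata import version

print(version("llama-index-llms-bedrock-converse"))  # expect 0.9.4
print(version("boto3"))     # must satisfy >=1.38.27,<2
print(version("aioboto3"))  # must satisfy >=15.0.0,<16
```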