amazon-bedrock-haystack 3.9.1__py3-none-any.whl → 3.11.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: amazon-bedrock-haystack
- Version: 3.9.1
+ Version: 3.11.0
  Summary: An integration of Amazon Bedrock as an AmazonBedrockGenerator component.
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -29,43 +29,19 @@ Description-Content-Type: text/markdown
  [![PyPI - Version](https://img.shields.io/pypi/v/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack)
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack)

+ - [Integration page](https://haystack.deepset.ai/integrations/amazon-bedrock)
+ - [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/amazon_bedrock/CHANGELOG.md)
  -----

- **Table of Contents**
-
- - [Installation](#installation)
- - [Contributing](#contributing)
- - [License](#license)
-
- ## Installation
-
- ```console
- pip install amazon-bedrock-haystack
- ```
-
  ## Contributing

- `hatch` is the best way to interact with this project, to install it:
- ```sh
- pip install hatch
- ```
-
- With `hatch` installed, to run all the tests:
- ```
- hatch run test:all
- ```
+ Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).

- To format your code and perform linting using Ruff (with automatic fixes), run:
+ To run integration tests locally, you need to authenticate with AWS.
+ For example, you can do that by exporting the following environment variables:
  ```
- hatch run fmt
+ export AWS_ACCESS_KEY_ID=...
+ export AWS_SECRET_ACCESS_KEY=...
+ export AWS_SESSION_TOKEN=...
+ export AWS_DEFAULT_REGION=...
  ```
-
- To check for static type errors, run:
-
- ```console
- $ hatch run test:types
- ```
-
- ## License
-
- `amazon-bedrock-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
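The new README section above covers authentication through environment variables only. As an editorial aside (not part of this diff), the same credentials can also be passed explicitly through the `Secret` parameters that the package's components expose, as their signatures elsewhere in this diff show. A minimal sketch, assuming the account has access to the requested Bedrock model:

```python
# Hedged sketch: explicit credentials instead of environment variables.
# Secret.from_token is Haystack's standard way to wrap an in-memory credential;
# the model ID is only an example of a Bedrock model your account might access.
from haystack.utils import Secret
from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockGenerator

generator = AmazonBedrockGenerator(
    model="anthropic.claude-3-5-sonnet-20240620-v1:0",
    aws_access_key_id=Secret.from_token("..."),
    aws_secret_access_key=Secret.from_token("..."),
    aws_region_name=Secret.from_token("us-east-1"),
)
print(generator.run("What is Amazon Bedrock?")["replies"][0])
```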
@@ -3,20 +3,21 @@ haystack_integrations/common/amazon_bedrock/__init__.py,sha256=6GZ8Y3Lw0rLOsOAqi
  haystack_integrations/common/amazon_bedrock/errors.py,sha256=ReheDbY7L3EJkWcUoih6lWHjbPHg2TlUs9SnXIKK7Gg,744
  haystack_integrations/common/amazon_bedrock/utils.py,sha256=ASAwEhInF9F6rhL4CbXFQUFU1pSdscWvG6jcrXkEUhc,2735
  haystack_integrations/components/embedders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- haystack_integrations/components/embedders/amazon_bedrock/__init__.py,sha256=CFqYmAVq2aavlMkZHYScKHOTwwETdRzRZITMqGhJ9Kw,298
- haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py,sha256=YBVlFIo9t2qzVkNWaFKc-FNRo7R_pKfHmqNRkoMZ9K0,12952
- haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py,sha256=KNvsUP-YZD17_zVBwMs42v0S2uuTE_ajMaj9bjt1XlE,9036
+ haystack_integrations/components/embedders/amazon_bedrock/__init__.py,sha256=7GlhHJ4jFHCxq5QN5losGuGtrGNjvEx2dSQvEYD2yG0,408
+ haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py,sha256=DD34-HAGwGwTU7KWGqKXXlFdwIs21JavBRDHrBqC-m4,13060
+ haystack_integrations/components/embedders/amazon_bedrock/document_image_embedder.py,sha256=CHNH0Dt7JQqYNbZi1lKsGvarnEhJn3UNGdghF0IhqWw,16163
+ haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py,sha256=3eSqt3XpH2thblTeOPf-ej1V2UbdG2z50d3jInq1bYc,9144
  haystack_integrations/components/generators/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  haystack_integrations/components/generators/amazon_bedrock/__init__.py,sha256=lv4NouIVm78YavUssWQrHHP_81u-7j21qW8v1kZMJPQ,284
  haystack_integrations/components/generators/amazon_bedrock/adapters.py,sha256=yBC-3YwV6qAwSXMtdZiLSYh2lUpPQIDy7Efl7w-Cu-k,19640
- haystack_integrations/components/generators/amazon_bedrock/generator.py,sha256=c_saV5zxFYQVJT0Hzo80lKty46itL0Dp31VuDueYa3M,14716
+ haystack_integrations/components/generators/amazon_bedrock/generator.py,sha256=Brzw0XvtPJhz2kR2I3liAqWHRmDR6p5HzJerEAPhoJU,14743
  haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py,sha256=6GZ8Y3Lw0rLOsOAqi6Tu5mZC977UzQvgDxKpOWr8IQw,110
- haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py,sha256=iIaMsOOX9eYvR1GNgpxNKxaOli91ShrCv3MuBBK1NSs,24743
- haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=bDNaExYhrhxLHyOdu6EHC8Ixdpg43IIPJldjddzV4GE,23236
+ haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py,sha256=_0dpBoZGY9kgK9zQOTskcjElcTifwhyBAixXDliK-vY,24918
+ haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=g2SZV8LdLobaCZpwWCreBJn1BtS1V3-wQkpisStJrcY,29015
  haystack_integrations/components/rankers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  haystack_integrations/components/rankers/amazon_bedrock/__init__.py,sha256=Zrc3BSVkEaXYpliEi6hKG9bqW4J7DNk93p50SuoyT1Q,107
  haystack_integrations/components/rankers/amazon_bedrock/ranker.py,sha256=enAjf2QyDwfpidKkFCdLz954cx-Tjh9emrOS3vINJDg,12344
- amazon_bedrock_haystack-3.9.1.dist-info/METADATA,sha256=wByKDtTt_NpNsmtNh9t3-8izh0dKl04569OfCt6xR3w,2287
- amazon_bedrock_haystack-3.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- amazon_bedrock_haystack-3.9.1.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
- amazon_bedrock_haystack-3.9.1.dist-info/RECORD,,
+ amazon_bedrock_haystack-3.11.0.dist-info/METADATA,sha256=5nA_v2Ze5xk1p-RQxbshQ0XGa3LYFljVGvNi2VvKU7o,2225
+ amazon_bedrock_haystack-3.11.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ amazon_bedrock_haystack-3.11.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
+ amazon_bedrock_haystack-3.11.0.dist-info/RECORD,,
@@ -2,6 +2,7 @@
  #
  # SPDX-License-Identifier: Apache-2.0
  from .document_embedder import AmazonBedrockDocumentEmbedder
+ from .document_image_embedder import AmazonBedrockDocumentImageEmbedder
  from .text_embedder import AmazonBedrockTextEmbedder

- __all__ = ["AmazonBedrockDocumentEmbedder", "AmazonBedrockTextEmbedder"]
+ __all__ = ["AmazonBedrockDocumentEmbedder", "AmazonBedrockDocumentImageEmbedder", "AmazonBedrockTextEmbedder"]
@@ -21,6 +21,7 @@ SUPPORTED_EMBEDDING_MODELS = [
      "cohere.embed-english-v3",
      "cohere.embed-multilingual-v3",
      "amazon.titan-embed-text-v2:0",
+     "amazon.titan-embed-image-v1",
  ]


@@ -38,7 +39,7 @@ class AmazonBedrockDocumentEmbedder:

      os.environ["AWS_ACCESS_KEY_ID"] = "..."
      os.environ["AWS_SECRET_ACCESS_KEY_ID"] = "..."
-     os.environ["AWS_REGION_NAME"] = "..."
+     os.environ["AWS_DEFAULT_REGION"] = "..."

      embedder = AmazonBedrockDocumentEmbedder(
          model="cohere.embed-english-v3",
@@ -61,6 +62,7 @@ class AmazonBedrockDocumentEmbedder:
              "cohere.embed-english-v3",
              "cohere.embed-multilingual-v3",
              "amazon.titan-embed-text-v2:0",
+             "amazon.titan-embed-image-v1",
          ],
          aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
          aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
@@ -136,9 +138,9 @@ class AmazonBedrockDocumentEmbedder:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
-             config: Optional[Config] = None
-             if self.boto3_config:
-                 config = Config(**self.boto3_config)
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
              self._client = session.client("bedrock-runtime", config=config)
          except Exception as exception:
              msg = (
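The recurring change in the hunk above (and repeated in the text embedder, the generators, and the chat generator below) is that a botocore `Config` carrying `user_agent_extra="x-client-framework:haystack"` is now always built, with any user-supplied `boto3_config` merged in, instead of a `Config` only being created when `boto3_config` is set. A minimal sketch of that merge, using illustrative botocore options that are not taken from this package:

```python
# Hedged sketch of the merge behaviour introduced above; read_timeout and
# retries are ordinary botocore Config options chosen only for illustration.
from botocore.config import Config

boto3_config = {"read_timeout": 120, "retries": {"max_attempts": 5}}
config = Config(
    user_agent_extra="x-client-framework:haystack",
    **(boto3_config if boto3_config else {}),
)
print(config.user_agent_extra)  # "x-client-framework:haystack"
print(config.read_timeout)      # 120
```

One side effect of the keyword expansion is that a `user_agent_extra` key supplied inside `boto3_config` would now collide with the injected value and raise a `TypeError`.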
@@ -0,0 +1,365 @@
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ import json
+ from dataclasses import replace
+ from typing import Any, Dict, List, Literal, Optional, Tuple
+
+ from botocore.config import Config
+ from botocore.exceptions import ClientError
+ from haystack import Document, component, default_from_dict, default_to_dict, logging
+ from haystack.components.converters.image.image_utils import (
+     _batch_convert_pdf_pages_to_images,
+     _encode_image_to_base64,
+     _extract_image_sources_info,
+     _PDFPageInfo,
+ )
+ from haystack.dataclasses import ByteStream
+ from haystack.utils.auth import Secret, deserialize_secrets_inplace
+ from tqdm import tqdm
+
+ from haystack_integrations.common.amazon_bedrock.errors import (
+     AmazonBedrockConfigurationError,
+     AmazonBedrockInferenceError,
+ )
+ from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
+
+ logger = logging.getLogger(__name__)
+
+ SUPPORTED_EMBEDDING_MODELS = ["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"]
+
+
+ @component
+ class AmazonBedrockDocumentImageEmbedder:
+     """
+     A component for computing Document embeddings based on images using Amazon Bedrock models.
+
+     The embedding of each Document is stored in the `embedding` field of the Document.
+
+     ### Usage example
+     ```python
+     from haystack import Document
+     from haystack_integrations.components.embedders.amazon_bedrock import AmazonBedrockDocumentImageEmbedder
+
+     os.environ["AWS_ACCESS_KEY_ID"] = "..."
+     os.environ["AWS_SECRET_ACCESS_KEY_ID"] = "..."
+     os.environ["AWS_DEFAULT_REGION"] = "..."
+
+     embedder = AmazonBedrockDocumentImageEmbedder(model="amazon.titan-embed-image-v1")
+
+     documents = [
+         Document(content="A photo of a cat", meta={"file_path": "cat.jpg"}),
+         Document(content="A photo of a dog", meta={"file_path": "dog.jpg"}),
+     ]
+
+     result = embedder.run(documents=documents)
+     documents_with_embeddings = result["documents"]
+     print(documents_with_embeddings)
+
+     # [Document(id=...,
+     # content='A photo of a cat',
+     # meta={'file_path': 'cat.jpg',
+     # 'embedding_source': {'type': 'image', 'file_path_meta_field': 'file_path'}},
+     # embedding=vector of size 512),
+     # ...]
+     ```
+     """
+
+     def __init__(
+         self,
+         *,
+         model: Literal["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"],
+         aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
+         aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
+             "AWS_SECRET_ACCESS_KEY", strict=False
+         ),
+         aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
+         aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
+         aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
+         file_path_meta_field: str = "file_path",
+         root_path: Optional[str] = None,
+         image_size: Optional[Tuple[int, int]] = None,
+         progress_bar: bool = True,
+         boto3_config: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> None:
+         """
+         Creates a AmazonBedrockDocumentImageEmbedder component.
+
+         :param model:
+             The Bedrock model to use for calculating embeddings. Pass a valid model ID.
+             Supported models:
+             - "amazon.titan-embed-image-v1"
+             - "cohere.embed-english-v3"
+             - "cohere.embed-multilingual-v3"
+         :param aws_access_key_id: AWS access key ID.
+         :param aws_secret_access_key: AWS secret access key.
+         :param aws_session_token: AWS session token.
+         :param aws_region_name: AWS region name.
+         :param aws_profile_name: AWS profile name.
+         :param file_path_meta_field: The metadata field in the Document that contains the file path to the image or PDF.
+         :param root_path: The root directory path where document files are located. If provided, file paths in
+             document metadata will be resolved relative to this path. If None, file paths are treated as absolute paths.
+         :param image_size:
+             If provided, resizes the image to fit within the specified dimensions (width, height) while
+             maintaining aspect ratio. This reduces file size, memory usage, and processing time, which is beneficial
+             when working with models that have resolution constraints or when transmitting images to remote services.
+         :param progress_bar:
+             If `True`, shows a progress bar when embedding documents.
+         :param boto3_config: The configuration for the boto3 client.
+         :param kwargs: Additional parameters to pass for model inference.
+             For example, `embeddingConfig` for Amazon Titan models and
+             `embedding_types` for Cohere models.
+         :raises ValueError: If the model is not supported.
+         :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
+         """
+         if not model or model not in SUPPORTED_EMBEDDING_MODELS:
+             msg = "Please provide a valid model from the list of supported models: " + ", ".join(
+                 SUPPORTED_EMBEDDING_MODELS
+             )
+             raise ValueError(msg)
+
+         self.file_path_meta_field = file_path_meta_field
+         self.root_path = root_path or ""
+         self.model = model
+         self.boto3_config = boto3_config
+
+         self.aws_access_key_id = aws_access_key_id
+         self.aws_secret_access_key = aws_secret_access_key
+         self.aws_session_token = aws_session_token
+         self.aws_region_name = aws_region_name
+         self.aws_profile_name = aws_profile_name
+         self.image_size = image_size
+         self.progress_bar = progress_bar
+         self.kwargs = kwargs
+         self.embedding_types = None
+
+         if emmbedding_types := self.kwargs.get("embedding_types"):
+             if len(emmbedding_types) > 1:
+                 msg = (
+                     "You have provided multiple embedding_types for Cohere model. "
+                     "AmazonBedrockDocumentImageEmbedder only supports one embedding_type at a time."
+                 )
+                 raise ValueError(msg)
+             self.embedding_types = emmbedding_types
+
+         def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
+             return secret.resolve_value() if secret else None
+
+         try:
+             session = get_aws_session(
+                 aws_access_key_id=resolve_secret(aws_access_key_id),
+                 aws_secret_access_key=resolve_secret(aws_secret_access_key),
+                 aws_session_token=resolve_secret(aws_session_token),
+                 aws_region_name=resolve_secret(aws_region_name),
+                 aws_profile_name=resolve_secret(aws_profile_name),
+             )
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
+             self._client = session.client("bedrock-runtime", config=config)
+         except Exception as exception:
+             msg = (
+                 "Could not connect to Amazon Bedrock. Make sure the AWS environment is configured correctly. "
+                 "See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration"
+             )
+             raise AmazonBedrockConfigurationError(msg) from exception
+
+     def to_dict(self) -> dict[str, Any]:
+         """
+         Serializes the component to a dictionary.
+
+         :returns:
+             Dictionary with serialized data.
+         """
+         serialization_dict = default_to_dict(
+             self,
+             file_path_meta_field=self.file_path_meta_field,
+             root_path=self.root_path,
+             model=self.model,
+             aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None,
+             aws_secret_access_key=self.aws_secret_access_key.to_dict() if self.aws_secret_access_key else None,
+             aws_session_token=self.aws_session_token.to_dict() if self.aws_session_token else None,
+             aws_region_name=self.aws_region_name.to_dict() if self.aws_region_name else None,
+             aws_profile_name=self.aws_profile_name.to_dict() if self.aws_profile_name else None,
+             progress_bar=self.progress_bar,
+             boto3_config=self.boto3_config,
+             image_size=self.image_size,
+             **self.kwargs,
+         )
+         return serialization_dict
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> "AmazonBedrockDocumentImageEmbedder":
+         """
+         Deserializes the component from a dictionary.
+
+         :param data:
+             Dictionary to deserialize from.
+         :returns:
+             Deserialized component.
+         """
+         init_params = data["init_parameters"]
+         deserialize_secrets_inplace(
+             init_params,
+             keys=[
+                 "aws_access_key_id",
+                 "aws_secret_access_key",
+                 "aws_session_token",
+                 "aws_region_name",
+                 "aws_profile_name",
+             ],
+         )
+         return default_from_dict(cls, data)
+
+     @component.output_types(documents=list[Document])
+     def run(self, documents: list[Document]) -> dict[str, list[Document]]:
+         """
+         Embed a list of images.
+
+         :param documents:
+             Documents to embed.
+
+         :returns:
+             A dictionary with the following keys:
+             - `documents`: Documents with embeddings.
+         """
+         if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
+             msg = (
+                 "AmazonBedrockDocumentImageEmbedder expects a list of Documents as input. "
+                 "In case you want to embed a string, please use the AmazonBedrockTextEmbedder."
+             )
+             raise TypeError(msg)
+         images_source_info = _extract_image_sources_info(
+             documents=documents, file_path_meta_field=self.file_path_meta_field, root_path=self.root_path
+         )
+
+         images_to_embed: list = [None] * len(documents)
+         pdf_page_infos: list[_PDFPageInfo] = []
+
+         for doc_idx, image_source_info in enumerate(images_source_info):
+             if image_source_info["mime_type"] == "application/pdf":
+                 # Store PDF documents for later processing
+                 page_number = image_source_info.get("page_number")
+
+                 pdf_page_info: _PDFPageInfo = {
+                     "doc_idx": doc_idx,
+                     "path": image_source_info["path"],
+                     # page_number is added but mypy doesn't know that
+                     "page_number": page_number, # type: ignore[typeddict-item]
+                 }
+                 pdf_page_infos.append(pdf_page_info)
+             else:
+                 # Process images directly
+                 image_byte_stream = ByteStream.from_file_path(
+                     filepath=image_source_info["path"], mime_type=image_source_info["mime_type"]
+                 )
+                 mime_type, base64_image = _encode_image_to_base64(bytestream=image_byte_stream, size=self.image_size)
+                 if "cohere" in self.model:
+                     images_to_embed[doc_idx] = f"data:{mime_type};base64,{base64_image}"
+                 else:
+                     images_to_embed[doc_idx] = base64_image
+
+         pdf_images_by_doc_idx = _batch_convert_pdf_pages_to_images(
+             pdf_page_infos=pdf_page_infos, return_base64=True, size=self.image_size
+         )
+
+         # the pdf_images_by_doc_idx has base64 images but mypy cant detect that
+         for doc_idx, base64_image in pdf_images_by_doc_idx.items(): # type: ignore[assignment]
+             pdf_image_uri = f"data:application/pdf;base64,{base64_image}" if "cohere" in self.model else base64_image
+             images_to_embed[doc_idx] = pdf_image_uri
+
+         none_images_doc_ids = [documents[doc_idx].id for doc_idx, image in enumerate(images_to_embed) if image is None]
+         if none_images_doc_ids:
+             msg = f"Conversion failed for some documents. Document IDs: {none_images_doc_ids}."
+             raise RuntimeError(msg)
+
+         if "cohere" in self.model:
+             embeddings = self._embed_cohere(image_uris=images_to_embed)
+         elif "titan" in self.model:
+             embeddings = self._embed_titan(images=images_to_embed)
+         else:
+             msg = f"Model {self.model} is not supported. Supported models are: {', '.join(SUPPORTED_EMBEDDING_MODELS)}."
+             raise ValueError(msg)
+
+         docs_with_embeddings = []
+
+         for doc, emb in zip(documents, embeddings):
+             # we store this information for later inspection
+             new_meta = {
+                 **doc.meta,
+                 "embedding_source": {"type": "image", "file_path_meta_field": self.file_path_meta_field},
+             }
+             new_doc = replace(doc, meta=new_meta, embedding=emb)
+             docs_with_embeddings.append(new_doc)
+
+         return {"documents": docs_with_embeddings}
+
+     def _embed_titan(self, images: List[str]) -> List[List[float]]:
+         """
+         Internal method to embed base64 images using Amazon Titan models.
+
+         :param images: List of base64 images.
+         :return: List of embeddings.
+         """
+
+         titan_body = {}
+         if embedding_config := self.kwargs.get("embeddingConfig"):
+             titan_body["embeddingConfig"] = embedding_config # optional parameter for Amazon Titan models
+
+         all_embeddings = []
+
+         for image in tqdm(images, disable=not self.progress_bar, desc="Creating embeddings"):
+             body = {"inputImage": image, **titan_body}
+             try:
+                 response = self._client.invoke_model(
+                     body=json.dumps(body), modelId=self.model, accept="*/*", contentType="application/json"
+                 )
+             except ClientError as exception:
+                 msg = f"Could not perform inference for Amazon Bedrock model {self.model} due to:\n{exception}"
+                 raise AmazonBedrockInferenceError(msg) from exception
+
+             response_body = json.loads(response.get("body").read())
+             embedding = response_body["embedding"]
+             all_embeddings.append(embedding)
+
+         return all_embeddings
+
+     def _embed_cohere(self, image_uris: List[str]) -> List[List[float]]:
+         """
+         Internal method to embed base64 images using Cohere models.
+
+         :param image_uris: List of image uris containing the base64 image and the mime type.
+         :return: List of embeddings.
+         """
+
+         cohere_body = {"input_type": "image"}
+         if self.embedding_types:
+             cohere_body["embedding_types"] = self.embedding_types
+
+         all_embeddings = []
+
+         for image in tqdm(image_uris, disable=not self.progress_bar, desc="Creating embeddings"):
+             body = {"images": [image], **cohere_body}
+             try:
+                 response = self._client.invoke_model(
+                     body=json.dumps(body), modelId=self.model, accept="*/*", contentType="application/json"
+                 )
+             except ClientError as exception:
+                 msg = f"Could not perform inference for Amazon Bedrock model {self.model} due to:\n{exception}"
+                 raise AmazonBedrockInferenceError(msg) from exception
+
+             response_body = json.loads(response.get("body").read())
+             embeddings = response_body["embeddings"]
+
+             # if embedding_types is specified, cohere returns a dict with the embedding types as keys
+             if isinstance(embeddings, dict):
+                 for embedding in embeddings.values():
+                     all_embeddings.append(embedding[0])
+             else:
+                 # if embedding_types is not specified, cohere returns
+                 # a nested list of float embeddings
+                 all_embeddings.append(embeddings[0])
+
+         return all_embeddings
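The new file above is the main addition of this release. A hedged usage sketch follows: `outputEmbeddingLength` is a Titan `embeddingConfig` option documented by Bedrock rather than anything set in this diff, and using a `page_number` metadata field to select a PDF page is an assumption about Haystack's image utilities, not something the file above spells out.

```python
# Illustrative sketch, not part of the package: embed image Documents with the
# new AmazonBedrockDocumentImageEmbedder, downscaling inputs and forwarding a
# Titan embeddingConfig through the component's **kwargs pass-through.
from haystack import Document
from haystack_integrations.components.embedders.amazon_bedrock import AmazonBedrockDocumentImageEmbedder

embedder = AmazonBedrockDocumentImageEmbedder(
    model="amazon.titan-embed-image-v1",
    image_size=(1024, 1024),                         # resize large images before base64 encoding
    embeddingConfig={"outputEmbeddingLength": 256},  # added to the Titan request body by _embed_titan
)

docs = [
    Document(content="A photo of a cat", meta={"file_path": "cat.jpg"}),
    Document(content="Quarterly report", meta={"file_path": "report.pdf", "page_number": 1}),
]
embedded = embedder.run(documents=docs)["documents"]
print(len(embedded[0].embedding))
```

For Cohere models the constructor accepts an `embedding_types` kwarg instead, but, as the `__init__` above enforces, only a single embedding type per component instance.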
@@ -19,6 +19,7 @@ SUPPORTED_EMBEDDING_MODELS = [
      "cohere.embed-english-v3",
      "cohere.embed-multilingual-v3",
      "amazon.titan-embed-text-v2:0",
+     "amazon.titan-embed-image-v1",
  ]


@@ -34,7 +35,7 @@ class AmazonBedrockTextEmbedder:

      os.environ["AWS_ACCESS_KEY_ID"] = "..."
      os.environ["AWS_SECRET_ACCESS_KEY_ID"] = "..."
-     os.environ["AWS_REGION_NAME"] = "..."
+     os.environ["AWS_DEFAULT_REGION"] = "..."

      embedder = AmazonBedrockTextEmbedder(
          model="cohere.embed-english-v3",
@@ -54,6 +55,7 @@ class AmazonBedrockTextEmbedder:
              "cohere.embed-english-v3",
              "cohere.embed-multilingual-v3",
              "amazon.titan-embed-text-v2:0",
+             "amazon.titan-embed-image-v1",
          ],
          aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
          aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
@@ -114,9 +116,9 @@ class AmazonBedrockTextEmbedder:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
-             config: Optional[Config] = None
-             if self.boto3_config:
-                 config = Config(**self.boto3_config)
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
              self._client = session.client("bedrock-runtime", config=config)
          except Exception as exception:
              msg = (
@@ -213,9 +213,9 @@ class AmazonBedrockChatGenerator:
          def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
              return secret.resolve_value() if secret else None

-         config: Optional[Config] = None
-         if self.boto3_config:
-             config = Config(**self.boto3_config)
+         config = Config(
+             user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+         )

          try:
              # sync session
@@ -226,6 +226,7 @@ class AmazonBedrockChatGenerator:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
+
              self.client = session.client("bedrock-runtime", config=config)

          except Exception as exception:
@@ -498,7 +499,10 @@ class AmazonBedrockChatGenerator:
          session = self._get_async_session()
          # Note: https://aioboto3.readthedocs.io/en/latest/usage.html
          # we need to create a new client for each request
-         async with session.client("bedrock-runtime", config=self.boto3_config) as async_client:
+         config = Config(
+             user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+         )
+         async with session.client("bedrock-runtime", config=config) as async_client:
              if callback:
                  response = await async_client.converse_stream(**params)
                  response_stream: EventStream = response.get("stream")
@@ -55,6 +55,11 @@ def _format_tool_call_message(tool_call_message: ChatMessage) -> Dict[str, Any]:
          Dictionary representing the tool call message in Bedrock's expected format
      """
      content: List[Dict[str, Any]] = []
+
+     # tool call messages can contain reasoning content
+     if reasoning_contents := tool_call_message.meta.get("reasoning_contents"):
+         content.extend(_format_reasoning_contents(reasoning_contents=reasoning_contents))
+
      # Tool call message can contain text
      if tool_call_message.text:
          content.append({"text": tool_call_message.text})
@@ -157,6 +162,24 @@ def _repair_tool_result_messages(bedrock_formatted_messages: List[Dict[str, Any]
      return [msg for _, msg in repaired_bedrock_formatted_messages]


+ def _format_reasoning_contents(reasoning_contents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+     """
+     Format reasoning contents to match Bedrock's expected structure.
+
+     :param reasoning_contents: List of reasoning content dictionaries from Haystack ChatMessage metadata.
+     :returns: List of formatted reasoning content dictionaries for Bedrock.
+     """
+     formatted_contents = []
+     for reasoning_content in reasoning_contents:
+         formatted_content = {"reasoningContent": reasoning_content["reasoning_content"]}
+         if reasoning_text := formatted_content["reasoningContent"].pop("reasoning_text", None):
+             formatted_content["reasoningContent"]["reasoningText"] = reasoning_text
+         if redacted_content := formatted_content["reasoningContent"].pop("redacted_content", None):
+             formatted_content["reasoningContent"]["redactedContent"] = redacted_content
+         formatted_contents.append(formatted_content)
+     return formatted_contents
+
+
  def _format_text_image_message(message: ChatMessage) -> Dict[str, Any]:
      """
      Format a Haystack ChatMessage containing text and optional image content into Bedrock format.
@@ -168,6 +191,10 @@ def _format_text_image_message(message: ChatMessage) -> Dict[str, Any]:
      content_parts = message._content

      bedrock_content_blocks: List[Dict[str, Any]] = []
+     # Add reasoning content if available as the first content block
+     if message.meta.get("reasoning_contents"):
+         bedrock_content_blocks.extend(_format_reasoning_contents(reasoning_contents=message.meta["reasoning_contents"]))
+
      for part in content_parts:
          if isinstance(part, TextContent):
              bedrock_content_blocks.append({"text": part.text})
@@ -221,7 +248,6 @@ def _format_messages(messages: List[ChatMessage]) -> Tuple[List[Dict[str, Any]],
      return system_prompts, repaired_bedrock_formatted_messages


- # Bedrock to Haystack util method
  def _parse_completion_response(response_body: Dict[str, Any], model: str) -> List[ChatMessage]:
      """
      Parse a Bedrock API response into Haystack ChatMessage objects.
@@ -255,6 +281,7 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
              # Process all content blocks and combine them into a single message
              text_content = []
              tool_calls = []
+             reasoning_contents = []
              for content_block in content_blocks:
                  if "text" in content_block:
                      text_content.append(content_block["text"])
@@ -267,6 +294,17 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
                          arguments=tool_use.get("input", {}),
                      )
                      tool_calls.append(tool_call)
+                 elif "reasoningContent" in content_block:
+                     reasoning_content = content_block["reasoningContent"]
+                     # If reasoningText is present, replace it with reasoning_text
+                     if "reasoningText" in reasoning_content:
+                         reasoning_content["reasoning_text"] = reasoning_content.pop("reasoningText")
+                     if "redactedContent" in reasoning_content:
+                         reasoning_content["redacted_content"] = reasoning_content.pop("redactedContent")
+                     reasoning_contents.append({"reasoning_content": reasoning_content})
+
+             # If reasoning contents were found, add them to the base meta
+             base_meta.update({"reasoning_contents": reasoning_contents})

              # Create a single ChatMessage with combined text and tool calls
              replies.append(ChatMessage.from_assistant(" ".join(text_content), tool_calls=tool_calls, meta=base_meta))
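Taken together with the `_format_reasoning_contents` hunk above, these changes give reasoning blocks a round trip between Bedrock's camelCase content blocks and snake_cased entries under `ChatMessage.meta["reasoning_contents"]`. A sketch of the data shapes implied by the code (the text and signature values here are purely illustrative):

```python
# What Bedrock returns inside a message's content list ...
bedrock_block = {
    "reasoningContent": {
        "reasoningText": {"text": "Check the tool output first...", "signature": "abc123"}
    }
}

# ... what _parse_completion_response stores on the ChatMessage meta ...
haystack_meta = {
    "reasoning_contents": [
        {"reasoning_content": {"reasoning_text": {"text": "Check the tool output first...", "signature": "abc123"}}}
    ]
}

# ... and _format_reasoning_contents converts the meta entry back to the
# camelCase block when the message is sent to Bedrock again, e.g. in a
# tool-calling loop.
```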
@@ -274,7 +312,6 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
      return replies


- # Bedrock streaming to Haystack util methods
  def _convert_event_to_streaming_chunk(
      event: Dict[str, Any], model: str, component_info: ComponentInfo
  ) -> StreamingChunk:
@@ -367,6 +404,22 @@ def _convert_event_to_streaming_chunk(
                      "received_at": datetime.now(timezone.utc).isoformat(),
                  },
              )
+         # This is for accumulating reasoning content deltas
+         elif "reasoningContent" in delta:
+             reasoning_content = delta["reasoningContent"]
+             if "redactedContent" in reasoning_content:
+                 reasoning_content["redacted_content"] = reasoning_content.pop("redactedContent")
+             streaming_chunk = StreamingChunk(
+                 content="",
+                 meta={
+                     "model": model,
+                     "index": 0,
+                     "tool_calls": None,
+                     "finish_reason": None,
+                     "received_at": datetime.now(timezone.utc).isoformat(),
+                     "reasoning_contents": [{"index": block_idx, "reasoning_content": reasoning_content}],
+                 },
+             )

      elif "messageStop" in event:
          finish_reason = event["messageStop"].get("stopReason")
@@ -406,6 +459,66 @@ def _convert_event_to_streaming_chunk(
      return streaming_chunk


+ def _process_reasoning_contents(chunks: List[StreamingChunk]) -> List[Dict[str, Any]]:
+     """
+     Process reasoning contents from a list of StreamingChunk objects into the Bedrock expected format.
+
+     :param chunks: List of StreamingChunk objects potentially containing reasoning contents.
+
+     :returns: List of Bedrock formatted reasoning content dictionaries
+     """
+     formatted_reasoning_contents = []
+     current_index = None
+     reasoning_text = ""
+     reasoning_signature = None
+     redacted_content = None
+     for chunk in chunks:
+         reasoning_contents = chunk.meta.get("reasoning_contents", [])
+
+         for reasoning_content in reasoning_contents:
+             content_block_index = reasoning_content["index"]
+
+             # Start new group when index changes
+             if current_index is not None and content_block_index != current_index:
+                 # Finalize current group
+                 if reasoning_text:
+                     formatted_reasoning_contents.append(
+                         {
+                             "reasoning_content": {
+                                 "reasoning_text": {"text": reasoning_text, "signature": reasoning_signature},
+                             }
+                         }
+                     )
+                 if redacted_content:
+                     formatted_reasoning_contents.append({"reasoning_content": {"redacted_content": redacted_content}})
+                 reasoning_text = ""
+                 reasoning_signature = None
+                 redacted_content = None
+
+             # Accumulate content for current index
+             current_index = content_block_index
+             reasoning_text += reasoning_content["reasoning_content"].get("text", "")
+             if "redacted_content" in reasoning_content["reasoning_content"]:
+                 redacted_content = reasoning_content["reasoning_content"]["redacted_content"]
+             if "signature" in reasoning_content["reasoning_content"]:
+                 reasoning_signature = reasoning_content["reasoning_content"]["signature"]
+
+     # Finalize the last group
+     if current_index is not None:
+         if reasoning_text:
+             formatted_reasoning_contents.append(
+                 {
+                     "reasoning_content": {
+                         "reasoning_text": {"text": reasoning_text, "signature": reasoning_signature},
+                     }
+                 }
+             )
+         if redacted_content:
+             formatted_reasoning_contents.append({"reasoning_content": {"redacted_content": redacted_content}})
+
+     return formatted_reasoning_contents
+
+
  def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> ChatMessage:
      """
      Converts a list of streaming chunks into a ChatMessage object.
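For streaming responses, each chunk carries its reasoning deltas under `meta["reasoning_contents"]` together with the content block `index`; `_process_reasoning_contents` concatenates the text per index and keeps the last signature or redacted content it sees. A small sketch of that accumulation with illustrative values (the next hunk then stores the result on the assembled `ChatMessage` under the same meta key as the non-streaming path):

```python
# Hedged sketch: three reasoning deltas for content block 0, as they would
# appear spread across streaming chunks ...
deltas = [
    {"index": 0, "reasoning_content": {"text": "Step 1."}},
    {"index": 0, "reasoning_content": {"text": " Step 2."}},
    {"index": 0, "reasoning_content": {"signature": "sig-xyz"}},
]

# ... collapse into a single reasoning_text entry once accumulated:
accumulated = [
    {"reasoning_content": {"reasoning_text": {"text": "Step 1. Step 2.", "signature": "sig-xyz"}}}
]
```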
@@ -421,8 +534,12 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C
          A ChatMessage object constructed from the streaming chunks, containing the aggregated text, processed tool
          calls, and metadata.
      """
+     # Join all text content from the chunks
      text = "".join([chunk.content for chunk in chunks])

+     # If reasoning content is present in any chunk, accumulate it
+     reasoning_contents = _process_reasoning_contents(chunks=chunks)
+
      # Process tool calls if present in any chunk
      tool_calls = []
      tool_call_data: Dict[int, Dict[str, str]] = {} # Track tool calls by index
@@ -474,6 +591,7 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C
          "finish_reason": finish_reason,
          "completion_start_time": chunks[0].meta.get("received_at"), # first chunk received
          "usage": usage,
+         "reasoning_contents": reasoning_contents,
      }

      return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)
@@ -167,9 +167,9 @@ class AmazonBedrockGenerator:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
-             config: Optional[Config] = None
-             if self.boto3_config:
-                 config = Config(**self.boto3_config)
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
              self.client = session.client("bedrock-runtime", config=config)
          except Exception as exception:
              msg = (