amazon-bedrock-haystack 3.9.1__py3-none-any.whl → 3.11.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: amazon-bedrock-haystack
- Version: 3.9.1
+ Version: 3.11.0
  Summary: An integration of Amazon Bedrock as an AmazonBedrockGenerator component.
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -29,43 +29,19 @@ Description-Content-Type: text/markdown
  [![PyPI - Version](https://img.shields.io/pypi/v/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack)
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/amazon-bedrock-haystack.svg)](https://pypi.org/project/amazon-bedrock-haystack)

+ - [Integration page](https://haystack.deepset.ai/integrations/amazon-bedrock)
+ - [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/amazon_bedrock/CHANGELOG.md)
  -----

- **Table of Contents**
-
- - [Installation](#installation)
- - [Contributing](#contributing)
- - [License](#license)
-
- ## Installation
-
- ```console
- pip install amazon-bedrock-haystack
- ```
-
  ## Contributing

- `hatch` is the best way to interact with this project, to install it:
- ```sh
- pip install hatch
- ```
-
- With `hatch` installed, to run all the tests:
- ```
- hatch run test:all
- ```
+ Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).

- To format your code and perform linting using Ruff (with automatic fixes), run:
+ To run integration tests locally, you need to authenticate with AWS.
+ For example, you can do that by exporting the following environment variables:
  ```
- hatch run fmt
+ export AWS_ACCESS_KEY_ID=...
+ export AWS_SECRET_ACCESS_KEY=...
+ export AWS_SESSION_TOKEN=...
+ export AWS_DEFAULT_REGION=...
  ```
-
- To check for static type errors, run:
-
- ```console
- $ hatch run test:types
- ```
-
- ## License
-
- `amazon-bedrock-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license.
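The new README section above covers authentication through environment variables only. As an editorial aside (not part of this diff), the same credentials can also be passed explicitly through the `Secret` parameters that the package's components expose, as their signatures elsewhere in this diff show. A minimal sketch, assuming the account has access to the requested Bedrock model:

```python
# Hedged sketch: explicit credentials instead of environment variables.
# Secret.from_token is Haystack's standard way to wrap an in-memory credential;
# the model ID is only an example of a Bedrock model your account might access.
from haystack.utils import Secret
from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockGenerator

generator = AmazonBedrockGenerator(
    model="anthropic.claude-3-5-sonnet-20240620-v1:0",
    aws_access_key_id=Secret.from_token("..."),
    aws_secret_access_key=Secret.from_token("..."),
    aws_region_name=Secret.from_token("us-east-1"),
)
print(generator.run("What is Amazon Bedrock?")["replies"][0])
```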
@@ -3,20 +3,21 @@ haystack_integrations/common/amazon_bedrock/__init__.py,sha256=6GZ8Y3Lw0rLOsOAqi
  haystack_integrations/common/amazon_bedrock/errors.py,sha256=ReheDbY7L3EJkWcUoih6lWHjbPHg2TlUs9SnXIKK7Gg,744
  haystack_integrations/common/amazon_bedrock/utils.py,sha256=ASAwEhInF9F6rhL4CbXFQUFU1pSdscWvG6jcrXkEUhc,2735
  haystack_integrations/components/embedders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- haystack_integrations/components/embedders/amazon_bedrock/__init__.py,sha256=CFqYmAVq2aavlMkZHYScKHOTwwETdRzRZITMqGhJ9Kw,298
- haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py,sha256=YBVlFIo9t2qzVkNWaFKc-FNRo7R_pKfHmqNRkoMZ9K0,12952
- haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py,sha256=KNvsUP-YZD17_zVBwMs42v0S2uuTE_ajMaj9bjt1XlE,9036
+ haystack_integrations/components/embedders/amazon_bedrock/__init__.py,sha256=7GlhHJ4jFHCxq5QN5losGuGtrGNjvEx2dSQvEYD2yG0,408
+ haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py,sha256=DD34-HAGwGwTU7KWGqKXXlFdwIs21JavBRDHrBqC-m4,13060
+ haystack_integrations/components/embedders/amazon_bedrock/document_image_embedder.py,sha256=CHNH0Dt7JQqYNbZi1lKsGvarnEhJn3UNGdghF0IhqWw,16163
+ haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py,sha256=3eSqt3XpH2thblTeOPf-ej1V2UbdG2z50d3jInq1bYc,9144
  haystack_integrations/components/generators/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  haystack_integrations/components/generators/amazon_bedrock/__init__.py,sha256=lv4NouIVm78YavUssWQrHHP_81u-7j21qW8v1kZMJPQ,284
  haystack_integrations/components/generators/amazon_bedrock/adapters.py,sha256=yBC-3YwV6qAwSXMtdZiLSYh2lUpPQIDy7Efl7w-Cu-k,19640
- haystack_integrations/components/generators/amazon_bedrock/generator.py,sha256=c_saV5zxFYQVJT0Hzo80lKty46itL0Dp31VuDueYa3M,14716
+ haystack_integrations/components/generators/amazon_bedrock/generator.py,sha256=Brzw0XvtPJhz2kR2I3liAqWHRmDR6p5HzJerEAPhoJU,14743
  haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py,sha256=6GZ8Y3Lw0rLOsOAqi6Tu5mZC977UzQvgDxKpOWr8IQw,110
- haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py,sha256=iIaMsOOX9eYvR1GNgpxNKxaOli91ShrCv3MuBBK1NSs,24743
- haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=bDNaExYhrhxLHyOdu6EHC8Ixdpg43IIPJldjddzV4GE,23236
+ haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py,sha256=_0dpBoZGY9kgK9zQOTskcjElcTifwhyBAixXDliK-vY,24918
+ haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=g2SZV8LdLobaCZpwWCreBJn1BtS1V3-wQkpisStJrcY,29015
  haystack_integrations/components/rankers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  haystack_integrations/components/rankers/amazon_bedrock/__init__.py,sha256=Zrc3BSVkEaXYpliEi6hKG9bqW4J7DNk93p50SuoyT1Q,107
  haystack_integrations/components/rankers/amazon_bedrock/ranker.py,sha256=enAjf2QyDwfpidKkFCdLz954cx-Tjh9emrOS3vINJDg,12344
- amazon_bedrock_haystack-3.9.1.dist-info/METADATA,sha256=wByKDtTt_NpNsmtNh9t3-8izh0dKl04569OfCt6xR3w,2287
- amazon_bedrock_haystack-3.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- amazon_bedrock_haystack-3.9.1.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
- amazon_bedrock_haystack-3.9.1.dist-info/RECORD,,
+ amazon_bedrock_haystack-3.11.0.dist-info/METADATA,sha256=5nA_v2Ze5xk1p-RQxbshQ0XGa3LYFljVGvNi2VvKU7o,2225
+ amazon_bedrock_haystack-3.11.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ amazon_bedrock_haystack-3.11.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
+ amazon_bedrock_haystack-3.11.0.dist-info/RECORD,,
@@ -2,6 +2,7 @@
  #
  # SPDX-License-Identifier: Apache-2.0
  from .document_embedder import AmazonBedrockDocumentEmbedder
+ from .document_image_embedder import AmazonBedrockDocumentImageEmbedder
  from .text_embedder import AmazonBedrockTextEmbedder

- __all__ = ["AmazonBedrockDocumentEmbedder", "AmazonBedrockTextEmbedder"]
+ __all__ = ["AmazonBedrockDocumentEmbedder", "AmazonBedrockDocumentImageEmbedder", "AmazonBedrockTextEmbedder"]
@@ -21,6 +21,7 @@ SUPPORTED_EMBEDDING_MODELS = [
      "cohere.embed-english-v3",
      "cohere.embed-multilingual-v3",
      "amazon.titan-embed-text-v2:0",
+     "amazon.titan-embed-image-v1",
  ]


@@ -38,7 +39,7 @@ class AmazonBedrockDocumentEmbedder:

      os.environ["AWS_ACCESS_KEY_ID"] = "..."
      os.environ["AWS_SECRET_ACCESS_KEY_ID"] = "..."
-     os.environ["AWS_REGION_NAME"] = "..."
+     os.environ["AWS_DEFAULT_REGION"] = "..."

      embedder = AmazonBedrockDocumentEmbedder(
          model="cohere.embed-english-v3",
@@ -61,6 +62,7 @@ class AmazonBedrockDocumentEmbedder:
              "cohere.embed-english-v3",
              "cohere.embed-multilingual-v3",
              "amazon.titan-embed-text-v2:0",
+             "amazon.titan-embed-image-v1",
          ],
          aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
          aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
@@ -136,9 +138,9 @@ class AmazonBedrockDocumentEmbedder:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
-             config: Optional[Config] = None
-             if self.boto3_config:
-                 config = Config(**self.boto3_config)
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
              self._client = session.client("bedrock-runtime", config=config)
          except Exception as exception:
              msg = (
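The recurring change in the hunk above (and repeated in the text embedder, the generators, and the chat generator below) is that a botocore `Config` carrying `user_agent_extra="x-client-framework:haystack"` is now always built, with any user-supplied `boto3_config` merged in, instead of a `Config` only being created when `boto3_config` is set. A minimal sketch of that merge, using illustrative botocore options that are not taken from this package:

```python
# Hedged sketch of the merge behaviour introduced above; read_timeout and
# retries are ordinary botocore Config options chosen only for illustration.
from botocore.config import Config

boto3_config = {"read_timeout": 120, "retries": {"max_attempts": 5}}
config = Config(
    user_agent_extra="x-client-framework:haystack",
    **(boto3_config if boto3_config else {}),
)
print(config.user_agent_extra)  # "x-client-framework:haystack"
print(config.read_timeout)      # 120
```

One side effect of the keyword expansion is that a `user_agent_extra` key supplied inside `boto3_config` would now collide with the injected value and raise a `TypeError`.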
@@ -0,0 +1,365 @@
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ import json
+ from dataclasses import replace
+ from typing import Any, Dict, List, Literal, Optional, Tuple
+
+ from botocore.config import Config
+ from botocore.exceptions import ClientError
+ from haystack import Document, component, default_from_dict, default_to_dict, logging
+ from haystack.components.converters.image.image_utils import (
+     _batch_convert_pdf_pages_to_images,
+     _encode_image_to_base64,
+     _extract_image_sources_info,
+     _PDFPageInfo,
+ )
+ from haystack.dataclasses import ByteStream
+ from haystack.utils.auth import Secret, deserialize_secrets_inplace
+ from tqdm import tqdm
+
+ from haystack_integrations.common.amazon_bedrock.errors import (
+     AmazonBedrockConfigurationError,
+     AmazonBedrockInferenceError,
+ )
+ from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
+
+ logger = logging.getLogger(__name__)
+
+ SUPPORTED_EMBEDDING_MODELS = ["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"]
+
+
+ @component
+ class AmazonBedrockDocumentImageEmbedder:
+     """
+     A component for computing Document embeddings based on images using Amazon Bedrock models.
+
+     The embedding of each Document is stored in the `embedding` field of the Document.
+
+     ### Usage example
+     ```python
+     from haystack import Document
+     from haystack_integrations.components.embedders.amazon_bedrock import AmazonBedrockDocumentImageEmbedder
+
+     os.environ["AWS_ACCESS_KEY_ID"] = "..."
+     os.environ["AWS_SECRET_ACCESS_KEY_ID"] = "..."
+     os.environ["AWS_DEFAULT_REGION"] = "..."
+
+     embedder = AmazonBedrockDocumentImageEmbedder(model="amazon.titan-embed-image-v1")
+
+     documents = [
+         Document(content="A photo of a cat", meta={"file_path": "cat.jpg"}),
+         Document(content="A photo of a dog", meta={"file_path": "dog.jpg"}),
+     ]
+
+     result = embedder.run(documents=documents)
+     documents_with_embeddings = result["documents"]
+     print(documents_with_embeddings)
+
+     # [Document(id=...,
+     # content='A photo of a cat',
+     # meta={'file_path': 'cat.jpg',
+     # 'embedding_source': {'type': 'image', 'file_path_meta_field': 'file_path'}},
+     # embedding=vector of size 512),
+     # ...]
+     ```
+     """
+
+     def __init__(
+         self,
+         *,
+         model: Literal["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"],
+         aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
+         aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
+             "AWS_SECRET_ACCESS_KEY", strict=False
+         ),
+         aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
+         aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
+         aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
+         file_path_meta_field: str = "file_path",
+         root_path: Optional[str] = None,
+         image_size: Optional[Tuple[int, int]] = None,
+         progress_bar: bool = True,
+         boto3_config: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> None:
+         """
+         Creates a AmazonBedrockDocumentImageEmbedder component.
+
+         :param model:
+             The Bedrock model to use for calculating embeddings. Pass a valid model ID.
+             Supported models:
+             - "amazon.titan-embed-image-v1"
+             - "cohere.embed-english-v3"
+             - "cohere.embed-multilingual-v3"
+         :param aws_access_key_id: AWS access key ID.
+         :param aws_secret_access_key: AWS secret access key.
+         :param aws_session_token: AWS session token.
+         :param aws_region_name: AWS region name.
+         :param aws_profile_name: AWS profile name.
+         :param file_path_meta_field: The metadata field in the Document that contains the file path to the image or PDF.
+         :param root_path: The root directory path where document files are located. If provided, file paths in
+             document metadata will be resolved relative to this path. If None, file paths are treated as absolute paths.
+         :param image_size:
+             If provided, resizes the image to fit within the specified dimensions (width, height) while
+             maintaining aspect ratio. This reduces file size, memory usage, and processing time, which is beneficial
+             when working with models that have resolution constraints or when transmitting images to remote services.
+         :param progress_bar:
+             If `True`, shows a progress bar when embedding documents.
+         :param boto3_config: The configuration for the boto3 client.
+         :param kwargs: Additional parameters to pass for model inference.
+             For example, `embeddingConfig` for Amazon Titan models and
+             `embedding_types` for Cohere models.
+         :raises ValueError: If the model is not supported.
+         :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
+         """
+         if not model or model not in SUPPORTED_EMBEDDING_MODELS:
+             msg = "Please provide a valid model from the list of supported models: " + ", ".join(
+                 SUPPORTED_EMBEDDING_MODELS
+             )
+             raise ValueError(msg)
+
+         self.file_path_meta_field = file_path_meta_field
+         self.root_path = root_path or ""
+         self.model = model
+         self.boto3_config = boto3_config
+
+         self.aws_access_key_id = aws_access_key_id
+         self.aws_secret_access_key = aws_secret_access_key
+         self.aws_session_token = aws_session_token
+         self.aws_region_name = aws_region_name
+         self.aws_profile_name = aws_profile_name
+         self.image_size = image_size
+         self.progress_bar = progress_bar
+         self.kwargs = kwargs
+         self.embedding_types = None
+
+         if emmbedding_types := self.kwargs.get("embedding_types"):
+             if len(emmbedding_types) > 1:
+                 msg = (
+                     "You have provided multiple embedding_types for Cohere model. "
+                     "AmazonBedrockDocumentImageEmbedder only supports one embedding_type at a time."
+                 )
+                 raise ValueError(msg)
+             self.embedding_types = emmbedding_types
+
+         def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
+             return secret.resolve_value() if secret else None
+
+         try:
+             session = get_aws_session(
+                 aws_access_key_id=resolve_secret(aws_access_key_id),
+                 aws_secret_access_key=resolve_secret(aws_secret_access_key),
+                 aws_session_token=resolve_secret(aws_session_token),
+                 aws_region_name=resolve_secret(aws_region_name),
+                 aws_profile_name=resolve_secret(aws_profile_name),
+             )
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
+             self._client = session.client("bedrock-runtime", config=config)
+         except Exception as exception:
+             msg = (
+                 "Could not connect to Amazon Bedrock. Make sure the AWS environment is configured correctly. "
+                 "See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration"
+             )
+             raise AmazonBedrockConfigurationError(msg) from exception
+
+     def to_dict(self) -> dict[str, Any]:
+         """
+         Serializes the component to a dictionary.
+
+         :returns:
+             Dictionary with serialized data.
+         """
+         serialization_dict = default_to_dict(
+             self,
+             file_path_meta_field=self.file_path_meta_field,
+             root_path=self.root_path,
+             model=self.model,
+             aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None,
+             aws_secret_access_key=self.aws_secret_access_key.to_dict() if self.aws_secret_access_key else None,
+             aws_session_token=self.aws_session_token.to_dict() if self.aws_session_token else None,
+             aws_region_name=self.aws_region_name.to_dict() if self.aws_region_name else None,
+             aws_profile_name=self.aws_profile_name.to_dict() if self.aws_profile_name else None,
+             progress_bar=self.progress_bar,
+             boto3_config=self.boto3_config,
+             image_size=self.image_size,
+             **self.kwargs,
+         )
+         return serialization_dict
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> "AmazonBedrockDocumentImageEmbedder":
+         """
+         Deserializes the component from a dictionary.
+
+         :param data:
+             Dictionary to deserialize from.
+         :returns:
+             Deserialized component.
+         """
+         init_params = data["init_parameters"]
+         deserialize_secrets_inplace(
+             init_params,
+             keys=[
+                 "aws_access_key_id",
+                 "aws_secret_access_key",
+                 "aws_session_token",
+                 "aws_region_name",
+                 "aws_profile_name",
+             ],
+         )
+         return default_from_dict(cls, data)
+
+     @component.output_types(documents=list[Document])
+     def run(self, documents: list[Document]) -> dict[str, list[Document]]:
+         """
+         Embed a list of images.
+
+         :param documents:
+             Documents to embed.
+
+         :returns:
+             A dictionary with the following keys:
+             - `documents`: Documents with embeddings.
+         """
+         if not isinstance(documents, list) or (documents and not isinstance(documents[0], Document)):
+             msg = (
+                 "AmazonBedrockDocumentImageEmbedder expects a list of Documents as input. "
+                 "In case you want to embed a string, please use the AmazonBedrockTextEmbedder."
+             )
+             raise TypeError(msg)
+         images_source_info = _extract_image_sources_info(
+             documents=documents, file_path_meta_field=self.file_path_meta_field, root_path=self.root_path
+         )
+
+         images_to_embed: list = [None] * len(documents)
+         pdf_page_infos: list[_PDFPageInfo] = []
+
+         for doc_idx, image_source_info in enumerate(images_source_info):
+             if image_source_info["mime_type"] == "application/pdf":
+                 # Store PDF documents for later processing
+                 page_number = image_source_info.get("page_number")
+
+                 pdf_page_info: _PDFPageInfo = {
+                     "doc_idx": doc_idx,
+                     "path": image_source_info["path"],
+                     # page_number is added but mypy doesn't know that
+                     "page_number": page_number, # type: ignore[typeddict-item]
+                 }
+                 pdf_page_infos.append(pdf_page_info)
+             else:
+                 # Process images directly
+                 image_byte_stream = ByteStream.from_file_path(
+                     filepath=image_source_info["path"], mime_type=image_source_info["mime_type"]
+                 )
+                 mime_type, base64_image = _encode_image_to_base64(bytestream=image_byte_stream, size=self.image_size)
+                 if "cohere" in self.model:
+                     images_to_embed[doc_idx] = f"data:{mime_type};base64,{base64_image}"
+                 else:
+                     images_to_embed[doc_idx] = base64_image
+
+         pdf_images_by_doc_idx = _batch_convert_pdf_pages_to_images(
+             pdf_page_infos=pdf_page_infos, return_base64=True, size=self.image_size
+         )
+
+         # the pdf_images_by_doc_idx has base64 images but mypy cant detect that
+         for doc_idx, base64_image in pdf_images_by_doc_idx.items(): # type: ignore[assignment]
+             pdf_image_uri = f"data:application/pdf;base64,{base64_image}" if "cohere" in self.model else base64_image
+             images_to_embed[doc_idx] = pdf_image_uri
+
+         none_images_doc_ids = [documents[doc_idx].id for doc_idx, image in enumerate(images_to_embed) if image is None]
+         if none_images_doc_ids:
+             msg = f"Conversion failed for some documents. Document IDs: {none_images_doc_ids}."
+             raise RuntimeError(msg)
+
+         if "cohere" in self.model:
+             embeddings = self._embed_cohere(image_uris=images_to_embed)
+         elif "titan" in self.model:
+             embeddings = self._embed_titan(images=images_to_embed)
+         else:
+             msg = f"Model {self.model} is not supported. Supported models are: {', '.join(SUPPORTED_EMBEDDING_MODELS)}."
+             raise ValueError(msg)
+
+         docs_with_embeddings = []
+
+         for doc, emb in zip(documents, embeddings):
+             # we store this information for later inspection
+             new_meta = {
+                 **doc.meta,
+                 "embedding_source": {"type": "image", "file_path_meta_field": self.file_path_meta_field},
+             }
+             new_doc = replace(doc, meta=new_meta, embedding=emb)
+             docs_with_embeddings.append(new_doc)
+
+         return {"documents": docs_with_embeddings}
+
+     def _embed_titan(self, images: List[str]) -> List[List[float]]:
+         """
+         Internal method to embed base64 images using Amazon Titan models.
+
+         :param images: List of base64 images.
+         :return: List of embeddings.
+         """
+
+         titan_body = {}
+         if embedding_config := self.kwargs.get("embeddingConfig"):
+             titan_body["embeddingConfig"] = embedding_config # optional parameter for Amazon Titan models
+
+         all_embeddings = []
+
+         for image in tqdm(images, disable=not self.progress_bar, desc="Creating embeddings"):
+             body = {"inputImage": image, **titan_body}
+             try:
+                 response = self._client.invoke_model(
+                     body=json.dumps(body), modelId=self.model, accept="*/*", contentType="application/json"
+                 )
+             except ClientError as exception:
+                 msg = f"Could not perform inference for Amazon Bedrock model {self.model} due to:\n{exception}"
+                 raise AmazonBedrockInferenceError(msg) from exception
+
+             response_body = json.loads(response.get("body").read())
+             embedding = response_body["embedding"]
+             all_embeddings.append(embedding)
+
+         return all_embeddings
+
+     def _embed_cohere(self, image_uris: List[str]) -> List[List[float]]:
+         """
+         Internal method to embed base64 images using Cohere models.
+
+         :param image_uris: List of image uris containing the base64 image and the mime type.
+         :return: List of embeddings.
+         """
+
+         cohere_body = {"input_type": "image"}
+         if self.embedding_types:
+             cohere_body["embedding_types"] = self.embedding_types
+
+         all_embeddings = []
+
+         for image in tqdm(image_uris, disable=not self.progress_bar, desc="Creating embeddings"):
+             body = {"images": [image], **cohere_body}
+             try:
+                 response = self._client.invoke_model(
+                     body=json.dumps(body), modelId=self.model, accept="*/*", contentType="application/json"
+                 )
+             except ClientError as exception:
+                 msg = f"Could not perform inference for Amazon Bedrock model {self.model} due to:\n{exception}"
+                 raise AmazonBedrockInferenceError(msg) from exception
+
+             response_body = json.loads(response.get("body").read())
+             embeddings = response_body["embeddings"]
+
+             # if embedding_types is specified, cohere returns a dict with the embedding types as keys
+             if isinstance(embeddings, dict):
+                 for embedding in embeddings.values():
+                     all_embeddings.append(embedding[0])
+             else:
+                 # if embedding_types is not specified, cohere returns
+                 # a nested list of float embeddings
+                 all_embeddings.append(embeddings[0])
+
+         return all_embeddings
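The new file above is the main addition of this release. A hedged usage sketch follows: `outputEmbeddingLength` is a Titan `embeddingConfig` option documented by Bedrock rather than anything set in this diff, and using a `page_number` metadata field to select a PDF page is an assumption about Haystack's image utilities, not something the file above spells out.

```python
# Illustrative sketch, not part of the package: embed image Documents with the
# new AmazonBedrockDocumentImageEmbedder, downscaling inputs and forwarding a
# Titan embeddingConfig through the component's **kwargs pass-through.
from haystack import Document
from haystack_integrations.components.embedders.amazon_bedrock import AmazonBedrockDocumentImageEmbedder

embedder = AmazonBedrockDocumentImageEmbedder(
    model="amazon.titan-embed-image-v1",
    image_size=(1024, 1024),                         # resize large images before base64 encoding
    embeddingConfig={"outputEmbeddingLength": 256},  # added to the Titan request body by _embed_titan
)

docs = [
    Document(content="A photo of a cat", meta={"file_path": "cat.jpg"}),
    Document(content="Quarterly report", meta={"file_path": "report.pdf", "page_number": 1}),
]
embedded = embedder.run(documents=docs)["documents"]
print(len(embedded[0].embedding))
```

For Cohere models the constructor accepts an `embedding_types` kwarg instead, but, as the `__init__` above enforces, only a single embedding type per component instance.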
@@ -19,6 +19,7 @@ SUPPORTED_EMBEDDING_MODELS = [
      "cohere.embed-english-v3",
      "cohere.embed-multilingual-v3",
      "amazon.titan-embed-text-v2:0",
+     "amazon.titan-embed-image-v1",
  ]


@@ -34,7 +35,7 @@ class AmazonBedrockTextEmbedder:

      os.environ["AWS_ACCESS_KEY_ID"] = "..."
      os.environ["AWS_SECRET_ACCESS_KEY_ID"] = "..."
-     os.environ["AWS_REGION_NAME"] = "..."
+     os.environ["AWS_DEFAULT_REGION"] = "..."

      embedder = AmazonBedrockTextEmbedder(
          model="cohere.embed-english-v3",
@@ -54,6 +55,7 @@ class AmazonBedrockTextEmbedder:
              "cohere.embed-english-v3",
              "cohere.embed-multilingual-v3",
              "amazon.titan-embed-text-v2:0",
+             "amazon.titan-embed-image-v1",
          ],
          aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
          aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
@@ -114,9 +116,9 @@ class AmazonBedrockTextEmbedder:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
-             config: Optional[Config] = None
-             if self.boto3_config:
-                 config = Config(**self.boto3_config)
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
              self._client = session.client("bedrock-runtime", config=config)
          except Exception as exception:
              msg = (
@@ -213,9 +213,9 @@ class AmazonBedrockChatGenerator:
          def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
              return secret.resolve_value() if secret else None

-         config: Optional[Config] = None
-         if self.boto3_config:
-             config = Config(**self.boto3_config)
+         config = Config(
+             user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+         )

          try:
              # sync session
@@ -226,6 +226,7 @@ class AmazonBedrockChatGenerator:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
+
              self.client = session.client("bedrock-runtime", config=config)

          except Exception as exception:
@@ -498,7 +499,10 @@ class AmazonBedrockChatGenerator:
          session = self._get_async_session()
          # Note: https://aioboto3.readthedocs.io/en/latest/usage.html
          # we need to create a new client for each request
-         async with session.client("bedrock-runtime", config=self.boto3_config) as async_client:
+         config = Config(
+             user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+         )
+         async with session.client("bedrock-runtime", config=config) as async_client:
              if callback:
                  response = await async_client.converse_stream(**params)
                  response_stream: EventStream = response.get("stream")
@@ -55,6 +55,11 @@ def _format_tool_call_message(tool_call_message: ChatMessage) -> Dict[str, Any]:
          Dictionary representing the tool call message in Bedrock's expected format
      """
      content: List[Dict[str, Any]] = []
+
+     # tool call messages can contain reasoning content
+     if reasoning_contents := tool_call_message.meta.get("reasoning_contents"):
+         content.extend(_format_reasoning_contents(reasoning_contents=reasoning_contents))
+
      # Tool call message can contain text
      if tool_call_message.text:
          content.append({"text": tool_call_message.text})
@@ -157,6 +162,24 @@ def _repair_tool_result_messages(bedrock_formatted_messages: List[Dict[str, Any]
      return [msg for _, msg in repaired_bedrock_formatted_messages]


+ def _format_reasoning_contents(reasoning_contents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+     """
+     Format reasoning contents to match Bedrock's expected structure.
+
+     :param reasoning_contents: List of reasoning content dictionaries from Haystack ChatMessage metadata.
+     :returns: List of formatted reasoning content dictionaries for Bedrock.
+     """
+     formatted_contents = []
+     for reasoning_content in reasoning_contents:
+         formatted_content = {"reasoningContent": reasoning_content["reasoning_content"]}
+         if reasoning_text := formatted_content["reasoningContent"].pop("reasoning_text", None):
+             formatted_content["reasoningContent"]["reasoningText"] = reasoning_text
+         if redacted_content := formatted_content["reasoningContent"].pop("redacted_content", None):
+             formatted_content["reasoningContent"]["redactedContent"] = redacted_content
+         formatted_contents.append(formatted_content)
+     return formatted_contents
+
+
  def _format_text_image_message(message: ChatMessage) -> Dict[str, Any]:
      """
      Format a Haystack ChatMessage containing text and optional image content into Bedrock format.
@@ -168,6 +191,10 @@ def _format_text_image_message(message: ChatMessage) -> Dict[str, Any]:
      content_parts = message._content

      bedrock_content_blocks: List[Dict[str, Any]] = []
+     # Add reasoning content if available as the first content block
+     if message.meta.get("reasoning_contents"):
+         bedrock_content_blocks.extend(_format_reasoning_contents(reasoning_contents=message.meta["reasoning_contents"]))
+
      for part in content_parts:
          if isinstance(part, TextContent):
              bedrock_content_blocks.append({"text": part.text})
@@ -221,7 +248,6 @@ def _format_messages(messages: List[ChatMessage]) -> Tuple[List[Dict[str, Any]],
      return system_prompts, repaired_bedrock_formatted_messages


- # Bedrock to Haystack util method
  def _parse_completion_response(response_body: Dict[str, Any], model: str) -> List[ChatMessage]:
      """
      Parse a Bedrock API response into Haystack ChatMessage objects.
@@ -255,6 +281,7 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
              # Process all content blocks and combine them into a single message
              text_content = []
              tool_calls = []
+             reasoning_contents = []
              for content_block in content_blocks:
                  if "text" in content_block:
                      text_content.append(content_block["text"])
@@ -267,6 +294,17 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
                          arguments=tool_use.get("input", {}),
                      )
                      tool_calls.append(tool_call)
+                 elif "reasoningContent" in content_block:
+                     reasoning_content = content_block["reasoningContent"]
+                     # If reasoningText is present, replace it with reasoning_text
+                     if "reasoningText" in reasoning_content:
+                         reasoning_content["reasoning_text"] = reasoning_content.pop("reasoningText")
+                     if "redactedContent" in reasoning_content:
+                         reasoning_content["redacted_content"] = reasoning_content.pop("redactedContent")
+                     reasoning_contents.append({"reasoning_content": reasoning_content})
+
+             # If reasoning contents were found, add them to the base meta
+             base_meta.update({"reasoning_contents": reasoning_contents})

              # Create a single ChatMessage with combined text and tool calls
              replies.append(ChatMessage.from_assistant(" ".join(text_content), tool_calls=tool_calls, meta=base_meta))
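Taken together with the `_format_reasoning_contents` hunk above, these changes give reasoning blocks a round trip between Bedrock's camelCase content blocks and snake_cased entries under `ChatMessage.meta["reasoning_contents"]`. A sketch of the data shapes implied by the code (the text and signature values here are purely illustrative):

```python
# What Bedrock returns inside a message's content list ...
bedrock_block = {
    "reasoningContent": {
        "reasoningText": {"text": "Check the tool output first...", "signature": "abc123"}
    }
}

# ... what _parse_completion_response stores on the ChatMessage meta ...
haystack_meta = {
    "reasoning_contents": [
        {"reasoning_content": {"reasoning_text": {"text": "Check the tool output first...", "signature": "abc123"}}}
    ]
}

# ... and _format_reasoning_contents converts the meta entry back to the
# camelCase block when the message is sent to Bedrock again, e.g. in a
# tool-calling loop.
```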
@@ -274,7 +312,6 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
      return replies


- # Bedrock streaming to Haystack util methods
  def _convert_event_to_streaming_chunk(
      event: Dict[str, Any], model: str, component_info: ComponentInfo
  ) -> StreamingChunk:
@@ -367,6 +404,22 @@ def _convert_event_to_streaming_chunk(
                      "received_at": datetime.now(timezone.utc).isoformat(),
                  },
              )
+         # This is for accumulating reasoning content deltas
+         elif "reasoningContent" in delta:
+             reasoning_content = delta["reasoningContent"]
+             if "redactedContent" in reasoning_content:
+                 reasoning_content["redacted_content"] = reasoning_content.pop("redactedContent")
+             streaming_chunk = StreamingChunk(
+                 content="",
+                 meta={
+                     "model": model,
+                     "index": 0,
+                     "tool_calls": None,
+                     "finish_reason": None,
+                     "received_at": datetime.now(timezone.utc).isoformat(),
+                     "reasoning_contents": [{"index": block_idx, "reasoning_content": reasoning_content}],
+                 },
+             )

      elif "messageStop" in event:
          finish_reason = event["messageStop"].get("stopReason")
@@ -406,6 +459,66 @@ def _convert_event_to_streaming_chunk(
      return streaming_chunk


+ def _process_reasoning_contents(chunks: List[StreamingChunk]) -> List[Dict[str, Any]]:
+     """
+     Process reasoning contents from a list of StreamingChunk objects into the Bedrock expected format.
+
+     :param chunks: List of StreamingChunk objects potentially containing reasoning contents.
+
+     :returns: List of Bedrock formatted reasoning content dictionaries
+     """
+     formatted_reasoning_contents = []
+     current_index = None
+     reasoning_text = ""
+     reasoning_signature = None
+     redacted_content = None
+     for chunk in chunks:
+         reasoning_contents = chunk.meta.get("reasoning_contents", [])
+
+         for reasoning_content in reasoning_contents:
+             content_block_index = reasoning_content["index"]
+
+             # Start new group when index changes
+             if current_index is not None and content_block_index != current_index:
+                 # Finalize current group
+                 if reasoning_text:
+                     formatted_reasoning_contents.append(
+                         {
+                             "reasoning_content": {
+                                 "reasoning_text": {"text": reasoning_text, "signature": reasoning_signature},
+                             }
+                         }
+                     )
+                 if redacted_content:
+                     formatted_reasoning_contents.append({"reasoning_content": {"redacted_content": redacted_content}})
+                 reasoning_text = ""
+                 reasoning_signature = None
+                 redacted_content = None
+
+             # Accumulate content for current index
+             current_index = content_block_index
+             reasoning_text += reasoning_content["reasoning_content"].get("text", "")
+             if "redacted_content" in reasoning_content["reasoning_content"]:
+                 redacted_content = reasoning_content["reasoning_content"]["redacted_content"]
+             if "signature" in reasoning_content["reasoning_content"]:
+                 reasoning_signature = reasoning_content["reasoning_content"]["signature"]
+
+     # Finalize the last group
+     if current_index is not None:
+         if reasoning_text:
+             formatted_reasoning_contents.append(
+                 {
+                     "reasoning_content": {
+                         "reasoning_text": {"text": reasoning_text, "signature": reasoning_signature},
+                     }
+                 }
+             )
+         if redacted_content:
+             formatted_reasoning_contents.append({"reasoning_content": {"redacted_content": redacted_content}})
+
+     return formatted_reasoning_contents
+
+
  def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> ChatMessage:
      """
      Converts a list of streaming chunks into a ChatMessage object.
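For streaming responses, each chunk carries its reasoning deltas under `meta["reasoning_contents"]` together with the content block `index`; `_process_reasoning_contents` concatenates the text per index and keeps the last signature or redacted content it sees. A small sketch of that accumulation with illustrative values (the next hunk then stores the result on the assembled `ChatMessage` under the same meta key as the non-streaming path):

```python
# Hedged sketch: three reasoning deltas for content block 0, as they would
# appear spread across streaming chunks ...
deltas = [
    {"index": 0, "reasoning_content": {"text": "Step 1."}},
    {"index": 0, "reasoning_content": {"text": " Step 2."}},
    {"index": 0, "reasoning_content": {"signature": "sig-xyz"}},
]

# ... collapse into a single reasoning_text entry once accumulated:
accumulated = [
    {"reasoning_content": {"reasoning_text": {"text": "Step 1. Step 2.", "signature": "sig-xyz"}}}
]
```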
@@ -421,8 +534,12 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C
          A ChatMessage object constructed from the streaming chunks, containing the aggregated text, processed tool
          calls, and metadata.
      """
+     # Join all text content from the chunks
      text = "".join([chunk.content for chunk in chunks])

+     # If reasoning content is present in any chunk, accumulate it
+     reasoning_contents = _process_reasoning_contents(chunks=chunks)
+
      # Process tool calls if present in any chunk
      tool_calls = []
      tool_call_data: Dict[int, Dict[str, str]] = {} # Track tool calls by index
@@ -474,6 +591,7 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C
          "finish_reason": finish_reason,
          "completion_start_time": chunks[0].meta.get("received_at"), # first chunk received
          "usage": usage,
+         "reasoning_contents": reasoning_contents,
      }

      return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)
@@ -167,9 +167,9 @@ class AmazonBedrockGenerator:
                  aws_region_name=resolve_secret(aws_region_name),
                  aws_profile_name=resolve_secret(aws_profile_name),
              )
-             config: Optional[Config] = None
-             if self.boto3_config:
-                 config = Config(**self.boto3_config)
+             config = Config(
+                 user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
+             )
              self.client = session.client("bedrock-runtime", config=config)
          except Exception as exception:
              msg = (