amazon-bedrock-haystack 5.2.0__tar.gz → 5.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/CHANGELOG.md +24 -7
  2. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/PKG-INFO +1 -1
  3. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/pyproject.toml +3 -3
  4. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/downloaders/s3/s3_downloader.py +10 -10
  5. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py +27 -35
  6. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/embedders/amazon_bedrock/document_image_embedder.py +31 -30
  7. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py +23 -29
  8. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py +33 -33
  9. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +17 -17
  10. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py +22 -22
  11. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +10 -10
  12. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/rankers/amazon_bedrock/ranker.py +7 -7
  13. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_chat_generator.py +2 -2
  14. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_document_embedder.py +35 -5
  15. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_document_image_embedder.py +15 -7
  16. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_generator.py +5 -5
  17. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_s3_downloader.py +3 -3
  18. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_text_embedder.py +27 -2
  19. amazon_bedrock_haystack-5.2.0/pydoc/config.yml +0 -42
  20. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/.gitignore +0 -0
  21. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/LICENSE.txt +0 -0
  22. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/README.md +0 -0
  23. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/examples/bedrock_ranker_example.py +0 -0
  24. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/examples/chatgenerator_example.py +0 -0
  25. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/examples/embedders_generator_with_rag_example.py +0 -0
  26. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/examples/s3_downloader_example.py +0 -0
  27. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/pydoc/config_docusaurus.yml +0 -0
  28. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/common/amazon_bedrock/__init__.py +0 -0
  29. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/common/amazon_bedrock/errors.py +0 -0
  30. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/common/amazon_bedrock/utils.py +0 -0
  31. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/common/py.typed +0 -0
  32. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/common/s3/__init__.py +0 -0
  33. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/common/s3/errors.py +0 -0
  34. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/common/s3/utils.py +0 -0
  35. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/downloaders/py.typed +0 -0
  36. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/downloaders/s3/__init__.py +0 -0
  37. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/embedders/amazon_bedrock/__init__.py +0 -0
  38. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/embedders/py.typed +0 -0
  39. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py +0 -0
  40. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py +0 -0
  41. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/generators/py.typed +0 -0
  42. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/rankers/amazon_bedrock/__init__.py +0 -0
  43. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/src/haystack_integrations/components/rankers/py.typed +0 -0
  44. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/__init__.py +0 -0
  45. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/conftest.py +0 -0
  46. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_chat_generator_utils.py +0 -0
  47. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_files/apple.jpg +0 -0
  48. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_files/haystack-logo.png +0 -0
  49. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_files/sample_pdf_1.pdf +0 -0
  50. {amazon_bedrock_haystack-5.2.0 → amazon_bedrock_haystack-5.3.1}/tests/test_ranker.py +0 -0
@@ -1,32 +1,49 @@
1
1
  # Changelog
2
2
 
3
- ## [integrations/amazon_bedrock-v5.1.0] - 2025-09-29
3
+ ## [integrations/amazon_bedrock-v5.3.0] - 2025-12-17
4
4
 
5
5
  ### 🚀 Features
6
6
 
7
- - S3Downloader - add `s3_key_generation_function` param to customize S3 key generation (#2343)
7
+ - `AmazonBedrockChatGenerator` update tools param to ToolsType (#2415)
8
+ - Cohere Embed v4 support in Bedrock (#2612)
8
9
 
9
- ## [integrations/amazon_bedrock-v5.0.0] - 2025-09-22
10
+ ### 📚 Documentation
11
+
12
+ - Add pydoc configurations for Docusaurus (#2411)
13
+
14
+ ### ⚙️ CI
15
+
16
+ - Change pytest command (#2475)
10
17
 
11
18
  ### 🧹 Chores
12
19
 
13
- - [**breaking**] Remove deprecated `BedrockRanker` (use `AmazonBedrockRanker` instead) (#2287)
20
+ - Remove Readme API CI workflow and configs (#2573)
21
+
22
+ ### 🌀 Miscellaneous
14
23
 
15
- ## [integrations/amazon_bedrock-v4.2.0] - 2025-09-19
24
+ - Adopt PEP 585 type hinting (part 2) (#2508)
25
+
26
+ ## [integrations/amazon_bedrock-v5.1.0] - 2025-09-29
16
27
 
17
28
  ### 🚀 Features
18
29
 
19
- - Add a new `S3Downloader` component (#2192)
30
+ - S3Downloader - add `s3_key_generation_function` param to customize S3 key generation (#2343)
31
+
20
32
 
21
- ## [integrations/amazon_bedrock-v4.1.0] - 2025-09-19
33
+ ## [integrations/amazon_bedrock-v5.0.0] - 2025-09-22
22
34
 
23
35
  ### 🚀 Features
24
36
 
25
37
  - Support AWS Bedrock Guardrails in `AmazonBedrockChatGenerator` (#2284)
38
+ - Add a new `S3Downloader` component (#2192)
39
+
40
+ ### 📚 Documentation
41
+
26
42
 
27
43
  ### 🧹 Chores
28
44
 
29
45
  - Bedrock - remove unused `stop_words` init parameter (#2275)
46
+ - [**breaking**] Remove deprecated `BedrockRanker` (use `AmazonBedrockRanker` instead) (#2287)
30
47
 
31
48
  ### 🌀 Miscellaneous
32
49
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amazon-bedrock-haystack
3
- Version: 5.2.0
3
+ Version: 5.3.1
4
4
  Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -46,7 +46,7 @@ installer = "uv"
46
46
  dependencies = ["haystack-pydoc-tools", "ruff"]
47
47
 
48
48
  [tool.hatch.envs.default.scripts]
49
- docs = ["pydoc-markdown pydoc/config.yml"]
49
+ docs = ["pydoc-markdown pydoc/config_docusaurus.yml"]
50
50
  fmt = "ruff check --fix {args} && ruff format {args}"
51
51
  fmt-check = "ruff check {args} && ruff format --check {args}"
52
52
 
@@ -66,7 +66,7 @@ dependencies = [
66
66
  unit = 'pytest -m "not integration" {args:tests}'
67
67
  integration = 'pytest -m "integration" {args:tests}'
68
68
  all = 'pytest {args:tests}'
69
- cov-retry = 'all --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
69
+ cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
70
70
 
71
71
  types = """mypy -p haystack_integrations.common.amazon_bedrock \
72
72
  -p haystack_integrations.components.embedders.amazon_bedrock \
@@ -92,7 +92,7 @@ module = [
92
92
  ignore_missing_imports = true
93
93
 
94
94
  [tool.ruff]
95
- target-version = "py38"
95
+ target-version = "py39"
96
96
  line-length = 120
97
97
 
98
98
  [tool.ruff.lint]
@@ -5,7 +5,7 @@
5
5
  import os
6
6
  from concurrent.futures import ThreadPoolExecutor
7
7
  from pathlib import Path
8
- from typing import Any, Callable, Dict, List, Optional
8
+ from typing import Any, Callable, Optional
9
9
 
10
10
  from botocore.config import Config
11
11
  from haystack import component, default_from_dict, default_to_dict, logging
@@ -36,9 +36,9 @@ class S3Downloader:
36
36
  aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
37
37
  aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
38
38
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
39
- boto3_config: Optional[Dict[str, Any]] = None,
39
+ boto3_config: Optional[dict[str, Any]] = None,
40
40
  file_root_path: Optional[str] = None,
41
- file_extensions: Optional[List[str]] = None,
41
+ file_extensions: Optional[list[str]] = None,
42
42
  file_name_meta_key: str = "file_name",
43
43
  max_workers: int = 32,
44
44
  max_cache_size: int = 100,
@@ -126,11 +126,11 @@ class S3Downloader:
126
126
  self.file_root_path.mkdir(parents=True, exist_ok=True)
127
127
  self._storage = S3Storage.from_env(session=self._session, config=self._config)
128
128
 
129
- @component.output_types(documents=List[Document])
129
+ @component.output_types(documents=list[Document])
130
130
  def run(
131
131
  self,
132
- documents: List[Document],
133
- ) -> Dict[str, List[Document]]:
132
+ documents: list[Document],
133
+ ) -> dict[str, list[Document]]:
134
134
  """Download files from AWS S3 Buckets to local filesystem.
135
135
 
136
136
  Return enriched `Document`s with the path of the downloaded file.
@@ -160,7 +160,7 @@ class S3Downloader:
160
160
  downloaded_documents = [d for d in iterable if d is not None]
161
161
  return {"documents": downloaded_documents}
162
162
 
163
- def _filter_documents_by_extensions(self, documents: List[Document]) -> List[Document]:
163
+ def _filter_documents_by_extensions(self, documents: list[Document]) -> list[Document]:
164
164
  """Filter documents by file extensions."""
165
165
  if not self.file_extensions:
166
166
  return documents
@@ -202,7 +202,7 @@ class S3Downloader:
202
202
  document.meta["file_path"] = str(file_path)
203
203
  return document
204
204
 
205
- def _cleanup_cache(self, documents: List[Document]) -> None:
205
+ def _cleanup_cache(self, documents: list[Document]) -> None:
206
206
  """
207
207
  Remove least-recently-accessed cache files when cache exceeds `max_cache_size`.
208
208
 
@@ -224,7 +224,7 @@ class S3Downloader:
224
224
  except Exception as error:
225
225
  logger.warning("Failed to remove cache file at {path} with error: {e}", path=p, e=error)
226
226
 
227
- def to_dict(self) -> Dict[str, Any]:
227
+ def to_dict(self) -> dict[str, Any]:
228
228
  """Serialize the component to a dictionary."""
229
229
 
230
230
  s3_key_generation_function_name = (
@@ -247,7 +247,7 @@ class S3Downloader:
247
247
  )
248
248
 
249
249
  @classmethod
250
- def from_dict(cls, data: Dict[str, Any]) -> "S3Downloader":
250
+ def from_dict(cls, data: dict[str, Any]) -> "S3Downloader":
251
251
  """
252
252
  Deserializes the component from a dictionary.
253
253
  :param data:
@@ -1,5 +1,5 @@
1
1
  import json
2
- from typing import Any, Dict, List, Literal, Optional
2
+ from typing import Any, Optional
3
3
 
4
4
  from botocore.config import Config
5
5
  from botocore.exceptions import ClientError
@@ -16,14 +16,6 @@ from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
16
16
 
17
17
  logger = logging.getLogger(__name__)
18
18
 
19
- SUPPORTED_EMBEDDING_MODELS = [
20
- "amazon.titan-embed-text-v1",
21
- "cohere.embed-english-v3",
22
- "cohere.embed-multilingual-v3",
23
- "amazon.titan-embed-text-v2:0",
24
- "amazon.titan-embed-image-v1",
25
- ]
26
-
27
19
 
28
20
  @component
29
21
  class AmazonBedrockDocumentEmbedder:
@@ -57,13 +49,7 @@ class AmazonBedrockDocumentEmbedder:
57
49
 
58
50
  def __init__(
59
51
  self,
60
- model: Literal[
61
- "amazon.titan-embed-text-v1",
62
- "cohere.embed-english-v3",
63
- "cohere.embed-multilingual-v3",
64
- "amazon.titan-embed-text-v2:0",
65
- "amazon.titan-embed-image-v1",
66
- ],
52
+ model: str,
67
53
  aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
68
54
  aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
69
55
  "AWS_SECRET_ACCESS_KEY", strict=False
@@ -73,9 +59,9 @@ class AmazonBedrockDocumentEmbedder:
73
59
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
74
60
  batch_size: int = 32,
75
61
  progress_bar: bool = True,
76
- meta_fields_to_embed: Optional[List[str]] = None,
62
+ meta_fields_to_embed: Optional[list[str]] = None,
77
63
  embedding_separator: str = "\n",
78
- boto3_config: Optional[Dict[str, Any]] = None,
64
+ boto3_config: Optional[dict[str, Any]] = None,
79
65
  **kwargs: Any,
80
66
  ) -> None:
81
67
  """
@@ -88,8 +74,13 @@ class AmazonBedrockDocumentEmbedder:
88
74
  constructor. Aside from model, three required parameters are `aws_access_key_id`, `aws_secret_access_key`,
89
75
  and `aws_region_name`.
90
76
 
91
- :param model: The embedding model to use. The model has to be specified in the format outlined in the Amazon
92
- Bedrock [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html).
77
+ :param model: The embedding model to use.
78
+ Amazon Titan and Cohere embedding models are supported, for example:
79
+ "amazon.titan-embed-text-v1", "amazon.titan-embed-text-v2:0", "amazon.titan-embed-image-v1",
80
+ "cohere.embed-english-v3", "cohere.embed-multilingual-v3", "cohere.embed-v4:0".
81
+ To find all supported models, refer to the Amazon Bedrock
82
+ [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) and
83
+ filter for "embedding", then select models from the Amazon Titan and Cohere series.
93
84
  :param aws_access_key_id: AWS access key ID.
94
85
  :param aws_secret_access_key: AWS secret access key.
95
86
  :param aws_session_token: AWS session token.
@@ -107,11 +98,8 @@ class AmazonBedrockDocumentEmbedder:
107
98
  :raises ValueError: If the model is not supported.
108
99
  :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
109
100
  """
110
-
111
- if not model or model not in SUPPORTED_EMBEDDING_MODELS:
112
- msg = "Please provide a valid model from the list of supported models: " + ", ".join(
113
- SUPPORTED_EMBEDDING_MODELS
114
- )
101
+ if "titan" not in model and "cohere" not in model:
102
+ msg = f"Model {model} is not supported. Only Amazon Titan and Cohere embedding models are supported."
115
103
  raise ValueError(msg)
116
104
 
117
105
  self.model = model
@@ -149,7 +137,7 @@ class AmazonBedrockDocumentEmbedder:
149
137
  )
150
138
  raise AmazonBedrockConfigurationError(msg) from exception
151
139
 
152
- def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]:
140
+ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
153
141
  """
154
142
  Prepare the texts to embed by concatenating the Document text with the metadata fields to embed.
155
143
  """
@@ -162,7 +150,7 @@ class AmazonBedrockDocumentEmbedder:
162
150
  texts_to_embed.append(text_to_embed)
163
151
  return texts_to_embed
164
152
 
165
- def _embed_cohere(self, documents: List[Document]) -> List[Document]:
153
+ def _embed_cohere(self, documents: list[Document]) -> list[Document]:
166
154
  """
167
155
  Internal method to embed Documents using Cohere models.
168
156
  Batch inference is supported.
@@ -191,15 +179,19 @@ class AmazonBedrockDocumentEmbedder:
191
179
  msg = f"Could not perform inference for Amazon Bedrock model {self.model} due to:\n{exception}"
192
180
  raise AmazonBedrockInferenceError(msg) from exception
193
181
 
194
- response_body = json.loads(response.get("body").read())
195
- all_embeddings.extend(response_body["embeddings"])
182
+ cohere_embeddings = json.loads(response.get("body").read())["embeddings"]
183
+ # depending on the model, Cohere returns a dict with the embedding types as keys or a list of lists
184
+ embeddings_list = (
185
+ next(iter(cohere_embeddings.values())) if isinstance(cohere_embeddings, dict) else cohere_embeddings
186
+ )
187
+ all_embeddings.extend(embeddings_list)
196
188
 
197
189
  for doc, emb in zip(documents, all_embeddings):
198
190
  doc.embedding = emb
199
191
 
200
192
  return documents
201
193
 
202
- def _embed_titan(self, documents: List[Document]) -> List[Document]:
194
+ def _embed_titan(self, documents: list[Document]) -> list[Document]:
203
195
  """
204
196
  Internal method to embed Documents using Amazon Titan models.
205
197
  NOTE: Batch inference is not supported, so embeddings are created one by one.
@@ -227,8 +219,8 @@ class AmazonBedrockDocumentEmbedder:
227
219
 
228
220
  return documents
229
221
 
230
- @component.output_types(documents=List[Document])
231
- def run(self, documents: List[Document]) -> Dict[str, List[Document]]:
222
+ @component.output_types(documents=list[Document])
223
+ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
232
224
  """Embed the provided `Document`s using the specified model.
233
225
 
234
226
  :param documents: The `Document`s to embed.
@@ -248,12 +240,12 @@ class AmazonBedrockDocumentEmbedder:
248
240
  elif "titan" in self.model:
249
241
  documents_with_embeddings = self._embed_titan(documents=documents)
250
242
  else:
251
- msg = f"Model {self.model} is not supported. Supported models are: {', '.join(SUPPORTED_EMBEDDING_MODELS)}."
243
+ msg = f"Model {self.model} is not supported. Only Amazon Titan and Cohere embedding models are supported."
252
244
  raise ValueError(msg)
253
245
 
254
246
  return {"documents": documents_with_embeddings}
255
247
 
256
- def to_dict(self) -> Dict[str, Any]:
248
+ def to_dict(self) -> dict[str, Any]:
257
249
  """
258
250
  Serializes the component to a dictionary.
259
251
 
@@ -277,7 +269,7 @@ class AmazonBedrockDocumentEmbedder:
277
269
  )
278
270
 
279
271
  @classmethod
280
- def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockDocumentEmbedder":
272
+ def from_dict(cls, data: dict[str, Any]) -> "AmazonBedrockDocumentEmbedder":
281
273
  """
282
274
  Deserializes the component from a dictionary.
283
275
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  import json
6
6
  from dataclasses import replace
7
- from typing import Any, Dict, List, Literal, Optional, Tuple
7
+ from typing import Any, Optional
8
8
 
9
9
  from botocore.config import Config
10
10
  from botocore.exceptions import ClientError
@@ -27,8 +27,6 @@ from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
27
27
 
28
28
  logger = logging.getLogger(__name__)
29
29
 
30
- SUPPORTED_EMBEDDING_MODELS = ["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"]
31
-
32
30
 
33
31
  @component
34
32
  class AmazonBedrockDocumentImageEmbedder:
@@ -69,7 +67,7 @@ class AmazonBedrockDocumentImageEmbedder:
69
67
  def __init__(
70
68
  self,
71
69
  *,
72
- model: Literal["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"],
70
+ model: str,
73
71
  aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
74
72
  aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
75
73
  "AWS_SECRET_ACCESS_KEY", strict=False
@@ -79,20 +77,21 @@ class AmazonBedrockDocumentImageEmbedder:
79
77
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
80
78
  file_path_meta_field: str = "file_path",
81
79
  root_path: Optional[str] = None,
82
- image_size: Optional[Tuple[int, int]] = None,
80
+ image_size: Optional[tuple[int, int]] = None,
83
81
  progress_bar: bool = True,
84
- boto3_config: Optional[Dict[str, Any]] = None,
82
+ boto3_config: Optional[dict[str, Any]] = None,
85
83
  **kwargs: Any,
86
84
  ) -> None:
87
85
  """
88
86
  Creates a AmazonBedrockDocumentImageEmbedder component.
89
87
 
90
- :param model:
91
- The Bedrock model to use for calculating embeddings. Pass a valid model ID.
92
- Supported models:
93
- - "amazon.titan-embed-image-v1"
94
- - "cohere.embed-english-v3"
95
- - "cohere.embed-multilingual-v3"
88
+ :param model: The embedding model to use.
89
+ Amazon Titan and Cohere multimodal embedding models are supported, for example:
90
+ "amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3",
91
+ "cohere.embed-v4:0".
92
+ To find all supported models, refer to the Amazon Bedrock
93
+ [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) and
94
+ filter for "embedding", then select multimodal models from the Amazon Titan and Cohere series.
96
95
  :param aws_access_key_id: AWS access key ID.
97
96
  :param aws_secret_access_key: AWS secret access key.
98
97
  :param aws_session_token: AWS session token.
@@ -114,9 +113,10 @@ class AmazonBedrockDocumentImageEmbedder:
114
113
  :raises ValueError: If the model is not supported.
115
114
  :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
116
115
  """
117
- if not model or model not in SUPPORTED_EMBEDDING_MODELS:
118
- msg = "Please provide a valid model from the list of supported models: " + ", ".join(
119
- SUPPORTED_EMBEDDING_MODELS
116
+ if "titan" not in model and "cohere" not in model:
117
+ msg = (
118
+ f"Model {model} is not supported. "
119
+ "Only Amazon Titan and Cohere multimodal embedding models are supported."
120
120
  )
121
121
  raise ValueError(msg)
122
122
 
@@ -135,14 +135,14 @@ class AmazonBedrockDocumentImageEmbedder:
135
135
  self.kwargs = kwargs
136
136
  self.embedding_types = None
137
137
 
138
- if emmbedding_types := self.kwargs.get("embedding_types"):
139
- if len(emmbedding_types) > 1:
138
+ if embedding_types := self.kwargs.get("embedding_types"):
139
+ if len(embedding_types) > 1:
140
140
  msg = (
141
141
  "You have provided multiple embedding_types for Cohere model. "
142
142
  "AmazonBedrockDocumentImageEmbedder only supports one embedding_type at a time."
143
143
  )
144
144
  raise ValueError(msg)
145
- self.embedding_types = emmbedding_types
145
+ self.embedding_types = embedding_types
146
146
 
147
147
  def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
148
148
  return secret.resolve_value() if secret else None
@@ -280,7 +280,10 @@ class AmazonBedrockDocumentImageEmbedder:
280
280
  elif "titan" in self.model:
281
281
  embeddings = self._embed_titan(images=images_to_embed)
282
282
  else:
283
- msg = f"Model {self.model} is not supported. Supported models are: {', '.join(SUPPORTED_EMBEDDING_MODELS)}."
283
+ msg = (
284
+ f"Model {self.model} is not supported. "
285
+ "Only Amazon Titan and Cohere multimodal embedding models are supported."
286
+ )
284
287
  raise ValueError(msg)
285
288
 
286
289
  docs_with_embeddings = []
@@ -296,7 +299,7 @@ class AmazonBedrockDocumentImageEmbedder:
296
299
 
297
300
  return {"documents": docs_with_embeddings}
298
301
 
299
- def _embed_titan(self, images: List[str]) -> List[List[float]]:
302
+ def _embed_titan(self, images: list[str]) -> list[list[float]]:
300
303
  """
301
304
  Internal method to embed base64 images using Amazon Titan models.
302
305
 
@@ -326,7 +329,7 @@ class AmazonBedrockDocumentImageEmbedder:
326
329
 
327
330
  return all_embeddings
328
331
 
329
- def _embed_cohere(self, image_uris: List[str]) -> List[List[float]]:
332
+ def _embed_cohere(self, image_uris: list[str]) -> list[list[float]]:
330
333
  """
331
334
  Internal method to embed base64 images using Cohere models.
332
335
 
@@ -351,15 +354,13 @@ class AmazonBedrockDocumentImageEmbedder:
351
354
  raise AmazonBedrockInferenceError(msg) from exception
352
355
 
353
356
  response_body = json.loads(response.get("body").read())
354
- embeddings = response_body["embeddings"]
357
+ cohere_embeddings = response_body["embeddings"]
355
358
 
356
- # if embedding_types is specified, cohere returns a dict with the embedding types as keys
357
- if isinstance(embeddings, dict):
358
- for embedding in embeddings.values():
359
- all_embeddings.append(embedding[0])
360
- else:
361
- # if embedding_types is not specified, cohere returns
362
- # a nested list of float embeddings
363
- all_embeddings.append(embeddings[0])
359
+ # depending on the model and embedding_types, Cohere returns a dict with the embedding types as keys
360
+ # or a list of lists
361
+ embeddings_list = (
362
+ next(iter(cohere_embeddings.values())) if isinstance(cohere_embeddings, dict) else cohere_embeddings
363
+ )
364
+ all_embeddings.extend(embeddings_list)
364
365
 
365
366
  return all_embeddings
@@ -1,5 +1,5 @@
1
1
  import json
2
- from typing import Any, Dict, List, Literal, Optional
2
+ from typing import Any, Optional
3
3
 
4
4
  from botocore.config import Config
5
5
  from botocore.exceptions import ClientError
@@ -14,14 +14,6 @@ from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
14
14
 
15
15
  logger = logging.getLogger(__name__)
16
16
 
17
- SUPPORTED_EMBEDDING_MODELS = [
18
- "amazon.titan-embed-text-v1",
19
- "cohere.embed-english-v3",
20
- "cohere.embed-multilingual-v3",
21
- "amazon.titan-embed-text-v2:0",
22
- "amazon.titan-embed-image-v1",
23
- ]
24
-
25
17
 
26
18
  @component
27
19
  class AmazonBedrockTextEmbedder:
@@ -50,13 +42,7 @@ class AmazonBedrockTextEmbedder:
50
42
 
51
43
  def __init__(
52
44
  self,
53
- model: Literal[
54
- "amazon.titan-embed-text-v1",
55
- "cohere.embed-english-v3",
56
- "cohere.embed-multilingual-v3",
57
- "amazon.titan-embed-text-v2:0",
58
- "amazon.titan-embed-image-v1",
59
- ],
45
+ model: str,
60
46
  aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
61
47
  aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
62
48
  "AWS_SECRET_ACCESS_KEY", strict=False
@@ -64,7 +50,7 @@ class AmazonBedrockTextEmbedder:
64
50
  aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
65
51
  aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
66
52
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
67
- boto3_config: Optional[Dict[str, Any]] = None,
53
+ boto3_config: Optional[dict[str, Any]] = None,
68
54
  **kwargs: Any,
69
55
  ) -> None:
70
56
  """
@@ -77,8 +63,13 @@ class AmazonBedrockTextEmbedder:
77
63
  constructor. Aside from model, three required parameters are `aws_access_key_id`, `aws_secret_access_key`,
78
64
  and `aws_region_name`.
79
65
 
80
- :param model: The embedding model to use. The model has to be specified in the format outlined in the Amazon
81
- Bedrock [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html).
66
+ :param model: The embedding model to use.
67
+ Amazon Titan and Cohere embedding models are supported, for example:
68
+ "amazon.titan-embed-text-v1", "amazon.titan-embed-text-v2:0", "amazon.titan-embed-image-v1",
69
+ "cohere.embed-english-v3", "cohere.embed-multilingual-v3", "cohere.embed-v4:0".
70
+ To find all supported models, refer to the Amazon Bedrock
71
+ [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) and
72
+ filter for "embedding", then select models from the Amazon Titan and Cohere series.
82
73
  :param aws_access_key_id: AWS access key ID.
83
74
  :param aws_secret_access_key: AWS secret access key.
84
75
  :param aws_session_token: AWS session token.
@@ -90,10 +81,8 @@ class AmazonBedrockTextEmbedder:
90
81
  :raises ValueError: If the model is not supported.
91
82
  :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly.
92
83
  """
93
- if not model or model not in SUPPORTED_EMBEDDING_MODELS:
94
- msg = "Please provide a valid model from the list of supported models: " + ", ".join(
95
- SUPPORTED_EMBEDDING_MODELS
96
- )
84
+ if "titan" not in model and "cohere" not in model:
85
+ msg = f"Model {model} is not supported. Only Amazon Titan and Cohere embedding models are supported."
97
86
  raise ValueError(msg)
98
87
 
99
88
  self.model = model
@@ -127,8 +116,8 @@ class AmazonBedrockTextEmbedder:
127
116
  )
128
117
  raise AmazonBedrockConfigurationError(msg) from exception
129
118
 
130
- @component.output_types(embedding=List[float])
131
- def run(self, text: str) -> Dict[str, List[float]]:
119
+ @component.output_types(embedding=list[float])
120
+ def run(self, text: str) -> dict[str, list[float]]:
132
121
  """Embeds the input text using the Amazon Bedrock model.
133
122
 
134
123
  :param text: The input text to embed.
@@ -168,16 +157,21 @@ class AmazonBedrockTextEmbedder:
168
157
  response_body = json.loads(response.get("body").read())
169
158
 
170
159
  if "cohere" in self.model:
171
- embedding = response_body["embeddings"][0]
160
+ cohere_embeddings = response_body["embeddings"]
161
+ # depending on the model, Cohere returns a dict with the embedding types as keys or a list of lists
162
+ embeddings_list = (
163
+ next(iter(cohere_embeddings.values())) if isinstance(cohere_embeddings, dict) else cohere_embeddings
164
+ )
165
+ embedding = embeddings_list[0]
172
166
  elif "titan" in self.model:
173
167
  embedding = response_body["embedding"]
174
168
  else:
175
- msg = f"Unsupported model {self.model}. Supported models are: {', '.join(SUPPORTED_EMBEDDING_MODELS)}"
169
+ msg = f"Model {self.model} is not supported. Only Amazon Titan and Cohere embedding models are supported."
176
170
  raise ValueError(msg)
177
171
 
178
172
  return {"embedding": embedding}
179
173
 
180
- def to_dict(self) -> Dict[str, Any]:
174
+ def to_dict(self) -> dict[str, Any]:
181
175
  """
182
176
  Serializes the component to a dictionary.
183
177
 
@@ -197,7 +191,7 @@ class AmazonBedrockTextEmbedder:
197
191
  )
198
192
 
199
193
  @classmethod
200
- def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockTextEmbedder":
194
+ def from_dict(cls, data: dict[str, Any]) -> "AmazonBedrockTextEmbedder":
201
195
  """
202
196
  Deserializes the component from a dictionary.
203
197