amazon-bedrock-haystack 5.1.0__tar.gz → 5.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/CHANGELOG.md +12 -0
  2. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/PKG-INFO +2 -2
  3. amazon_bedrock_haystack-5.3.0/pydoc/config_docusaurus.yml +41 -0
  4. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/pyproject.toml +4 -4
  5. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/downloaders/s3/s3_downloader.py +10 -10
  6. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py +22 -16
  7. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_image_embedder.py +28 -19
  8. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py +18 -11
  9. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py +33 -33
  10. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +36 -34
  11. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py +22 -22
  12. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +10 -10
  13. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/rankers/amazon_bedrock/ranker.py +7 -7
  14. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_chat_generator.py +167 -3
  15. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_document_embedder.py +35 -5
  16. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_document_image_embedder.py +15 -7
  17. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_generator.py +5 -5
  18. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_s3_downloader.py +3 -3
  19. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_text_embedder.py +27 -2
  20. amazon_bedrock_haystack-5.1.0/pydoc/config.yml +0 -42
  21. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/.gitignore +0 -0
  22. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/LICENSE.txt +0 -0
  23. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/README.md +0 -0
  24. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/examples/bedrock_ranker_example.py +0 -0
  25. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/examples/chatgenerator_example.py +0 -0
  26. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/examples/embedders_generator_with_rag_example.py +0 -0
  27. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/examples/s3_downloader_example.py +0 -0
  28. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/common/amazon_bedrock/__init__.py +0 -0
  29. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/common/amazon_bedrock/errors.py +0 -0
  30. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/common/amazon_bedrock/utils.py +0 -0
  31. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/common/py.typed +0 -0
  32. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/common/s3/__init__.py +0 -0
  33. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/common/s3/errors.py +0 -0
  34. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/common/s3/utils.py +0 -0
  35. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/downloaders/py.typed +0 -0
  36. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/downloaders/s3/__init__.py +0 -0
  37. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/embedders/amazon_bedrock/__init__.py +0 -0
  38. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/embedders/py.typed +0 -0
  39. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py +0 -0
  40. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py +0 -0
  41. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/generators/py.typed +0 -0
  42. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/rankers/amazon_bedrock/__init__.py +0 -0
  43. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/src/haystack_integrations/components/rankers/py.typed +0 -0
  44. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/__init__.py +0 -0
  45. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/conftest.py +0 -0
  46. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_chat_generator_utils.py +0 -0
  47. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_files/apple.jpg +0 -0
  48. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_files/haystack-logo.png +0 -0
  49. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_files/sample_pdf_1.pdf +0 -0
  50. {amazon_bedrock_haystack-5.1.0 → amazon_bedrock_haystack-5.3.0}/tests/test_ranker.py +0 -0
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/amazon_bedrock-v5.2.0] - 2025-10-22
4
+
5
+ ### 🚀 Features
6
+
7
+ - `AmazonBedrockChatGenerator` update tools param to ToolsType (#2415)
8
+
9
+ ## [integrations/amazon_bedrock-v5.1.0] - 2025-09-29
10
+
11
+ ### 🚀 Features
12
+
13
+ - S3Downloader - add `s3_key_generation_function` param to customize S3 key generation (#2343)
14
+
3
15
  ## [integrations/amazon_bedrock-v5.0.0] - 2025-09-22
4
16
 
5
17
  ### 🧹 Chores
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amazon-bedrock-haystack
3
- Version: 5.1.0
3
+ Version: 5.3.0
4
4
  Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -21,7 +21,7 @@ Classifier: Programming Language :: Python :: Implementation :: PyPy
21
21
  Requires-Python: >=3.9
22
22
  Requires-Dist: aioboto3>=14.0.0
23
23
  Requires-Dist: boto3>=1.28.57
24
- Requires-Dist: haystack-ai>=2.17.1
24
+ Requires-Dist: haystack-ai>=2.19.0
25
25
  Description-Content-Type: text/markdown
26
26
 
27
27
  # amazon-bedrock-haystack
@@ -0,0 +1,41 @@
1
+ loaders:
2
+ - ignore_when_discovered:
3
+ - __init__
4
+ modules:
5
+ - haystack_integrations.common.amazon_bedrock.errors
6
+ - haystack_integrations.components.embedders.amazon_bedrock.document_embedder
7
+ - haystack_integrations.components.embedders.amazon_bedrock.text_embedder
8
+ - haystack_integrations.components.embedders.amazon_bedrock.document_image_embedder
9
+ - haystack_integrations.components.generators.amazon_bedrock.generator
10
+ - haystack_integrations.components.generators.amazon_bedrock.adapters
11
+ - haystack_integrations.common.amazon_bedrock.errors
12
+ - haystack_integrations.components.generators.amazon_bedrock.chat.chat_generator
13
+ - haystack_integrations.components.rankers.amazon_bedrock.ranker
14
+ - haystack_integrations.components.downloaders.s3.s3_downloader
15
+ - haystack_integrations.common.s3.utils
16
+ - haystack_integrations.common.s3.errors
17
+ search_path:
18
+ - ../src
19
+ type: haystack_pydoc_tools.loaders.CustomPythonLoader
20
+ processors:
21
+ - do_not_filter_modules: false
22
+ documented_only: true
23
+ expression: null
24
+ skip_empty_modules: true
25
+ type: filter
26
+ - expression: name not in ['BedrockRanker']
27
+ type: filter
28
+ - type: smart
29
+ - type: crossref
30
+ renderer:
31
+ description: Amazon Bedrock integration for Haystack
32
+ id: integrations-amazon-bedrock
33
+ markdown:
34
+ add_member_class_prefix: false
35
+ add_method_class_prefix: true
36
+ classdef_code_block: false
37
+ descriptive_class_title: false
38
+ descriptive_module_title: true
39
+ filename: amazon_bedrock.md
40
+ title: Amazon Bedrock
41
+ type: haystack_pydoc_tools.renderers.DocusaurusRenderer
@@ -23,7 +23,7 @@ classifiers = [
23
23
  "Programming Language :: Python :: Implementation :: CPython",
24
24
  "Programming Language :: Python :: Implementation :: PyPy",
25
25
  ]
26
- dependencies = ["haystack-ai>=2.17.1", "boto3>=1.28.57", "aioboto3>=14.0.0"]
26
+ dependencies = ["haystack-ai>=2.19.0", "boto3>=1.28.57", "aioboto3>=14.0.0"]
27
27
 
28
28
  [project.urls]
29
29
  Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme"
@@ -46,7 +46,7 @@ installer = "uv"
46
46
  dependencies = ["haystack-pydoc-tools", "ruff"]
47
47
 
48
48
  [tool.hatch.envs.default.scripts]
49
- docs = ["pydoc-markdown pydoc/config.yml"]
49
+ docs = ["pydoc-markdown pydoc/config_docusaurus.yml"]
50
50
  fmt = "ruff check --fix {args} && ruff format {args}"
51
51
  fmt-check = "ruff check {args} && ruff format --check {args}"
52
52
 
@@ -66,7 +66,7 @@ dependencies = [
66
66
  unit = 'pytest -m "not integration" {args:tests}'
67
67
  integration = 'pytest -m "integration" {args:tests}'
68
68
  all = 'pytest {args:tests}'
69
- cov-retry = 'all --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
69
+ cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
70
70
 
71
71
  types = """mypy -p haystack_integrations.common.amazon_bedrock \
72
72
  -p haystack_integrations.components.embedders.amazon_bedrock \
@@ -92,7 +92,7 @@ module = [
92
92
  ignore_missing_imports = true
93
93
 
94
94
  [tool.ruff]
95
- target-version = "py38"
95
+ target-version = "py39"
96
96
  line-length = 120
97
97
 
98
98
  [tool.ruff.lint]
@@ -5,7 +5,7 @@
5
5
  import os
6
6
  from concurrent.futures import ThreadPoolExecutor
7
7
  from pathlib import Path
8
- from typing import Any, Callable, Dict, List, Optional
8
+ from typing import Any, Callable, Optional
9
9
 
10
10
  from botocore.config import Config
11
11
  from haystack import component, default_from_dict, default_to_dict, logging
@@ -36,9 +36,9 @@ class S3Downloader:
36
36
  aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
37
37
  aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
38
38
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
39
- boto3_config: Optional[Dict[str, Any]] = None,
39
+ boto3_config: Optional[dict[str, Any]] = None,
40
40
  file_root_path: Optional[str] = None,
41
- file_extensions: Optional[List[str]] = None,
41
+ file_extensions: Optional[list[str]] = None,
42
42
  file_name_meta_key: str = "file_name",
43
43
  max_workers: int = 32,
44
44
  max_cache_size: int = 100,
@@ -126,11 +126,11 @@ class S3Downloader:
126
126
  self.file_root_path.mkdir(parents=True, exist_ok=True)
127
127
  self._storage = S3Storage.from_env(session=self._session, config=self._config)
128
128
 
129
- @component.output_types(documents=List[Document])
129
+ @component.output_types(documents=list[Document])
130
130
  def run(
131
131
  self,
132
- documents: List[Document],
133
- ) -> Dict[str, List[Document]]:
132
+ documents: list[Document],
133
+ ) -> dict[str, list[Document]]:
134
134
  """Download files from AWS S3 Buckets to local filesystem.
135
135
 
136
136
  Return enriched `Document`s with the path of the downloaded file.
@@ -160,7 +160,7 @@ class S3Downloader:
160
160
  downloaded_documents = [d for d in iterable if d is not None]
161
161
  return {"documents": downloaded_documents}
162
162
 
163
- def _filter_documents_by_extensions(self, documents: List[Document]) -> List[Document]:
163
+ def _filter_documents_by_extensions(self, documents: list[Document]) -> list[Document]:
164
164
  """Filter documents by file extensions."""
165
165
  if not self.file_extensions:
166
166
  return documents
@@ -202,7 +202,7 @@ class S3Downloader:
202
202
  document.meta["file_path"] = str(file_path)
203
203
  return document
204
204
 
205
- def _cleanup_cache(self, documents: List[Document]) -> None:
205
+ def _cleanup_cache(self, documents: list[Document]) -> None:
206
206
  """
207
207
  Remove least-recently-accessed cache files when cache exceeds `max_cache_size`.
208
208
 
@@ -224,7 +224,7 @@ class S3Downloader:
224
224
  except Exception as error:
225
225
  logger.warning("Failed to remove cache file at {path} with error: {e}", path=p, e=error)
226
226
 
227
- def to_dict(self) -> Dict[str, Any]:
227
+ def to_dict(self) -> dict[str, Any]:
228
228
  """Serialize the component to a dictionary."""
229
229
 
230
230
  s3_key_generation_function_name = (
@@ -247,7 +247,7 @@ class S3Downloader:
247
247
  )
248
248
 
249
249
  @classmethod
250
- def from_dict(cls, data: Dict[str, Any]) -> "S3Downloader":
250
+ def from_dict(cls, data: dict[str, Any]) -> "S3Downloader":
251
251
  """
252
252
  Deserializes the component from a dictionary.
253
253
  :param data:
@@ -1,5 +1,5 @@
1
1
  import json
2
- from typing import Any, Dict, List, Literal, Optional
2
+ from typing import Any, Literal, Optional
3
3
 
4
4
  from botocore.config import Config
5
5
  from botocore.exceptions import ClientError
@@ -18,10 +18,11 @@ logger = logging.getLogger(__name__)
18
18
 
19
19
  SUPPORTED_EMBEDDING_MODELS = [
20
20
  "amazon.titan-embed-text-v1",
21
- "cohere.embed-english-v3",
22
- "cohere.embed-multilingual-v3",
23
21
  "amazon.titan-embed-text-v2:0",
24
22
  "amazon.titan-embed-image-v1",
23
+ "cohere.embed-english-v3",
24
+ "cohere.embed-multilingual-v3",
25
+ "cohere.embed-v4:0",
25
26
  ]
26
27
 
27
28
 
@@ -59,10 +60,11 @@ class AmazonBedrockDocumentEmbedder:
59
60
  self,
60
61
  model: Literal[
61
62
  "amazon.titan-embed-text-v1",
62
- "cohere.embed-english-v3",
63
- "cohere.embed-multilingual-v3",
64
63
  "amazon.titan-embed-text-v2:0",
65
64
  "amazon.titan-embed-image-v1",
65
+ "cohere.embed-english-v3",
66
+ "cohere.embed-multilingual-v3",
67
+ "cohere.embed-v4:0",
66
68
  ],
67
69
  aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
68
70
  aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
@@ -73,9 +75,9 @@ class AmazonBedrockDocumentEmbedder:
73
75
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
74
76
  batch_size: int = 32,
75
77
  progress_bar: bool = True,
76
- meta_fields_to_embed: Optional[List[str]] = None,
78
+ meta_fields_to_embed: Optional[list[str]] = None,
77
79
  embedding_separator: str = "\n",
78
- boto3_config: Optional[Dict[str, Any]] = None,
80
+ boto3_config: Optional[dict[str, Any]] = None,
79
81
  **kwargs: Any,
80
82
  ) -> None:
81
83
  """
@@ -149,7 +151,7 @@ class AmazonBedrockDocumentEmbedder:
149
151
  )
150
152
  raise AmazonBedrockConfigurationError(msg) from exception
151
153
 
152
- def _prepare_texts_to_embed(self, documents: List[Document]) -> List[str]:
154
+ def _prepare_texts_to_embed(self, documents: list[Document]) -> list[str]:
153
155
  """
154
156
  Prepare the texts to embed by concatenating the Document text with the metadata fields to embed.
155
157
  """
@@ -162,7 +164,7 @@ class AmazonBedrockDocumentEmbedder:
162
164
  texts_to_embed.append(text_to_embed)
163
165
  return texts_to_embed
164
166
 
165
- def _embed_cohere(self, documents: List[Document]) -> List[Document]:
167
+ def _embed_cohere(self, documents: list[Document]) -> list[Document]:
166
168
  """
167
169
  Internal method to embed Documents using Cohere models.
168
170
  Batch inference is supported.
@@ -191,15 +193,19 @@ class AmazonBedrockDocumentEmbedder:
191
193
  msg = f"Could not perform inference for Amazon Bedrock model {self.model} due to:\n{exception}"
192
194
  raise AmazonBedrockInferenceError(msg) from exception
193
195
 
194
- response_body = json.loads(response.get("body").read())
195
- all_embeddings.extend(response_body["embeddings"])
196
+ cohere_embeddings = json.loads(response.get("body").read())["embeddings"]
197
+ # depending on the model, Cohere returns a dict with the embedding types as keys or a list of lists
198
+ embeddings_list = (
199
+ next(iter(cohere_embeddings.values())) if isinstance(cohere_embeddings, dict) else cohere_embeddings
200
+ )
201
+ all_embeddings.extend(embeddings_list)
196
202
 
197
203
  for doc, emb in zip(documents, all_embeddings):
198
204
  doc.embedding = emb
199
205
 
200
206
  return documents
201
207
 
202
- def _embed_titan(self, documents: List[Document]) -> List[Document]:
208
+ def _embed_titan(self, documents: list[Document]) -> list[Document]:
203
209
  """
204
210
  Internal method to embed Documents using Amazon Titan models.
205
211
  NOTE: Batch inference is not supported, so embeddings are created one by one.
@@ -227,8 +233,8 @@ class AmazonBedrockDocumentEmbedder:
227
233
 
228
234
  return documents
229
235
 
230
- @component.output_types(documents=List[Document])
231
- def run(self, documents: List[Document]) -> Dict[str, List[Document]]:
236
+ @component.output_types(documents=list[Document])
237
+ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
232
238
  """Embed the provided `Document`s using the specified model.
233
239
 
234
240
  :param documents: The `Document`s to embed.
@@ -253,7 +259,7 @@ class AmazonBedrockDocumentEmbedder:
253
259
 
254
260
  return {"documents": documents_with_embeddings}
255
261
 
256
- def to_dict(self) -> Dict[str, Any]:
262
+ def to_dict(self) -> dict[str, Any]:
257
263
  """
258
264
  Serializes the component to a dictionary.
259
265
 
@@ -277,7 +283,7 @@ class AmazonBedrockDocumentEmbedder:
277
283
  )
278
284
 
279
285
  @classmethod
280
- def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockDocumentEmbedder":
286
+ def from_dict(cls, data: dict[str, Any]) -> "AmazonBedrockDocumentEmbedder":
281
287
  """
282
288
  Deserializes the component from a dictionary.
283
289
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  import json
6
6
  from dataclasses import replace
7
- from typing import Any, Dict, List, Literal, Optional, Tuple
7
+ from typing import Any, Literal, Optional
8
8
 
9
9
  from botocore.config import Config
10
10
  from botocore.exceptions import ClientError
@@ -27,7 +27,12 @@ from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
27
27
 
28
28
  logger = logging.getLogger(__name__)
29
29
 
30
- SUPPORTED_EMBEDDING_MODELS = ["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"]
30
+ SUPPORTED_EMBEDDING_MODELS = [
31
+ "amazon.titan-embed-image-v1",
32
+ "cohere.embed-english-v3",
33
+ "cohere.embed-multilingual-v3",
34
+ "cohere.embed-v4:0",
35
+ ]
31
36
 
32
37
 
33
38
  @component
@@ -69,7 +74,12 @@ class AmazonBedrockDocumentImageEmbedder:
69
74
  def __init__(
70
75
  self,
71
76
  *,
72
- model: Literal["amazon.titan-embed-image-v1", "cohere.embed-english-v3", "cohere.embed-multilingual-v3"],
77
+ model: Literal[
78
+ "amazon.titan-embed-image-v1",
79
+ "cohere.embed-english-v3",
80
+ "cohere.embed-multilingual-v3",
81
+ "cohere.embed-v4:0",
82
+ ],
73
83
  aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
74
84
  aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
75
85
  "AWS_SECRET_ACCESS_KEY", strict=False
@@ -79,9 +89,9 @@ class AmazonBedrockDocumentImageEmbedder:
79
89
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
80
90
  file_path_meta_field: str = "file_path",
81
91
  root_path: Optional[str] = None,
82
- image_size: Optional[Tuple[int, int]] = None,
92
+ image_size: Optional[tuple[int, int]] = None,
83
93
  progress_bar: bool = True,
84
- boto3_config: Optional[Dict[str, Any]] = None,
94
+ boto3_config: Optional[dict[str, Any]] = None,
85
95
  **kwargs: Any,
86
96
  ) -> None:
87
97
  """
@@ -93,6 +103,7 @@ class AmazonBedrockDocumentImageEmbedder:
93
103
  - "amazon.titan-embed-image-v1"
94
104
  - "cohere.embed-english-v3"
95
105
  - "cohere.embed-multilingual-v3"
106
+ - "cohere.embed-v4:0"
96
107
  :param aws_access_key_id: AWS access key ID.
97
108
  :param aws_secret_access_key: AWS secret access key.
98
109
  :param aws_session_token: AWS session token.
@@ -135,14 +146,14 @@ class AmazonBedrockDocumentImageEmbedder:
135
146
  self.kwargs = kwargs
136
147
  self.embedding_types = None
137
148
 
138
- if emmbedding_types := self.kwargs.get("embedding_types"):
139
- if len(emmbedding_types) > 1:
149
+ if embedding_types := self.kwargs.get("embedding_types"):
150
+ if len(embedding_types) > 1:
140
151
  msg = (
141
152
  "You have provided multiple embedding_types for Cohere model. "
142
153
  "AmazonBedrockDocumentImageEmbedder only supports one embedding_type at a time."
143
154
  )
144
155
  raise ValueError(msg)
145
- self.embedding_types = emmbedding_types
156
+ self.embedding_types = embedding_types
146
157
 
147
158
  def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
148
159
  return secret.resolve_value() if secret else None
@@ -296,7 +307,7 @@ class AmazonBedrockDocumentImageEmbedder:
296
307
 
297
308
  return {"documents": docs_with_embeddings}
298
309
 
299
- def _embed_titan(self, images: List[str]) -> List[List[float]]:
310
+ def _embed_titan(self, images: list[str]) -> list[list[float]]:
300
311
  """
301
312
  Internal method to embed base64 images using Amazon Titan models.
302
313
 
@@ -326,7 +337,7 @@ class AmazonBedrockDocumentImageEmbedder:
326
337
 
327
338
  return all_embeddings
328
339
 
329
- def _embed_cohere(self, image_uris: List[str]) -> List[List[float]]:
340
+ def _embed_cohere(self, image_uris: list[str]) -> list[list[float]]:
330
341
  """
331
342
  Internal method to embed base64 images using Cohere models.
332
343
 
@@ -351,15 +362,13 @@ class AmazonBedrockDocumentImageEmbedder:
351
362
  raise AmazonBedrockInferenceError(msg) from exception
352
363
 
353
364
  response_body = json.loads(response.get("body").read())
354
- embeddings = response_body["embeddings"]
365
+ cohere_embeddings = response_body["embeddings"]
355
366
 
356
- # if embedding_types is specified, cohere returns a dict with the embedding types as keys
357
- if isinstance(embeddings, dict):
358
- for embedding in embeddings.values():
359
- all_embeddings.append(embedding[0])
360
- else:
361
- # if embedding_types is not specified, cohere returns
362
- # a nested list of float embeddings
363
- all_embeddings.append(embeddings[0])
367
+ # depending on the model and embedding_types, Cohere returns a dict with the embedding types as keys
368
+ # or a list of lists
369
+ embeddings_list = (
370
+ next(iter(cohere_embeddings.values())) if isinstance(cohere_embeddings, dict) else cohere_embeddings
371
+ )
372
+ all_embeddings.extend(embeddings_list)
364
373
 
365
374
  return all_embeddings
@@ -1,5 +1,5 @@
1
1
  import json
2
- from typing import Any, Dict, List, Literal, Optional
2
+ from typing import Any, Literal, Optional
3
3
 
4
4
  from botocore.config import Config
5
5
  from botocore.exceptions import ClientError
@@ -16,10 +16,11 @@ logger = logging.getLogger(__name__)
16
16
 
17
17
  SUPPORTED_EMBEDDING_MODELS = [
18
18
  "amazon.titan-embed-text-v1",
19
- "cohere.embed-english-v3",
20
- "cohere.embed-multilingual-v3",
21
19
  "amazon.titan-embed-text-v2:0",
22
20
  "amazon.titan-embed-image-v1",
21
+ "cohere.embed-english-v3",
22
+ "cohere.embed-multilingual-v3",
23
+ "cohere.embed-v4:0",
23
24
  ]
24
25
 
25
26
 
@@ -52,10 +53,11 @@ class AmazonBedrockTextEmbedder:
52
53
  self,
53
54
  model: Literal[
54
55
  "amazon.titan-embed-text-v1",
55
- "cohere.embed-english-v3",
56
- "cohere.embed-multilingual-v3",
57
56
  "amazon.titan-embed-text-v2:0",
58
57
  "amazon.titan-embed-image-v1",
58
+ "cohere.embed-english-v3",
59
+ "cohere.embed-multilingual-v3",
60
+ "cohere.embed-v4:0",
59
61
  ],
60
62
  aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
61
63
  aws_secret_access_key: Optional[Secret] = Secret.from_env_var( # noqa: B008
@@ -64,7 +66,7 @@ class AmazonBedrockTextEmbedder:
64
66
  aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
65
67
  aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
66
68
  aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
67
- boto3_config: Optional[Dict[str, Any]] = None,
69
+ boto3_config: Optional[dict[str, Any]] = None,
68
70
  **kwargs: Any,
69
71
  ) -> None:
70
72
  """
@@ -127,8 +129,8 @@ class AmazonBedrockTextEmbedder:
127
129
  )
128
130
  raise AmazonBedrockConfigurationError(msg) from exception
129
131
 
130
- @component.output_types(embedding=List[float])
131
- def run(self, text: str) -> Dict[str, List[float]]:
132
+ @component.output_types(embedding=list[float])
133
+ def run(self, text: str) -> dict[str, list[float]]:
132
134
  """Embeds the input text using the Amazon Bedrock model.
133
135
 
134
136
  :param text: The input text to embed.
@@ -168,7 +170,12 @@ class AmazonBedrockTextEmbedder:
168
170
  response_body = json.loads(response.get("body").read())
169
171
 
170
172
  if "cohere" in self.model:
171
- embedding = response_body["embeddings"][0]
173
+ cohere_embeddings = response_body["embeddings"]
174
+ # depending on the model, Cohere returns a dict with the embedding types as keys or a list of lists
175
+ embeddings_list = (
176
+ next(iter(cohere_embeddings.values())) if isinstance(cohere_embeddings, dict) else cohere_embeddings
177
+ )
178
+ embedding = embeddings_list[0]
172
179
  elif "titan" in self.model:
173
180
  embedding = response_body["embedding"]
174
181
  else:
@@ -177,7 +184,7 @@ class AmazonBedrockTextEmbedder:
177
184
 
178
185
  return {"embedding": embedding}
179
186
 
180
- def to_dict(self) -> Dict[str, Any]:
187
+ def to_dict(self) -> dict[str, Any]:
181
188
  """
182
189
  Serializes the component to a dictionary.
183
190
 
@@ -197,7 +204,7 @@ class AmazonBedrockTextEmbedder:
197
204
  )
198
205
 
199
206
  @classmethod
200
- def from_dict(cls, data: Dict[str, Any]) -> "AmazonBedrockTextEmbedder":
207
+ def from_dict(cls, data: dict[str, Any]) -> "AmazonBedrockTextEmbedder":
201
208
  """
202
209
  Deserializes the component from a dictionary.
203
210