amazon-bedrock-haystack 5.0.0__tar.gz → 5.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/CHANGELOG.md +6 -0
  2. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/PKG-INFO +1 -1
  3. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/downloaders/s3/s3_downloader.py +24 -3
  4. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_s3_downloader.py +72 -6
  5. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/.gitignore +0 -0
  6. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/LICENSE.txt +0 -0
  7. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/README.md +0 -0
  8. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/examples/bedrock_ranker_example.py +0 -0
  9. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/examples/chatgenerator_example.py +0 -0
  10. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/examples/embedders_generator_with_rag_example.py +0 -0
  11. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/examples/s3_downloader_example.py +0 -0
  12. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/pydoc/config.yml +0 -0
  13. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/pyproject.toml +0 -0
  14. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/common/amazon_bedrock/__init__.py +0 -0
  15. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/common/amazon_bedrock/errors.py +0 -0
  16. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/common/amazon_bedrock/utils.py +0 -0
  17. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/common/py.typed +0 -0
  18. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/common/s3/__init__.py +0 -0
  19. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/common/s3/errors.py +0 -0
  20. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/common/s3/utils.py +0 -0
  21. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/downloaders/py.typed +0 -0
  22. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/downloaders/s3/__init__.py +0 -0
  23. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/__init__.py +0 -0
  24. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py +0 -0
  25. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_image_embedder.py +0 -0
  26. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py +0 -0
  27. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/embedders/py.typed +0 -0
  28. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py +0 -0
  29. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py +0 -0
  30. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py +0 -0
  31. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +0 -0
  32. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py +0 -0
  33. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +0 -0
  34. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/generators/py.typed +0 -0
  35. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/rankers/amazon_bedrock/__init__.py +0 -0
  36. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/rankers/amazon_bedrock/ranker.py +0 -0
  37. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/src/haystack_integrations/components/rankers/py.typed +0 -0
  38. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/__init__.py +0 -0
  39. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/conftest.py +0 -0
  40. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_chat_generator.py +0 -0
  41. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_chat_generator_utils.py +0 -0
  42. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_document_embedder.py +0 -0
  43. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_document_image_embedder.py +0 -0
  44. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_files/apple.jpg +0 -0
  45. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_files/haystack-logo.png +0 -0
  46. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_files/sample_pdf_1.pdf +0 -0
  47. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_generator.py +0 -0
  48. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_ranker.py +0 -0
  49. {amazon_bedrock_haystack-5.0.0 → amazon_bedrock_haystack-5.1.0}/tests/test_text_embedder.py +0 -0
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## [integrations/amazon_bedrock-v5.0.0] - 2025-09-22
4
+
5
+ ### 🧹 Chores
6
+
7
+ - [**breaking**] Remove deprecated `BedrockRanker` (use `AmazonBedrockRanker` instead) (#2287)
8
+
3
9
  ## [integrations/amazon_bedrock-v4.2.0] - 2025-09-19
4
10
 
5
11
  ### 🚀 Features
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amazon-bedrock-haystack
3
- Version: 5.0.0
3
+ Version: 5.1.0
4
4
  Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -5,12 +5,13 @@
5
5
  import os
6
6
  from concurrent.futures import ThreadPoolExecutor
7
7
  from pathlib import Path
8
- from typing import Any, Dict, List, Optional
8
+ from typing import Any, Callable, Dict, List, Optional
9
9
 
10
10
  from botocore.config import Config
11
11
  from haystack import component, default_from_dict, default_to_dict, logging
12
12
  from haystack.dataclasses import Document
13
13
  from haystack.utils.auth import Secret, deserialize_secrets_inplace
14
+ from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
14
15
 
15
16
  from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
16
17
  from haystack_integrations.common.s3.utils import S3Storage
@@ -41,6 +42,7 @@ class S3Downloader:
41
42
  file_name_meta_key: str = "file_name",
42
43
  max_workers: int = 32,
43
44
  max_cache_size: int = 100,
45
+ s3_key_generation_function: Optional[Callable[[Document], str]] = None,
44
46
  ) -> None:
45
47
  """
46
48
  Initializes the `S3Downloader` with the provided parameters.
@@ -64,9 +66,15 @@ class S3Downloader:
64
66
  By default, all file extensions are allowed.
65
67
  :param max_workers: The maximum number of workers to use for concurrent downloads.
66
68
  :param max_cache_size: The maximum number of files to cache.
67
- :param file_name_meta_key: The name of the meta key that contains the file name to download.
69
+ :param file_name_meta_key: The name of the meta key that contains the file name to download. The file name
70
+ will also be used to create local file path for download.
68
71
  By default, the `Document.meta["file_name"]` is used. If you want to use a
69
72
  different key in `Document.meta`, you can set it here.
73
+ :param s3_key_generation_function: An optional function that generates the S3 key for the file to download.
74
+ If not provided, the default behavior is to use `Document.meta[file_name_meta_key]`.
75
+ The function must accept a `Document` object and return a string.
76
+ If the environment variable `S3_DOWNLOADER_PREFIX` is set, its value will be automatically
77
+ prefixed to the generated S3 key.
70
78
  :raises ValueError: If the `file_root_path` is not set through
71
79
  the constructor or the `FILE_ROOT_PATH` environment variable.
72
80
 
@@ -94,6 +102,7 @@ class S3Downloader:
94
102
  self.max_workers = max_workers
95
103
  self.max_cache_size = max_cache_size
96
104
  self.file_name_meta_key = file_name_meta_key
105
+ self.s3_key_generation_function = s3_key_generation_function
97
106
 
98
107
  self._storage: Optional[S3Storage] = None
99
108
 
@@ -186,8 +195,9 @@ class S3Downloader:
186
195
  file_path.touch()
187
196
 
188
197
  else:
198
+ s3_key = self.s3_key_generation_function(document) if self.s3_key_generation_function else file_name
189
199
  # we know that _storage is not None after warm_up() is called, but mypy does not know that
190
- self._storage.download(key=file_name, local_file_path=file_path) # type: ignore[union-attr]
200
+ self._storage.download(key=s3_key, local_file_path=file_path) # type: ignore[union-attr]
191
201
 
192
202
  document.meta["file_path"] = str(file_path)
193
203
  return document
@@ -216,6 +226,11 @@ class S3Downloader:
216
226
 
217
227
  def to_dict(self) -> Dict[str, Any]:
218
228
  """Serialize the component to a dictionary."""
229
+
230
+ s3_key_generation_function_name = (
231
+ serialize_callable(self.s3_key_generation_function) if self.s3_key_generation_function else None
232
+ )
233
+
219
234
  return default_to_dict(
220
235
  self,
221
236
  aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None,
@@ -228,6 +243,7 @@ class S3Downloader:
228
243
  max_cache_size=self.max_cache_size,
229
244
  file_extensions=self.file_extensions,
230
245
  file_name_meta_key=self.file_name_meta_key,
246
+ s3_key_generation_function=s3_key_generation_function_name,
231
247
  )
232
248
 
233
249
  @classmethod
@@ -239,6 +255,11 @@ class S3Downloader:
239
255
  :returns:
240
256
  Deserialized component.
241
257
  """
258
+ s3_key_generation_function_name = data["init_parameters"].get("s3_key_generation_function")
259
+ if s3_key_generation_function_name:
260
+ data["init_parameters"]["s3_key_generation_function"] = deserialize_callable(
261
+ s3_key_generation_function_name
262
+ )
242
263
  deserialize_secrets_inplace(
243
264
  data["init_parameters"],
244
265
  ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "aws_region_name", "aws_profile_name"],
@@ -30,6 +30,10 @@ def mock_s3_storage():
30
30
  yield mock
31
31
 
32
32
 
33
+ def s3_key_generation_function(document: Document) -> str:
34
+ return document.meta["file_name"] + "_suffix"
35
+
36
+
33
37
  class TestS3Downloader:
34
38
  def test_init(self, mock_boto3_session, set_env_variables, tmp_path):
35
39
  S3Downloader(file_root_path=str(tmp_path))
@@ -74,6 +78,7 @@ class TestS3Downloader:
74
78
  "max_cache_size": 100,
75
79
  "max_workers": 32,
76
80
  "file_name_meta_key": "file_name",
81
+ "s3_key_generation_function": None,
77
82
  },
78
83
  }
79
84
  assert d.to_dict() == expected
@@ -89,6 +94,7 @@ class TestS3Downloader:
89
94
  "aws_session_token": {"type": "env_var", "env_vars": ["AWS_SESSION_TOKEN"], "strict": False},
90
95
  "aws_profile_name": {"type": "env_var", "env_vars": ["AWS_PROFILE"], "strict": False},
91
96
  "file_root_path": str(tmp_path),
97
+ "s3_key_generation_function": None,
92
98
  },
93
99
  }
94
100
  d = S3Downloader.from_dict(data)
@@ -101,6 +107,7 @@ class TestS3Downloader:
101
107
  max_cache_size=400,
102
108
  max_workers=40,
103
109
  file_name_meta_key="new_file_key",
110
+ s3_key_generation_function=s3_key_generation_function,
104
111
  )
105
112
  expected = {
106
113
  "type": TYPE,
@@ -115,6 +122,7 @@ class TestS3Downloader:
115
122
  "max_cache_size": 400,
116
123
  "max_workers": 40,
117
124
  "file_name_meta_key": "new_file_key",
125
+ "s3_key_generation_function": "tests.test_s3_downloader.s3_key_generation_function",
118
126
  },
119
127
  }
120
128
  assert d.to_dict() == expected
@@ -158,14 +166,50 @@ class TestS3Downloader:
158
166
  assert len(out["documents"]) == 1
159
167
  assert out["documents"][0].meta["custom_file_key"] == "a.txt"
160
168
 
169
+ def test_run_with_s3_key_generation_function(self, tmp_path, mock_s3_storage, mock_boto3_session):
170
+ d = S3Downloader(file_root_path=str(tmp_path), s3_key_generation_function=s3_key_generation_function)
171
+ S3Downloader.warm_up(d)
172
+ d._storage = mock_s3_storage
173
+
174
+ docs = [
175
+ Document(meta={"file_id": str(uuid4()), "file_name": "a.txt"}),
176
+ ]
177
+ out = d.run(documents=docs)
178
+ assert len(out["documents"]) == 1
179
+ assert out["documents"][0].meta["file_name"] == "a.txt"
180
+
181
+ mock_s3_storage.download.assert_called_once()
182
+ assert mock_s3_storage.download.call_args.kwargs["key"] == "a.txt_suffix"
183
+
184
+ def test_run_with_s3_key_generation_function_and_file_extensions(
185
+ self, tmp_path, mock_s3_storage, mock_boto3_session
186
+ ):
187
+ d = S3Downloader(
188
+ file_root_path=str(tmp_path),
189
+ s3_key_generation_function=s3_key_generation_function,
190
+ file_extensions=[".txt"],
191
+ )
192
+ S3Downloader.warm_up(d)
193
+ d._storage = mock_s3_storage
194
+
195
+ docs = [
196
+ Document(meta={"file_id": str(uuid4()), "file_name": "a.txt"}),
197
+ Document(meta={"file_id": str(uuid4()), "file_name": "b.pdf"}),
198
+ ]
199
+ out = d.run(documents=docs)
200
+ assert len(out["documents"]) == 1
201
+ assert out["documents"][0].meta["file_name"] == "a.txt"
202
+ mock_s3_storage.download.assert_called_once()
203
+ assert mock_s3_storage.download.call_args.kwargs["key"] == "a.txt_suffix"
204
+
161
205
  @pytest.mark.integration
162
206
  @pytest.mark.skipif(
163
207
  not os.environ.get("S3_DOWNLOADER_BUCKET", None),
164
208
  reason="Export an env var called `S3_DOWNLOADER_BUCKET` containing the S3 bucket to run this test.",
165
209
  )
166
- def test_live_run(self, tmp_path):
210
+ def test_live_run(self, tmp_path, monkeypatch):
167
211
  d = S3Downloader(file_root_path=str(tmp_path))
168
- os.environ["S3_DOWNLOADER_PREFIX"] = ""
212
+ monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "")
169
213
  S3Downloader.warm_up(d)
170
214
 
171
215
  docs = [
@@ -200,9 +244,9 @@ class TestS3Downloader:
200
244
  not os.environ.get("S3_DOWNLOADER_BUCKET", None),
201
245
  reason="Export an env var called `S3_DOWNLOADER_BUCKET` containing the S3 bucket to run this test.",
202
246
  )
203
- def test_live_run_with_custom_meta_key(self, tmp_path):
247
+ def test_live_run_with_custom_meta_key(self, tmp_path, monkeypatch):
204
248
  d = S3Downloader(file_root_path=str(tmp_path), file_name_meta_key="custom_name")
205
- os.environ["S3_DOWNLOADER_PREFIX"] = ""
249
+ monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "")
206
250
  S3Downloader.warm_up(d)
207
251
  docs = [
208
252
  Document(meta={"custom_name": "text-sample.txt"}),
@@ -216,9 +260,9 @@ class TestS3Downloader:
216
260
  not os.environ.get("S3_DOWNLOADER_BUCKET", None),
217
261
  reason="Export an env var called `S3_DOWNLOADER_BUCKET` containing the S3 bucket to run this test.",
218
262
  )
219
- def test_live_run_with_prefix(self, tmp_path):
263
+ def test_live_run_with_prefix(self, tmp_path, monkeypatch):
220
264
  d = S3Downloader(file_root_path=str(tmp_path))
221
- os.environ["S3_DOWNLOADER_PREFIX"] = "subfolder/"
265
+ monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "subfolder/")
222
266
 
223
267
  S3Downloader.warm_up(d)
224
268
  docs = [
@@ -227,3 +271,25 @@ class TestS3Downloader:
227
271
  out = d.run(documents=docs)
228
272
  assert len(out["documents"]) == 1
229
273
  assert out["documents"][0].meta["file_name"] == "employees.json"
274
+
275
+ @pytest.mark.integration
276
+ @pytest.mark.skipif(
277
+ not os.environ.get("S3_DOWNLOADER_BUCKET", None),
278
+ reason="Export an env var called `S3_DOWNLOADER_BUCKET` containing the S3 bucket to run this test.",
279
+ )
280
+ def test_live_run_with_s3_key_generation_function_and_file_extensions(self, tmp_path):
281
+ # the file in the s3 bucket has this key: "dog.jpg_suffix"
282
+
283
+ d = S3Downloader(
284
+ file_root_path=str(tmp_path),
285
+ file_extensions=[".jpg"],
286
+ file_name_meta_key="file_name",
287
+ s3_key_generation_function=s3_key_generation_function,
288
+ )
289
+ S3Downloader.warm_up(d)
290
+ docs = [
291
+ Document(meta={"file_name": "dog.jpg"}),
292
+ ]
293
+ out = d.run(documents=docs)
294
+ assert len(out["documents"]) == 1
295
+ assert out["documents"][0].meta["file_name"] == "dog.jpg"