amazon-bedrock-haystack 5.0.0__py3-none-any.whl → 5.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amazon-bedrock-haystack
3
- Version: 5.0.0
3
+ Version: 5.1.0
4
4
  Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -7,7 +7,7 @@ haystack_integrations/common/s3/errors.py,sha256=BrTDLdhQvAuQutyg35cFyP5h8PNkDEi
7
7
  haystack_integrations/common/s3/utils.py,sha256=OJupFj54aQmg6S8VuVq6Lc2qpFZyyJajRVIpwe3_6iA,4744
8
8
  haystack_integrations/components/downloaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  haystack_integrations/components/downloaders/s3/__init__.py,sha256=2BOd3_N0kGqRJGH-ENrTJqOqzqHryRYaSuNqpLYKMFo,179
10
- haystack_integrations/components/downloaders/s3/s3_downloader.py,sha256=kptTCSry_uEYtNAca_pU7zQJs_LJwwJKYjuYDVJrZRE,11220
10
+ haystack_integrations/components/downloaders/s3/s3_downloader.py,sha256=qarIeGxwDCA1BOZ1qdLfE8NcQtMS9bW54a8voEBHTbE,12637
11
11
  haystack_integrations/components/embedders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  haystack_integrations/components/embedders/amazon_bedrock/__init__.py,sha256=7GlhHJ4jFHCxq5QN5losGuGtrGNjvEx2dSQvEYD2yG0,408
13
13
  haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py,sha256=DD34-HAGwGwTU7KWGqKXXlFdwIs21JavBRDHrBqC-m4,13060
@@ -23,7 +23,7 @@ haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=
23
23
  haystack_integrations/components/rankers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  haystack_integrations/components/rankers/amazon_bedrock/__init__.py,sha256=mJQKShAP5AfZvfKQisSh7kfKu6RIXzsYdk4eqMtcaEk,75
25
25
  haystack_integrations/components/rankers/amazon_bedrock/ranker.py,sha256=QWtUKfJxMrlfLCWTb8cCP-lKEthnEBwnTd1NSbiFMkg,11812
26
- amazon_bedrock_haystack-5.0.0.dist-info/METADATA,sha256=9W2R2hivKGIq1Lj6jdls8wmIncjF-k2ekFy-6ePCy0w,2228
27
- amazon_bedrock_haystack-5.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
28
- amazon_bedrock_haystack-5.0.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
29
- amazon_bedrock_haystack-5.0.0.dist-info/RECORD,,
26
+ amazon_bedrock_haystack-5.1.0.dist-info/METADATA,sha256=W5IZ1NhzQ11GjMXl5WrDjRJ0MHnV8jpAHAMyZwNe4Yo,2228
27
+ amazon_bedrock_haystack-5.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
28
+ amazon_bedrock_haystack-5.1.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
29
+ amazon_bedrock_haystack-5.1.0.dist-info/RECORD,,
@@ -5,12 +5,13 @@
5
5
  import os
6
6
  from concurrent.futures import ThreadPoolExecutor
7
7
  from pathlib import Path
8
- from typing import Any, Dict, List, Optional
8
+ from typing import Any, Callable, Dict, List, Optional
9
9
 
10
10
  from botocore.config import Config
11
11
  from haystack import component, default_from_dict, default_to_dict, logging
12
12
  from haystack.dataclasses import Document
13
13
  from haystack.utils.auth import Secret, deserialize_secrets_inplace
14
+ from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
14
15
 
15
16
  from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
16
17
  from haystack_integrations.common.s3.utils import S3Storage
@@ -41,6 +42,7 @@ class S3Downloader:
41
42
  file_name_meta_key: str = "file_name",
42
43
  max_workers: int = 32,
43
44
  max_cache_size: int = 100,
45
+ s3_key_generation_function: Optional[Callable[[Document], str]] = None,
44
46
  ) -> None:
45
47
  """
46
48
  Initializes the `S3Downloader` with the provided parameters.
@@ -64,9 +66,15 @@ class S3Downloader:
64
66
  By default, all file extensions are allowed.
65
67
  :param max_workers: The maximum number of workers to use for concurrent downloads.
66
68
  :param max_cache_size: The maximum number of files to cache.
67
- :param file_name_meta_key: The name of the meta key that contains the file name to download.
69
+ :param file_name_meta_key: The name of the meta key that contains the file name to download. The file name
70
+ will also be used to create local file path for download.
68
71
  By default, the `Document.meta["file_name"]` is used. If you want to use a
69
72
  different key in `Document.meta`, you can set it here.
73
+ :param s3_key_generation_function: An optional function that generates the S3 key for the file to download.
74
+ If not provided, the default behavior is to use `Document.meta[file_name_meta_key]`.
75
+ The function must accept a `Document` object and return a string.
76
+ If the environment variable `S3_DOWNLOADER_PREFIX` is set, its value will be automatically
77
+ prefixed to the generated S3 key.
70
78
  :raises ValueError: If the `file_root_path` is not set through
71
79
  the constructor or the `FILE_ROOT_PATH` environment variable.
72
80
 
@@ -94,6 +102,7 @@ class S3Downloader:
94
102
  self.max_workers = max_workers
95
103
  self.max_cache_size = max_cache_size
96
104
  self.file_name_meta_key = file_name_meta_key
105
+ self.s3_key_generation_function = s3_key_generation_function
97
106
 
98
107
  self._storage: Optional[S3Storage] = None
99
108
 
@@ -186,8 +195,9 @@ class S3Downloader:
186
195
  file_path.touch()
187
196
 
188
197
  else:
198
+ s3_key = self.s3_key_generation_function(document) if self.s3_key_generation_function else file_name
189
199
  # we know that _storage is not None after warm_up() is called, but mypy does not know that
190
- self._storage.download(key=file_name, local_file_path=file_path) # type: ignore[union-attr]
200
+ self._storage.download(key=s3_key, local_file_path=file_path) # type: ignore[union-attr]
191
201
 
192
202
  document.meta["file_path"] = str(file_path)
193
203
  return document
@@ -216,6 +226,11 @@ class S3Downloader:
216
226
 
217
227
  def to_dict(self) -> Dict[str, Any]:
218
228
  """Serialize the component to a dictionary."""
229
+
230
+ s3_key_generation_function_name = (
231
+ serialize_callable(self.s3_key_generation_function) if self.s3_key_generation_function else None
232
+ )
233
+
219
234
  return default_to_dict(
220
235
  self,
221
236
  aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None,
@@ -228,6 +243,7 @@ class S3Downloader:
228
243
  max_cache_size=self.max_cache_size,
229
244
  file_extensions=self.file_extensions,
230
245
  file_name_meta_key=self.file_name_meta_key,
246
+ s3_key_generation_function=s3_key_generation_function_name,
231
247
  )
232
248
 
233
249
  @classmethod
@@ -239,6 +255,11 @@ class S3Downloader:
239
255
  :returns:
240
256
  Deserialized component.
241
257
  """
258
+ s3_key_generation_function_name = data["init_parameters"].get("s3_key_generation_function")
259
+ if s3_key_generation_function_name:
260
+ data["init_parameters"]["s3_key_generation_function"] = deserialize_callable(
261
+ s3_key_generation_function_name
262
+ )
242
263
  deserialize_secrets_inplace(
243
264
  data["init_parameters"],
244
265
  ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "aws_region_name", "aws_profile_name"],