amazon-bedrock-haystack 5.0.0__py3-none-any.whl → 5.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amazon_bedrock_haystack-5.0.0.dist-info → amazon_bedrock_haystack-5.1.0.dist-info}/METADATA +1 -1
- {amazon_bedrock_haystack-5.0.0.dist-info → amazon_bedrock_haystack-5.1.0.dist-info}/RECORD +5 -5
- haystack_integrations/components/downloaders/s3/s3_downloader.py +24 -3
- {amazon_bedrock_haystack-5.0.0.dist-info → amazon_bedrock_haystack-5.1.0.dist-info}/WHEEL +0 -0
- {amazon_bedrock_haystack-5.0.0.dist-info → amazon_bedrock_haystack-5.1.0.dist-info}/licenses/LICENSE.txt +0 -0
{amazon_bedrock_haystack-5.0.0.dist-info → amazon_bedrock_haystack-5.1.0.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: amazon-bedrock-haystack
|
|
3
|
-
Version: 5.0.0
|
|
3
|
+
Version: 5.1.0
|
|
4
4
|
Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -7,7 +7,7 @@ haystack_integrations/common/s3/errors.py,sha256=BrTDLdhQvAuQutyg35cFyP5h8PNkDEi
|
|
|
7
7
|
haystack_integrations/common/s3/utils.py,sha256=OJupFj54aQmg6S8VuVq6Lc2qpFZyyJajRVIpwe3_6iA,4744
|
|
8
8
|
haystack_integrations/components/downloaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
9
|
haystack_integrations/components/downloaders/s3/__init__.py,sha256=2BOd3_N0kGqRJGH-ENrTJqOqzqHryRYaSuNqpLYKMFo,179
|
|
10
|
-
haystack_integrations/components/downloaders/s3/s3_downloader.py,sha256=
|
|
10
|
+
haystack_integrations/components/downloaders/s3/s3_downloader.py,sha256=qarIeGxwDCA1BOZ1qdLfE8NcQtMS9bW54a8voEBHTbE,12637
|
|
11
11
|
haystack_integrations/components/embedders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
haystack_integrations/components/embedders/amazon_bedrock/__init__.py,sha256=7GlhHJ4jFHCxq5QN5losGuGtrGNjvEx2dSQvEYD2yG0,408
|
|
13
13
|
haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py,sha256=DD34-HAGwGwTU7KWGqKXXlFdwIs21JavBRDHrBqC-m4,13060
|
|
@@ -23,7 +23,7 @@ haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=
|
|
|
23
23
|
haystack_integrations/components/rankers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
haystack_integrations/components/rankers/amazon_bedrock/__init__.py,sha256=mJQKShAP5AfZvfKQisSh7kfKu6RIXzsYdk4eqMtcaEk,75
|
|
25
25
|
haystack_integrations/components/rankers/amazon_bedrock/ranker.py,sha256=QWtUKfJxMrlfLCWTb8cCP-lKEthnEBwnTd1NSbiFMkg,11812
|
|
26
|
-
amazon_bedrock_haystack-5.
|
|
27
|
-
amazon_bedrock_haystack-5.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
28
|
-
amazon_bedrock_haystack-5.0.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
29
|
-
amazon_bedrock_haystack-5.0.0.dist-info/RECORD,,
|
|
26
|
+
amazon_bedrock_haystack-5.1.0.dist-info/METADATA,sha256=W5IZ1NhzQ11GjMXl5WrDjRJ0MHnV8jpAHAMyZwNe4Yo,2228
|
|
27
|
+
amazon_bedrock_haystack-5.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
28
|
+
amazon_bedrock_haystack-5.1.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
|
|
29
|
+
amazon_bedrock_haystack-5.1.0.dist-info/RECORD,,
|
|
@@ -5,12 +5,13 @@
|
|
|
5
5
|
import os
|
|
6
6
|
from concurrent.futures import ThreadPoolExecutor
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import Any, Dict, List, Optional
|
|
8
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
9
9
|
|
|
10
10
|
from botocore.config import Config
|
|
11
11
|
from haystack import component, default_from_dict, default_to_dict, logging
|
|
12
12
|
from haystack.dataclasses import Document
|
|
13
13
|
from haystack.utils.auth import Secret, deserialize_secrets_inplace
|
|
14
|
+
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
|
|
14
15
|
|
|
15
16
|
from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
|
|
16
17
|
from haystack_integrations.common.s3.utils import S3Storage
|
|
@@ -41,6 +42,7 @@ class S3Downloader:
|
|
|
41
42
|
file_name_meta_key: str = "file_name",
|
|
42
43
|
max_workers: int = 32,
|
|
43
44
|
max_cache_size: int = 100,
|
|
45
|
+
s3_key_generation_function: Optional[Callable[[Document], str]] = None,
|
|
44
46
|
) -> None:
|
|
45
47
|
"""
|
|
46
48
|
Initializes the `S3Downloader` with the provided parameters.
|
|
@@ -64,9 +66,15 @@ class S3Downloader:
|
|
|
64
66
|
By default, all file extensions are allowed.
|
|
65
67
|
:param max_workers: The maximum number of workers to use for concurrent downloads.
|
|
66
68
|
:param max_cache_size: The maximum number of files to cache.
|
|
67
|
-
:param file_name_meta_key: The name of the meta key that contains the file name to download.
|
|
69
|
+
:param file_name_meta_key: The name of the meta key that contains the file name to download. The file name
|
|
70
|
+
will also be used to create local file path for download.
|
|
68
71
|
By default, the `Document.meta["file_name"]` is used. If you want to use a
|
|
69
72
|
different key in `Document.meta`, you can set it here.
|
|
73
|
+
:param s3_key_generation_function: An optional function that generates the S3 key for the file to download.
|
|
74
|
+
If not provided, the default behavior is to use `Document.meta[file_name_meta_key]`.
|
|
75
|
+
The function must accept a `Document` object and return a string.
|
|
76
|
+
If the environment variable `S3_DOWNLOADER_PREFIX` is set, its value will be automatically
|
|
77
|
+
prefixed to the generated S3 key.
|
|
70
78
|
:raises ValueError: If the `file_root_path` is not set through
|
|
71
79
|
the constructor or the `FILE_ROOT_PATH` environment variable.
|
|
72
80
|
|
|
@@ -94,6 +102,7 @@ class S3Downloader:
|
|
|
94
102
|
self.max_workers = max_workers
|
|
95
103
|
self.max_cache_size = max_cache_size
|
|
96
104
|
self.file_name_meta_key = file_name_meta_key
|
|
105
|
+
self.s3_key_generation_function = s3_key_generation_function
|
|
97
106
|
|
|
98
107
|
self._storage: Optional[S3Storage] = None
|
|
99
108
|
|
|
@@ -186,8 +195,9 @@ class S3Downloader:
|
|
|
186
195
|
file_path.touch()
|
|
187
196
|
|
|
188
197
|
else:
|
|
198
|
+
s3_key = self.s3_key_generation_function(document) if self.s3_key_generation_function else file_name
|
|
189
199
|
# we know that _storage is not None after warm_up() is called, but mypy does not know that
|
|
190
|
-
self._storage.download(key=file_name, local_file_path=file_path) # type: ignore[union-attr]
|
|
200
|
+
self._storage.download(key=s3_key, local_file_path=file_path) # type: ignore[union-attr]
|
|
191
201
|
|
|
192
202
|
document.meta["file_path"] = str(file_path)
|
|
193
203
|
return document
|
|
@@ -216,6 +226,11 @@ class S3Downloader:
|
|
|
216
226
|
|
|
217
227
|
def to_dict(self) -> Dict[str, Any]:
|
|
218
228
|
"""Serialize the component to a dictionary."""
|
|
229
|
+
|
|
230
|
+
s3_key_generation_function_name = (
|
|
231
|
+
serialize_callable(self.s3_key_generation_function) if self.s3_key_generation_function else None
|
|
232
|
+
)
|
|
233
|
+
|
|
219
234
|
return default_to_dict(
|
|
220
235
|
self,
|
|
221
236
|
aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None,
|
|
@@ -228,6 +243,7 @@ class S3Downloader:
|
|
|
228
243
|
max_cache_size=self.max_cache_size,
|
|
229
244
|
file_extensions=self.file_extensions,
|
|
230
245
|
file_name_meta_key=self.file_name_meta_key,
|
|
246
|
+
s3_key_generation_function=s3_key_generation_function_name,
|
|
231
247
|
)
|
|
232
248
|
|
|
233
249
|
@classmethod
|
|
@@ -239,6 +255,11 @@ class S3Downloader:
|
|
|
239
255
|
:returns:
|
|
240
256
|
Deserialized component.
|
|
241
257
|
"""
|
|
258
|
+
s3_key_generation_function_name = data["init_parameters"].get("s3_key_generation_function")
|
|
259
|
+
if s3_key_generation_function_name:
|
|
260
|
+
data["init_parameters"]["s3_key_generation_function"] = deserialize_callable(
|
|
261
|
+
s3_key_generation_function_name
|
|
262
|
+
)
|
|
242
263
|
deserialize_secrets_inplace(
|
|
243
264
|
data["init_parameters"],
|
|
244
265
|
["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "aws_region_name", "aws_profile_name"],
|
|
File without changes
|
|
File without changes
|