amazon-bedrock-haystack 4.0.0__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amazon-bedrock-haystack
3
- Version: 4.0.0
4
- Summary: An integration of Amazon Bedrock as an AmazonBedrockGenerator component.
3
+ Version: 4.2.0
4
+ Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
7
7
  Project-URL: Source, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock
@@ -1,7 +1,13 @@
1
1
  haystack_integrations/common/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  haystack_integrations/common/amazon_bedrock/__init__.py,sha256=6GZ8Y3Lw0rLOsOAqi6Tu5mZC977UzQvgDxKpOWr8IQw,110
3
- haystack_integrations/common/amazon_bedrock/errors.py,sha256=ReheDbY7L3EJkWcUoih6lWHjbPHg2TlUs9SnXIKK7Gg,744
4
- haystack_integrations/common/amazon_bedrock/utils.py,sha256=ASAwEhInF9F6rhL4CbXFQUFU1pSdscWvG6jcrXkEUhc,2735
3
+ haystack_integrations/common/amazon_bedrock/errors.py,sha256=47w_rg3JTGJ5QfsUELrjYBeuLajxUmQ7-zOFvdz4mT8,856
4
+ haystack_integrations/common/amazon_bedrock/utils.py,sha256=UIJVl1e_hlP9mQr_YvVTFklbc_kqOgI7DnSxcAuSLx0,2846
5
+ haystack_integrations/common/s3/__init__.py,sha256=6GZ8Y3Lw0rLOsOAqi6Tu5mZC977UzQvgDxKpOWr8IQw,110
6
+ haystack_integrations/common/s3/errors.py,sha256=BrTDLdhQvAuQutyg35cFyP5h8PNkDEieLwehi58UqAU,452
7
+ haystack_integrations/common/s3/utils.py,sha256=OJupFj54aQmg6S8VuVq6Lc2qpFZyyJajRVIpwe3_6iA,4744
8
+ haystack_integrations/components/downloaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ haystack_integrations/components/downloaders/s3/__init__.py,sha256=2BOd3_N0kGqRJGH-ENrTJqOqzqHryRYaSuNqpLYKMFo,179
10
+ haystack_integrations/components/downloaders/s3/s3_downloader.py,sha256=kptTCSry_uEYtNAca_pU7zQJs_LJwwJKYjuYDVJrZRE,11220
5
11
  haystack_integrations/components/embedders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
12
  haystack_integrations/components/embedders/amazon_bedrock/__init__.py,sha256=7GlhHJ4jFHCxq5QN5losGuGtrGNjvEx2dSQvEYD2yG0,408
7
13
  haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py,sha256=DD34-HAGwGwTU7KWGqKXXlFdwIs21JavBRDHrBqC-m4,13060
@@ -12,12 +18,12 @@ haystack_integrations/components/generators/amazon_bedrock/__init__.py,sha256=lv
12
18
  haystack_integrations/components/generators/amazon_bedrock/adapters.py,sha256=yBC-3YwV6qAwSXMtdZiLSYh2lUpPQIDy7Efl7w-Cu-k,19640
13
19
  haystack_integrations/components/generators/amazon_bedrock/generator.py,sha256=Brzw0XvtPJhz2kR2I3liAqWHRmDR6p5HzJerEAPhoJU,14743
14
20
  haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py,sha256=6GZ8Y3Lw0rLOsOAqi6Tu5mZC977UzQvgDxKpOWr8IQw,110
15
- haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py,sha256=_0dpBoZGY9kgK9zQOTskcjElcTifwhyBAixXDliK-vY,24918
16
- haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=eF2wldu1IppL64f01N3PIa9_-BZEolQzEz9NjXvEFTQ,25810
21
+ haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py,sha256=qArwfXcforWnPzLXrAW-1hkPFpMy3NSdDyJ5GOta25w,26068
22
+ haystack_integrations/components/generators/amazon_bedrock/chat/utils.py,sha256=1M_k8CG2WH23Yz-sB7a1kiIqVh2QB8Pqi0zbWXyMUL8,27255
17
23
  haystack_integrations/components/rankers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
24
  haystack_integrations/components/rankers/amazon_bedrock/__init__.py,sha256=Zrc3BSVkEaXYpliEi6hKG9bqW4J7DNk93p50SuoyT1Q,107
19
25
  haystack_integrations/components/rankers/amazon_bedrock/ranker.py,sha256=enAjf2QyDwfpidKkFCdLz954cx-Tjh9emrOS3vINJDg,12344
20
- amazon_bedrock_haystack-4.0.0.dist-info/METADATA,sha256=8iHYVwqO_nLLbLOh59yoqxwnskmfBiDupxxSTWiCvcc,2222
21
- amazon_bedrock_haystack-4.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
- amazon_bedrock_haystack-4.0.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
23
- amazon_bedrock_haystack-4.0.0.dist-info/RECORD,,
26
+ amazon_bedrock_haystack-4.2.0.dist-info/METADATA,sha256=HM32juznvZMTZgwNyS34vsLTZeBsLxaru7sUmGz4xQA,2228
27
+ amazon_bedrock_haystack-4.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
28
+ amazon_bedrock_haystack-4.2.0.dist-info/licenses/LICENSE.txt,sha256=B05uMshqTA74s-0ltyHKI6yoPfJ3zYgQbvcXfDVGFf8,10280
29
+ amazon_bedrock_haystack-4.2.0.dist-info/RECORD,,
@@ -1,3 +1,8 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+
1
6
  class AmazonBedrockError(Exception):
2
7
  """
3
8
  Any error generated by the Amazon Bedrock integration.
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
1
5
  from typing import Any, Optional, Union
2
6
 
3
7
  import aioboto3
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,15 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+
6
class S3Error(Exception):
    """Base class for every error raised by the S3-based components."""


class S3ConfigurationError(S3Error):
    """Raised when an S3 component is not configured correctly."""


class S3StorageError(S3Error):
    """Raised when an operation on an ``S3Storage`` object fails."""
@@ -0,0 +1,120 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import os
6
+ from dataclasses import dataclass
7
+ from http import HTTPStatus
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ from boto3.session import Session
12
+ from botocore.config import Config
13
+ from botocore.exceptions import ClientError, NoCredentialsError, PartialCredentialsError
14
+
15
+ from haystack_integrations.common.s3.errors import S3ConfigurationError, S3StorageError
16
+
17
+
18
class S3Storage:
    """Download files from an AWS S3 bucket through a boto3 session client.

    The S3 client is created once, when the object is constructed, and is
    reused for every call to :meth:`download`.
    """

    def __init__(
        self,
        s3_bucket: str,
        session: Session,
        s3_prefix: Optional[str] = None,
        endpoint_url: Optional[str] = None,
        config: Optional[Config] = None,
    ) -> None:
        """
        Initializes the S3Storage object and creates its S3 client.

        :param s3_bucket: The name of the S3 bucket to download files from.
        :param session: The session to use for the S3 client.
        :param s3_prefix: The optional prefix of the files in the S3 bucket.
            It is prepended verbatim to every key, so a folder prefix must end
            with a slash: for "folder/subfolder/file.txt" use
            "folder/subfolder/". Use None for files in the bucket root.
        :param endpoint_url: The endpoint URL of the S3 bucket to download files from.
        :param config: The configuration to use for the S3 client.
        :raises S3ConfigurationError: If the S3 session client cannot be created.
        """

        self.s3_bucket = s3_bucket
        self.s3_prefix = s3_prefix
        self.endpoint_url = endpoint_url
        self.session = session
        self.config = config

        try:
            self._client = self.session.client("s3", endpoint_url=self.endpoint_url, config=self.config)
        except Exception as e:
            msg = f"Failed to create S3 session client: {e}"
            raise S3ConfigurationError(msg) from e

    @staticmethod
    def _http_status(response: dict) -> Optional[int]:
        """Return the HTTP status described by a botocore error response, if any.

        The ``Error.Code`` entry is used when it is numeric (e.g. "404").
        Symbolic codes such as "NoSuchKey" or "AccessDenied" cannot be parsed
        as integers, so in that case the ``ResponseMetadata.HTTPStatusCode``
        entry is used instead.

        :param response: The ``response`` mapping of a ``ClientError``.
        :returns: The HTTP status code, or None if it cannot be determined.
        """
        error = response.get("Error") or {}
        try:
            return int(error.get("Code"))
        except (TypeError, ValueError):
            pass
        status = (response.get("ResponseMetadata") or {}).get("HTTPStatusCode")
        return status if isinstance(status, int) else None

    def download(self, key: str, local_file_path: Path) -> None:
        """Download a file from S3.

        :param key: The key of the file to download. The configured
            ``s3_prefix``, if any, is prepended to it.
        :param local_file_path: The local path the file is written to.
            Its parent folder is created if it does not exist.
        :raises S3ConfigurationError: If AWS credentials are missing or access
            to the bucket is forbidden.
        :raises S3StorageError: If the file does not exist in the S3 bucket
            or the file cannot be downloaded.
        """

        s3_key = f"{self.s3_prefix}{key}" if self.s3_prefix else key

        # Keys may contain "/" and therefore map to nested local folders.
        Path(local_file_path).parent.mkdir(parents=True, exist_ok=True)

        try:
            self._client.download_file(self.s3_bucket, s3_key, str(local_file_path))

        except (NoCredentialsError, PartialCredentialsError) as e:
            msg = (
                f"Missing AWS credentials. Please check your AWS credentials (access key, secret key, region). "
                f"Error: {e}"
            )
            raise S3ConfigurationError(msg) from e

        except ClientError as e:
            # Never call int() directly on the error code: it is not always numeric.
            status = self._http_status(e.response)

            if status == HTTPStatus.FORBIDDEN:
                msg = (
                    f"Failed to access S3 bucket {self.s3_bucket!r}. "
                    f"Please check your AWS credentials (access key, secret key, region) and ensure "
                    f"they have the necessary S3 permissions. "
                    f"Error: {e}"
                )
                raise S3ConfigurationError(msg) from e

            if status == HTTPStatus.NOT_FOUND:
                msg = f"The object {s3_key!r} does not exist in the S3 bucket {self.s3_bucket!r}. \n Error: {e}"
                raise S3StorageError(msg) from e

            msg = f"Failed to download file {s3_key!r} from S3. Error: {e}"
            raise S3StorageError(msg) from e

    @classmethod
    def from_env(cls, *, session: Session, config: Config) -> "S3Storage":
        """Create a S3Storage object from environment variables.

        Reads ``S3_DOWNLOADER_BUCKET`` (required), ``S3_DOWNLOADER_PREFIX``
        and ``AWS_ENDPOINT_URL`` (both optional; empty values count as unset).

        :param session: The session to use for the S3 client.
        :param config: The configuration to use for the S3 client.
        :returns: The configured S3Storage object.
        :raises ValueError: If ``S3_DOWNLOADER_BUCKET`` is missing or empty.
        """
        s3_bucket = os.getenv("S3_DOWNLOADER_BUCKET")
        if not s3_bucket:
            msg = (
                "Missing environment variable S3_DOWNLOADER_BUCKET. "
                "Please set it to the name of the S3 bucket to download files from."
            )
            raise ValueError(msg)
        s3_prefix = os.getenv("S3_DOWNLOADER_PREFIX") or None
        endpoint_url = os.getenv("AWS_ENDPOINT_URL") or None
        return cls(
            s3_bucket=s3_bucket,
            s3_prefix=s3_prefix,
            endpoint_url=endpoint_url,
            session=session,
            config=config,
        )
File without changes
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .s3_downloader import S3Downloader
6
+
7
+ __all__ = ["S3Downloader"]
@@ -0,0 +1,246 @@
1
+ # SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import os
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from botocore.config import Config
11
+ from haystack import component, default_from_dict, default_to_dict, logging
12
+ from haystack.dataclasses import Document
13
+ from haystack.utils.auth import Secret, deserialize_secrets_inplace
14
+
15
+ from haystack_integrations.common.amazon_bedrock.utils import get_aws_session
16
+ from haystack_integrations.common.s3.utils import S3Storage
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@component
class S3Downloader:
    """
    A component for downloading files from AWS S3 Buckets to local filesystem.
    Supports filtering by file extensions.

    Files are stored under `file_root_path`, which also acts as a cache: a file that
    is already present locally is not downloaded again.
    """

    def __init__(
        self,
        *,
        aws_access_key_id: Optional[Secret] = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False),  # noqa: B008
        aws_secret_access_key: Optional[Secret] = Secret.from_env_var(  # noqa: B008
            "AWS_SECRET_ACCESS_KEY", strict=False
        ),
        aws_session_token: Optional[Secret] = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False),  # noqa: B008
        aws_region_name: Optional[Secret] = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False),  # noqa: B008
        aws_profile_name: Optional[Secret] = Secret.from_env_var("AWS_PROFILE", strict=False),  # noqa: B008
        boto3_config: Optional[Dict[str, Any]] = None,
        file_root_path: Optional[str] = None,
        file_extensions: Optional[List[str]] = None,
        file_name_meta_key: str = "file_name",
        max_workers: int = 32,
        max_cache_size: int = 100,
    ) -> None:
        """
        Initializes the `S3Downloader` with the provided parameters.

        Note that the AWS credentials are not required if the AWS environment is configured correctly. These are loaded
        automatically from the environment or the AWS configuration file and do not need to be provided explicitly via
        the constructor. If the AWS environment is not configured, users need to provide the AWS credentials via the
        constructor. In that case the three required parameters are `aws_access_key_id`, `aws_secret_access_key`,
        and `aws_region_name`.

        :param aws_access_key_id: AWS access key ID.
        :param aws_secret_access_key: AWS secret access key.
        :param aws_session_token: AWS session token.
        :param aws_region_name: AWS region name.
        :param aws_profile_name: AWS profile name.
        :param boto3_config: The configuration for the boto3 client.
        :param file_root_path: The path where the file will be downloaded.
            Can be set through this parameter or the `FILE_ROOT_PATH` environment variable.
            If none of them is set, a `ValueError` is raised.
        :param file_extensions: The file extensions that are permitted to be downloaded.
            By default, all file extensions are allowed.
        :param file_name_meta_key: The name of the meta key that contains the file name to download.
            By default, the `Document.meta["file_name"]` is used. If you want to use a
            different key in `Document.meta`, you can set it here.
        :param max_workers: The maximum number of workers to use for concurrent downloads.
        :param max_cache_size: The maximum number of files to cache.
        :raises ValueError: If the `file_root_path` is not set through
            the constructor or the `FILE_ROOT_PATH` environment variable.
        """

        # Set up download directory
        file_root_path = file_root_path or os.getenv("FILE_ROOT_PATH")

        # An empty value must be rejected too: Path("") silently means the current directory.
        if not file_root_path:
            msg = (
                "The path where files will be downloaded is not set. Please set the "
                "`file_root_path` init parameter or the `FILE_ROOT_PATH` environment variable."
            )
            raise ValueError(msg)

        self.file_root_path = Path(file_root_path)

        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.aws_region_name = aws_region_name
        self.aws_session_token = aws_session_token
        self.aws_profile_name = aws_profile_name
        self.boto3_config = boto3_config
        self.file_extensions = [e.lower() for e in file_extensions] if file_extensions else None
        self.max_workers = max_workers
        self.max_cache_size = max_cache_size
        self.file_name_meta_key = file_name_meta_key

        # Created lazily in warm_up()
        self._storage: Optional[S3Storage] = None

        def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
            return secret.resolve_value() if secret else None

        self._session = get_aws_session(
            aws_access_key_id=resolve_secret(aws_access_key_id),
            aws_secret_access_key=resolve_secret(aws_secret_access_key),
            aws_session_token=resolve_secret(aws_session_token),
            aws_region_name=resolve_secret(aws_region_name),
            aws_profile_name=resolve_secret(aws_profile_name),
        )
        self._config = Config(
            user_agent_extra="x-client-framework:haystack", **(self.boto3_config if self.boto3_config else {})
        )

    def warm_up(self) -> None:
        """Warm up the component by creating the download directory and the S3 storage."""
        if self._storage is None:
            self.file_root_path.mkdir(parents=True, exist_ok=True)
            self._storage = S3Storage.from_env(session=self._session, config=self._config)

    @component.output_types(documents=List[Document])
    def run(
        self,
        documents: List[Document],
    ) -> Dict[str, List[Document]]:
        """Download files from AWS S3 Buckets to local filesystem.

        Return enriched `Document`s with the path of the downloaded file.
        :param documents: Document containing the name of the file to download in the meta field.
        :returns: A dictionary with:
            - `documents`: The downloaded `Document`s; each has `meta['file_path']`.
        :raises S3Error: If a download attempt fails or the file does not exist in the S3 bucket.
        :raises RuntimeError: If the component was not warmed up.
        """

        if self._storage is None:
            msg = f"The component {self.__class__.__name__} was not warmed up. Call 'warm_up()' before calling run()."
            raise RuntimeError(msg)

        filtered_documents = self._filter_documents_by_extensions(documents)

        if not filtered_documents:
            return {"documents": []}

        # No point in starting more threads than there are files to fetch.
        max_workers = min(self.max_workers, len(filtered_documents))

        try:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                # Materialize inside the block so that worker exceptions surface here.
                results = list(executor.map(self._download_file, filtered_documents))
        finally:
            # Enforce the cache limit even when a download failed.
            self._cleanup_cache(filtered_documents)

        return {"documents": [doc for doc in results if doc is not None]}

    def _filter_documents_by_extensions(self, documents: List[Document]) -> List[Document]:
        """Keep only the documents whose file name has one of the permitted extensions.

        Extensions are compared case-insensitively and may be configured with or
        without the leading dot. All documents are returned when no extension
        filter is configured.
        """
        if not self.file_extensions:
            return documents

        allowed = {ext if (not ext or ext.startswith(".")) else f".{ext}" for ext in self.file_extensions}
        return [
            doc for doc in documents if Path(doc.meta.get(self.file_name_meta_key, "")).suffix.lower() in allowed
        ]

    def _download_file(self, document: Document) -> Optional[Document]:
        """
        Download a single file from AWS S3 Bucket to local filesystem.

        :param document: `Document` with the name of the file to download in the meta field.
        :returns:
            The same `Document` with `meta` containing the `file_path` of the
            downloaded file, or `None` if the document has no file name.
        :raises S3Error: If the download fails or the file does not exist in the S3 bucket.
        """

        file_name = document.meta.get(self.file_name_meta_key)
        if not file_name:
            logger.warning(
                "Document missing required file name metadata key '{key}'. Skipping download.",
                key=self.file_name_meta_key,
            )
            return None

        file_path = self.file_root_path / Path(file_name)

        if file_path.is_file():
            # set access and modification time to now without redownloading the file
            file_path.touch()

        else:
            # we know that _storage is not None after warm_up() is called, but mypy does not know that
            self._storage.download(key=file_name, local_file_path=file_path)  # type: ignore[union-attr]

        document.meta["file_path"] = str(file_path)
        return document

    def _cleanup_cache(self, documents: List[Document]) -> None:
        """
        Remove least-recently-accessed cache files when the cache exceeds `max_cache_size`.

        Only files located directly in `file_root_path` are counted and evicted.
        Files requested by `documents` are never removed, so the `file_path`
        values handed back to the caller stay valid.

        :param documents: List of Document objects used in the current run.
        """
        in_use = set()
        for doc in documents:
            file_name = doc.meta.get(self.file_name_meta_key)
            if file_name:
                in_use.add(self.file_root_path / Path(file_name))

        cached_files = [p for p in self.file_root_path.iterdir() if p.is_file()]
        overflow = len(cached_files) - self.max_cache_size
        if overflow <= 0:
            return

        evictable = [p for p in cached_files if p not in in_use]
        evictable.sort(key=lambda p: p.stat().st_atime)
        for p in evictable[:overflow]:
            try:
                p.unlink()
            except Exception as error:
                logger.warning("Failed to remove cache file at {path} with error: {e}", path=p, e=error)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the component to a dictionary."""
        return default_to_dict(
            self,
            aws_access_key_id=self.aws_access_key_id.to_dict() if self.aws_access_key_id else None,
            aws_secret_access_key=self.aws_secret_access_key.to_dict() if self.aws_secret_access_key else None,
            aws_session_token=self.aws_session_token.to_dict() if self.aws_session_token else None,
            aws_region_name=self.aws_region_name.to_dict() if self.aws_region_name else None,
            aws_profile_name=self.aws_profile_name.to_dict() if self.aws_profile_name else None,
            boto3_config=self.boto3_config,
            file_root_path=str(self.file_root_path),
            max_workers=self.max_workers,
            max_cache_size=self.max_cache_size,
            file_extensions=self.file_extensions,
            file_name_meta_key=self.file_name_meta_key,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "S3Downloader":
        """
        Deserializes the component from a dictionary.
        :param data:
            Dictionary to deserialize from.
        :returns:
            Deserialized component.
        """
        deserialize_secrets_inplace(
            data["init_parameters"],
            ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "aws_region_name", "aws_profile_name"],
        )
        return default_from_dict(cls, data)
@@ -27,6 +27,7 @@ from haystack_integrations.components.generators.amazon_bedrock.chat.utils impor
27
27
  _parse_completion_response,
28
28
  _parse_streaming_response,
29
29
  _parse_streaming_response_async,
30
+ _validate_guardrail_config,
30
31
  )
31
32
 
32
33
  logger = logging.getLogger(__name__)
@@ -154,10 +155,11 @@ class AmazonBedrockChatGenerator:
154
155
  aws_region_name: Optional[Secret] = Secret.from_env_var(["AWS_DEFAULT_REGION"], strict=False), # noqa: B008
155
156
  aws_profile_name: Optional[Secret] = Secret.from_env_var(["AWS_PROFILE"], strict=False), # noqa: B008
156
157
  generation_kwargs: Optional[Dict[str, Any]] = None,
157
- stop_words: Optional[List[str]] = None,
158
158
  streaming_callback: Optional[StreamingCallbackT] = None,
159
159
  boto3_config: Optional[Dict[str, Any]] = None,
160
160
  tools: Optional[Union[List[Tool], Toolset]] = None,
161
+ *,
162
+ guardrail_config: Optional[Dict[str, str]] = None,
161
163
  ) -> None:
162
164
  """
163
165
  Initializes the `AmazonBedrockChatGenerator` with the provided parameters. The parameters are passed to the
@@ -179,10 +181,6 @@ class AmazonBedrockChatGenerator:
179
181
  :param generation_kwargs: Keyword arguments sent to the model. These parameters are specific to a model.
180
182
  You can find the model specific arguments in the AWS Bedrock API
181
183
  [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html).
182
- :param stop_words: A list of stop words that stop the model from generating more text
183
- when encountered. You can provide them using this parameter or using the model's `generation_kwargs`
184
- under a model's specific key for stop words.
185
- For example, you can provide stop words for Anthropic Claude in the `stop_sequences` key.
186
184
  :param streaming_callback: A callback function called when a new token is received from the stream.
187
185
  By default, the model is not set up for streaming. To enable streaming, set this parameter to a callback
188
186
  function that handles the streaming chunks. The callback function receives a
@@ -190,6 +188,19 @@ class AmazonBedrockChatGenerator:
190
188
  the streaming mode on.
191
189
  :param boto3_config: The configuration for the boto3 client.
192
190
  :param tools: A list of Tool objects or a Toolset that the model can use. Each tool should have a unique name.
191
+ :param guardrail_config: Optional configuration for a guardrail that has been created in Amazon Bedrock.
192
+ This must be provided as a dictionary matching either
193
+ [GuardrailConfiguration](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_GuardrailConfiguration.html).
194
+ or, in streaming mode (when `streaming_callback` is set),
195
+ [GuardrailStreamConfiguration](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_GuardrailStreamConfiguration.html).
196
+ If `trace` is set to `enabled`, the guardrail trace will be included under the `trace` key in the `meta`
197
+ attribute of the resulting `ChatMessage`.
198
+ Note: Enabling guardrails in streaming mode may introduce additional latency.
199
+ To manage this, you can adjust the `streamProcessingMode` parameter.
200
+ See the
201
+ [Guardrails Streaming documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-streaming.html)
202
+ for more information.
203
+
193
204
 
194
205
  :raises ValueError: If the model name is empty or None.
195
206
  :raises AmazonBedrockConfigurationError: If the AWS environment is not configured correctly or the model is
@@ -204,12 +215,15 @@ class AmazonBedrockChatGenerator:
204
215
  self.aws_session_token = aws_session_token
205
216
  self.aws_region_name = aws_region_name
206
217
  self.aws_profile_name = aws_profile_name
207
- self.stop_words = stop_words or []
208
218
  self.streaming_callback = streaming_callback
209
219
  self.boto3_config = boto3_config
220
+
210
221
  _check_duplicate_tool_names(list(tools or [])) # handles Toolset as well
211
222
  self.tools = tools
212
223
 
224
+ _validate_guardrail_config(guardrail_config=guardrail_config, streaming=streaming_callback is not None)
225
+ self.guardrail_config = guardrail_config
226
+
213
227
  def resolve_secret(secret: Optional[Secret]) -> Optional[str]:
214
228
  return secret.resolve_value() if secret else None
215
229
 
@@ -237,7 +251,6 @@ class AmazonBedrockChatGenerator:
237
251
  raise AmazonBedrockConfigurationError(msg) from exception
238
252
 
239
253
  self.generation_kwargs = generation_kwargs or {}
240
- self.stop_words = stop_words or []
241
254
  self.async_session: Optional[aioboto3.Session] = None
242
255
 
243
256
  def _get_async_session(self) -> aioboto3.Session:
@@ -291,11 +304,11 @@ class AmazonBedrockChatGenerator:
291
304
  aws_region_name=self.aws_region_name.to_dict() if self.aws_region_name else None,
292
305
  aws_profile_name=self.aws_profile_name.to_dict() if self.aws_profile_name else None,
293
306
  model=self.model,
294
- stop_words=self.stop_words,
295
307
  generation_kwargs=self.generation_kwargs,
296
308
  streaming_callback=callback_name,
297
309
  boto3_config=self.boto3_config,
298
310
  tools=serialize_tools_or_toolset(self.tools),
311
+ guardrail_config=self.guardrail_config,
299
312
  )
300
313
 
301
314
  @classmethod
@@ -308,6 +321,12 @@ class AmazonBedrockChatGenerator:
308
321
  Instance of `AmazonBedrockChatGenerator`.
309
322
  """
310
323
  init_params = data.get("init_parameters", {})
324
+
325
+ stop_words = init_params.pop("stop_words", None)
326
+ msg = "stop_words parameter will be ignored. Use the `stopSequences` key in `generation_kwargs` instead."
327
+ if stop_words:
328
+ logger.warning(msg)
329
+
311
330
  serialized_callback_handler = init_params.get("streaming_callback")
312
331
  if serialized_callback_handler:
313
332
  data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)
@@ -387,6 +406,8 @@ class AmazonBedrockChatGenerator:
387
406
  params["toolConfig"] = tool_config
388
407
  if additional_fields:
389
408
  params["additionalModelRequestFields"] = additional_fields
409
+ if self.guardrail_config:
410
+ params["guardrailConfig"] = self.guardrail_config
390
411
 
391
412
  # overloads that exhaust finite Literals(bool) not treated as exhaustive
392
413
  # see https://github.com/python/mypy/issues/14764
@@ -273,6 +273,7 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
273
273
  :param model: The model ID used for generation, included in message metadata.
274
274
  :returns: List of ChatMessage objects containing the assistant's response(s) with appropriate metadata.
275
275
  """
276
+
276
277
  replies = []
277
278
  if "output" in response_body and "message" in response_body["output"]:
278
279
  message = response_body["output"]["message"]
@@ -280,7 +281,7 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
280
281
  content_blocks = message["content"]
281
282
 
282
283
  # Common meta information
283
- base_meta = {
284
+ meta = {
284
285
  "model": model,
285
286
  "index": 0,
286
287
  "finish_reason": FINISH_REASON_MAPPING.get(response_body.get("stopReason", "")),
@@ -291,6 +292,9 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
291
292
  "total_tokens": response_body.get("usage", {}).get("totalTokens", 0),
292
293
  },
293
294
  }
295
+ # guardrail trace
296
+ if "trace" in response_body:
297
+ meta["trace"] = response_body["trace"]
294
298
 
295
299
  # Process all content blocks and combine them into a single message
296
300
  text_content = []
@@ -329,7 +333,7 @@ def _parse_completion_response(response_body: Dict[str, Any], model: str) -> Lis
329
333
  ChatMessage.from_assistant(
330
334
  " ".join(text_content),
331
335
  tool_calls=tool_calls,
332
- meta=base_meta,
336
+ meta=meta,
333
337
  reasoning=ReasoningContent(
334
338
  reasoning_text=reasoning_text, extra={"reasoning_contents": reasoning_contents}
335
339
  )
@@ -355,6 +359,7 @@ def _convert_event_to_streaming_chunk(
355
359
  :param component_info: ComponentInfo object
356
360
  :returns: StreamingChunk object containing the content and metadata extracted from the event.
357
361
  """
362
+
358
363
  # Initialize an empty StreamingChunk to return if no relevant event is found
359
364
  # (e.g. for messageStart and contentBlockStop)
360
365
  base_meta = {"model": model, "received_at": datetime.now(timezone.utc).isoformat()}
@@ -426,19 +431,23 @@ def _convert_event_to_streaming_chunk(
426
431
  meta=base_meta,
427
432
  )
428
433
 
429
- elif "metadata" in event and "usage" in event["metadata"]:
430
- metadata = event["metadata"]
431
- streaming_chunk = StreamingChunk(
432
- content="",
433
- meta={
434
- **base_meta,
435
- "usage": {
436
- "prompt_tokens": metadata["usage"].get("inputTokens", 0),
437
- "completion_tokens": metadata["usage"].get("outputTokens", 0),
438
- "total_tokens": metadata["usage"].get("totalTokens", 0),
439
- },
440
- },
441
- )
434
+ elif "metadata" in event:
435
+ event_meta = event["metadata"]
436
+ chunk_meta: Dict[str, Any] = {**base_meta}
437
+
438
+ if "usage" in event_meta:
439
+ usage = event_meta["usage"]
440
+ chunk_meta["usage"] = {
441
+ "prompt_tokens": usage.get("inputTokens", 0),
442
+ "completion_tokens": usage.get("outputTokens", 0),
443
+ "total_tokens": usage.get("totalTokens", 0),
444
+ }
445
+ if "trace" in event_meta:
446
+ chunk_meta["trace"] = event_meta["trace"]
447
+
448
+ # Only create chunk if we added usage or trace data
449
+ if len(chunk_meta) > len(base_meta):
450
+ streaming_chunk = StreamingChunk(content="", meta=chunk_meta)
442
451
 
443
452
  streaming_chunk.component_info = component_info
444
453
 
@@ -547,8 +556,15 @@ def _parse_streaming_response(
547
556
  content_block_idxs.add(content_block_idx)
548
557
  streaming_callback(streaming_chunk)
549
558
  chunks.append(streaming_chunk)
559
+
550
560
  reply = _convert_streaming_chunks_to_chat_message(chunks=chunks)
561
+
562
+ # both the reasoning content and the trace are ignored in _convert_streaming_chunks_to_chat_message
563
+ # so we need to process them separately
551
564
  reasoning_content = _process_reasoning_contents(chunks=chunks)
565
+ if chunks[-1].meta and "trace" in chunks[-1].meta:
566
+ reply.meta["trace"] = chunks[-1].meta["trace"]
567
+
552
568
  reply = ChatMessage.from_assistant(
553
569
  text=reply.text,
554
570
  meta=reply.meta,
@@ -556,6 +572,7 @@ def _parse_streaming_response(
556
572
  tool_calls=reply.tool_calls,
557
573
  reasoning=reasoning_content,
558
574
  )
575
+
559
576
  return [reply]
560
577
 
561
578
 
@@ -594,3 +611,24 @@ async def _parse_streaming_response_async(
594
611
  reasoning=reasoning_content,
595
612
  )
596
613
  return [reply]
614
+
615
+
616
def _validate_guardrail_config(guardrail_config: Optional[Dict[str, str]] = None, streaming: bool = False) -> None:
    """
    Check that a guardrail configuration can be sent along with a request.

    Nothing is checked when no configuration is given.

    :param guardrail_config: The guardrail configuration, or None.
    :param streaming: Whether the streaming is enabled.

    :raises ValueError: If `guardrailIdentifier` or `guardrailVersion` is missing, or if
        `streamProcessingMode` is set while streaming is disabled.
    """
    if guardrail_config is None:
        return

    # Both identifying fields are mandatory.
    if any(field not in guardrail_config for field in ("guardrailIdentifier", "guardrailVersion")):
        msg = "`guardrailIdentifier` and `guardrailVersion` fields are required in guardrail configuration."
        raise ValueError(msg)

    # The stream processing mode is rejected when streaming is off.
    if "streamProcessingMode" in guardrail_config and not streaming:
        msg = "`streamProcessingMode` field is only supported for streaming (when `streaming_callback` is not None)."
        raise ValueError(msg)