amazon-bedrock-haystack 5.4.0__tar.gz → 6.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/CHANGELOG.md +21 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/PKG-INFO +3 -4
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/pyproject.toml +2 -8
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/common/amazon_bedrock/utils.py +7 -7
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/common/s3/utils.py +3 -4
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/downloaders/s3/s3_downloader.py +14 -13
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py +18 -15
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_image_embedder.py +11 -11
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py +8 -8
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py +4 -4
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +23 -23
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py +4 -4
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +17 -16
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/rankers/amazon_bedrock/ranker.py +13 -13
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_chat_generator.py +2 -2
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_document_embedder.py +71 -3
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_document_image_embedder.py +5 -5
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_generator.py +5 -5
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_s3_downloader.py +3 -3
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/.gitignore +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/LICENSE.txt +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/README.md +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/examples/bedrock_ranker_example.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/examples/chatgenerator_example.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/examples/embedders_generator_with_rag_example.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/examples/s3_downloader_example.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/pydoc/config_docusaurus.yml +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/common/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/common/amazon_bedrock/errors.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/common/py.typed +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/common/s3/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/common/s3/errors.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/downloaders/py.typed +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/downloaders/s3/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/embedders/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/embedders/py.typed +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/generators/py.typed +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/rankers/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/src/haystack_integrations/components/rankers/py.typed +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/__init__.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/conftest.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_chat_generator_utils.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_files/apple.jpg +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_files/haystack-logo.png +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_files/sample_pdf_1.pdf +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_ranker.py +0 -0
- {amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_text_embedder.py +0 -0
|
@@ -1,5 +1,26 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/amazon_bedrock-v6.0.0] - 2026-01-09
|
|
4
|
+
|
|
5
|
+
### 🧹 Chores
|
|
6
|
+
|
|
7
|
+
- [**breaking**] Amazon_bedrock - drop Python 3.9 and use X|Y typing (#2685)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
## [integrations/amazon_bedrock-v5.4.0] - 2026-01-08
|
|
11
|
+
|
|
12
|
+
### 🚀 Features
|
|
13
|
+
|
|
14
|
+
- Update `S3Downloader` to automatically call `warm_up` on first run instead of raising an error (#2673)
|
|
15
|
+
|
|
16
|
+
### 🧹 Chores
|
|
17
|
+
|
|
18
|
+
- Make fmt command more forgiving (#2671)
|
|
19
|
+
|
|
20
|
+
### 🌀 Miscellaneous
|
|
21
|
+
|
|
22
|
+
- Fix: Fix doc links (#2661)
|
|
23
|
+
|
|
3
24
|
## [integrations/amazon_bedrock-v5.3.1] - 2025-12-19
|
|
4
25
|
|
|
5
26
|
### 🐛 Bug Fixes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: amazon-bedrock-haystack
|
|
3
|
-
Version: 5.4.0
|
|
3
|
+
Version: 6.1.0
|
|
4
4
|
Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -11,17 +11,16 @@ License-File: LICENSE.txt
|
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Programming Language :: Python
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
18
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
20
19
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
21
|
-
Requires-Python: >=3.9
|
|
20
|
+
Requires-Python: >=3.10
|
|
22
21
|
Requires-Dist: aioboto3>=14.0.0
|
|
23
22
|
Requires-Dist: boto3>=1.28.57
|
|
24
|
-
Requires-Dist: haystack-ai>=2.
|
|
23
|
+
Requires-Dist: haystack-ai>=2.22.0
|
|
25
24
|
Description-Content-Type: text/markdown
|
|
26
25
|
|
|
27
26
|
# amazon-bedrock-haystack
|
|
@@ -7,7 +7,7 @@ name = "amazon-bedrock-haystack"
|
|
|
7
7
|
dynamic = ["version"]
|
|
8
8
|
description = 'An integration of AWS S3 and Bedrock as a Downloader and Generator components.'
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.9"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
11
|
license = "Apache-2.0"
|
|
12
12
|
keywords = []
|
|
13
13
|
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
|
|
@@ -15,7 +15,6 @@ classifiers = [
|
|
|
15
15
|
"License :: OSI Approved :: Apache Software License",
|
|
16
16
|
"Development Status :: 4 - Beta",
|
|
17
17
|
"Programming Language :: Python",
|
|
18
|
-
"Programming Language :: Python :: 3.9",
|
|
19
18
|
"Programming Language :: Python :: 3.10",
|
|
20
19
|
"Programming Language :: Python :: 3.11",
|
|
21
20
|
"Programming Language :: Python :: 3.12",
|
|
@@ -23,7 +22,7 @@ classifiers = [
|
|
|
23
22
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
24
23
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
25
24
|
]
|
|
26
|
-
dependencies = ["haystack-ai>=2.
|
|
25
|
+
dependencies = ["haystack-ai>=2.22.0", "boto3>=1.28.57", "aioboto3>=14.0.0"]
|
|
27
26
|
|
|
28
27
|
[project.urls]
|
|
29
28
|
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme"
|
|
@@ -92,7 +91,6 @@ module = [
|
|
|
92
91
|
ignore_missing_imports = true
|
|
93
92
|
|
|
94
93
|
[tool.ruff]
|
|
95
|
-
target-version = "py39"
|
|
96
94
|
line-length = 120
|
|
97
95
|
|
|
98
96
|
[tool.ruff.lint]
|
|
@@ -140,10 +138,6 @@ ignore = [
|
|
|
140
138
|
"ARG002",
|
|
141
139
|
"ARG005",
|
|
142
140
|
]
|
|
143
|
-
unfixable = [
|
|
144
|
-
# Don't touch unused imports
|
|
145
|
-
"F401",
|
|
146
|
-
]
|
|
147
141
|
|
|
148
142
|
[tool.ruff.lint.isort]
|
|
149
143
|
known-first-party = ["haystack_integrations"]
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any
|
|
6
6
|
|
|
7
7
|
import aioboto3
|
|
8
8
|
import boto3
|
|
@@ -20,14 +20,14 @@ AWS_CONFIGURATION_KEYS = [
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def get_aws_session(
|
|
23
|
-
aws_access_key_id:
|
|
24
|
-
aws_secret_access_key:
|
|
25
|
-
aws_session_token:
|
|
26
|
-
aws_region_name:
|
|
27
|
-
aws_profile_name:
|
|
23
|
+
aws_access_key_id: str | None = None,
|
|
24
|
+
aws_secret_access_key: str | None = None,
|
|
25
|
+
aws_session_token: str | None = None,
|
|
26
|
+
aws_region_name: str | None = None,
|
|
27
|
+
aws_profile_name: str | None = None,
|
|
28
28
|
async_mode: bool = False,
|
|
29
29
|
**kwargs: Any,
|
|
30
|
-
) ->
|
|
30
|
+
) -> boto3.Session | aioboto3.Session:
|
|
31
31
|
"""
|
|
32
32
|
Creates an AWS Session with the given parameters.
|
|
33
33
|
Checks if the provided AWS credentials are valid and can be used to connect to AWS.
|
|
@@ -6,7 +6,6 @@ import os
|
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from http import HTTPStatus
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Optional
|
|
10
9
|
|
|
11
10
|
from boto3.session import Session
|
|
12
11
|
from botocore.config import Config
|
|
@@ -23,9 +22,9 @@ class S3Storage:
|
|
|
23
22
|
self,
|
|
24
23
|
s3_bucket: str,
|
|
25
24
|
session: Session,
|
|
26
|
-
s3_prefix:
|
|
27
|
-
endpoint_url:
|
|
28
|
-
config:
|
|
25
|
+
s3_prefix: str | None = None,
|
|
26
|
+
endpoint_url: str | None = None,
|
|
27
|
+
config: Config | None = None,
|
|
29
28
|
) -> None:
|
|
30
29
|
"""
|
|
31
30
|
Initializes the S3Storage object with the provided parameters.
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
+
from collections.abc import Callable
|
|
6
7
|
from concurrent.futures import ThreadPoolExecutor
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
9
10
|
|
|
10
11
|
from botocore.config import Config
|
|
11
12
|
from haystack import component, default_from_dict, default_to_dict, logging
|
|
@@ -29,20 +30,20 @@ class S3Downloader:
|
|
|
29
30
|
def __init__(
|
|
30
31
|
self,
|
|
31
32
|
*,
|
|
32
|
-
aws_access_key_id:
|
|
33
|
-
aws_secret_access_key:
|
|
33
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
34
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
34
35
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
35
36
|
),
|
|
36
|
-
aws_session_token:
|
|
37
|
-
aws_region_name:
|
|
38
|
-
aws_profile_name:
|
|
39
|
-
boto3_config:
|
|
40
|
-
file_root_path:
|
|
41
|
-
file_extensions:
|
|
37
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
38
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
39
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
40
|
+
boto3_config: dict[str, Any] | None = None,
|
|
41
|
+
file_root_path: str | None = None,
|
|
42
|
+
file_extensions: list[str] | None = None,
|
|
42
43
|
file_name_meta_key: str = "file_name",
|
|
43
44
|
max_workers: int = 32,
|
|
44
45
|
max_cache_size: int = 100,
|
|
45
|
-
s3_key_generation_function:
|
|
46
|
+
s3_key_generation_function: Callable[[Document], str] | None = None,
|
|
46
47
|
) -> None:
|
|
47
48
|
"""
|
|
48
49
|
Initializes the `S3Downloader` with the provided parameters.
|
|
@@ -104,9 +105,9 @@ class S3Downloader:
|
|
|
104
105
|
self.file_name_meta_key = file_name_meta_key
|
|
105
106
|
self.s3_key_generation_function = s3_key_generation_function
|
|
106
107
|
|
|
107
|
-
self._storage:
|
|
108
|
+
self._storage: S3Storage | None = None
|
|
108
109
|
|
|
109
|
-
def resolve_secret(secret:
|
|
110
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
110
111
|
return secret.resolve_value() if secret else None
|
|
111
112
|
|
|
112
113
|
self._session = get_aws_session(
|
|
@@ -169,7 +170,7 @@ class S3Downloader:
|
|
|
169
170
|
if Path(doc.meta.get(self.file_name_meta_key, "")).suffix.lower() in self.file_extensions
|
|
170
171
|
]
|
|
171
172
|
|
|
172
|
-
def _download_file(self, document: Document) ->
|
|
173
|
+
def _download_file(self, document: Document) -> Document | None:
|
|
173
174
|
"""
|
|
174
175
|
Download a single file from AWS S3 Bucket to local filesystem.
|
|
175
176
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from
|
|
2
|
+
from dataclasses import replace
|
|
3
|
+
from typing import Any
|
|
3
4
|
|
|
4
5
|
from botocore.config import Config
|
|
5
6
|
from botocore.exceptions import ClientError
|
|
@@ -50,18 +51,18 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
50
51
|
def __init__(
|
|
51
52
|
self,
|
|
52
53
|
model: str,
|
|
53
|
-
aws_access_key_id:
|
|
54
|
-
aws_secret_access_key:
|
|
54
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
55
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
55
56
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
56
57
|
),
|
|
57
|
-
aws_session_token:
|
|
58
|
-
aws_region_name:
|
|
59
|
-
aws_profile_name:
|
|
58
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
59
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
60
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
60
61
|
batch_size: int = 32,
|
|
61
62
|
progress_bar: bool = True,
|
|
62
|
-
meta_fields_to_embed:
|
|
63
|
+
meta_fields_to_embed: list[str] | None = None,
|
|
63
64
|
embedding_separator: str = "\n",
|
|
64
|
-
boto3_config:
|
|
65
|
+
boto3_config: dict[str, Any] | None = None,
|
|
65
66
|
**kwargs: Any,
|
|
66
67
|
) -> None:
|
|
67
68
|
"""
|
|
@@ -115,7 +116,7 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
115
116
|
self.boto3_config = boto3_config
|
|
116
117
|
self.kwargs = kwargs
|
|
117
118
|
|
|
118
|
-
def resolve_secret(secret:
|
|
119
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
119
120
|
return secret.resolve_value() if secret else None
|
|
120
121
|
|
|
121
122
|
try:
|
|
@@ -186,10 +187,11 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
186
187
|
)
|
|
187
188
|
all_embeddings.extend(embeddings_list)
|
|
188
189
|
|
|
189
|
-
|
|
190
|
-
|
|
190
|
+
new_documents = []
|
|
191
|
+
for doc, emb in zip(documents, all_embeddings, strict=True):
|
|
192
|
+
new_documents.append(replace(doc, embedding=emb))
|
|
191
193
|
|
|
192
|
-
return
|
|
194
|
+
return new_documents
|
|
193
195
|
|
|
194
196
|
def _embed_titan(self, documents: list[Document]) -> list[Document]:
|
|
195
197
|
"""
|
|
@@ -214,10 +216,11 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
214
216
|
embedding = response_body["embedding"]
|
|
215
217
|
all_embeddings.append(embedding)
|
|
216
218
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
+
new_documents = []
|
|
220
|
+
for doc, emb in zip(documents, all_embeddings, strict=True):
|
|
221
|
+
new_documents.append(replace(doc, embedding=emb))
|
|
219
222
|
|
|
220
|
-
return
|
|
223
|
+
return new_documents
|
|
221
224
|
|
|
222
225
|
@component.output_types(documents=list[Document])
|
|
223
226
|
def run(self, documents: list[Document]) -> dict[str, list[Document]]:
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
from dataclasses import replace
|
|
7
|
-
from typing import Any
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
from botocore.config import Config
|
|
10
10
|
from botocore.exceptions import ClientError
|
|
@@ -68,18 +68,18 @@ class AmazonBedrockDocumentImageEmbedder:
|
|
|
68
68
|
self,
|
|
69
69
|
*,
|
|
70
70
|
model: str,
|
|
71
|
-
aws_access_key_id:
|
|
72
|
-
aws_secret_access_key:
|
|
71
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
72
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
73
73
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
74
74
|
),
|
|
75
|
-
aws_session_token:
|
|
76
|
-
aws_region_name:
|
|
77
|
-
aws_profile_name:
|
|
75
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
76
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
77
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
78
78
|
file_path_meta_field: str = "file_path",
|
|
79
|
-
root_path:
|
|
80
|
-
image_size:
|
|
79
|
+
root_path: str | None = None,
|
|
80
|
+
image_size: tuple[int, int] | None = None,
|
|
81
81
|
progress_bar: bool = True,
|
|
82
|
-
boto3_config:
|
|
82
|
+
boto3_config: dict[str, Any] | None = None,
|
|
83
83
|
**kwargs: Any,
|
|
84
84
|
) -> None:
|
|
85
85
|
"""
|
|
@@ -144,7 +144,7 @@ class AmazonBedrockDocumentImageEmbedder:
|
|
|
144
144
|
raise ValueError(msg)
|
|
145
145
|
self.embedding_types = embedding_types
|
|
146
146
|
|
|
147
|
-
def resolve_secret(secret:
|
|
147
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
148
148
|
return secret.resolve_value() if secret else None
|
|
149
149
|
|
|
150
150
|
try:
|
|
@@ -288,7 +288,7 @@ class AmazonBedrockDocumentImageEmbedder:
|
|
|
288
288
|
|
|
289
289
|
docs_with_embeddings = []
|
|
290
290
|
|
|
291
|
-
for doc, emb in zip(documents, embeddings):
|
|
291
|
+
for doc, emb in zip(documents, embeddings, strict=True):
|
|
292
292
|
# we store this information for later inspection
|
|
293
293
|
new_meta = {
|
|
294
294
|
**doc.meta,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
from botocore.config import Config
|
|
5
5
|
from botocore.exceptions import ClientError
|
|
@@ -43,14 +43,14 @@ class AmazonBedrockTextEmbedder:
|
|
|
43
43
|
def __init__(
|
|
44
44
|
self,
|
|
45
45
|
model: str,
|
|
46
|
-
aws_access_key_id:
|
|
47
|
-
aws_secret_access_key:
|
|
46
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
47
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
48
48
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
49
49
|
),
|
|
50
|
-
aws_session_token:
|
|
51
|
-
aws_region_name:
|
|
52
|
-
aws_profile_name:
|
|
53
|
-
boto3_config:
|
|
50
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
51
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
52
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
53
|
+
boto3_config: dict[str, Any] | None = None,
|
|
54
54
|
**kwargs: Any,
|
|
55
55
|
) -> None:
|
|
56
56
|
"""
|
|
@@ -94,7 +94,7 @@ class AmazonBedrockTextEmbedder:
|
|
|
94
94
|
self.boto3_config = boto3_config
|
|
95
95
|
self.kwargs = kwargs
|
|
96
96
|
|
|
97
|
-
def resolve_secret(secret:
|
|
97
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
98
98
|
return secret.resolve_value() if secret else None
|
|
99
99
|
|
|
100
100
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
from botocore.eventstream import EventStream
|
|
6
6
|
from haystack.dataclasses import StreamingChunk, SyncStreamingCallbackT
|
|
@@ -19,7 +19,7 @@ class BedrockModelAdapter(ABC):
|
|
|
19
19
|
It will be overridden by the corresponding parameter in the `model_kwargs` if it is present.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
def __init__(self, model_kwargs: dict[str, Any], max_length:
|
|
22
|
+
def __init__(self, model_kwargs: dict[str, Any], max_length: int | None) -> None:
|
|
23
23
|
self.model_kwargs = model_kwargs
|
|
24
24
|
self.max_length = max_length
|
|
25
25
|
|
|
@@ -115,7 +115,7 @@ class AnthropicClaudeAdapter(BedrockModelAdapter):
|
|
|
115
115
|
:param max_length: Maximum length of generated text
|
|
116
116
|
"""
|
|
117
117
|
|
|
118
|
-
def __init__(self, model_kwargs: dict[str, Any], max_length:
|
|
118
|
+
def __init__(self, model_kwargs: dict[str, Any], max_length: int | None) -> None:
|
|
119
119
|
self.use_messages_api = model_kwargs.get("use_messages_api", True)
|
|
120
120
|
self.include_thinking = model_kwargs.get("include_thinking", True)
|
|
121
121
|
self.thinking_tag = model_kwargs.get("thinking_tag", "thinking")
|
|
@@ -175,7 +175,7 @@ class AnthropicClaudeAdapter(BedrockModelAdapter):
|
|
|
175
175
|
if self.include_thinking and len(thinking) == len(texts):
|
|
176
176
|
texts = [
|
|
177
177
|
f"{self.thinking_tag_start}{thinking}{self.thinking_tag_end}{text}"
|
|
178
|
-
for text, thinking in zip(texts, thinking)
|
|
178
|
+
for text, thinking in zip(texts, thinking, strict=True)
|
|
179
179
|
]
|
|
180
180
|
return texts
|
|
181
181
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import aioboto3
|
|
4
4
|
from botocore.config import Config
|
|
@@ -147,19 +147,19 @@ class AmazonBedrockChatGenerator:
|
|
|
147
147
|
def __init__(
|
|
148
148
|
self,
|
|
149
149
|
model: str,
|
|
150
|
-
aws_access_key_id:
|
|
151
|
-
aws_secret_access_key:
|
|
150
|
+
aws_access_key_id: Secret | None = Secret.from_env_var(["AWS_ACCESS_KEY_ID"], strict=False), # noqa: B008
|
|
151
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
152
152
|
["AWS_SECRET_ACCESS_KEY"], strict=False
|
|
153
153
|
),
|
|
154
|
-
aws_session_token:
|
|
155
|
-
aws_region_name:
|
|
156
|
-
aws_profile_name:
|
|
157
|
-
generation_kwargs:
|
|
158
|
-
streaming_callback:
|
|
159
|
-
boto3_config:
|
|
160
|
-
tools:
|
|
154
|
+
aws_session_token: Secret | None = Secret.from_env_var(["AWS_SESSION_TOKEN"], strict=False), # noqa: B008
|
|
155
|
+
aws_region_name: Secret | None = Secret.from_env_var(["AWS_DEFAULT_REGION"], strict=False), # noqa: B008
|
|
156
|
+
aws_profile_name: Secret | None = Secret.from_env_var(["AWS_PROFILE"], strict=False), # noqa: B008
|
|
157
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
158
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
159
|
+
boto3_config: dict[str, Any] | None = None,
|
|
160
|
+
tools: ToolsType | None = None,
|
|
161
161
|
*,
|
|
162
|
-
guardrail_config:
|
|
162
|
+
guardrail_config: dict[str, str] | None = None,
|
|
163
163
|
) -> None:
|
|
164
164
|
"""
|
|
165
165
|
Initializes the `AmazonBedrockChatGenerator` with the provided parameters. The parameters are passed to the
|
|
@@ -225,7 +225,7 @@ class AmazonBedrockChatGenerator:
|
|
|
225
225
|
_validate_guardrail_config(guardrail_config=guardrail_config, streaming=streaming_callback is not None)
|
|
226
226
|
self.guardrail_config = guardrail_config
|
|
227
227
|
|
|
228
|
-
def resolve_secret(secret:
|
|
228
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
229
229
|
return secret.resolve_value() if secret else None
|
|
230
230
|
|
|
231
231
|
config = Config(
|
|
@@ -252,7 +252,7 @@ class AmazonBedrockChatGenerator:
|
|
|
252
252
|
raise AmazonBedrockConfigurationError(msg) from exception
|
|
253
253
|
|
|
254
254
|
self.generation_kwargs = generation_kwargs or {}
|
|
255
|
-
self.async_session:
|
|
255
|
+
self.async_session: aioboto3.Session | None = None
|
|
256
256
|
|
|
257
257
|
def _get_async_session(self) -> aioboto3.Session:
|
|
258
258
|
"""
|
|
@@ -341,11 +341,11 @@ class AmazonBedrockChatGenerator:
|
|
|
341
341
|
def _prepare_request_params(
|
|
342
342
|
self,
|
|
343
343
|
messages: list[ChatMessage],
|
|
344
|
-
streaming_callback:
|
|
345
|
-
generation_kwargs:
|
|
346
|
-
tools:
|
|
344
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
345
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
346
|
+
tools: ToolsType | None = None,
|
|
347
347
|
requires_async: bool = False,
|
|
348
|
-
) -> tuple[dict[str, Any],
|
|
348
|
+
) -> tuple[dict[str, Any], StreamingCallbackT | None]:
|
|
349
349
|
"""
|
|
350
350
|
Prepares and formats parameters required to call the Amazon Bedrock Converse API.
|
|
351
351
|
|
|
@@ -423,9 +423,9 @@ class AmazonBedrockChatGenerator:
|
|
|
423
423
|
def run(
|
|
424
424
|
self,
|
|
425
425
|
messages: list[ChatMessage],
|
|
426
|
-
streaming_callback:
|
|
427
|
-
generation_kwargs:
|
|
428
|
-
tools:
|
|
426
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
427
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
428
|
+
tools: ToolsType | None = None,
|
|
429
429
|
) -> dict[str, list[ChatMessage]]:
|
|
430
430
|
"""
|
|
431
431
|
Executes a synchronous inference call to the Amazon Bedrock model using the Converse API.
|
|
@@ -484,9 +484,9 @@ class AmazonBedrockChatGenerator:
|
|
|
484
484
|
async def run_async(
|
|
485
485
|
self,
|
|
486
486
|
messages: list[ChatMessage],
|
|
487
|
-
streaming_callback:
|
|
488
|
-
generation_kwargs:
|
|
489
|
-
tools:
|
|
487
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
488
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
489
|
+
tools: ToolsType | None = None,
|
|
490
490
|
) -> dict[str, list[ChatMessage]]:
|
|
491
491
|
"""
|
|
492
492
|
Executes an asynchronous inference call to the Amazon Bedrock model using the Converse API.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import json
|
|
3
3
|
from datetime import datetime, timezone
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any
|
|
5
5
|
|
|
6
6
|
from botocore.eventstream import EventStream
|
|
7
7
|
from haystack import logging
|
|
@@ -40,7 +40,7 @@ FINISH_REASON_MAPPING: dict[str, FinishReason] = {
|
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
# Haystack to Bedrock util methods
|
|
43
|
-
def _format_tools(tools:
|
|
43
|
+
def _format_tools(tools: list[Tool] | None = None) -> dict[str, Any] | None:
|
|
44
44
|
"""
|
|
45
45
|
Format Haystack Tool(s) to Amazon Bedrock toolConfig format.
|
|
46
46
|
|
|
@@ -454,7 +454,7 @@ def _convert_event_to_streaming_chunk(
|
|
|
454
454
|
return streaming_chunk
|
|
455
455
|
|
|
456
456
|
|
|
457
|
-
def _process_reasoning_contents(chunks: list[StreamingChunk]) ->
|
|
457
|
+
def _process_reasoning_contents(chunks: list[StreamingChunk]) -> ReasoningContent | None:
|
|
458
458
|
"""
|
|
459
459
|
Process reasoning contents from a list of StreamingChunk objects into the Bedrock expected format.
|
|
460
460
|
|
|
@@ -613,7 +613,7 @@ async def _parse_streaming_response_async(
|
|
|
613
613
|
return [reply]
|
|
614
614
|
|
|
615
615
|
|
|
616
|
-
def _validate_guardrail_config(guardrail_config:
|
|
616
|
+
def _validate_guardrail_config(guardrail_config: dict[str, str] | None = None, streaming: bool = False) -> None:
|
|
617
617
|
"""
|
|
618
618
|
Validate the guardrail configuration.
|
|
619
619
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import re
|
|
3
3
|
import warnings
|
|
4
|
-
from
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from typing import Any, ClassVar, Literal, get_args
|
|
5
6
|
|
|
6
7
|
from botocore.config import Config
|
|
7
8
|
from botocore.exceptions import ClientError
|
|
@@ -95,18 +96,18 @@ class AmazonBedrockGenerator:
|
|
|
95
96
|
def __init__(
|
|
96
97
|
self,
|
|
97
98
|
model: str,
|
|
98
|
-
aws_access_key_id:
|
|
99
|
-
aws_secret_access_key:
|
|
99
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
100
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
100
101
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
101
102
|
),
|
|
102
|
-
aws_session_token:
|
|
103
|
-
aws_region_name:
|
|
104
|
-
aws_profile_name:
|
|
105
|
-
max_length:
|
|
106
|
-
truncate:
|
|
107
|
-
streaming_callback:
|
|
108
|
-
boto3_config:
|
|
109
|
-
model_family:
|
|
103
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
104
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
105
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
106
|
+
max_length: int | None = None,
|
|
107
|
+
truncate: bool | None = None,
|
|
108
|
+
streaming_callback: Callable[[StreamingChunk], None] | None = None,
|
|
109
|
+
boto3_config: dict[str, Any] | None = None,
|
|
110
|
+
model_family: MODEL_FAMILIES | None = None,
|
|
110
111
|
**kwargs: Any,
|
|
111
112
|
) -> None:
|
|
112
113
|
"""
|
|
@@ -156,7 +157,7 @@ class AmazonBedrockGenerator:
|
|
|
156
157
|
self.kwargs = kwargs
|
|
157
158
|
self.model_family = model_family
|
|
158
159
|
|
|
159
|
-
def resolve_secret(secret:
|
|
160
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
160
161
|
return secret.resolve_value() if secret else None
|
|
161
162
|
|
|
162
163
|
try:
|
|
@@ -187,9 +188,9 @@ class AmazonBedrockGenerator:
|
|
|
187
188
|
def run(
|
|
188
189
|
self,
|
|
189
190
|
prompt: str,
|
|
190
|
-
streaming_callback:
|
|
191
|
-
generation_kwargs:
|
|
192
|
-
) -> dict[str,
|
|
191
|
+
streaming_callback: Callable[[StreamingChunk], None] | None = None,
|
|
192
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
193
|
+
) -> dict[str, list[str] | dict[str, Any]]:
|
|
193
194
|
"""
|
|
194
195
|
Generates a list of string response to the given prompt.
|
|
195
196
|
|
|
@@ -240,7 +241,7 @@ class AmazonBedrockGenerator:
|
|
|
240
241
|
return {"replies": replies, "meta": metadata}
|
|
241
242
|
|
|
242
243
|
@classmethod
|
|
243
|
-
def get_model_adapter(cls, model: str, model_family:
|
|
244
|
+
def get_model_adapter(cls, model: str, model_family: str | None = None) -> type[BedrockModelAdapter]:
|
|
244
245
|
"""
|
|
245
246
|
Gets the model adapter for the given model.
|
|
246
247
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dataclasses import replace
|
|
2
|
+
from typing import Any
|
|
2
3
|
|
|
3
4
|
from botocore.exceptions import ClientError
|
|
4
5
|
from haystack import Document, component, default_from_dict, default_to_dict, logging
|
|
@@ -60,15 +61,15 @@ class AmazonBedrockRanker:
|
|
|
60
61
|
self,
|
|
61
62
|
model: str = "cohere.rerank-v3-5:0",
|
|
62
63
|
top_k: int = 10,
|
|
63
|
-
aws_access_key_id:
|
|
64
|
-
aws_secret_access_key:
|
|
64
|
+
aws_access_key_id: Secret | None = Secret.from_env_var(["AWS_ACCESS_KEY_ID"], strict=False), # noqa: B008
|
|
65
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
65
66
|
["AWS_SECRET_ACCESS_KEY"], strict=False
|
|
66
67
|
),
|
|
67
|
-
aws_session_token:
|
|
68
|
-
aws_region_name:
|
|
69
|
-
aws_profile_name:
|
|
70
|
-
max_chunks_per_doc:
|
|
71
|
-
meta_fields_to_embed:
|
|
68
|
+
aws_session_token: Secret | None = Secret.from_env_var(["AWS_SESSION_TOKEN"], strict=False), # noqa: B008
|
|
69
|
+
aws_region_name: Secret | None = Secret.from_env_var(["AWS_DEFAULT_REGION"], strict=False), # noqa: B008
|
|
70
|
+
aws_profile_name: Secret | None = Secret.from_env_var(["AWS_PROFILE"], strict=False), # noqa: B008
|
|
71
|
+
max_chunks_per_doc: int | None = None,
|
|
72
|
+
meta_fields_to_embed: list[str] | None = None,
|
|
72
73
|
meta_data_separator: str = "\n",
|
|
73
74
|
) -> None:
|
|
74
75
|
if not model:
|
|
@@ -103,7 +104,7 @@ class AmazonBedrockRanker:
|
|
|
103
104
|
self.meta_fields_to_embed = meta_fields_to_embed or []
|
|
104
105
|
self.meta_data_separator = meta_data_separator
|
|
105
106
|
|
|
106
|
-
def resolve_secret(secret:
|
|
107
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
107
108
|
return secret.resolve_value() if secret else None
|
|
108
109
|
|
|
109
110
|
try:
|
|
@@ -177,7 +178,7 @@ class AmazonBedrockRanker:
|
|
|
177
178
|
return concatenated_input_list
|
|
178
179
|
|
|
179
180
|
@component.output_types(documents=list[Document])
|
|
180
|
-
def run(self, query: str, documents: list[Document], top_k:
|
|
181
|
+
def run(self, query: str, documents: list[Document], top_k: int | None = None) -> dict[str, list[Document]]:
|
|
181
182
|
"""
|
|
182
183
|
Use the Amazon Bedrock Reranker to re-rank the list of documents based on the query.
|
|
183
184
|
|
|
@@ -201,7 +202,7 @@ class AmazonBedrockRanker:
|
|
|
201
202
|
if not documents:
|
|
202
203
|
return {"documents": []}
|
|
203
204
|
|
|
204
|
-
def resolve_secret(secret:
|
|
205
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
205
206
|
return secret.resolve_value() if secret else None
|
|
206
207
|
|
|
207
208
|
region = resolve_secret(self.aws_region_name)
|
|
@@ -251,8 +252,7 @@ class AmazonBedrockRanker:
|
|
|
251
252
|
idx = result["index"]
|
|
252
253
|
score = result["relevanceScore"]
|
|
253
254
|
doc = documents[idx]
|
|
254
|
-
doc
|
|
255
|
-
sorted_docs.append(doc)
|
|
255
|
+
sorted_docs.append(replace(doc, score=score))
|
|
256
256
|
|
|
257
257
|
return {"documents": sorted_docs}
|
|
258
258
|
except ClientError as client_error:
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_chat_generator.py
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
5
|
from haystack import Pipeline
|
|
@@ -169,7 +169,7 @@ class TestAmazonBedrockChatGenerator:
|
|
|
169
169
|
assert generator.to_dict() == expected_dict
|
|
170
170
|
|
|
171
171
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
172
|
-
def test_from_dict(self, mock_boto3_session: Any, boto3_config:
|
|
172
|
+
def test_from_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
173
173
|
"""
|
|
174
174
|
Test that the from_dict method returns the correct object
|
|
175
175
|
"""
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_document_embedder.py
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import io
|
|
2
2
|
import os
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
from unittest.mock import patch
|
|
5
5
|
|
|
6
6
|
import pytest
|
|
@@ -66,7 +66,7 @@ class TestAmazonBedrockDocumentEmbedder:
|
|
|
66
66
|
)
|
|
67
67
|
|
|
68
68
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
69
|
-
def test_to_dict(self, mock_boto3_session: Any, boto3_config:
|
|
69
|
+
def test_to_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
70
70
|
embedder = AmazonBedrockDocumentEmbedder(
|
|
71
71
|
model="cohere.embed-english-v3",
|
|
72
72
|
input_type="search_document",
|
|
@@ -94,7 +94,7 @@ class TestAmazonBedrockDocumentEmbedder:
|
|
|
94
94
|
assert embedder.to_dict() == expected_dict
|
|
95
95
|
|
|
96
96
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
97
|
-
def test_from_dict(self, mock_boto3_session: Any, boto3_config:
|
|
97
|
+
def test_from_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
98
98
|
data = {
|
|
99
99
|
"type": TYPE,
|
|
100
100
|
"init_parameters": {
|
|
@@ -257,6 +257,74 @@ class TestAmazonBedrockDocumentEmbedder:
|
|
|
257
257
|
assert doc.content == docs[i].content
|
|
258
258
|
assert doc.embedding == [0.1, 0.2, 0.3]
|
|
259
259
|
|
|
260
|
+
def test_run_cohere_does_not_modify_original_documents(self, mock_boto3_session):
|
|
261
|
+
embedder = AmazonBedrockDocumentEmbedder(model="cohere.embed-english-v3")
|
|
262
|
+
|
|
263
|
+
original_docs = [
|
|
264
|
+
Document(content="test 1", id="doc1"),
|
|
265
|
+
Document(content="test 2", id="doc2"),
|
|
266
|
+
]
|
|
267
|
+
|
|
268
|
+
# Store original IDs to verify they're the same objects
|
|
269
|
+
original_doc_ids = [id(doc) for doc in original_docs]
|
|
270
|
+
original_embeddings = [doc.embedding for doc in original_docs]
|
|
271
|
+
|
|
272
|
+
with patch.object(embedder, "_client") as mock_client:
|
|
273
|
+
mock_client.invoke_model.return_value = {
|
|
274
|
+
"body": io.StringIO('{"embeddings": [[0.1, 0.2], [0.3, 0.4]]}'),
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
result = embedder.run(documents=original_docs)
|
|
278
|
+
|
|
279
|
+
# Verify originals are unchanged
|
|
280
|
+
assert all(doc.embedding is None for doc in original_docs)
|
|
281
|
+
assert original_embeddings == [None, None]
|
|
282
|
+
|
|
283
|
+
# Verify returned documents are NEW instances
|
|
284
|
+
returned_doc_ids = [id(doc) for doc in result["documents"]]
|
|
285
|
+
assert original_doc_ids != returned_doc_ids
|
|
286
|
+
|
|
287
|
+
# Verify returned documents have embeddings
|
|
288
|
+
assert result["documents"][0].embedding == [0.1, 0.2]
|
|
289
|
+
assert result["documents"][1].embedding == [0.3, 0.4]
|
|
290
|
+
assert result["documents"][0].content == "test 1"
|
|
291
|
+
assert result["documents"][1].content == "test 2"
|
|
292
|
+
|
|
293
|
+
def test_run_titan_does_not_modify_original_documents(self, mock_boto3_session):
|
|
294
|
+
embedder = AmazonBedrockDocumentEmbedder(model="amazon.titan-embed-text-v1")
|
|
295
|
+
|
|
296
|
+
original_docs = [
|
|
297
|
+
Document(content="test 1", id="doc1"),
|
|
298
|
+
Document(content="test 2", id="doc2"),
|
|
299
|
+
]
|
|
300
|
+
|
|
301
|
+
# Store original IDs to verify they're the same objects
|
|
302
|
+
original_doc_ids = [id(doc) for doc in original_docs]
|
|
303
|
+
original_embeddings = [doc.embedding for doc in original_docs]
|
|
304
|
+
|
|
305
|
+
with patch.object(embedder, "_client") as mock_client:
|
|
306
|
+
# Titan returns one embedding at a time
|
|
307
|
+
mock_client.invoke_model.side_effect = [
|
|
308
|
+
{"body": io.StringIO('{"embedding": [0.1, 0.2]}')},
|
|
309
|
+
{"body": io.StringIO('{"embedding": [0.3, 0.4]}')},
|
|
310
|
+
]
|
|
311
|
+
|
|
312
|
+
result = embedder.run(documents=original_docs)
|
|
313
|
+
|
|
314
|
+
# Verify originals are unchanged
|
|
315
|
+
assert all(doc.embedding is None for doc in original_docs)
|
|
316
|
+
assert original_embeddings == [None, None]
|
|
317
|
+
|
|
318
|
+
# Verify returned documents are NEW instances
|
|
319
|
+
returned_doc_ids = [id(doc) for doc in result["documents"]]
|
|
320
|
+
assert original_doc_ids != returned_doc_ids
|
|
321
|
+
|
|
322
|
+
# Verify returned documents have embeddings
|
|
323
|
+
assert result["documents"][0].embedding == [0.1, 0.2]
|
|
324
|
+
assert result["documents"][1].embedding == [0.3, 0.4]
|
|
325
|
+
assert result["documents"][0].content == "test 1"
|
|
326
|
+
assert result["documents"][1].content == "test 2"
|
|
327
|
+
|
|
260
328
|
@pytest.mark.integration
|
|
261
329
|
@pytest.mark.skipif(
|
|
262
330
|
not os.getenv("AWS_ACCESS_KEY_ID")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import glob
|
|
2
2
|
import io
|
|
3
3
|
import os
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any
|
|
5
5
|
from unittest.mock import patch
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
@@ -62,7 +62,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
65
|
-
def test_to_dict(self, mock_boto3_session: Any, boto3_config:
|
|
65
|
+
def test_to_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
66
66
|
embedder = AmazonBedrockDocumentImageEmbedder(
|
|
67
67
|
model="cohere.embed-english-v3",
|
|
68
68
|
embedding_types=["float"],
|
|
@@ -90,7 +90,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
90
90
|
assert embedder.to_dict() == expected_dict
|
|
91
91
|
|
|
92
92
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
93
|
-
def test_from_dict(self, mock_boto3_session: Any, boto3_config:
|
|
93
|
+
def test_from_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
94
94
|
data = {
|
|
95
95
|
"type": TYPE,
|
|
96
96
|
"init_parameters": {
|
|
@@ -243,7 +243,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
243
243
|
|
|
244
244
|
assert isinstance(result["documents"], list)
|
|
245
245
|
assert len(result["documents"]) == len(documents)
|
|
246
|
-
for doc, new_doc in zip(documents, result["documents"]):
|
|
246
|
+
for doc, new_doc in zip(documents, result["documents"], strict=True):
|
|
247
247
|
assert doc.embedding is None
|
|
248
248
|
assert new_doc is not doc
|
|
249
249
|
assert isinstance(new_doc, Document)
|
|
@@ -276,7 +276,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
276
276
|
|
|
277
277
|
assert isinstance(result["documents"], list)
|
|
278
278
|
assert len(result["documents"]) == len(documents)
|
|
279
|
-
for doc, new_doc in zip(documents, result["documents"]):
|
|
279
|
+
for doc, new_doc in zip(documents, result["documents"], strict=True):
|
|
280
280
|
assert doc.embedding is None
|
|
281
281
|
assert new_doc is not doc
|
|
282
282
|
assert isinstance(new_doc, Document)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
from unittest.mock import MagicMock, call
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
@@ -21,7 +21,7 @@ from haystack_integrations.components.generators.amazon_bedrock.adapters import
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
24
|
-
def test_to_dict(mock_boto3_session: Any, boto3_config:
|
|
24
|
+
def test_to_dict(mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
25
25
|
"""
|
|
26
26
|
Test that the to_dict method returns the correct dictionary without aws credentials
|
|
27
27
|
"""
|
|
@@ -50,7 +50,7 @@ def test_to_dict(mock_boto3_session: Any, boto3_config: Optional[dict[str, Any]]
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
53
|
-
def test_from_dict(mock_boto3_session: Any, boto3_config:
|
|
53
|
+
def test_from_dict(mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
54
54
|
"""
|
|
55
55
|
Test that the from_dict method returns the correct object
|
|
56
56
|
"""
|
|
@@ -161,7 +161,7 @@ def test_constructor_with_empty_model():
|
|
|
161
161
|
("mistral.mistral-medium-v8:0", MistralAdapter), # artificial
|
|
162
162
|
],
|
|
163
163
|
)
|
|
164
|
-
def test_get_model_adapter(model: str, expected_model_adapter:
|
|
164
|
+
def test_get_model_adapter(model: str, expected_model_adapter: type[BedrockModelAdapter] | None):
|
|
165
165
|
"""
|
|
166
166
|
Test that the correct model adapter is returned for a given model
|
|
167
167
|
"""
|
|
@@ -182,7 +182,7 @@ def test_get_model_adapter(model: str, expected_model_adapter: Optional[type[Bed
|
|
|
182
182
|
],
|
|
183
183
|
)
|
|
184
184
|
def test_get_model_adapter_with_model_family(
|
|
185
|
-
model_family: str, expected_model_adapter:
|
|
185
|
+
model_family: str, expected_model_adapter: type[BedrockModelAdapter] | None
|
|
186
186
|
):
|
|
187
187
|
"""
|
|
188
188
|
Test that the correct model adapter is returned for a given model model_family
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
from unittest.mock import MagicMock, patch
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
@@ -63,7 +63,7 @@ class TestS3Downloader:
|
|
|
63
63
|
assert d.file_extensions == [".pdf", ".txt"]
|
|
64
64
|
|
|
65
65
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 10}])
|
|
66
|
-
def test_to_dict(self, mock_boto3_session: Any, tmp_path, boto3_config:
|
|
66
|
+
def test_to_dict(self, mock_boto3_session: Any, tmp_path, boto3_config: dict[str, Any] | None):
|
|
67
67
|
d = S3Downloader(file_root_path=str(tmp_path), boto3_config=boto3_config)
|
|
68
68
|
expected = {
|
|
69
69
|
"type": TYPE,
|
|
@@ -84,7 +84,7 @@ class TestS3Downloader:
|
|
|
84
84
|
assert d.to_dict() == expected
|
|
85
85
|
|
|
86
86
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 10}])
|
|
87
|
-
def test_from_dict(self, mock_boto3_session: Any, tmp_path, boto3_config:
|
|
87
|
+
def test_from_dict(self, mock_boto3_session: Any, tmp_path, boto3_config: dict[str, Any] | None):
|
|
88
88
|
data = {
|
|
89
89
|
"type": TYPE,
|
|
90
90
|
"init_parameters": {
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/examples/bedrock_ranker_example.py
RENAMED
|
File without changes
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/examples/chatgenerator_example.py
RENAMED
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/examples/s3_downloader_example.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_chat_generator_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_files/haystack-logo.png
RENAMED
|
File without changes
|
{amazon_bedrock_haystack-5.4.0 → amazon_bedrock_haystack-6.1.0}/tests/test_files/sample_pdf_1.pdf
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|