amazon-bedrock-haystack 5.3.1__tar.gz → 6.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/CHANGELOG.md +21 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/PKG-INFO +3 -4
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/pyproject.toml +3 -9
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/common/amazon_bedrock/utils.py +7 -7
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/common/s3/utils.py +3 -4
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/downloaders/s3/s3_downloader.py +15 -15
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_embedder.py +11 -11
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/embedders/amazon_bedrock/document_image_embedder.py +11 -11
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/embedders/amazon_bedrock/text_embedder.py +8 -8
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py +4 -4
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py +24 -24
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py +4 -4
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/generators/amazon_bedrock/generator.py +18 -17
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/rankers/amazon_bedrock/ranker.py +12 -12
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_chat_generator.py +2 -2
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_document_embedder.py +3 -3
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_document_image_embedder.py +5 -5
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_generator.py +5 -5
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_s3_downloader.py +12 -32
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/.gitignore +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/LICENSE.txt +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/README.md +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/examples/bedrock_ranker_example.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/examples/chatgenerator_example.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/examples/embedders_generator_with_rag_example.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/examples/s3_downloader_example.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/pydoc/config_docusaurus.yml +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/common/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/common/amazon_bedrock/errors.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/common/py.typed +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/common/s3/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/common/s3/errors.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/downloaders/py.typed +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/downloaders/s3/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/embedders/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/embedders/py.typed +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/generators/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/generators/amazon_bedrock/chat/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/generators/py.typed +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/rankers/amazon_bedrock/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/src/haystack_integrations/components/rankers/py.typed +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/__init__.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/conftest.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_chat_generator_utils.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_files/apple.jpg +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_files/haystack-logo.png +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_files/sample_pdf_1.pdf +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_ranker.py +0 -0
- {amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_text_embedder.py +0 -0
|
@@ -1,5 +1,26 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [integrations/amazon_bedrock-v5.4.0] - 2026-01-08
|
|
4
|
+
|
|
5
|
+
### 🚀 Features
|
|
6
|
+
|
|
7
|
+
- Update `S3Downloader` to automatically call `warm_up` on first run instead of raising an error (#2673)
|
|
8
|
+
|
|
9
|
+
### 🧹 Chores
|
|
10
|
+
|
|
11
|
+
- Make fmt command more forgiving (#2671)
|
|
12
|
+
|
|
13
|
+
### 🌀 Miscellaneous
|
|
14
|
+
|
|
15
|
+
- Fix: Fix doc links (#2661)
|
|
16
|
+
|
|
17
|
+
## [integrations/amazon_bedrock-v5.3.1] - 2025-12-19
|
|
18
|
+
|
|
19
|
+
### 🐛 Bug Fixes
|
|
20
|
+
|
|
21
|
+
- Relax model name validation for Bedrock Embedders (#2625)
|
|
22
|
+
|
|
23
|
+
|
|
3
24
|
## [integrations/amazon_bedrock-v5.3.0] - 2025-12-17
|
|
4
25
|
|
|
5
26
|
### 🚀 Features
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: amazon-bedrock-haystack
|
|
3
|
-
Version: 5.3.1
|
|
3
|
+
Version: 6.0.0
|
|
4
4
|
Summary: An integration of AWS S3 and Bedrock as a Downloader and Generator components.
|
|
5
5
|
Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
|
|
@@ -11,17 +11,16 @@ License-File: LICENSE.txt
|
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Programming Language :: Python
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
18
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
20
19
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
21
|
-
Requires-Python: >=3.9
|
|
20
|
+
Requires-Python: >=3.10
|
|
22
21
|
Requires-Dist: aioboto3>=14.0.0
|
|
23
22
|
Requires-Dist: boto3>=1.28.57
|
|
24
|
-
Requires-Dist: haystack-ai>=2.
|
|
23
|
+
Requires-Dist: haystack-ai>=2.22.0
|
|
25
24
|
Description-Content-Type: text/markdown
|
|
26
25
|
|
|
27
26
|
# amazon-bedrock-haystack
|
|
@@ -7,7 +7,7 @@ name = "amazon-bedrock-haystack"
|
|
|
7
7
|
dynamic = ["version"]
|
|
8
8
|
description = 'An integration of AWS S3 and Bedrock as a Downloader and Generator components.'
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.9"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
11
|
license = "Apache-2.0"
|
|
12
12
|
keywords = []
|
|
13
13
|
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
|
|
@@ -15,7 +15,6 @@ classifiers = [
|
|
|
15
15
|
"License :: OSI Approved :: Apache Software License",
|
|
16
16
|
"Development Status :: 4 - Beta",
|
|
17
17
|
"Programming Language :: Python",
|
|
18
|
-
"Programming Language :: Python :: 3.9",
|
|
19
18
|
"Programming Language :: Python :: 3.10",
|
|
20
19
|
"Programming Language :: Python :: 3.11",
|
|
21
20
|
"Programming Language :: Python :: 3.12",
|
|
@@ -23,7 +22,7 @@ classifiers = [
|
|
|
23
22
|
"Programming Language :: Python :: Implementation :: CPython",
|
|
24
23
|
"Programming Language :: Python :: Implementation :: PyPy",
|
|
25
24
|
]
|
|
26
|
-
dependencies = ["haystack-ai>=2.
|
|
25
|
+
dependencies = ["haystack-ai>=2.22.0", "boto3>=1.28.57", "aioboto3>=14.0.0"]
|
|
27
26
|
|
|
28
27
|
[project.urls]
|
|
29
28
|
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/amazon_bedrock#readme"
|
|
@@ -47,7 +46,7 @@ dependencies = ["haystack-pydoc-tools", "ruff"]
|
|
|
47
46
|
|
|
48
47
|
[tool.hatch.envs.default.scripts]
|
|
49
48
|
docs = ["pydoc-markdown pydoc/config_docusaurus.yml"]
|
|
50
|
-
fmt = "ruff check --fix {args}
|
|
49
|
+
fmt = "ruff check --fix {args}; ruff format {args}"
|
|
51
50
|
fmt-check = "ruff check {args} && ruff format --check {args}"
|
|
52
51
|
|
|
53
52
|
[tool.hatch.envs.test]
|
|
@@ -92,7 +91,6 @@ module = [
|
|
|
92
91
|
ignore_missing_imports = true
|
|
93
92
|
|
|
94
93
|
[tool.ruff]
|
|
95
|
-
target-version = "py39"
|
|
96
94
|
line-length = 120
|
|
97
95
|
|
|
98
96
|
[tool.ruff.lint]
|
|
@@ -140,10 +138,6 @@ ignore = [
|
|
|
140
138
|
"ARG002",
|
|
141
139
|
"ARG005",
|
|
142
140
|
]
|
|
143
|
-
unfixable = [
|
|
144
|
-
# Don't touch unused imports
|
|
145
|
-
"F401",
|
|
146
|
-
]
|
|
147
141
|
|
|
148
142
|
[tool.ruff.lint.isort]
|
|
149
143
|
known-first-party = ["haystack_integrations"]
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
#
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any
|
|
6
6
|
|
|
7
7
|
import aioboto3
|
|
8
8
|
import boto3
|
|
@@ -20,14 +20,14 @@ AWS_CONFIGURATION_KEYS = [
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def get_aws_session(
|
|
23
|
-
aws_access_key_id:
|
|
24
|
-
aws_secret_access_key:
|
|
25
|
-
aws_session_token:
|
|
26
|
-
aws_region_name:
|
|
27
|
-
aws_profile_name:
|
|
23
|
+
aws_access_key_id: str | None = None,
|
|
24
|
+
aws_secret_access_key: str | None = None,
|
|
25
|
+
aws_session_token: str | None = None,
|
|
26
|
+
aws_region_name: str | None = None,
|
|
27
|
+
aws_profile_name: str | None = None,
|
|
28
28
|
async_mode: bool = False,
|
|
29
29
|
**kwargs: Any,
|
|
30
|
-
) ->
|
|
30
|
+
) -> boto3.Session | aioboto3.Session:
|
|
31
31
|
"""
|
|
32
32
|
Creates an AWS Session with the given parameters.
|
|
33
33
|
Checks if the provided AWS credentials are valid and can be used to connect to AWS.
|
|
@@ -6,7 +6,6 @@ import os
|
|
|
6
6
|
from dataclasses import dataclass
|
|
7
7
|
from http import HTTPStatus
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import Optional
|
|
10
9
|
|
|
11
10
|
from boto3.session import Session
|
|
12
11
|
from botocore.config import Config
|
|
@@ -23,9 +22,9 @@ class S3Storage:
|
|
|
23
22
|
self,
|
|
24
23
|
s3_bucket: str,
|
|
25
24
|
session: Session,
|
|
26
|
-
s3_prefix:
|
|
27
|
-
endpoint_url:
|
|
28
|
-
config:
|
|
25
|
+
s3_prefix: str | None = None,
|
|
26
|
+
endpoint_url: str | None = None,
|
|
27
|
+
config: Config | None = None,
|
|
29
28
|
) -> None:
|
|
30
29
|
"""
|
|
31
30
|
Initializes the S3Storage object with the provided parameters.
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
+
from collections.abc import Callable
|
|
6
7
|
from concurrent.futures import ThreadPoolExecutor
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
9
10
|
|
|
10
11
|
from botocore.config import Config
|
|
11
12
|
from haystack import component, default_from_dict, default_to_dict, logging
|
|
@@ -29,20 +30,20 @@ class S3Downloader:
|
|
|
29
30
|
def __init__(
|
|
30
31
|
self,
|
|
31
32
|
*,
|
|
32
|
-
aws_access_key_id:
|
|
33
|
-
aws_secret_access_key:
|
|
33
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
34
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
34
35
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
35
36
|
),
|
|
36
|
-
aws_session_token:
|
|
37
|
-
aws_region_name:
|
|
38
|
-
aws_profile_name:
|
|
39
|
-
boto3_config:
|
|
40
|
-
file_root_path:
|
|
41
|
-
file_extensions:
|
|
37
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
38
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
39
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
40
|
+
boto3_config: dict[str, Any] | None = None,
|
|
41
|
+
file_root_path: str | None = None,
|
|
42
|
+
file_extensions: list[str] | None = None,
|
|
42
43
|
file_name_meta_key: str = "file_name",
|
|
43
44
|
max_workers: int = 32,
|
|
44
45
|
max_cache_size: int = 100,
|
|
45
|
-
s3_key_generation_function:
|
|
46
|
+
s3_key_generation_function: Callable[[Document], str] | None = None,
|
|
46
47
|
) -> None:
|
|
47
48
|
"""
|
|
48
49
|
Initializes the `S3Downloader` with the provided parameters.
|
|
@@ -104,9 +105,9 @@ class S3Downloader:
|
|
|
104
105
|
self.file_name_meta_key = file_name_meta_key
|
|
105
106
|
self.s3_key_generation_function = s3_key_generation_function
|
|
106
107
|
|
|
107
|
-
self._storage:
|
|
108
|
+
self._storage: S3Storage | None = None
|
|
108
109
|
|
|
109
|
-
def resolve_secret(secret:
|
|
110
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
110
111
|
return secret.resolve_value() if secret else None
|
|
111
112
|
|
|
112
113
|
self._session = get_aws_session(
|
|
@@ -142,8 +143,7 @@ class S3Downloader:
|
|
|
142
143
|
"""
|
|
143
144
|
|
|
144
145
|
if self._storage is None:
|
|
145
|
-
|
|
146
|
-
raise RuntimeError(msg)
|
|
146
|
+
self.warm_up()
|
|
147
147
|
|
|
148
148
|
filtered_documents = self._filter_documents_by_extensions(documents) if self.file_extensions else documents
|
|
149
149
|
|
|
@@ -170,7 +170,7 @@ class S3Downloader:
|
|
|
170
170
|
if Path(doc.meta.get(self.file_name_meta_key, "")).suffix.lower() in self.file_extensions
|
|
171
171
|
]
|
|
172
172
|
|
|
173
|
-
def _download_file(self, document: Document) ->
|
|
173
|
+
def _download_file(self, document: Document) -> Document | None:
|
|
174
174
|
"""
|
|
175
175
|
Download a single file from AWS S3 Bucket to local filesystem.
|
|
176
176
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
from botocore.config import Config
|
|
5
5
|
from botocore.exceptions import ClientError
|
|
@@ -50,18 +50,18 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
50
50
|
def __init__(
|
|
51
51
|
self,
|
|
52
52
|
model: str,
|
|
53
|
-
aws_access_key_id:
|
|
54
|
-
aws_secret_access_key:
|
|
53
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
54
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
55
55
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
56
56
|
),
|
|
57
|
-
aws_session_token:
|
|
58
|
-
aws_region_name:
|
|
59
|
-
aws_profile_name:
|
|
57
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
58
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
59
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
60
60
|
batch_size: int = 32,
|
|
61
61
|
progress_bar: bool = True,
|
|
62
|
-
meta_fields_to_embed:
|
|
62
|
+
meta_fields_to_embed: list[str] | None = None,
|
|
63
63
|
embedding_separator: str = "\n",
|
|
64
|
-
boto3_config:
|
|
64
|
+
boto3_config: dict[str, Any] | None = None,
|
|
65
65
|
**kwargs: Any,
|
|
66
66
|
) -> None:
|
|
67
67
|
"""
|
|
@@ -115,7 +115,7 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
115
115
|
self.boto3_config = boto3_config
|
|
116
116
|
self.kwargs = kwargs
|
|
117
117
|
|
|
118
|
-
def resolve_secret(secret:
|
|
118
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
119
119
|
return secret.resolve_value() if secret else None
|
|
120
120
|
|
|
121
121
|
try:
|
|
@@ -186,7 +186,7 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
186
186
|
)
|
|
187
187
|
all_embeddings.extend(embeddings_list)
|
|
188
188
|
|
|
189
|
-
for doc, emb in zip(documents, all_embeddings):
|
|
189
|
+
for doc, emb in zip(documents, all_embeddings, strict=True):
|
|
190
190
|
doc.embedding = emb
|
|
191
191
|
|
|
192
192
|
return documents
|
|
@@ -214,7 +214,7 @@ class AmazonBedrockDocumentEmbedder:
|
|
|
214
214
|
embedding = response_body["embedding"]
|
|
215
215
|
all_embeddings.append(embedding)
|
|
216
216
|
|
|
217
|
-
for doc, emb in zip(documents, all_embeddings):
|
|
217
|
+
for doc, emb in zip(documents, all_embeddings, strict=True):
|
|
218
218
|
doc.embedding = emb
|
|
219
219
|
|
|
220
220
|
return documents
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
from dataclasses import replace
|
|
7
|
-
from typing import Any
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
from botocore.config import Config
|
|
10
10
|
from botocore.exceptions import ClientError
|
|
@@ -68,18 +68,18 @@ class AmazonBedrockDocumentImageEmbedder:
|
|
|
68
68
|
self,
|
|
69
69
|
*,
|
|
70
70
|
model: str,
|
|
71
|
-
aws_access_key_id:
|
|
72
|
-
aws_secret_access_key:
|
|
71
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
72
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
73
73
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
74
74
|
),
|
|
75
|
-
aws_session_token:
|
|
76
|
-
aws_region_name:
|
|
77
|
-
aws_profile_name:
|
|
75
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
76
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
77
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
78
78
|
file_path_meta_field: str = "file_path",
|
|
79
|
-
root_path:
|
|
80
|
-
image_size:
|
|
79
|
+
root_path: str | None = None,
|
|
80
|
+
image_size: tuple[int, int] | None = None,
|
|
81
81
|
progress_bar: bool = True,
|
|
82
|
-
boto3_config:
|
|
82
|
+
boto3_config: dict[str, Any] | None = None,
|
|
83
83
|
**kwargs: Any,
|
|
84
84
|
) -> None:
|
|
85
85
|
"""
|
|
@@ -144,7 +144,7 @@ class AmazonBedrockDocumentImageEmbedder:
|
|
|
144
144
|
raise ValueError(msg)
|
|
145
145
|
self.embedding_types = embedding_types
|
|
146
146
|
|
|
147
|
-
def resolve_secret(secret:
|
|
147
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
148
148
|
return secret.resolve_value() if secret else None
|
|
149
149
|
|
|
150
150
|
try:
|
|
@@ -288,7 +288,7 @@ class AmazonBedrockDocumentImageEmbedder:
|
|
|
288
288
|
|
|
289
289
|
docs_with_embeddings = []
|
|
290
290
|
|
|
291
|
-
for doc, emb in zip(documents, embeddings):
|
|
291
|
+
for doc, emb in zip(documents, embeddings, strict=True):
|
|
292
292
|
# we store this information for later inspection
|
|
293
293
|
new_meta = {
|
|
294
294
|
**doc.meta,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
from botocore.config import Config
|
|
5
5
|
from botocore.exceptions import ClientError
|
|
@@ -43,14 +43,14 @@ class AmazonBedrockTextEmbedder:
|
|
|
43
43
|
def __init__(
|
|
44
44
|
self,
|
|
45
45
|
model: str,
|
|
46
|
-
aws_access_key_id:
|
|
47
|
-
aws_secret_access_key:
|
|
46
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
47
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
48
48
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
49
49
|
),
|
|
50
|
-
aws_session_token:
|
|
51
|
-
aws_region_name:
|
|
52
|
-
aws_profile_name:
|
|
53
|
-
boto3_config:
|
|
50
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
51
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
52
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
53
|
+
boto3_config: dict[str, Any] | None = None,
|
|
54
54
|
**kwargs: Any,
|
|
55
55
|
) -> None:
|
|
56
56
|
"""
|
|
@@ -94,7 +94,7 @@ class AmazonBedrockTextEmbedder:
|
|
|
94
94
|
self.boto3_config = boto3_config
|
|
95
95
|
self.kwargs = kwargs
|
|
96
96
|
|
|
97
|
-
def resolve_secret(secret:
|
|
97
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
98
98
|
return secret.resolve_value() if secret else None
|
|
99
99
|
|
|
100
100
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
5
|
from botocore.eventstream import EventStream
|
|
6
6
|
from haystack.dataclasses import StreamingChunk, SyncStreamingCallbackT
|
|
@@ -19,7 +19,7 @@ class BedrockModelAdapter(ABC):
|
|
|
19
19
|
It will be overridden by the corresponding parameter in the `model_kwargs` if it is present.
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
def __init__(self, model_kwargs: dict[str, Any], max_length:
|
|
22
|
+
def __init__(self, model_kwargs: dict[str, Any], max_length: int | None) -> None:
|
|
23
23
|
self.model_kwargs = model_kwargs
|
|
24
24
|
self.max_length = max_length
|
|
25
25
|
|
|
@@ -115,7 +115,7 @@ class AnthropicClaudeAdapter(BedrockModelAdapter):
|
|
|
115
115
|
:param max_length: Maximum length of generated text
|
|
116
116
|
"""
|
|
117
117
|
|
|
118
|
-
def __init__(self, model_kwargs: dict[str, Any], max_length:
|
|
118
|
+
def __init__(self, model_kwargs: dict[str, Any], max_length: int | None) -> None:
|
|
119
119
|
self.use_messages_api = model_kwargs.get("use_messages_api", True)
|
|
120
120
|
self.include_thinking = model_kwargs.get("include_thinking", True)
|
|
121
121
|
self.thinking_tag = model_kwargs.get("thinking_tag", "thinking")
|
|
@@ -175,7 +175,7 @@ class AnthropicClaudeAdapter(BedrockModelAdapter):
|
|
|
175
175
|
if self.include_thinking and len(thinking) == len(texts):
|
|
176
176
|
texts = [
|
|
177
177
|
f"{self.thinking_tag_start}{thinking}{self.thinking_tag_end}{text}"
|
|
178
|
-
for text, thinking in zip(texts, thinking)
|
|
178
|
+
for text, thinking in zip(texts, thinking, strict=True)
|
|
179
179
|
]
|
|
180
180
|
return texts
|
|
181
181
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import aioboto3
|
|
4
4
|
from botocore.config import Config
|
|
@@ -140,26 +140,26 @@ class AmazonBedrockChatGenerator:
|
|
|
140
140
|
automatically from the environment or the AWS configuration file.
|
|
141
141
|
If the AWS environment is not configured, set `aws_access_key_id`, `aws_secret_access_key`,
|
|
142
142
|
and `aws_region_name` as environment variables or pass them as
|
|
143
|
-
[Secret](https://docs.haystack.deepset.ai/
|
|
143
|
+
[Secret](https://docs.haystack.deepset.ai/docs/secret-management) arguments. Make sure the region you set
|
|
144
144
|
supports Amazon Bedrock.
|
|
145
145
|
"""
|
|
146
146
|
|
|
147
147
|
def __init__(
|
|
148
148
|
self,
|
|
149
149
|
model: str,
|
|
150
|
-
aws_access_key_id:
|
|
151
|
-
aws_secret_access_key:
|
|
150
|
+
aws_access_key_id: Secret | None = Secret.from_env_var(["AWS_ACCESS_KEY_ID"], strict=False), # noqa: B008
|
|
151
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
152
152
|
["AWS_SECRET_ACCESS_KEY"], strict=False
|
|
153
153
|
),
|
|
154
|
-
aws_session_token:
|
|
155
|
-
aws_region_name:
|
|
156
|
-
aws_profile_name:
|
|
157
|
-
generation_kwargs:
|
|
158
|
-
streaming_callback:
|
|
159
|
-
boto3_config:
|
|
160
|
-
tools:
|
|
154
|
+
aws_session_token: Secret | None = Secret.from_env_var(["AWS_SESSION_TOKEN"], strict=False), # noqa: B008
|
|
155
|
+
aws_region_name: Secret | None = Secret.from_env_var(["AWS_DEFAULT_REGION"], strict=False), # noqa: B008
|
|
156
|
+
aws_profile_name: Secret | None = Secret.from_env_var(["AWS_PROFILE"], strict=False), # noqa: B008
|
|
157
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
158
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
159
|
+
boto3_config: dict[str, Any] | None = None,
|
|
160
|
+
tools: ToolsType | None = None,
|
|
161
161
|
*,
|
|
162
|
-
guardrail_config:
|
|
162
|
+
guardrail_config: dict[str, str] | None = None,
|
|
163
163
|
) -> None:
|
|
164
164
|
"""
|
|
165
165
|
Initializes the `AmazonBedrockChatGenerator` with the provided parameters. The parameters are passed to the
|
|
@@ -225,7 +225,7 @@ class AmazonBedrockChatGenerator:
|
|
|
225
225
|
_validate_guardrail_config(guardrail_config=guardrail_config, streaming=streaming_callback is not None)
|
|
226
226
|
self.guardrail_config = guardrail_config
|
|
227
227
|
|
|
228
|
-
def resolve_secret(secret:
|
|
228
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
229
229
|
return secret.resolve_value() if secret else None
|
|
230
230
|
|
|
231
231
|
config = Config(
|
|
@@ -252,7 +252,7 @@ class AmazonBedrockChatGenerator:
|
|
|
252
252
|
raise AmazonBedrockConfigurationError(msg) from exception
|
|
253
253
|
|
|
254
254
|
self.generation_kwargs = generation_kwargs or {}
|
|
255
|
-
self.async_session:
|
|
255
|
+
self.async_session: aioboto3.Session | None = None
|
|
256
256
|
|
|
257
257
|
def _get_async_session(self) -> aioboto3.Session:
|
|
258
258
|
"""
|
|
@@ -341,11 +341,11 @@ class AmazonBedrockChatGenerator:
|
|
|
341
341
|
def _prepare_request_params(
|
|
342
342
|
self,
|
|
343
343
|
messages: list[ChatMessage],
|
|
344
|
-
streaming_callback:
|
|
345
|
-
generation_kwargs:
|
|
346
|
-
tools:
|
|
344
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
345
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
346
|
+
tools: ToolsType | None = None,
|
|
347
347
|
requires_async: bool = False,
|
|
348
|
-
) -> tuple[dict[str, Any],
|
|
348
|
+
) -> tuple[dict[str, Any], StreamingCallbackT | None]:
|
|
349
349
|
"""
|
|
350
350
|
Prepares and formats parameters required to call the Amazon Bedrock Converse API.
|
|
351
351
|
|
|
@@ -423,9 +423,9 @@ class AmazonBedrockChatGenerator:
|
|
|
423
423
|
def run(
|
|
424
424
|
self,
|
|
425
425
|
messages: list[ChatMessage],
|
|
426
|
-
streaming_callback:
|
|
427
|
-
generation_kwargs:
|
|
428
|
-
tools:
|
|
426
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
427
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
428
|
+
tools: ToolsType | None = None,
|
|
429
429
|
) -> dict[str, list[ChatMessage]]:
|
|
430
430
|
"""
|
|
431
431
|
Executes a synchronous inference call to the Amazon Bedrock model using the Converse API.
|
|
@@ -484,9 +484,9 @@ class AmazonBedrockChatGenerator:
|
|
|
484
484
|
async def run_async(
|
|
485
485
|
self,
|
|
486
486
|
messages: list[ChatMessage],
|
|
487
|
-
streaming_callback:
|
|
488
|
-
generation_kwargs:
|
|
489
|
-
tools:
|
|
487
|
+
streaming_callback: StreamingCallbackT | None = None,
|
|
488
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
489
|
+
tools: ToolsType | None = None,
|
|
490
490
|
) -> dict[str, list[ChatMessage]]:
|
|
491
491
|
"""
|
|
492
492
|
Executes an asynchronous inference call to the Amazon Bedrock model using the Converse API.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import json
|
|
3
3
|
from datetime import datetime, timezone
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any
|
|
5
5
|
|
|
6
6
|
from botocore.eventstream import EventStream
|
|
7
7
|
from haystack import logging
|
|
@@ -40,7 +40,7 @@ FINISH_REASON_MAPPING: dict[str, FinishReason] = {
|
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
# Haystack to Bedrock util methods
|
|
43
|
-
def _format_tools(tools:
|
|
43
|
+
def _format_tools(tools: list[Tool] | None = None) -> dict[str, Any] | None:
|
|
44
44
|
"""
|
|
45
45
|
Format Haystack Tool(s) to Amazon Bedrock toolConfig format.
|
|
46
46
|
|
|
@@ -454,7 +454,7 @@ def _convert_event_to_streaming_chunk(
|
|
|
454
454
|
return streaming_chunk
|
|
455
455
|
|
|
456
456
|
|
|
457
|
-
def _process_reasoning_contents(chunks: list[StreamingChunk]) ->
|
|
457
|
+
def _process_reasoning_contents(chunks: list[StreamingChunk]) -> ReasoningContent | None:
|
|
458
458
|
"""
|
|
459
459
|
Process reasoning contents from a list of StreamingChunk objects into the Bedrock expected format.
|
|
460
460
|
|
|
@@ -613,7 +613,7 @@ async def _parse_streaming_response_async(
|
|
|
613
613
|
return [reply]
|
|
614
614
|
|
|
615
615
|
|
|
616
|
-
def _validate_guardrail_config(guardrail_config:
|
|
616
|
+
def _validate_guardrail_config(guardrail_config: dict[str, str] | None = None, streaming: bool = False) -> None:
|
|
617
617
|
"""
|
|
618
618
|
Validate the guardrail configuration.
|
|
619
619
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import re
|
|
3
3
|
import warnings
|
|
4
|
-
from
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from typing import Any, ClassVar, Literal, get_args
|
|
5
6
|
|
|
6
7
|
from botocore.config import Config
|
|
7
8
|
from botocore.exceptions import ClientError
|
|
@@ -58,7 +59,7 @@ class AmazonBedrockGenerator:
|
|
|
58
59
|
automatically from the environment or the AWS configuration file.
|
|
59
60
|
If the AWS environment is not configured, set `aws_access_key_id`, `aws_secret_access_key`,
|
|
60
61
|
`aws_session_token`, and `aws_region_name` as environment variables or pass them as
|
|
61
|
-
[Secret](https://docs.haystack.deepset.ai/
|
|
62
|
+
[Secret](https://docs.haystack.deepset.ai/docs/secret-management) arguments. Make sure the region you set
|
|
62
63
|
supports Amazon Bedrock.
|
|
63
64
|
"""
|
|
64
65
|
|
|
@@ -95,18 +96,18 @@ class AmazonBedrockGenerator:
|
|
|
95
96
|
def __init__(
|
|
96
97
|
self,
|
|
97
98
|
model: str,
|
|
98
|
-
aws_access_key_id:
|
|
99
|
-
aws_secret_access_key:
|
|
99
|
+
aws_access_key_id: Secret | None = Secret.from_env_var("AWS_ACCESS_KEY_ID", strict=False), # noqa: B008
|
|
100
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
100
101
|
"AWS_SECRET_ACCESS_KEY", strict=False
|
|
101
102
|
),
|
|
102
|
-
aws_session_token:
|
|
103
|
-
aws_region_name:
|
|
104
|
-
aws_profile_name:
|
|
105
|
-
max_length:
|
|
106
|
-
truncate:
|
|
107
|
-
streaming_callback:
|
|
108
|
-
boto3_config:
|
|
109
|
-
model_family:
|
|
103
|
+
aws_session_token: Secret | None = Secret.from_env_var("AWS_SESSION_TOKEN", strict=False), # noqa: B008
|
|
104
|
+
aws_region_name: Secret | None = Secret.from_env_var("AWS_DEFAULT_REGION", strict=False), # noqa: B008
|
|
105
|
+
aws_profile_name: Secret | None = Secret.from_env_var("AWS_PROFILE", strict=False), # noqa: B008
|
|
106
|
+
max_length: int | None = None,
|
|
107
|
+
truncate: bool | None = None,
|
|
108
|
+
streaming_callback: Callable[[StreamingChunk], None] | None = None,
|
|
109
|
+
boto3_config: dict[str, Any] | None = None,
|
|
110
|
+
model_family: MODEL_FAMILIES | None = None,
|
|
110
111
|
**kwargs: Any,
|
|
111
112
|
) -> None:
|
|
112
113
|
"""
|
|
@@ -156,7 +157,7 @@ class AmazonBedrockGenerator:
|
|
|
156
157
|
self.kwargs = kwargs
|
|
157
158
|
self.model_family = model_family
|
|
158
159
|
|
|
159
|
-
def resolve_secret(secret:
|
|
160
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
160
161
|
return secret.resolve_value() if secret else None
|
|
161
162
|
|
|
162
163
|
try:
|
|
@@ -187,9 +188,9 @@ class AmazonBedrockGenerator:
|
|
|
187
188
|
def run(
|
|
188
189
|
self,
|
|
189
190
|
prompt: str,
|
|
190
|
-
streaming_callback:
|
|
191
|
-
generation_kwargs:
|
|
192
|
-
) -> dict[str,
|
|
191
|
+
streaming_callback: Callable[[StreamingChunk], None] | None = None,
|
|
192
|
+
generation_kwargs: dict[str, Any] | None = None,
|
|
193
|
+
) -> dict[str, list[str] | dict[str, Any]]:
|
|
193
194
|
"""
|
|
194
195
|
Generates a list of string response to the given prompt.
|
|
195
196
|
|
|
@@ -240,7 +241,7 @@ class AmazonBedrockGenerator:
|
|
|
240
241
|
return {"replies": replies, "meta": metadata}
|
|
241
242
|
|
|
242
243
|
@classmethod
|
|
243
|
-
def get_model_adapter(cls, model: str, model_family:
|
|
244
|
+
def get_model_adapter(cls, model: str, model_family: str | None = None) -> type[BedrockModelAdapter]:
|
|
244
245
|
"""
|
|
245
246
|
Gets the model adapter for the given model.
|
|
246
247
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
from botocore.exceptions import ClientError
|
|
4
4
|
from haystack import Document, component, default_from_dict, default_to_dict, logging
|
|
@@ -52,7 +52,7 @@ class AmazonBedrockRanker:
|
|
|
52
52
|
automatically from the environment or the AWS configuration file.
|
|
53
53
|
If the AWS environment is not configured, set `aws_access_key_id`, `aws_secret_access_key`,
|
|
54
54
|
and `aws_region_name` as environment variables or pass them as
|
|
55
|
-
[Secret](https://docs.haystack.deepset.ai/
|
|
55
|
+
[Secret](https://docs.haystack.deepset.ai/docs/secret-management) arguments. Make sure the region you set
|
|
56
56
|
supports Amazon Bedrock.
|
|
57
57
|
"""
|
|
58
58
|
|
|
@@ -60,15 +60,15 @@ class AmazonBedrockRanker:
|
|
|
60
60
|
self,
|
|
61
61
|
model: str = "cohere.rerank-v3-5:0",
|
|
62
62
|
top_k: int = 10,
|
|
63
|
-
aws_access_key_id:
|
|
64
|
-
aws_secret_access_key:
|
|
63
|
+
aws_access_key_id: Secret | None = Secret.from_env_var(["AWS_ACCESS_KEY_ID"], strict=False), # noqa: B008
|
|
64
|
+
aws_secret_access_key: Secret | None = Secret.from_env_var( # noqa: B008
|
|
65
65
|
["AWS_SECRET_ACCESS_KEY"], strict=False
|
|
66
66
|
),
|
|
67
|
-
aws_session_token:
|
|
68
|
-
aws_region_name:
|
|
69
|
-
aws_profile_name:
|
|
70
|
-
max_chunks_per_doc:
|
|
71
|
-
meta_fields_to_embed:
|
|
67
|
+
aws_session_token: Secret | None = Secret.from_env_var(["AWS_SESSION_TOKEN"], strict=False), # noqa: B008
|
|
68
|
+
aws_region_name: Secret | None = Secret.from_env_var(["AWS_DEFAULT_REGION"], strict=False), # noqa: B008
|
|
69
|
+
aws_profile_name: Secret | None = Secret.from_env_var(["AWS_PROFILE"], strict=False), # noqa: B008
|
|
70
|
+
max_chunks_per_doc: int | None = None,
|
|
71
|
+
meta_fields_to_embed: list[str] | None = None,
|
|
72
72
|
meta_data_separator: str = "\n",
|
|
73
73
|
) -> None:
|
|
74
74
|
if not model:
|
|
@@ -103,7 +103,7 @@ class AmazonBedrockRanker:
|
|
|
103
103
|
self.meta_fields_to_embed = meta_fields_to_embed or []
|
|
104
104
|
self.meta_data_separator = meta_data_separator
|
|
105
105
|
|
|
106
|
-
def resolve_secret(secret:
|
|
106
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
107
107
|
return secret.resolve_value() if secret else None
|
|
108
108
|
|
|
109
109
|
try:
|
|
@@ -177,7 +177,7 @@ class AmazonBedrockRanker:
|
|
|
177
177
|
return concatenated_input_list
|
|
178
178
|
|
|
179
179
|
@component.output_types(documents=list[Document])
|
|
180
|
-
def run(self, query: str, documents: list[Document], top_k:
|
|
180
|
+
def run(self, query: str, documents: list[Document], top_k: int | None = None) -> dict[str, list[Document]]:
|
|
181
181
|
"""
|
|
182
182
|
Use the Amazon Bedrock Reranker to re-rank the list of documents based on the query.
|
|
183
183
|
|
|
@@ -201,7 +201,7 @@ class AmazonBedrockRanker:
|
|
|
201
201
|
if not documents:
|
|
202
202
|
return {"documents": []}
|
|
203
203
|
|
|
204
|
-
def resolve_secret(secret:
|
|
204
|
+
def resolve_secret(secret: Secret | None) -> str | None:
|
|
205
205
|
return secret.resolve_value() if secret else None
|
|
206
206
|
|
|
207
207
|
region = resolve_secret(self.aws_region_name)
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_chat_generator.py
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
5
|
from haystack import Pipeline
|
|
@@ -169,7 +169,7 @@ class TestAmazonBedrockChatGenerator:
|
|
|
169
169
|
assert generator.to_dict() == expected_dict
|
|
170
170
|
|
|
171
171
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
172
|
-
def test_from_dict(self, mock_boto3_session: Any, boto3_config:
|
|
172
|
+
def test_from_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
173
173
|
"""
|
|
174
174
|
Test that the from_dict method returns the correct object
|
|
175
175
|
"""
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_document_embedder.py
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import io
|
|
2
2
|
import os
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
from unittest.mock import patch
|
|
5
5
|
|
|
6
6
|
import pytest
|
|
@@ -66,7 +66,7 @@ class TestAmazonBedrockDocumentEmbedder:
|
|
|
66
66
|
)
|
|
67
67
|
|
|
68
68
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
69
|
-
def test_to_dict(self, mock_boto3_session: Any, boto3_config:
|
|
69
|
+
def test_to_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
70
70
|
embedder = AmazonBedrockDocumentEmbedder(
|
|
71
71
|
model="cohere.embed-english-v3",
|
|
72
72
|
input_type="search_document",
|
|
@@ -94,7 +94,7 @@ class TestAmazonBedrockDocumentEmbedder:
|
|
|
94
94
|
assert embedder.to_dict() == expected_dict
|
|
95
95
|
|
|
96
96
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
97
|
-
def test_from_dict(self, mock_boto3_session: Any, boto3_config:
|
|
97
|
+
def test_from_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
98
98
|
data = {
|
|
99
99
|
"type": TYPE,
|
|
100
100
|
"init_parameters": {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import glob
|
|
2
2
|
import io
|
|
3
3
|
import os
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any
|
|
5
5
|
from unittest.mock import patch
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
@@ -62,7 +62,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
65
|
-
def test_to_dict(self, mock_boto3_session: Any, boto3_config:
|
|
65
|
+
def test_to_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
66
66
|
embedder = AmazonBedrockDocumentImageEmbedder(
|
|
67
67
|
model="cohere.embed-english-v3",
|
|
68
68
|
embedding_types=["float"],
|
|
@@ -90,7 +90,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
90
90
|
assert embedder.to_dict() == expected_dict
|
|
91
91
|
|
|
92
92
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
93
|
-
def test_from_dict(self, mock_boto3_session: Any, boto3_config:
|
|
93
|
+
def test_from_dict(self, mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
94
94
|
data = {
|
|
95
95
|
"type": TYPE,
|
|
96
96
|
"init_parameters": {
|
|
@@ -243,7 +243,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
243
243
|
|
|
244
244
|
assert isinstance(result["documents"], list)
|
|
245
245
|
assert len(result["documents"]) == len(documents)
|
|
246
|
-
for doc, new_doc in zip(documents, result["documents"]):
|
|
246
|
+
for doc, new_doc in zip(documents, result["documents"], strict=True):
|
|
247
247
|
assert doc.embedding is None
|
|
248
248
|
assert new_doc is not doc
|
|
249
249
|
assert isinstance(new_doc, Document)
|
|
@@ -276,7 +276,7 @@ class TestAmazonBedrockDocumentImageEmbedder:
|
|
|
276
276
|
|
|
277
277
|
assert isinstance(result["documents"], list)
|
|
278
278
|
assert len(result["documents"]) == len(documents)
|
|
279
|
-
for doc, new_doc in zip(documents, result["documents"]):
|
|
279
|
+
for doc, new_doc in zip(documents, result["documents"], strict=True):
|
|
280
280
|
assert doc.embedding is None
|
|
281
281
|
assert new_doc is not doc
|
|
282
282
|
assert isinstance(new_doc, Document)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
from unittest.mock import MagicMock, call
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
@@ -21,7 +21,7 @@ from haystack_integrations.components.generators.amazon_bedrock.adapters import
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
24
|
-
def test_to_dict(mock_boto3_session: Any, boto3_config:
|
|
24
|
+
def test_to_dict(mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
25
25
|
"""
|
|
26
26
|
Test that the to_dict method returns the correct dictionary without aws credentials
|
|
27
27
|
"""
|
|
@@ -50,7 +50,7 @@ def test_to_dict(mock_boto3_session: Any, boto3_config: Optional[dict[str, Any]]
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 1000}])
|
|
53
|
-
def test_from_dict(mock_boto3_session: Any, boto3_config:
|
|
53
|
+
def test_from_dict(mock_boto3_session: Any, boto3_config: dict[str, Any] | None):
|
|
54
54
|
"""
|
|
55
55
|
Test that the from_dict method returns the correct object
|
|
56
56
|
"""
|
|
@@ -161,7 +161,7 @@ def test_constructor_with_empty_model():
|
|
|
161
161
|
("mistral.mistral-medium-v8:0", MistralAdapter), # artificial
|
|
162
162
|
],
|
|
163
163
|
)
|
|
164
|
-
def test_get_model_adapter(model: str, expected_model_adapter:
|
|
164
|
+
def test_get_model_adapter(model: str, expected_model_adapter: type[BedrockModelAdapter] | None):
|
|
165
165
|
"""
|
|
166
166
|
Test that the correct model adapter is returned for a given model
|
|
167
167
|
"""
|
|
@@ -182,7 +182,7 @@ def test_get_model_adapter(model: str, expected_model_adapter: Optional[type[Bed
|
|
|
182
182
|
],
|
|
183
183
|
)
|
|
184
184
|
def test_get_model_adapter_with_model_family(
|
|
185
|
-
model_family: str, expected_model_adapter:
|
|
185
|
+
model_family: str, expected_model_adapter: type[BedrockModelAdapter] | None
|
|
186
186
|
):
|
|
187
187
|
"""
|
|
188
188
|
Test that the correct model adapter is returned for a given model model_family
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any
|
|
4
4
|
from unittest.mock import MagicMock, patch
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
@@ -63,7 +63,7 @@ class TestS3Downloader:
|
|
|
63
63
|
assert d.file_extensions == [".pdf", ".txt"]
|
|
64
64
|
|
|
65
65
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 10}])
|
|
66
|
-
def test_to_dict(self, mock_boto3_session: Any, tmp_path, boto3_config:
|
|
66
|
+
def test_to_dict(self, mock_boto3_session: Any, tmp_path, boto3_config: dict[str, Any] | None):
|
|
67
67
|
d = S3Downloader(file_root_path=str(tmp_path), boto3_config=boto3_config)
|
|
68
68
|
expected = {
|
|
69
69
|
"type": TYPE,
|
|
@@ -84,7 +84,7 @@ class TestS3Downloader:
|
|
|
84
84
|
assert d.to_dict() == expected
|
|
85
85
|
|
|
86
86
|
@pytest.mark.parametrize("boto3_config", [None, {"read_timeout": 10}])
|
|
87
|
-
def test_from_dict(self, mock_boto3_session: Any, tmp_path, boto3_config:
|
|
87
|
+
def test_from_dict(self, mock_boto3_session: Any, tmp_path, boto3_config: dict[str, Any] | None):
|
|
88
88
|
data = {
|
|
89
89
|
"type": TYPE,
|
|
90
90
|
"init_parameters": {
|
|
@@ -129,7 +129,6 @@ class TestS3Downloader:
|
|
|
129
129
|
|
|
130
130
|
def test_run(self, tmp_path, mock_s3_storage, mock_boto3_session):
|
|
131
131
|
d = S3Downloader(file_root_path=str(tmp_path))
|
|
132
|
-
S3Downloader.warm_up(d)
|
|
133
132
|
d._storage = mock_s3_storage
|
|
134
133
|
|
|
135
134
|
docs = [
|
|
@@ -141,7 +140,6 @@ class TestS3Downloader:
|
|
|
141
140
|
|
|
142
141
|
def test_run_with_extensions(self, tmp_path, mock_s3_storage, mock_boto3_session):
|
|
143
142
|
d = S3Downloader(file_root_path=str(tmp_path), file_extensions=[".txt"])
|
|
144
|
-
S3Downloader.warm_up(d)
|
|
145
143
|
d._storage = mock_s3_storage
|
|
146
144
|
|
|
147
145
|
docs = [
|
|
@@ -155,12 +153,9 @@ class TestS3Downloader:
|
|
|
155
153
|
|
|
156
154
|
def test_run_with_input_file_meta_key(self, tmp_path, mock_s3_storage, mock_boto3_session):
|
|
157
155
|
d = S3Downloader(file_root_path=str(tmp_path), file_name_meta_key="custom_file_key")
|
|
158
|
-
S3Downloader.warm_up(d)
|
|
159
156
|
d._storage = mock_s3_storage
|
|
160
157
|
|
|
161
|
-
docs = [
|
|
162
|
-
Document(meta={"file_id": str(uuid4()), "custom_file_key": "a.txt"}),
|
|
163
|
-
]
|
|
158
|
+
docs = [Document(meta={"file_id": str(uuid4()), "custom_file_key": "a.txt"})]
|
|
164
159
|
|
|
165
160
|
out = d.run(documents=docs)
|
|
166
161
|
assert len(out["documents"]) == 1
|
|
@@ -168,12 +163,9 @@ class TestS3Downloader:
|
|
|
168
163
|
|
|
169
164
|
def test_run_with_s3_key_generation_function(self, tmp_path, mock_s3_storage, mock_boto3_session):
|
|
170
165
|
d = S3Downloader(file_root_path=str(tmp_path), s3_key_generation_function=s3_key_generation_function)
|
|
171
|
-
S3Downloader.warm_up(d)
|
|
172
166
|
d._storage = mock_s3_storage
|
|
173
167
|
|
|
174
|
-
docs = [
|
|
175
|
-
Document(meta={"file_id": str(uuid4()), "file_name": "a.txt"}),
|
|
176
|
-
]
|
|
168
|
+
docs = [Document(meta={"file_id": str(uuid4()), "file_name": "a.txt"})]
|
|
177
169
|
out = d.run(documents=docs)
|
|
178
170
|
assert len(out["documents"]) == 1
|
|
179
171
|
assert out["documents"][0].meta["file_name"] == "a.txt"
|
|
@@ -189,7 +181,6 @@ class TestS3Downloader:
|
|
|
189
181
|
s3_key_generation_function=s3_key_generation_function,
|
|
190
182
|
file_extensions=[".txt"],
|
|
191
183
|
)
|
|
192
|
-
S3Downloader.warm_up(d)
|
|
193
184
|
d._storage = mock_s3_storage
|
|
194
185
|
|
|
195
186
|
docs = [
|
|
@@ -210,8 +201,6 @@ class TestS3Downloader:
|
|
|
210
201
|
def test_live_run(self, tmp_path, monkeypatch):
|
|
211
202
|
d = S3Downloader(file_root_path=str(tmp_path))
|
|
212
203
|
monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "")
|
|
213
|
-
S3Downloader.warm_up(d)
|
|
214
|
-
|
|
215
204
|
docs = [
|
|
216
205
|
Document(meta={"file_id": str(uuid4()), "file_name": "text-sample.txt"}),
|
|
217
206
|
Document(meta={"file_id": str(uuid4()), "file_name": "document-sample.pdf"}),
|
|
@@ -229,9 +218,7 @@ class TestS3Downloader:
|
|
|
229
218
|
)
|
|
230
219
|
def test_live_run_with_no_documents(self, tmp_path):
|
|
231
220
|
d = S3Downloader(file_root_path=str(tmp_path))
|
|
232
|
-
|
|
233
|
-
docs = []
|
|
234
|
-
out = d.run(documents=docs)
|
|
221
|
+
out = d.run(documents=[])
|
|
235
222
|
assert len(out["documents"]) == 0
|
|
236
223
|
|
|
237
224
|
@pytest.mark.integration
|
|
@@ -247,10 +234,8 @@ class TestS3Downloader:
|
|
|
247
234
|
def test_live_run_with_custom_meta_key(self, tmp_path, monkeypatch):
|
|
248
235
|
d = S3Downloader(file_root_path=str(tmp_path), file_name_meta_key="custom_name")
|
|
249
236
|
monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "")
|
|
250
|
-
|
|
251
|
-
docs = [
|
|
252
|
-
Document(meta={"custom_name": "text-sample.txt"}),
|
|
253
|
-
]
|
|
237
|
+
d.warm_up()
|
|
238
|
+
docs = [Document(meta={"custom_name": "text-sample.txt"})]
|
|
254
239
|
out = d.run(documents=docs)
|
|
255
240
|
assert len(out["documents"]) == 1
|
|
256
241
|
assert out["documents"][0].meta["custom_name"] == "text-sample.txt"
|
|
@@ -263,11 +248,8 @@ class TestS3Downloader:
|
|
|
263
248
|
def test_live_run_with_prefix(self, tmp_path, monkeypatch):
|
|
264
249
|
d = S3Downloader(file_root_path=str(tmp_path))
|
|
265
250
|
monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "subfolder/")
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
docs = [
|
|
269
|
-
Document(meta={"file_name": "employees.json"}),
|
|
270
|
-
]
|
|
251
|
+
d.warm_up()
|
|
252
|
+
docs = [Document(meta={"file_name": "employees.json"})]
|
|
271
253
|
out = d.run(documents=docs)
|
|
272
254
|
assert len(out["documents"]) == 1
|
|
273
255
|
assert out["documents"][0].meta["file_name"] == "employees.json"
|
|
@@ -286,10 +268,8 @@ class TestS3Downloader:
|
|
|
286
268
|
file_name_meta_key="file_name",
|
|
287
269
|
s3_key_generation_function=s3_key_generation_function,
|
|
288
270
|
)
|
|
289
|
-
|
|
290
|
-
docs = [
|
|
291
|
-
Document(meta={"file_name": "dog.jpg"}),
|
|
292
|
-
]
|
|
271
|
+
d.warm_up()
|
|
272
|
+
docs = [Document(meta={"file_name": "dog.jpg"})]
|
|
293
273
|
out = d.run(documents=docs)
|
|
294
274
|
assert len(out["documents"]) == 1
|
|
295
275
|
assert out["documents"][0].meta["file_name"] == "dog.jpg"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/examples/bedrock_ranker_example.py
RENAMED
|
File without changes
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/examples/chatgenerator_example.py
RENAMED
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/examples/s3_downloader_example.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_chat_generator_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_files/haystack-logo.png
RENAMED
|
File without changes
|
{amazon_bedrock_haystack-5.3.1 → amazon_bedrock_haystack-6.0.0}/tests/test_files/sample_pdf_1.pdf
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|