indexify 0.3.18.tar.gz → 0.3.20.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.3.18 → indexify-0.3.20}/PKG-INFO +2 -1
- {indexify-0.3.18 → indexify-0.3.20}/pyproject.toml +4 -1
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/cli/cli.py +15 -17
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/api_objects.py +12 -0
- indexify-0.3.20/src/indexify/executor/blob_store/blob_store.py +69 -0
- indexify-0.3.20/src/indexify/executor/blob_store/local_fs_blob_store.py +48 -0
- indexify-0.3.20/src/indexify/executor/blob_store/metrics/blob_store.py +33 -0
- indexify-0.3.20/src/indexify/executor/blob_store/s3_blob_store.py +85 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/downloader.py +149 -25
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/executor.py +77 -41
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/function_executor.py +24 -11
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/function_executor_state.py +9 -1
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/function_executor_states_container.py +8 -1
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/function_executor_status.py +4 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/health_checker.py +7 -2
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/invocation_state_client.py +4 -2
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +6 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/single_task_runner.py +15 -11
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/task_output.py +36 -2
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/grpc/channel_manager.py +4 -3
- indexify-0.3.20/src/indexify/executor/grpc/function_executor_controller.py +391 -0
- indexify-0.3.20/src/indexify/executor/grpc/metrics/state_reconciler.py +17 -0
- indexify-0.3.20/src/indexify/executor/grpc/metrics/task_controller.py +8 -0
- indexify-0.3.20/src/indexify/executor/grpc/state_reconciler.py +429 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/grpc/state_reporter.py +52 -41
- indexify-0.3.20/src/indexify/executor/grpc/task_controller.py +492 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/metrics/task_reporter.py +14 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/task_reporter.py +115 -6
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/task_runner.py +1 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/proto/executor_api.proto +91 -7
- indexify-0.3.20/src/indexify/proto/executor_api_pb2.py +82 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/proto/executor_api_pb2.pyi +158 -3
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/proto/executor_api_pb2_grpc.py +47 -0
- indexify-0.3.18/src/indexify/executor/grpc/state_reconciler.py +0 -322
- indexify-0.3.18/src/indexify/proto/executor_api_pb2.py +0 -70
- {indexify-0.3.18 → indexify-0.3.20}/README.md +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/README.md +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/executor_flavor.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/metrics/function_executor_state.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/metrics/function_executor_state_container.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/metrics/single_task_runner.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/function_executor/task_input.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/grpc/metrics/channel_manager.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/grpc/metrics/state_reporter.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/metrics/downloader.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/metrics/task_fetcher.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/metrics/task_runner.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/function_allowlist.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/metrics.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/runtime_probes.py +0 -0
- {indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/task_fetcher.py +0 -0
{indexify-0.3.18 → indexify-0.3.20}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: indexify
-Version: 0.3.18
+Version: 0.3.20
 Summary: Open Source Indexify components and helper tools
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: aiohttp (>=3.11.0,<4.0.0)
+Requires-Dist: boto3 (>=1.37.30,<2.0.0)
 Requires-Dist: prometheus-client (>=0.21.1,<0.22.0)
 Requires-Dist: rich (>=13.9.2,<14.0.0)
 Requires-Dist: tensorlake (>=0.1)
```
{indexify-0.3.18 → indexify-0.3.20}/pyproject.toml

```diff
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "indexify"
 # Incremented if any of the components provided in this packages are updated.
-version = "0.3.18"
+version = "0.3.20"
 description = "Open Source Indexify components and helper tools"
 authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
 license = "Apache 2.0"
@@ -24,6 +24,8 @@ aiohttp = "^3.11.0"
 prometheus-client = "^0.21.1"
 # Adds function-executor binary and utils lib.
 tensorlake = ">=0.1"
+# Uncomment the next line to use local tensorlake package (only for development!)
+# tensorlake = { path = "../tensorlake", develop = true }
 # pydantic is provided by tensorlake
 # httpx-sse is provided by tensorlake
 # grpcio is provided by tensorlake
@@ -33,6 +35,7 @@ tensorlake = ">=0.1"
 rich = "^13.9.2"
 typer = "^0.12"
 # nanoid is provided by tensorlake
+boto3 = "^1.37.30"
 
 [tool.poetry.group.dev.dependencies]
 black = "^24.10.0"
```
{indexify-0.3.18 → indexify-0.3.20}/src/indexify/cli/cli.py

```diff
@@ -25,6 +25,9 @@ from rich.theme import Theme
 from tensorlake.functions_sdk.image import Image
 
 from indexify.executor.api_objects import FunctionURI
+from indexify.executor.blob_store.blob_store import BLOBStore
+from indexify.executor.blob_store.local_fs_blob_store import LocalFSBLOBStore
+from indexify.executor.blob_store.s3_blob_store import S3BLOBStore
 from indexify.executor.executor import Executor
 from indexify.executor.executor_flavor import ExecutorFlavor
 from indexify.executor.function_executor.server.subprocess_function_executor_server_factory import (
@@ -78,6 +81,7 @@ def build_image(
 )
 def executor(
     server_addr: str = "localhost:8900",
+    grpc_server_addr: str = "localhost:8901",
     dev: Annotated[
         bool, typer.Option("--dev", "-d", help="Run the executor in development mode")
     ] = False,
@@ -120,16 +124,6 @@ def executor(
             help="Port where to run Executor Monitoring server",
         ),
     ] = 7000,
-    grpc_server_addr: Annotated[
-        Optional[str],
-        typer.Option(
-            "--grpc-server-addr",
-            help=(
-                "(exprimental) Address of server gRPC API to connect to, e.g. 'localhost:8901'.\n"
-                "Enables gRPC state reporter that will periodically report the state of the Function Executors to Server\n"
-            ),
-        ),
-    ] = None,
     enable_grpc_state_reconciler: Annotated[
         bool,
         typer.Option(
@@ -166,11 +160,6 @@ def executor(
             "--executor-id should be at least 10 characters long and only include characters _-[0-9][a-z][A-Z]"
         )
 
-    if enable_grpc_state_reconciler and grpc_server_addr is None:
-        raise typer.BadParameter(
-            "--grpc-server-addr must be set when --enable-grpc-state-reconciler is set"
-        )
-
     kv_labels: Dict[str, str] = {}
     for label in labels:
         key, value = label.split("=")
@@ -183,6 +172,7 @@ def executor(
         "starting executor",
         hostname=gethostname(),
         server_addr=server_addr,
+        grpc_server_addr=grpc_server_addr,
         config_path=config_path,
         executor_version=executor_version,
         labels=kv_labels,
@@ -192,7 +182,6 @@ def executor(
         dev_mode=dev,
         monitoring_server_host=monitoring_server_host,
         monitoring_server_port=monitoring_server_port,
-        grpc_server_addr=grpc_server_addr,
         enable_grpc_state_reconciler=enable_grpc_state_reconciler,
     )
 
@@ -211,6 +200,14 @@ def executor(
         )
         exit(1)
 
+    # Enable all available blob stores in OSS because we don't know which one is going to be used.
+    blob_store: BLOBStore = BLOBStore(
+        # Local FS mode is used in tests and in cases when user wants to store data on NFS.
+        local=LocalFSBLOBStore(),
+        # S3 is initiliazed lazily so it's okay to create it even if the user is not going to use it.
+        s3=S3BLOBStore(),
+    )
+
     prometheus_client.Info("cli", "CLI information").info(
         {
             "package": "indexify",
@@ -231,11 +228,12 @@ def executor(
             server_ports=range(ports[0], ports[1]),
         ),
         server_addr=server_addr,
+        grpc_server_addr=grpc_server_addr,
         config_path=config_path,
         monitoring_server_host=monitoring_server_host,
         monitoring_server_port=monitoring_server_port,
-        grpc_server_addr=grpc_server_addr,
         enable_grpc_state_reconciler=enable_grpc_state_reconciler,
+        blob_store=blob_store,
     ).run()
 
 
```
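Net effect of the cli.py changes: `grpc_server_addr` moves from an experimental `Optional[str]` option defaulting to `None` to a plain parameter with a `"localhost:8901"` default, which is why the `BadParameter` guard tying it to `--enable-grpc-state-reconciler` could be dropped, and the command now always wires a `BLOBStore` (local FS plus lazily initialized S3) into the `Executor`. A minimal Typer sketch, not the shipped CLI, illustrating why the guard became unnecessary:

```python
# Hypothetical, stripped-down analogue of the `executor` command above.
import typer

app = typer.Typer()


@app.command()
def executor(
    server_addr: str = "localhost:8900",
    # In 0.3.18 this was Annotated[Optional[str], typer.Option(...)] = None;
    # with a concrete default it can never be None, so no BadParameter check.
    grpc_server_addr: str = "localhost:8901",
    enable_grpc_state_reconciler: bool = False,
):
    # Typer turns plain defaulted parameters into options automatically,
    # e.g. --server-addr, --grpc-server-addr.
    typer.echo(f"server={server_addr} grpc={grpc_server_addr}")


if __name__ == "__main__":
    app()
```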
{indexify-0.3.18 → indexify-0.3.20}/src/indexify/executor/api_objects.py

```diff
@@ -49,5 +49,17 @@ class TaskResult(BaseModel):
     reducer: bool = False
 
 
+class DataPayload(BaseModel):
+    path: str
+    size: int
+    sha256_hash: str
+
+
+class IngestFnOutputsResponse(BaseModel):
+    data_payloads: List[DataPayload]
+    stdout: Optional[DataPayload] = None
+    stderr: Optional[DataPayload] = None
+
+
 TASK_OUTCOME_SUCCESS = "success"
 TASK_OUTCOME_FAILURE = "failure"
```
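The new `DataPayload` and `IngestFnOutputsResponse` models describe where a task's outputs (and captured stdout/stderr) were uploaded as BLOBs. A construction example with made-up values, only to show the shape; the URI and sizes are not from the diff:

```python
from indexify.executor.api_objects import DataPayload, IngestFnOutputsResponse

response = IngestFnOutputsResponse(
    data_payloads=[
        DataPayload(
            path="s3://example-bucket/outputs/output-0",  # hypothetical URI
            size=5,
            # sha256 of b"hello"
            sha256_hash="2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824",
        )
    ],
    stdout=None,
    stderr=None,
)
assert response.data_payloads[0].size == 5
```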
indexify-0.3.20/src/indexify/executor/blob_store/blob_store.py (new file, 69 lines)

```python
from typing import Any, Optional

from .local_fs_blob_store import LocalFSBLOBStore
from .metrics.blob_store import (
    metric_get_blob_errors,
    metric_get_blob_latency,
    metric_get_blob_requests,
    metric_put_blob_errors,
    metric_put_blob_latency,
    metric_put_blob_requests,
)
from .s3_blob_store import S3BLOBStore


class BLOBStore:
    """Dispatches generic BLOB store calls to their real backends."""

    def __init__(
        self, local: Optional[LocalFSBLOBStore] = None, s3: Optional[S3BLOBStore] = None
    ):
        """Creates a BLOB store that uses the supplied BLOB stores."""
        self._local: Optional[LocalFSBLOBStore] = local
        self._s3: Optional[S3BLOBStore] = s3

    async def get(self, uri: str, logger: Any) -> bytes:
        """Returns binary value stored in BLOB with the supplied URI.

        Raises Exception on error. Raises KeyError if the BLOB doesn't exist.
        """
        with (
            metric_get_blob_errors.count_exceptions(),
            metric_get_blob_latency.time(),
        ):
            metric_get_blob_requests.inc()
            if _is_file_uri(uri):
                self._check_local_is_available()
                return await self._local.get(uri, logger)
            else:
                self._check_s3_is_available()
                return await self._s3.get(uri, logger)

    async def put(self, uri: str, value: bytes, logger: Any) -> None:
        """Stores the supplied binary value in a BLOB with the supplied URI.

        Overwrites existing BLOB. Raises Exception on error.
        """
        with (
            metric_put_blob_errors.count_exceptions(),
            metric_put_blob_latency.time(),
        ):
            metric_put_blob_requests.inc()
            if _is_file_uri(uri):
                self._check_local_is_available()
                await self._local.put(uri, value, logger)
            else:
                self._check_s3_is_available()
                await self._s3.put(uri, value, logger)

    def _check_local_is_available(self):
        if self._local is None:
            raise RuntimeError("Local file system BLOB store is not available")

    def _check_s3_is_available(self):
        if self._s3 is None:
            raise RuntimeError("S3 BLOB store is not available")


def _is_file_uri(uri: str) -> bool:
    return uri.startswith("file://")
```
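`BLOBStore` is a thin dispatcher: `file://` URIs go to the local backend, every other URI is assumed to be S3, and a missing backend surfaces as `RuntimeError`. A small usage sketch; the path is arbitrary, and `structlog` is assumed as the logger since that is what the executor's CLI uses elsewhere in this diff:

```python
import asyncio

import structlog

from indexify.executor.blob_store.blob_store import BLOBStore
from indexify.executor.blob_store.local_fs_blob_store import LocalFSBLOBStore


async def main() -> None:
    logger = structlog.get_logger(module=__name__)
    # Local-only store: any non-"file://" URI would raise RuntimeError here.
    store = BLOBStore(local=LocalFSBLOBStore(), s3=None)
    await store.put("file:///tmp/blob_store_demo/hello", b"hello", logger)
    assert await store.get("file:///tmp/blob_store_demo/hello", logger) == b"hello"


asyncio.run(main())
```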
indexify-0.3.20/src/indexify/executor/blob_store/local_fs_blob_store.py (new file, 48 lines)

```python
import asyncio
import os
import os.path
from typing import Any


class LocalFSBLOBStore:
    """BLOB store that stores BLOBs in local file system."""

    async def get(self, uri: str, logger: Any) -> bytes:
        """Returns binary value stored in file at the supplied URI.

        The URI must be a file URI (starts with "file://"). The path must be absolute.
        Raises Exception on error. Raises KeyError if the file doesn't exist.
        """
        # Run synchronous code in a thread to not block the event loop.
        return await asyncio.to_thread(self._sync_get, _path_from_file_uri(uri))

    async def put(self, uri: str, value: bytes, logger: Any) -> None:
        """Stores the supplied binary value in a file at the supplied URI.

        The URI must be a file URI (starts with "file://"). The path must be absolute.
        Overwrites existing file. Raises Exception on error.
        """
        # Run synchronous code in a thread to not block the event loop.
        return await asyncio.to_thread(self._sync_put, _path_from_file_uri(uri), value)

    def _sync_get(self, path: str) -> bytes:
        if not os.path.isabs(path):
            raise ValueError(f"Path {path} is not absolute")

        if os.path.exists(path):
            with open(path, mode="rb") as blob_file:
                return blob_file.read()
        else:
            raise KeyError(f"File at {path} does not exist")

    def _sync_put(self, path: str, value: bytes) -> None:
        if not os.path.isabs(path):
            raise ValueError(f"Path {path} is not absolute")

        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, mode="wb") as blob_file:
            blob_file.write(value)


def _path_from_file_uri(uri: str) -> str:
    return uri[7:]  # strip "file://" prefix
```
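Note the URI convention: `_path_from_file_uri` drops exactly the seven characters of `file://`, so `file:///tmp/x` maps to `/tmp/x`, and anything without a leading slash after the prefix is later rejected by the `os.path.isabs` checks. For reference:

```python
# Standalone restatement of the helper above, with example inputs.
def _path_from_file_uri(uri: str) -> str:
    return uri[7:]  # strip "file://" prefix


assert _path_from_file_uri("file:///tmp/blobs/a") == "/tmp/blobs/a"
# Not absolute, so _sync_get/_sync_put would raise ValueError for this one:
assert _path_from_file_uri("file://tmp/blobs/a") == "tmp/blobs/a"
```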
indexify-0.3.20/src/indexify/executor/blob_store/metrics/blob_store.py (new file, 33 lines)

```python
import prometheus_client

from ...monitoring.metrics import latency_metric_for_fast_operation

metric_get_blob_requests: prometheus_client.Counter = prometheus_client.Counter(
    "get_blob_requests",
    "Number of get blob requests",
)
metric_get_blob_errors: prometheus_client.Counter = prometheus_client.Counter(
    "get_blob_request_errors",
    "Number of get blob request errors",
)
metric_get_blob_latency: prometheus_client.Histogram = (
    latency_metric_for_fast_operation(
        "get_blob_request",
        "get blob request",
    )
)

metric_put_blob_requests: prometheus_client.Counter = prometheus_client.Counter(
    "put_blob_requests",
    "Number of put blob requests",
)
metric_put_blob_errors: prometheus_client.Counter = prometheus_client.Counter(
    "put_blob_request_errors",
    "Number of put blob request errors",
)
metric_put_blob_latency: prometheus_client.Histogram = (
    latency_metric_for_fast_operation(
        "put_blob_request",
        "put blob request",
    )
)
```
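`latency_metric_for_fast_operation` comes from `monitoring/metrics.py`, which is unchanged in this diff. These counters and histograms are used as context managers in `blob_store.py`; a self-contained sketch of that pattern using plain `prometheus_client` objects instead of the helper:

```python
import time

import prometheus_client

requests = prometheus_client.Counter("demo_requests", "Number of demo requests")
errors = prometheus_client.Counter("demo_request_errors", "Number of demo request errors")
latency = prometheus_client.Histogram("demo_request_latency_seconds", "Demo request latency")


def do_request() -> None:
    # count_exceptions() increments `errors` only if the body raises;
    # time() observes the elapsed wall time into `latency`.
    with errors.count_exceptions(), latency.time():
        requests.inc()
        time.sleep(0.01)  # stands in for the real get/put call


do_request()
```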
indexify-0.3.20/src/indexify/executor/blob_store/s3_blob_store.py (new file, 85 lines)

```python
import asyncio
from typing import Any, Optional

import boto3
from botocore.config import Config as BotoConfig
from botocore.exceptions import ClientError as BotoClientError

_MAX_RETRIES = 3


class S3BLOBStore:
    def __init__(self):
        self._s3_client: Optional[Any] = None

    def _lazy_create_client(self):
        """Creates S3 client if it doesn't exist.

        We create the client lazily only if S3 is used.
        This is because S3 BLOB store is always created by Executor
        and the creation will fail if user didn't configure S3 credentials and etc.
        """
        if self._s3_client is not None:
            return

        # The credentials and etc are fetched by boto3 library automatically following
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials
        # This provides a lot of flexibility for the user and follows a well-known and documented logic.
        self._s3_client = boto3.client(
            "s3",
            config=BotoConfig(
                # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#standard-retry-mode
                retries={
                    "max_attempts": _MAX_RETRIES,
                    "mode": "standard",
                }
            ),
        )

    async def get(self, uri: str, logger: Any) -> bytes:
        """Returns binary value stored in S3 object at the supplied URI.

        The URI must be S3 URI (starts with "s3://").
        Raises Exception on error. Raises KeyError if the object doesn't exist.
        """
        try:
            self._lazy_create_client()
            bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
            response = await asyncio.to_thread(
                self._s3_client.get_object, Bucket=bucket_name, Key=key
            )
            return response["Body"].read()
        except BotoClientError as e:
            logger.error("failed to get S3 object", uri=uri, exc_info=e)

            if e.response["Error"]["Code"] == "NoSuchKey":
                raise KeyError(f"Object {key} does not exist in bucket {bucket_name}")
            raise
        except Exception as e:
            logger.error("failed to get S3 object", uri=uri, exc_info=e)
            raise

    async def put(self, uri: str, value: bytes, logger: Any) -> None:
        """Stores the supplied binary value in a S3 object at the supplied URI.

        The URI must be S3 URI (starts with "s3://").
        Overwrites existing object. Raises Exception on error.
        """
        try:
            self._lazy_create_client()
            bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
            await asyncio.to_thread(
                self._s3_client.put_object, Bucket=bucket_name, Key=key, Body=value
            )
        except Exception as e:
            logger.error("failed to set S3 object", uri=uri, exc_info=e)
            raise


def _bucket_name_and_object_key_from_uri(uri: str) -> tuple[str, str]:
    if not uri.startswith("s3://"):
        raise ValueError(f"S3 URI '{uri}' is missing 's3://' prefix")
    parts = uri[5:].split("/", 1)
    if len(parts) != 2:
        raise ValueError(f"Failed parsing bucket name from S3 URI '{uri}'")
    return parts[0], parts[1]  # bucket_name, key
```
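`S3BLOBStore` creates its boto3 client on first use, so constructing it is safe even without AWS credentials; the blocking boto3 calls are pushed onto a worker thread with `asyncio.to_thread`. A hedged usage sketch: the bucket name is hypothetical, and boto3 must be able to discover credentials and a region (for example from environment variables) for this to run:

```python
import asyncio

import structlog

from indexify.executor.blob_store.s3_blob_store import S3BLOBStore


async def main() -> None:
    logger = structlog.get_logger(module=__name__)
    store = S3BLOBStore()  # no boto3 client yet; created lazily on first get/put
    await store.put("s3://my-example-bucket/demo/key", b"payload", logger)
    assert await store.get("s3://my-example-bucket/demo/key", logger) == b"payload"


asyncio.run(main())
```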