indexify 0.4.22__tar.gz → 0.4.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.4.22 → indexify-0.4.24}/PKG-INFO +6 -6
- {indexify-0.4.22 → indexify-0.4.24}/pyproject.toml +6 -6
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/cli/executor.py +2 -9
- indexify-0.4.24/src/indexify/executor/blob_store/blob_store.py +153 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/blob_store/local_fs_blob_store.py +41 -1
- indexify-0.4.24/src/indexify/executor/blob_store/metrics/blob_store.py +105 -0
- indexify-0.4.24/src/indexify/executor/blob_store/s3_blob_store.py +199 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/function_executor.py +32 -56
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/invocation_state_client.py +10 -3
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -1
- indexify-0.4.24/src/indexify/executor/function_executor_controller/create_function_executor.py +265 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/downloads.py +34 -86
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/events.py +13 -7
- indexify-0.4.24/src/indexify/executor/function_executor_controller/finalize_task.py +184 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/function_executor_controller.py +121 -78
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/message_validators.py +10 -3
- indexify-0.4.24/src/indexify/executor/function_executor_controller/metrics/downloads.py +23 -0
- indexify-0.4.24/src/indexify/executor/function_executor_controller/metrics/finalize_task.py +20 -0
- indexify-0.4.24/src/indexify/executor/function_executor_controller/metrics/prepare_task.py +18 -0
- indexify-0.4.24/src/indexify/executor/function_executor_controller/prepare_task.py +256 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/run_task.py +77 -61
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/task_info.py +4 -7
- indexify-0.4.24/src/indexify/executor/function_executor_controller/task_input.py +21 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/task_output.py +26 -35
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +6 -1
- indexify-0.4.24/src/indexify/executor/logging.py +69 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/metrics.py +22 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/proto/executor_api.proto +11 -3
- indexify-0.4.24/src/indexify/proto/executor_api_pb2.py +88 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/proto/executor_api_pb2.pyi +8 -1
- indexify-0.4.22/src/indexify/executor/blob_store/blob_store.py +0 -69
- indexify-0.4.22/src/indexify/executor/blob_store/metrics/blob_store.py +0 -33
- indexify-0.4.22/src/indexify/executor/blob_store/s3_blob_store.py +0 -88
- indexify-0.4.22/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -252
- indexify-0.4.22/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -21
- indexify-0.4.22/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -67
- indexify-0.4.22/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -39
- indexify-0.4.22/src/indexify/executor/function_executor_controller/prepare_task.py +0 -38
- indexify-0.4.22/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -274
- indexify-0.4.22/src/indexify/proto/executor_api_pb2.py +0 -88
- {indexify-0.4.22 → indexify-0.4.24}/README.md +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/cli/__init__.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/cli/build_image.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/cli/deploy.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/README.md +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/channel_manager.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/executor.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_allowlist.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/health_checker.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/host_resources/host_resources.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/metrics/channel_manager.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/metrics/executor.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/metrics/state_reconciler.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/metrics/state_reporter.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/handler.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/health_checker/metrics/health_checker.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/server.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/state_reconciler.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/executor/state_reporter.py +0 -0
- {indexify-0.4.22 → indexify-0.4.24}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,20 +1,19 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: indexify
|
3
|
-
Version: 0.4.22
|
3
|
+
Version: 0.4.24
|
4
4
|
Summary: Open Source Indexify components and helper tools
|
5
5
|
Home-page: https://github.com/tensorlakeai/indexify
|
6
6
|
License: Apache 2.0
|
7
7
|
Author: Tensorlake Inc.
|
8
8
|
Author-email: support@tensorlake.ai
|
9
|
-
Requires-Python: >=3.10,<4.0
|
9
|
+
Requires-Python: >=3.11,<4.0
|
10
10
|
Classifier: License :: Other/Proprietary License
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
13
12
|
Classifier: Programming Language :: Python :: 3.11
|
14
13
|
Classifier: Programming Language :: Python :: 3.12
|
15
14
|
Classifier: Programming Language :: Python :: 3.13
|
16
|
-
Requires-Dist: aiohttp (>=3.12.
|
17
|
-
Requires-Dist: boto3 (>=1.
|
15
|
+
Requires-Dist: aiohttp (>=3.12.15,<4.0.0)
|
16
|
+
Requires-Dist: boto3 (>=1.40.6,<2.0.0)
|
18
17
|
Requires-Dist: docker (>=7.1.0,<8.0.0)
|
19
18
|
Requires-Dist: httpx[http2] (==0.27.2)
|
20
19
|
Requires-Dist: nanoid (>=2.0.0,<3.0.0)
|
@@ -22,7 +21,8 @@ Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
|
|
22
21
|
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
23
22
|
Requires-Dist: pydantic (>=2.11,<3.0)
|
24
23
|
Requires-Dist: requests (>=2.32.4,<3.0.0)
|
25
|
-
Requires-Dist:
|
24
|
+
Requires-Dist: structlog (==25.4.0)
|
25
|
+
Requires-Dist: tensorlake (==0.2.39)
|
26
26
|
Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
|
27
27
|
Project-URL: Repository, https://github.com/tensorlakeai/indexify
|
28
28
|
Description-Content-Type: text/markdown
|
@@ -1,7 +1,7 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "indexify"
|
3
3
|
# Incremented if any of the components provided in this packages are updated.
|
4
|
-
version = "0.4.22"
|
4
|
+
version = "0.4.24"
|
5
5
|
description = "Open Source Indexify components and helper tools"
|
6
6
|
authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
|
7
7
|
license = "Apache 2.0"
|
@@ -14,22 +14,22 @@ indexify-cli = "indexify.cli:cli"
|
|
14
14
|
|
15
15
|
[tool.poetry.dependencies]
|
16
16
|
# Common dependencies
|
17
|
-
python = "^3.10"
|
17
|
+
python = "^3.11"
|
18
18
|
nanoid = "^2.0.0"
|
19
|
-
# structlog is provided by tensorlake
|
20
19
|
# pyyaml is provided by tensorlake
|
21
20
|
|
22
21
|
# Executor only
|
23
|
-
aiohttp = "^3.12.
|
22
|
+
aiohttp = "^3.12.15"
|
24
23
|
# mTLS support for httpx 0.28.1 is broken, wait for 0.28.2 to see if the bug is fixed
|
25
24
|
httpx = { version = "0.27.2", extras = ["http2"] }
|
26
25
|
pydantic = "^2.11"
|
27
26
|
prometheus-client = "^0.22.1"
|
28
27
|
psutil = "^7.0.0"
|
29
|
-
boto3 = "^1.
|
28
|
+
boto3 = "^1.40.6"
|
29
|
+
structlog = "25.4.0"
|
30
30
|
# Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
|
31
31
|
# We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
|
32
|
-
tensorlake = "0.2.
|
32
|
+
tensorlake = "0.2.39"
|
33
33
|
# Uncomment the next line to use local tensorlake package (only for development!)
|
34
34
|
# tensorlake = { path = "../tensorlake", develop = true }
|
35
35
|
# grpcio is provided by tensorlake
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from
|
1
|
+
from indexify.executor.logging import (
|
2
2
|
configure_development_mode_logging,
|
3
3
|
configure_logging_early,
|
4
4
|
configure_production_mode_logging,
|
@@ -162,13 +162,6 @@ def executor(
|
|
162
162
|
shutil.rmtree(str(executor_cache_path))
|
163
163
|
executor_cache_path.mkdir(parents=True, exist_ok=True)
|
164
164
|
|
165
|
-
blob_store: BLOBStore = BLOBStore(
|
166
|
-
# Local FS mode is used in tests and in cases when user wants to store data on NFS.
|
167
|
-
local=LocalFSBLOBStore(),
|
168
|
-
# S3 is initiliazed lazily so it's okay to create it even if the user is not going to use it.
|
169
|
-
s3=S3BLOBStore(),
|
170
|
-
)
|
171
|
-
|
172
165
|
host_resources_provider: HostResourcesProvider = HostResourcesProvider(
|
173
166
|
gpu_allocator=NvidiaGPUAllocator(logger),
|
174
167
|
# Assuming a simple setup in OSS where Executor container has a single file system
|
@@ -200,6 +193,6 @@ def executor(
|
|
200
193
|
config_path=config_path,
|
201
194
|
monitoring_server_host=monitoring_server_host,
|
202
195
|
monitoring_server_port=monitoring_server_port,
|
203
|
-
blob_store=blob_store,
|
196
|
+
blob_store=BLOBStore(),
|
204
197
|
host_resources_provider=host_resources_provider,
|
205
198
|
).run()
|
@@ -0,0 +1,153 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
from .local_fs_blob_store import LocalFSBLOBStore
|
4
|
+
from .metrics.blob_store import (
|
5
|
+
metric_abort_multipart_upload_errors,
|
6
|
+
metric_abort_multipart_upload_latency,
|
7
|
+
metric_abort_multipart_upload_requests,
|
8
|
+
metric_complete_multipart_upload_errors,
|
9
|
+
metric_complete_multipart_upload_latency,
|
10
|
+
metric_complete_multipart_upload_requests,
|
11
|
+
metric_create_multipart_upload_errors,
|
12
|
+
metric_create_multipart_upload_latency,
|
13
|
+
metric_create_multipart_upload_requests,
|
14
|
+
metric_get_blob_errors,
|
15
|
+
metric_get_blob_latency,
|
16
|
+
metric_get_blob_requests,
|
17
|
+
metric_presign_uri_errors,
|
18
|
+
metric_presign_uri_latency,
|
19
|
+
metric_presign_uri_requests,
|
20
|
+
metric_upload_blob_errors,
|
21
|
+
metric_upload_blob_latency,
|
22
|
+
metric_upload_blob_requests,
|
23
|
+
)
|
24
|
+
from .s3_blob_store import S3BLOBStore
|
25
|
+
|
26
|
+
|
27
|
+
class BLOBStore:
|
28
|
+
"""Dispatches generic BLOB store calls to their real backends."""
|
29
|
+
|
30
|
+
def __init__(self):
|
31
|
+
self._local: LocalFSBLOBStore = LocalFSBLOBStore()
|
32
|
+
self._s3: S3BLOBStore = S3BLOBStore()
|
33
|
+
|
34
|
+
async def get(self, uri: str, logger: Any) -> bytes:
|
35
|
+
"""Returns binary value stored in BLOB with the supplied URI.
|
36
|
+
|
37
|
+
Raises Exception on error. Raises KeyError if the BLOB doesn't exist.
|
38
|
+
"""
|
39
|
+
with (
|
40
|
+
metric_get_blob_errors.count_exceptions(),
|
41
|
+
metric_get_blob_latency.time(),
|
42
|
+
):
|
43
|
+
metric_get_blob_requests.inc()
|
44
|
+
if _is_file_uri(uri):
|
45
|
+
return await self._local.get(uri, logger)
|
46
|
+
else:
|
47
|
+
return await self._s3.get(uri, logger)
|
48
|
+
|
49
|
+
async def presign_get_uri(self, uri: str, expires_in_sec: int, logger: Any) -> str:
|
50
|
+
"""Returns a presigned URI for getting the BLOB with the supplied URI.
|
51
|
+
|
52
|
+
The URI allows to read any byte range in the BLOB."""
|
53
|
+
with (
|
54
|
+
metric_presign_uri_errors.count_exceptions(),
|
55
|
+
metric_presign_uri_latency.time(),
|
56
|
+
):
|
57
|
+
metric_presign_uri_requests.inc()
|
58
|
+
if _is_file_uri(uri):
|
59
|
+
return await self._local.presign_get_uri(uri, expires_in_sec, logger)
|
60
|
+
else:
|
61
|
+
return await self._s3.presign_get_uri(uri, expires_in_sec, logger)
|
62
|
+
|
63
|
+
async def upload(self, uri: str, value: bytes, logger: Any) -> None:
|
64
|
+
"""Stores the supplied binary value in a BLOB with the supplied URI.
|
65
|
+
|
66
|
+
Overwrites existing BLOB. Raises Exception on error.
|
67
|
+
"""
|
68
|
+
with (
|
69
|
+
metric_upload_blob_errors.count_exceptions(),
|
70
|
+
metric_upload_blob_latency.time(),
|
71
|
+
):
|
72
|
+
metric_upload_blob_requests.inc()
|
73
|
+
if _is_file_uri(uri):
|
74
|
+
await self._local.upload(uri, value, logger)
|
75
|
+
else:
|
76
|
+
await self._s3.upload(uri, value, logger)
|
77
|
+
|
78
|
+
async def create_multipart_upload(self, uri: str, logger: Any) -> str:
|
79
|
+
"""Creates a multipart upload for BLOB with the supplied URI and returns the upload ID."""
|
80
|
+
with (
|
81
|
+
metric_create_multipart_upload_errors.count_exceptions(),
|
82
|
+
metric_create_multipart_upload_latency.time(),
|
83
|
+
):
|
84
|
+
metric_create_multipart_upload_requests.inc()
|
85
|
+
if _is_file_uri(uri):
|
86
|
+
return await self._local.create_multipart_upload(uri, logger)
|
87
|
+
else:
|
88
|
+
return await self._s3.create_multipart_upload(uri, logger)
|
89
|
+
|
90
|
+
async def complete_multipart_upload(
|
91
|
+
self, uri: str, upload_id: str, parts_etags: list[str], logger: Any
|
92
|
+
) -> None:
|
93
|
+
"""Completes a multipart upload for BLOB with the supplied URI.
|
94
|
+
|
95
|
+
parts_etags is a list of ETags for the parts that were uploaded.
|
96
|
+
The list is ordered by part number starting from 1.
|
97
|
+
"""
|
98
|
+
with (
|
99
|
+
metric_complete_multipart_upload_errors.count_exceptions(),
|
100
|
+
metric_complete_multipart_upload_latency.time(),
|
101
|
+
):
|
102
|
+
metric_complete_multipart_upload_requests.inc()
|
103
|
+
if _is_file_uri(uri):
|
104
|
+
await self._local.complete_multipart_upload(
|
105
|
+
uri, upload_id, parts_etags, logger
|
106
|
+
)
|
107
|
+
else:
|
108
|
+
await self._s3.complete_multipart_upload(
|
109
|
+
uri, upload_id, parts_etags, logger
|
110
|
+
)
|
111
|
+
|
112
|
+
async def abort_multipart_upload(
|
113
|
+
self, uri: str, upload_id: str, logger: Any
|
114
|
+
) -> None:
|
115
|
+
"""Aborts a multipart upload for BLOB with the supplied URI."""
|
116
|
+
with (
|
117
|
+
metric_abort_multipart_upload_errors.count_exceptions(),
|
118
|
+
metric_abort_multipart_upload_latency.time(),
|
119
|
+
):
|
120
|
+
metric_abort_multipart_upload_requests.inc()
|
121
|
+
if _is_file_uri(uri):
|
122
|
+
await self._local.abort_multipart_upload(uri, upload_id, logger)
|
123
|
+
else:
|
124
|
+
await self._s3.abort_multipart_upload(uri, upload_id, logger)
|
125
|
+
|
126
|
+
async def presign_upload_part_uri(
|
127
|
+
self,
|
128
|
+
uri: str,
|
129
|
+
part_number: int,
|
130
|
+
upload_id: str,
|
131
|
+
expires_in_sec: int,
|
132
|
+
logger: Any,
|
133
|
+
) -> str:
|
134
|
+
"""Returns a presigned URI for uploading a part in a multipart upload.
|
135
|
+
|
136
|
+
part_number starts from 1."""
|
137
|
+
with (
|
138
|
+
metric_presign_uri_errors.count_exceptions(),
|
139
|
+
metric_presign_uri_latency.time(),
|
140
|
+
):
|
141
|
+
metric_presign_uri_requests.inc()
|
142
|
+
if _is_file_uri(uri):
|
143
|
+
return await self._local.presign_upload_part_uri(
|
144
|
+
uri, part_number, upload_id, expires_in_sec, logger
|
145
|
+
)
|
146
|
+
else:
|
147
|
+
return await self._s3.presign_upload_part_uri(
|
148
|
+
uri, part_number, upload_id, expires_in_sec, logger
|
149
|
+
)
|
150
|
+
|
151
|
+
|
152
|
+
def _is_file_uri(uri: str) -> bool:
|
153
|
+
return uri.startswith("file://")
|
@@ -16,7 +16,14 @@ class LocalFSBLOBStore:
|
|
16
16
|
# Run synchronous code in a thread to not block the event loop.
|
17
17
|
return await asyncio.to_thread(self._sync_get, _path_from_file_uri(uri))
|
18
18
|
|
19
|
-
async def
|
19
|
+
async def presign_get_uri(self, uri: str, expires_in_sec: int, logger: Any) -> str:
|
20
|
+
"""Returns a presigned URI for getting the file at the supplied URI.
|
21
|
+
|
22
|
+
For local files, just returns the file URI itself.
|
23
|
+
"""
|
24
|
+
return uri
|
25
|
+
|
26
|
+
async def upload(self, uri: str, value: bytes, logger: Any) -> None:
|
20
27
|
"""Stores the supplied binary value in a file at the supplied URI.
|
21
28
|
|
22
29
|
The URI must be a file URI (starts with "file://"). The path must be absolute.
|
@@ -25,6 +32,39 @@ class LocalFSBLOBStore:
|
|
25
32
|
# Run synchronous code in a thread to not block the event loop.
|
26
33
|
return await asyncio.to_thread(self._sync_put, _path_from_file_uri(uri), value)
|
27
34
|
|
35
|
+
async def create_multipart_upload(self, uri: str, logger: Any) -> str:
|
36
|
+
"""Creates a multipart upload for local file and returns a dummy upload ID."""
|
37
|
+
# Local files do not require multipart upload, return a dummy ID
|
38
|
+
return "local-multipart-upload-id"
|
39
|
+
|
40
|
+
async def complete_multipart_upload(
|
41
|
+
self, uri: str, upload_id: str, parts_etags: list[str], logger: Any
|
42
|
+
) -> None:
|
43
|
+
"""Completes a multipart upload for local file. No-op for local files."""
|
44
|
+
# No action needed for local files
|
45
|
+
return None
|
46
|
+
|
47
|
+
async def abort_multipart_upload(
|
48
|
+
self, uri: str, upload_id: str, logger: Any
|
49
|
+
) -> None:
|
50
|
+
"""Aborts a multipart upload for local file. No-op for local files."""
|
51
|
+
# No action needed for local files
|
52
|
+
return None
|
53
|
+
|
54
|
+
async def presign_upload_part_uri(
|
55
|
+
self,
|
56
|
+
uri: str,
|
57
|
+
part_number: int,
|
58
|
+
upload_id: str,
|
59
|
+
expires_in_sec: int,
|
60
|
+
logger: Any,
|
61
|
+
) -> str:
|
62
|
+
"""Returns a presigned URI for uploading a part in a multipart upload for local file.
|
63
|
+
|
64
|
+
For local files, just returns the file URI itself.
|
65
|
+
"""
|
66
|
+
return uri
|
67
|
+
|
28
68
|
def _sync_get(self, path: str) -> bytes:
|
29
69
|
if not os.path.isabs(path):
|
30
70
|
raise ValueError(f"Path {path} must be absolute")
|
@@ -0,0 +1,105 @@
|
|
1
|
+
import prometheus_client
|
2
|
+
|
3
|
+
from ...monitoring.metrics import latency_metric_for_fast_operation
|
4
|
+
|
5
|
+
metric_get_blob_requests: prometheus_client.Counter = prometheus_client.Counter(
|
6
|
+
"blob_store_get_requests",
|
7
|
+
"Number of get BLOB requests in BLOB store",
|
8
|
+
)
|
9
|
+
metric_get_blob_errors: prometheus_client.Counter = prometheus_client.Counter(
|
10
|
+
"blob_store_get_request_errors",
|
11
|
+
"Number of get BLOB request errors in BLOB store",
|
12
|
+
)
|
13
|
+
metric_get_blob_latency: prometheus_client.Histogram = (
|
14
|
+
latency_metric_for_fast_operation(
|
15
|
+
"blob_store_get",
|
16
|
+
"BLOB store get BLOB request",
|
17
|
+
)
|
18
|
+
)
|
19
|
+
|
20
|
+
metric_presign_uri_requests: prometheus_client.Counter = prometheus_client.Counter(
|
21
|
+
"blob_store_presign_uri_requests",
|
22
|
+
"Number of presign URI requests in BLOB store",
|
23
|
+
)
|
24
|
+
metric_presign_uri_errors: prometheus_client.Counter = prometheus_client.Counter(
|
25
|
+
"blob_store_presign_uri_request_errors",
|
26
|
+
"Number of presign URI request errors in BLOB store",
|
27
|
+
)
|
28
|
+
metric_presign_uri_latency: prometheus_client.Histogram = (
|
29
|
+
latency_metric_for_fast_operation(
|
30
|
+
"blob_store_presign_uri",
|
31
|
+
"BLOB store presign URI request",
|
32
|
+
)
|
33
|
+
)
|
34
|
+
|
35
|
+
metric_upload_blob_requests: prometheus_client.Counter = prometheus_client.Counter(
|
36
|
+
"blob_store_upload_requests",
|
37
|
+
"Number of upload BLOB requests in BLOB store",
|
38
|
+
)
|
39
|
+
metric_upload_blob_errors: prometheus_client.Counter = prometheus_client.Counter(
|
40
|
+
"blob_store_upload_request_errors",
|
41
|
+
"Number of upload BLOB request errors in BLOB store",
|
42
|
+
)
|
43
|
+
metric_upload_blob_latency: prometheus_client.Histogram = (
|
44
|
+
latency_metric_for_fast_operation(
|
45
|
+
"blob_store_upload",
|
46
|
+
"BLOB store upload BLOB request",
|
47
|
+
)
|
48
|
+
)
|
49
|
+
|
50
|
+
metric_create_multipart_upload_requests: prometheus_client.Counter = (
|
51
|
+
prometheus_client.Counter(
|
52
|
+
"blob_store_create_multipart_upload_requests",
|
53
|
+
"Number of create multipart upload requests in BLOB store",
|
54
|
+
)
|
55
|
+
)
|
56
|
+
metric_create_multipart_upload_errors: prometheus_client.Counter = (
|
57
|
+
prometheus_client.Counter(
|
58
|
+
"blob_store_create_multipart_upload_request_errors",
|
59
|
+
"Number of create multipart upload request errors in BLOB store",
|
60
|
+
)
|
61
|
+
)
|
62
|
+
metric_create_multipart_upload_latency: prometheus_client.Histogram = (
|
63
|
+
latency_metric_for_fast_operation(
|
64
|
+
"blob_store_create_multipart_upload_request",
|
65
|
+
"create multipart upload request in BLOB store",
|
66
|
+
)
|
67
|
+
)
|
68
|
+
|
69
|
+
metric_complete_multipart_upload_requests: prometheus_client.Counter = (
|
70
|
+
prometheus_client.Counter(
|
71
|
+
"blob_store_complete_multipart_upload_requests",
|
72
|
+
"Number of complete multipart upload requests in BLOB store",
|
73
|
+
)
|
74
|
+
)
|
75
|
+
metric_complete_multipart_upload_errors: prometheus_client.Counter = (
|
76
|
+
prometheus_client.Counter(
|
77
|
+
"blob_store_complete_multipart_upload_request_errors",
|
78
|
+
"Number of complete multipart upload request errors in BLOB store",
|
79
|
+
)
|
80
|
+
)
|
81
|
+
metric_complete_multipart_upload_latency: prometheus_client.Histogram = (
|
82
|
+
latency_metric_for_fast_operation(
|
83
|
+
"blob_store_complete_multipart_upload_request",
|
84
|
+
"complete multipart upload request in BLOB store",
|
85
|
+
)
|
86
|
+
)
|
87
|
+
|
88
|
+
metric_abort_multipart_upload_requests: prometheus_client.Counter = (
|
89
|
+
prometheus_client.Counter(
|
90
|
+
"blob_store_abort_multipart_upload_requests",
|
91
|
+
"Number of abort multipart upload requests in BLOB store",
|
92
|
+
)
|
93
|
+
)
|
94
|
+
metric_abort_multipart_upload_errors: prometheus_client.Counter = (
|
95
|
+
prometheus_client.Counter(
|
96
|
+
"blob_store_abort_multipart_upload_request_errors",
|
97
|
+
"Number of abort multipart upload request errors in BLOB store",
|
98
|
+
)
|
99
|
+
)
|
100
|
+
metric_abort_multipart_upload_latency: prometheus_client.Histogram = (
|
101
|
+
latency_metric_for_fast_operation(
|
102
|
+
"blob_store_abort_multipart_upload_request",
|
103
|
+
"abort multipart upload request in BLOB store",
|
104
|
+
)
|
105
|
+
)
|
@@ -0,0 +1,199 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import Any, Optional
|
3
|
+
|
4
|
+
import boto3
|
5
|
+
from botocore.config import Config as BotoConfig
|
6
|
+
from botocore.exceptions import ClientError as BotoClientError
|
7
|
+
|
8
|
+
_MAX_RETRIES = 3
|
9
|
+
|
10
|
+
|
11
|
+
class S3BLOBStore:
|
12
|
+
def __init__(self):
|
13
|
+
self._s3_client: Optional[Any] = None
|
14
|
+
|
15
|
+
def _lazy_create_client(self):
|
16
|
+
"""Creates S3 client if it doesn't exist.
|
17
|
+
|
18
|
+
We create the client lazily only if S3 is used.
|
19
|
+
This is because S3 BLOB store is always created by Executor
|
20
|
+
and the creation will fail if user didn't configure S3 credentials and etc.
|
21
|
+
"""
|
22
|
+
if self._s3_client is not None:
|
23
|
+
return
|
24
|
+
|
25
|
+
# The credentials and etc are fetched by boto3 library automatically following
|
26
|
+
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials
|
27
|
+
# This provides a lot of flexibility for the user and follows a well-known and documented logic.
|
28
|
+
self._s3_client = boto3.client(
|
29
|
+
"s3",
|
30
|
+
config=BotoConfig(
|
31
|
+
# https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#standard-retry-mode
|
32
|
+
retries={
|
33
|
+
"max_attempts": _MAX_RETRIES,
|
34
|
+
"mode": "standard",
|
35
|
+
}
|
36
|
+
),
|
37
|
+
)
|
38
|
+
|
39
|
+
async def get(self, uri: str, logger: Any) -> bytes:
|
40
|
+
"""Returns binary value stored in S3 object at the supplied URI.
|
41
|
+
|
42
|
+
The URI must be S3 URI (starts with "s3://").
|
43
|
+
Raises Exception on error. Raises KeyError if the object doesn't exist.
|
44
|
+
"""
|
45
|
+
try:
|
46
|
+
self._lazy_create_client()
|
47
|
+
bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
|
48
|
+
response = await asyncio.to_thread(
|
49
|
+
self._s3_client.get_object, Bucket=bucket_name, Key=key
|
50
|
+
)
|
51
|
+
return response["Body"].read()
|
52
|
+
except BotoClientError as e:
|
53
|
+
logger.error("failed to get S3 object", uri=uri, exc_info=e)
|
54
|
+
|
55
|
+
if e.response["Error"]["Code"] == "NoSuchKey":
|
56
|
+
raise KeyError(f"Object {key} does not exist in bucket {bucket_name}")
|
57
|
+
raise
|
58
|
+
except Exception as e:
|
59
|
+
logger.error("failed to get S3 object", uri=uri, exc_info=e)
|
60
|
+
raise
|
61
|
+
|
62
|
+
async def presign_get_uri(self, uri: str, expires_in_sec: int, logger: Any) -> str:
|
63
|
+
"""Returns a presigned URI for getting the S3 object at the supplied URI."""
|
64
|
+
self._lazy_create_client()
|
65
|
+
bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
|
66
|
+
try:
|
67
|
+
s3_uri: str = await asyncio.to_thread(
|
68
|
+
self._s3_client.generate_presigned_url,
|
69
|
+
ClientMethod="get_object",
|
70
|
+
Params={"Bucket": bucket_name, "Key": key},
|
71
|
+
ExpiresIn=expires_in_sec,
|
72
|
+
)
|
73
|
+
return s3_uri.replace("https://", "s3://", 1)
|
74
|
+
except Exception as e:
|
75
|
+
logger.error(
|
76
|
+
"failed to presign URI for get_object operation",
|
77
|
+
uri=uri,
|
78
|
+
exc_info=e,
|
79
|
+
expires_in_sec=expires_in_sec,
|
80
|
+
)
|
81
|
+
raise
|
82
|
+
|
83
|
+
async def upload(self, uri: str, value: bytes, logger: Any) -> None:
|
84
|
+
"""Stores the supplied binary value in a S3 object at the supplied URI.
|
85
|
+
|
86
|
+
The URI must be S3 URI (starts with "s3://").
|
87
|
+
Overwrites existing object. Raises Exception on error.
|
88
|
+
"""
|
89
|
+
try:
|
90
|
+
self._lazy_create_client()
|
91
|
+
bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
|
92
|
+
await asyncio.to_thread(
|
93
|
+
self._s3_client.put_object, Bucket=bucket_name, Key=key, Body=value
|
94
|
+
)
|
95
|
+
except Exception as e:
|
96
|
+
logger.error("failed to set S3 object", uri=uri, exc_info=e)
|
97
|
+
raise
|
98
|
+
|
99
|
+
async def create_multipart_upload(self, uri: str, logger: Any) -> str:
|
100
|
+
"""Creates a multipart upload for S3 object and returns the upload ID."""
|
101
|
+
self._lazy_create_client()
|
102
|
+
bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
|
103
|
+
try:
|
104
|
+
response = await asyncio.to_thread(
|
105
|
+
self._s3_client.create_multipart_upload,
|
106
|
+
Bucket=bucket_name,
|
107
|
+
Key=key,
|
108
|
+
)
|
109
|
+
return response["UploadId"]
|
110
|
+
except Exception as e:
|
111
|
+
logger.error("failed to create multipart upload", uri=uri, exc_info=e)
|
112
|
+
raise
|
113
|
+
|
114
|
+
async def complete_multipart_upload(
|
115
|
+
self, uri: str, upload_id: str, parts_etags: list[str], logger: Any
|
116
|
+
) -> None:
|
117
|
+
"""Completes a multipart upload for S3 object."""
|
118
|
+
self._lazy_create_client()
|
119
|
+
bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
|
120
|
+
try:
|
121
|
+
await asyncio.to_thread(
|
122
|
+
self._s3_client.complete_multipart_upload,
|
123
|
+
Bucket=bucket_name,
|
124
|
+
Key=key,
|
125
|
+
UploadId=upload_id,
|
126
|
+
MultipartUpload={
|
127
|
+
"Parts": [
|
128
|
+
{"ETag": etag, "PartNumber": i + 1}
|
129
|
+
for i, etag in enumerate(parts_etags)
|
130
|
+
]
|
131
|
+
},
|
132
|
+
)
|
133
|
+
except Exception as e:
|
134
|
+
logger.error("failed to complete multipart upload", uri=uri, exc_info=e)
|
135
|
+
raise
|
136
|
+
|
137
|
+
async def abort_multipart_upload(
|
138
|
+
self, uri: str, upload_id: str, logger: Any
|
139
|
+
) -> None:
|
140
|
+
"""Aborts a multipart upload for S3 object."""
|
141
|
+
self._lazy_create_client()
|
142
|
+
bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
|
143
|
+
try:
|
144
|
+
await asyncio.to_thread(
|
145
|
+
self._s3_client.abort_multipart_upload,
|
146
|
+
Bucket=bucket_name,
|
147
|
+
Key=key,
|
148
|
+
UploadId=upload_id,
|
149
|
+
)
|
150
|
+
except Exception as e:
|
151
|
+
logger.error("failed to abort multipart upload", uri=uri, exc_info=e)
|
152
|
+
raise
|
153
|
+
|
154
|
+
async def presign_upload_part_uri(
|
155
|
+
self,
|
156
|
+
uri: str,
|
157
|
+
part_number: int,
|
158
|
+
upload_id: str,
|
159
|
+
expires_in_sec: int,
|
160
|
+
logger: Any,
|
161
|
+
) -> str:
|
162
|
+
"""Returns a presigned URI for uploading a part in a multipart upload for S3 object."""
|
163
|
+
self._lazy_create_client()
|
164
|
+
bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
|
165
|
+
try:
|
166
|
+
response = await asyncio.to_thread(
|
167
|
+
self._s3_client.generate_presigned_url,
|
168
|
+
ClientMethod="upload_part",
|
169
|
+
Params={
|
170
|
+
"Bucket": bucket_name,
|
171
|
+
"Key": key,
|
172
|
+
"UploadId": upload_id,
|
173
|
+
"PartNumber": part_number,
|
174
|
+
},
|
175
|
+
ExpiresIn=expires_in_sec,
|
176
|
+
)
|
177
|
+
return response
|
178
|
+
except Exception as e:
|
179
|
+
logger.error(
|
180
|
+
"failed to presign URI for upload_part operation",
|
181
|
+
uri=uri,
|
182
|
+
exc_info=e,
|
183
|
+
part_number=part_number,
|
184
|
+
upload_id=upload_id,
|
185
|
+
expires_in_sec=expires_in_sec,
|
186
|
+
)
|
187
|
+
raise
|
188
|
+
|
189
|
+
|
190
|
+
def _bucket_name_and_object_key_from_uri(uri: str) -> tuple[str, str]:
|
191
|
+
# Example S3 object URI:
|
192
|
+
# s3://test-indexify-server-blob-store-eugene-20250411/225b83f4-2aed-40a7-adee-b7a681f817f2
|
193
|
+
if not uri.startswith("s3://"):
|
194
|
+
raise ValueError(f"S3 URI '{uri}' is missing 's3://' prefix")
|
195
|
+
|
196
|
+
parts = uri[5:].split("/", 1)
|
197
|
+
if len(parts) != 2:
|
198
|
+
raise ValueError(f"Failed parsing bucket name from S3 URI '{uri}'")
|
199
|
+
return parts[0], parts[1] # bucket_name, key
|