indexify 0.4.22__tar.gz → 0.4.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {indexify-0.4.22 → indexify-0.4.23}/PKG-INFO +6 -7
  2. {indexify-0.4.22 → indexify-0.4.23}/pyproject.toml +6 -6
  3. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/cli/executor.py +2 -9
  4. indexify-0.4.23/src/indexify/executor/blob_store/blob_store.py +153 -0
  5. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/blob_store/local_fs_blob_store.py +41 -1
  6. indexify-0.4.23/src/indexify/executor/blob_store/metrics/blob_store.py +105 -0
  7. indexify-0.4.23/src/indexify/executor/blob_store/s3_blob_store.py +199 -0
  8. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/function_executor.py +32 -56
  9. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/invocation_state_client.py +10 -3
  10. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/server/function_executor_server_factory.py +0 -1
  11. indexify-0.4.23/src/indexify/executor/function_executor_controller/create_function_executor.py +265 -0
  12. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/downloads.py +34 -86
  13. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/events.py +13 -7
  14. indexify-0.4.23/src/indexify/executor/function_executor_controller/finalize_task.py +184 -0
  15. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/function_executor_controller.py +121 -78
  16. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/message_validators.py +10 -3
  17. indexify-0.4.23/src/indexify/executor/function_executor_controller/metrics/downloads.py +23 -0
  18. indexify-0.4.23/src/indexify/executor/function_executor_controller/metrics/finalize_task.py +20 -0
  19. indexify-0.4.23/src/indexify/executor/function_executor_controller/metrics/prepare_task.py +18 -0
  20. indexify-0.4.23/src/indexify/executor/function_executor_controller/prepare_task.py +256 -0
  21. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/run_task.py +77 -61
  22. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/task_info.py +4 -7
  23. indexify-0.4.23/src/indexify/executor/function_executor_controller/task_input.py +21 -0
  24. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/task_output.py +26 -35
  25. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/terminate_function_executor.py +6 -1
  26. indexify-0.4.23/src/indexify/executor/logging.py +69 -0
  27. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/metrics.py +22 -0
  28. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/proto/executor_api.proto +11 -3
  29. indexify-0.4.23/src/indexify/proto/executor_api_pb2.py +88 -0
  30. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/proto/executor_api_pb2.pyi +8 -1
  31. indexify-0.4.22/src/indexify/executor/blob_store/blob_store.py +0 -69
  32. indexify-0.4.22/src/indexify/executor/blob_store/metrics/blob_store.py +0 -33
  33. indexify-0.4.22/src/indexify/executor/blob_store/s3_blob_store.py +0 -88
  34. indexify-0.4.22/src/indexify/executor/function_executor_controller/create_function_executor.py +0 -252
  35. indexify-0.4.22/src/indexify/executor/function_executor_controller/function_executor_startup_output.py +0 -21
  36. indexify-0.4.22/src/indexify/executor/function_executor_controller/metrics/downloads.py +0 -67
  37. indexify-0.4.22/src/indexify/executor/function_executor_controller/metrics/upload_task_output.py +0 -39
  38. indexify-0.4.22/src/indexify/executor/function_executor_controller/prepare_task.py +0 -38
  39. indexify-0.4.22/src/indexify/executor/function_executor_controller/upload_task_output.py +0 -274
  40. indexify-0.4.22/src/indexify/proto/executor_api_pb2.py +0 -88
  41. {indexify-0.4.22 → indexify-0.4.23}/README.md +0 -0
  42. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/cli/__init__.py +0 -0
  43. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/cli/build_image.py +0 -0
  44. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/cli/deploy.py +0 -0
  45. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/README.md +0 -0
  46. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/channel_manager.py +0 -0
  47. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/executor.py +0 -0
  48. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_allowlist.py +0 -0
  49. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/health_checker.py +0 -0
  50. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/metrics/function_executor.py +0 -0
  51. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/metrics/health_checker.py +0 -0
  52. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/metrics/invocation_state_client.py +0 -0
  53. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/server/client_configuration.py +0 -0
  54. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/server/function_executor_server.py +0 -0
  55. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/server/subprocess_function_executor_server.py +0 -0
  56. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor/server/subprocess_function_executor_server_factory.py +0 -0
  57. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/__init__.py +0 -0
  58. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/completed_task_metrics.py +0 -0
  59. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/debug_event_loop.py +0 -0
  60. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/loggers.py +0 -0
  61. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/metrics/completed_task_metrics.py +0 -0
  62. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/metrics/function_executor_controller.py +0 -0
  63. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/function_executor_controller/metrics/run_task.py +0 -0
  64. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/host_resources/host_resources.py +0 -0
  65. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/host_resources/nvidia_gpu.py +0 -0
  66. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/host_resources/nvidia_gpu_allocator.py +0 -0
  67. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/metrics/channel_manager.py +0 -0
  68. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/metrics/executor.py +0 -0
  69. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/metrics/state_reconciler.py +0 -0
  70. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/metrics/state_reporter.py +0 -0
  71. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/handler.py +0 -0
  72. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/health_check_handler.py +0 -0
  73. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/health_checker/generic_health_checker.py +0 -0
  74. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/health_checker/health_checker.py +0 -0
  75. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/health_checker/metrics/health_checker.py +0 -0
  76. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/prometheus_metrics_handler.py +0 -0
  77. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/server.py +0 -0
  78. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/monitoring/startup_probe_handler.py +0 -0
  79. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/state_reconciler.py +0 -0
  80. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/executor/state_reporter.py +0 -0
  81. {indexify-0.4.22 → indexify-0.4.23}/src/indexify/proto/executor_api_pb2_grpc.py +0 -0
@@ -1,20 +1,18 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: indexify
3
- Version: 0.4.22
3
+ Version: 0.4.23
4
4
  Summary: Open Source Indexify components and helper tools
5
5
  Home-page: https://github.com/tensorlakeai/indexify
6
6
  License: Apache 2.0
7
7
  Author: Tensorlake Inc.
8
8
  Author-email: support@tensorlake.ai
9
- Requires-Python: >=3.10,<4.0
9
+ Requires-Python: >=3.12,<4.0
10
10
  Classifier: License :: Other/Proprietary License
11
11
  Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
12
  Classifier: Programming Language :: Python :: 3.12
15
13
  Classifier: Programming Language :: Python :: 3.13
16
- Requires-Dist: aiohttp (>=3.12.14,<4.0.0)
17
- Requires-Dist: boto3 (>=1.39.15,<2.0.0)
14
+ Requires-Dist: aiohttp (>=3.12.15,<4.0.0)
15
+ Requires-Dist: boto3 (>=1.40.6,<2.0.0)
18
16
  Requires-Dist: docker (>=7.1.0,<8.0.0)
19
17
  Requires-Dist: httpx[http2] (==0.27.2)
20
18
  Requires-Dist: nanoid (>=2.0.0,<3.0.0)
@@ -22,7 +20,8 @@ Requires-Dist: prometheus-client (>=0.22.1,<0.23.0)
22
20
  Requires-Dist: psutil (>=7.0.0,<8.0.0)
23
21
  Requires-Dist: pydantic (>=2.11,<3.0)
24
22
  Requires-Dist: requests (>=2.32.4,<3.0.0)
25
- Requires-Dist: tensorlake (==0.2.37)
23
+ Requires-Dist: structlog (==25.4.0)
24
+ Requires-Dist: tensorlake (==0.2.39)
26
25
  Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
27
26
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
28
27
  Description-Content-Type: text/markdown
@@ -1,7 +1,7 @@
1
1
  [tool.poetry]
2
2
  name = "indexify"
3
3
  # Incremented if any of the components provided in this packages are updated.
4
- version = "0.4.22"
4
+ version = "0.4.23"
5
5
  description = "Open Source Indexify components and helper tools"
6
6
  authors = ["Tensorlake Inc. <support@tensorlake.ai>"]
7
7
  license = "Apache 2.0"
@@ -14,22 +14,22 @@ indexify-cli = "indexify.cli:cli"
14
14
 
15
15
  [tool.poetry.dependencies]
16
16
  # Common dependencies
17
- python = "^3.10"
17
+ python = "^3.12"
18
18
  nanoid = "^2.0.0"
19
- # structlog is provided by tensorlake
20
19
  # pyyaml is provided by tensorlake
21
20
 
22
21
  # Executor only
23
- aiohttp = "^3.12.14"
22
+ aiohttp = "^3.12.15"
24
23
  # mTLS support for httpx 0.28.1 is broken, wait for 0.28.2 to see if the bug is fixed
25
24
  httpx = { version = "0.27.2", extras = ["http2"] }
26
25
  pydantic = "^2.11"
27
26
  prometheus-client = "^0.22.1"
28
27
  psutil = "^7.0.0"
29
- boto3 = "^1.39.15"
28
+ boto3 = "^1.40.6"
29
+ structlog = "25.4.0"
30
30
  # Adds function-executor binary, utils lib, sdk used in indexify-cli commands.
31
31
  # We need to specify the tensorlake version exactly because pip install doesn't respect poetry.lock files.
32
- tensorlake = "0.2.37"
32
+ tensorlake = "0.2.39"
33
33
  # Uncomment the next line to use local tensorlake package (only for development!)
34
34
  # tensorlake = { path = "../tensorlake", develop = true }
35
35
  # grpcio is provided by tensorlake
@@ -1,4 +1,4 @@
1
- from tensorlake.utils.logging import (
1
+ from indexify.executor.logging import (
2
2
  configure_development_mode_logging,
3
3
  configure_logging_early,
4
4
  configure_production_mode_logging,
@@ -162,13 +162,6 @@ def executor(
162
162
  shutil.rmtree(str(executor_cache_path))
163
163
  executor_cache_path.mkdir(parents=True, exist_ok=True)
164
164
 
165
- blob_store: BLOBStore = BLOBStore(
166
- # Local FS mode is used in tests and in cases when user wants to store data on NFS.
167
- local=LocalFSBLOBStore(),
168
- # S3 is initiliazed lazily so it's okay to create it even if the user is not going to use it.
169
- s3=S3BLOBStore(),
170
- )
171
-
172
165
  host_resources_provider: HostResourcesProvider = HostResourcesProvider(
173
166
  gpu_allocator=NvidiaGPUAllocator(logger),
174
167
  # Assuming a simple setup in OSS where Executor container has a single file system
@@ -200,6 +193,6 @@ def executor(
200
193
  config_path=config_path,
201
194
  monitoring_server_host=monitoring_server_host,
202
195
  monitoring_server_port=monitoring_server_port,
203
- blob_store=blob_store,
196
+ blob_store=BLOBStore(),
204
197
  host_resources_provider=host_resources_provider,
205
198
  ).run()
@@ -0,0 +1,153 @@
1
+ from typing import Any
2
+
3
+ from .local_fs_blob_store import LocalFSBLOBStore
4
+ from .metrics.blob_store import (
5
+ metric_abort_multipart_upload_errors,
6
+ metric_abort_multipart_upload_latency,
7
+ metric_abort_multipart_upload_requests,
8
+ metric_complete_multipart_upload_errors,
9
+ metric_complete_multipart_upload_latency,
10
+ metric_complete_multipart_upload_requests,
11
+ metric_create_multipart_upload_errors,
12
+ metric_create_multipart_upload_latency,
13
+ metric_create_multipart_upload_requests,
14
+ metric_get_blob_errors,
15
+ metric_get_blob_latency,
16
+ metric_get_blob_requests,
17
+ metric_presign_uri_errors,
18
+ metric_presign_uri_latency,
19
+ metric_presign_uri_requests,
20
+ metric_upload_blob_errors,
21
+ metric_upload_blob_latency,
22
+ metric_upload_blob_requests,
23
+ )
24
+ from .s3_blob_store import S3BLOBStore
25
+
26
+
27
+ class BLOBStore:
28
+ """Dispatches generic BLOB store calls to their real backends."""
29
+
30
+ def __init__(self):
31
+ self._local: LocalFSBLOBStore = LocalFSBLOBStore()
32
+ self._s3: S3BLOBStore = S3BLOBStore()
33
+
34
+ async def get(self, uri: str, logger: Any) -> bytes:
35
+ """Returns binary value stored in BLOB with the supplied URI.
36
+
37
+ Raises Exception on error. Raises KeyError if the BLOB doesn't exist.
38
+ """
39
+ with (
40
+ metric_get_blob_errors.count_exceptions(),
41
+ metric_get_blob_latency.time(),
42
+ ):
43
+ metric_get_blob_requests.inc()
44
+ if _is_file_uri(uri):
45
+ return await self._local.get(uri, logger)
46
+ else:
47
+ return await self._s3.get(uri, logger)
48
+
49
+ async def presign_get_uri(self, uri: str, expires_in_sec: int, logger: Any) -> str:
50
+ """Returns a presigned URI for getting the BLOB with the supplied URI.
51
+
52
+ The URI allows to read any byte range in the BLOB."""
53
+ with (
54
+ metric_presign_uri_errors.count_exceptions(),
55
+ metric_presign_uri_latency.time(),
56
+ ):
57
+ metric_presign_uri_requests.inc()
58
+ if _is_file_uri(uri):
59
+ return await self._local.presign_get_uri(uri, expires_in_sec, logger)
60
+ else:
61
+ return await self._s3.presign_get_uri(uri, expires_in_sec, logger)
62
+
63
+ async def upload(self, uri: str, value: bytes, logger: Any) -> None:
64
+ """Stores the supplied binary value in a BLOB with the supplied URI.
65
+
66
+ Overwrites existing BLOB. Raises Exception on error.
67
+ """
68
+ with (
69
+ metric_upload_blob_errors.count_exceptions(),
70
+ metric_upload_blob_latency.time(),
71
+ ):
72
+ metric_upload_blob_requests.inc()
73
+ if _is_file_uri(uri):
74
+ await self._local.upload(uri, value, logger)
75
+ else:
76
+ await self._s3.upload(uri, value, logger)
77
+
78
+ async def create_multipart_upload(self, uri: str, logger: Any) -> str:
79
+ """Creates a multipart upload for BLOB with the supplied URI and returns the upload ID."""
80
+ with (
81
+ metric_create_multipart_upload_errors.count_exceptions(),
82
+ metric_create_multipart_upload_latency.time(),
83
+ ):
84
+ metric_create_multipart_upload_requests.inc()
85
+ if _is_file_uri(uri):
86
+ return await self._local.create_multipart_upload(uri, logger)
87
+ else:
88
+ return await self._s3.create_multipart_upload(uri, logger)
89
+
90
+ async def complete_multipart_upload(
91
+ self, uri: str, upload_id: str, parts_etags: list[str], logger: Any
92
+ ) -> None:
93
+ """Completes a multipart upload for BLOB with the supplied URI.
94
+
95
+ parts_etags is a list of ETags for the parts that were uploaded.
96
+ The list is ordered by part number starting from 1.
97
+ """
98
+ with (
99
+ metric_complete_multipart_upload_errors.count_exceptions(),
100
+ metric_complete_multipart_upload_latency.time(),
101
+ ):
102
+ metric_complete_multipart_upload_requests.inc()
103
+ if _is_file_uri(uri):
104
+ await self._local.complete_multipart_upload(
105
+ uri, upload_id, parts_etags, logger
106
+ )
107
+ else:
108
+ await self._s3.complete_multipart_upload(
109
+ uri, upload_id, parts_etags, logger
110
+ )
111
+
112
+ async def abort_multipart_upload(
113
+ self, uri: str, upload_id: str, logger: Any
114
+ ) -> None:
115
+ """Aborts a multipart upload for BLOB with the supplied URI."""
116
+ with (
117
+ metric_abort_multipart_upload_errors.count_exceptions(),
118
+ metric_abort_multipart_upload_latency.time(),
119
+ ):
120
+ metric_abort_multipart_upload_requests.inc()
121
+ if _is_file_uri(uri):
122
+ await self._local.abort_multipart_upload(uri, upload_id, logger)
123
+ else:
124
+ await self._s3.abort_multipart_upload(uri, upload_id, logger)
125
+
126
+ async def presign_upload_part_uri(
127
+ self,
128
+ uri: str,
129
+ part_number: int,
130
+ upload_id: str,
131
+ expires_in_sec: int,
132
+ logger: Any,
133
+ ) -> str:
134
+ """Returns a presigned URI for uploading a part in a multipart upload.
135
+
136
+ part_number starts from 1."""
137
+ with (
138
+ metric_presign_uri_errors.count_exceptions(),
139
+ metric_presign_uri_latency.time(),
140
+ ):
141
+ metric_presign_uri_requests.inc()
142
+ if _is_file_uri(uri):
143
+ return await self._local.presign_upload_part_uri(
144
+ uri, part_number, upload_id, expires_in_sec, logger
145
+ )
146
+ else:
147
+ return await self._s3.presign_upload_part_uri(
148
+ uri, part_number, upload_id, expires_in_sec, logger
149
+ )
150
+
151
+
152
+ def _is_file_uri(uri: str) -> bool:
153
+ return uri.startswith("file://")
@@ -16,7 +16,14 @@ class LocalFSBLOBStore:
16
16
  # Run synchronous code in a thread to not block the event loop.
17
17
  return await asyncio.to_thread(self._sync_get, _path_from_file_uri(uri))
18
18
 
19
- async def put(self, uri: str, value: bytes, logger: Any) -> None:
19
+ async def presign_get_uri(self, uri: str, expires_in_sec: int, logger: Any) -> str:
20
+ """Returns a presigned URI for getting the file at the supplied URI.
21
+
22
+ For local files, just returns the file URI itself.
23
+ """
24
+ return uri
25
+
26
+ async def upload(self, uri: str, value: bytes, logger: Any) -> None:
20
27
  """Stores the supplied binary value in a file at the supplied URI.
21
28
 
22
29
  The URI must be a file URI (starts with "file://"). The path must be absolute.
@@ -25,6 +32,39 @@ class LocalFSBLOBStore:
25
32
  # Run synchronous code in a thread to not block the event loop.
26
33
  return await asyncio.to_thread(self._sync_put, _path_from_file_uri(uri), value)
27
34
 
35
+ async def create_multipart_upload(self, uri: str, logger: Any) -> str:
36
+ """Creates a multipart upload for local file and returns a dummy upload ID."""
37
+ # Local files do not require multipart upload, return a dummy ID
38
+ return "local-multipart-upload-id"
39
+
40
+ async def complete_multipart_upload(
41
+ self, uri: str, upload_id: str, parts_etags: list[str], logger: Any
42
+ ) -> None:
43
+ """Completes a multipart upload for local file. No-op for local files."""
44
+ # No action needed for local files
45
+ return None
46
+
47
+ async def abort_multipart_upload(
48
+ self, uri: str, upload_id: str, logger: Any
49
+ ) -> None:
50
+ """Aborts a multipart upload for local file. No-op for local files."""
51
+ # No action needed for local files
52
+ return None
53
+
54
+ async def presign_upload_part_uri(
55
+ self,
56
+ uri: str,
57
+ part_number: int,
58
+ upload_id: str,
59
+ expires_in_sec: int,
60
+ logger: Any,
61
+ ) -> str:
62
+ """Returns a presigned URI for uploading a part in a multipart upload for local file.
63
+
64
+ For local files, just returns the file URI itself.
65
+ """
66
+ return uri
67
+
28
68
  def _sync_get(self, path: str) -> bytes:
29
69
  if not os.path.isabs(path):
30
70
  raise ValueError(f"Path {path} must be absolute")
@@ -0,0 +1,105 @@
1
+ import prometheus_client
2
+
3
+ from ...monitoring.metrics import latency_metric_for_fast_operation
4
+
5
+ metric_get_blob_requests: prometheus_client.Counter = prometheus_client.Counter(
6
+ "blob_store_get_requests",
7
+ "Number of get BLOB requests in BLOB store",
8
+ )
9
+ metric_get_blob_errors: prometheus_client.Counter = prometheus_client.Counter(
10
+ "blob_store_get_request_errors",
11
+ "Number of get BLOB request errors in BLOB store",
12
+ )
13
+ metric_get_blob_latency: prometheus_client.Histogram = (
14
+ latency_metric_for_fast_operation(
15
+ "blob_store_get",
16
+ "BLOB store get BLOB request",
17
+ )
18
+ )
19
+
20
+ metric_presign_uri_requests: prometheus_client.Counter = prometheus_client.Counter(
21
+ "blob_store_presign_uri_requests",
22
+ "Number of presign URI requests in BLOB store",
23
+ )
24
+ metric_presign_uri_errors: prometheus_client.Counter = prometheus_client.Counter(
25
+ "blob_store_presign_uri_request_errors",
26
+ "Number of presign URI request errors in BLOB store",
27
+ )
28
+ metric_presign_uri_latency: prometheus_client.Histogram = (
29
+ latency_metric_for_fast_operation(
30
+ "blob_store_presign_uri",
31
+ "BLOB store presign URI request",
32
+ )
33
+ )
34
+
35
+ metric_upload_blob_requests: prometheus_client.Counter = prometheus_client.Counter(
36
+ "blob_store_upload_requests",
37
+ "Number of upload BLOB requests in BLOB store",
38
+ )
39
+ metric_upload_blob_errors: prometheus_client.Counter = prometheus_client.Counter(
40
+ "blob_store_upload_request_errors",
41
+ "Number of upload BLOB request errors in BLOB store",
42
+ )
43
+ metric_upload_blob_latency: prometheus_client.Histogram = (
44
+ latency_metric_for_fast_operation(
45
+ "blob_store_upload",
46
+ "BLOB store upload BLOB request",
47
+ )
48
+ )
49
+
50
+ metric_create_multipart_upload_requests: prometheus_client.Counter = (
51
+ prometheus_client.Counter(
52
+ "blob_store_create_multipart_upload_requests",
53
+ "Number of create multipart upload requests in BLOB store",
54
+ )
55
+ )
56
+ metric_create_multipart_upload_errors: prometheus_client.Counter = (
57
+ prometheus_client.Counter(
58
+ "blob_store_create_multipart_upload_request_errors",
59
+ "Number of create multipart upload request errors in BLOB store",
60
+ )
61
+ )
62
+ metric_create_multipart_upload_latency: prometheus_client.Histogram = (
63
+ latency_metric_for_fast_operation(
64
+ "blob_store_create_multipart_upload_request",
65
+ "create multipart upload request in BLOB store",
66
+ )
67
+ )
68
+
69
+ metric_complete_multipart_upload_requests: prometheus_client.Counter = (
70
+ prometheus_client.Counter(
71
+ "blob_store_complete_multipart_upload_requests",
72
+ "Number of complete multipart upload requests in BLOB store",
73
+ )
74
+ )
75
+ metric_complete_multipart_upload_errors: prometheus_client.Counter = (
76
+ prometheus_client.Counter(
77
+ "blob_store_complete_multipart_upload_request_errors",
78
+ "Number of complete multipart upload request errors in BLOB store",
79
+ )
80
+ )
81
+ metric_complete_multipart_upload_latency: prometheus_client.Histogram = (
82
+ latency_metric_for_fast_operation(
83
+ "blob_store_complete_multipart_upload_request",
84
+ "complete multipart upload request in BLOB store",
85
+ )
86
+ )
87
+
88
+ metric_abort_multipart_upload_requests: prometheus_client.Counter = (
89
+ prometheus_client.Counter(
90
+ "blob_store_abort_multipart_upload_requests",
91
+ "Number of abort multipart upload requests in BLOB store",
92
+ )
93
+ )
94
+ metric_abort_multipart_upload_errors: prometheus_client.Counter = (
95
+ prometheus_client.Counter(
96
+ "blob_store_abort_multipart_upload_request_errors",
97
+ "Number of abort multipart upload request errors in BLOB store",
98
+ )
99
+ )
100
+ metric_abort_multipart_upload_latency: prometheus_client.Histogram = (
101
+ latency_metric_for_fast_operation(
102
+ "blob_store_abort_multipart_upload_request",
103
+ "abort multipart upload request in BLOB store",
104
+ )
105
+ )
@@ -0,0 +1,199 @@
1
+ import asyncio
2
+ from typing import Any, Optional
3
+
4
+ import boto3
5
+ from botocore.config import Config as BotoConfig
6
+ from botocore.exceptions import ClientError as BotoClientError
7
+
8
+ _MAX_RETRIES = 3
9
+
10
+
11
+ class S3BLOBStore:
12
+ def __init__(self):
13
+ self._s3_client: Optional[Any] = None
14
+
15
+ def _lazy_create_client(self):
16
+ """Creates S3 client if it doesn't exist.
17
+
18
+ We create the client lazily only if S3 is used.
19
+ This is because S3 BLOB store is always created by Executor
20
+ and the creation will fail if user didn't configure S3 credentials and etc.
21
+ """
22
+ if self._s3_client is not None:
23
+ return
24
+
25
+ # The credentials and etc are fetched by boto3 library automatically following
26
+ # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials
27
+ # This provides a lot of flexibility for the user and follows a well-known and documented logic.
28
+ self._s3_client = boto3.client(
29
+ "s3",
30
+ config=BotoConfig(
31
+ # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#standard-retry-mode
32
+ retries={
33
+ "max_attempts": _MAX_RETRIES,
34
+ "mode": "standard",
35
+ }
36
+ ),
37
+ )
38
+
39
+ async def get(self, uri: str, logger: Any) -> bytes:
40
+ """Returns binary value stored in S3 object at the supplied URI.
41
+
42
+ The URI must be S3 URI (starts with "s3://").
43
+ Raises Exception on error. Raises KeyError if the object doesn't exist.
44
+ """
45
+ try:
46
+ self._lazy_create_client()
47
+ bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
48
+ response = await asyncio.to_thread(
49
+ self._s3_client.get_object, Bucket=bucket_name, Key=key
50
+ )
51
+ return response["Body"].read()
52
+ except BotoClientError as e:
53
+ logger.error("failed to get S3 object", uri=uri, exc_info=e)
54
+
55
+ if e.response["Error"]["Code"] == "NoSuchKey":
56
+ raise KeyError(f"Object {key} does not exist in bucket {bucket_name}")
57
+ raise
58
+ except Exception as e:
59
+ logger.error("failed to get S3 object", uri=uri, exc_info=e)
60
+ raise
61
+
62
+ async def presign_get_uri(self, uri: str, expires_in_sec: int, logger: Any) -> str:
63
+ """Returns a presigned URI for getting the S3 object at the supplied URI."""
64
+ self._lazy_create_client()
65
+ bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
66
+ try:
67
+ s3_uri: str = await asyncio.to_thread(
68
+ self._s3_client.generate_presigned_url,
69
+ ClientMethod="get_object",
70
+ Params={"Bucket": bucket_name, "Key": key},
71
+ ExpiresIn=expires_in_sec,
72
+ )
73
+ return s3_uri.replace("https://", "s3://", 1)
74
+ except Exception as e:
75
+ logger.error(
76
+ "failed to presign URI for get_object operation",
77
+ uri=uri,
78
+ exc_info=e,
79
+ expires_in_sec=expires_in_sec,
80
+ )
81
+ raise
82
+
83
+ async def upload(self, uri: str, value: bytes, logger: Any) -> None:
84
+ """Stores the supplied binary value in a S3 object at the supplied URI.
85
+
86
+ The URI must be S3 URI (starts with "s3://").
87
+ Overwrites existing object. Raises Exception on error.
88
+ """
89
+ try:
90
+ self._lazy_create_client()
91
+ bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
92
+ await asyncio.to_thread(
93
+ self._s3_client.put_object, Bucket=bucket_name, Key=key, Body=value
94
+ )
95
+ except Exception as e:
96
+ logger.error("failed to set S3 object", uri=uri, exc_info=e)
97
+ raise
98
+
99
+ async def create_multipart_upload(self, uri: str, logger: Any) -> str:
100
+ """Creates a multipart upload for S3 object and returns the upload ID."""
101
+ self._lazy_create_client()
102
+ bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
103
+ try:
104
+ response = await asyncio.to_thread(
105
+ self._s3_client.create_multipart_upload,
106
+ Bucket=bucket_name,
107
+ Key=key,
108
+ )
109
+ return response["UploadId"]
110
+ except Exception as e:
111
+ logger.error("failed to create multipart upload", uri=uri, exc_info=e)
112
+ raise
113
+
114
+ async def complete_multipart_upload(
115
+ self, uri: str, upload_id: str, parts_etags: list[str], logger: Any
116
+ ) -> None:
117
+ """Completes a multipart upload for S3 object."""
118
+ self._lazy_create_client()
119
+ bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
120
+ try:
121
+ await asyncio.to_thread(
122
+ self._s3_client.complete_multipart_upload,
123
+ Bucket=bucket_name,
124
+ Key=key,
125
+ UploadId=upload_id,
126
+ MultipartUpload={
127
+ "Parts": [
128
+ {"ETag": etag, "PartNumber": i + 1}
129
+ for i, etag in enumerate(parts_etags)
130
+ ]
131
+ },
132
+ )
133
+ except Exception as e:
134
+ logger.error("failed to complete multipart upload", uri=uri, exc_info=e)
135
+ raise
136
+
137
+ async def abort_multipart_upload(
138
+ self, uri: str, upload_id: str, logger: Any
139
+ ) -> None:
140
+ """Aborts a multipart upload for S3 object."""
141
+ self._lazy_create_client()
142
+ bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
143
+ try:
144
+ await asyncio.to_thread(
145
+ self._s3_client.abort_multipart_upload,
146
+ Bucket=bucket_name,
147
+ Key=key,
148
+ UploadId=upload_id,
149
+ )
150
+ except Exception as e:
151
+ logger.error("failed to abort multipart upload", uri=uri, exc_info=e)
152
+ raise
153
+
154
+ async def presign_upload_part_uri(
155
+ self,
156
+ uri: str,
157
+ part_number: int,
158
+ upload_id: str,
159
+ expires_in_sec: int,
160
+ logger: Any,
161
+ ) -> str:
162
+ """Returns a presigned URI for uploading a part in a multipart upload for S3 object."""
163
+ self._lazy_create_client()
164
+ bucket_name, key = _bucket_name_and_object_key_from_uri(uri)
165
+ try:
166
+ response = await asyncio.to_thread(
167
+ self._s3_client.generate_presigned_url,
168
+ ClientMethod="upload_part",
169
+ Params={
170
+ "Bucket": bucket_name,
171
+ "Key": key,
172
+ "UploadId": upload_id,
173
+ "PartNumber": part_number,
174
+ },
175
+ ExpiresIn=expires_in_sec,
176
+ )
177
+ return response
178
+ except Exception as e:
179
+ logger.error(
180
+ "failed to presign URI for upload_part operation",
181
+ uri=uri,
182
+ exc_info=e,
183
+ part_number=part_number,
184
+ upload_id=upload_id,
185
+ expires_in_sec=expires_in_sec,
186
+ )
187
+ raise
188
+
189
+
190
+ def _bucket_name_and_object_key_from_uri(uri: str) -> tuple[str, str]:
191
+ # Example S3 object URI:
192
+ # s3://test-indexify-server-blob-store-eugene-20250411/225b83f4-2aed-40a7-adee-b7a681f817f2
193
+ if not uri.startswith("s3://"):
194
+ raise ValueError(f"S3 URI '{uri}' is missing 's3://' prefix")
195
+
196
+ parts = uri[5:].split("/", 1)
197
+ if len(parts) != 2:
198
+ raise ValueError(f"Failed parsing bucket name from S3 URI '{uri}'")
199
+ return parts[0], parts[1] # bucket_name, key