3tears-object-store 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- 3tears_object_store-0.14.0.dist-info/METADATA +51 -0
- 3tears_object_store-0.14.0.dist-info/RECORD +8 -0
- 3tears_object_store-0.14.0.dist-info/WHEEL +4 -0
- 3tears_object_store-0.14.0.dist-info/licenses/LICENSE +21 -0
- threetears/object_store/__init__.py +11 -0
- threetears/object_store/py.typed +0 -0
- threetears/object_store/s3.py +354 -0
- threetears/object_store/wiring.py +81 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: 3tears-object-store
|
|
3
|
+
Version: 0.14.0
|
|
4
|
+
Summary: Streaming S3-compatible object store for large binary artifacts (Path-2)
|
|
5
|
+
Project-URL: Repository, https://github.com/pacepace/3tears
|
|
6
|
+
Author: pace
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Framework :: AsyncIO
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.14
|
|
17
|
+
Requires-Dist: 3tears
|
|
18
|
+
Requires-Dist: 3tears-media-contracts
|
|
19
|
+
Requires-Dist: 3tears-observe
|
|
20
|
+
Requires-Dist: aioboto3>=13
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# 3tears-object-store
|
|
24
|
+
|
|
25
|
+
Streaming S3-compatible object store for large binary artifacts (Path-2 of
|
|
26
|
+
the scope-and-objects design): pcaps, DB dumps, rendered reports, evidence.
|
|
27
|
+
|
|
28
|
+
Implements the dependency-free `ObjectStore` protocol from
|
|
29
|
+
`3tears-media-contracts` over any S3-compatible backend (MinIO in dev, S3 in
|
|
30
|
+
prod). **Streaming by contract** — uploads move through one part-size buffer
|
|
31
|
+
at a time via S3 multipart; downloads yield the response body in chunks — so
|
|
32
|
+
a multi-GB object never has to sit whole in a pod's memory.
|
|
33
|
+
|
|
34
|
+
Keys follow the platform's locked scope-first scheme (`keys.build_object_key`):
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
<customer_id>/<scope>/<category>/<YYYY>/<MM>/<DD>/<object_id>/<filename>
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Lifted from metallm's `S3Service` and made streaming.
|
|
41
|
+
|
|
42
|
+
## Dependency note
|
|
43
|
+
|
|
44
|
+
`aioboto3` (the async S3 client) tracks `aiobotocore`, which caps `botocore`
|
|
45
|
+
below the latest sync-`boto3` release. Adding this package therefore pins the
|
|
46
|
+
workspace's `botocore`/`boto3` lower and transitively pulls `wrapt` and `lxml`
|
|
47
|
+
down a major version. That cap is inherent to using an async S3 client and is
|
|
48
|
+
accepted — the full 3tears suite is green under the resolved set. If any
|
|
49
|
+
package comes to rely on `wrapt>=2` or `lxml>=6` behavior, add an explicit
|
|
50
|
+
lower bound at the workspace level so resolution fails loudly instead of
|
|
51
|
+
silently regressing.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
threetears/object_store/__init__.py,sha256=DU7Nhi3HAsNCaQtTZvA5FD3fHrTInK0iX7pK1Vg9Ioo,675
|
|
2
|
+
threetears/object_store/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
threetears/object_store/s3.py,sha256=VwzwrM5OpGVTN6VeFl9GMVtV4NasTZgAbjSfH36JNYE,13911
|
|
4
|
+
threetears/object_store/wiring.py,sha256=JJCKPfFXNjJRyD2LWBkQ0x6eHALJ1nnRax404Ihhoz4,2993
|
|
5
|
+
3tears_object_store-0.14.0.dist-info/METADATA,sha256=Zw49hTIJvLuFIo1PSjSspC5vdlip1KDXoKFDGtAE6oE,2075
|
|
6
|
+
3tears_object_store-0.14.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
7
|
+
3tears_object_store-0.14.0.dist-info/licenses/LICENSE,sha256=7GWEoEOcFJenZLt4LDzqH2K7QLxo_2m8rzG7Vv8VGXo,1066
|
|
8
|
+
3tears_object_store-0.14.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mark Pace
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Streaming S3-compatible object store for large binary artifacts (Path-2)."""
|
|
2
|
+
|
|
3
|
+
# the key builder is a CONTRACT (the locked scope-first layout), so it lives in
|
|
4
|
+
# the dependency-free media-contracts package -- a producing tool can build a key
|
|
5
|
+
# without inheriting this package's aioboto3 client tree. re-exported here for
|
|
6
|
+
# back-compat with callers importing it off the impl package.
|
|
7
|
+
from threetears.media.contracts.keys import build_object_key, sanitize_segment
|
|
8
|
+
from threetears.object_store.s3 import S3ObjectStore
|
|
9
|
+
from threetears.object_store.wiring import build_s3_object_store
|
|
10
|
+
|
|
11
|
+
__all__ = ["S3ObjectStore", "build_object_key", "build_s3_object_store", "sanitize_segment"]
|
|
File without changes
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Streaming S3-compatible object store (aioboto3).
|
|
2
|
+
|
|
3
|
+
Implements :class:`threetears.media.contracts.ObjectStore` over any
|
|
4
|
+
S3-compatible backend (MinIO in dev, S3 in prod). Never buffers a whole
|
|
5
|
+
object: uploads stream through one part-size buffer at a time via S3
|
|
6
|
+
multipart (or a single PUT when the whole object fits one part); downloads
|
|
7
|
+
yield the response body in chunks. Lifted from metallm's ``S3Service`` and
|
|
8
|
+
made streaming.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import AsyncIterator
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import aioboto3 # type: ignore[import-untyped]
|
|
17
|
+
from botocore.config import Config as BotoConfig # type: ignore[import-untyped]
|
|
18
|
+
from botocore.exceptions import ClientError # type: ignore[import-untyped]
|
|
19
|
+
from threetears.media.contracts import ObjectListing, ObjectStore
|
|
20
|
+
from threetears.observe import get_logger
|
|
21
|
+
|
|
22
|
+
__all__ = ["S3ObjectStore"]
|
|
23
|
+
|
|
24
|
+
log = get_logger(__name__)
|
|
25
|
+
|
|
26
|
+
#: S3 multipart parts must be >= 5 MiB (except the final part). The default
|
|
27
|
+
#: part size doubles as the upload buffer ceiling -- one part-size buffer is
|
|
28
|
+
#: the most memory a single ``put`` holds, regardless of total object size.
|
|
29
|
+
_MIN_PART_SIZE = 5 * 1024 * 1024
|
|
30
|
+
_DEFAULT_PART_SIZE = 8 * 1024 * 1024
|
|
31
|
+
|
|
32
|
+
#: streamed-download chunk size.
|
|
33
|
+
_DOWNLOAD_CHUNK_SIZE = 1024 * 1024
|
|
34
|
+
|
|
35
|
+
#: S3 ``DeleteObjects`` accepts at most 1000 keys per request; the reconciler
|
|
36
|
+
#: sweep batches to this ceiling.
|
|
37
|
+
_DELETE_BATCH_SIZE = 1000
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class S3ObjectStore:
    """Streaming ObjectStore over an S3-compatible backend.

    The store holds only configuration plus an aioboto3 session; every
    operation opens its own client context through :meth:`_client`.

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None``
        uses the AWS default endpoint
    :ptype endpoint_url: str | None
    :param access_key: access key id
    :ptype access_key: str
    :param secret_key: secret access key
    :ptype secret_key: str
    :param bucket: target bucket name
    :ptype bucket: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param part_size_bytes: multipart part size / upload buffer ceiling;
        must be >= 5 MiB
    :ptype part_size_bytes: int
    :param session: aioboto3 session to use; defaults to a fresh
        ``aioboto3.Session()``. Injectable so tests can supply a fake client.
    :ptype session: Any
    :raises ValueError: when ``part_size_bytes`` is below the 5 MiB S3
        multipart minimum
    """

    def __init__(
        self,
        *,
        endpoint_url: str | None,
        access_key: str,
        secret_key: str,
        bucket: str,
        region: str = "us-east-1",
        part_size_bytes: int = _DEFAULT_PART_SIZE,
        session: Any = None,
    ) -> None:
        if part_size_bytes < _MIN_PART_SIZE:
            raise ValueError("part_size_bytes must be >= 5 MiB (S3 multipart minimum)")
        self._endpoint_url = endpoint_url
        self._access_key = access_key
        self._secret_key = secret_key
        self._bucket = bucket
        self._region = region
        self._part_size = part_size_bytes
        self._session = session if session is not None else aioboto3.Session()

    def _client(self) -> Any:
        """Return an async-context-manager S3 client.

        Built from the stored endpoint, credentials and region, always with
        SigV4 signing.

        :return: aioboto3 client context manager
        :rtype: Any
        """
        return self._session.client(
            "s3",
            endpoint_url=self._endpoint_url,
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            region_name=self._region,
            config=BotoConfig(signature_version="s3v4"),
        )

    async def ensure_bucket(self) -> None:
        """Create the configured bucket if it does not already exist.

        Existence is probed with ``head_bucket``; only a not-found style error
        code triggers creation, any other ``ClientError`` is re-raised.

        :return: nothing
        :rtype: None
        :raises ClientError: when the probe fails for a reason other than the
            bucket being absent, or when creation itself fails
        """
        async with self._client() as client:
            try:
                await client.head_bucket(Bucket=self._bucket)
            except ClientError as err:
                code = str(err.response.get("Error", {}).get("Code", ""))
                if code not in ("404", "NoSuchBucket", "NotFound"):
                    raise
                create_kwargs: dict[str, Any] = {"Bucket": self._bucket}
                # AWS rejects CreateBucket sent to a regional endpoint unless
                # the matching LocationConstraint is supplied; us-east-1 is the
                # one region that must NOT carry it. Custom endpoints (MinIO
                # etc.) keep the original bare request.
                if self._endpoint_url is None and self._region != "us-east-1":
                    create_kwargs["CreateBucketConfiguration"] = {
                        "LocationConstraint": self._region,
                    }
                await client.create_bucket(**create_kwargs)
                log.info(
                    "object store bucket created",
                    extra={"extra_data": {"bucket": self._bucket}},
                )

    async def put(
        self,
        key: str,
        body: AsyncIterator[bytes],
        *,
        content_type: str,
        size: int | None = None,
    ) -> None:
        """Stream ``body`` to ``key``.

        Peak memory is one part plus the latest incoming chunk -- bounded
        independent of total object size (a multi-GB object never sits whole
        in memory). A single PUT is used when the whole object fits one
        part, otherwise S3 multipart. On any failure the partial multipart
        upload is aborted (best effort) so no orphaned parts linger.

        :param key: tenant-scoped object key
        :ptype key: str
        :param body: async iterator yielding the object's bytes in chunks
        :ptype body: AsyncIterator[bytes]
        :param content_type: MIME type stored on the object
        :ptype content_type: str
        :param size: total byte length when known (advisory; not consulted by
            this implementation, which streams regardless)
        :ptype size: int | None
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            buffer = bytearray()
            upload_id: str | None = None
            parts: list[dict[str, Any]] = []
            part_number = 1
            completed = False
            try:
                async for chunk in body:
                    buffer.extend(chunk)
                    # flush every full part the buffer now holds; a large
                    # incoming chunk may yield more than one.
                    while len(buffer) >= self._part_size:
                        if upload_id is None:
                            # multipart is started lazily, only once a full
                            # part exists.
                            created = await client.create_multipart_upload(
                                Bucket=self._bucket,
                                Key=key,
                                ContentType=content_type,
                            )
                            upload_id = created["UploadId"]
                        part = bytes(buffer[: self._part_size])
                        del buffer[: self._part_size]
                        resp = await client.upload_part(
                            Bucket=self._bucket,
                            Key=key,
                            PartNumber=part_number,
                            UploadId=upload_id,
                            Body=part,
                        )
                        parts.append({"ETag": resp["ETag"], "PartNumber": part_number})
                        part_number += 1
                if upload_id is None:
                    # never reached a full part: the whole object is in the
                    # buffer, so one plain PUT suffices.
                    await client.put_object(
                        Bucket=self._bucket,
                        Key=key,
                        Body=bytes(buffer),
                        ContentType=content_type,
                    )
                else:
                    if buffer:
                        # trailing remainder; the final part may be < 5 MiB.
                        resp = await client.upload_part(
                            Bucket=self._bucket,
                            Key=key,
                            PartNumber=part_number,
                            UploadId=upload_id,
                            Body=bytes(buffer),
                        )
                        parts.append({"ETag": resp["ETag"], "PartNumber": part_number})
                    await client.complete_multipart_upload(
                        Bucket=self._bucket,
                        Key=key,
                        UploadId=upload_id,
                        MultipartUpload={"Parts": parts},
                    )
                completed = True
            finally:
                if upload_id is not None and not completed:
                    try:
                        await client.abort_multipart_upload(Bucket=self._bucket, Key=key, UploadId=upload_id)
                        log.info(
                            "aborted partial multipart upload after error",
                            extra={"extra_data": {"key": key, "upload_id": upload_id}},
                        )
                    except Exception as abort_err:
                        # best-effort cleanup: a failure here (service error
                        # or transport error alike) must not replace the
                        # upload error that is already propagating.
                        log.warning(
                            "failed to abort multipart upload after error",
                            extra={
                                "extra_data": {
                                    "key": key,
                                    "upload_id": upload_id,
                                    "error": str(abort_err),
                                }
                            },
                        )
            log.debug(
                "object stored",
                extra={
                    "extra_data": {
                        "key": key,
                        "multipart": upload_id is not None,
                        "parts": len(parts),
                    }
                },
            )

    async def open_read(self, key: str) -> AsyncIterator[bytes]:
        """Open ``key`` for streaming read, yielding bytes in chunks.

        The client context stays open for as long as the iterator is being
        consumed.

        :param key: object key
        :ptype key: str
        :return: async iterator over the object's bytes
        :rtype: AsyncIterator[bytes]
        """
        async with self._client() as client:
            resp = await client.get_object(Bucket=self._bucket, Key=key)
            async for chunk in resp["Body"].iter_chunks(_DOWNLOAD_CHUNK_SIZE):
                yield chunk

    async def delete(self, key: str) -> None:
        """Delete a single object.

        :param key: object key
        :ptype key: str
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            await client.delete_object(Bucket=self._bucket, Key=key)

    async def delete_many(self, keys: list[str]) -> None:
        """Delete many objects, batched to S3's 1000-key request limit.

        The reconciler sweep can exceed 1000 keys, so deletes are chunked
        into ``_DELETE_BATCH_SIZE`` requests rather than one oversized call
        S3/MinIO would reject. ``DeleteObjects`` reports per-key failures in
        its response instead of raising, so any reported failures are logged
        as a warning (they are not raised).

        :param keys: object keys to delete; an empty list is a no-op
        :ptype keys: list[str]
        :return: nothing
        :rtype: None
        """
        if not keys:
            return
        async with self._client() as client:
            for start in range(0, len(keys), _DELETE_BATCH_SIZE):
                batch = keys[start : start + _DELETE_BATCH_SIZE]
                resp = await client.delete_objects(
                    Bucket=self._bucket,
                    Delete={
                        "Objects": [{"Key": k} for k in batch],
                        "Quiet": True,
                    },
                )
                # quiet mode omits successes but still lists failed keys.
                failed = resp.get("Errors") if isinstance(resp, dict) else None
                if failed:
                    log.warning(
                        "object store batch delete reported per-key failures",
                        extra={
                            "extra_data": {
                                "failed": len(failed),
                                "sample": [
                                    {"key": item.get("Key"), "code": item.get("Code")}
                                    for item in failed[:10]
                                ],
                            }
                        },
                    )

    async def _iter_contents(self, prefix: str | None) -> AsyncIterator[dict[str, Any]]:
        """Yield each ``Contents`` entry across every listing page.

        Shared pagination for :meth:`list_keys` and :meth:`list_entries` so the
        continuation-token walk lives in one place.

        :param prefix: key-prefix filter, or ``None`` for the whole bucket
        :ptype prefix: str | None
        :return: async iterator over raw ``list_objects_v2`` ``Contents`` dicts
        :rtype: AsyncIterator[dict[str, Any]]
        """
        async with self._client() as client:
            token: str | None = None
            while True:
                kwargs: dict[str, Any] = {"Bucket": self._bucket}
                if prefix is not None:
                    kwargs["Prefix"] = prefix
                if token is not None:
                    kwargs["ContinuationToken"] = token
                resp = await client.list_objects_v2(**kwargs)
                for obj in resp.get("Contents", []):
                    yield obj
                if not resp.get("IsTruncated"):
                    break
                token = resp.get("NextContinuationToken")
                if token is None:
                    # a truncated page without a token would restart the
                    # listing from the top and loop forever; stop instead.
                    log.warning(
                        "object listing truncated without continuation token",
                        extra={"extra_data": {"bucket": self._bucket, "prefix": prefix}},
                    )
                    break

    async def list_keys(self, prefix: str | None = None) -> AsyncIterator[str]:
        """Yield object keys (paginated), optionally restricted to ``prefix``.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object keys
        :rtype: AsyncIterator[str]
        """
        async for obj in self._iter_contents(prefix):
            yield obj["Key"]

    async def list_entries(self, prefix: str | None = None) -> AsyncIterator[ObjectListing]:
        """Yield object listings (key + last-modified + size), optionally by ``prefix``.

        Carries the ``LastModified`` + ``Size`` metadata S3 already returns on a
        list so the reconciler can judge orphan age without a per-key HEAD.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object listings
        :rtype: AsyncIterator[ObjectListing]
        """
        async for obj in self._iter_contents(prefix):
            yield ObjectListing(
                key=obj["Key"],
                last_modified=obj["LastModified"],
                size_bytes=int(obj["Size"]),
            )

    async def presigned_get_url(self, key: str, *, expires_in: int = 300) -> str:
        """Presigned GET URL for delivery -- bytes never cross the agent.

        :param key: object key
        :ptype key: str
        :param expires_in: URL validity in seconds
        :ptype expires_in: int
        :return: presigned URL
        :rtype: str
        """
        async with self._client() as client:
            url: str = await client.generate_presigned_url(
                "get_object",
                Params={"Bucket": self._bucket, "Key": key},
                ExpiresIn=expires_in,
            )
            return url
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
#: static conformance guarantee -- S3ObjectStore must satisfy the ObjectStore
#: contract this package exists to implement. The annotated assignment below
#: exists for the type checker: a missing or mismatched method fails
#: type-checking here. Only the binding itself happens at import time.
_OBJECTSTORE_IMPL: type[ObjectStore] = S3ObjectStore
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Build a configured :class:`S3ObjectStore` from config + secret references.
|
|
2
|
+
|
|
3
|
+
A producing or consuming pod wires its object store from deployment config:
|
|
4
|
+
``endpoint_url`` + ``bucket`` + ``region`` as plain values, and the S3
|
|
5
|
+
credentials as platform *secret references* (``env://`` in dev, ``k8s://`` in
|
|
6
|
+
prod) that this helper resolves at construction via
|
|
7
|
+
:func:`threetears.core.security.secret_refs.resolve_secret`. The raw
|
|
8
|
+
credentials are unwrapped at the last moment and live only inside the returned
|
|
9
|
+
store -- never logged, never returned, never held in a plain string here.
|
|
10
|
+
|
|
11
|
+
This lives beside the impl (not in a pod) so every pod -- the pure-``threetears``
|
|
12
|
+
tool pod, an SDK-spawned pod, the reconciler -- wires its store the same tested
|
|
13
|
+
way rather than re-resolving refs by hand.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from threetears.core.security.secret_refs import resolve_secret
|
|
21
|
+
from threetears.observe import get_logger
|
|
22
|
+
from threetears.object_store.s3 import S3ObjectStore
|
|
23
|
+
|
|
24
|
+
__all__ = ["build_s3_object_store"]
|
|
25
|
+
|
|
26
|
+
_log = get_logger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_s3_object_store(
    *,
    endpoint_url: str | None,
    bucket: str,
    access_key_ref: str,
    secret_key_ref: str,
    region: str = "us-east-1",
    session: Any = None,
) -> S3ObjectStore:
    """Construct an :class:`S3ObjectStore` from plain config and secret references.

    Both credential references are resolved with ``resolve_secret`` (access
    key first, then secret key) and unwrapped directly into the store's
    constructor. Only the non-secret config shape is logged.

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None`` uses
        the AWS default endpoint
    :ptype endpoint_url: str | None
    :param bucket: target bucket name
    :ptype bucket: str
    :param access_key_ref: secret reference for the access key id
        (``env://VAR`` / ``k8s://path``); resolved here
    :ptype access_key_ref: str
    :param secret_key_ref: secret reference for the secret access key; resolved here
    :ptype secret_key_ref: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param session: aioboto3 session passthrough for tests; ``None`` lets the
        store create its own
    :ptype session: Any
    :return: a streaming object store ready to put/get/delete
    :rtype: S3ObjectStore
    :raises SecretResolutionError: when either credential reference is malformed,
        names an unknown/unimplemented scheme, or cannot be resolved
    """
    object_store = S3ObjectStore(
        endpoint_url=endpoint_url,
        access_key=resolve_secret(access_key_ref).get_secret_value(),
        secret_key=resolve_secret(secret_key_ref).get_secret_value(),
        bucket=bucket,
        region=region,
        session=session,
    )
    # config shape only -- never the resolved credential values.
    config_shape = {
        "bucket": bucket,
        "region": region,
        "endpoint_configured": endpoint_url is not None,
    }
    _log.info("built S3 object store", extra={"extra_data": config_shape})
    return object_store
|