3tears-object-store 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.4
2
+ Name: 3tears-object-store
3
+ Version: 0.14.0
4
+ Summary: Streaming S3-compatible object store for large binary artifacts (Path-2)
5
+ Project-URL: Repository, https://github.com/pacepace/3tears
6
+ Author: pace
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Framework :: AsyncIO
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.14
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.14
17
+ Requires-Dist: 3tears
18
+ Requires-Dist: 3tears-media-contracts
19
+ Requires-Dist: 3tears-observe
20
+ Requires-Dist: aioboto3>=13
21
+ Description-Content-Type: text/markdown
22
+
23
+ # 3tears-object-store
24
+
25
+ Streaming S3-compatible object store for large binary artifacts (Path-2 of
26
+ the scope-and-objects design): pcaps, DB dumps, rendered reports, evidence.
27
+
28
+ Implements the dependency-free `ObjectStore` protocol from
29
+ `3tears-media-contracts` over any S3-compatible backend (MinIO in dev, S3 in
30
+ prod). **Streaming by contract** — uploads move through one part-size buffer
31
+ at a time via S3 multipart (or a single PUT when the whole object fits in one part); downloads yield the response body in chunks — so
32
+ a multi-GB object never has to sit whole in a pod's memory.
33
+
34
+ Keys follow the platform's locked scope-first scheme (`keys.build_object_key`):
35
+
36
+ ```
37
+ <customer_id>/<scope>/<category>/<YYYY>/<MM>/<DD>/<object_id>/<filename>
38
+ ```
39
+
40
+ Lifted from metallm's `S3Service` and made streaming.
41
+
42
+ ## Dependency note
43
+
44
+ `aioboto3` (the async S3 client) tracks `aiobotocore`, which caps `botocore`
45
+ below the latest sync-`boto3` release. Adding this package therefore pins the
46
+ workspace's `botocore`/`boto3` lower and transitively pulls `wrapt` and `lxml`
47
+ down a major version. That cap is inherent to using an async S3 client and is
48
+ accepted — the full 3tears suite is green under the resolved set. If any
49
+ package comes to rely on `wrapt>=2` or `lxml>=6` behavior, add an explicit
50
+ lower bound at the workspace level so resolution fails loudly instead of
51
+ silently regressing.
@@ -0,0 +1,8 @@
1
+ threetears/object_store/__init__.py,sha256=DU7Nhi3HAsNCaQtTZvA5FD3fHrTInK0iX7pK1Vg9Ioo,675
2
+ threetears/object_store/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ threetears/object_store/s3.py,sha256=VwzwrM5OpGVTN6VeFl9GMVtV4NasTZgAbjSfH36JNYE,13911
4
+ threetears/object_store/wiring.py,sha256=JJCKPfFXNjJRyD2LWBkQ0x6eHALJ1nnRax404Ihhoz4,2993
5
+ 3tears_object_store-0.14.0.dist-info/METADATA,sha256=Zw49hTIJvLuFIo1PSjSspC5vdlip1KDXoKFDGtAE6oE,2075
6
+ 3tears_object_store-0.14.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
7
+ 3tears_object_store-0.14.0.dist-info/licenses/LICENSE,sha256=7GWEoEOcFJenZLt4LDzqH2K7QLxo_2m8rzG7Vv8VGXo,1066
8
+ 3tears_object_store-0.14.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mark Pace
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,11 @@
1
+ """Streaming S3-compatible object store for large binary artifacts (Path-2)."""
2
+
3
+ # the key builder is a CONTRACT (the locked scope-first layout), so it lives in
4
+ # the dependency-free media-contracts package -- a producing tool can build a key
5
+ # without inheriting this package's aioboto3 client tree. re-exported here for
6
+ # back-compat with callers importing it off the impl package.
7
+ from threetears.media.contracts.keys import build_object_key, sanitize_segment
8
+ from threetears.object_store.s3 import S3ObjectStore
9
+ from threetears.object_store.wiring import build_s3_object_store
10
+
11
+ __all__ = ["S3ObjectStore", "build_object_key", "build_s3_object_store", "sanitize_segment"]
File without changes
@@ -0,0 +1,354 @@
1
+ """Streaming S3-compatible object store (aioboto3).
2
+
3
+ Implements :class:`threetears.media.contracts.ObjectStore` over any
4
+ S3-compatible backend (MinIO in dev, S3 in prod). Never buffers a whole
5
+ object: uploads stream through one part-size buffer at a time via S3
6
+ multipart (or a single PUT when the whole object fits one part); downloads
7
+ yield the response body in chunks. Lifted from metallm's ``S3Service`` and
8
+ made streaming.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from collections.abc import AsyncIterator
14
+ from typing import Any
15
+
16
+ import aioboto3 # type: ignore[import-untyped]
17
+ from botocore.config import Config as BotoConfig # type: ignore[import-untyped]
18
+ from botocore.exceptions import ClientError # type: ignore[import-untyped]
19
+ from threetears.media.contracts import ObjectListing, ObjectStore
20
+ from threetears.observe import get_logger
21
+
22
+ __all__ = ["S3ObjectStore"]
23
+
24
+ log = get_logger(__name__)
25
+
26
#: S3 multipart parts must be >= 5 MiB (except the final part); the store's
#: constructor rejects any configured part size below this floor.
_MIN_PART_SIZE = 5 * 1024 * 1024
#: default part size (8 MiB). The part size doubles as the upload buffer
#: ceiling -- a single ``put`` holds roughly one part plus the latest incoming
#: chunk, regardless of total object size.
_DEFAULT_PART_SIZE = 8 * 1024 * 1024

#: streamed-download chunk size (1 MiB) requested from the response body.
_DOWNLOAD_CHUNK_SIZE = 1024 * 1024

#: S3 ``DeleteObjects`` accepts at most 1000 keys per request; the reconciler
#: sweep batches to this ceiling.
_DELETE_BATCH_SIZE = 1000
38
+
39
+
40
class S3ObjectStore:
    """Streaming ObjectStore over an S3-compatible backend.

    Every operation opens its own short-lived client from the session, so the
    store holds no open connection between calls.

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None``
        uses the AWS default endpoint
    :ptype endpoint_url: str | None
    :param access_key: access key id
    :ptype access_key: str
    :param secret_key: secret access key
    :ptype secret_key: str
    :param bucket: target bucket name
    :ptype bucket: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param part_size_bytes: multipart part size / upload buffer ceiling;
        must be >= 5 MiB
    :ptype part_size_bytes: int
    :param session: aioboto3 session to use; defaults to a fresh
        ``aioboto3.Session()``. Injectable so tests can supply a fake client.
    :ptype session: Any
    :raises ValueError: when ``part_size_bytes`` is below the S3 multipart minimum
    """

    def __init__(
        self,
        *,
        endpoint_url: str | None,
        access_key: str,
        secret_key: str,
        bucket: str,
        region: str = "us-east-1",
        part_size_bytes: int = _DEFAULT_PART_SIZE,
        session: Any = None,
    ) -> None:
        if part_size_bytes < _MIN_PART_SIZE:
            raise ValueError("part_size_bytes must be >= 5 MiB (S3 multipart minimum)")
        self._endpoint_url = endpoint_url
        self._access_key = access_key
        self._secret_key = secret_key
        self._bucket = bucket
        self._region = region
        self._part_size = part_size_bytes
        self._session = session if session is not None else aioboto3.Session()

    def _client(self) -> Any:
        """Return an async-context-manager S3 client.

        :return: aioboto3 client context manager
        :rtype: Any
        """
        return self._session.client(
            "s3",
            endpoint_url=self._endpoint_url,
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            region_name=self._region,
            config=BotoConfig(signature_version="s3v4"),
        )

    async def ensure_bucket(self) -> None:
        """Create the configured bucket if it does not already exist.

        A ``HEAD`` probe decides whether creation is needed; any probe error
        other than "not found" is re-raised. Losing a creation race to another
        caller (``BucketAlreadyOwnedByYou``) is treated as success.

        :return: nothing
        :rtype: None
        :raises ClientError: when the probe or the creation fails for any
            reason other than the bucket being absent / already owned
        """
        async with self._client() as client:
            try:
                await client.head_bucket(Bucket=self._bucket)
            except ClientError as err:
                code = str(err.response.get("Error", {}).get("Code", ""))
                if code not in ("404", "NoSuchBucket", "NotFound"):
                    raise
            else:
                # bucket already exists -- nothing to do.
                return

            create_kwargs: dict[str, Any] = {"Bucket": self._bucket}
            # AWS rejects CreateBucket outside us-east-1 unless the region is
            # named as the location constraint. Only sent against the AWS
            # default endpoint so custom backends (MinIO) see the same request
            # as before.
            if self._endpoint_url is None and self._region != "us-east-1":
                create_kwargs["CreateBucketConfiguration"] = {"LocationConstraint": self._region}
            try:
                await client.create_bucket(**create_kwargs)
            except ClientError as err:
                code = str(err.response.get("Error", {}).get("Code", ""))
                if code != "BucketAlreadyOwnedByYou":
                    raise
                # a concurrent caller created it between our HEAD and our
                # create -- the bucket exists and is ours, which is the goal.
                return
            log.info(
                "object store bucket created",
                extra={"extra_data": {"bucket": self._bucket}},
            )

    async def put(
        self,
        key: str,
        body: AsyncIterator[bytes],
        *,
        content_type: str,
        size: int | None = None,
    ) -> None:
        """Stream ``body`` to ``key``.

        Peak memory is one part plus the latest incoming chunk -- bounded
        independent of total object size (a multi-GB object never sits whole
        in memory). A single PUT is used when the whole object fits one
        part, otherwise S3 multipart. On any failure the partial multipart
        upload is aborted (best effort) so no orphaned parts linger; a failed
        abort is logged and never replaces the original error.

        :param key: tenant-scoped object key
        :ptype key: str
        :param body: async iterator yielding the object's bytes in chunks
        :ptype body: AsyncIterator[bytes]
        :param content_type: MIME type stored on the object
        :ptype content_type: str
        :param size: total byte length when known (advisory; the impl
            streams regardless and does not read this value)
        :ptype size: int | None
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            buffer = bytearray()
            # stays None until the first full part is seen; None at the end
            # means the whole object fit in one buffer -> single PUT.
            upload_id: str | None = None
            parts: list[dict[str, Any]] = []
            part_number = 1
            completed = False
            try:
                async for chunk in body:
                    buffer.extend(chunk)
                    # drain every full part the buffer now holds; a large
                    # incoming chunk may contain more than one.
                    while len(buffer) >= self._part_size:
                        if upload_id is None:
                            created = await client.create_multipart_upload(
                                Bucket=self._bucket,
                                Key=key,
                                ContentType=content_type,
                            )
                            upload_id = created["UploadId"]
                        part = bytes(buffer[: self._part_size])
                        del buffer[: self._part_size]
                        resp = await client.upload_part(
                            Bucket=self._bucket,
                            Key=key,
                            PartNumber=part_number,
                            UploadId=upload_id,
                            Body=part,
                        )
                        parts.append({"ETag": resp["ETag"], "PartNumber": part_number})
                        part_number += 1
                if upload_id is None:
                    await client.put_object(
                        Bucket=self._bucket,
                        Key=key,
                        Body=bytes(buffer),
                        ContentType=content_type,
                    )
                else:
                    # the final part may be shorter than the part size.
                    if buffer:
                        resp = await client.upload_part(
                            Bucket=self._bucket,
                            Key=key,
                            PartNumber=part_number,
                            UploadId=upload_id,
                            Body=bytes(buffer),
                        )
                        parts.append({"ETag": resp["ETag"], "PartNumber": part_number})
                    await client.complete_multipart_upload(
                        Bucket=self._bucket,
                        Key=key,
                        UploadId=upload_id,
                        MultipartUpload={"Parts": parts},
                    )
                completed = True
            finally:
                if upload_id is not None and not completed:
                    try:
                        await client.abort_multipart_upload(Bucket=self._bucket, Key=key, UploadId=upload_id)
                        log.info(
                            "aborted partial multipart upload after error",
                            extra={"extra_data": {"key": key, "upload_id": upload_id}},
                        )
                    except Exception as abort_err:
                        # best-effort cleanup: catch broadly (transport errors
                        # are not ClientError) so a failed abort cannot mask
                        # the upload error that is already propagating.
                        log.warning(
                            "failed to abort multipart upload after error",
                            extra={
                                "extra_data": {
                                    "key": key,
                                    "upload_id": upload_id,
                                    "error": str(abort_err),
                                }
                            },
                        )
            log.debug(
                "object stored",
                extra={
                    "extra_data": {
                        "key": key,
                        "multipart": upload_id is not None,
                        "parts": len(parts),
                    }
                },
            )

    async def open_read(self, key: str) -> AsyncIterator[bytes]:
        """Open ``key`` for streaming read, yielding bytes in chunks.

        This is an async generator, so the GET request is issued on the first
        iteration rather than at call time; any error from the GET surfaces
        there.

        :param key: object key
        :ptype key: str
        :return: async iterator over the object's bytes
        :rtype: AsyncIterator[bytes]
        """
        async with self._client() as client:
            resp = await client.get_object(Bucket=self._bucket, Key=key)
            async for chunk in resp["Body"].iter_chunks(_DOWNLOAD_CHUNK_SIZE):
                yield chunk

    async def delete(self, key: str) -> None:
        """Delete a single object.

        :param key: object key
        :ptype key: str
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            await client.delete_object(Bucket=self._bucket, Key=key)

    async def delete_many(self, keys: list[str]) -> None:
        """Delete many objects, batched to S3's 1000-key request limit.

        The reconciler sweep can exceed 1000 keys, so deletes are chunked
        into ``_DELETE_BATCH_SIZE`` requests rather than one oversized call
        S3/MinIO would reject. ``DeleteObjects`` reports per-key failures in
        the response body instead of failing the request, so those are logged
        as a warning rather than silently dropped.

        :param keys: object keys to delete
        :ptype keys: list[str]
        :return: nothing
        :rtype: None
        """
        if not keys:
            return
        async with self._client() as client:
            for start in range(0, len(keys), _DELETE_BATCH_SIZE):
                batch = keys[start : start + _DELETE_BATCH_SIZE]
                resp = await client.delete_objects(
                    Bucket=self._bucket,
                    Delete={
                        "Objects": [{"Key": k} for k in batch],
                        "Quiet": True,
                    },
                )
                # quiet mode returns only the failures; tolerate a fake client
                # that returns something other than a response dict.
                failures = resp.get("Errors") if isinstance(resp, dict) else None
                if failures:
                    first = failures[0]
                    log.warning(
                        "object store batch delete reported per-key failures",
                        extra={
                            "extra_data": {
                                "failed": len(failures),
                                "batch_size": len(batch),
                                "first_key": first.get("Key"),
                                "first_code": first.get("Code"),
                            }
                        },
                    )

    async def _iter_contents(self, prefix: str | None) -> AsyncIterator[dict[str, Any]]:
        """Yield each ``Contents`` entry across every listing page.

        Shared pagination for :meth:`list_keys` and :meth:`list_entries` so the
        continuation-token walk lives in one place.

        :param prefix: key-prefix filter, or ``None`` for the whole bucket
        :ptype prefix: str | None
        :return: async iterator over raw ``list_objects_v2`` ``Contents`` dicts
        :rtype: AsyncIterator[dict[str, Any]]
        """
        async with self._client() as client:
            token: str | None = None
            while True:
                kwargs: dict[str, Any] = {"Bucket": self._bucket}
                if prefix is not None:
                    kwargs["Prefix"] = prefix
                if token is not None:
                    kwargs["ContinuationToken"] = token
                resp = await client.list_objects_v2(**kwargs)
                for obj in resp.get("Contents", []):
                    yield obj
                if not resp.get("IsTruncated"):
                    break
                token = resp.get("NextContinuationToken")
                if token is None:
                    # a truncated page without a token cannot be continued;
                    # looping again would restart from the first page forever.
                    log.warning(
                        "object listing truncated without continuation token",
                        extra={"extra_data": {"bucket": self._bucket, "prefix": prefix}},
                    )
                    break

    async def list_keys(self, prefix: str | None = None) -> AsyncIterator[str]:
        """Yield object keys (paginated), optionally restricted to ``prefix``.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object keys
        :rtype: AsyncIterator[str]
        """
        async for obj in self._iter_contents(prefix):
            yield obj["Key"]

    async def list_entries(self, prefix: str | None = None) -> AsyncIterator[ObjectListing]:
        """Yield object listings (key + last-modified + size), optionally by ``prefix``.

        Carries the ``LastModified`` + ``Size`` metadata S3 already returns on a
        list so the reconciler can judge orphan age without a per-key HEAD.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object listings
        :rtype: AsyncIterator[ObjectListing]
        """
        async for obj in self._iter_contents(prefix):
            yield ObjectListing(
                key=obj["Key"],
                last_modified=obj["LastModified"],
                size_bytes=int(obj["Size"]),
            )

    async def presigned_get_url(self, key: str, *, expires_in: int = 300) -> str:
        """Presigned GET URL for delivery -- bytes never cross the agent.

        :param key: object key
        :ptype key: str
        :param expires_in: URL validity in seconds
        :ptype expires_in: int
        :return: presigned URL
        :rtype: str
        """
        async with self._client() as client:
            url: str = await client.generate_presigned_url(
                "get_object",
                Params={"Bucket": self._bucket, "Key": key},
                ExpiresIn=expires_in,
            )
            return url
349
+
350
+
351
#: static conformance guarantee -- S3ObjectStore must satisfy the ObjectStore
#: contract this package exists to implement. mypy verifies the structural
#: match here; a missing or mismatched method fails type-checking. The name is
#: private and not listed in ``__all__``; it exists for the type checker.
_OBJECTSTORE_IMPL: type[ObjectStore] = S3ObjectStore
@@ -0,0 +1,81 @@
1
+ """Build a configured :class:`S3ObjectStore` from config + secret references.
2
+
3
+ A producing or consuming pod wires its object store from deployment config:
4
+ ``endpoint_url`` + ``bucket`` + ``region`` as plain values, and the S3
5
+ credentials as platform *secret references* (``env://`` in dev, ``k8s://`` in
6
+ prod) that this helper resolves at construction via
7
+ :func:`threetears.core.security.secret_refs.resolve_secret`. The raw
8
+ credentials are unwrapped at the last moment and live only inside the returned
9
+ store -- never logged, never returned on their own, and never retained here once the store is built.
10
+
11
+ This lives beside the impl (not in a pod) so every pod -- the pure-``threetears``
12
+ tool pod, an SDK-spawned pod, the reconciler -- wires its store the same tested
13
+ way rather than re-resolving refs by hand.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any
19
+
20
+ from threetears.core.security.secret_refs import resolve_secret
21
+ from threetears.observe import get_logger
22
+ from threetears.object_store.s3 import S3ObjectStore
23
+
24
+ __all__ = ["build_s3_object_store"]
25
+
26
+ _log = get_logger(__name__)
27
+
28
+
29
def build_s3_object_store(
    *,
    endpoint_url: str | None,
    bucket: str,
    access_key_ref: str,
    secret_key_ref: str,
    region: str = "us-east-1",
    session: Any = None,
    part_size_bytes: int | None = None,
) -> S3ObjectStore:
    """Resolve the credential references and construct a streaming store.

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None`` uses
        the AWS default endpoint
    :ptype endpoint_url: str | None
    :param bucket: target bucket name
    :ptype bucket: str
    :param access_key_ref: secret reference for the access key id
        (``env://VAR`` / ``k8s://path``); resolved here
    :ptype access_key_ref: str
    :param secret_key_ref: secret reference for the secret access key; resolved here
    :ptype secret_key_ref: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param session: aioboto3 session passthrough for tests; ``None`` lets the
        store create its own
    :ptype session: Any
    :param part_size_bytes: multipart part size / upload buffer ceiling passed
        to the store; ``None`` keeps the store's own default
    :ptype part_size_bytes: int | None
    :return: a streaming object store ready to put/get/delete
    :rtype: S3ObjectStore
    :raises SecretResolutionError: when either credential reference is malformed,
        names an unknown/unimplemented scheme, or cannot be resolved
    :raises ValueError: when ``part_size_bytes`` is given and the store rejects it
    """
    # forward the part size only when the caller set one, so the store's own
    # default stays the single source of truth.
    overrides: dict[str, Any] = {}
    if part_size_bytes is not None:
        overrides["part_size_bytes"] = part_size_bytes
    # credentials are unwrapped directly into the constructor call (access key
    # first, then secret key) so no named local holds the raw values.
    store = S3ObjectStore(
        endpoint_url=endpoint_url,
        access_key=resolve_secret(access_key_ref).get_secret_value(),
        secret_key=resolve_secret(secret_key_ref).get_secret_value(),
        bucket=bucket,
        region=region,
        session=session,
        **overrides,
    )
    _log.info(
        "built S3 object store",
        extra={
            "extra_data": {
                # config shape only -- never the resolved credential values.
                "bucket": bucket,
                "region": region,
                "endpoint_configured": endpoint_url is not None,
            }
        },
    )
    return store