3tears-object-store 0.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,216 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ # Claude Code local state
210
+ .claude/
211
+
212
+ # prawduct session evidence (local governance artifacts, never shipped)
213
+ .prawduct/
214
+
215
+ # macOS folder metadata
216
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mark Pace
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.4
2
+ Name: 3tears-object-store
3
+ Version: 0.14.0
4
+ Summary: Streaming S3-compatible object store for large binary artifacts (Path-2)
5
+ Project-URL: Repository, https://github.com/pacepace/3tears
6
+ Author: pace
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Framework :: AsyncIO
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.14
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.14
17
+ Requires-Dist: 3tears
18
+ Requires-Dist: 3tears-media-contracts
19
+ Requires-Dist: 3tears-observe
20
+ Requires-Dist: aioboto3>=13
21
+ Description-Content-Type: text/markdown
22
+
23
+ # 3tears-object-store
24
+
25
+ Streaming S3-compatible object store for large binary artifacts (Path-2 of
26
+ the scope-and-objects design): pcaps, DB dumps, rendered reports, evidence.
27
+
28
+ Implements the dependency-free `ObjectStore` protocol from
29
+ `3tears-media-contracts` over any S3-compatible backend (MinIO in dev, S3 in
30
+ prod). **Streaming by contract** — uploads move through one part-size buffer
31
+ at a time via S3 multipart; downloads yield the response body in chunks — so
32
+ a multi-GB object never has to sit whole in a pod's memory.
33
+
34
+ Keys follow the platform's locked scope-first scheme (`keys.build_object_key`):
35
+
36
+ ```
37
+ <customer_id>/<scope>/<category>/<YYYY>/<MM>/<DD>/<object_id>/<filename>
38
+ ```
39
+
40
+ Lifted from metallm's `S3Service` and made streaming.
41
+
42
+ ## Dependency note
43
+
44
+ `aioboto3` (the async S3 client) tracks `aiobotocore`, which caps `botocore`
45
+ below the latest sync-`boto3` release. Adding this package therefore pins the
46
+ workspace's `botocore`/`boto3` lower and transitively pulls `wrapt` and `lxml`
47
+ down a major version. That cap is inherent to using an async S3 client and is
48
+ accepted — the full 3tears suite is green under the resolved set. If any
49
+ package comes to rely on `wrapt>=2` or `lxml>=6` behavior, add an explicit
50
+ lower bound at the workspace level so resolution fails loudly instead of
51
+ silently regressing.
@@ -0,0 +1,29 @@
1
+ # 3tears-object-store
2
+
3
+ Streaming S3-compatible object store for large binary artifacts (Path-2 of
4
+ the scope-and-objects design): pcaps, DB dumps, rendered reports, evidence.
5
+
6
+ Implements the dependency-free `ObjectStore` protocol from
7
+ `3tears-media-contracts` over any S3-compatible backend (MinIO in dev, S3 in
8
+ prod). **Streaming by contract** — uploads move through one part-size buffer
9
+ at a time via S3 multipart; downloads yield the response body in chunks — so
10
+ a multi-GB object never has to sit whole in a pod's memory.
11
+
12
+ Keys follow the platform's locked scope-first scheme (`keys.build_object_key`):
13
+
14
+ ```
15
+ <customer_id>/<scope>/<category>/<YYYY>/<MM>/<DD>/<object_id>/<filename>
16
+ ```
17
+
18
+ Lifted from metallm's `S3Service` and made streaming.
19
+
20
+ ## Dependency note
21
+
22
+ `aioboto3` (the async S3 client) tracks `aiobotocore`, which caps `botocore`
23
+ below the latest sync-`boto3` release. Adding this package therefore pins the
24
+ workspace's `botocore`/`boto3` lower and transitively pulls `wrapt` and `lxml`
25
+ down a major version. That cap is inherent to using an async S3 client and is
26
+ accepted — the full 3tears suite is green under the resolved set. If any
27
+ package comes to rely on `wrapt>=2` or `lxml>=6` behavior, add an explicit
28
+ lower bound at the workspace level so resolution fails loudly instead of
29
+ silently regressing.
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "3tears-object-store"
7
+ version = "0.14.0"
8
+ description = "Streaming S3-compatible object store for large binary artifacts (Path-2)"
9
+ readme = "README.md"
10
+ requires-python = ">=3.14"
11
+ authors = [{name = "pace"}]
12
+ license = "MIT"
13
+ license-files = ["LICENSE"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Framework :: AsyncIO",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.14",
20
+ "Topic :: Software Development :: Libraries",
21
+ "Typing :: Typed",
22
+ ]
23
+ # the contract is dependency-free (media-contracts); this impl package
24
+ # isolates the aioboto3 (-> aiobotocore -> botocore) S3 client tree so it
25
+ # never leaks into the contract or its other consumers. core (``3tears``) is
26
+ # pulled in for the secret_refs resolver used by the wiring helper -- core is
27
+ # foundational + acyclic and brings no aioboto3, so the isolation still holds.
28
+ dependencies = [
29
+ "3tears",
30
+ "3tears-media-contracts",
31
+ "3tears-observe",
32
+ "aioboto3>=13",
33
+ ]
34
+
35
+ [project.urls]
36
+ Repository = "https://github.com/pacepace/3tears"
37
+
38
+ [tool.uv.sources]
39
+ 3tears = { workspace = true }
40
+ 3tears-media-contracts = { workspace = true }
41
+ 3tears-observe = { workspace = true }
42
+
43
+ [tool.hatch.build.targets.wheel]
44
+ packages = ["src/threetears"]
@@ -0,0 +1,11 @@
1
+ """Streaming S3-compatible object store for large binary artifacts (Path-2)."""
2
+
3
+ # the key builder is a CONTRACT (the locked scope-first layout), so it lives in
4
+ # the dependency-free media-contracts package -- a producing tool can build a key
5
+ # without inheriting this package's aioboto3 client tree. re-exported here for
6
+ # back-compat with callers importing it off the impl package.
7
+ from threetears.media.contracts.keys import build_object_key, sanitize_segment
8
+ from threetears.object_store.s3 import S3ObjectStore
9
+ from threetears.object_store.wiring import build_s3_object_store
10
+
11
+ __all__ = ["S3ObjectStore", "build_object_key", "build_s3_object_store", "sanitize_segment"]
@@ -0,0 +1,354 @@
1
+ """Streaming S3-compatible object store (aioboto3).
2
+
3
+ Implements :class:`threetears.media.contracts.ObjectStore` over any
4
+ S3-compatible backend (MinIO in dev, S3 in prod). Never buffers a whole
5
+ object: uploads stream through one part-size buffer at a time via S3
6
+ multipart (or a single PUT when the whole object fits one part); downloads
7
+ yield the response body in chunks. Lifted from metallm's ``S3Service`` and
8
+ made streaming.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from collections.abc import AsyncIterator
14
+ from typing import Any
15
+
16
+ import aioboto3 # type: ignore[import-untyped]
17
+ from botocore.config import Config as BotoConfig # type: ignore[import-untyped]
18
+ from botocore.exceptions import ClientError # type: ignore[import-untyped]
19
+ from threetears.media.contracts import ObjectListing, ObjectStore
20
+ from threetears.observe import get_logger
21
+
22
+ __all__ = ["S3ObjectStore"]
23
+
24
+ log = get_logger(__name__)
25
+
26
+ #: S3 multipart parts must be >= 5 MiB (except the final part). The default
27
+ #: part size doubles as the upload buffer ceiling -- one part-size buffer is
28
+ #: the most memory a single ``put`` holds, regardless of total object size.
29
+ _MIN_PART_SIZE = 5 * 1024 * 1024
30
+ _DEFAULT_PART_SIZE = 8 * 1024 * 1024
31
+
32
+ #: streamed-download chunk size.
33
+ _DOWNLOAD_CHUNK_SIZE = 1024 * 1024
34
+
35
+ #: S3 ``DeleteObjects`` accepts at most 1000 keys per request; the reconciler
36
+ #: sweep batches to this ceiling.
37
+ _DELETE_BATCH_SIZE = 1000
38
+
39
+
40
class S3ObjectStore:
    """Streaming ObjectStore over an S3-compatible backend.

    Each operation opens its own client from the session via ``async with``
    and releases it when the operation (or the returned async iterator)
    finishes.

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None``
        uses the AWS default endpoint
    :ptype endpoint_url: str | None
    :param access_key: access key id
    :ptype access_key: str
    :param secret_key: secret access key
    :ptype secret_key: str
    :param bucket: target bucket name
    :ptype bucket: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param part_size_bytes: multipart part size / upload buffer ceiling;
        must be >= 5 MiB
    :ptype part_size_bytes: int
    :param session: aioboto3 session to use; defaults to a fresh
        ``aioboto3.Session()``. Injectable so tests can supply a fake client.
    :ptype session: Any
    :raises ValueError: when ``part_size_bytes`` is below the 5 MiB S3
        multipart minimum
    """

    def __init__(
        self,
        *,
        endpoint_url: str | None,
        access_key: str,
        secret_key: str,
        bucket: str,
        region: str = "us-east-1",
        part_size_bytes: int = _DEFAULT_PART_SIZE,
        session: Any = None,
    ) -> None:
        if part_size_bytes < _MIN_PART_SIZE:
            raise ValueError("part_size_bytes must be >= 5 MiB (S3 multipart minimum)")
        self._endpoint_url = endpoint_url
        self._access_key = access_key
        self._secret_key = secret_key
        self._bucket = bucket
        self._region = region
        self._part_size = part_size_bytes
        self._session = session if session is not None else aioboto3.Session()

    def _client(self) -> Any:
        """Return an async-context-manager S3 client.

        :return: aioboto3 client context manager
        :rtype: Any
        """
        return self._session.client(
            "s3",
            endpoint_url=self._endpoint_url,
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            region_name=self._region,
            config=BotoConfig(signature_version="s3v4"),
        )

    async def ensure_bucket(self) -> None:
        """Create the configured bucket if it does not already exist.

        A ``head_bucket`` probe decides existence; only a not-found style
        error code triggers creation, any other client error propagates.
        Outside ``us-east-1`` the bucket is created with an explicit
        ``LocationConstraint`` because AWS S3 rejects a constraint-less
        ``CreateBucket`` sent to any other regional endpoint.

        :return: nothing
        :rtype: None
        :raises ClientError: when the probe fails for a reason other than
            "bucket not found", or when bucket creation fails
        """
        async with self._client() as client:
            try:
                await client.head_bucket(Bucket=self._bucket)
            except ClientError as err:
                code = str(err.response.get("Error", {}).get("Code", ""))
                if code not in ("404", "NoSuchBucket", "NotFound"):
                    raise
                create_kwargs: dict[str, Any] = {"Bucket": self._bucket}
                # us-east-1 is the one region that must NOT carry a
                # LocationConstraint; every other region requires it.
                if self._region and self._region != "us-east-1":
                    create_kwargs["CreateBucketConfiguration"] = {
                        "LocationConstraint": self._region,
                    }
                await client.create_bucket(**create_kwargs)
                log.info(
                    "object store bucket created",
                    extra={"extra_data": {"bucket": self._bucket}},
                )

    async def put(
        self,
        key: str,
        body: AsyncIterator[bytes],
        *,
        content_type: str,
        size: int | None = None,
    ) -> None:
        """Stream ``body`` to ``key``.

        Peak memory is one part plus the latest incoming chunk -- bounded
        independent of total object size (a multi-GB object never sits whole
        in memory). A single PUT is used when the whole object fits one
        part, otherwise S3 multipart. On any failure the partial multipart
        upload is aborted so no orphaned parts linger.

        :param key: tenant-scoped object key
        :ptype key: str
        :param body: async iterator yielding the object's bytes in chunks
        :ptype body: AsyncIterator[bytes]
        :param content_type: MIME type stored on the object
        :ptype content_type: str
        :param size: total byte length when known (advisory; the impl
            streams regardless)
        :ptype size: int | None
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            buffer = bytearray()
            upload_id: str | None = None
            parts: list[dict[str, Any]] = []
            part_number = 1
            completed = False
            try:
                async for chunk in body:
                    buffer.extend(chunk)
                    # drain every full part the buffer currently holds; one
                    # oversized chunk can contribute several parts at once.
                    while len(buffer) >= self._part_size:
                        if upload_id is None:
                            # multipart is started lazily, only once the
                            # object is known to exceed a single part.
                            created = await client.create_multipart_upload(
                                Bucket=self._bucket,
                                Key=key,
                                ContentType=content_type,
                            )
                            upload_id = created["UploadId"]
                        part = bytes(buffer[: self._part_size])
                        del buffer[: self._part_size]
                        resp = await client.upload_part(
                            Bucket=self._bucket,
                            Key=key,
                            PartNumber=part_number,
                            UploadId=upload_id,
                            Body=part,
                        )
                        parts.append({"ETag": resp["ETag"], "PartNumber": part_number})
                        part_number += 1
                if upload_id is None:
                    # the whole object (possibly empty) fit in one buffer.
                    await client.put_object(
                        Bucket=self._bucket,
                        Key=key,
                        Body=bytes(buffer),
                        ContentType=content_type,
                    )
                else:
                    # the trailing remainder is the only part allowed to be
                    # smaller than the multipart minimum.
                    if buffer:
                        resp = await client.upload_part(
                            Bucket=self._bucket,
                            Key=key,
                            PartNumber=part_number,
                            UploadId=upload_id,
                            Body=bytes(buffer),
                        )
                        parts.append({"ETag": resp["ETag"], "PartNumber": part_number})
                    await client.complete_multipart_upload(
                        Bucket=self._bucket,
                        Key=key,
                        UploadId=upload_id,
                        MultipartUpload={"Parts": parts},
                    )
                completed = True
            finally:
                # reached with ``completed`` still False only when something
                # above raised after the multipart upload was opened.
                if upload_id is not None and not completed:
                    try:
                        await client.abort_multipart_upload(Bucket=self._bucket, Key=key, UploadId=upload_id)
                        log.info(
                            "aborted partial multipart upload after error",
                            extra={"extra_data": {"key": key, "upload_id": upload_id}},
                        )
                    except ClientError as abort_err:
                        log.warning(
                            "failed to abort multipart upload after error",
                            extra={
                                "extra_data": {
                                    "key": key,
                                    "upload_id": upload_id,
                                    "error": str(abort_err),
                                }
                            },
                        )
            log.debug(
                "object stored",
                extra={
                    "extra_data": {
                        "key": key,
                        "multipart": upload_id is not None,
                        "parts": len(parts),
                    }
                },
            )

    async def open_read(self, key: str) -> AsyncIterator[bytes]:
        """Open ``key`` for streaming read, yielding bytes in chunks.

        :param key: object key
        :ptype key: str
        :return: async iterator over the object's bytes
        :rtype: AsyncIterator[bytes]
        """
        async with self._client() as client:
            resp = await client.get_object(Bucket=self._bucket, Key=key)
            async for chunk in resp["Body"].iter_chunks(_DOWNLOAD_CHUNK_SIZE):
                yield chunk

    async def delete(self, key: str) -> None:
        """Delete a single object.

        :param key: object key
        :ptype key: str
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            await client.delete_object(Bucket=self._bucket, Key=key)

    async def delete_many(self, keys: list[str]) -> None:
        """Delete many objects, batched to S3's 1000-key request limit.

        The reconciler sweep can exceed 1000 keys, so deletes are chunked
        into ``_DELETE_BATCH_SIZE`` requests rather than one oversized call
        S3/MinIO would reject. Per-key failures reported in a batch response
        are logged as a warning (they do not raise), so a partially failed
        sweep is visible instead of silently looking complete.

        :param keys: object keys to delete
        :ptype keys: list[str]
        :return: nothing
        :rtype: None
        """
        if keys:
            async with self._client() as client:
                for start in range(0, len(keys), _DELETE_BATCH_SIZE):
                    batch = keys[start : start + _DELETE_BATCH_SIZE]
                    resp = await client.delete_objects(
                        Bucket=self._bucket,
                        Delete={
                            "Objects": [{"Key": k} for k in batch],
                            "Quiet": True,
                        },
                    )
                    # ``Quiet`` only suppresses the per-key success list;
                    # keys that could not be deleted still come back under
                    # ``Errors`` on an otherwise successful response.
                    failures = (resp or {}).get("Errors") or []
                    if failures:
                        log.warning(
                            "object store batch delete reported per-key failures",
                            extra={
                                "extra_data": {
                                    "batch_size": len(batch),
                                    "failed": len(failures),
                                    "sample": [
                                        {"key": item.get("Key"), "code": item.get("Code")}
                                        for item in failures[:10]
                                    ],
                                }
                            },
                        )

    async def _iter_contents(self, prefix: str | None) -> AsyncIterator[dict[str, Any]]:
        """Yield each ``Contents`` entry across every listing page.

        Shared pagination for :meth:`list_keys` and :meth:`list_entries` so the
        continuation-token walk lives in one place.

        :param prefix: key-prefix filter, or ``None`` for the whole bucket
        :ptype prefix: str | None
        :return: async iterator over raw ``list_objects_v2`` ``Contents`` dicts
        :rtype: AsyncIterator[dict[str, Any]]
        """
        async with self._client() as client:
            token: str | None = None
            while True:
                kwargs: dict[str, Any] = {"Bucket": self._bucket}
                if prefix is not None:
                    kwargs["Prefix"] = prefix
                if token is not None:
                    kwargs["ContinuationToken"] = token
                resp = await client.list_objects_v2(**kwargs)
                for obj in resp.get("Contents", []):
                    yield obj
                if not resp.get("IsTruncated"):
                    break
                token = resp.get("NextContinuationToken")
                if not token:
                    # a truncated page without a token cannot be continued;
                    # looping would re-request the first page forever.
                    log.warning(
                        "object listing truncated without continuation token",
                        extra={"extra_data": {"bucket": self._bucket, "prefix": prefix}},
                    )
                    break

    async def list_keys(self, prefix: str | None = None) -> AsyncIterator[str]:
        """Yield object keys (paginated), optionally restricted to ``prefix``.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object keys
        :rtype: AsyncIterator[str]
        """
        async for obj in self._iter_contents(prefix):
            yield obj["Key"]

    async def list_entries(self, prefix: str | None = None) -> AsyncIterator[ObjectListing]:
        """Yield object listings (key + last-modified + size), optionally by ``prefix``.

        Carries the ``LastModified`` + ``Size`` metadata S3 already returns on a
        list so the reconciler can judge orphan age without a per-key HEAD.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object listings
        :rtype: AsyncIterator[ObjectListing]
        """
        async for obj in self._iter_contents(prefix):
            yield ObjectListing(
                key=obj["Key"],
                last_modified=obj["LastModified"],
                size_bytes=int(obj["Size"]),
            )

    async def presigned_get_url(self, key: str, *, expires_in: int = 300) -> str:
        """Presigned GET URL for delivery -- bytes never cross the agent.

        :param key: object key
        :ptype key: str
        :param expires_in: URL validity in seconds
        :ptype expires_in: int
        :return: presigned URL
        :rtype: str
        """
        async with self._client() as client:
            url: str = await client.generate_presigned_url(
                "get_object",
                Params={"Bucket": self._bucket, "Key": key},
                ExpiresIn=expires_in,
            )
            return url
349
+
350
+
351
+ #: static conformance guarantee -- S3ObjectStore must satisfy the ObjectStore
352
+ #: contract this package exists to implement. mypy verifies the structural
353
+ #: match here; a missing or mismatched method fails type-checking.
354
+ _OBJECTSTORE_IMPL: type[ObjectStore] = S3ObjectStore
@@ -0,0 +1,81 @@
1
+ """Build a configured :class:`S3ObjectStore` from config + secret references.
2
+
3
+ A producing or consuming pod wires its object store from deployment config:
4
+ ``endpoint_url`` + ``bucket`` + ``region`` as plain values, and the S3
5
+ credentials as platform *secret references* (``env://`` in dev, ``k8s://`` in
6
+ prod) that this helper resolves at construction via
7
+ :func:`threetears.core.security.secret_refs.resolve_secret`. The raw
8
+ credentials are unwrapped at the last moment and live only inside the returned
9
+ store -- never logged, never returned, never held in a plain string here.
10
+
11
+ This lives beside the impl (not in a pod) so every pod -- the pure-``threetears``
12
+ tool pod, an SDK-spawned pod, the reconciler -- wires its store the same tested
13
+ way rather than re-resolving refs by hand.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Any
19
+
20
+ from threetears.core.security.secret_refs import resolve_secret
21
+ from threetears.observe import get_logger
22
+ from threetears.object_store.s3 import S3ObjectStore
23
+
24
+ __all__ = ["build_s3_object_store"]
25
+
26
+ _log = get_logger(__name__)
27
+
28
+
29
def build_s3_object_store(
    *,
    endpoint_url: str | None,
    bucket: str,
    access_key_ref: str,
    secret_key_ref: str,
    region: str = "us-east-1",
    session: Any = None,
) -> S3ObjectStore:
    """Turn deployment config plus two secret references into a ready store.

    Both references are resolved with ``resolve_secret`` and unwrapped directly
    into the store's constructor arguments (access key first, then secret key).

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None`` uses
        the AWS default endpoint
    :ptype endpoint_url: str | None
    :param bucket: target bucket name
    :ptype bucket: str
    :param access_key_ref: secret reference for the access key id
        (``env://VAR`` / ``k8s://path``); resolved here
    :ptype access_key_ref: str
    :param secret_key_ref: secret reference for the secret access key; resolved here
    :ptype secret_key_ref: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param session: aioboto3 session passthrough for tests; ``None`` lets the
        store create its own
    :ptype session: Any
    :return: a streaming object store ready to put/get/delete
    :rtype: S3ObjectStore
    :raises SecretResolutionError: when either credential reference is malformed,
        names an unknown/unimplemented scheme, or cannot be resolved
    """
    store = S3ObjectStore(
        endpoint_url=endpoint_url,
        # unwrapped only here, at the hand-off into the store.
        access_key=resolve_secret(access_key_ref).get_secret_value(),
        secret_key=resolve_secret(secret_key_ref).get_secret_value(),
        bucket=bucket,
        region=region,
        session=session,
    )
    # config shape only -- never the resolved credential values.
    config_shape = {
        "bucket": bucket,
        "region": region,
        "endpoint_configured": endpoint_url is not None,
    }
    _log.info("built S3 object store", extra={"extra_data": config_shape})
    return store
@@ -0,0 +1,108 @@
1
+ """Live integration tests for S3ObjectStore against a running MinIO.
2
+
3
+ Marked ``integration`` so the default unit run excludes them. Defaults
4
+ target the dev MinIO from the compose stack (localhost:9000, minioadmin,
5
+ bucket ``3tears-objects``); override via ``OBJECT_STORE_*`` env vars.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from collections.abc import AsyncIterator
12
+
13
+ import pytest
14
+
15
+ from threetears.object_store.s3 import S3ObjectStore
16
+
17
+ pytestmark = pytest.mark.integration
18
+
19
+ _ENDPOINT = os.environ.get("OBJECT_STORE_ENDPOINT", "http://localhost:9000")
20
+ _ACCESS = os.environ.get("OBJECT_STORE_ACCESS_KEY", "minioadmin")
21
+ _SECRET = os.environ.get("OBJECT_STORE_SECRET_KEY", "minioadmin")
22
+ _BUCKET = os.environ.get("OBJECT_STORE_BUCKET", "3tears-objects")
23
+
24
+
25
def _store(part_size_bytes: int = 8 * 1024 * 1024) -> S3ObjectStore:
    """Return an ``S3ObjectStore`` wired to the module-level MinIO settings.

    :param part_size_bytes: multipart part size
    :ptype part_size_bytes: int
    :return: configured store
    :rtype: S3ObjectStore
    """
    # endpoint / credentials / bucket come from the OBJECT_STORE_* env
    # overrides read at import time (dev-compose defaults otherwise).
    store = S3ObjectStore(
        bucket=_BUCKET,
        endpoint_url=_ENDPOINT,
        access_key=_ACCESS,
        secret_key=_SECRET,
        part_size_bytes=part_size_bytes,
    )
    return store
40
+
41
+
42
async def _collect(stream: AsyncIterator[bytes]) -> bytes:
    """Exhaust ``stream`` and return everything it yielded as one ``bytes``.

    Test helper only -- it deliberately buffers the whole object.

    :param stream: async byte stream
    :ptype stream: AsyncIterator[bytes]
    :return: full content
    :rtype: bytes
    """
    pieces = [piece async for piece in stream]
    return b"".join(pieces)
54
+
55
+
56
async def _aiter(data: bytes, chunk: int) -> AsyncIterator[bytes]:
    """Expose ``data`` as an async stream of consecutive ``chunk``-byte slices.

    The final slice is shorter when ``len(data)`` is not a multiple of
    ``chunk``; empty ``data`` yields nothing.

    :param data: source bytes
    :ptype data: bytes
    :param chunk: chunk size
    :ptype chunk: int
    :return: async byte stream
    :rtype: AsyncIterator[bytes]
    """
    total = len(data)
    for begin in range(0, total, chunk):
        end = begin + chunk
        yield data[begin:end]
68
+
69
+
70
@pytest.mark.asyncio
async def test_put_get_delete_roundtrip_small() -> None:
    """Single-PUT object: write, stream back, presign, list, delete, re-list."""
    key = "itest/small.txt"
    payload = b"hello streaming object store"
    store = _store()

    # 4-byte chunks keep the whole payload far below one part -> single PUT.
    await store.put(key, _aiter(payload, 4), content_type="text/plain")
    round_tripped = await _collect(store.open_read(key))
    assert round_tripped == payload

    presigned = await store.presigned_get_url(key)
    assert key in presigned

    listed = [name async for name in store.list_keys(prefix="itest/")]
    assert key in listed

    await store.delete(key)
    remaining = [name async for name in store.list_keys(prefix="itest/")]
    assert key not in remaining
90
+
91
+
92
@pytest.mark.asyncio
async def test_put_get_roundtrip_multipart_large() -> None:
    """An object larger than one part round-trips via multipart upload.

    The object is removed in ``finally`` so a failed upload, read or
    assertion does not leave 12 MiB of random data behind in the shared
    bucket.
    """
    store = _store(part_size_bytes=5 * 1024 * 1024)
    key = "itest/large.bin"
    payload = os.urandom(12 * 1024 * 1024)  # 12 MiB -> 3 parts at 5 MiB

    try:
        await store.put(
            key,
            _aiter(payload, 1024 * 1024),
            content_type="application/octet-stream",
            size=len(payload),
        )
        got = await _collect(store.open_read(key))
        # cheap length check first: a size mismatch is reported without
        # comparing two multi-megabyte byte strings.
        assert len(got) == len(payload)
        assert got == payload
    finally:
        await store.delete(key)
@@ -0,0 +1,324 @@
1
+ """Unit tests for S3ObjectStore streaming/batching branches.
2
+
3
+ Uses an in-memory fake S3 client injected via the constructor ``session``
4
+ seam, so the critical paths the live-MinIO happy-path can't cheaply cover
5
+ run in CI: empty / exact-multiple / single-giant-chunk / abort-on-failure
6
+ uploads, >1000-key delete batching, and multi-page listing.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import AsyncIterator
12
+ from datetime import UTC, datetime
13
+ from typing import Any
14
+
15
+ import pytest
16
+
17
+ from threetears.object_store.s3 import S3ObjectStore
18
+
19
+
20
+ # parity-exempt: aiobotocore StreamingBody stub -- botocore's dynamically-built response body has no importable Protocol to declare parity against; only iter_chunks is exercised
21
+ class _FakeBody:
22
+ """Streaming body stub exposing aiobotocore's ``iter_chunks``."""
23
+
24
+ def __init__(self, data: bytes) -> None:
25
+ self._data = data
26
+
27
+ async def iter_chunks(self, size: int) -> AsyncIterator[bytes]:
28
+ """Yield the body in ``size``-byte chunks.
29
+
30
+ :param size: chunk size
31
+ :ptype size: int
32
+ :return: async byte stream
33
+ :rtype: AsyncIterator[bytes]
34
+ """
35
+ for i in range(0, len(self._data), size):
36
+ yield self._data[i : i + size]
37
+
38
+
39
+ class _S3State:
40
+ """In-memory backend state shared across clients from one fake session."""
41
+
42
+ def __init__(self) -> None:
43
+ self.objects: dict[str, bytes] = {}
44
+ self.mtimes: dict[str, datetime] = {}
45
+ self.delete_batches: list[list[str]] = []
46
+ self.aborted: list[str] = []
47
+ self.completed: list[str] = []
48
+ self.page_size: int = 1000
49
+ self.fail_part: int | None = None
50
+
51
+
52
+ # parity-exempt: aioboto3 S3 client stub -- a botocore-generated client with hundreds of operations and no importable Protocol; only the get/put/list/delete/presign calls S3ObjectStore makes are stubbed
53
+ class _FakeS3Client:
54
+ """Minimal in-memory S3 client matching the calls S3ObjectStore makes."""
55
+
56
+ def __init__(self, state: _S3State) -> None:
57
+ self._s = state
58
+ self._mpu: dict[str, dict[int, bytes]] = {}
59
+ self._counter = 0
60
+
61
+ async def __aenter__(self) -> _FakeS3Client:
62
+ return self
63
+
64
+ async def __aexit__(self, *exc: object) -> bool:
65
+ return False
66
+
67
+ async def create_multipart_upload(self, *, Bucket: str, Key: str, ContentType: str | None = None) -> dict[str, Any]:
68
+ self._counter += 1
69
+ uid = f"mpu-{self._counter}"
70
+ self._mpu[uid] = {}
71
+ return {"UploadId": uid}
72
+
73
+ async def upload_part(
74
+ self, *, Bucket: str, Key: str, PartNumber: int, UploadId: str, Body: bytes
75
+ ) -> dict[str, Any]:
76
+ if self._s.fail_part is not None and PartNumber == self._s.fail_part:
77
+ raise RuntimeError("simulated upload_part failure")
78
+ self._mpu[UploadId][PartNumber] = bytes(Body)
79
+ return {"ETag": f'"etag-{PartNumber}"'}
80
+
81
+ async def complete_multipart_upload(
82
+ self, *, Bucket: str, Key: str, UploadId: str, MultipartUpload: dict[str, Any]
83
+ ) -> dict[str, Any]:
84
+ stored = self._mpu.pop(UploadId)
85
+ self._s.objects[Key] = b"".join(stored[p["PartNumber"]] for p in MultipartUpload["Parts"])
86
+ self._s.completed.append(Key)
87
+ return {}
88
+
89
+ async def abort_multipart_upload(self, *, Bucket: str, Key: str, UploadId: str) -> dict[str, Any]:
90
+ self._mpu.pop(UploadId, None)
91
+ self._s.aborted.append(Key)
92
+ return {}
93
+
94
+ async def put_object(self, *, Bucket: str, Key: str, Body: bytes, ContentType: str | None = None) -> dict[str, Any]:
95
+ self._s.objects[Key] = bytes(Body)
96
+ return {}
97
+
98
+ async def get_object(self, *, Bucket: str, Key: str) -> dict[str, Any]:
99
+ return {"Body": _FakeBody(self._s.objects[Key])}
100
+
101
+ async def delete_object(self, *, Bucket: str, Key: str) -> dict[str, Any]:
102
+ self._s.objects.pop(Key, None)
103
+ return {}
104
+
105
+ async def delete_objects(self, *, Bucket: str, Delete: dict[str, Any]) -> dict[str, Any]:
106
+ batch = [o["Key"] for o in Delete["Objects"]]
107
+ self._s.delete_batches.append(batch)
108
+ for k in batch:
109
+ self._s.objects.pop(k, None)
110
+ return {}
111
+
112
+ async def list_objects_v2(
113
+ self, *, Bucket: str, Prefix: str | None = None, ContinuationToken: str | None = None
114
+ ) -> dict[str, Any]:
115
+ matched = sorted(k for k in self._s.objects if Prefix is None or k.startswith(Prefix))
116
+ start = int(ContinuationToken) if ContinuationToken else 0
117
+ page = matched[start : start + self._s.page_size]
118
+ _epoch = datetime(2020, 1, 1, tzinfo=UTC)
119
+ resp: dict[str, Any] = {
120
+ "Contents": [
121
+ {
122
+ "Key": k,
123
+ "Size": len(self._s.objects[k]),
124
+ "LastModified": self._s.mtimes.get(k, _epoch),
125
+ }
126
+ for k in page
127
+ ]
128
+ }
129
+ if start + self._s.page_size < len(matched):
130
+ resp["IsTruncated"] = True
131
+ resp["NextContinuationToken"] = str(start + self._s.page_size)
132
+ return resp
133
+
134
+
135
+ # parity-exempt: aioboto3 Session.client() factory stub -- an external SDK context-manager factory with no importable Protocol to mirror
136
+ class _FakeSession:
137
+ """Fake aioboto3 session handing out fresh in-memory clients."""
138
+
139
+ def __init__(self, state: _S3State) -> None:
140
+ self._state = state
141
+
142
+ def client(self, *args: object, **kwargs: object) -> _FakeS3Client:
143
+ """Return a fresh fake client over the shared state.
144
+
145
+ :return: fake S3 client
146
+ :rtype: _FakeS3Client
147
+ """
148
+ return _FakeS3Client(self._state)
149
+
150
+
151
def _store(state: _S3State, *, part_size_bytes: int = 5 * 1024 * 1024) -> S3ObjectStore:
    """Construct the store under test on top of a fake session over ``state``.

    :param state: shared fake backend state
    :ptype state: _S3State
    :param part_size_bytes: multipart part size
    :ptype part_size_bytes: int
    :return: store under test
    :rtype: S3ObjectStore
    """
    fake_session = _FakeSession(state)
    # placeholder credentials and bucket; the fake client ignores them
    store = S3ObjectStore(
        endpoint_url=None,
        access_key="k",
        secret_key="s",
        bucket="b",
        part_size_bytes=part_size_bytes,
        session=fake_session,
    )
    return store
169
+
170
+
171
+ async def _aiter(data: bytes, chunk: int) -> AsyncIterator[bytes]:
172
+ """Yield ``data`` in ``chunk``-sized pieces.
173
+
174
+ :param data: source bytes
175
+ :ptype data: bytes
176
+ :param chunk: chunk size
177
+ :ptype chunk: int
178
+ :return: async byte stream
179
+ :rtype: AsyncIterator[bytes]
180
+ """
181
+ for i in range(0, len(data), chunk):
182
+ yield data[i : i + chunk]
183
+
184
+
185
+ async def _empty() -> AsyncIterator[bytes]:
186
+ """An empty async byte stream.
187
+
188
+ :return: async byte stream that yields nothing
189
+ :rtype: AsyncIterator[bytes]
190
+ """
191
+ if False: # pragma: no cover
192
+ yield b""
193
+
194
+
195
@pytest.mark.asyncio
async def test_put_empty_object_uses_single_put() -> None:
    """An empty stream is stored as a 0-byte object and completes no multipart upload."""
    backend = _S3State()
    store = _store(backend)
    await store.put("k/empty", _empty(), content_type="application/octet-stream")
    assert backend.objects["k/empty"] == b""
    # no multipart completion was recorded by the fake
    assert backend.completed == []
202
+
203
+
204
@pytest.mark.asyncio
async def test_put_small_object_uses_single_put() -> None:
    """A payload smaller than one part is stored without completing a multipart upload."""
    backend = _S3State()
    store = _store(backend)
    source = _aiter(b"hello", 2)
    await store.put("k/small", source, content_type="text/plain")
    assert backend.objects["k/small"] == b"hello"
    # no multipart completion was recorded by the fake
    assert backend.completed == []
211
+
212
+
213
@pytest.mark.asyncio
async def test_put_exact_multiple_skips_empty_final_part() -> None:
    """A payload of exactly two full parts is stored intact via one completed multipart upload."""
    part_size = 5 * 1024 * 1024
    backend = _S3State()
    body = b"x" * (2 * part_size)
    store = _store(backend, part_size_bytes=part_size)
    # feed the payload in chunks that coincide exactly with the part size
    await store.put("k/exact", _aiter(body, part_size), content_type="application/octet-stream")
    assert backend.objects["k/exact"] == body
    assert backend.completed == ["k/exact"]
224
+
225
+
226
@pytest.mark.asyncio
async def test_put_multipart_with_remainder() -> None:
    """A payload of one full part plus 1234 extra bytes is stored intact."""
    part_size = 5 * 1024 * 1024
    backend = _S3State()
    body = b"y" * (part_size + 1234)
    store = _store(backend, part_size_bytes=part_size)
    # 64 KiB input chunks, far smaller than one part
    await store.put("k/rem", _aiter(body, 65536), content_type="application/octet-stream")
    assert backend.objects["k/rem"] == body
236
+
237
+
238
@pytest.mark.asyncio
async def test_put_single_chunk_larger_than_part() -> None:
    """A payload delivered as one chunk three parts long is stored intact."""
    part_size = 5 * 1024 * 1024
    backend = _S3State()
    total = 3 * part_size
    body = b"z" * total
    store = _store(backend, part_size_bytes=part_size)
    # chunk size equals the payload size, so the stream yields exactly one chunk
    await store.put("k/big", _aiter(body, total), content_type="application/octet-stream")
    assert backend.objects["k/big"] == body
248
+
249
+
250
@pytest.mark.asyncio
async def test_put_aborts_multipart_on_mid_stream_failure() -> None:
    """When part 2 fails to upload, the error propagates and the upload is aborted."""
    part_size = 5 * 1024 * 1024
    backend = _S3State()
    # make the fake raise when part number 2 is uploaded
    backend.fail_part = 2
    body = b"w" * (3 * part_size)
    expected_error = pytest.raises(RuntimeError, match="simulated upload_part failure")
    with expected_error:
        await _store(backend, part_size_bytes=part_size).put(
            "k/fail", _aiter(body, part_size), content_type="application/octet-stream"
        )
    # the abort was recorded and nothing was stored under the key
    assert backend.aborted == ["k/fail"]
    assert "k/fail" not in backend.objects
263
+
264
+
265
@pytest.mark.asyncio
async def test_delete_many_batches_over_the_1000_key_limit() -> None:
    """Deleting 2500 keys issues batches of 1000, 1000 and 500 keys."""
    backend = _S3State()
    doomed = [f"k/{n}" for n in range(2500)]
    await _store(backend).delete_many(doomed)
    batch_sizes = [len(batch) for batch in backend.delete_batches]
    assert batch_sizes == [1000, 1000, 500]
    assert all(size <= 1000 for size in batch_sizes)
273
+
274
+
275
@pytest.mark.asyncio
async def test_delete_many_empty_is_noop() -> None:
    """An empty key list results in no recorded delete batch."""
    backend = _S3State()
    store = _store(backend)
    await store.delete_many([])
    assert backend.delete_batches == []
281
+
282
+
283
@pytest.mark.asyncio
async def test_list_keys_paginates_and_filters_by_prefix() -> None:
    """list_keys returns all five ``p/`` keys across 2-key pages and omits other keys."""
    backend = _S3State()
    # force the fake to split the five matching keys over three pages
    backend.page_size = 2
    backend.objects.update({f"p/{n}": b"x" for n in range(5)})
    backend.objects["other"] = b"x"
    found = [name async for name in _store(backend).list_keys(prefix="p/")]
    assert sorted(found) == ["p/0", "p/1", "p/2", "p/3", "p/4"]
293
+
294
+
295
@pytest.mark.asyncio
async def test_list_entries_carries_key_size_and_mtime() -> None:
    """list_entries yields key, size and modification time for each ``p/`` object."""
    older = datetime(2021, 6, 1, tzinfo=UTC)
    newer = datetime(2023, 6, 1, tzinfo=UTC)

    backend = _S3State()
    backend.page_size = 2  # three matching keys -> more than one page
    backend.objects.update({"p/old": b"abc", "p/new": b"defgh", "p/mid": b"z", "other": b"x"})
    # "p/mid" deliberately gets no explicit modification time
    backend.mtimes.update({"p/old": older, "p/new": newer})

    by_key = {}
    async for entry in _store(backend).list_entries(prefix="p/"):
        by_key[entry.key] = entry

    assert sorted(by_key) == ["p/mid", "p/new", "p/old"]
    old_entry = by_key["p/old"]
    assert old_entry.size_bytes == 3
    assert old_entry.last_modified == older
    new_entry = by_key["p/new"]
    assert new_entry.size_bytes == 5
    assert new_entry.last_modified == newer
    # unset mtime falls back to the fake's epoch default, never crashes
    assert by_key["p/mid"].last_modified == datetime(2020, 1, 1, tzinfo=UTC)
316
+
317
+
318
@pytest.mark.asyncio
async def test_open_read_streams_back_the_object() -> None:
    """The chunks yielded by open_read concatenate to the stored bytes."""
    backend = _S3State()
    backend.objects["k/r"] = b"abcdefgh"
    received = []
    async for piece in _store(backend).open_read("k/r"):
        received.append(piece)
    assert b"".join(received) == b"abcdefgh"
@@ -0,0 +1,106 @@
1
+ """Tests for build_s3_object_store -- secret-ref resolution into a store.
2
+
3
+ A capturing fake session records the kwargs the store hands to ``session.client``
4
+ when it opens a connection, so we can assert the RESOLVED credentials + the
5
+ endpoint/region config reach the S3 client without touching the store's private
6
+ attributes (the store deliberately exposes none).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from typing import Any
13
+
14
+ import pytest
15
+
16
+ from threetears.core.security.secret_refs import SecretResolutionError
17
+ from threetears.object_store.wiring import build_s3_object_store
18
+
19
+
20
+ class _NoopClient:
21
+ """An S3 client stub that satisfies the ensure_bucket() call path."""
22
+
23
+ async def __aenter__(self) -> _NoopClient:
24
+ return self
25
+
26
+ async def __aexit__(self, *exc: object) -> bool:
27
+ return False
28
+
29
+ async def head_bucket(self, **kwargs: object) -> dict[str, Any]:
30
+ return {}
31
+
32
+
33
+ class _CapturingSession:
34
+ """Fake aioboto3 session recording the kwargs of the last client() call."""
35
+
36
+ def __init__(self) -> None:
37
+ self.client_kwargs: dict[str, Any] | None = None
38
+
39
+ def client(self, *args: object, **kwargs: Any) -> _NoopClient:
40
+ self.client_kwargs = kwargs
41
+ return _NoopClient()
42
+
43
+
44
@pytest.mark.asyncio
async def test_resolved_creds_and_config_reach_the_client(monkeypatch: pytest.MonkeyPatch) -> None:
    """env:// refs resolve and flow (with endpoint/region) into session.client().

    Marked with ``pytest.mark.asyncio`` explicitly so the coroutine is run as a
    test even when pytest-asyncio is not in auto mode.
    """
    monkeypatch.setenv("TEST_S3_ACCESS_KEY", "AKIA-RESOLVED")
    monkeypatch.setenv("TEST_S3_SECRET_KEY", "SECRET-RESOLVED")
    session = _CapturingSession()
    store = build_s3_object_store(
        endpoint_url="http://minio:9000",
        bucket="3tears-objects",
        access_key_ref="env://TEST_S3_ACCESS_KEY",
        secret_key_ref="env://TEST_S3_SECRET_KEY",
        region="eu-west-1",
        session=session,
    )
    # opening a client (ensure_bucket) is what hands the resolved creds to aioboto3.
    await store.ensure_bucket()
    assert session.client_kwargs is not None
    assert session.client_kwargs["aws_access_key_id"] == "AKIA-RESOLVED"
    assert session.client_kwargs["aws_secret_access_key"] == "SECRET-RESOLVED"
    assert session.client_kwargs["endpoint_url"] == "http://minio:9000"
    assert session.client_kwargs["region_name"] == "eu-west-1"
64
+
65
+
66
def test_no_credential_value_is_logged(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture) -> None:
    """Neither resolved secret appears in any captured log message or ``extra_data``."""
    monkeypatch.setenv("TEST_AK", "AKIA-SUPERSECRET-VALUE")
    monkeypatch.setenv("TEST_SK", "SK-SUPERSECRET-VALUE")
    with caplog.at_level(logging.DEBUG, logger="threetears.object_store.wiring"):
        build_s3_object_store(
            endpoint_url="http://minio:9000",
            bucket="b",
            access_key_ref="env://TEST_AK",
            secret_key_ref="env://TEST_SK",
        )
    # gather everything inspected for leaks: rendered messages plus any extra_data attribute
    messages = [record.getMessage() for record in caplog.records]
    extras = [repr(getattr(record, "extra_data", None)) for record in caplog.records]
    logged_text = " ".join(messages + extras)
    for secret in ("AKIA-SUPERSECRET-VALUE", "SK-SUPERSECRET-VALUE"):
        assert secret not in logged_text
    # sanity: the build log actually fired, so the assertion above is not vacuous.
    assert any("built S3 object store" in message for message in messages)
84
+
85
+
86
def test_unknown_scheme_ref_raises(monkeypatch: pytest.MonkeyPatch) -> None:
    """Building with an access-key ref of unknown scheme raises SecretResolutionError."""
    # the secret-key ref is valid, so only the bogus access-key ref can be at fault
    monkeypatch.setenv("TEST_S3_SECRET_KEY", "ok")
    credential_refs = {
        "access_key_ref": "bogus://nope",
        "secret_key_ref": "env://TEST_S3_SECRET_KEY",
    }
    with pytest.raises(SecretResolutionError):
        build_s3_object_store(endpoint_url=None, bucket="b", **credential_refs)
96
+
97
+
98
def test_missing_env_ref_raises(monkeypatch: pytest.MonkeyPatch) -> None:
    """An env:// ref pointing at an unset variable fails closed at build time.

    The referenced variables are removed explicitly first, so the outcome does
    not depend on what the host environment happens to define.
    """
    # raising=False: it is fine (and expected) for the variables to be absent already
    monkeypatch.delenv("DEFINITELY_UNSET_S3_KEY_XYZ", raising=False)
    monkeypatch.delenv("ALSO_UNSET_S3_SECRET_XYZ", raising=False)
    with pytest.raises(SecretResolutionError):
        build_s3_object_store(
            endpoint_url=None,
            bucket="b",
            access_key_ref="env://DEFINITELY_UNSET_S3_KEY_XYZ",
            secret_key_ref="env://ALSO_UNSET_S3_SECRET_XYZ",
        )