3tears-object-store 0.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- 3tears_object_store-0.14.0/.gitignore +216 -0
- 3tears_object_store-0.14.0/LICENSE +21 -0
- 3tears_object_store-0.14.0/PKG-INFO +51 -0
- 3tears_object_store-0.14.0/README.md +29 -0
- 3tears_object_store-0.14.0/pyproject.toml +44 -0
- 3tears_object_store-0.14.0/src/threetears/object_store/__init__.py +11 -0
- 3tears_object_store-0.14.0/src/threetears/object_store/py.typed +0 -0
- 3tears_object_store-0.14.0/src/threetears/object_store/s3.py +354 -0
- 3tears_object_store-0.14.0/src/threetears/object_store/wiring.py +81 -0
- 3tears_object_store-0.14.0/tests/integration/test_s3_objectstore.py +108 -0
- 3tears_object_store-0.14.0/tests/unit/test_s3_streaming.py +324 -0
- 3tears_object_store-0.14.0/tests/unit/test_wiring.py +106 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
# .vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
|
|
194
|
+
# PyPI configuration file
|
|
195
|
+
.pypirc
|
|
196
|
+
|
|
197
|
+
# Cursor
|
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
200
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
201
|
+
.cursorignore
|
|
202
|
+
.cursorindexingignore
|
|
203
|
+
|
|
204
|
+
# Marimo
|
|
205
|
+
marimo/_static/
|
|
206
|
+
marimo/_lsp/
|
|
207
|
+
__marimo__/
|
|
208
|
+
|
|
209
|
+
# Claude Code local state
|
|
210
|
+
.claude/
|
|
211
|
+
|
|
212
|
+
# prawduct session evidence (local governance artifacts, never shipped)
|
|
213
|
+
.prawduct/
|
|
214
|
+
|
|
215
|
+
# macOS folder metadata
|
|
216
|
+
.DS_Store
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mark Pace
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: 3tears-object-store
|
|
3
|
+
Version: 0.14.0
|
|
4
|
+
Summary: Streaming S3-compatible object store for large binary artifacts (Path-2)
|
|
5
|
+
Project-URL: Repository, https://github.com/pacepace/3tears
|
|
6
|
+
Author: pace
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Framework :: AsyncIO
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.14
|
|
17
|
+
Requires-Dist: 3tears
|
|
18
|
+
Requires-Dist: 3tears-media-contracts
|
|
19
|
+
Requires-Dist: 3tears-observe
|
|
20
|
+
Requires-Dist: aioboto3>=13
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# 3tears-object-store
|
|
24
|
+
|
|
25
|
+
Streaming S3-compatible object store for large binary artifacts (Path-2 of
|
|
26
|
+
the scope-and-objects design): pcaps, DB dumps, rendered reports, evidence.
|
|
27
|
+
|
|
28
|
+
Implements the dependency-free `ObjectStore` protocol from
|
|
29
|
+
`3tears-media-contracts` over any S3-compatible backend (MinIO in dev, S3 in
|
|
30
|
+
prod). **Streaming by contract** — uploads move through one part-size buffer
|
|
31
|
+
at a time via S3 multipart; downloads yield the response body in chunks — so
|
|
32
|
+
a multi-GB object never has to sit whole in a pod's memory.
|
|
33
|
+
|
|
34
|
+
Keys follow the platform's locked scope-first scheme (`keys.build_object_key`):
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
<customer_id>/<scope>/<category>/<YYYY>/<MM>/<DD>/<object_id>/<filename>
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Lifted from metallm's `S3Service` and made streaming.
|
|
41
|
+
|
|
42
|
+
## Dependency note
|
|
43
|
+
|
|
44
|
+
`aioboto3` (the async S3 client) tracks `aiobotocore`, which caps `botocore`
|
|
45
|
+
below the latest sync-`boto3` release. Adding this package therefore pins the
|
|
46
|
+
workspace's `botocore`/`boto3` to lower versions and transitively pulls `wrapt` and `lxml`
|
|
47
|
+
down a major version. That cap is inherent to using an async S3 client and is
|
|
48
|
+
accepted — the full 3tears suite is green under the resolved set. If any
|
|
49
|
+
package comes to rely on `wrapt>=2` or `lxml>=6` behavior, add an explicit
|
|
50
|
+
lower bound at the workspace level so resolution fails loudly instead of
|
|
51
|
+
silently regressing.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# 3tears-object-store
|
|
2
|
+
|
|
3
|
+
Streaming S3-compatible object store for large binary artifacts (Path-2 of
|
|
4
|
+
the scope-and-objects design): pcaps, DB dumps, rendered reports, evidence.
|
|
5
|
+
|
|
6
|
+
Implements the dependency-free `ObjectStore` protocol from
|
|
7
|
+
`3tears-media-contracts` over any S3-compatible backend (MinIO in dev, S3 in
|
|
8
|
+
prod). **Streaming by contract** — uploads move through one part-size buffer
|
|
9
|
+
at a time via S3 multipart; downloads yield the response body in chunks — so
|
|
10
|
+
a multi-GB object never has to sit whole in a pod's memory.
|
|
11
|
+
|
|
12
|
+
Keys follow the platform's locked scope-first scheme (`keys.build_object_key`):
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
<customer_id>/<scope>/<category>/<YYYY>/<MM>/<DD>/<object_id>/<filename>
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Lifted from metallm's `S3Service` and made streaming.
|
|
19
|
+
|
|
20
|
+
## Dependency note
|
|
21
|
+
|
|
22
|
+
`aioboto3` (the async S3 client) tracks `aiobotocore`, which caps `botocore`
|
|
23
|
+
below the latest sync-`boto3` release. Adding this package therefore pins the
|
|
24
|
+
workspace's `botocore`/`boto3` to lower versions and transitively pulls `wrapt` and `lxml`
|
|
25
|
+
down a major version. That cap is inherent to using an async S3 client and is
|
|
26
|
+
accepted — the full 3tears suite is green under the resolved set. If any
|
|
27
|
+
package comes to rely on `wrapt>=2` or `lxml>=6` behavior, add an explicit
|
|
28
|
+
lower bound at the workspace level so resolution fails loudly instead of
|
|
29
|
+
silently regressing.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "3tears-object-store"
|
|
7
|
+
version = "0.14.0"
|
|
8
|
+
description = "Streaming S3-compatible object store for large binary artifacts (Path-2)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.14"
|
|
11
|
+
authors = [{name = "pace"}]
|
|
12
|
+
license = "MIT"
|
|
13
|
+
license-files = ["LICENSE"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Framework :: AsyncIO",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.14",
|
|
20
|
+
"Topic :: Software Development :: Libraries",
|
|
21
|
+
"Typing :: Typed",
|
|
22
|
+
]
|
|
23
|
+
# the contract is dependency-free (media-contracts); this impl package
|
|
24
|
+
# isolates the aioboto3 (-> aiobotocore -> botocore) S3 client tree so it
|
|
25
|
+
# never leaks into the contract or its other consumers. core (``3tears``) is
|
|
26
|
+
# pulled in for the secret_refs resolver used by the wiring helper -- core is
|
|
27
|
+
# foundational + acyclic and brings no aioboto3, so the isolation still holds.
|
|
28
|
+
dependencies = [
|
|
29
|
+
"3tears",
|
|
30
|
+
"3tears-media-contracts",
|
|
31
|
+
"3tears-observe",
|
|
32
|
+
"aioboto3>=13",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Repository = "https://github.com/pacepace/3tears"
|
|
37
|
+
|
|
38
|
+
[tool.uv.sources]
|
|
39
|
+
3tears = { workspace = true }
|
|
40
|
+
3tears-media-contracts = { workspace = true }
|
|
41
|
+
3tears-observe = { workspace = true }
|
|
42
|
+
|
|
43
|
+
[tool.hatch.build.targets.wheel]
|
|
44
|
+
packages = ["src/threetears"]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Streaming S3-compatible object store for large binary artifacts (Path-2)."""
|
|
2
|
+
|
|
3
|
+
# the key builder is a CONTRACT (the locked scope-first layout), so it lives in
|
|
4
|
+
# the dependency-free media-contracts package -- a producing tool can build a key
|
|
5
|
+
# without inheriting this package's aioboto3 client tree. re-exported here for
|
|
6
|
+
# back-compat with callers importing it off the impl package.
|
|
7
|
+
from threetears.media.contracts.keys import build_object_key, sanitize_segment
|
|
8
|
+
from threetears.object_store.s3 import S3ObjectStore
|
|
9
|
+
from threetears.object_store.wiring import build_s3_object_store
|
|
10
|
+
|
|
11
|
+
__all__ = ["S3ObjectStore", "build_object_key", "build_s3_object_store", "sanitize_segment"]
|
|
File without changes
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Streaming S3-compatible object store (aioboto3).
|
|
2
|
+
|
|
3
|
+
Implements :class:`threetears.media.contracts.ObjectStore` over any
|
|
4
|
+
S3-compatible backend (MinIO in dev, S3 in prod). Never buffers a whole
|
|
5
|
+
object: uploads stream through one part-size buffer at a time via S3
|
|
6
|
+
multipart (or a single PUT when the whole object fits one part); downloads
|
|
7
|
+
yield the response body in chunks. Lifted from metallm's ``S3Service`` and
|
|
8
|
+
made streaming.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import AsyncIterator
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import aioboto3 # type: ignore[import-untyped]
|
|
17
|
+
from botocore.config import Config as BotoConfig # type: ignore[import-untyped]
|
|
18
|
+
from botocore.exceptions import ClientError # type: ignore[import-untyped]
|
|
19
|
+
from threetears.media.contracts import ObjectListing, ObjectStore
|
|
20
|
+
from threetears.observe import get_logger
|
|
21
|
+
|
|
22
|
+
__all__ = ["S3ObjectStore"]
|
|
23
|
+
|
|
24
|
+
log = get_logger(__name__)
|
|
25
|
+
|
|
26
|
+
#: Smallest size S3 accepts for a multipart part (5 MiB); only the final part
#: of an upload may be smaller.
_MIN_PART_SIZE = 5 * 1024**2

#: Part size used when the caller does not choose one. It is also the upload
#: buffer ceiling: a single ``put`` never holds more than one part-size buffer,
#: whatever the total object size.
_DEFAULT_PART_SIZE = 8 * 1024**2

#: Chunk size requested when streaming a download.
_DOWNLOAD_CHUNK_SIZE = 1024**2

#: Upper bound on keys per S3 ``DeleteObjects`` request (1000); the reconciler
#: sweep is split into batches of this size.
_DELETE_BATCH_SIZE = 1_000
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class S3ObjectStore:
    """Streaming ObjectStore over an S3-compatible backend.

    Every operation opens its own client from the configured session via
    :meth:`_client`. Uploads hold at most one part-size buffer at a time and
    downloads are yielded chunk by chunk, so object size never dictates
    memory use.

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None``
        uses the AWS default endpoint
    :ptype endpoint_url: str | None
    :param access_key: access key id
    :ptype access_key: str
    :param secret_key: secret access key
    :ptype secret_key: str
    :param bucket: target bucket name
    :ptype bucket: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param part_size_bytes: multipart part size / upload buffer ceiling;
        must be >= 5 MiB
    :ptype part_size_bytes: int
    :param session: aioboto3 session to use; defaults to a fresh
        ``aioboto3.Session()``. Injectable so tests can supply a fake client.
    :ptype session: Any
    :raises ValueError: when ``part_size_bytes`` is below the 5 MiB S3
        multipart minimum
    """

    def __init__(
        self,
        *,
        endpoint_url: str | None,
        access_key: str,
        secret_key: str,
        bucket: str,
        region: str = "us-east-1",
        part_size_bytes: int = _DEFAULT_PART_SIZE,
        session: Any = None,
    ) -> None:
        if part_size_bytes < _MIN_PART_SIZE:
            raise ValueError("part_size_bytes must be >= 5 MiB (S3 multipart minimum)")
        self._endpoint_url = endpoint_url
        self._access_key = access_key
        self._secret_key = secret_key
        self._bucket = bucket
        self._region = region
        self._part_size = part_size_bytes
        self._session = session if session is not None else aioboto3.Session()

    def _client(self) -> Any:
        """Return an async-context-manager S3 client.

        :return: aioboto3 client context manager
        :rtype: Any
        """
        return self._session.client(
            "s3",
            endpoint_url=self._endpoint_url,
            aws_access_key_id=self._access_key,
            aws_secret_access_key=self._secret_key,
            region_name=self._region,
            config=BotoConfig(signature_version="s3v4"),
        )

    @staticmethod
    def _error_code(err: Any) -> str:
        """Extract the service error code from a ``ClientError``.

        :param err: the raised client error
        :ptype err: Any
        :return: the ``Error.Code`` value, or ``""`` when absent
        :rtype: str
        """
        return str(err.response.get("Error", {}).get("Code", ""))

    async def ensure_bucket(self) -> None:
        """Create the configured bucket if it does not already exist.

        Outside ``us-east-1`` the create request carries an explicit
        ``LocationConstraint``; AWS rejects a region-less create sent to any
        other regional endpoint. Losing a creation race to another process
        (``BucketAlreadyOwnedByYou``) is treated as success.

        :return: nothing
        :rtype: None
        :raises ClientError: when the bucket probe or creation fails for any
            reason other than "bucket missing" / "bucket already owned"
        """
        async with self._client() as client:
            try:
                await client.head_bucket(Bucket=self._bucket)
                return
            except ClientError as err:
                if self._error_code(err) not in ("404", "NoSuchBucket", "NotFound"):
                    raise

            create_kwargs: dict[str, Any] = {"Bucket": self._bucket}
            # us-east-1 is the one region that must NOT be named as a
            # location constraint; every other region has to be.
            if self._region and self._region != "us-east-1":
                create_kwargs["CreateBucketConfiguration"] = {
                    "LocationConstraint": self._region,
                }
            try:
                await client.create_bucket(**create_kwargs)
            except ClientError as err:
                # another caller created it between our HEAD and our create
                if self._error_code(err) != "BucketAlreadyOwnedByYou":
                    raise
                return
            log.info(
                "object store bucket created",
                extra={"extra_data": {"bucket": self._bucket}},
            )

    async def _upload_part(
        self,
        client: Any,
        key: str,
        upload_id: str,
        part_number: int,
        data: bytes,
    ) -> dict[str, Any]:
        """Upload one multipart part and return its completion record.

        :param client: open S3 client
        :ptype client: Any
        :param key: object key the multipart upload targets
        :ptype key: str
        :param upload_id: id returned by ``create_multipart_upload``
        :ptype upload_id: str
        :param part_number: 1-based part index
        :ptype part_number: int
        :param data: the part's bytes
        :ptype data: bytes
        :return: ``{"ETag": ..., "PartNumber": ...}`` for the completion call
        :rtype: dict[str, Any]
        """
        resp = await client.upload_part(
            Bucket=self._bucket,
            Key=key,
            PartNumber=part_number,
            UploadId=upload_id,
            Body=data,
        )
        return {"ETag": resp["ETag"], "PartNumber": part_number}

    async def _abort_multipart(self, client: Any, key: str, upload_id: str) -> None:
        """Best-effort abort of a partial multipart upload; never raises.

        :param client: open S3 client
        :ptype client: Any
        :param key: object key the multipart upload targets
        :ptype key: str
        :param upload_id: id of the upload to abort
        :ptype upload_id: str
        :return: nothing
        :rtype: None
        """
        try:
            await client.abort_multipart_upload(Bucket=self._bucket, Key=key, UploadId=upload_id)
        except Exception as abort_err:
            # Deliberately broad: this runs while the upload's own failure is
            # propagating, and a cleanup error of any kind must not replace it.
            log.warning(
                "failed to abort multipart upload after error",
                extra={
                    "extra_data": {
                        "key": key,
                        "upload_id": upload_id,
                        "error": str(abort_err),
                    }
                },
            )
        else:
            log.info(
                "aborted partial multipart upload after error",
                extra={"extra_data": {"key": key, "upload_id": upload_id}},
            )

    async def put(
        self,
        key: str,
        body: AsyncIterator[bytes],
        *,
        content_type: str,
        size: int | None = None,
    ) -> None:
        """Stream ``body`` to ``key``.

        Peak memory is one part plus the latest incoming chunk -- bounded
        independent of total object size (a multi-GB object never sits whole
        in memory). A single PUT is used when the whole object fits one
        part, otherwise S3 multipart. On any failure the partial multipart
        upload is aborted so no orphaned parts linger; a failure of the abort
        itself is logged and never masks the original error.

        :param key: tenant-scoped object key
        :ptype key: str
        :param body: async iterator yielding the object's bytes in chunks
        :ptype body: AsyncIterator[bytes]
        :param content_type: MIME type stored on the object
        :ptype content_type: str
        :param size: total byte length when known (advisory; the impl
            streams regardless and does not read this value)
        :ptype size: int | None
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            buffer = bytearray()
            upload_id: str | None = None
            parts: list[dict[str, Any]] = []
            part_number = 1
            completed = False
            try:
                async for chunk in body:
                    buffer.extend(chunk)
                    # a single incoming chunk may span several parts
                    while len(buffer) >= self._part_size:
                        if upload_id is None:
                            # multipart starts lazily, once a full part exists
                            created = await client.create_multipart_upload(
                                Bucket=self._bucket,
                                Key=key,
                                ContentType=content_type,
                            )
                            upload_id = created["UploadId"]
                        part = bytes(buffer[: self._part_size])
                        del buffer[: self._part_size]
                        parts.append(await self._upload_part(client, key, upload_id, part_number, part))
                        part_number += 1
                if upload_id is None:
                    # never filled a part: the whole object goes in one PUT
                    await client.put_object(
                        Bucket=self._bucket,
                        Key=key,
                        Body=bytes(buffer),
                        ContentType=content_type,
                    )
                else:
                    if buffer:
                        # trailing short part (allowed to be under the minimum)
                        parts.append(
                            await self._upload_part(client, key, upload_id, part_number, bytes(buffer))
                        )
                    await client.complete_multipart_upload(
                        Bucket=self._bucket,
                        Key=key,
                        UploadId=upload_id,
                        MultipartUpload={"Parts": parts},
                    )
                completed = True
            finally:
                if upload_id is not None and not completed:
                    await self._abort_multipart(client, key, upload_id)
            log.debug(
                "object stored",
                extra={
                    "extra_data": {
                        "key": key,
                        "multipart": upload_id is not None,
                        "parts": len(parts),
                    }
                },
            )

    async def open_read(self, key: str) -> AsyncIterator[bytes]:
        """Open ``key`` for streaming read, yielding bytes in chunks.

        :param key: object key
        :ptype key: str
        :return: async iterator over the object's bytes
        :rtype: AsyncIterator[bytes]
        """
        async with self._client() as client:
            resp = await client.get_object(Bucket=self._bucket, Key=key)
            async for chunk in resp["Body"].iter_chunks(_DOWNLOAD_CHUNK_SIZE):
                yield chunk

    async def delete(self, key: str) -> None:
        """Delete a single object.

        :param key: object key
        :ptype key: str
        :return: nothing
        :rtype: None
        """
        async with self._client() as client:
            await client.delete_object(Bucket=self._bucket, Key=key)

    async def delete_many(self, keys: list[str]) -> None:
        """Delete many objects, batched to S3's 1000-key request limit.

        The reconciler sweep can exceed 1000 keys, so deletes are chunked
        into ``_DELETE_BATCH_SIZE`` requests rather than one oversized call
        S3/MinIO would reject. Per-key failures reported in a batch response
        are logged as a warning; they do not raise.

        :param keys: object keys to delete
        :ptype keys: list[str]
        :return: nothing
        :rtype: None
        """
        if not keys:
            return
        async with self._client() as client:
            for start in range(0, len(keys), _DELETE_BATCH_SIZE):
                batch = keys[start : start + _DELETE_BATCH_SIZE]
                resp = await client.delete_objects(
                    Bucket=self._bucket,
                    Delete={
                        "Objects": [{"Key": k} for k in batch],
                        "Quiet": True,
                    },
                )
                # In quiet mode the response lists only the keys that failed;
                # tolerate a client that returns no mapping at all.
                errors = resp.get("Errors") if isinstance(resp, dict) else None
                if errors:
                    log.warning(
                        "object store batch delete reported failures",
                        extra={
                            "extra_data": {
                                "failed": len(errors),
                                "keys": [e.get("Key") for e in errors],
                            }
                        },
                    )

    async def _iter_contents(self, prefix: str | None) -> AsyncIterator[dict[str, Any]]:
        """Yield each ``Contents`` entry across every listing page.

        Shared pagination for :meth:`list_keys` and :meth:`list_entries` so the
        continuation-token walk lives in one place.

        :param prefix: key-prefix filter, or ``None`` for the whole bucket
        :ptype prefix: str | None
        :return: async iterator over raw ``list_objects_v2`` ``Contents`` dicts
        :rtype: AsyncIterator[dict[str, Any]]
        """
        async with self._client() as client:
            token: str | None = None
            while True:
                kwargs: dict[str, Any] = {"Bucket": self._bucket}
                if prefix is not None:
                    kwargs["Prefix"] = prefix
                if token is not None:
                    kwargs["ContinuationToken"] = token
                resp = await client.list_objects_v2(**kwargs)
                for obj in resp.get("Contents", []):
                    yield obj
                if not resp.get("IsTruncated"):
                    break
                token = resp.get("NextContinuationToken")
                if not token:
                    # A truncated page without a token cannot be continued;
                    # looping again would restart the listing from the top.
                    break

    async def list_keys(self, prefix: str | None = None) -> AsyncIterator[str]:
        """Yield object keys (paginated), optionally restricted to ``prefix``.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object keys
        :rtype: AsyncIterator[str]
        """
        async for obj in self._iter_contents(prefix):
            yield obj["Key"]

    async def list_entries(self, prefix: str | None = None) -> AsyncIterator[ObjectListing]:
        """Yield object listings (key + last-modified + size), optionally by ``prefix``.

        Carries the ``LastModified`` + ``Size`` metadata S3 already returns on a
        list so the reconciler can judge orphan age without a per-key HEAD.

        :param prefix: key-prefix filter (e.g. a tenant's ``<customer_id>/``);
            ``None`` lists the whole bucket
        :ptype prefix: str | None
        :return: async iterator over object listings
        :rtype: AsyncIterator[ObjectListing]
        """
        async for obj in self._iter_contents(prefix):
            yield ObjectListing(
                key=obj["Key"],
                last_modified=obj["LastModified"],
                size_bytes=int(obj["Size"]),
            )

    async def presigned_get_url(self, key: str, *, expires_in: int = 300) -> str:
        """Presigned GET URL for delivery -- bytes never cross the agent.

        :param key: object key
        :ptype key: str
        :param expires_in: URL validity in seconds
        :ptype expires_in: int
        :return: presigned URL
        :rtype: str
        """
        async with self._client() as client:
            url: str = await client.generate_presigned_url(
                "get_object",
                Params={"Bucket": self._bucket, "Key": key},
                ExpiresIn=expires_in,
            )
            return url
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
#: static conformance guarantee -- S3ObjectStore must satisfy the ObjectStore
|
|
352
|
+
#: contract this package exists to implement. mypy verifies the structural
|
|
353
|
+
#: match here; a missing or mismatched method fails type-checking.
|
|
354
|
+
_OBJECTSTORE_IMPL: type[ObjectStore] = S3ObjectStore
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Build a configured :class:`S3ObjectStore` from config + secret references.
|
|
2
|
+
|
|
3
|
+
A producing or consuming pod wires its object store from deployment config:
|
|
4
|
+
``endpoint_url`` + ``bucket`` + ``region`` as plain values, and the S3
|
|
5
|
+
credentials as platform *secret references* (``env://`` in dev, ``k8s://`` in
|
|
6
|
+
prod) that this helper resolves at construction via
|
|
7
|
+
:func:`threetears.core.security.secret_refs.resolve_secret`. The raw
|
|
8
|
+
credentials are unwrapped at the last moment and live only inside the returned
|
|
9
|
+
store -- never logged, never returned, and held as plain strings here only while the store is built.
|
|
10
|
+
|
|
11
|
+
This lives beside the impl (not in a pod) so every pod -- the pure-``threetears``
|
|
12
|
+
tool pod, an SDK-spawned pod, the reconciler -- wires its store the same tested
|
|
13
|
+
way rather than re-resolving refs by hand.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from threetears.core.security.secret_refs import resolve_secret
|
|
21
|
+
from threetears.observe import get_logger
|
|
22
|
+
from threetears.object_store.s3 import S3ObjectStore
|
|
23
|
+
|
|
24
|
+
__all__ = ["build_s3_object_store"]
|
|
25
|
+
|
|
26
|
+
_log = get_logger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_s3_object_store(
    *,
    endpoint_url: str | None,
    bucket: str,
    access_key_ref: str,
    secret_key_ref: str,
    region: str = "us-east-1",
    session: Any = None,
) -> S3ObjectStore:
    """Turn deployment config plus credential references into a ready store.

    Both references are resolved with ``resolve_secret`` and unwrapped straight
    into the ``S3ObjectStore`` constructor; afterwards a single info record
    describing the non-secret configuration is logged.

    :param endpoint_url: S3 endpoint (e.g. ``http://minio:9000``); ``None`` uses
        the AWS default endpoint
    :ptype endpoint_url: str | None
    :param bucket: target bucket name
    :ptype bucket: str
    :param access_key_ref: secret reference for the access key id
        (``env://VAR`` / ``k8s://path``); resolved here
    :ptype access_key_ref: str
    :param secret_key_ref: secret reference for the secret access key; resolved here
    :ptype secret_key_ref: str
    :param region: AWS region (MinIO ignores it; AWS S3 requires it)
    :ptype region: str
    :param session: aioboto3 session passthrough for tests; ``None`` lets the
        store create its own
    :ptype session: Any
    :return: a streaming object store ready to put/get/delete
    :rtype: S3ObjectStore
    :raises SecretResolutionError: when either credential reference is malformed,
        names an unknown/unimplemented scheme, or cannot be resolved
    """
    # Keyword arguments are evaluated left to right, so the access key is
    # resolved before the secret key and neither value gets a local name here.
    built = S3ObjectStore(
        endpoint_url=endpoint_url,
        access_key=resolve_secret(access_key_ref).get_secret_value(),
        secret_key=resolve_secret(secret_key_ref).get_secret_value(),
        bucket=bucket,
        region=region,
        session=session,
    )
    # config shape only -- never the resolved credential values.
    config_shape = {
        "bucket": bucket,
        "region": region,
        "endpoint_configured": endpoint_url is not None,
    }
    _log.info("built S3 object store", extra={"extra_data": config_shape})
    return built
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Live integration tests for S3ObjectStore against a running MinIO.
|
|
2
|
+
|
|
3
|
+
Marked ``integration`` so the default unit run excludes them. Defaults
|
|
4
|
+
target the dev MinIO from the compose stack (localhost:9000, minioadmin,
|
|
5
|
+
bucket ``3tears-objects``); override via ``OBJECT_STORE_*`` env vars.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from collections.abc import AsyncIterator
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
|
|
15
|
+
from threetears.object_store.s3 import S3ObjectStore
|
|
16
|
+
|
|
17
|
+
pytestmark = pytest.mark.integration
|
|
18
|
+
|
|
19
|
+
_ENDPOINT = os.environ.get("OBJECT_STORE_ENDPOINT", "http://localhost:9000")
|
|
20
|
+
_ACCESS = os.environ.get("OBJECT_STORE_ACCESS_KEY", "minioadmin")
|
|
21
|
+
_SECRET = os.environ.get("OBJECT_STORE_SECRET_KEY", "minioadmin")
|
|
22
|
+
_BUCKET = os.environ.get("OBJECT_STORE_BUCKET", "3tears-objects")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _store(part_size_bytes: int = 8 * 1024 * 1024) -> S3ObjectStore:
    """Create a store from the module-level endpoint, credential and bucket settings.

    :param part_size_bytes: multipart part size
    :ptype part_size_bytes: int
    :return: configured store
    :rtype: S3ObjectStore
    """
    # connection settings come from the OBJECT_STORE_* values read at import time
    store = S3ObjectStore(
        bucket=_BUCKET,
        endpoint_url=_ENDPOINT,
        access_key=_ACCESS,
        secret_key=_SECRET,
        part_size_bytes=part_size_bytes,
    )
    return store
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
async def _collect(stream: AsyncIterator[bytes]) -> bytes:
    """Read an async byte stream to exhaustion and return everything it produced.

    Test helper only: the whole content is held in memory.

    :param stream: async byte stream
    :ptype stream: AsyncIterator[bytes]
    :return: full content
    :rtype: bytes
    """
    pieces = [piece async for piece in stream]
    return b"".join(pieces)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
async def _aiter(data: bytes, chunk: int) -> AsyncIterator[bytes]:
    """Expose ``data`` as an async stream of consecutive slices.

    Every slice is ``chunk`` bytes long except possibly the last one; empty
    ``data`` produces no slices at all.

    :param data: source bytes
    :ptype data: bytes
    :param chunk: chunk size
    :ptype chunk: int
    :return: async byte stream
    :rtype: AsyncIterator[bytes]
    """
    total = len(data)
    for start in range(0, total, chunk):
        stop = start + chunk
        yield data[start:stop]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.mark.asyncio
async def test_put_get_delete_roundtrip_small() -> None:
    """Round-trip a small object: put, streamed read, presign, list, delete."""
    object_key = "itest/small.txt"
    body = b"hello streaming object store"
    store = _store()

    # upload in 4-byte chunks, then read the whole object back
    await store.put(object_key, _aiter(body, 4), content_type="text/plain")
    assert await _collect(store.open_read(object_key)) == body

    # the presigned URL must mention the object key
    presigned = await store.presigned_get_url(object_key)
    assert "itest/small.txt" in presigned

    listed_before = [k async for k in store.list_keys(prefix="itest/")]
    assert object_key in listed_before

    # after deletion the key no longer shows up under the prefix
    await store.delete(object_key)
    listed_after = [k async for k in store.list_keys(prefix="itest/")]
    assert object_key not in listed_after
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@pytest.mark.asyncio
async def test_put_get_roundtrip_multipart_large() -> None:
    """An object larger than one part round-trips via multipart upload.

    The object is removed in a ``finally`` clause so a failed upload or a
    failed comparison does not leave 12 MiB of test data behind in the bucket.
    """
    store = _store(part_size_bytes=5 * 1024 * 1024)
    key = "itest/large.bin"
    payload = os.urandom(12 * 1024 * 1024)  # 12 MiB -> 3 parts at 5 MiB

    try:
        await store.put(
            key,
            _aiter(payload, 1024 * 1024),
            content_type="application/octet-stream",
            size=len(payload),
        )
        got = await _collect(store.open_read(key))
        assert got == payload
    finally:
        # runs on success and on failure alike
        await store.delete(key)
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""Unit tests for S3ObjectStore streaming/batching branches.
|
|
2
|
+
|
|
3
|
+
Uses an in-memory fake S3 client injected via the constructor ``session``
|
|
4
|
+
seam, so the critical paths the live-MinIO happy-path can't cheaply cover
|
|
5
|
+
run in CI: empty / exact-multiple / single-giant-chunk / abort-on-failure
|
|
6
|
+
uploads, >1000-key delete batching, and multi-page listing.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections.abc import AsyncIterator
|
|
12
|
+
from datetime import UTC, datetime
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
from threetears.object_store.s3 import S3ObjectStore
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# parity-exempt: aiobotocore StreamingBody stub -- botocore's dynamically-built response body has no importable Protocol to declare parity against; only iter_chunks is exercised
|
|
21
|
+
class _FakeBody:
    """In-memory response body stand-in; ``iter_chunks`` is its only operation."""

    def __init__(self, data: bytes) -> None:
        # complete object content, sliced on demand by iter_chunks
        self._data = data

    async def iter_chunks(self, size: int) -> AsyncIterator[bytes]:
        """Stream the held bytes as consecutive slices of at most ``size`` bytes.

        :param size: chunk size
        :ptype size: int
        :return: async byte stream
        :rtype: AsyncIterator[bytes]
        """
        content = self._data
        for offset in range(0, len(content), size):
            yield content[offset : offset + size]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class _S3State:
    """Mutable in-memory backend that every client of one fake session shares."""

    def __init__(self) -> None:
        # knobs a test may set before driving the store
        self.page_size: int = 1000  # maximum entries returned per listing page
        self.fail_part: int | None = None  # part number whose upload_part raises

        # stored content and optional per-key modification times
        self.objects: dict[str, bytes] = {}
        self.mtimes: dict[str, datetime] = {}

        # call records that tests assert on
        self.delete_batches: list[list[str]] = []  # keys of each delete_objects call
        self.aborted: list[str] = []  # keys whose multipart upload was aborted
        self.completed: list[str] = []  # keys whose multipart upload was completed
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# parity-exempt: aioboto3 S3 client stub -- a botocore-generated client with hundreds of operations and no importable Protocol; only the get/put/list/delete/presign calls S3ObjectStore makes are stubbed
|
|
53
|
+
class _FakeS3Client:
|
|
54
|
+
"""Minimal in-memory S3 client matching the calls S3ObjectStore makes."""
|
|
55
|
+
|
|
56
|
+
def __init__(self, state: _S3State) -> None:
|
|
57
|
+
self._s = state
|
|
58
|
+
self._mpu: dict[str, dict[int, bytes]] = {}
|
|
59
|
+
self._counter = 0
|
|
60
|
+
|
|
61
|
+
async def __aenter__(self) -> _FakeS3Client:
|
|
62
|
+
return self
|
|
63
|
+
|
|
64
|
+
async def __aexit__(self, *exc: object) -> bool:
|
|
65
|
+
return False
|
|
66
|
+
|
|
67
|
+
async def create_multipart_upload(self, *, Bucket: str, Key: str, ContentType: str | None = None) -> dict[str, Any]:
|
|
68
|
+
self._counter += 1
|
|
69
|
+
uid = f"mpu-{self._counter}"
|
|
70
|
+
self._mpu[uid] = {}
|
|
71
|
+
return {"UploadId": uid}
|
|
72
|
+
|
|
73
|
+
async def upload_part(
|
|
74
|
+
self, *, Bucket: str, Key: str, PartNumber: int, UploadId: str, Body: bytes
|
|
75
|
+
) -> dict[str, Any]:
|
|
76
|
+
if self._s.fail_part is not None and PartNumber == self._s.fail_part:
|
|
77
|
+
raise RuntimeError("simulated upload_part failure")
|
|
78
|
+
self._mpu[UploadId][PartNumber] = bytes(Body)
|
|
79
|
+
return {"ETag": f'"etag-{PartNumber}"'}
|
|
80
|
+
|
|
81
|
+
async def complete_multipart_upload(
|
|
82
|
+
self, *, Bucket: str, Key: str, UploadId: str, MultipartUpload: dict[str, Any]
|
|
83
|
+
) -> dict[str, Any]:
|
|
84
|
+
stored = self._mpu.pop(UploadId)
|
|
85
|
+
self._s.objects[Key] = b"".join(stored[p["PartNumber"]] for p in MultipartUpload["Parts"])
|
|
86
|
+
self._s.completed.append(Key)
|
|
87
|
+
return {}
|
|
88
|
+
|
|
89
|
+
async def abort_multipart_upload(self, *, Bucket: str, Key: str, UploadId: str) -> dict[str, Any]:
|
|
90
|
+
self._mpu.pop(UploadId, None)
|
|
91
|
+
self._s.aborted.append(Key)
|
|
92
|
+
return {}
|
|
93
|
+
|
|
94
|
+
async def put_object(self, *, Bucket: str, Key: str, Body: bytes, ContentType: str | None = None) -> dict[str, Any]:
|
|
95
|
+
self._s.objects[Key] = bytes(Body)
|
|
96
|
+
return {}
|
|
97
|
+
|
|
98
|
+
async def get_object(self, *, Bucket: str, Key: str) -> dict[str, Any]:
|
|
99
|
+
return {"Body": _FakeBody(self._s.objects[Key])}
|
|
100
|
+
|
|
101
|
+
async def delete_object(self, *, Bucket: str, Key: str) -> dict[str, Any]:
|
|
102
|
+
self._s.objects.pop(Key, None)
|
|
103
|
+
return {}
|
|
104
|
+
|
|
105
|
+
async def delete_objects(self, *, Bucket: str, Delete: dict[str, Any]) -> dict[str, Any]:
|
|
106
|
+
batch = [o["Key"] for o in Delete["Objects"]]
|
|
107
|
+
self._s.delete_batches.append(batch)
|
|
108
|
+
for k in batch:
|
|
109
|
+
self._s.objects.pop(k, None)
|
|
110
|
+
return {}
|
|
111
|
+
|
|
112
|
+
async def list_objects_v2(
|
|
113
|
+
self, *, Bucket: str, Prefix: str | None = None, ContinuationToken: str | None = None
|
|
114
|
+
) -> dict[str, Any]:
|
|
115
|
+
matched = sorted(k for k in self._s.objects if Prefix is None or k.startswith(Prefix))
|
|
116
|
+
start = int(ContinuationToken) if ContinuationToken else 0
|
|
117
|
+
page = matched[start : start + self._s.page_size]
|
|
118
|
+
_epoch = datetime(2020, 1, 1, tzinfo=UTC)
|
|
119
|
+
resp: dict[str, Any] = {
|
|
120
|
+
"Contents": [
|
|
121
|
+
{
|
|
122
|
+
"Key": k,
|
|
123
|
+
"Size": len(self._s.objects[k]),
|
|
124
|
+
"LastModified": self._s.mtimes.get(k, _epoch),
|
|
125
|
+
}
|
|
126
|
+
for k in page
|
|
127
|
+
]
|
|
128
|
+
}
|
|
129
|
+
if start + self._s.page_size < len(matched):
|
|
130
|
+
resp["IsTruncated"] = True
|
|
131
|
+
resp["NextContinuationToken"] = str(start + self._s.page_size)
|
|
132
|
+
return resp
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# parity-exempt: aioboto3 Session.client() factory stub -- an external SDK context-manager factory with no importable Protocol to mirror
|
|
136
|
+
class _FakeSession:
    """Session stand-in whose ``client()`` builds in-memory clients over one shared state."""

    def __init__(self, state: _S3State) -> None:
        # every client created by this session operates on this same state object
        self._state = state

    def client(self, *args: object, **kwargs: object) -> _FakeS3Client:
        """Create a new fake client; all positional and keyword arguments are ignored.

        :return: fake S3 client
        :rtype: _FakeS3Client
        """
        fresh = _FakeS3Client(self._state)
        return fresh
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _store(state: _S3State, *, part_size_bytes: int = 5 * 1024 * 1024) -> S3ObjectStore:
    """Create the store under test on top of a fake session over ``state``.

    :param state: shared fake backend state
    :ptype state: _S3State
    :param part_size_bytes: multipart part size
    :ptype part_size_bytes: int
    :return: store under test
    :rtype: S3ObjectStore
    """
    fake_session = _FakeSession(state)
    # placeholder credentials and bucket name: the fake client ignores them
    return S3ObjectStore(
        bucket="b",
        access_key="k",
        secret_key="s",
        endpoint_url=None,
        session=fake_session,
        part_size_bytes=part_size_bytes,
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
async def _aiter(data: bytes, chunk: int) -> AsyncIterator[bytes]:
    """Stream ``data`` as consecutive slices of at most ``chunk`` bytes.

    :param data: source bytes
    :ptype data: bytes
    :param chunk: chunk size
    :ptype chunk: int
    :return: async byte stream
    :rtype: AsyncIterator[bytes]
    """
    offsets = range(0, len(data), chunk)
    for begin in offsets:
        yield data[begin : begin + chunk]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
async def _empty() -> AsyncIterator[bytes]:
    """Produce an async byte stream that finishes without yielding anything.

    :return: async byte stream that yields nothing
    :rtype: AsyncIterator[bytes]
    """
    # The loop body never runs; the ``yield`` is only here so that Python
    # treats this function as an async generator.
    for _never in ():
        yield b""  # pragma: no cover
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@pytest.mark.asyncio
async def test_put_empty_object_uses_single_put() -> None:
    """Putting an empty stream stores zero bytes and completes no multipart upload."""
    backend = _S3State()
    store = _store(backend)
    await store.put("k/empty", _empty(), content_type="application/octet-stream")
    # the key exists with empty content, and nothing went through multipart
    assert backend.objects["k/empty"] == b""
    assert backend.completed == []
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@pytest.mark.asyncio
async def test_put_small_object_uses_single_put() -> None:
    """A five-byte object is stored intact and completes no multipart upload."""
    backend = _S3State()
    store = _store(backend)
    # fed in 2-byte chunks, far below the default part size
    await store.put("k/small", _aiter(b"hello", 2), content_type="text/plain")
    assert backend.objects["k/small"] == b"hello"
    assert backend.completed == []
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@pytest.mark.asyncio
async def test_put_exact_multiple_skips_empty_final_part() -> None:
    """An object of exactly two part sizes is stored intact via a completed multipart upload."""
    part_size = 5 * 1024 * 1024
    backend = _S3State()
    body = b"x" * (2 * part_size)
    store = _store(backend, part_size_bytes=part_size)
    # each incoming chunk is exactly one part long
    await store.put("k/exact", _aiter(body, part_size), content_type="application/octet-stream")
    # NOTE(review): the fake client concatenates the listed parts, so a stray
    # 0-byte final part would not change either assertion below.
    assert backend.objects["k/exact"] == body
    assert backend.completed == ["k/exact"]
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
@pytest.mark.asyncio
async def test_put_multipart_with_remainder() -> None:
    """A full part plus a remainder round-trips via multipart.

    The content check alone cannot tell a multipart upload from a single PUT,
    so the test also asserts that a multipart upload was completed for the key.
    """
    part = 5 * 1024 * 1024
    state = _S3State()
    payload = b"y" * (part + 1234)  # one full part plus a 1234-byte tail
    await _store(state, part_size_bytes=part).put(
        "k/rem", _aiter(payload, 65536), content_type="application/octet-stream"
    )
    assert state.objects["k/rem"] == payload
    # only complete_multipart_upload appends to ``completed`` in the fake client
    assert state.completed == ["k/rem"]
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@pytest.mark.asyncio
async def test_put_single_chunk_larger_than_part() -> None:
    """One giant incoming chunk is drained into multiple parts.

    Besides the content check, the test asserts that a multipart upload was
    completed for the key; without it a single PUT of the whole chunk would
    also pass.
    """
    part = 5 * 1024 * 1024
    state = _S3State()
    payload = b"z" * (3 * part)
    # the stream delivers the whole payload as a single chunk of three part sizes
    await _store(state, part_size_bytes=part).put(
        "k/big", _aiter(payload, 3 * part), content_type="application/octet-stream"
    )
    assert state.objects["k/big"] == payload
    # only complete_multipart_upload appends to ``completed`` in the fake client
    assert state.completed == ["k/big"]
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@pytest.mark.asyncio
async def test_put_aborts_multipart_on_mid_stream_failure() -> None:
    """When a part upload fails, the error propagates and the upload is aborted."""
    part_size = 5 * 1024 * 1024
    backend = _S3State()
    backend.fail_part = 2  # the fake raises when part number 2 is uploaded
    body = b"w" * (3 * part_size)
    store = _store(backend, part_size_bytes=part_size)
    with pytest.raises(RuntimeError, match="simulated upload_part failure"):
        await store.put("k/fail", _aiter(body, part_size), content_type="application/octet-stream")
    # the abort was recorded and no object was stored under the key
    assert backend.aborted == ["k/fail"]
    assert "k/fail" not in backend.objects
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@pytest.mark.asyncio
async def test_delete_many_batches_over_the_1000_key_limit() -> None:
    """Deleting 2500 keys issues batches of 1000, 1000 and 500 keys."""
    backend = _S3State()
    wanted = [f"k/{n}" for n in range(2500)]
    await _store(backend).delete_many(wanted)
    batch_sizes = [len(batch) for batch in backend.delete_batches]
    assert batch_sizes == [1000, 1000, 500]
    # no single request exceeds the 1000-key limit
    assert all(size <= 1000 for size in batch_sizes)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
@pytest.mark.asyncio
async def test_delete_many_empty_is_noop() -> None:
    """An empty key list results in no delete_objects call at all."""
    backend = _S3State()
    store = _store(backend)
    await store.delete_many([])
    assert backend.delete_batches == []
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@pytest.mark.asyncio
async def test_list_keys_paginates_and_filters_by_prefix() -> None:
    """With two keys per page, list_keys still returns all five keys under the prefix."""
    backend = _S3State()
    backend.page_size = 2  # five matching keys -> three pages
    backend.objects.update({f"p/{n}": b"x" for n in range(5)})
    backend.objects["other"] = b"x"  # outside the prefix, must not be listed
    found = [k async for k in _store(backend).list_keys(prefix="p/")]
    assert sorted(found) == ["p/0", "p/1", "p/2", "p/3", "p/4"]
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@pytest.mark.asyncio
async def test_list_entries_carries_key_size_and_mtime() -> None:
    """list_entries walks all pages and reports key, size and modification time."""
    backend = _S3State()
    backend.page_size = 2
    old_stamp = datetime(2021, 6, 1, tzinfo=UTC)
    new_stamp = datetime(2023, 6, 1, tzinfo=UTC)
    backend.objects.update({"p/old": b"abc", "p/new": b"defgh", "p/mid": b"z", "other": b"x"})
    # "p/mid" deliberately gets no explicit modification time
    backend.mtimes.update({"p/old": old_stamp, "p/new": new_stamp})

    by_key = {entry.key: entry async for entry in _store(backend).list_entries(prefix="p/")}

    assert sorted(by_key) == ["p/mid", "p/new", "p/old"]
    assert by_key["p/old"].size_bytes == 3
    assert by_key["p/old"].last_modified == old_stamp
    assert by_key["p/new"].size_bytes == 5
    assert by_key["p/new"].last_modified == new_stamp
    # unset mtime falls back to the fake's epoch default, never crashes
    assert by_key["p/mid"].last_modified == datetime(2020, 1, 1, tzinfo=UTC)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
@pytest.mark.asyncio
async def test_open_read_streams_back_the_object() -> None:
    """The chunks produced by open_read concatenate to the stored bytes."""
    stored = b"abcdefgh"
    backend = _S3State()
    backend.objects["k/r"] = stored
    pieces = [piece async for piece in _store(backend).open_read("k/r")]
    assert b"".join(pieces) == b"abcdefgh"
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Tests for build_s3_object_store -- secret-ref resolution into a store.
|
|
2
|
+
|
|
3
|
+
A capturing fake session records the kwargs the store hands to ``session.client``
|
|
4
|
+
when it opens a connection, so we can assert the RESOLVED credentials + the
|
|
5
|
+
endpoint/region config reach the S3 client without touching the store's private
|
|
6
|
+
attributes (the store deliberately exposes none).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import pytest
|
|
15
|
+
|
|
16
|
+
from threetears.core.security.secret_refs import SecretResolutionError
|
|
17
|
+
from threetears.object_store.wiring import build_s3_object_store
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class _NoopClient:
    """Do-nothing S3 client: an async context manager offering only ``head_bucket``."""

    async def __aenter__(self) -> _NoopClient:
        # entering the context yields the client itself
        return self

    async def __aexit__(self, *exc: object) -> bool:
        # False: exceptions raised inside the ``async with`` body are not suppressed
        return False

    async def head_bucket(self, **kwargs: object) -> dict[str, Any]:
        """Accept any keyword arguments and answer with an empty response."""
        return {}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class _CapturingSession:
    """Session stand-in that remembers the keyword arguments of its latest client() call."""

    def __init__(self) -> None:
        # stays None until client() has been called at least once
        self.client_kwargs: dict[str, Any] | None = None

    def client(self, *args: object, **kwargs: Any) -> _NoopClient:
        """Record ``kwargs`` (positional arguments are ignored) and return a no-op client."""
        self.client_kwargs = kwargs
        noop = _NoopClient()
        return noop
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@pytest.mark.asyncio
async def test_resolved_creds_and_config_reach_the_client(monkeypatch: pytest.MonkeyPatch) -> None:
    """env:// refs resolve and flow (with endpoint/region) into session.client().

    Carries the explicit asyncio marker like the other async tests in this
    package, so the coroutine is awaited even when pytest-asyncio runs in
    strict mode.
    """
    monkeypatch.setenv("TEST_S3_ACCESS_KEY", "AKIA-RESOLVED")
    monkeypatch.setenv("TEST_S3_SECRET_KEY", "SECRET-RESOLVED")
    session = _CapturingSession()
    store = build_s3_object_store(
        endpoint_url="http://minio:9000",
        bucket="3tears-objects",
        access_key_ref="env://TEST_S3_ACCESS_KEY",
        secret_key_ref="env://TEST_S3_SECRET_KEY",
        region="eu-west-1",
        session=session,
    )
    # opening a client (ensure_bucket) is what hands the resolved creds to aioboto3.
    await store.ensure_bucket()
    assert session.client_kwargs is not None
    assert session.client_kwargs["aws_access_key_id"] == "AKIA-RESOLVED"
    assert session.client_kwargs["aws_secret_access_key"] == "SECRET-RESOLVED"
    assert session.client_kwargs["endpoint_url"] == "http://minio:9000"
    assert session.client_kwargs["region_name"] == "eu-west-1"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_no_credential_value_is_logged(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture) -> None:
    """Neither resolved secret appears in any captured message or ``extra_data`` payload."""
    monkeypatch.setenv("TEST_AK", "AKIA-SUPERSECRET-VALUE")
    monkeypatch.setenv("TEST_SK", "SK-SUPERSECRET-VALUE")
    with caplog.at_level(logging.DEBUG, logger="threetears.object_store.wiring"):
        build_s3_object_store(
            endpoint_url="http://minio:9000",
            bucket="b",
            access_key_ref="env://TEST_AK",
            secret_key_ref="env://TEST_SK",
        )
    records = caplog.records
    messages = [record.getMessage() for record in records]
    extras = [repr(getattr(record, "extra_data", None)) for record in records]
    # one searchable string holding everything the build logged
    haystack = " ".join(messages + extras)
    assert "AKIA-SUPERSECRET-VALUE" not in haystack
    assert "SK-SUPERSECRET-VALUE" not in haystack
    # sanity: the build log actually fired, so the assertion above is not vacuous.
    assert any("built S3 object store" in text for text in messages)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_unknown_scheme_ref_raises(monkeypatch: pytest.MonkeyPatch) -> None:
    """Building with an access-key ref of unknown scheme raises SecretResolutionError."""
    # the secret-key ref is valid, so only the bogus access-key ref can fail
    monkeypatch.setenv("TEST_S3_SECRET_KEY", "ok")
    bad_ref = "bogus://nope"
    with pytest.raises(SecretResolutionError):
        build_s3_object_store(
            endpoint_url=None,
            bucket="b",
            access_key_ref=bad_ref,
            secret_key_ref="env://TEST_S3_SECRET_KEY",
        )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_missing_env_ref_raises() -> None:
    """Building with env:// refs to variables that are not set raises SecretResolutionError."""
    # both names are chosen so that no environment is expected to define them
    unset_access_ref = "env://DEFINITELY_UNSET_S3_KEY_XYZ"
    unset_secret_ref = "env://ALSO_UNSET_S3_SECRET_XYZ"
    with pytest.raises(SecretResolutionError):
        build_s3_object_store(
            endpoint_url=None,
            bucket="b",
            access_key_ref=unset_access_ref,
            secret_key_ref=unset_secret_ref,
        )
|