seedbraid 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seedbraid-1.1.0/LICENSE +21 -0
- seedbraid-1.1.0/PKG-INFO +371 -0
- seedbraid-1.1.0/README.md +347 -0
- seedbraid-1.1.0/pyproject.toml +65 -0
- seedbraid-1.1.0/setup.cfg +4 -0
- seedbraid-1.1.0/src/seedbraid/__init__.py +8 -0
- seedbraid-1.1.0/src/seedbraid/__main__.py +6 -0
- seedbraid-1.1.0/src/seedbraid/chunking.py +218 -0
- seedbraid-1.1.0/src/seedbraid/cli.py +665 -0
- seedbraid-1.1.0/src/seedbraid/codec.py +1033 -0
- seedbraid-1.1.0/src/seedbraid/container.py +1373 -0
- seedbraid-1.1.0/src/seedbraid/diagnostics.py +249 -0
- seedbraid-1.1.0/src/seedbraid/errors.py +134 -0
- seedbraid-1.1.0/src/seedbraid/ipfs.py +415 -0
- seedbraid-1.1.0/src/seedbraid/mlhooks.py +498 -0
- seedbraid-1.1.0/src/seedbraid/oci.py +221 -0
- seedbraid-1.1.0/src/seedbraid/perf.py +239 -0
- seedbraid-1.1.0/src/seedbraid/pinning.py +398 -0
- seedbraid-1.1.0/src/seedbraid/py.typed +0 -0
- seedbraid-1.1.0/src/seedbraid/storage.py +199 -0
- seedbraid-1.1.0/src/seedbraid.egg-info/PKG-INFO +371 -0
- seedbraid-1.1.0/src/seedbraid.egg-info/SOURCES.txt +49 -0
- seedbraid-1.1.0/src/seedbraid.egg-info/dependency_links.txt +1 -0
- seedbraid-1.1.0/src/seedbraid.egg-info/entry_points.txt +2 -0
- seedbraid-1.1.0/src/seedbraid.egg-info/requires.txt +19 -0
- seedbraid-1.1.0/src/seedbraid.egg-info/top_level.txt +1 -0
- seedbraid-1.1.0/tests/test_chunking.py +27 -0
- seedbraid-1.1.0/tests/test_cli_commands.py +498 -0
- seedbraid-1.1.0/tests/test_compat_fixtures.py +60 -0
- seedbraid-1.1.0/tests/test_container.py +403 -0
- seedbraid-1.1.0/tests/test_doctor.py +219 -0
- seedbraid-1.1.0/tests/test_dvc_bridge.py +155 -0
- seedbraid-1.1.0/tests/test_encryption.py +118 -0
- seedbraid-1.1.0/tests/test_genes_pack.py +77 -0
- seedbraid-1.1.0/tests/test_genome_snapshot.py +116 -0
- seedbraid-1.1.0/tests/test_ipfs_fetch_validation.py +84 -0
- seedbraid-1.1.0/tests/test_ipfs_optional.py +54 -0
- seedbraid-1.1.0/tests/test_ipfs_reliability.py +176 -0
- seedbraid-1.1.0/tests/test_keygen_cli.py +65 -0
- seedbraid-1.1.0/tests/test_manifest_private.py +44 -0
- seedbraid-1.1.0/tests/test_ml_hooks.py +186 -0
- seedbraid-1.1.0/tests/test_oci_oras_bridge.py +204 -0
- seedbraid-1.1.0/tests/test_perf.py +125 -0
- seedbraid-1.1.0/tests/test_perf_gates.py +70 -0
- seedbraid-1.1.0/tests/test_prime_verify.py +36 -0
- seedbraid-1.1.0/tests/test_publish_warning.py +41 -0
- seedbraid-1.1.0/tests/test_release_workflow.py +269 -0
- seedbraid-1.1.0/tests/test_remote_pinning.py +190 -0
- seedbraid-1.1.0/tests/test_roundtrip.py +33 -0
- seedbraid-1.1.0/tests/test_signature.py +87 -0
- seedbraid-1.1.0/tests/test_verify_strict.py +51 -0
seedbraid-1.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Helix contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
seedbraid-1.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: seedbraid
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: Seedbraid reference-based reconstruction with CDC and IPFS seed transport
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: typer>=0.12
|
|
9
|
+
Provides-Extra: zstd
|
|
10
|
+
Requires-Dist: zstandard>=0.23; extra == "zstd"
|
|
11
|
+
Provides-Extra: crypto
|
|
12
|
+
Requires-Dist: cryptography>=43.0; extra == "crypto"
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
15
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
16
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
17
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
18
|
+
Requires-Dist: cryptography>=43.0; extra == "dev"
|
|
19
|
+
Provides-Extra: docs
|
|
20
|
+
Requires-Dist: mkdocs>=1.6; extra == "docs"
|
|
21
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "docs"
|
|
22
|
+
Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# Seedbraid
|
|
26
|
+
|
|
27
|
+
[CI workflow status](../../actions/workflows/ci.yml)
|
|
28
|
+
|
|
29
|
+
Seedbraid provides reference-based reconstruction with deterministic content-defined chunking (CDC), a binary SBD1 seed format, and IPFS publish/fetch transport.
|
|
30
|
+
|
|
31
|
+
## Beta Status (Read First)
|
|
32
|
+
- Seedbraid is currently in beta.
|
|
33
|
+
- Before production use, run strict validation in your own runtime/storage/network environment.
|
|
34
|
+
- Treat successful `verify --strict` and bit-perfect restore checks as release gates for your team.
|
|
35
|
+
|
|
36
|
+
## Strict Validation Workflow (Required Before Production)
|
|
37
|
+
Run the following smoke workflow before relying on Seedbraid in CI/CD or production pipelines:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
uv sync --no-editable --extra dev
|
|
41
|
+
|
|
42
|
+
workdir="$(mktemp -d)"
|
|
43
|
+
python3 - <<'PY' "$workdir/input.bin"
|
|
44
|
+
from pathlib import Path
|
|
45
|
+
import sys
|
|
46
|
+
|
|
47
|
+
out = Path(sys.argv[1])
|
|
48
|
+
payload = (b"seedbraid-beta-smoke" * 20000) + bytes(range(256)) * 200
|
|
49
|
+
out.write_bytes(payload)
|
|
50
|
+
print(f"wrote {out} bytes={len(payload)}")
|
|
51
|
+
PY
|
|
52
|
+
|
|
53
|
+
uv run --no-sync --no-editable seedbraid encode "$workdir/input.bin" \
|
|
54
|
+
--genome "$workdir/genome" \
|
|
55
|
+
--out "$workdir/seed.sbd" \
|
|
56
|
+
--chunker cdc_buzhash \
|
|
57
|
+
--avg 65536 --min 16384 --max 262144 \
|
|
58
|
+
--learn --portable --compression zlib
|
|
59
|
+
|
|
60
|
+
uv run --no-sync --no-editable seedbraid verify "$workdir/seed.sbd" \
|
|
61
|
+
--genome "$workdir/genome" \
|
|
62
|
+
--strict
|
|
63
|
+
|
|
64
|
+
uv run --no-sync --no-editable seedbraid decode "$workdir/seed.sbd" \
|
|
65
|
+
--genome "$workdir/genome" \
|
|
66
|
+
--out "$workdir/decoded.bin"
|
|
67
|
+
|
|
68
|
+
cmp -s "$workdir/input.bin" "$workdir/decoded.bin" \
|
|
69
|
+
&& echo "bit-perfect roundtrip: OK"
|
|
70
|
+
|
|
71
|
+
UV_CACHE_DIR=.uv-cache uv run --no-sync --no-editable ruff check .
|
|
72
|
+
PYTHONPATH=src UV_CACHE_DIR=.uv-cache uv run --no-sync --no-editable python -m pytest
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Features
|
|
76
|
+
- Lossless encode/decode with SHA-256 verification.
|
|
77
|
+
- Chunkers: `fixed`, `cdc_buzhash`, `cdc_rabin`.
|
|
78
|
+
- Genome storage (SQLite) for deduplicated chunk reuse.
|
|
79
|
+
- SBD1 binary seed container (`manifest + recipe + optional RAW + integrity`).
|
|
80
|
+
- IPFS CLI integration (`publish`, `fetch`).
|
|
81
|
+
- Optional remote pin integration (`pin remote-add`, publish-time remote pin).
|
|
82
|
+
|
|
83
|
+
## Why Seedbraid
|
|
84
|
+
- Seed-first architecture: reconstruction intent is shipped as a compact `SBD1` seed (`manifest + recipe`) instead of shipping full blobs repeatedly.
|
|
85
|
+
- End-to-end integrity posture: strict verify mode, compatibility fixtures, and performance gates are built into the project workflow.
|
|
86
|
+
- Practical Web3 distribution: CID publish/fetch is part of the same CLI surface as encode/decode, reducing operational handoffs.
|
|
87
|
+
- Shift-resilient dedup by default: CDC is first-class and benchmarked against fixed chunking with reproducible scripts.
|
|
88
|
+
|
|
89
|
+
## Best-Fit Use Cases
|
|
90
|
+
- Large binary versioning: datasets, ML models, media assets, and VM images.
|
|
91
|
+
- Distribution of many similar files: share a common genome and distribute compact seeds.
|
|
92
|
+
- IPFS-based distribution and retrieval: distribute by CID and verify reconstruction integrity.
|
|
93
|
+
- Shift-heavy changes (for example, single-byte insertion): CDC improves reuse over fixed chunking.
|
|
94
|
+
|
|
95
|
+
## What It Takes for OSS Adoption
|
|
96
|
+
- A 5-minute onboarding path (installation + first encode/decode tutorial).
|
|
97
|
+
- Benchmark evidence that Seedbraid wins against alternatives on size, transfer time, and restore speed.
|
|
98
|
+
- Security and operations readiness: signing/encryption and operator tooling (`doctor`, `snapshot`, `restore`).
|
|
99
|
+
- Stable format governance and backward-compatibility policy for long-lived seed archives.
|
|
100
|
+
|
|
101
|
+
## Installation
|
|
102
|
+
|
|
103
|
+
> **Note**: PyPI publishing is currently on hold. `pip install seedbraid` is not yet available.
|
|
104
|
+
> Please install from source.
|
|
105
|
+
|
|
106
|
+
## Quick Start
|
|
107
|
+
```bash
|
|
108
|
+
uv sync --no-editable --extra dev
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Optional zstd support:
|
|
112
|
+
```bash
|
|
113
|
+
uv sync --no-editable --extra dev --extra zstd
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Refresh lockfile after dependency changes:
|
|
117
|
+
```bash
|
|
118
|
+
uv lock
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Generate Encryption Key
|
|
122
|
+
Generate a high-entropy key for `SB_ENCRYPTION_KEY`:
|
|
123
|
+
```bash
|
|
124
|
+
uv run --no-editable seedbraid gen-encryption-key
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Print shell export format:
|
|
128
|
+
```bash
|
|
129
|
+
uv run --no-editable seedbraid gen-encryption-key --shell
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Set current shell variable directly:
|
|
133
|
+
```bash
|
|
134
|
+
eval "$(uv run --no-editable seedbraid gen-encryption-key --shell)"
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## CLI
|
|
138
|
+
### Encode
|
|
139
|
+
```bash
|
|
140
|
+
uv run --no-editable seedbraid encode input.bin --genome ./genome --out seed.sbd \
|
|
141
|
+
--chunker cdc_buzhash --avg 65536 --min 16384 --max 262144 \
|
|
142
|
+
--learn --no-portable --compression zlib
|
|
143
|
+
|
|
144
|
+
uv run --no-editable seedbraid encode input.bin --genome ./genome --out seed.private.sbd \
|
|
145
|
+
--manifest-private
|
|
146
|
+
|
|
147
|
+
export SB_ENCRYPTION_KEY='your-secret-passphrase'
|
|
148
|
+
uv run --no-editable seedbraid encode input.bin --genome ./genome --out seed.encrypted.sbd \
|
|
149
|
+
--encrypt --manifest-private
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Decode
|
|
153
|
+
```bash
|
|
154
|
+
uv run --no-editable seedbraid decode seed.sbd --genome ./genome --out recovered.bin
|
|
155
|
+
uv run --no-editable seedbraid decode seed.encrypted.sbd --genome ./genome --out recovered.bin \
|
|
156
|
+
--encryption-key "$SB_ENCRYPTION_KEY"
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Verify
|
|
160
|
+
```bash
|
|
161
|
+
uv run --no-editable seedbraid verify seed.sbd --genome ./genome
|
|
162
|
+
uv run --no-editable seedbraid verify seed.sbd --genome ./genome --strict
|
|
163
|
+
uv run --no-editable seedbraid verify seed.sbd --genome ./genome --require-signature --signature-key "$SB_SIGNING_KEY"
|
|
164
|
+
uv run --no-editable seedbraid verify seed.encrypted.sbd --genome ./genome --strict \
|
|
165
|
+
--encryption-key "$SB_ENCRYPTION_KEY"
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
`verify` supports two modes:
|
|
169
|
+
- Quick mode (default): checks seed integrity and required chunk availability.
|
|
170
|
+
- Strict mode (`--strict`): reconstructs all content and enforces source size and SHA-256 match.
|
|
171
|
+
|
|
172
|
+
### Prime
|
|
173
|
+
```bash
|
|
174
|
+
uv run --no-editable seedbraid prime "./dataset/**/*" --genome ./genome --chunker cdc_buzhash
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Genome Snapshot / Restore
|
|
178
|
+
```bash
|
|
179
|
+
uv run --no-editable seedbraid genome snapshot --genome ./genome --out genome.sgs
|
|
180
|
+
uv run --no-editable seedbraid genome restore genome.sgs --genome ./genome-dr --replace
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Publish (IPFS)
|
|
184
|
+
```bash
|
|
185
|
+
uv run --no-editable seedbraid publish seed.sbd --no-pin
|
|
186
|
+
uv run --no-editable seedbraid publish seed.sbd --pin
|
|
187
|
+
uv run --no-editable seedbraid publish seed.sbd --remote-pin \
|
|
188
|
+
--remote-endpoint https://pin.example/api/v1 --remote-token "$SB_PINNING_TOKEN"
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
`publish` emits a warning when the seed is unencrypted. For sensitive data, prefer:
|
|
192
|
+
`seedbraid encode --encrypt --manifest-private ...` before publishing.
|
|
193
|
+
When `--remote-pin` is enabled, Seedbraid also registers the CID with the configured remote
|
|
194
|
+
pin provider (Pinning Services API-compatible).
|
|
195
|
+
|
|
196
|
+
### Fetch (IPFS)
|
|
197
|
+
```bash
|
|
198
|
+
uv run --no-editable seedbraid fetch <cid> --out fetched.sbd
|
|
199
|
+
uv run --no-editable seedbraid fetch <cid> --out fetched.sbd --retries 5 --backoff-ms 300
|
|
200
|
+
uv run --no-editable seedbraid fetch <cid> --out fetched.sbd --gateway https://ipfs.io/ipfs
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
`fetch` retries `ipfs cat` with exponential backoff and can fall back to an HTTP gateway.
|
|
204
|
+
|
|
205
|
+
### Pin Health (IPFS)
|
|
206
|
+
```bash
|
|
207
|
+
uv run --no-editable seedbraid pin-health <cid>
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Remote Pin Add (IPFS)
|
|
211
|
+
```bash
|
|
212
|
+
export SB_PINNING_ENDPOINT='https://pin.example/api/v1'
|
|
213
|
+
export SB_PINNING_TOKEN='your-api-token'
|
|
214
|
+
uv run --no-editable seedbraid pin remote-add <cid>
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Doctor
|
|
218
|
+
```bash
|
|
219
|
+
uv run --no-editable seedbraid doctor --genome ./genome
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
`doctor` checks:
|
|
223
|
+
- Python runtime compatibility (>=3.12)
|
|
224
|
+
- IPFS CLI availability/version
|
|
225
|
+
- `IPFS_PATH` state
|
|
226
|
+
- genome path writability
|
|
227
|
+
- compression support (`zlib`, optional `zstd`)
|
|
228
|
+
|
|
229
|
+
### Sign Seed (optional)
|
|
230
|
+
```bash
|
|
231
|
+
export SB_SIGNING_KEY='your-shared-secret'
|
|
232
|
+
uv run --no-editable seedbraid sign seed.sbd --out seed.signed.sbd --key-env SB_SIGNING_KEY --key-id team-a
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Export / Import Genes (optional)
|
|
236
|
+
```bash
|
|
237
|
+
uv run --no-editable seedbraid export-genes seed.sbd --genome ./genome --out genes.pack
|
|
238
|
+
uv run --no-editable seedbraid import-genes genes.pack --genome ./another-genome
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## IPFS Installation/Check
|
|
242
|
+
Check if IPFS CLI is available:
|
|
243
|
+
```bash
|
|
244
|
+
ipfs --version
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
If missing, install Kubo (IPFS CLI) and ensure `ipfs` is on your PATH.
|
|
248
|
+
|
|
249
|
+
## Common Failures
|
|
250
|
+
- `ipfs CLI not found`:
|
|
251
|
+
  - Install IPFS and verify with `ipfs --version`.
|
|
252
|
+
- `Missing required chunk` on decode/verify:
|
|
253
|
+
  - Provide the correct `--genome`, or re-encode with `--portable`.
|
|
254
|
+
- `zstd` compression error:
|
|
255
|
+
  - Install the optional dependency `zstandard`, or use `--compression zlib`.
|
|
256
|
+
|
|
257
|
+
## Troubleshooting Matrix
|
|
258
|
+
| Symptom | Error Code | Next Action |
|
|
259
|
+
|---|---|---|
|
|
260
|
+
| Encryption requested but key missing | `SB_E_ENCRYPTION_KEY_MISSING` | Pass `--encryption-key` or set `SB_ENCRYPTION_KEY`. |
|
|
261
|
+
| Signing requested but key missing | `SB_E_SIGNING_KEY_MISSING` | Export signing key env var and retry `seedbraid sign`. |
|
|
262
|
+
| IPFS CLI missing | `SB_E_IPFS_NOT_FOUND` | Install Kubo and confirm `ipfs --version`. |
|
|
263
|
+
| IPFS fetch/publish failure | `SB_E_IPFS_FETCH` / `SB_E_IPFS_PUBLISH` | Check daemon/network, retry, use gateway fallback if needed. |
|
|
264
|
+
| Remote pin configuration missing | `SB_E_REMOTE_PIN_CONFIG` | Set endpoint/token env vars or pass options. |
|
|
265
|
+
| Remote pin auth failed | `SB_E_REMOTE_PIN_AUTH` | Verify provider token permissions and retry. |
|
|
266
|
+
| Remote pin request invalid | `SB_E_REMOTE_PIN_REQUEST` | Check CID/provider options and retry. |
|
|
267
|
+
| Remote pin timeout/failure | `SB_E_REMOTE_PIN_TIMEOUT` / `SB_E_REMOTE_PIN` | Increase retries/timeout or check provider health. |
|
|
268
|
+
| Seed parse/integrity failure | `SB_E_SEED_FORMAT` | Re-fetch/rebuild seed and verify source integrity. |
|
|
269
|
+
|
|
270
|
+
## CI (SBD-ECO-001)
|
|
271
|
+
GitHub Actions workflows:
|
|
272
|
+
- `.github/workflows/ci.yml`
|
|
273
|
+
- Lint: `ruff check .`
|
|
274
|
+
- Test: `python -m pytest`
|
|
275
|
+
- Compatibility fixtures: `python -m pytest tests/test_compat_fixtures.py`
|
|
276
|
+
- Benchmark gate: `python scripts/bench_gate.py ...`
|
|
277
|
+
- `.github/workflows/publish-seed.yml` (manual only, `dry_run=true` default)
|
|
278
|
+
- Generates seed from `source_path` via `seedbraid encode`
|
|
279
|
+
- Runs strict integrity check via `seedbraid verify --strict`
|
|
280
|
+
- Publishes to IPFS only when `dry_run=false`
|
|
281
|
+
- Installs Kubo (`ipfs` CLI) on runner when `dry_run=false` (version configurable via `kubo_version`)
|
|
282
|
+
- Verifies Kubo release tag signature status via GitHub API before install
|
|
283
|
+
- Verifies downloaded Kubo archive checksum (`sha512`) before extraction
|
|
284
|
+
- Supports `pin`, `portable`, `manifest_private`, and optional `encrypt`
|
|
285
|
+
(`SB_ENCRYPTION_KEY` secret required when `encrypt=true`)
|
|
286
|
+
|
|
287
|
+
Local parity commands:
|
|
288
|
+
```bash
|
|
289
|
+
uv sync --no-editable --extra dev
|
|
290
|
+
uv run --no-sync --no-editable ruff check .
|
|
291
|
+
PYTHONPATH=src uv run --no-sync --no-editable python -m pytest
|
|
292
|
+
PYTHONPATH=src uv run --no-sync --no-editable python -m pytest tests/test_compat_fixtures.py
|
|
293
|
+
uv run --no-sync --no-editable python scripts/bench_gate.py \
|
|
294
|
+
--min-reuse-improvement-bps 1 \
|
|
295
|
+
--max-seed-size-ratio 1.20 \
|
|
296
|
+
--min-cdc-throughput-mib-s 0.10 \
|
|
297
|
+
--json-out .artifacts/bench-report.json
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## DVC Integration (SBD-ECO-003)
|
|
301
|
+
- Minimal DVC bridge lives in `examples/dvc/`.
|
|
302
|
+
- Pipeline stages are `encode -> verify --strict -> fetch`.
|
|
303
|
+
- The `verify` stage is strict and must fail pipeline reproduction on an integrity mismatch.
|
|
304
|
+
- Integration recipe and artifact layout are documented in `examples/dvc/README.md`.
|
|
305
|
+
|
|
306
|
+
## OCI Integration (SBD-ECO-004)
|
|
307
|
+
- ORAS bridge scripts and usage docs live in `examples/oci/`.
|
|
308
|
+
- Default OCI metadata convention:
|
|
309
|
+
- artifact type: `application/vnd.seedbraid.seed.v1`
|
|
310
|
+
- layer media type: `application/vnd.seedbraid.seed.layer.v1+sbd`
|
|
311
|
+
- annotations: source SHA-256, chunker, manifest-private flag, seed title
|
|
312
|
+
- Push/pull scripts:
|
|
313
|
+
- `examples/oci/scripts/push_seed.sh <seed.sbd> <registry/repository:tag>`
|
|
314
|
+
- `examples/oci/scripts/pull_seed.sh <registry/repository:tag> <out.sbd>`
|
|
315
|
+
- After pull, run strict verification:
|
|
316
|
+
- `seedbraid verify <out.sbd> --genome <genome-path> --strict`
|
|
317
|
+
|
|
318
|
+
## ML Tooling Hooks (SBD-ECO-005)
|
|
319
|
+
- Scripts for MLflow metadata logging and Hugging Face upload live in `examples/ml/`.
|
|
320
|
+
- MLflow hook logs seed metadata fields (seed digest, manifest provenance, optional transport refs).
|
|
321
|
+
- Hugging Face hook uploads `seed.sbd` + metadata sidecar with env-provided token credentials.
|
|
322
|
+
- Restore workflow from logged metadata is documented in `examples/ml/README.md`.
|
|
323
|
+
|
|
324
|
+
## Tests and CI-Equivalent Local Commands
|
|
325
|
+
```bash
|
|
326
|
+
uv run --no-editable ruff check .
|
|
327
|
+
uv run --no-editable python -m pytest
|
|
328
|
+
uv run --no-editable python -m pytest tests/test_compat_fixtures.py
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
IPFS tests auto-skip when `ipfs` is not installed.
|
|
332
|
+
Compatibility fixtures are stored in `tests/fixtures/compat/v1/` and are
|
|
333
|
+
validated by `tests/test_compat_fixtures.py`.
|
|
334
|
+
Regenerate intentionally with:
|
|
335
|
+
`uv run --no-editable python scripts/gen_compat_fixtures.py`.
|
|
336
|
+
|
|
337
|
+
## 1-byte Insertion Dedup Benchmark
|
|
338
|
+
Run:
|
|
339
|
+
```bash
|
|
340
|
+
uv run --no-editable python scripts/bench_shifted_dedup.py
|
|
341
|
+
uv run --no-editable python scripts/bench_gate.py \
|
|
342
|
+
--min-reuse-improvement-bps 1 \
|
|
343
|
+
--max-seed-size-ratio 1.20 \
|
|
344
|
+
--min-cdc-throughput-mib-s 0.10 \
|
|
345
|
+
--json-out .artifacts/bench-report.json
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
Expected behavior:
|
|
349
|
+
- `cdc_buzhash` should show better reuse than `fixed` when a single-byte insertion shifts offsets.
|
|
350
|
+
- `bench_gate.py` exits non-zero when configured thresholds are violated.
|
|
351
|
+
|
|
352
|
+
## Project Documents
|
|
353
|
+
- Format spec: `docs/FORMAT.md`
|
|
354
|
+
- Design rationale: `docs/DESIGN.md`
|
|
355
|
+
- Threat model: `docs/THREAT_MODEL.md`
|
|
356
|
+
- Error codes: `docs/ERROR_CODES.md`
|
|
357
|
+
- Performance gates: `docs/PERFORMANCE.md`
|
|
358
|
+
- DVC workflow bridge example: `examples/dvc/README.md`
|
|
359
|
+
- OCI/ORAS distribution example: `examples/oci/README.md`
|
|
360
|
+
- ML tooling hooks example: `examples/ml/README.md`
|
|
361
|
+
|
|
362
|
+
## Support Seedbraid
|
|
363
|
+
- Seedbraid is maintained as an open-source project.
|
|
364
|
+
- If Seedbraid helps your workflow, please consider donating via the repository `Sponsor` button.
|
|
365
|
+
- Donations directly support maintenance, documentation, and compatibility/performance validation.
|
|
366
|
+
|
|
367
|
+
## Open Source Governance
|
|
368
|
+
- License: `MIT` (`LICENSE`)
|
|
369
|
+
- Security policy: `SECURITY.md`
|
|
370
|
+
- Contributing guide: `CONTRIBUTING.md`
|
|
371
|
+
- Code of Conduct: `CODE_OF_CONDUCT.md`
|