@push.rocks/containerarchive 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@
3
3
  */
4
4
  export const commitinfo = {
5
5
  name: '@push.rocks/containerarchive',
6
- version: '0.0.2',
6
+ version: '0.1.0',
7
7
  description: 'content-addressed incremental backup engine with deduplication, encryption, and error correction'
8
8
  };
9
9
  //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiMDBfY29tbWl0aW5mb19kYXRhLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vdHMvMDBfY29tbWl0aW5mb19kYXRhLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBOztHQUVHO0FBQ0gsTUFBTSxDQUFDLE1BQU0sVUFBVSxHQUFHO0lBQ3hCLElBQUksRUFBRSw4QkFBOEI7SUFDcEMsT0FBTyxFQUFFLE9BQU87SUFDaEIsV0FBVyxFQUFFLGtHQUFrRztDQUNoSCxDQUFBIn0=
@@ -98,6 +98,7 @@ export interface IRetentionPolicy {
98
98
  export interface IPruneResult {
99
99
  removedSnapshots: number;
100
100
  removedPacks: number;
101
+ rewrittenPacks: number;
101
102
  freedBytes: number;
102
103
  dryRun: boolean;
103
104
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@push.rocks/containerarchive",
3
- "version": "0.0.2",
3
+ "version": "0.1.0",
4
4
  "private": false,
5
5
  "description": "content-addressed incremental backup engine with deduplication, encryption, and error correction",
6
6
  "main": "dist_ts/index.js",
package/readme.md ADDED
@@ -0,0 +1,339 @@
1
+ # @push.rocks/containerarchive
2
+
3
+ A high-performance, content-addressed incremental backup engine with built-in deduplication, encryption, compression, and Reed-Solomon error correction — powered by a Rust core with a clean TypeScript API.
4
+
5
+ ## Issue Reporting and Security
6
+
7
+ For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pnpm install @push.rocks/containerarchive
13
+ ```
14
+
15
+ ## 🏗️ Architecture
16
+
17
+ containerarchive uses a **hybrid Rust + TypeScript architecture**. The heavy lifting — chunking, hashing, compression, encryption, pack file I/O, and parity — runs in a compiled Rust binary. The TypeScript layer provides a clean, idiomatic Node.js API and manages data streaming via Unix sockets through the [`@push.rocks/smartrust`](https://code.foss.global/push.rocks/smartrust) RustBridge IPC.
18
+
19
+ ```
20
+ ┌──────────────────────────────────────┐
21
+ │ Your Application (TypeScript/JS) │
22
+ │ ┌────────────────────────────────┐ │
23
+ │ │ ContainerArchive API │ │
24
+ │ │ .init() .ingest() .restore()│ │
25
+ │ └────────────┬───────────────────┘ │
26
+ │ │ Unix Socket + JSON │
27
+ │ ┌────────────▼───────────────────┐ │
28
+ │ │ Rust Engine (compiled bin) │ │
29
+ │ │ FastCDC │ SHA-256 │ AES-GCM │ │
30
+ │ │ gzip/zstd │ Reed-Solomon │ │
31
+ │ └────────────────────────────────┘ │
32
+ └──────────────────────────────────────┘
33
+ ```
34
+
35
+ ## ✨ Features
36
+
37
+ | Feature | Details |
38
+ |---|---|
39
+ | **Content-Defined Chunking** | FastCDC with gear-based rolling hash — insertions/deletions only affect nearby boundaries |
40
+ | **Deduplication** | SHA-256 chunk addressing — identical data is stored only once across all snapshots |
41
+ | **Compression** | gzip or zstd per-chunk compression |
42
+ | **Encryption** | AES-256-GCM with Argon2id key derivation — passphrase-protected repositories |
43
+ | **Pack Files** | Chunks are batched into binary pack files with binary `.idx` indexes for fast lookup |
44
+ | **Snapshots** | Immutable point-in-time snapshots with metadata tags and multi-item support |
45
+ | **Reed-Solomon Parity** | RS(20,1) erasure coding — recover any single lost pack from a group of 20 |
46
+ | **Incremental** | Only new/changed chunks are stored on each ingest |
47
+ | **Streaming** | Unix socket streaming between TypeScript and Rust for low-overhead data transfer |
48
+ | **Multi-Item Snapshots** | Bundle multiple data streams (DB dumps, config tarballs, etc.) into a single snapshot |
49
+ | **Verification** | Three-level integrity checks: quick, standard, full |
50
+ | **Pruning** | Retention policies (keep last N, days, weeks, months) with garbage collection |
51
+ | **Repair** | Automatic index rebuild, stale lock removal, and parity-based pack recovery |
52
+
53
+ ## 📖 Usage
54
+
55
+ ### Initialize a New Repository
56
+
57
+ ```typescript
58
+ import { ContainerArchive } from '@push.rocks/containerarchive';
59
+
60
+ // Unencrypted repository
61
+ const repo = await ContainerArchive.init('/path/to/backup-repo');
62
+
63
+ // Encrypted repository (AES-256-GCM + Argon2id)
64
+ const encryptedRepo = await ContainerArchive.init('/path/to/secure-repo', {
65
+ passphrase: 'my-strong-passphrase',
66
+ });
67
+ ```
68
+
69
+ ### Open an Existing Repository
70
+
71
+ ```typescript
72
+ const repo = await ContainerArchive.open('/path/to/backup-repo');
73
+
74
+ // With passphrase for encrypted repos
75
+ const encryptedRepo = await ContainerArchive.open('/path/to/secure-repo', {
76
+ passphrase: 'my-strong-passphrase',
77
+ });
78
+ ```
79
+
80
+ ### Ingest Data (Single Stream)
81
+
82
+ ```typescript
83
+ import * as fs from 'node:fs';
84
+
85
+ const inputStream = fs.createReadStream('/path/to/database-dump.sql');
86
+ const snapshot = await repo.ingest(inputStream, {
87
+ tags: { service: 'postgres', environment: 'production' },
88
+ items: [{ name: 'database.sql', type: 'database-dump' }],
89
+ });
90
+
91
+ console.log(`Snapshot ${snapshot.id} created`);
92
+ console.log(`Original: ${snapshot.originalSize} bytes`);
93
+ console.log(`Stored: ${snapshot.storedSize} bytes`);
94
+ console.log(`New chunks: ${snapshot.newChunks}, Reused: ${snapshot.reusedChunks}`);
95
+ ```
96
+
97
+ ### Multi-Item Ingest
98
+
99
+ Bundle multiple data streams into one snapshot:
100
+
101
+ ```typescript
102
+ import * as fs from 'node:fs';
103
+
104
+ const dbDump = fs.createReadStream('/tmp/pg_dump.sql');
105
+ const configTar = fs.createReadStream('/tmp/config-volumes.tar');
106
+
107
+ const snapshot = await repo.ingestMulti([
108
+ { stream: dbDump, name: 'database.sql', type: 'database-dump' },
109
+ { stream: configTar, name: 'config.tar', type: 'volume-tar' },
110
+ ], {
111
+ tags: { service: 'myapp', type: 'full-backup' },
112
+ });
113
+
114
+ console.log(`Items stored: ${snapshot.items.map(i => i.name).join(', ')}`);
115
+ ```
116
+
117
+ ### Restore Data
118
+
119
+ ```typescript
120
+ // Restore an entire snapshot
121
+ const restoreStream = await repo.restore(snapshot.id);
122
+ const writeStream = fs.createWriteStream('/tmp/restored-dump.sql');
123
+ restoreStream.pipe(writeStream);
124
+
125
+ // Restore a specific item from a multi-item snapshot
126
+ const configStream = await repo.restore(snapshot.id, { item: 'config.tar' });
127
+ configStream.pipe(fs.createWriteStream('/tmp/restored-config.tar'));
128
+ ```
129
+
130
+ ### List & Filter Snapshots
131
+
132
+ ```typescript
133
+ // List all snapshots
134
+ const allSnapshots = await repo.listSnapshots();
135
+
136
+ // Filter by tags
137
+ const prodSnapshots = await repo.listSnapshots({
138
+ tags: { environment: 'production' },
139
+ });
140
+
141
+ // Filter by date range
142
+ const recentSnapshots = await repo.listSnapshots({
143
+ after: '2026-03-01T00:00:00Z',
144
+ before: '2026-03-22T00:00:00Z',
145
+ });
146
+
147
+ // Get a specific snapshot
148
+ const snap = await repo.getSnapshot('snapshot-id-here');
149
+ ```
150
+
151
+ ### Verify Repository Integrity
152
+
153
+ ```typescript
154
+ // Quick check — validates index consistency
155
+ const quick = await repo.verify({ level: 'quick' });
156
+
157
+ // Standard — reads pack headers and validates checksums
158
+ const standard = await repo.verify({ level: 'standard' });
159
+
160
+ // Full — decompresses and re-hashes every chunk
161
+ const full = await repo.verify({ level: 'full' });
162
+
163
+ console.log(`OK: ${full.ok}`);
164
+ console.log(`Packs checked: ${full.stats.packsChecked}`);
165
+ console.log(`Chunks checked: ${full.stats.chunksChecked}`);
166
+ ```
167
+
168
+ ### Prune Old Snapshots
169
+
170
+ ```typescript
171
+ // Dry run first
172
+ const preview = await repo.prune({ keepLast: 5, keepDays: 30 }, true);
173
+ console.log(`Would remove ${preview.removedSnapshots} snapshots, free ${preview.freedBytes} bytes`);
174
+
175
+ // Execute for real
176
+ const result = await repo.prune({
177
+ keepLast: 5,
178
+ keepDays: 30,
179
+ keepWeeks: 12,
180
+ keepMonths: 6,
181
+ });
182
+ console.log(`Removed ${result.removedSnapshots} snapshots, ${result.removedPacks} packs`);
183
+ ```
184
+
185
+ ### Repair & Maintenance
186
+
187
+ ```typescript
188
+ // Repair — rebuild index, remove stale locks, attempt parity recovery
189
+ const repairResult = await repo.repair();
190
+ console.log(`Index rebuilt: ${repairResult.indexRebuilt}`);
191
+ console.log(`Packs repaired via parity: ${repairResult.packsRepaired}`);
192
+
193
+ // Rebuild global index from pack .idx files
194
+ await repo.reindex();
195
+
196
+ // Remove stale locks
197
+ await repo.unlock();
198
+ await repo.unlock({ force: true }); // force-remove all locks
199
+ ```
200
+
201
+ ### Event Subscriptions
202
+
203
+ Monitor ingest progress and errors with RxJS-based event streams:
204
+
205
+ ```typescript
206
+ // Track ingest progress
207
+ const sub = repo.on('ingest:progress', (data) => {
208
+ console.log(`${data.operation}: ${data.percentage}% — ${data.message}`);
209
+ });
210
+
211
+ // Track completed ingests
212
+ repo.on('ingest:complete', (data) => {
213
+ console.log(`Snapshot ${data.snapshotId} complete — ${data.newChunks} new chunks`);
214
+ });
215
+
216
+ // Track verification errors
217
+ repo.on('verify:error', (error) => {
218
+ console.error(`Verification error in ${error.pack || error.chunk}: ${error.error}`);
219
+ });
220
+
221
+ // Unsubscribe when done
222
+ sub.unsubscribe();
223
+ ```
224
+
225
+ ### Close the Repository
226
+
227
+ ```typescript
228
+ await repo.close();
229
+ ```
230
+
231
+ ## 🗂️ Repository Structure
232
+
233
+ When initialized, a repository has the following on-disk layout:
234
+
235
+ ```
236
+ backup-repo/
237
+ ├── config.json # Repository config (chunking, compression, encryption, parity)
238
+ ├── packs/
239
+ │ ├── data/ # Binary pack files (.pack) and indexes (.idx)
240
+ │ └── parity/ # Reed-Solomon parity packs
241
+ ├── snapshots/ # JSON snapshot manifests
242
+ ├── index/ # Global chunk index (hash → pack location)
243
+ ├── keys/ # Encrypted key files (for passphrase-protected repos)
244
+ └── locks/ # Advisory locks for concurrent access
245
+ ```
246
+
247
+ ## 🔧 How It Works
248
+
249
+ 1. **Chunking** — Incoming data is split into variable-size chunks using FastCDC with a gear-based rolling hash. Chunk sizes range from 64 KB to 1 MB (avg 256 KB). Content-defined boundaries mean that insertions or edits only affect nearby chunks, maximizing dedup across versions.
250
+
251
+ 2. **Hashing** — Each chunk is hashed with SHA-256 for content addressing. If a chunk's hash already exists in the global index, it's deduplicated — only a reference is stored.
252
+
253
+ 3. **Compression** — New chunks are compressed with gzip (default) or zstd before storage. Per-chunk compression flags are stored in the index.
254
+
255
+ 4. **Encryption** — If a passphrase is set, a random 256-bit master key is generated, wrapped with an Argon2id-derived key, and stored in a key file. Every chunk is encrypted with AES-256-GCM using a unique nonce.
256
+
257
+ 5. **Packing** — Compressed (and optionally encrypted) chunks are appended into binary pack files (~8 MB target). Each pack has an associated `.idx` file with chunk offsets, sizes, and flags for O(1) lookup.
258
+
259
+ 6. **Parity** — After every group of 20 data packs, a Reed-Solomon RS(20,1) parity pack is generated. If any single pack in the group is lost or corrupted, it can be fully reconstructed.
260
+
261
+ 7. **Snapshots** — A JSON manifest records the chunk list, tags, sizes, and item metadata. Snapshots are immutable — pruning removes snapshots but never alters existing pack data in-place.
262
+
263
+ 8. **Restore** — The snapshot manifest is read, chunks are looked up in the global index, fetched from pack files, decompressed, decrypted if needed, and streamed back in order via a Unix socket.
264
+
265
+ ## 📋 API Reference
266
+
267
+ ### `ContainerArchive`
268
+
269
+ | Method | Description |
270
+ |---|---|
271
+ | `static init(path, options?)` | Create a new repository. Returns `Promise<ContainerArchive>` |
272
+ | `static open(path, options?)` | Open an existing repository. Returns `Promise<ContainerArchive>` |
273
+ | `ingest(stream, options?)` | Ingest a single data stream. Returns `Promise<ISnapshot>` |
274
+ | `ingestMulti(items, options?)` | Ingest multiple streams as one snapshot. Returns `Promise<ISnapshot>` |
275
+ | `restore(snapshotId, options?)` | Restore a snapshot. Returns `Promise<Readable>` (a Node.js readable stream) |
276
+ | `listSnapshots(filter?)` | List snapshots with optional tag/date filtering. Returns `Promise<ISnapshot[]>` |
277
+ | `getSnapshot(id)` | Get a specific snapshot. Returns `Promise<ISnapshot>` |
278
+ | `verify(options?)` | Verify repository integrity (quick/standard/full). Returns `Promise<IVerifyResult>` |
279
+ | `prune(retention, dryRun?)` | Remove old snapshots and garbage-collect packs. Returns `Promise<IPruneResult>` |
280
+ | `repair()` | Rebuild index, remove stale locks, attempt parity recovery. Returns `Promise<IRepairResult>` |
281
+ | `reindex()` | Rebuild the global index from pack `.idx` files. Returns `Promise<void>` |
282
+ | `unlock(options?)` | Remove advisory locks. Returns `Promise<void>` |
283
+ | `on(event, handler)` | Subscribe to events. Returns `Subscription` |
284
+ | `close()` | Close the repository and terminate the Rust process. Returns `Promise<void>` |
285
+
286
+ ### Key Interfaces
287
+
288
+ ```typescript
289
+ interface ISnapshot {
290
+ id: string;
291
+ version: number;
292
+ createdAt: string;
293
+ tags: Record<string, string>;
294
+ originalSize: number;
295
+ storedSize: number;
296
+ chunkCount: number;
297
+ newChunks: number;
298
+ reusedChunks: number;
299
+ items: ISnapshotItem[];
300
+ }
301
+
302
+ interface IRetentionPolicy {
303
+ keepLast?: number;
304
+ keepDays?: number;
305
+ keepWeeks?: number;
306
+ keepMonths?: number;
307
+ }
308
+
309
+ interface IVerifyResult {
310
+ ok: boolean;
311
+ errors: IVerifyError[];
312
+ stats: {
313
+ packsChecked: number;
314
+ chunksChecked: number;
315
+ snapshotsChecked: number;
316
+ };
317
+ }
318
+ ```
319
+
320
+ ## License and Legal Information
321
+
322
+ This repository contains open-source code licensed under the MIT License. A copy of the license can be found in the [LICENSE](./LICENSE) file.
323
+
324
+ **Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
325
+
326
+ ### Trademarks
327
+
328
+ This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH or third parties, and are not included within the scope of the MIT license granted herein.
329
+
330
+ Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines or the guidelines of the respective third-party owners, and any usage must be approved in writing. Third-party trademarks used herein are the property of their respective owners and used only in a descriptive manner, e.g. for an implementation of an API or similar.
331
+
332
+ ### Company Information
333
+
334
+ Task Venture Capital GmbH
335
+ Registered at District Court Bremen HRB 35230 HB, Germany
336
+
337
+ For any legal inquiries or further information, please contact us via email at hello@task.vc.
338
+
339
+ By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.
@@ -3,6 +3,6 @@
3
3
  */
4
4
  export const commitinfo = {
5
5
  name: '@push.rocks/containerarchive',
6
- version: '0.0.2',
6
+ version: '0.1.0',
7
7
  description: 'content-addressed incremental backup engine with deduplication, encryption, and error correction'
8
8
  }
package/ts/interfaces.ts CHANGED
@@ -126,6 +126,7 @@ export interface IRetentionPolicy {
126
126
  export interface IPruneResult {
127
127
  removedSnapshots: number;
128
128
  removedPacks: number;
129
+ rewrittenPacks: number;
129
130
  freedBytes: number;
130
131
  dryRun: boolean;
131
132
  }