@cosmicdrift/kumiko-framework 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/CHANGELOG.md +52 -0
  2. package/package.json +4 -3
  3. package/src/auth/__tests__/roles.test.ts +24 -0
  4. package/src/auth/index.ts +7 -0
  5. package/src/auth/roles.ts +42 -0
  6. package/src/compliance/__tests__/duration-spec.test.ts +72 -0
  7. package/src/compliance/__tests__/profiles.test.ts +308 -0
  8. package/src/compliance/__tests__/sub-processors.test.ts +139 -0
  9. package/src/compliance/duration-spec.ts +44 -0
  10. package/src/compliance/index.ts +31 -0
  11. package/src/compliance/override-schema.ts +136 -0
  12. package/src/compliance/profiles.ts +427 -0
  13. package/src/compliance/sub-processors.ts +152 -0
  14. package/src/db/__tests__/big-int-field.test.ts +131 -0
  15. package/src/db/table-builder.ts +18 -1
  16. package/src/engine/__tests__/boot-validator-api-exposure.test.ts +142 -0
  17. package/src/engine/__tests__/boot-validator-pii-retention.test.ts +570 -0
  18. package/src/engine/__tests__/boot-validator-s0-integration.test.ts +160 -0
  19. package/src/engine/boot-validator.ts +276 -0
  20. package/src/engine/define-feature.ts +39 -0
  21. package/src/engine/extension-names.ts +105 -0
  22. package/src/engine/extensions/user-data.ts +106 -0
  23. package/src/engine/factories.ts +15 -5
  24. package/src/engine/feature-ast/extractors.ts +40 -0
  25. package/src/engine/feature-ast/parse.ts +6 -0
  26. package/src/engine/feature-ast/patterns.ts +22 -0
  27. package/src/engine/feature-ast/render.ts +14 -0
  28. package/src/engine/index.ts +21 -0
  29. package/src/engine/pattern-library/__tests__/library.test.ts +5 -0
  30. package/src/engine/pattern-library/library.ts +36 -0
  31. package/src/engine/schema-builder.ts +8 -0
  32. package/src/engine/types/feature.ts +51 -0
  33. package/src/engine/types/fields.ts +134 -10
  34. package/src/engine/types/index.ts +3 -0
  35. package/src/files/__tests__/read-stream.test.ts +105 -0
  36. package/src/files/__tests__/write-stream.test.ts +233 -0
  37. package/src/files/__tests__/zip-stream.test.ts +357 -0
  38. package/src/files/in-memory-provider.ts +38 -0
  39. package/src/files/index.ts +3 -0
  40. package/src/files/local-provider.ts +58 -1
  41. package/src/files/types.ts +34 -6
  42. package/src/files/zip-stream.ts +251 -0
@@ -0,0 +1,357 @@
1
+ // Streaming-ZIP-Builder Tests (S2.U3 Atom 3a).
2
+ //
3
+ // Drei-Schichten-Test:
4
+ // 1. Strukturelle Asserts: ZIP-Magic-Numbers an den richtigen Stellen,
5
+ // EOCD am Ende, central-dir-Count matcht emittierte Entries.
6
+ // 2. CRC32-Korrektheit gegen bekannte Test-Vektoren (IEEE-802.3-
7
+ // Check-Value, gegengeprueft mit zlib.crc32).
8
+ // 3. Real-Roundtrip via `unzip` shell-binary: ZIP in tmp-File schreiben,
9
+ // `unzip -l` + `unzip -p` aufrufen, Inhalt verifizieren. Pinst
10
+ // dass das ZIP von einem realen Decoder (Info-ZIP) gelesen werden
11
+ // kann — kein "passt nur in unserer eigenen reverse-engineerten
12
+ // Welt".
13
+
14
+ import { spawn } from "node:child_process";
15
+ import { mkdtemp, rm, writeFile } from "node:fs/promises";
16
+ import { tmpdir } from "node:os";
17
+ import { join } from "node:path";
18
+ import { afterEach, beforeEach, describe, expect, test } from "vitest";
19
+ import { getTemporal } from "../../time";
20
+ import { createZipStream, type ZipEntry } from "../zip-stream";
21
+
22
/** Exposes a string as a one-chunk async byte source (UTF-8 encoded). */
async function* fromString(s: string): AsyncIterable<Uint8Array> {
  const encoder = new TextEncoder();
  const utf8Bytes = encoder.encode(s);
  yield utf8Bytes;
}
25
+
26
/** Exposes an existing byte array as a one-chunk async byte source (no copy). */
async function* fromBytes(b: Uint8Array): AsyncIterable<Uint8Array> {
  yield* [b];
}
29
+
30
/** Turns an in-memory entry list into the async entry source that createZipStream consumes. */
async function* fromEntries(entries: ZipEntry[]): AsyncIterable<ZipEntry> {
  yield* entries;
}
33
+
34
/**
 * Drains an async byte stream and returns all chunks concatenated into one
 * contiguous Uint8Array.
 */
async function collect(stream: AsyncIterable<Uint8Array>): Promise<Uint8Array> {
  const parts: Uint8Array[] = [];
  for await (const part of stream) {
    parts.push(part);
  }

  const size = parts.reduce((sum, part) => sum + part.byteLength, 0);
  const merged = new Uint8Array(size);

  // Copy every chunk behind the previous one; the reducer carries the write cursor.
  parts.reduce((cursor, part) => {
    merged.set(part, cursor);
    return cursor + part.byteLength;
  }, 0);

  return merged;
}
49
+
50
/** Reads an unsigned 32-bit little-endian integer at `offset` (relative to the start of `buf`). */
function readU32LE(buf: Uint8Array, offset: number): number {
  const absoluteStart = buf.byteOffset + offset;
  const window = new DataView(buf.buffer, absoluteStart, 4);
  return window.getUint32(0, true);
}
53
+
54
/** Reads an unsigned 16-bit little-endian integer at `offset` (relative to the start of `buf`). */
function readU16LE(buf: Uint8Array, offset: number): number {
  const absoluteStart = buf.byteOffset + offset;
  const window = new DataView(buf.buffer, absoluteStart, 2);
  return window.getUint16(0, true);
}
57
+
58
// Structural layer: signatures and counters must sit at the byte offsets the
// ZIP format prescribes (local file header first, EOCD record last).
describe("createZipStream :: structural asserts", () => {
  const LFH_SIGNATURE = 0x04034b50; // "PK\x03\x04"
  const EOCD_SIGNATURE = 0x06054b50; // "PK\x05\x06"
  const EOCD_SIZE = 22;

  test("leerer Stream → nur EOCD-Record (22 Bytes)", async () => {
    const archive = await collect(createZipStream(fromEntries([])));

    expect(archive.byteLength).toBe(EOCD_SIZE);
    expect(readU32LE(archive, 0)).toBe(EOCD_SIGNATURE);
    // Both entry counters in the EOCD (offsets 8 and 10) must report zero.
    expect(readU16LE(archive, 8)).toBe(0);
    expect(readU16LE(archive, 10)).toBe(0);
  });

  test("1 Entry → Local-File-Header + body + central-dir + EOCD", async () => {
    const payload = "hello";
    const source = fromEntries([{ path: "greet.txt", data: fromString(payload) }]);
    const archive = await collect(createZipStream(source));

    // The archive opens with a local file header.
    expect(readU32LE(archive, 0)).toBe(LFH_SIGNATURE);
    // Compression method 0 = STORE.
    expect(readU16LE(archive, 8)).toBe(0);
    // Filename length: "greet.txt" is 9 bytes.
    expect(readU16LE(archive, 26)).toBe(9);

    // The body starts after the 30-byte fixed header plus the 9-byte filename.
    const headerEnd = 30 + 9;
    const stored = archive.slice(headerEnd, headerEnd + 5);
    expect(new TextDecoder().decode(stored)).toBe(payload);

    // The EOCD record closes the archive and counts exactly one entry.
    const eocdOffset = archive.byteLength - EOCD_SIZE;
    expect(readU32LE(archive, eocdOffset)).toBe(EOCD_SIGNATURE);
    expect(readU16LE(archive, eocdOffset + 8)).toBe(1);
  });

  test("3 Entries → 3 LFH + 3 central-dir-records + 1 EOCD", async () => {
    const source = fromEntries([
      { path: "a.txt", data: fromString("aaa") },
      { path: "b.txt", data: fromString("bbb") },
      { path: "c.txt", data: fromString("ccc") },
    ]);
    const archive = await collect(createZipStream(source));

    // Only the EOCD counters are inspected here: both must report three entries.
    const eocdOffset = archive.byteLength - EOCD_SIZE;
    expect(readU32LE(archive, eocdOffset)).toBe(EOCD_SIGNATURE);
    expect(readU16LE(archive, eocdOffset + 8)).toBe(3);
    expect(readU16LE(archive, eocdOffset + 10)).toBe(3);
  });
});
110
+
111
// CRC layer: the CRC-32 stored in the local file header must match known
// reference values.
describe("createZipStream :: CRC32 correctness", () => {
  // Builds a single-entry archive and returns the CRC-32 field of its local
  // file header (offset 14).
  async function storedCrc(path: string, data: AsyncIterable<Uint8Array>): Promise<number> {
    const archive = await collect(createZipStream(fromEntries([{ path, data }])));
    return readU32LE(archive, 14);
  }

  test("CRC32 von 'hello' matcht IEEE-802.3-Reference (0x3610a686)", async () => {
    // Reference value obtained with:
    //   python3 -c 'import zlib; print(hex(zlib.crc32(b"hello")))'
    const crc = await storedCrc("x.txt", fromString("hello"));
    expect(crc).toBe(0x3610a686);
  });

  test("CRC32 von '123456789' matcht Industrie-Standard-Reference (0xCBF43926)", async () => {
    // "123456789" -> 0xCBF43926 is the standard check value for the CRC-32
    // variant used by ZIP and gzip. A mismatch here means the CRC algorithm
    // itself is broken, not just one table entry.
    const crc = await storedCrc("x.txt", fromString("123456789"));
    expect(crc).toBe(0xcbf43926);
  });

  test("CRC32 von leerem Body = 0", async () => {
    const crc = await storedCrc("empty.txt", fromBytes(new Uint8Array(0)));
    expect(crc).toBe(0);
  });
});
140
+
141
// The language-encoding flag tells decoders to read filenames as UTF-8.
describe("createZipStream :: UTF-8 filename support", () => {
  const UTF8_FLAG = 0x0800; // general-purpose bit 11

  test("General-Purpose-Flag Bit 11 (0x0800) ist gesetzt", async () => {
    const source = fromEntries([{ path: "ascii.txt", data: fromString("x") }]);
    const archive = await collect(createZipStream(source));

    // The general-purpose flags live at offset 6 of the local file header.
    const generalPurposeFlags = readU16LE(archive, 6);
    expect(generalPurposeFlags & UTF8_FLAG).toBe(0x0800);
  });
});
151
+
152
describe("createZipStream :: format limits (ZIP64-Pre-Check)", () => {
  // The per-entry >4 GB limit is not exercised here: a body that large cannot
  // be allocated inside a test. The entry-count limit below covers the
  // parallel constraint and guards it against refactors.
  // NOTE(review): the >4 GB pre-check is presumably implemented in
  // createZipStream; it is not visible from this file.

  test("Archive >65535 Entries wirft mit klarer Begruendung", async () => {
    // 65536 entries is the smallest count that crosses the limit. The bodies
    // stay empty because this test targets the entry-count cap, not the
    // body-size cap.
    async function* manyEntries(): AsyncIterable<ZipEntry> {
      let index = 0;
      while (index < 65536) {
        yield { path: `e${index}.txt`, data: fromBytes(new Uint8Array(0)) };
        index += 1;
      }
    }

    const draining = collect(createZipStream(manyEntries()));
    await expect(draining).rejects.toThrow(/exceeds 65535-entry limit/);
  }, 30_000); // generous timeout: iterating 65536 entries is slow
});
171
+
172
+ // **Plattform-Abhaengigkeit:** dieser describe braucht das `unzip`-
173
+ // shell-binary (Info-ZIP). macOS + Linux haben das standard-installiert,
174
+ // Windows-CI muesste skippen. Repo laeuft aktuell nicht auf Windows-CI,
175
+ // daher kein `test.skipIf` — wenn das je dazukommt, hier Conditional-
176
+ // Skip via `which unzip`-Check ergaenzen.
177
// Roundtrip layer: every archive is written to a temp directory and handed to
// the Info-ZIP `unzip` binary, so the output is validated by a real decoder
// rather than only by our own reading of the format.
describe("createZipStream :: real-decoder roundtrip (unzip shell-binary)", () => {
  type UnzipResult = { stdout: string; stderr: string; code: number };

  let tmpDir: string;

  beforeEach(async () => {
    tmpDir = await mkdtemp(join(tmpdir(), "kumiko-zip-test-"));
  });

  afterEach(async () => {
    await rm(tmpDir, { recursive: true, force: true });
  });

  // Runs `unzip` with the given arguments and resolves once the process has
  // closed. Output is buffered as raw chunks and decoded once at the end, so a
  // multi-byte UTF-8 sequence split across two chunks is not corrupted.
  // `code` is -1 when the process ended without an exit code (e.g. killed by
  // a signal). Rejects only when the binary cannot be spawned at all.
  function spawnUnzip(args: string[]): Promise<UnzipResult> {
    return new Promise((resolve, reject) => {
      const proc = spawn("unzip", args);
      const stdoutChunks: Buffer[] = [];
      const stderrChunks: Buffer[] = [];
      proc.stdout.on("data", (d: Buffer) => stdoutChunks.push(d));
      proc.stderr.on("data", (d: Buffer) => stderrChunks.push(d));
      proc.on("error", reject);
      proc.on("close", (code) =>
        resolve({
          stdout: Buffer.concat(stdoutChunks).toString(),
          stderr: Buffer.concat(stderrChunks).toString(),
          code: code ?? -1,
        }),
      );
    });
  }

  // Builds an archive from `entries`, writes it into the per-test temp
  // directory and returns its path.
  async function writeArchive(fileName: string, entries: ZipEntry[]): Promise<string> {
    const zip = await collect(createZipStream(fromEntries(entries)));
    const zipPath = join(tmpDir, fileName);
    await writeFile(zipPath, zip);
    return zipPath;
  }

  // Extracts the archive into `<tmpDir>/out` and returns that directory.
  // A non-zero exit code fails the test right here with unzip's stderr,
  // instead of surfacing later as an unrelated ENOENT from readFile.
  async function extractAll(zipPath: string): Promise<string> {
    const extractDir = join(tmpDir, "out");
    const { code, stderr } = await spawnUnzip(["-d", extractDir, zipPath]);
    if (code !== 0) {
      throw new Error(`unzip -d exited with code ${code}: ${stderr}`);
    }
    return extractDir;
  }

  test("Info-ZIP unzip -l listet die Entries korrekt", async () => {
    const zipPath = await writeArchive("test.zip", [
      { path: "profile.json", data: fromString('{"name":"Alice"}') },
      { path: "notes/hello.txt", data: fromString("Hello world") },
    ]);

    const { stdout, code } = await spawnUnzip(["-l", zipPath]);
    expect(code).toBe(0);
    expect(stdout).toContain("profile.json");
    expect(stdout).toContain("notes/hello.txt");
  });

  test("Info-ZIP unzip -p extrahiert exakt den Original-Body", async () => {
    const original = '{"key":"value","arr":[1,2,3]}';
    const zipPath = await writeArchive("data.zip", [
      { path: "data.json", data: fromString(original) },
    ]);

    const { stdout, code } = await spawnUnzip(["-p", zipPath, "data.json"]);
    expect(code).toBe(0);
    expect(stdout).toBe(original);
  });

  test("Info-ZIP entpackt binary-Daten (UTF-8 + non-ASCII bytes) byte-identisch", async () => {
    // Mix of ASCII, multi-byte UTF-8 (umlauts, emoji) and non-printable bytes.
    const utf8 = new TextEncoder().encode("Strüße ümläute 🚀");
    const binary = new Uint8Array([0x00, 0x01, 0xff, 0xfe, 0x80, 0x7f]);
    const combined = new Uint8Array(utf8.byteLength + binary.byteLength);
    combined.set(utf8, 0);
    combined.set(binary, utf8.byteLength);

    const zipPath = await writeArchive("binary.zip", [
      { path: "binary.bin", data: fromBytes(combined) },
    ]);

    // Extract to disk and read the raw bytes back instead of decoding
    // `unzip -p` stdout, so no text-encoding layer can alter the payload.
    const extractDir = await extractAll(zipPath);
    const { readFile } = await import("node:fs/promises");
    const extracted = await readFile(join(extractDir, "binary.bin"));
    expect(Array.from(extracted)).toEqual(Array.from(combined));
  });

  test("UTF-8 filename mit Umlauten (Bügel.pdf) wird korrekt entpackt", async () => {
    // Pins that a filename containing an umlaut survives a roundtrip through
    // a real decoder; this relies on the archive marking names as UTF-8
    // (general-purpose flag 0x0800).
    const zipPath = await writeArchive("umlaut.zip", [
      { path: "Bügel.pdf", data: fromString("umlaut-content") },
    ]);

    const extractDir = await extractAll(zipPath);

    const { readFile, readdir } = await import("node:fs/promises");
    const entries = await readdir(extractDir);
    expect(entries).toContain("Bügel.pdf");
    expect(await readFile(join(extractDir, "Bügel.pdf"), "utf8")).toBe("umlaut-content");
  });

  test("3-Entry-ZIP: alle Entries entpackbar + byte-identisch", async () => {
    const zipPath = await writeArchive("multi.zip", [
      { path: "a.json", data: fromString('{"a":1}') },
      { path: "subdir/b.json", data: fromString('{"b":2}') },
      { path: "subdir/nested/c.json", data: fromString('{"c":3}') },
    ]);

    const extractDir = await extractAll(zipPath);

    const { readFile } = await import("node:fs/promises");
    expect(await readFile(join(extractDir, "a.json"), "utf8")).toBe('{"a":1}');
    expect(await readFile(join(extractDir, "subdir/b.json"), "utf8")).toBe('{"b":2}');
    expect(await readFile(join(extractDir, "subdir/nested/c.json"), "utf8")).toBe('{"c":3}');
  });
});
296
+
297
describe("createZipStream :: mtime in UTC (Audit-Drift-Schutz)", () => {
  test("mtime wird als UTC encoded, nicht als lokale Zeitzone", async () => {
    // 2026-05-09 14:30:00 UTC is 16:30:00 CEST. An implementation that used
    // the server's local zone on a CEST machine would store 16:30; the test
    // pins 14:30, i.e. UTC.
    const epochMs = Date.UTC(2026, 4, 9, 14, 30, 0);
    const fixedUtc = getTemporal().Instant.fromEpochMilliseconds(epochMs);
    const source = fromEntries([{ path: "x.txt", data: fromString("x"), mtime: fixedUtc }]);
    const archive = await collect(createZipStream(source));

    // Local file header: DOS time at offset 10, DOS date at offset 12.
    const dosTime = readU16LE(archive, 10);
    const dosDate = readU16LE(archive, 12);

    // DOS time layout: bits 11-15 hour, bits 5-10 minute, bits 0-4 seconds/2.
    const decodedTime = {
      hour: (dosTime >> 11) & 0x1f,
      minute: (dosTime >> 5) & 0x3f,
    };
    expect(decodedTime.hour).toBe(14);
    expect(decodedTime.minute).toBe(30);

    // DOS date layout: bits 9-15 year-1980, bits 5-8 month, bits 0-4 day.
    const decodedDate = {
      year: ((dosDate >> 9) & 0x7f) + 1980,
      month: (dosDate >> 5) & 0x0f,
      day: dosDate & 0x1f,
    };
    expect(decodedDate.year).toBe(2026);
    expect(decodedDate.month).toBe(5);
    expect(decodedDate.day).toBe(9);
  });
});
323
+
324
// Streaming layer: entries and chunks that arrive late must still be picked up.
describe("createZipStream :: streaming property", () => {
  // Resolves after `ms` milliseconds; simulates a slow producer.
  const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

  test("Lazy entries (async generator mit setTimeout) werden korrekt verarbeitet", async () => {
    async function* lazyEntries(): AsyncIterable<ZipEntry> {
      await pause(5);
      yield { path: "lazy1.txt", data: fromString("delayed1") };
      await pause(5);
      yield { path: "lazy2.txt", data: fromString("delayed2") };
    }

    const archive = await collect(createZipStream(lazyEntries()));

    // The EOCD entry counter must see both delayed entries.
    const eocdOffset = archive.byteLength - 22;
    expect(readU16LE(archive, eocdOffset + 8)).toBe(2);
  });

  test("Lazy chunks innerhalb Entry: AsyncIterable<Uint8Array> mit setTimeout", async () => {
    async function* lazyChunks(): AsyncIterable<Uint8Array> {
      const encoder = new TextEncoder();
      await pause(2);
      yield encoder.encode("chunk1-");
      await pause(2);
      yield encoder.encode("chunk2");
    }

    const source = fromEntries([{ path: "chunked.txt", data: lazyChunks() }]);
    const archive = await collect(createZipStream(source));

    // The body follows the local file header (30 fixed bytes + filename);
    // its length is the uncompressed-size field at header offset 22.
    const nameLength = readU16LE(archive, 26);
    const bodyOffset = 30 + nameLength;
    const bodyLength = readU32LE(archive, 22);
    const stored = archive.slice(bodyOffset, bodyOffset + bodyLength);
    expect(new TextDecoder().decode(stored)).toBe("chunk1-chunk2");
  });
});
@@ -29,12 +29,50 @@ export function createInMemoryFileProvider(): InMemoryFileProvider {
29
29
  store.set(key, { data: new Uint8Array(data), mimeType });
30
30
  },
31
31
 
32
async writeStream(key, source, options) {
  // The in-memory backend has nothing to stream to: it drains the source
  // completely, concatenates the chunks and stores the result under `key`.
  // Suitable for tests, not for production-sized payloads.
  const parts: Uint8Array[] = [];
  for await (const part of source) {
    parts.push(part);
  }

  const size = parts.reduce((sum, part) => sum + part.byteLength, 0);
  const data = new Uint8Array(size);
  let cursor = 0;
  for (const part of parts) {
    data.set(part, cursor);
    cursor += part.byteLength;
  }

  const mimeType = options?.mimeType;
  store.set(key, { data, mimeType });
},
50
+
32
51
  async read(key) {
33
52
  const entry = store.get(key);
34
53
  if (!entry) throw new Error(`in-memory file not found: ${key}`);
35
54
  return new Uint8Array(entry.data);
36
55
  },
37
56
 
57
readStream(key) {
  // The lookup and the defensive copy happen right here, when readStream is
  // called. A missing key is only reported once the consumer pulls the first
  // chunk, so callers see the error inside their for-await loop. The stored
  // bytes are delivered as one single chunk.
  const found = store.get(key);
  const snapshot = found ? new Uint8Array(found.data) : null;

  async function* singleChunk() {
    if (snapshot === null) {
      throw new Error(`in-memory file not found: ${key}`);
    }
    yield snapshot;
  }

  return { [Symbol.asyncIterator]: singleChunk };
},
75
+
38
76
  async delete(key) {
39
77
  store.delete(key);
40
78
  },
@@ -25,5 +25,8 @@ export type {
25
25
  FileStorageProvider,
26
26
  FileValidationOptions,
27
27
  SignedUrlOptions,
28
+ WriteStreamOptions,
28
29
  } from "./types";
29
30
  export { buildStorageKey, parseMaxSize, validateFile } from "./types";
31
+ export type { ZipEntry } from "./zip-stream";
32
+ export { createZipStream } from "./zip-stream";
@@ -1,5 +1,7 @@
1
- import { mkdir, readFile, rm, stat, writeFile } from "node:fs/promises";
1
+ import { createReadStream, createWriteStream } from "node:fs";
2
+ import { mkdir, readFile, rename, rm, stat, unlink, writeFile } from "node:fs/promises";
2
3
  import { dirname, join } from "node:path";
4
+ import { pipeline } from "node:stream/promises";
3
5
  import type { FileStorageProvider } from "./types";
4
6
 
5
7
  // Local-filesystem backend — intended for dev + tests. Production deploys
@@ -13,11 +15,66 @@ export function createLocalProvider(basePath: string): FileStorageProvider {
13
15
  await writeFile(filePath, data);
14
16
  },
15
17
 
18
async writeStream(key, source, _options): Promise<void> {
  // Atomic write via temp file + rename: a reader of the final path sees
  // either the previous version (if any) or the complete new one, never a
  // half-written stream. `_options` is ignored by this backend.
  //
  // The temp name carries a random suffix in addition to pid and timestamp.
  // Two concurrent writes to the same key within the same millisecond would
  // otherwise share one temp file and interleave their bytes, which defeats
  // the atomicity guarantee.
  //
  // If the stream fails mid-write, temp-file cleanup is best-effort; a stale
  // `*.tmp` file may stay behind. That is an operations-hygiene issue, not a
  // correctness issue for readers of the final path.
  const filePath = join(basePath, key);
  const uniqueSuffix = Math.random().toString(36).slice(2);
  const tmpPath = `${filePath}.${process.pid}.${Date.now()}.${uniqueSuffix}.tmp`;
  await mkdir(dirname(filePath), { recursive: true });
  try {
    await pipeline(source, createWriteStream(tmpPath));
    await rename(tmpPath, filePath);
  } catch (e) {
    // Deliberately swallow cleanup failures so the original error reaches
    // the caller.
    await unlink(tmpPath).catch(() => {});
    throw e;
  }
},
42
+
16
43
  async read(key: string): Promise<Uint8Array> {
17
44
  const filePath = join(basePath, key);
18
45
  return readFile(filePath);
19
46
  },
20
47
 
48
readStream(key: string): AsyncIterable<Uint8Array> {
  // Returns a lazy iterable: the file is opened only when a consumer starts
  // iterating. Creating the fs stream eagerly would
  //  - open the file before anyone listens for errors, so a missing file
  //    emits an unhandled 'error' event (uncaught exception) whenever the
  //    consumer starts iterating later than the open attempt completes,
  //  - leak the file descriptor if the iterable is never consumed,
  //  - make a second iteration of the same iterable hit an already-spent
  //    stream.
  // With the stream created inside the generator, errors such as ENOENT
  // surface in the consumer's for-await loop on the first chunk pull, and
  // every iteration gets a fresh stream.
  const filePath = join(basePath, key);
  return {
    async *[Symbol.asyncIterator]() {
      const stream = createReadStream(filePath);
      for await (const chunk of stream) {
        // A stream opened without an encoding yields Buffers; the typings
        // also allow string, so that branch is excluded at runtime.
        if (typeof chunk === "string") {
          throw new Error(
            "local-provider readStream: unexpected string chunk (encoding leaked)",
          );
        }
        // Zero-copy view onto the Buffer's memory, typed as plain Uint8Array.
        yield new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength);
      }
    },
  };
},
77
+
21
78
  async delete(key: string): Promise<void> {
22
79
  const filePath = join(basePath, key);
23
80
  await rm(filePath, { force: true });
@@ -14,20 +14,48 @@ export type SignedUrlOptions = {
14
14
  readonly contentDisposition?: string;
15
15
  };
16
16
 
17
/**
 * Options for `writeStream`. Both fields are optional hints; the local
 * filesystem provider ignores them.
 *
 * - `mimeType`: Content-Type hint, analogous to the `mimeType` argument of
 *   `write`.
 * - `contentLength`: total byte length, for providers that need a length
 *   header up front.
 */
export type WriteStreamOptions = Readonly<{
  mimeType?: string;
  contentLength?: number;
}>;
25
+
17
26
  // Primitive storage contract: key+bytes in, bytes out. Metadata (fileName,
18
27
  // mimeType, size) lives on the FileRef row — the provider only needs to
19
28
  // shuttle bytes. `mimeType` on write() is a hint for providers that need a
20
29
  // Content-Type header (S3/R2/…); local filesystems can ignore it.
21
30
  //
22
- // `getSignedUrl` is optional: object-store backends (S3/R2/GCS) implement it
23
- // so clients can download directly from the provider after the server has
24
- // checked access offloads bandwidth and enables browser-native caching.
25
- // Filesystem providers leave it undefined; the route then returns 501 and
26
- // the client falls back to streaming via GET /files/:id. Callers must
27
- // feature-detect via `typeof provider.getSignedUrl === "function"`.
31
+ // **Streaming (`writeStream` + `readStream`) ist PFLICHT** beide
32
+ // Methoden sind required, kein optional-feature. Begruendung:
33
+ // - User-Data-Export (Atom 3c) braucht beide, sonst silent fail bei
34
+ // erstem Job mit fileRefs in Production.
35
+ // - Apps die nur kleine Files (Avatar-Uploads, Profile-Pics) handeln,
36
+ // koennen trivial via `oneShot`-Pattern den Stream-Contract erfuellen
37
+ // (single-chunk yield von write/read-Bytes). 5 Zeilen pro Provider.
38
+ // - Optional-Type wuerde TypeScript-Lying erlauben: Type sagt "kann
39
+ // fehlen", Worker throws zur Runtime → App-Authors sehen den Bug
40
+ // erst in Production. Required + TS-enforced ist ehrlich.
41
+ //
42
+ // `getSignedUrl` BLEIBT optional: object-store backends (S3/R2/GCS)
43
+ // implement it so clients can download directly from the provider after
44
+ // the server has checked access — offloads bandwidth and enables browser-
45
+ // native caching. Filesystem providers leave it undefined; the route then
46
+ // returns 501 and the client falls back to streaming via GET /files/:id.
47
+ // Callers must feature-detect via `typeof provider.getSignedUrl === "function"`.
48
+ // Hier ist Optional korrekt weil die Fallback-Pfad existiert — kein
49
+ // silent-fail, sondern 501 + alternativer download.
28
50
  export type FileStorageProvider = {
29
51
  write(key: string, data: Uint8Array, mimeType?: string): Promise<void>;
52
+ writeStream(
53
+ key: string,
54
+ source: AsyncIterable<Uint8Array>,
55
+ options?: WriteStreamOptions,
56
+ ): Promise<void>;
30
57
  read(key: string): Promise<Uint8Array>;
58
+ readStream(key: string): AsyncIterable<Uint8Array>;
31
59
  delete(key: string): Promise<void>;
32
60
  exists(key: string): Promise<boolean>;
33
61
  getSignedUrl?(key: string, expiresInSeconds: number, options?: SignedUrlOptions): Promise<string>;