@bod.ee/db 0.12.1 → 0.12.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -133,9 +133,14 @@ export class VFSEngine {
133
133
 
134
134
  async write(virtualPath: string, data: Uint8Array, mime?: string): Promise<FileStat> {
135
135
  const vp = normalizePath(virtualPath);
136
- const existing = this.db.get(this.metaPath(vp)) as Record<string, unknown> | null;
136
+ const existing = this.db.get(this.metaPath(vp)) as FileStat | null;
137
137
  const fileId = this.options.pathAsFileId ? vp : ((existing?.fileId as string) || generatePushId());
138
138
 
139
+ const hash = await computeHash(data);
140
+
141
+ // Skip write entirely if content hasn't changed (prevents unnecessary replication events)
142
+ if (existing?.hash === hash) return existing;
143
+
139
144
  await this.backend.write(fileId, data);
140
145
 
141
146
  const name = vp.split('/').pop()!;
@@ -147,6 +152,7 @@ export class VFSEngine {
147
152
  mtime: Date.now(),
148
153
  fileId,
149
154
  isDir: false,
155
+ hash,
150
156
  };
151
157
  this.db.set(this.metaPath(vp), stat);
152
158
  return stat;
@@ -251,13 +257,18 @@ export class VFSEngine {
251
257
 
252
258
  const newName = dstPath.split('/').pop()!;
253
259
  const fileId = this.options.pathAsFileId ? dstPath : meta.fileId;
254
- const updated: FileStat = { ...meta, name: newName, path: dstPath, fileId, mtime: Date.now() };
260
+ const updated: FileStat = { ...meta, name: newName, path: dstPath, fileId, mtime: Date.now(), hash: meta.hash };
255
261
  this.db.set(this.metaPath(dstPath), updated);
256
262
  this.db.delete(this.containerPath(srcPath));
257
263
  return updated;
258
264
  }
259
265
  }
260
266
 
267
+ async function computeHash(data: Uint8Array): Promise<string> {
268
+ const digest = await crypto.subtle.digest('SHA-256', data);
269
+ return Array.from(new Uint8Array(digest)).map(b => b.toString(16).padStart(2, '0')).join('');
270
+ }
271
+
261
272
  function guessMime(name: string): string {
262
273
  const ext = name.split('.').pop()?.toLowerCase();
263
274
  const mimes: Record<string, string> = {
@@ -26,7 +26,7 @@ export type ClientMessage =
26
26
  | { id: string; op: 'vector-search'; query: number[]; path?: string; limit?: number; threshold?: number }
27
27
  | { id: string; op: 'vector-store'; path: string; embedding: number[] }
28
28
  | { id: string; op: 'stream-snapshot'; path: string }
29
- | { id: string; op: 'stream-materialize'; path: string; keepKey?: string }
29
+ | { id: string; op: 'stream-materialize'; path: string; keepKey?: string; batchSize?: number; cursor?: string }
30
30
  | { id: string; op: 'stream-compact'; path: string; maxAge?: number; maxCount?: number; keepKey?: string }
31
31
  | { id: string; op: 'stream-reset'; path: string }
32
32
  // VFS ops
@@ -101,6 +101,7 @@ export interface FileStat {
101
101
  mtime: number;
102
102
  fileId?: string;
103
103
  isDir: boolean;
104
+ hash?: string;
104
105
  }
105
106
 
106
107
  export type BatchOp =
@@ -0,0 +1,370 @@
1
+ import { describe, it, expect, afterEach } from 'bun:test';
2
+ import { BodDB } from '../src/server/BodDB.ts';
3
+ import { BodClient } from '../src/client/BodClient.ts';
4
+
5
+ const wait = (ms: number) => new Promise(r => setTimeout(r, ms));
6
+ let nextPort = 27400 + Math.floor(Math.random() * 1000);
7
+
8
+ /**
9
+ * Massive load tests — battle-test cursor-based bootstrap + threshold compact
10
+ * under realistic and extreme conditions.
11
+ */
12
+ describe('repl load test', () => {
13
+ const instances: BodDB[] = [];
14
+ const clients: BodClient[] = [];
15
+
16
+ afterEach(() => {
17
+ for (const c of clients) c.disconnect();
18
+ clients.length = 0;
19
+ for (const db of [...instances].reverse()) db.close();
20
+ instances.length = 0;
21
+ });
22
+
23
+ function port() { return nextPort++; }
24
+
25
+ function primary(opts?: { compact?: any; autoCompactThreshold?: number }) {
26
+ const p = port();
27
+ const db = new BodDB({
28
+ path: ':memory:',
29
+ sweepInterval: 0,
30
+ replication: { role: 'primary', compact: opts?.compact ?? {}, autoCompactThreshold: opts?.autoCompactThreshold ?? 0 },
31
+ });
32
+ db.replication!.start();
33
+ db.serve({ port: p });
34
+ instances.push(db);
35
+ return { db, port: p };
36
+ }
37
+
38
+ function replica(primaryPort: number, opts?: Partial<{ replicaId: string; fullBootstrap: boolean }>) {
39
+ const p = port();
40
+ const db = new BodDB({
41
+ path: ':memory:',
42
+ sweepInterval: 0,
43
+ replication: {
44
+ role: 'replica',
45
+ primaryUrl: `ws://localhost:${primaryPort}`,
46
+ replicaId: opts?.replicaId ?? `load-replica-${p}`,
47
+ fullBootstrap: opts?.fullBootstrap ?? true,
48
+ },
49
+ });
50
+ db.serve({ port: p });
51
+ instances.push(db);
52
+ return { db, port: p };
53
+ }
54
+
55
+ function connect(p: number, opts?: any) {
56
+ const c = new BodClient({ url: `ws://localhost:${p}`, ...opts });
57
+ clients.push(c);
58
+ return c;
59
+ }
60
+
61
+ // ─── 1. 20k entries: cursor pagination end-to-end ───
62
+
63
+ it('20k entries: cursor pagination collects every key', async () => {
64
+ const { db, port: p } = primary();
65
+ for (let i = 0; i < 20_000; i++) {
66
+ db.set(`items/i${i}`, { v: i, ts: Date.now() });
67
+ }
68
+
69
+ const client = connect(p);
70
+ await client.connect();
71
+
72
+ const keys = new Set<string>();
73
+ let cursor: string | undefined;
74
+ let pages = 0;
75
+ const t0 = Date.now();
76
+ do {
77
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 500, cursor });
78
+ for (const k of Object.keys(page.data)) keys.add(k);
79
+ cursor = page.nextCursor;
80
+ pages++;
81
+ } while (cursor);
82
+ const elapsed = Date.now() - t0;
83
+
84
+ console.log(` 20k entries: ${pages} pages, ${keys.size} unique keys, ${elapsed}ms`);
85
+ expect(keys.size).toBe(20_000);
86
+ expect(pages).toBeGreaterThanOrEqual(40); // 20k / 500
87
+ }, 30_000);
88
+
89
+ // ─── 2. Cursor vs monolithic: per-page response stays small ───
90
+
91
+ it('cursor pages stay under 1MB each while monolithic is huge', async () => {
92
+ const { db, port: p } = primary();
93
+ const payload = 'z'.repeat(500);
94
+ for (let i = 0; i < 5000; i++) {
95
+ db.set(`big/p${i}`, { data: payload, i });
96
+ }
97
+
98
+ const client = connect(p);
99
+ await client.connect();
100
+
101
+ // Monolithic
102
+ const mono = await client.streamMaterialize('_repl', { keepKey: 'path' });
103
+ const monoSize = JSON.stringify(mono).length;
104
+
105
+ // Cursor-based
106
+ let maxPageSize = 0;
107
+ let cursor: string | undefined;
108
+ do {
109
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 200, cursor });
110
+ const sz = JSON.stringify(page.data).length;
111
+ if (sz > maxPageSize) maxPageSize = sz;
112
+ cursor = page.nextCursor;
113
+ } while (cursor);
114
+
115
+ console.log(` monolithic: ${(monoSize / 1024 / 1024).toFixed(2)}MB, max page: ${(maxPageSize / 1024).toFixed(0)}KB`);
116
+ expect(monoSize).toBeGreaterThan(2 * 1024 * 1024); // >2MB total
117
+ expect(maxPageSize).toBeLessThan(1024 * 1024); // each page <1MB
118
+ }, 15_000);
119
+
120
+ // ─── 3. Auto-compact under sustained write load ───
121
+
122
+ it('auto-compact keeps _repl bounded under sustained 10k writes', () => {
123
+ const { db } = primary({ compact: { maxCount: 200, keepKey: 'path' }, autoCompactThreshold: 500 });
124
+
125
+ for (let i = 0; i < 10_000; i++) {
126
+ db.set(`stream/key${i % 300}`, { round: Math.floor(i / 300), i });
127
+ }
128
+
129
+ const repl = db.get('_repl') as Record<string, any>;
130
+ const count = repl ? Object.keys(repl).length : 0;
131
+ console.log(` 10k writes (300 unique paths), threshold=500, maxCount=200: ${count} _repl entries`);
132
+ // Without compact: 10k. With compact every 500 writes keeping 200: should be way under 1000.
133
+ expect(count).toBeLessThan(1000);
134
+ }, 15_000);
135
+
136
+ // ─── 4. Replica bootstrap with 10k entries via cursor ───
137
+
138
+ it('replica bootstraps 10k entries via cursor without timeout', async () => {
139
+ const { db: p, port: pp } = primary();
140
+ for (let i = 0; i < 10_000; i++) {
141
+ p.set(`data/node${i}`, { value: i, name: `node-${i}`, tags: ['a', 'b'] });
142
+ }
143
+
144
+ const { db: r } = replica(pp);
145
+ const t0 = Date.now();
146
+ await r.replication!.start();
147
+ const elapsed = Date.now() - t0;
148
+
149
+ console.log(` 10k entry replica bootstrap: ${elapsed}ms`);
150
+
151
+ await wait(500);
152
+ // Spot-check
153
+ for (const idx of [0, 999, 5000, 9999]) {
154
+ const val = r.get(`data/node${idx}`) as any;
155
+ expect(val?.value).toBe(idx);
156
+ }
157
+ }, 30_000);
158
+
159
+ // ─── 5. Multiple replicas bootstrap concurrently ───
160
+
161
+ it('3 replicas bootstrap concurrently from same primary (10k entries)', async () => {
162
+ const { db: p, port: pp } = primary();
163
+ for (let i = 0; i < 10_000; i++) {
164
+ p.set(`shared/item${i}`, { v: i });
165
+ }
166
+
167
+ const replicas = [replica(pp), replica(pp), replica(pp)];
168
+ const t0 = Date.now();
169
+ await Promise.all(replicas.map(r => r.db.replication!.start()));
170
+ const elapsed = Date.now() - t0;
171
+
172
+ console.log(` 3 concurrent replica bootstraps (10k): ${elapsed}ms`);
173
+
174
+ await wait(500);
175
+ for (const r of replicas) {
176
+ const v0 = r.db.get('shared/item0') as any;
177
+ const v9999 = r.db.get('shared/item9999') as any;
178
+ expect(v0?.v).toBe(0);
179
+ expect(v9999?.v).toBe(9999);
180
+ }
181
+ }, 45_000);
182
+
183
+ // ─── 6. Writes during bootstrap: replica catches up via stream sub ───
184
+
185
+ it('writes during bootstrap are caught via ongoing stream subscription', async () => {
186
+ const { db: p, port: pp } = primary();
187
+ // Pre-fill
188
+ for (let i = 0; i < 5000; i++) {
189
+ p.set(`pre/item${i}`, { v: i });
190
+ }
191
+
192
+ const { db: r } = replica(pp);
193
+ // Start replica (bootstrap starts)
194
+ const startPromise = r.replication!.start();
195
+
196
+ // Write more to primary while bootstrap is in progress
197
+ for (let i = 0; i < 500; i++) {
198
+ p.set(`live/item${i}`, { v: i + 100_000 });
199
+ }
200
+
201
+ await startPromise;
202
+ // Give stream sub time to deliver live writes
203
+ await wait(1000);
204
+
205
+ // Pre-fill data should be there
206
+ const pre0 = r.get('pre/item0') as any;
207
+ expect(pre0?.v).toBe(0);
208
+ const pre4999 = r.get('pre/item4999') as any;
209
+ expect(pre4999?.v).toBe(4999);
210
+
211
+ // Live writes should eventually arrive
212
+ const live499 = r.get('live/item499') as any;
213
+ expect(live499?.v).toBe(100_499);
214
+ }, 30_000);
215
+
216
+ // ─── 7. Heavy overwrite scenario: same 50 paths written 1000× each ───
217
+
218
+ it('50 paths × 1000 overwrites: compact deduplicates correctly', () => {
219
+ const { db } = primary({ compact: { maxCount: 100, keepKey: 'path' }, autoCompactThreshold: 1000 });
220
+
221
+ for (let round = 0; round < 1000; round++) {
222
+ for (let i = 0; i < 50; i++) {
223
+ db.set(`hot/key${i}`, { round, value: round * 50 + i });
224
+ }
225
+ }
226
+
227
+ const repl = db.get('_repl') as Record<string, any>;
228
+ const count = repl ? Object.keys(repl).length : 0;
229
+ console.log(` 50×1000 overwrites: ${count} _repl entries (expect ≤ ~1100)`);
230
+ // 50k total writes. Compact every 1k writes keeping 100. Should be bounded.
231
+ expect(count).toBeLessThan(1500);
232
+
233
+ // Verify latest values survived compaction
234
+ const materialized = db.stream.materialize('_repl', { keepKey: 'path' });
235
+ const paths = Object.keys(materialized);
236
+ expect(paths.length).toBe(50);
237
+ }, 30_000);
238
+
239
+ // ─── 8. Cursor pagination with snapshot (post-compact) ───
240
+
241
+ it('cursor pagination works correctly after compaction (snapshot + live events)', async () => {
242
+ const { db, port: p } = primary();
243
+ // Write 2000, compact to 200, write 800 more
244
+ for (let i = 0; i < 2000; i++) db.set(`a/item${i}`, { v: i });
245
+ db.stream.compact('_repl', { maxCount: 200, keepKey: 'path' });
246
+ for (let i = 2000; i < 2800; i++) db.set(`a/item${i}`, { v: i });
247
+
248
+ const client = connect(p);
249
+ await client.connect();
250
+
251
+ const keys = new Set<string>();
252
+ let cursor: string | undefined;
253
+ let pages = 0;
254
+ do {
255
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 100, cursor });
256
+ for (const k of Object.keys(page.data)) keys.add(k);
257
+ cursor = page.nextCursor;
258
+ pages++;
259
+ } while (cursor);
260
+
261
+ console.log(` 2000 + compact + 800 more: ${keys.size} keys in ${pages} pages`);
262
+ // All 2800 unique paths should be present (snapshot has older ones, events have newer)
263
+ expect(keys.size).toBe(2800);
264
+ }, 15_000);
265
+
266
+ // ─── 9. Replica with auto-compact primary: data integrity ───
267
+
268
+ it('replica gets correct data when primary auto-compacts during heavy writes', async () => {
269
+ const { db: p, port: pp } = primary({ compact: { maxCount: 300, keepKey: 'path' }, autoCompactThreshold: 500 });
270
+
271
+ // 5000 writes — auto-compact fires multiple times
272
+ for (let i = 0; i < 5000; i++) {
273
+ p.set(`verified/item${i}`, { value: i * 7, tag: 'check' });
274
+ }
275
+
276
+ const { db: r } = replica(pp);
277
+ await r.replication!.start();
278
+ await wait(500);
279
+
280
+ // Exhaustive integrity check on a sample
281
+ const sample = [0, 100, 999, 2500, 4000, 4999];
282
+ for (const idx of sample) {
283
+ const val = r.get(`verified/item${idx}`) as any;
284
+ expect(val?.value).toBe(idx * 7);
285
+ expect(val?.tag).toBe('check');
286
+ }
287
+ }, 30_000);
288
+
289
+ // ─── 10. Mixed deletes + sets under load ───
290
+
291
+ it('deletes replicate correctly through cursor-based bootstrap (no fullBootstrap)', async () => {
292
+ const { db: p, port: pp } = primary();
293
+
294
+ // Create 1000, then delete half
295
+ for (let i = 0; i < 1000; i++) {
296
+ p.set(`mix/item${i}`, { v: i });
297
+ }
298
+ for (let i = 0; i < 1000; i += 2) {
299
+ p.delete(`mix/item${i}`);
300
+ }
301
+
302
+ // Disable fullBootstrap so only _repl stream materialize is used
303
+ const { db: r } = replica(pp, { fullBootstrap: false });
304
+ await r.replication!.start();
305
+ await wait(500);
306
+
307
+ // Even indices: _repl has set then delete — materialize with keepKey=path keeps last op (delete)
308
+ // But materialize folds by keepKey, and delete events have op:'delete' — they apply as db.delete()
309
+ // Odd indices should exist from set events
310
+ for (const i of [1, 3, 99, 999]) {
311
+ const val = r.get(`mix/item${i}`) as any;
312
+ expect(val?.v).toBe(i);
313
+ }
314
+ // Verify primary has them deleted
315
+ for (const i of [0, 2, 100, 998]) {
316
+ expect(p.get(`mix/item${i}`)).toBeNull();
317
+ }
318
+ }, 15_000);
319
+
320
+ // ─── 11. Rapid batchSize=1 pagination (worst case) ───
321
+
322
+ it('batchSize=1 pagination still completes for 500 entries', async () => {
323
+ const { db, port: p } = primary();
324
+ for (let i = 0; i < 500; i++) db.set(`tiny/k${i}`, { i });
325
+
326
+ const client = connect(p);
327
+ await client.connect();
328
+
329
+ const keys = new Set<string>();
330
+ let cursor: string | undefined;
331
+ let pages = 0;
332
+ const t0 = Date.now();
333
+ do {
334
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 1, cursor });
335
+ for (const k of Object.keys(page.data)) keys.add(k);
336
+ cursor = page.nextCursor;
337
+ pages++;
338
+ } while (cursor);
339
+ const elapsed = Date.now() - t0;
340
+
341
+ console.log(` batchSize=1 over 500 entries: ${pages} pages, ${elapsed}ms`);
342
+ expect(keys.size).toBe(500);
343
+ expect(pages).toBeGreaterThanOrEqual(500);
344
+ }, 30_000);
345
+
346
+ // ─── 12. Throughput benchmark: writes/sec with auto-compact ───
347
+
348
+ it('write throughput with auto-compact enabled', () => {
349
+ const { db: dbCompact } = primary({ compact: { maxCount: 200, keepKey: 'path' }, autoCompactThreshold: 500 });
350
+ const { db: dbPlain } = primary();
351
+
352
+ const N = 10_000;
353
+
354
+ const t0 = Date.now();
355
+ for (let i = 0; i < N; i++) dbPlain.set(`bench/k${i}`, { i });
356
+ const plainMs = Date.now() - t0;
357
+
358
+ const t1 = Date.now();
359
+ for (let i = 0; i < N; i++) dbCompact.set(`bench/k${i}`, { i });
360
+ const compactMs = Date.now() - t1;
361
+
362
+ const plainWps = Math.round(N / (plainMs / 1000));
363
+ const compactWps = Math.round(N / (compactMs / 1000));
364
+ const overhead = ((compactMs - plainMs) / plainMs * 100).toFixed(1);
365
+
366
+ console.log(` ${N} writes — plain: ${plainMs}ms (${plainWps} w/s), compact: ${compactMs}ms (${compactWps} w/s), overhead: ${overhead}%`);
367
+ // Auto-compact overhead should stay bounded — assert < 3× plain write time (allows up to ~200% overhead to avoid flakiness)
368
+ expect(compactMs).toBeLessThan(plainMs * 3);
369
+ }, 30_000);
370
+ });