@bod.ee/db 0.12.2 → 0.12.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,7 +86,7 @@ Push paths are append-only logs. `StreamEngine` adds consumer group offsets (`_s
86
86
  `MQEngine` owns all MQ SQL via `storage.db.prepare()` — same pattern as StreamEngine. Columns: `mq_status` (pending/inflight), `mq_inflight_until` (Unix ms), `mq_delivery_count`. `fetch()` uses SQLite transaction with TOCTOU guard (`changes > 0`). Ack = DELETE. Sweep reclaims expired inflight; exhausted messages move to DLQ at `<queue>/_dlq/<key>`. Per-queue options via longest prefix match on `queues` config.
87
87
 
88
88
  ### Replication
89
- `ReplicationEngine` — primary/replica + multi-source feed subscriptions via `_repl` stream. Primary: `onWrite` hooks emit events to `_repl` stream (updates flattened to per-path sets). Replica: bootstraps via `streamMaterialize`, subscribes for ongoing events, proxies writes to primary. Guards: `_replaying` prevents re-emission, `_emitting` prevents recursion from `db.push('_repl')`. Sweep deletes are replicated. Transport checks `isReplica` and forwards write ops.
89
+ `ReplicationEngine` — primary/replica + multi-source feed subscriptions via `_repl` stream. Primary: `onWrite` hooks emit events to `_repl` stream (updates flattened to per-path sets). Auto-compact on write threshold (`autoCompactThreshold`, default 500) + on startup keeps `_repl` bounded. Replica: bootstraps via cursor-based `streamMaterialize` pagination (`batchSize: 200`), subscribes for ongoing events, proxies writes to primary. `bootstrapFromStream()` helper handles all 3 bootstrap sites (replica, router-based, sources). Guards: `_replaying` prevents re-emission, `_emitting` prevents recursion from `db.push('_repl')`. Sweep deletes are replicated. Transport checks `isReplica` and forwards write ops.
90
90
 
91
91
  **Sources** (`ReplicationSource[]`): independent of role. Each source creates a `BodClient`, bootstraps filtered `_repl` snapshot, subscribes for ongoing events. `matchesSourcePaths()` filters by path prefix. `remapPath()` prepends `localPrefix`. Events applied with `_replaying=true`. Sources connect via `Promise.allSettled` — individual failures logged, others continue. Deterministic `groupId` default: `source_${url}_${paths.join('+')}`.
92
92
 
@@ -301,6 +301,11 @@ ws.send(JSON.stringify({ id: '20', op: 'batch-sub', subscriptions: [
301
301
  // Stream extended ops
302
302
  ws.send(JSON.stringify({ id: '21', op: 'stream-snapshot', path: 'events/orders' }));
303
303
  ws.send(JSON.stringify({ id: '21', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId' }));
304
+ // Cursor-based pagination (for large streams):
305
+ ws.send(JSON.stringify({ id: '21b', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId', batchSize: 200 }));
306
+ // → { id: '21b', ok: true, data: { data: {...}, nextCursor: 'abc123' } }
307
+ // Follow-up page:
308
+ ws.send(JSON.stringify({ id: '21c', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId', batchSize: 200, cursor: 'abc123' }));
304
309
  ws.send(JSON.stringify({ id: '22', op: 'stream-compact', path: 'events/orders', maxAge: 86400 }));
305
310
  ws.send(JSON.stringify({ id: '23', op: 'stream-reset', path: 'events/orders' }));
306
311
  ```
@@ -450,6 +455,13 @@ const similar = await client.vectorSearch({ query: [0.1, 0.2, 0.3], path: 'docs'
450
455
  // Stream snapshot, materialize, compact, reset
451
456
  const snap = await client.streamSnapshot('events/orders');
452
457
  const view = await client.streamMaterialize('events/orders', { keepKey: 'orderId' });
458
+ // Cursor-based materialize for large streams (avoids huge single response):
459
+ let cursor: string | undefined;
460
+ do {
461
+ const page = await client.streamMaterialize('events/orders', { keepKey: 'orderId', batchSize: 200, cursor });
462
+ // page.data contains this batch, page.nextCursor is undefined when done
463
+ cursor = page.nextCursor;
464
+ } while (cursor);
453
465
  await client.streamCompact('events/orders', { maxAge: 86400 });
454
466
  await client.streamReset('events/orders');
455
467
  ```
package/CLAUDE.md CHANGED
@@ -71,7 +71,7 @@ config.ts — demo instance config (open rules, indexes, fts, v
71
71
  - **BodClientCached**: two-tier cache wrapper around BodClient. Memory (Map, LRU eviction) + IndexedDB persistence. Stale-while-revalidate: subscribed paths always fresh, unsubscribed return stale + background refetch. Writes (`set/update/delete`) invalidate path + ancestors. `init()` opens IDB + sweeps expired. `warmup(paths[])` bulk-loads from IDB. Passthrough for `push/batch/query/search/mq/stream/vfs` via `cachedClient.client`.
72
72
  - **MCP**: `MCPAdapter` wraps a `BodClient` as a JSON-RPC MCP server (stdio + HTTP). Connects to a running BodDB instance over WebSocket — no embedded DB. Entry point: `mcp.ts`. Tools: CRUD (6), FTS (2), vectors (2), streams (4), MQ (7) = 21 tools. Use `--stdio` for Claude Code/Desktop, `--http` for remote agents.
73
73
  - **VFS (Virtual File System)**: `VFSEngine` — files stored outside SQLite via pluggable `VFSBackend` interface. `LocalBackend` stores at `<storageRoot>/<fileId>` using `Bun.file`/`Bun.write`. Metadata at `_vfs/<virtualPath>/` (size, mime, mtime, fileId, isDir) — gets subs/rules/replication for free. `fileId = pushId` so move/rename is metadata-only. REST: `POST/GET/DELETE /files/<path>`, `?stat=1`, `?list=1`, `?mkdir=1`, `PUT ?move=<dst>`. WS chunked fallback: base64-encoded `vfs-upload-init/chunk/done`, `vfs-download-init` → `vfs-download-chunk` push messages. Client: `VFSClient` via `client.vfs()` — `upload/download` (REST) + `uploadWS/downloadWS` (WS) + `stat/list/mkdir/delete/move`.
74
- - **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via `streamMaterialize('_repl', { keepKey: 'path' })`, then subscribe for ongoing events. Write proxy: replica forwards writes to primary via BodClient, primary applies + emits, replica consumes. `_replaying` flag prevents re-emission loops. `_emitting` guard prevents recursion from `db.push('_repl')`. Updates flattened to per-path set events for correct compaction keying. Sweep delete events replicated. Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth`. **Sources**: `ReplicationSource[]` — subscribe to specific paths from multiple remote DBs. Each source is an independent BodClient that filters `_repl` events by path prefix, with optional `localPrefix` remapping (e.g. remote `users/u1` → local `db-a/users/u1`). Sources connect in parallel; individual failures don't block others. Sources are independent of role — a DB can be primary AND consume sources. **Per-path topology**: `PathTopologyRouter` — when `paths` config is set, each path prefix gets an independent mode: `primary` (local authoritative, emits), `replica` (remote authoritative, proxies writes), `sync` (bidirectional, both emit+apply), `readonly` (pull-only, rejects writes), `writeonly` (push-only, ignores remote). Longest-prefix match resolves mode. `writeProxy: 'proxy'|'reject'` overrides replica write behavior. Bootstrap skips sync paths (ongoing stream only). Auth/rules checked before proxy in all handlers. `shouldProxyPath(path)`/`shouldRejectPath(path)` replace `isReplica` checks. `emitsToRepl`/`pullsFromPrimary` getters for compact/bootstrap decisions. Stable `replicaId` from config hash. Falls back to `role` when `paths` absent (backward compat).
74
 + - **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via cursor-based `streamMaterialize('_repl', { keepKey: 'path', batchSize: 200 })` pagination (avoids huge single WS frame), then subscribe for ongoing events. Auto-compact on write threshold (`autoCompactThreshold`, default 500) + on startup keeps `_repl` bounded. Write proxy: replica forwards writes to primary via BodClient, primary applies + emits, replica consumes. `_replaying` flag prevents re-emission loops. `_emitting` guard prevents recursion from `db.push('_repl')`. Updates flattened to per-path set events for correct compaction keying. Sweep delete events replicated. Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth`. **Sources**: `ReplicationSource[]` — subscribe to specific paths from multiple remote DBs. Each source is an independent BodClient that filters `_repl` events by path prefix, with optional `localPrefix` remapping (e.g. remote `users/u1` → local `db-a/users/u1`). Sources connect in parallel; individual failures don't block others. Sources are independent of role — a DB can be primary AND consume sources. **Per-path topology**: `PathTopologyRouter` — when `paths` config is set, each path prefix gets an independent mode: `primary` (local authoritative, emits), `replica` (remote authoritative, proxies writes), `sync` (bidirectional, both emit+apply), `readonly` (pull-only, rejects writes), `writeonly` (push-only, ignores remote). Longest-prefix match resolves mode. `writeProxy: 'proxy'|'reject'` overrides replica write behavior. Bootstrap skips sync paths (ongoing stream only). Auth/rules checked before proxy in all handlers. `shouldProxyPath(path)`/`shouldRejectPath(path)` replace `isReplica` checks. `emitsToRepl`/`pullsFromPrimary` getters for compact/bootstrap decisions. Stable `replicaId` from config hash. Falls back to `role` when `paths` absent (backward compat).
75
75
  - **KeyAuth integration guide**: `docs/keyauth-integration.md` — flows for signup, signin, new device, autoAuth, IAM roles, common mistakes.
76
76
  - **Para-chat integration guide**: `docs/para-chat-integration.md` — how para-chat uses BodDB: per-path topology, VFS, KeyAuth, caching, file sync.
77
77
  - **KeyAuth**: `KeyAuthEngine` — portable Ed25519 identity & IAM. Identity hierarchy: Root (server-level, key on filesystem), Account (portable, password-encrypted private key in DB or device-generated), Device (delegate, linked via password unlock). Challenge-response auth: server sends nonce → client signs with Ed25519 → server verifies + creates session. Self-signed tokens (no JWT lib): `base64url(payload).base64url(Ed25519_sign)`. Data model at `_auth/` prefix (protected from external writes). Device reverse-index at `_auth/deviceIndex/{dfp}` for O(1) lookup. Password change is atomic (single `db.update()`). IAM: roles with path-based permissions, account role assignment. `_auth/` excluded from replication. Transport guards: `auth-link-device` and `auth-change-password` require authenticated session; non-root users can only change own password. **Device registration**: `registerDevice(publicKey)` — client-generated keypair, no password, idempotent; `allowOpenRegistration: false` requires authenticated session. **Browser crypto**: `keyAuth.browser.ts` uses `@noble/ed25519` with DER↔raw key bridge for server compatibility. **BodClient autoAuth**: `autoAuth: true` auto-generates keypair (localStorage), registers, authenticates — zero-config device identity. `client.auth.*` convenience methods for all auth ops. **IAM transport ops**: `auth-create-role`, `auth-delete-role`, `auth-update-roles` (root only), `auth-list-accounts`, `auth-list-roles`. Device accounts (no encrypted key) safely reject `linkDevice`/`changePassword`.
package/admin/ui.html CHANGED
@@ -8,7 +8,7 @@
8
8
  body { font-family: monospace; font-size: 13px; background: #0d0d0d; color: #d4d4d4; display: flex; flex-direction: column; height: 100vh; overflow: hidden; }
9
9
 
10
10
  /* Metrics bar */
11
- #metrics-bar { display: flex; background: #0a0a0a; border-bottom: 1px solid #2a2a2a; flex-shrink: 0; overflow-x: auto; align-items: stretch; }
11
+ #metrics-bar { display: flex; background: #0a0a0a; border-bottom: 1px solid #2a2a2a; flex-shrink: 0; align-items: stretch; width: 100%; }
12
12
  .metric-card { display: flex; flex-direction: column; padding: 5px 10px 4px; border-right: 1px solid #181818; min-width: 140px; flex-shrink: 0; gap: 1px; overflow: hidden; }
13
13
  .metric-card:last-child { border-right: none; width: auto; }
14
14
  .metric-right { margin-left: auto; }
@@ -127,15 +127,15 @@
127
127
  <div class="metric-top"><span class="metric-label">Ping</span><span class="metric-value" id="s-ping">—</span></div>
128
128
  <canvas class="metric-canvas" id="g-ping" width="100" height="28"></canvas>
129
129
  </div>
130
- <div class="metric-card metric-right" id="repl-card" style="border-left:1px solid #282828;display:none;width:180px">
130
+ <div class="metric-card" id="repl-card" style="border-left:1px solid #282828;display:none;width:180px">
131
131
  <div class="metric-top"><span class="metric-label">Replication</span><span class="metric-value dim" id="s-repl-role">—</span></div>
132
132
  <div style="margin-top:4px;font-size:10px" id="s-repl-sources"></div>
133
133
  </div>
134
- <div class="metric-card" style="border-left:1px solid #282828">
134
+ <div class="metric-card metric-right" style="border-left:1px solid #282828;justify-content:space-between">
135
135
  <div class="metric-top"><span class="metric-label">Uptime</span><span class="metric-value dim" id="s-uptime">—</span></div>
136
- <div style="margin-top:4px;font-size:10px;color:#555" id="s-ts">—</div>
137
- <div style="margin-top:4px"><span class="metric-label">WS<span id="ws-dot"></span></span> <span style="font-size:10px;color:#555"><span id="s-clients">0</span> clients · <span id="s-subs">0</span> subs</span></div>
138
- <div style="margin-top:6px"><button id="stats-toggle" class="sm" onclick="toggleStats()" title="Toggle server stats collection">Stats: ON</button></div>
136
+ <div style="font-size:10px;color:#555;display:flex;justify-content:space-between"><span id="s-ts">—</span><span>v<span id="s-version">—</span></span></div>
137
+ <div><span class="metric-label">WS<span id="ws-dot"></span></span> <span style="font-size:10px;color:#555"><span id="s-clients">0</span> clients · <span id="s-subs">0</span> subs</span></div>
138
+ <div><button id="stats-toggle" class="sm" onclick="toggleStats()" title="Toggle server stats collection">Stats: ON</button></div>
139
139
  </div>
140
140
  </div>
141
141
 
@@ -1257,6 +1257,7 @@ db.on('_admin/stats', (snap) => {
1257
1257
  document.getElementById('s-subs').textContent = s.subs ?? 0;
1258
1258
  document.getElementById('s-uptime').textContent = fmtUptime(s.process.uptimeSec);
1259
1259
  document.getElementById('s-ts').textContent = new Date(s.ts).toLocaleTimeString();
1260
+ if (s.version) document.getElementById('s-version').textContent = s.version;
1260
1261
 
1261
1262
  // Replication stats
1262
1263
  if (s.repl) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bod.ee/db",
3
- "version": "0.12.2",
3
+ "version": "0.12.4",
4
4
  "module": "index.ts",
5
5
  "type": "module",
6
6
  "exports": {
@@ -456,8 +456,10 @@ export class BodClient {
456
456
  return this.send('stream-snapshot', { path });
457
457
  }
458
458
 
459
- async streamMaterialize(path: string, opts?: { keepKey?: string }): Promise<Record<string, unknown>> {
460
- return this.send('stream-materialize', { path, keepKey: opts?.keepKey }) as Promise<Record<string, unknown>>;
459
+ async streamMaterialize(path: string, opts?: { keepKey?: string }): Promise<Record<string, unknown>>;
460
+ async streamMaterialize(path: string, opts: { keepKey?: string; batchSize: number; cursor?: string }): Promise<{ data: Record<string, unknown>; nextCursor?: string }>;
461
+ async streamMaterialize(path: string, opts?: { keepKey?: string; batchSize?: number; cursor?: string }): Promise<Record<string, unknown> | { data: Record<string, unknown>; nextCursor?: string }> {
462
+ return this.send('stream-materialize', { path, ...opts }) as any;
461
463
  }
462
464
 
463
465
  async streamCompact(path: string, opts?: { maxAge?: number; maxCount?: number; keepKey?: string }): Promise<unknown> {
@@ -12,6 +12,8 @@ import { VFSEngine, type VFSEngineOptions } from './VFSEngine.ts';
12
12
  import { KeyAuthEngine, type KeyAuthEngineOptions } from './KeyAuthEngine.ts';
13
13
  import { validatePath } from '../shared/pathUtils.ts';
14
14
  import { Logger, type LogConfig } from '../shared/logger.ts';
15
+ import pkg from '../../package.json' with { type: 'json' };
16
+ const PKG_VERSION: string = pkg.version ?? 'unknown';
15
17
 
16
18
  export interface TransactionProxy {
17
19
  get(path: string): unknown;
@@ -79,7 +81,8 @@ export class BodDB {
79
81
  this.options = { ...new BodDBOptions(), ...options };
80
82
  this.log = new Logger(this.options.log);
81
83
  const _log = this.log.forComponent('db');
82
- _log.info(`Initializing BodDB (path: ${this.options.path})`);
84
+ console.log(`[BodDB] v${PKG_VERSION} (path: ${this.options.path})`);
85
+ _log.info(`Initializing BodDB v${PKG_VERSION} (path: ${this.options.path})`);
83
86
  this.storage = new StorageEngine({ path: this.options.path });
84
87
  this.subs = new SubscriptionEngine();
85
88
  this.stream = new StreamEngine(this.storage, this.subs, { compact: this.options.compact });
@@ -409,6 +412,7 @@ export class BodDB {
409
412
 
410
413
  // Reuse a single stats object to minimize allocations
411
414
  const statsData: Record<string, unknown> = {
415
+ version: PKG_VERSION,
412
416
  process: {}, db: {}, system: {},
413
417
  subs: 0, clients: 0, repl: null, ts: 0,
414
418
  };
@@ -128,11 +128,15 @@ export class ReplicationOptions {
128
128
  /** Bootstrap replica from primary's full state before applying _repl stream */
129
129
  fullBootstrap: boolean = true;
130
130
  compact?: CompactOptions;
131
+ /** Auto-compact _repl after this many emitted writes (0 = disabled, default 500) */
132
+ autoCompactThreshold: number = 500;
131
133
  sources?: ReplicationSource[];
132
134
  /** Per-path topology: strings default to 'sync', objects specify mode. When absent, role governs all paths. */
133
135
  paths?: Array<string | PathTopology>;
134
136
  }
135
137
 
138
+ const BOOTSTRAP_BATCH_SIZE = 200;
139
+
136
140
  type ProxyableMessage = Extract<ClientMessage, { op: 'set' | 'delete' | 'update' | 'push' | 'batch' }>;
137
141
 
138
142
  export class ReplicationEngine {
@@ -145,6 +149,7 @@ export class ReplicationEngine {
145
149
  private _started = false;
146
150
  private _seq = 0;
147
151
  private _emitting = false;
152
+ private _emitCount = 0;
148
153
  private _pendingReplEvents: WriteEvent[] | null = null;
149
154
  private log: ComponentLogger;
150
155
 
@@ -251,7 +256,7 @@ export class ReplicationEngine {
251
256
  /** Stop replication */
252
257
  stop(): void {
253
258
  this._started = false;
254
- if (this._compactTimer) { clearInterval(this._compactTimer); this._compactTimer = null; }
259
+ this._emitCount = 0;
255
260
  this.unsubWrite?.();
256
261
  this.unsubWrite = null;
257
262
  this.unsubStream?.();
@@ -289,22 +294,15 @@ export class ReplicationEngine {
289
294
 
290
295
  // --- Primary mode ---
291
296
 
292
- private _compactTimer: ReturnType<typeof setInterval> | null = null;
293
-
294
297
  private startPrimary(): void {
295
298
  this.unsubWrite = this.db.onWrite((ev: WriteEvent) => {
296
299
  this.emit(ev);
297
300
  });
298
301
 
299
- // Auto-compact _repl stream to prevent unbounded growth
302
+ // Compact on startup
300
303
  const compact = this.options.compact ?? { maxCount: 500, keepKey: 'path' };
301
304
  if (compact.maxCount || compact.maxAge) {
302
- // Compact on startup
303
305
  try { this.db.stream.compact('_repl', compact); } catch {}
304
- // Then periodically (every 5 minutes)
305
- this._compactTimer = setInterval(() => {
306
- try { this.db.stream.compact('_repl', compact); } catch {}
307
- }, 5 * 60_000);
308
306
  }
309
307
  }
310
308
 
@@ -351,9 +349,19 @@ export class ReplicationEngine {
351
349
  const seq = this._seq++;
352
350
  const idempotencyKey = `${replEvent.ts}:${seq}:${ev.path}`;
353
351
  this.db.push('_repl', replEvent, { idempotencyKey });
352
+
354
353
  } finally {
355
354
  this._emitting = false;
356
355
  }
356
+
357
+ // Auto-compact on write threshold (outside _emitting guard so notifications flow normally)
358
+ this._emitCount++;
359
+ const threshold = this.options.autoCompactThreshold;
360
+ if (threshold > 0 && this._emitCount >= threshold) {
361
+ this._emitCount = 0;
362
+ const compact = this.options.compact ?? { maxCount: 500, keepKey: 'path' };
363
+ try { this.db.stream.compact('_repl', compact); } catch {}
364
+ }
357
365
  }
358
366
 
359
367
  /** Start buffering replication events (call before transaction) */
@@ -394,21 +402,9 @@ export class ReplicationEngine {
394
402
  await this.bootstrapFullState(bootstrapPaths);
395
403
  }
396
404
 
397
- // Stream bootstrap filtered
398
- const snapshot = await this.client!.streamMaterialize('_repl', { keepKey: 'path' });
399
- if (snapshot) {
400
- this.db.setReplaying(true);
401
- try {
402
- for (const [, event] of Object.entries(snapshot)) {
403
- const ev = event as ReplEvent;
404
- if (this.matchesPathPrefixes(ev.path, pathPrefixes)) {
405
- this.applyEvent(ev);
406
- }
407
- }
408
- } finally {
409
- this.db.setReplaying(false);
410
- }
411
- }
405
+ // Stream bootstrap filtered (cursor-based to avoid huge single response)
406
+ const applied = await this.bootstrapFromStream(this.client!, { filter: ev => this.matchesPathPrefixes(ev.path, pathPrefixes) });
407
+ this.log.info(`Stream bootstrap (paths): ${applied} events applied`);
412
408
 
413
409
  // Subscribe to ongoing events, filter by paths
414
410
  const groupId = this.options.replicaId!;
@@ -449,20 +445,9 @@ export class ReplicationEngine {
449
445
  await this.bootstrapFullState();
450
446
  }
451
447
 
452
- // Stream bootstrap: apply _repl events on top (catches recent writes, deduped by idempotent set)
453
- const snapshot = await this.client!.streamMaterialize('_repl', { keepKey: 'path' });
454
- this.log.info(`Stream bootstrap: ${snapshot ? Object.keys(snapshot).length + ' events' : 'no events'}`);
455
- if (snapshot) {
456
- this.db.setReplaying(true);
457
- try {
458
- for (const [, event] of Object.entries(snapshot)) {
459
- const ev = event as ReplEvent;
460
- this.applyEvent(ev);
461
- }
462
- } finally {
463
- this.db.setReplaying(false);
464
- }
465
- }
448
+ // Stream bootstrap: cursor-based to avoid huge single response
449
+ const applied = await this.bootstrapFromStream(this.client!);
450
+ this.log.info(`Stream bootstrap: ${applied} events applied`);
466
451
 
467
452
  // Subscribe to ongoing events
468
453
  const groupId = this.options.replicaId!;
@@ -526,21 +511,11 @@ export class ReplicationEngine {
526
511
  const client = new BodClient({ url: source.url, auth: source.auth });
527
512
  await client.connect();
528
513
 
529
- // Bootstrap: materialize _repl, filter by source paths
530
- const snapshot = await client.streamMaterialize('_repl', { keepKey: 'path' });
531
- if (snapshot) {
532
- this.db.setReplaying(true);
533
- try {
534
- for (const [, event] of Object.entries(snapshot)) {
535
- const ev = event as ReplEvent;
536
- if (this.matchesSourcePaths(ev.path, source)) {
537
- this.applyEvent(ev, source);
538
- }
539
- }
540
- } finally {
541
- this.db.setReplaying(false);
542
- }
543
- }
514
+ // Bootstrap: cursor-based materialize _repl, filter by source paths
515
+ await this.bootstrapFromStream(client, {
516
+ filter: ev => this.matchesSourcePaths(ev.path, source),
517
+ source,
518
+ });
544
519
 
545
520
  // Subscribe to ongoing events
546
521
  const groupId = source.id || `source_${source.url}_${source.paths.sort().join('+')}`;
@@ -577,6 +552,33 @@ export class ReplicationEngine {
577
552
  return source.localPrefix ? `${source.localPrefix}/${path}` : path;
578
553
  }
579
554
 
555
+ /** Cursor-based stream bootstrap: pages through _repl materialize to avoid huge single responses */
556
+ private async bootstrapFromStream(client: BodClient, opts?: { filter?: (ev: ReplEvent) => boolean; source?: ReplicationSource }): Promise<number> {
557
+ let cursor: string | undefined;
558
+ let applied = 0;
559
+ const filter = opts?.filter;
560
+ const source = opts?.source;
561
+ this.db.setReplaying(true);
562
+ try {
563
+ do {
564
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: BOOTSTRAP_BATCH_SIZE, cursor });
565
+ if (page.data) {
566
+ for (const [, event] of Object.entries(page.data)) {
567
+ const ev = event as ReplEvent;
568
+ if (!filter || filter(ev)) {
569
+ this.applyEvent(ev, source);
570
+ applied++;
571
+ }
572
+ }
573
+ }
574
+ cursor = page.nextCursor;
575
+ } while (cursor);
576
+ } finally {
577
+ this.db.setReplaying(false);
578
+ }
579
+ return applied;
580
+ }
581
+
580
582
  private applyEvent(ev: ReplEvent, source?: ReplicationSource): void {
581
583
  const path = source ? this.remapPath(ev.path, source) : ev.path;
582
584
  // Defense-in-depth: skip events for paths we shouldn't apply (primary/writeonly)
@@ -706,7 +706,11 @@ export class Transport {
706
706
  if (self.rules && !self.rules.check('read', msg.path, ws.data.auth)) {
707
707
  return error('Permission denied', Errors.PERMISSION_DENIED);
708
708
  }
709
- return reply(self.db.stream.materialize(msg.path, msg.keepKey ? { keepKey: msg.keepKey } : undefined));
709
+ const matOpts: { keepKey?: string; batchSize?: number; cursor?: string } = {};
710
+ if (msg.keepKey) matOpts.keepKey = msg.keepKey;
711
+ if (msg.batchSize) matOpts.batchSize = msg.batchSize;
712
+ if (msg.cursor) matOpts.cursor = msg.cursor;
713
+ return reply(self.db.stream.materialize(msg.path, Object.keys(matOpts).length ? matOpts : undefined));
710
714
  }
711
715
  case 'stream-compact': {
712
716
  if (self.rules && !self.rules.check('write', msg.path, ws.data.auth)) {
@@ -26,7 +26,7 @@ export type ClientMessage =
26
26
  | { id: string; op: 'vector-search'; query: number[]; path?: string; limit?: number; threshold?: number }
27
27
  | { id: string; op: 'vector-store'; path: string; embedding: number[] }
28
28
  | { id: string; op: 'stream-snapshot'; path: string }
29
- | { id: string; op: 'stream-materialize'; path: string; keepKey?: string }
29
+ | { id: string; op: 'stream-materialize'; path: string; keepKey?: string; batchSize?: number; cursor?: string }
30
30
  | { id: string; op: 'stream-compact'; path: string; maxAge?: number; maxCount?: number; keepKey?: string }
31
31
  | { id: string; op: 'stream-reset'; path: string }
32
32
  // VFS ops
@@ -0,0 +1,370 @@
1
+ import { describe, it, expect, afterEach } from 'bun:test';
2
+ import { BodDB } from '../src/server/BodDB.ts';
3
+ import { BodClient } from '../src/client/BodClient.ts';
4
+
5
+ const wait = (ms: number) => new Promise(r => setTimeout(r, ms));
6
+ let nextPort = 27400 + Math.floor(Math.random() * 1000);
7
+
8
+ /**
9
+ * Massive load tests — battle-test cursor-based bootstrap + threshold compact
10
+ * under realistic and extreme conditions.
11
+ */
12
+ describe('repl load test', () => {
13
+ const instances: BodDB[] = [];
14
+ const clients: BodClient[] = [];
15
+
16
+ afterEach(() => {
17
+ for (const c of clients) c.disconnect();
18
+ clients.length = 0;
19
+ for (const db of [...instances].reverse()) db.close();
20
+ instances.length = 0;
21
+ });
22
+
23
+ function port() { return nextPort++; }
24
+
25
+ function primary(opts?: { compact?: any; autoCompactThreshold?: number }) {
26
+ const p = port();
27
+ const db = new BodDB({
28
+ path: ':memory:',
29
+ sweepInterval: 0,
30
+ replication: { role: 'primary', compact: opts?.compact ?? {}, autoCompactThreshold: opts?.autoCompactThreshold ?? 0 },
31
+ });
32
+ db.replication!.start();
33
+ db.serve({ port: p });
34
+ instances.push(db);
35
+ return { db, port: p };
36
+ }
37
+
38
+ function replica(primaryPort: number, opts?: Partial<{ replicaId: string; fullBootstrap: boolean }>) {
39
+ const p = port();
40
+ const db = new BodDB({
41
+ path: ':memory:',
42
+ sweepInterval: 0,
43
+ replication: {
44
+ role: 'replica',
45
+ primaryUrl: `ws://localhost:${primaryPort}`,
46
+ replicaId: opts?.replicaId ?? `load-replica-${p}`,
47
+ fullBootstrap: opts?.fullBootstrap ?? true,
48
+ },
49
+ });
50
+ db.serve({ port: p });
51
+ instances.push(db);
52
+ return { db, port: p };
53
+ }
54
+
55
+ function connect(p: number, opts?: any) {
56
+ const c = new BodClient({ url: `ws://localhost:${p}`, ...opts });
57
+ clients.push(c);
58
+ return c;
59
+ }
60
+
61
+ // ─── 1. 20k entries: cursor pagination end-to-end ───
62
+
63
+ it('20k entries: cursor pagination collects every key', async () => {
64
+ const { db, port: p } = primary();
65
+ for (let i = 0; i < 20_000; i++) {
66
+ db.set(`items/i${i}`, { v: i, ts: Date.now() });
67
+ }
68
+
69
+ const client = connect(p);
70
+ await client.connect();
71
+
72
+ const keys = new Set<string>();
73
+ let cursor: string | undefined;
74
+ let pages = 0;
75
+ const t0 = Date.now();
76
+ do {
77
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 500, cursor });
78
+ for (const k of Object.keys(page.data)) keys.add(k);
79
+ cursor = page.nextCursor;
80
+ pages++;
81
+ } while (cursor);
82
+ const elapsed = Date.now() - t0;
83
+
84
+ console.log(` 20k entries: ${pages} pages, ${keys.size} unique keys, ${elapsed}ms`);
85
+ expect(keys.size).toBe(20_000);
86
+ expect(pages).toBeGreaterThanOrEqual(40); // 20k / 500
87
+ }, 30_000);
88
+
89
+ // ─── 2. Cursor vs monolithic: per-page response stays small ───
90
+
91
+ it('cursor pages stay under 1MB each while monolithic is huge', async () => {
92
+ const { db, port: p } = primary();
93
+ const payload = 'z'.repeat(500);
94
+ for (let i = 0; i < 5000; i++) {
95
+ db.set(`big/p${i}`, { data: payload, i });
96
+ }
97
+
98
+ const client = connect(p);
99
+ await client.connect();
100
+
101
+ // Monolithic
102
+ const mono = await client.streamMaterialize('_repl', { keepKey: 'path' });
103
+ const monoSize = JSON.stringify(mono).length;
104
+
105
+ // Cursor-based
106
+ let maxPageSize = 0;
107
+ let cursor: string | undefined;
108
+ do {
109
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 200, cursor });
110
+ const sz = JSON.stringify(page.data).length;
111
+ if (sz > maxPageSize) maxPageSize = sz;
112
+ cursor = page.nextCursor;
113
+ } while (cursor);
114
+
115
+ console.log(` monolithic: ${(monoSize / 1024 / 1024).toFixed(2)}MB, max page: ${(maxPageSize / 1024).toFixed(0)}KB`);
116
+ expect(monoSize).toBeGreaterThan(2 * 1024 * 1024); // >2MB total
117
+ expect(maxPageSize).toBeLessThan(1024 * 1024); // each page <1MB
118
+ }, 15_000);
119
+
120
+ // ─── 3. Auto-compact under sustained write load ───
121
+
122
+ it('auto-compact keeps _repl bounded under sustained 10k writes', () => {
123
+ const { db } = primary({ compact: { maxCount: 200, keepKey: 'path' }, autoCompactThreshold: 500 });
124
+
125
+ for (let i = 0; i < 10_000; i++) {
126
+ db.set(`stream/key${i % 300}`, { round: Math.floor(i / 300), i });
127
+ }
128
+
129
+ const repl = db.get('_repl') as Record<string, any>;
130
+ const count = repl ? Object.keys(repl).length : 0;
131
+ console.log(` 10k writes (300 unique paths), threshold=500, maxCount=200: ${count} _repl entries`);
132
+ // Without compact: 10k. With compact every 500 writes keeping 200: should be way under 1000.
133
+ expect(count).toBeLessThan(1000);
134
+ }, 15_000);
135
+
136
+ // ─── 4. Replica bootstrap with 10k entries via cursor ───
137
+
138
+ it('replica bootstraps 10k entries via cursor without timeout', async () => {
139
+ const { db: p, port: pp } = primary();
140
+ for (let i = 0; i < 10_000; i++) {
141
+ p.set(`data/node${i}`, { value: i, name: `node-${i}`, tags: ['a', 'b'] });
142
+ }
143
+
144
+ const { db: r } = replica(pp);
145
+ const t0 = Date.now();
146
+ await r.replication!.start();
147
+ const elapsed = Date.now() - t0;
148
+
149
+ console.log(` 10k entry replica bootstrap: ${elapsed}ms`);
150
+
151
+ await wait(500);
152
+ // Spot-check
153
+ for (const idx of [0, 999, 5000, 9999]) {
154
+ const val = r.get(`data/node${idx}`) as any;
155
+ expect(val?.value).toBe(idx);
156
+ }
157
+ }, 30_000);
158
+
159
+ // ─── 5. Multiple replicas bootstrap concurrently ───
160
+
161
+ it('3 replicas bootstrap concurrently from same primary (10k entries)', async () => {
162
+ const { db: p, port: pp } = primary();
163
+ for (let i = 0; i < 10_000; i++) {
164
+ p.set(`shared/item${i}`, { v: i });
165
+ }
166
+
167
+ const replicas = [replica(pp), replica(pp), replica(pp)];
168
+ const t0 = Date.now();
169
+ await Promise.all(replicas.map(r => r.db.replication!.start()));
170
+ const elapsed = Date.now() - t0;
171
+
172
+ console.log(` 3 concurrent replica bootstraps (10k): ${elapsed}ms`);
173
+
174
+ await wait(500);
175
+ for (const r of replicas) {
176
+ const v0 = r.db.get('shared/item0') as any;
177
+ const v9999 = r.db.get('shared/item9999') as any;
178
+ expect(v0?.v).toBe(0);
179
+ expect(v9999?.v).toBe(9999);
180
+ }
181
+ }, 45_000);
182
+
183
+ // ─── 6. Writes during bootstrap: replica catches up via stream sub ───
184
+
185
+ it('writes during bootstrap are caught via ongoing stream subscription', async () => {
186
+ const { db: p, port: pp } = primary();
187
+ // Pre-fill
188
+ for (let i = 0; i < 5000; i++) {
189
+ p.set(`pre/item${i}`, { v: i });
190
+ }
191
+
192
+ const { db: r } = replica(pp);
193
+ // Start replica (bootstrap starts)
194
+ const startPromise = r.replication!.start();
195
+
196
+ // Write more to primary while bootstrap is in progress
197
+ for (let i = 0; i < 500; i++) {
198
+ p.set(`live/item${i}`, { v: i + 100_000 });
199
+ }
200
+
201
+ await startPromise;
202
+ // Give stream sub time to deliver live writes
203
+ await wait(1000);
204
+
205
+ // Pre-fill data should be there
206
+ const pre0 = r.get('pre/item0') as any;
207
+ expect(pre0?.v).toBe(0);
208
+ const pre4999 = r.get('pre/item4999') as any;
209
+ expect(pre4999?.v).toBe(4999);
210
+
211
+ // Live writes should eventually arrive
212
+ const live499 = r.get('live/item499') as any;
213
+ expect(live499?.v).toBe(100_499);
214
+ }, 30_000);
215
+
216
+ // ─── 7. Heavy overwrite scenario: same 50 paths written 1000× each ───
217
+
218
+ it('50 paths × 1000 overwrites: compact deduplicates correctly', () => {
219
+ const { db } = primary({ compact: { maxCount: 100, keepKey: 'path' }, autoCompactThreshold: 1000 });
220
+
221
+ for (let round = 0; round < 1000; round++) {
222
+ for (let i = 0; i < 50; i++) {
223
+ db.set(`hot/key${i}`, { round, value: round * 50 + i });
224
+ }
225
+ }
226
+
227
+ const repl = db.get('_repl') as Record<string, any>;
228
+ const count = repl ? Object.keys(repl).length : 0;
229
+ console.log(` 50×1000 overwrites: ${count} _repl entries (expect ≤ ~1100)`);
230
+ // 50k total writes. Compact every 1k writes keeping 100. Should be bounded.
231
+ expect(count).toBeLessThan(1500);
232
+
233
+ // Verify latest values survived compaction
234
+ const materialized = db.stream.materialize('_repl', { keepKey: 'path' });
235
+ const paths = Object.keys(materialized);
236
+ expect(paths.length).toBe(50);
237
+ }, 30_000);
238
+
239
+ // ─── 8. Cursor pagination with snapshot (post-compact) ───
240
+
241
+ it('cursor pagination works correctly after compaction (snapshot + live events)', async () => {
242
+ const { db, port: p } = primary();
243
+ // Write 2000, compact to 200, write 800 more
244
+ for (let i = 0; i < 2000; i++) db.set(`a/item${i}`, { v: i });
245
+ db.stream.compact('_repl', { maxCount: 200, keepKey: 'path' });
246
+ for (let i = 2000; i < 2800; i++) db.set(`a/item${i}`, { v: i });
247
+
248
+ const client = connect(p);
249
+ await client.connect();
250
+
251
+ const keys = new Set<string>();
252
+ let cursor: string | undefined;
253
+ let pages = 0;
254
+ do {
255
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 100, cursor });
256
+ for (const k of Object.keys(page.data)) keys.add(k);
257
+ cursor = page.nextCursor;
258
+ pages++;
259
+ } while (cursor);
260
+
261
+ console.log(` 2000 + compact + 800 more: ${keys.size} keys in ${pages} pages`);
262
+ // All 2800 unique paths should be present (snapshot has older ones, events have newer)
263
+ expect(keys.size).toBe(2800);
264
+ }, 15_000);
265
+
266
+ // ─── 9. Replica with auto-compact primary: data integrity ───
267
+
268
+ it('replica gets correct data when primary auto-compacts during heavy writes', async () => {
269
+ const { db: p, port: pp } = primary({ compact: { maxCount: 300, keepKey: 'path' }, autoCompactThreshold: 500 });
270
+
271
+ // 5000 writes — auto-compact fires multiple times
272
+ for (let i = 0; i < 5000; i++) {
273
+ p.set(`verified/item${i}`, { value: i * 7, tag: 'check' });
274
+ }
275
+
276
+ const { db: r } = replica(pp);
277
+ await r.replication!.start();
278
+ await wait(500);
279
+
280
+ // Exhaustive integrity check on a sample
281
+ const sample = [0, 100, 999, 2500, 4000, 4999];
282
+ for (const idx of sample) {
283
+ const val = r.get(`verified/item${idx}`) as any;
284
+ expect(val?.value).toBe(idx * 7);
285
+ expect(val?.tag).toBe('check');
286
+ }
287
+ }, 30_000);
288
+
289
+ // ─── 10. Mixed deletes + sets under load ───
290
+
291
+ it('deletes replicate correctly through cursor-based bootstrap (no fullBootstrap)', async () => {
292
+ const { db: p, port: pp } = primary();
293
+
294
+ // Create 1000, then delete half
295
+ for (let i = 0; i < 1000; i++) {
296
+ p.set(`mix/item${i}`, { v: i });
297
+ }
298
+ for (let i = 0; i < 1000; i += 2) {
299
+ p.delete(`mix/item${i}`);
300
+ }
301
+
302
+ // Disable fullBootstrap so only _repl stream materialize is used
303
+ const { db: r } = replica(pp, { fullBootstrap: false });
304
+ await r.replication!.start();
305
+ await wait(500);
306
+
307
+ // Even indices: _repl has set then delete — materialize with keepKey=path keeps last op (delete)
308
+ // But materialize folds by keepKey, and delete events have op:'delete' — they apply as db.delete()
309
+ // Odd indices should exist from set events
310
+ for (const i of [1, 3, 99, 999]) {
311
+ const val = r.get(`mix/item${i}`) as any;
312
+ expect(val?.v).toBe(i);
313
+ }
314
+ // Verify primary has them deleted
315
+ for (const i of [0, 2, 100, 998]) {
316
+ expect(p.get(`mix/item${i}`)).toBeNull();
317
+ }
318
+ }, 15_000);
319
+
320
+ // ─── 11. Rapid batchSize=1 pagination (worst case) ───
321
+
322
+ it('batchSize=1 pagination still completes for 500 entries', async () => {
323
+ const { db, port: p } = primary();
324
+ for (let i = 0; i < 500; i++) db.set(`tiny/k${i}`, { i });
325
+
326
+ const client = connect(p);
327
+ await client.connect();
328
+
329
+ const keys = new Set<string>();
330
+ let cursor: string | undefined;
331
+ let pages = 0;
332
+ const t0 = Date.now();
333
+ do {
334
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 1, cursor });
335
+ for (const k of Object.keys(page.data)) keys.add(k);
336
+ cursor = page.nextCursor;
337
+ pages++;
338
+ } while (cursor);
339
+ const elapsed = Date.now() - t0;
340
+
341
+ console.log(` batchSize=1 over 500 entries: ${pages} pages, ${elapsed}ms`);
342
+ expect(keys.size).toBe(500);
343
+ expect(pages).toBeGreaterThanOrEqual(500);
344
+ }, 30_000);
345
+
346
+ // ─── 12. Throughput benchmark: writes/sec with auto-compact ───
347
+
348
+ it('write throughput with auto-compact enabled', () => {
349
+ const { db: dbCompact } = primary({ compact: { maxCount: 200, keepKey: 'path' }, autoCompactThreshold: 500 });
350
+ const { db: dbPlain } = primary();
351
+
352
+ const N = 10_000;
353
+
354
+ const t0 = Date.now();
355
+ for (let i = 0; i < N; i++) dbPlain.set(`bench/k${i}`, { i });
356
+ const plainMs = Date.now() - t0;
357
+
358
+ const t1 = Date.now();
359
+ for (let i = 0; i < N; i++) dbCompact.set(`bench/k${i}`, { i });
360
+ const compactMs = Date.now() - t1;
361
+
362
+ const plainWps = Math.round(N / (plainMs / 1000));
363
+ const compactWps = Math.round(N / (compactMs / 1000));
364
+ const overhead = ((compactMs - plainMs) / plainMs * 100).toFixed(1);
365
+
366
+ console.log(` ${N} writes — plain: ${plainMs}ms (${plainWps} w/s), compact: ${compactMs}ms (${compactWps} w/s), overhead: ${overhead}%`);
367
+ // Auto-compact overhead should be < 100% (compact is cheap relative to N writes)
368
+ expect(compactMs).toBeLessThan(plainMs * 3);
369
+ }, 30_000);
370
+ });
@@ -5,18 +5,6 @@ import { BodClient } from '../src/client/BodClient.ts';
5
5
  const wait = (ms: number) => new Promise(r => setTimeout(r, ms));
6
6
  let nextPort = 26400 + Math.floor(Math.random() * 1000);
7
7
 
8
- /**
9
- * Stress tests for _repl stream bloat.
10
- *
11
- * The real-world issue: _repl grows unbounded → streamMaterialize produces
12
- * a massive WS response → client requestTimeout (30s) fires or WS chokes.
13
- *
14
- * Locally we can't easily reproduce network latency, but we CAN:
15
- * 1. Push the entry count + payload size to stress serialization/parsing
16
- * 2. Use short requestTimeout on the replica client to simulate the real failure
17
- * 3. Measure materialization time scaling (O(n) proof)
18
- * 4. Verify compaction actually fixes it
19
- */
20
8
  describe('_repl stream bloat', () => {
21
9
  const instances: BodDB[] = [];
22
10
  const clients: BodClient[] = [];
@@ -31,12 +19,12 @@ describe('_repl stream bloat', () => {
31
19
  function getPort() { return nextPort++; }
32
20
 
33
21
  /** Primary with auto-compact DISABLED so _repl grows unbounded */
34
- function createPrimary(opts?: { maxMessageSize?: number }) {
22
+ function createPrimary(opts?: { maxMessageSize?: number; compact?: any; autoCompactThreshold?: number }) {
35
23
  const port = getPort();
36
24
  const db = new BodDB({
37
25
  path: ':memory:',
38
26
  sweepInterval: 0,
39
- replication: { role: 'primary', compact: {} },
27
+ replication: { role: 'primary', compact: opts?.compact ?? {}, autoCompactThreshold: opts?.autoCompactThreshold ?? 0 },
40
28
  });
41
29
  db.replication!.start();
42
30
  db.serve({ port, maxMessageSize: opts?.maxMessageSize });
@@ -117,6 +105,51 @@ describe('_repl stream bloat', () => {
117
105
  expect(ratio).toBeGreaterThan(5); // 5000/500 = 10x, allow some dedup
118
106
  });
119
107
 
108
+ // --- Cursor-based materialize ---
109
+
110
+ it('cursor-based materialize pages correctly over large _repl', async () => {
111
+ const { db, port } = createPrimary();
112
+ fillRepl(db, 1000, 200);
113
+
114
+ const client = new BodClient({ url: `ws://localhost:${port}` });
115
+ clients.push(client);
116
+ await client.connect();
117
+
118
+ // Page through with batchSize=200
119
+ const allKeys = new Set<string>();
120
+ let cursor: string | undefined;
121
+ let pages = 0;
122
+ do {
123
+ const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 200, cursor });
124
+ if (page.data) {
125
+ for (const key of Object.keys(page.data)) allKeys.add(key);
126
+ }
127
+ cursor = page.nextCursor;
128
+ pages++;
129
+ } while (cursor);
130
+
131
+ console.log(` 1000 entries paged in ${pages} pages, got ${allKeys.size} unique keys`);
132
+ expect(allKeys.size).toBe(1000);
133
+ expect(pages).toBeGreaterThanOrEqual(5); // 1000/200 = 5 pages minimum
134
+ });
135
+
136
+ // --- Auto-compact on write threshold ---
137
+
138
+ it('auto-compact triggers after N writes', () => {
139
+ const { db } = createPrimary({ compact: { maxCount: 50, keepKey: 'path' }, autoCompactThreshold: 100 });
140
+
141
+ // Write 250 entries — compact triggers at 100, 200; maxCount=50 keeps only 50 each time
142
+ for (let i = 0; i < 250; i++) {
143
+ db.set(`data/item${i}`, { value: i });
144
+ }
145
+
146
+ const repl = db.get('_repl') as Record<string, any>;
147
+ const count = repl ? Object.keys(repl).length : 0;
148
+ console.log(` 250 writes with threshold=100, maxCount=50: ${count} _repl entries`);
149
+ // After compact at 200 (keeps 50), then 50 more → ~100. Way less than 250.
150
+ expect(count).toBeLessThan(150);
151
+ });
152
+
120
153
  // --- Timeout reproduction: short requestTimeout simulates real-world failure ---
121
154
 
122
155
  it('short requestTimeout causes streamMaterialize to fail on bloated _repl', async () => {
@@ -126,8 +159,6 @@ describe('_repl stream bloat', () => {
126
159
  const replCount = Object.keys(primary.get('_repl') as Record<string, any>).length;
127
160
  expect(replCount).toBe(10000);
128
161
 
129
- // Direct client with 1ms timeout — guaranteed to fail, proving the
130
- // timeout path exists and that materialize has no retry/fallback
131
162
  const client = new BodClient({
132
163
  url: `ws://localhost:${primaryPort}`,
133
164
  requestTimeout: 1,
@@ -161,16 +192,14 @@ describe('_repl stream bloat', () => {
161
192
  const responseSize = JSON.stringify(result).length;
162
193
 
163
194
  console.log(` 10k entries × 1KB: ${(responseSize / 1024 / 1024).toFixed(1)}MB response in ${elapsed}ms`);
164
- // On real networks with latency, this 10MB+ response would easily exceed 30s timeout
165
195
  expect(responseSize).toBeGreaterThan(5 * 1024 * 1024); // >5MB
166
196
  });
167
197
 
168
- // --- Payload size bomb: fewer entries but huge payloads ---
198
+ // --- Payload size bomb ---
169
199
 
170
200
  it('large payloads per entry amplify the problem', async () => {
171
201
  const { db, port } = createPrimary();
172
202
 
173
- // 1000 entries but 2KB each → ~2MB+ materialize response
174
203
  const bigPadding = 'y'.repeat(2000);
175
204
  for (let i = 0; i < 1000; i++) {
176
205
  db.set(`data/big${i}`, {
@@ -190,7 +219,7 @@ describe('_repl stream bloat', () => {
190
219
  const responseSize = JSON.stringify(result).length;
191
220
 
192
221
  console.log(` 1000 entries × 2KB = ${(responseSize / 1024 / 1024).toFixed(2)}MB, ${elapsed}ms`);
193
- expect(responseSize).toBeGreaterThan(2 * 1024 * 1024); // >2MB
222
+ expect(responseSize).toBeGreaterThan(2 * 1024 * 1024);
194
223
  });
195
224
 
196
225
  // --- Compaction fixes it ---
@@ -202,14 +231,12 @@ describe('_repl stream bloat', () => {
202
231
  const beforeCount = Object.keys(primary.get('_repl') as Record<string, any>).length;
203
232
  expect(beforeCount).toBe(5000);
204
233
 
205
- // Compact down to 500
206
234
  primary.stream.compact('_repl', { maxCount: 500, keepKey: 'path' });
207
235
  const afterRepl = primary.get('_repl') as Record<string, any>;
208
236
  const afterCount = afterRepl ? Object.keys(afterRepl).length : 0;
209
237
  console.log(` Compacted: ${beforeCount} → ${afterCount} entries`);
210
238
  expect(afterCount).toBeLessThanOrEqual(500);
211
239
 
212
- // Bootstrap should now work fast with compacted stream
213
240
  const { db: replica } = createReplica(primaryPort);
214
241
  const start = Date.now();
215
242
  await replica.replication!.start();
@@ -219,17 +246,15 @@ describe('_repl stream bloat', () => {
219
246
  expect(elapsed).toBeLessThan(3000);
220
247
 
221
248
  await wait(300);
222
- // Verify latest writes are present (compaction keeps newest by keepKey)
223
249
  const val = replica.get('vfs/files/project/src/deep/nested/path/module4999/component.tsx') as any;
224
250
  expect(val?.size).toBe(1024 + 4999);
225
251
  });
226
252
 
227
- // --- Repeated writes to same paths: worst case for non-compacted stream ---
253
+ // --- Repeated writes ---
228
254
 
229
255
  it('repeated writes to same paths bloat _repl with duplicates', () => {
230
256
  const { db } = createPrimary();
231
257
 
232
- // 100 paths × 50 writes each = 5000 _repl entries, but only 100 unique paths
233
258
  for (let round = 0; round < 50; round++) {
234
259
  for (let i = 0; i < 100; i++) {
235
260
  db.set(`config/setting${i}`, { value: round, updated: Date.now() });
@@ -241,30 +266,30 @@ describe('_repl stream bloat', () => {
241
266
  console.log(` 100 paths × 50 writes = ${totalEntries} _repl entries`);
242
267
  expect(totalEntries).toBe(5000);
243
268
 
244
- // Compact with keepKey deduplicates to 100
245
269
  db.stream.compact('_repl', { keepKey: 'path' });
246
270
  const after = db.get('_repl') as Record<string, any>;
247
271
  const afterCount = after ? Object.keys(after).length : 0;
248
- // snapshot (1) + remaining entries
249
272
  console.log(` After compact: ${afterCount} entries (expect ~100 unique paths)`);
250
273
  expect(afterCount).toBeLessThanOrEqual(150);
251
274
  });
252
275
 
253
- // --- No bootstrap protection: replica.start() has no timeout ---
276
+ // --- Cursor-based bootstrap works for replica ---
254
277
 
255
- it('replica.start() has no built-in timeout (current gap)', async () => {
278
+ it('replica bootstrap uses cursor-based pagination (no timeout on large _repl)', async () => {
256
279
  const { db: primary, port: primaryPort } = createPrimary();
257
280
  fillRepl(primary, 3000, 300);
258
281
 
259
282
  const { db: replica } = createReplica(primaryPort);
260
283
 
261
- // Measure: start() blocks until materialize completes — no internal timeout
262
284
  const start = Date.now();
263
285
  await replica.replication!.start();
264
286
  const elapsed = Date.now() - start;
265
287
 
266
- console.log(` replica.start() blocked for ${elapsed}ms (no internal timeout)`);
267
- // This documents the gap: there's no way to bail out of a slow bootstrap
268
- // P0 fix should add a configurable timeout here
288
+ console.log(` Cursor-based replica bootstrap: ${elapsed}ms for 3000 entries`);
289
+
290
+ await wait(300);
291
+ // Verify data arrived
292
+ const val = replica.get('vfs/files/project/src/deep/nested/path/module2999/component.tsx') as any;
293
+ expect(val?.size).toBe(1024 + 2999);
269
294
  });
270
295
  });