@bod.ee/db 0.12.2 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/developing-bod-db.md +1 -1
- package/.claude/skills/using-bod-db.md +12 -0
- package/CLAUDE.md +1 -1
- package/admin/ui.html +7 -6
- package/package.json +1 -1
- package/src/client/BodClient.ts +4 -2
- package/src/server/BodDB.ts +5 -1
- package/src/server/ReplicationEngine.ts +55 -53
- package/src/server/Transport.ts +5 -1
- package/src/shared/protocol.ts +1 -1
- package/tests/repl-load.test.ts +370 -0
- package/tests/repl-stream-bloat.test.ts +58 -33
|
@@ -86,7 +86,7 @@ Push paths are append-only logs. `StreamEngine` adds consumer group offsets (`_s
|
|
|
86
86
|
`MQEngine` owns all MQ SQL via `storage.db.prepare()` — same pattern as StreamEngine. Columns: `mq_status` (pending/inflight), `mq_inflight_until` (Unix ms), `mq_delivery_count`. `fetch()` uses SQLite transaction with TOCTOU guard (`changes > 0`). Ack = DELETE. Sweep reclaims expired inflight; exhausted messages move to DLQ at `<queue>/_dlq/<key>`. Per-queue options via longest prefix match on `queues` config.
|
|
87
87
|
|
|
88
88
|
### Replication
|
|
89
|
-
`ReplicationEngine` — primary/replica + multi-source feed subscriptions via `_repl` stream. Primary: `onWrite` hooks emit events to `_repl` stream (updates flattened to per-path sets). Replica: bootstraps via `streamMaterialize
|
|
89
|
+
`ReplicationEngine` — primary/replica + multi-source feed subscriptions via `_repl` stream. Primary: `onWrite` hooks emit events to `_repl` stream (updates flattened to per-path sets). Auto-compact on write threshold (`autoCompactThreshold`, default 500) + on startup keeps `_repl` bounded. Replica: bootstraps via cursor-based `streamMaterialize` pagination (`batchSize: 200`), subscribes for ongoing events, proxies writes to primary. `bootstrapFromStream()` helper handles all 3 bootstrap sites (replica, router-based, sources). Guards: `_replaying` prevents re-emission, `_emitting` prevents recursion from `db.push('_repl')`. Sweep deletes are replicated. Transport checks `isReplica` and forwards write ops.
|
|
90
90
|
|
|
91
91
|
**Sources** (`ReplicationSource[]`): independent of role. Each source creates a `BodClient`, bootstraps filtered `_repl` snapshot, subscribes for ongoing events. `matchesSourcePaths()` filters by path prefix. `remapPath()` prepends `localPrefix`. Events applied with `_replaying=true`. Sources connect via `Promise.allSettled` — individual failures logged, others continue. Deterministic `groupId` default: `source_${url}_${paths.join('+')}`.
|
|
92
92
|
|
|
@@ -301,6 +301,11 @@ ws.send(JSON.stringify({ id: '20', op: 'batch-sub', subscriptions: [
|
|
|
301
301
|
// Stream extended ops
|
|
302
302
|
ws.send(JSON.stringify({ id: '21', op: 'stream-snapshot', path: 'events/orders' }));
|
|
303
303
|
ws.send(JSON.stringify({ id: '21', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId' }));
|
|
304
|
+
// Cursor-based pagination (for large streams):
|
|
305
|
+
ws.send(JSON.stringify({ id: '21b', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId', batchSize: 200 }));
|
|
306
|
+
// → { id: '21b', ok: true, data: { data: {...}, nextCursor: 'abc123' } }
|
|
307
|
+
// Follow-up page:
|
|
308
|
+
ws.send(JSON.stringify({ id: '21c', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId', batchSize: 200, cursor: 'abc123' }));
|
|
304
309
|
ws.send(JSON.stringify({ id: '22', op: 'stream-compact', path: 'events/orders', maxAge: 86400 }));
|
|
305
310
|
ws.send(JSON.stringify({ id: '23', op: 'stream-reset', path: 'events/orders' }));
|
|
306
311
|
```
|
|
@@ -450,6 +455,13 @@ const similar = await client.vectorSearch({ query: [0.1, 0.2, 0.3], path: 'docs'
|
|
|
450
455
|
// Stream snapshot, materialize, compact, reset
|
|
451
456
|
const snap = await client.streamSnapshot('events/orders');
|
|
452
457
|
const view = await client.streamMaterialize('events/orders', { keepKey: 'orderId' });
|
|
458
|
+
// Cursor-based materialize for large streams (avoids huge single response):
|
|
459
|
+
let cursor: string | undefined;
|
|
460
|
+
do {
|
|
461
|
+
const page = await client.streamMaterialize('events/orders', { keepKey: 'orderId', batchSize: 200, cursor });
|
|
462
|
+
// page.data contains this batch, page.nextCursor is undefined when done
|
|
463
|
+
cursor = page.nextCursor;
|
|
464
|
+
} while (cursor);
|
|
453
465
|
await client.streamCompact('events/orders', { maxAge: 86400 });
|
|
454
466
|
await client.streamReset('events/orders');
|
|
455
467
|
```
|
package/CLAUDE.md
CHANGED
|
@@ -71,7 +71,7 @@ config.ts — demo instance config (open rules, indexes, fts, v
|
|
|
71
71
|
- **BodClientCached**: two-tier cache wrapper around BodClient. Memory (Map, LRU eviction) + IndexedDB persistence. Stale-while-revalidate: subscribed paths always fresh, unsubscribed return stale + background refetch. Writes (`set/update/delete`) invalidate path + ancestors. `init()` opens IDB + sweeps expired. `warmup(paths[])` bulk-loads from IDB. Passthrough for `push/batch/query/search/mq/stream/vfs` via `cachedClient.client`.
|
|
72
72
|
- **MCP**: `MCPAdapter` wraps a `BodClient` as a JSON-RPC MCP server (stdio + HTTP). Connects to a running BodDB instance over WebSocket — no embedded DB. Entry point: `mcp.ts`. Tools: CRUD (6), FTS (2), vectors (2), streams (4), MQ (7) = 21 tools. Use `--stdio` for Claude Code/Desktop, `--http` for remote agents.
|
|
73
73
|
- **VFS (Virtual File System)**: `VFSEngine` — files stored outside SQLite via pluggable `VFSBackend` interface. `LocalBackend` stores at `<storageRoot>/<fileId>` using `Bun.file`/`Bun.write`. Metadata at `_vfs/<virtualPath>/` (size, mime, mtime, fileId, isDir) — gets subs/rules/replication for free. `fileId = pushId` so move/rename is metadata-only. REST: `POST/GET/DELETE /files/<path>`, `?stat=1`, `?list=1`, `?mkdir=1`, `PUT ?move=<dst>`. WS chunked fallback: base64-encoded `vfs-upload-init/chunk/done`, `vfs-download-init` → `vfs-download-chunk` push messages. Client: `VFSClient` via `client.vfs()` — `upload/download` (REST) + `uploadWS/downloadWS` (WS) + `stat/list/mkdir/delete/move`.
|
|
74
|
-
- **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via `streamMaterialize('_repl', { keepKey: 'path' })
|
|
74
|
+
- **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via cursor-based `streamMaterialize('_repl', { keepKey: 'path', batchSize: 200 })` pagination (avoids huge single WS frame), then subscribe for ongoing events. Auto-compact on write threshold (`autoCompactThreshold`, default 500) + on startup keeps `_repl` bounded. Write proxy: replica forwards writes to primary via BodClient, primary applies + emits, replica consumes. `_replaying` flag prevents re-emission loops. `_emitting` guard prevents recursion from `db.push('_repl')`. Updates flattened to per-path set events for correct compaction keying. Sweep delete events replicated. Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth`. **Sources**: `ReplicationSource[]` — subscribe to specific paths from multiple remote DBs. Each source is an independent BodClient that filters `_repl` events by path prefix, with optional `localPrefix` remapping (e.g. remote `users/u1` → local `db-a/users/u1`). Sources connect in parallel; individual failures don't block others. Sources are independent of role — a DB can be primary AND consume sources. **Per-path topology**: `PathTopologyRouter` — when `paths` config is set, each path prefix gets an independent mode: `primary` (local authoritative, emits), `replica` (remote authoritative, proxies writes), `sync` (bidirectional, both emit+apply), `readonly` (pull-only, rejects writes), `writeonly` (push-only, ignores remote). Longest-prefix match resolves mode. `writeProxy: 'proxy'|'reject'` overrides replica write behavior. Bootstrap skips sync paths (ongoing stream only). Auth/rules checked before proxy in all handlers. `shouldProxyPath(path)`/`shouldRejectPath(path)` replace `isReplica` checks. `emitsToRepl`/`pullsFromPrimary` getters for compact/bootstrap decisions. Stable `replicaId` from config hash. 
Falls back to `role` when `paths` absent (backward compat).
|
|
75
75
|
- **KeyAuth integration guide**: `docs/keyauth-integration.md` — flows for signup, signin, new device, autoAuth, IAM roles, common mistakes.
|
|
76
76
|
- **Para-chat integration guide**: `docs/para-chat-integration.md` — how para-chat uses BodDB: per-path topology, VFS, KeyAuth, caching, file sync.
|
|
77
77
|
- **KeyAuth**: `KeyAuthEngine` — portable Ed25519 identity & IAM. Identity hierarchy: Root (server-level, key on filesystem), Account (portable, password-encrypted private key in DB or device-generated), Device (delegate, linked via password unlock). Challenge-response auth: server sends nonce → client signs with Ed25519 → server verifies + creates session. Self-signed tokens (no JWT lib): `base64url(payload).base64url(Ed25519_sign)`. Data model at `_auth/` prefix (protected from external writes). Device reverse-index at `_auth/deviceIndex/{dfp}` for O(1) lookup. Password change is atomic (single `db.update()`). IAM: roles with path-based permissions, account role assignment. `_auth/` excluded from replication. Transport guards: `auth-link-device` and `auth-change-password` require authenticated session; non-root users can only change own password. **Device registration**: `registerDevice(publicKey)` — client-generated keypair, no password, idempotent; `allowOpenRegistration: false` requires authenticated session. **Browser crypto**: `keyAuth.browser.ts` uses `@noble/ed25519` with DER↔raw key bridge for server compatibility. **BodClient autoAuth**: `autoAuth: true` auto-generates keypair (localStorage), registers, authenticates — zero-config device identity. `client.auth.*` convenience methods for all auth ops. **IAM transport ops**: `auth-create-role`, `auth-delete-role`, `auth-update-roles` (root only), `auth-list-accounts`, `auth-list-roles`. Device accounts (no encrypted key) safely reject `linkDevice`/`changePassword`.
|
package/admin/ui.html
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
body { font-family: monospace; font-size: 13px; background: #0d0d0d; color: #d4d4d4; display: flex; flex-direction: column; height: 100vh; overflow: hidden; }
|
|
9
9
|
|
|
10
10
|
/* Metrics bar */
|
|
11
|
-
#metrics-bar { display: flex; background: #0a0a0a; border-bottom: 1px solid #2a2a2a; flex-shrink: 0;
|
|
11
|
+
#metrics-bar { display: flex; background: #0a0a0a; border-bottom: 1px solid #2a2a2a; flex-shrink: 0; align-items: stretch; width: 100%; }
|
|
12
12
|
.metric-card { display: flex; flex-direction: column; padding: 5px 10px 4px; border-right: 1px solid #181818; min-width: 140px; flex-shrink: 0; gap: 1px; overflow: hidden; }
|
|
13
13
|
.metric-card:last-child { border-right: none; width: auto; }
|
|
14
14
|
.metric-right { margin-left: auto; }
|
|
@@ -127,15 +127,15 @@
|
|
|
127
127
|
<div class="metric-top"><span class="metric-label">Ping</span><span class="metric-value" id="s-ping">—</span></div>
|
|
128
128
|
<canvas class="metric-canvas" id="g-ping" width="100" height="28"></canvas>
|
|
129
129
|
</div>
|
|
130
|
-
<div class="metric-card
|
|
130
|
+
<div class="metric-card" id="repl-card" style="border-left:1px solid #282828;display:none;width:180px">
|
|
131
131
|
<div class="metric-top"><span class="metric-label">Replication</span><span class="metric-value dim" id="s-repl-role">—</span></div>
|
|
132
132
|
<div style="margin-top:4px;font-size:10px" id="s-repl-sources"></div>
|
|
133
133
|
</div>
|
|
134
|
-
<div class="metric-card" style="border-left:1px solid #282828">
|
|
134
|
+
<div class="metric-card metric-right" style="border-left:1px solid #282828;justify-content:space-between">
|
|
135
135
|
<div class="metric-top"><span class="metric-label">Uptime</span><span class="metric-value dim" id="s-uptime">—</span></div>
|
|
136
|
-
<div style="
|
|
137
|
-
<div
|
|
138
|
-
<div
|
|
136
|
+
<div style="font-size:10px;color:#555;display:flex;justify-content:space-between"><span id="s-ts">—</span><span>v<span id="s-version">—</span></span></div>
|
|
137
|
+
<div><span class="metric-label">WS<span id="ws-dot"></span></span> <span style="font-size:10px;color:#555"><span id="s-clients">0</span> clients · <span id="s-subs">0</span> subs</span></div>
|
|
138
|
+
<div><button id="stats-toggle" class="sm" onclick="toggleStats()" title="Toggle server stats collection">Stats: ON</button></div>
|
|
139
139
|
</div>
|
|
140
140
|
</div>
|
|
141
141
|
|
|
@@ -1257,6 +1257,7 @@ db.on('_admin/stats', (snap) => {
|
|
|
1257
1257
|
document.getElementById('s-subs').textContent = s.subs ?? 0;
|
|
1258
1258
|
document.getElementById('s-uptime').textContent = fmtUptime(s.process.uptimeSec);
|
|
1259
1259
|
document.getElementById('s-ts').textContent = new Date(s.ts).toLocaleTimeString();
|
|
1260
|
+
if (s.version) document.getElementById('s-version').textContent = s.version;
|
|
1260
1261
|
|
|
1261
1262
|
// Replication stats
|
|
1262
1263
|
if (s.repl) {
|
package/package.json
CHANGED
package/src/client/BodClient.ts
CHANGED
|
@@ -456,8 +456,10 @@ export class BodClient {
|
|
|
456
456
|
return this.send('stream-snapshot', { path });
|
|
457
457
|
}
|
|
458
458
|
|
|
459
|
-
async streamMaterialize(path: string, opts?: { keepKey?: string }): Promise<Record<string, unknown
|
|
460
|
-
|
|
459
|
+
/**
 * Request a materialized view of the stream at `path` via the `stream-materialize` op.
 *
 * Without `batchSize` the call resolves to the materialized record itself.
 * With `batchSize` the call resolves to one page: `{ data, nextCursor }`.
 * Pass the returned `nextCursor` back as `cursor` to fetch the following page.
 *
 * @param path Stream path to materialize.
 * @param opts Optional `keepKey`, plus `batchSize`/`cursor` for paginated reads.
 */
async streamMaterialize(path: string, opts?: { keepKey?: string }): Promise<Record<string, unknown>>;
async streamMaterialize(path: string, opts: { keepKey?: string; batchSize: number; cursor?: string }): Promise<{ data: Record<string, unknown>; nextCursor?: string }>;
async streamMaterialize(path: string, opts?: { keepKey?: string; batchSize?: number; cursor?: string }): Promise<Record<string, unknown> | { data: Record<string, unknown>; nextCursor?: string }> {
  // The options are spread flat next to `path`, matching the wire message shape.
  const request = { path, ...opts };
  return this.send('stream-materialize', request) as any;
}
|
|
462
464
|
|
|
463
465
|
async streamCompact(path: string, opts?: { maxAge?: number; maxCount?: number; keepKey?: string }): Promise<unknown> {
|
package/src/server/BodDB.ts
CHANGED
|
@@ -12,6 +12,8 @@ import { VFSEngine, type VFSEngineOptions } from './VFSEngine.ts';
|
|
|
12
12
|
import { KeyAuthEngine, type KeyAuthEngineOptions } from './KeyAuthEngine.ts';
|
|
13
13
|
import { validatePath } from '../shared/pathUtils.ts';
|
|
14
14
|
import { Logger, type LogConfig } from '../shared/logger.ts';
|
|
15
|
+
import pkg from '../../package.json' with { type: 'json' };
|
|
16
|
+
const PKG_VERSION: string = pkg.version ?? 'unknown';
|
|
15
17
|
|
|
16
18
|
export interface TransactionProxy {
|
|
17
19
|
get(path: string): unknown;
|
|
@@ -79,7 +81,8 @@ export class BodDB {
|
|
|
79
81
|
this.options = { ...new BodDBOptions(), ...options };
|
|
80
82
|
this.log = new Logger(this.options.log);
|
|
81
83
|
const _log = this.log.forComponent('db');
|
|
82
|
-
|
|
84
|
+
console.log(`[BodDB] v${PKG_VERSION} (path: ${this.options.path})`);
|
|
85
|
+
_log.info(`Initializing BodDB v${PKG_VERSION} (path: ${this.options.path})`);
|
|
83
86
|
this.storage = new StorageEngine({ path: this.options.path });
|
|
84
87
|
this.subs = new SubscriptionEngine();
|
|
85
88
|
this.stream = new StreamEngine(this.storage, this.subs, { compact: this.options.compact });
|
|
@@ -409,6 +412,7 @@ export class BodDB {
|
|
|
409
412
|
|
|
410
413
|
// Reuse a single stats object to minimize allocations
|
|
411
414
|
const statsData: Record<string, unknown> = {
|
|
415
|
+
version: PKG_VERSION,
|
|
412
416
|
process: {}, db: {}, system: {},
|
|
413
417
|
subs: 0, clients: 0, repl: null, ts: 0,
|
|
414
418
|
};
|
|
@@ -128,11 +128,15 @@ export class ReplicationOptions {
|
|
|
128
128
|
/** Bootstrap replica from primary's full state before applying _repl stream */
|
|
129
129
|
fullBootstrap: boolean = true;
|
|
130
130
|
compact?: CompactOptions;
|
|
131
|
+
/** Auto-compact _repl after this many emitted writes (0 = disabled, default 500) */
|
|
132
|
+
autoCompactThreshold: number = 500;
|
|
131
133
|
sources?: ReplicationSource[];
|
|
132
134
|
/** Per-path topology: strings default to 'sync', objects specify mode. When absent, role governs all paths. */
|
|
133
135
|
paths?: Array<string | PathTopology>;
|
|
134
136
|
}
|
|
135
137
|
|
|
138
|
+
const BOOTSTRAP_BATCH_SIZE = 200;
|
|
139
|
+
|
|
136
140
|
type ProxyableMessage = Extract<ClientMessage, { op: 'set' | 'delete' | 'update' | 'push' | 'batch' }>;
|
|
137
141
|
|
|
138
142
|
export class ReplicationEngine {
|
|
@@ -145,6 +149,7 @@ export class ReplicationEngine {
|
|
|
145
149
|
private _started = false;
|
|
146
150
|
private _seq = 0;
|
|
147
151
|
private _emitting = false;
|
|
152
|
+
private _emitCount = 0;
|
|
148
153
|
private _pendingReplEvents: WriteEvent[] | null = null;
|
|
149
154
|
private log: ComponentLogger;
|
|
150
155
|
|
|
@@ -251,7 +256,7 @@ export class ReplicationEngine {
|
|
|
251
256
|
/** Stop replication */
|
|
252
257
|
stop(): void {
|
|
253
258
|
this._started = false;
|
|
254
|
-
|
|
259
|
+
this._emitCount = 0;
|
|
255
260
|
this.unsubWrite?.();
|
|
256
261
|
this.unsubWrite = null;
|
|
257
262
|
this.unsubStream?.();
|
|
@@ -289,22 +294,15 @@ export class ReplicationEngine {
|
|
|
289
294
|
|
|
290
295
|
// --- Primary mode ---
|
|
291
296
|
|
|
292
|
-
private _compactTimer: ReturnType<typeof setInterval> | null = null;
|
|
293
|
-
|
|
294
297
|
private startPrimary(): void {
|
|
295
298
|
this.unsubWrite = this.db.onWrite((ev: WriteEvent) => {
|
|
296
299
|
this.emit(ev);
|
|
297
300
|
});
|
|
298
301
|
|
|
299
|
-
//
|
|
302
|
+
// Compact on startup
|
|
300
303
|
const compact = this.options.compact ?? { maxCount: 500, keepKey: 'path' };
|
|
301
304
|
if (compact.maxCount || compact.maxAge) {
|
|
302
|
-
// Compact on startup
|
|
303
305
|
try { this.db.stream.compact('_repl', compact); } catch {}
|
|
304
|
-
// Then periodically (every 5 minutes)
|
|
305
|
-
this._compactTimer = setInterval(() => {
|
|
306
|
-
try { this.db.stream.compact('_repl', compact); } catch {}
|
|
307
|
-
}, 5 * 60_000);
|
|
308
306
|
}
|
|
309
307
|
}
|
|
310
308
|
|
|
@@ -351,9 +349,19 @@ export class ReplicationEngine {
|
|
|
351
349
|
const seq = this._seq++;
|
|
352
350
|
const idempotencyKey = `${replEvent.ts}:${seq}:${ev.path}`;
|
|
353
351
|
this.db.push('_repl', replEvent, { idempotencyKey });
|
|
352
|
+
|
|
354
353
|
} finally {
|
|
355
354
|
this._emitting = false;
|
|
356
355
|
}
|
|
356
|
+
|
|
357
|
+
// Auto-compact on write threshold (outside _emitting guard so notifications flow normally)
|
|
358
|
+
this._emitCount++;
|
|
359
|
+
const threshold = this.options.autoCompactThreshold;
|
|
360
|
+
if (threshold > 0 && this._emitCount >= threshold) {
|
|
361
|
+
this._emitCount = 0;
|
|
362
|
+
const compact = this.options.compact ?? { maxCount: 500, keepKey: 'path' };
|
|
363
|
+
try { this.db.stream.compact('_repl', compact); } catch {}
|
|
364
|
+
}
|
|
357
365
|
}
|
|
358
366
|
|
|
359
367
|
/** Start buffering replication events (call before transaction) */
|
|
@@ -394,21 +402,9 @@ export class ReplicationEngine {
|
|
|
394
402
|
await this.bootstrapFullState(bootstrapPaths);
|
|
395
403
|
}
|
|
396
404
|
|
|
397
|
-
// Stream bootstrap filtered
|
|
398
|
-
const
|
|
399
|
-
|
|
400
|
-
this.db.setReplaying(true);
|
|
401
|
-
try {
|
|
402
|
-
for (const [, event] of Object.entries(snapshot)) {
|
|
403
|
-
const ev = event as ReplEvent;
|
|
404
|
-
if (this.matchesPathPrefixes(ev.path, pathPrefixes)) {
|
|
405
|
-
this.applyEvent(ev);
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
} finally {
|
|
409
|
-
this.db.setReplaying(false);
|
|
410
|
-
}
|
|
411
|
-
}
|
|
405
|
+
// Stream bootstrap filtered (cursor-based to avoid huge single response)
|
|
406
|
+
const applied = await this.bootstrapFromStream(this.client!, { filter: ev => this.matchesPathPrefixes(ev.path, pathPrefixes) });
|
|
407
|
+
this.log.info(`Stream bootstrap (paths): ${applied} events applied`);
|
|
412
408
|
|
|
413
409
|
// Subscribe to ongoing events, filter by paths
|
|
414
410
|
const groupId = this.options.replicaId!;
|
|
@@ -449,20 +445,9 @@ export class ReplicationEngine {
|
|
|
449
445
|
await this.bootstrapFullState();
|
|
450
446
|
}
|
|
451
447
|
|
|
452
|
-
// Stream bootstrap:
|
|
453
|
-
const
|
|
454
|
-
this.log.info(`Stream bootstrap: ${
|
|
455
|
-
if (snapshot) {
|
|
456
|
-
this.db.setReplaying(true);
|
|
457
|
-
try {
|
|
458
|
-
for (const [, event] of Object.entries(snapshot)) {
|
|
459
|
-
const ev = event as ReplEvent;
|
|
460
|
-
this.applyEvent(ev);
|
|
461
|
-
}
|
|
462
|
-
} finally {
|
|
463
|
-
this.db.setReplaying(false);
|
|
464
|
-
}
|
|
465
|
-
}
|
|
448
|
+
// Stream bootstrap: cursor-based to avoid huge single response
|
|
449
|
+
const applied = await this.bootstrapFromStream(this.client!);
|
|
450
|
+
this.log.info(`Stream bootstrap: ${applied} events applied`);
|
|
466
451
|
|
|
467
452
|
// Subscribe to ongoing events
|
|
468
453
|
const groupId = this.options.replicaId!;
|
|
@@ -526,21 +511,11 @@ export class ReplicationEngine {
|
|
|
526
511
|
const client = new BodClient({ url: source.url, auth: source.auth });
|
|
527
512
|
await client.connect();
|
|
528
513
|
|
|
529
|
-
// Bootstrap: materialize _repl, filter by source paths
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
for (const [, event] of Object.entries(snapshot)) {
|
|
535
|
-
const ev = event as ReplEvent;
|
|
536
|
-
if (this.matchesSourcePaths(ev.path, source)) {
|
|
537
|
-
this.applyEvent(ev, source);
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
} finally {
|
|
541
|
-
this.db.setReplaying(false);
|
|
542
|
-
}
|
|
543
|
-
}
|
|
514
|
+
// Bootstrap: cursor-based materialize _repl, filter by source paths
|
|
515
|
+
await this.bootstrapFromStream(client, {
|
|
516
|
+
filter: ev => this.matchesSourcePaths(ev.path, source),
|
|
517
|
+
source,
|
|
518
|
+
});
|
|
544
519
|
|
|
545
520
|
// Subscribe to ongoing events
|
|
546
521
|
const groupId = source.id || `source_${source.url}_${source.paths.sort().join('+')}`;
|
|
@@ -577,6 +552,33 @@ export class ReplicationEngine {
|
|
|
577
552
|
return source.localPrefix ? `${source.localPrefix}/${path}` : path;
|
|
578
553
|
}
|
|
579
554
|
|
|
555
|
+
/**
 * Cursor-based stream bootstrap: pages through the `_repl` materialized view
 * (keyed by `path`, `BOOTSTRAP_BATCH_SIZE` entries per request) so that no
 * single response has to carry the whole stream.
 *
 * The replaying flag is raised only around each page's synchronous apply loop,
 * never across the network `await`. Holding it while waiting for a page would
 * let unrelated local writes be treated as replayed, and would let one
 * bootstrap's cleanup clear the flag while another concurrent bootstrap is
 * still paging.
 * NOTE(review): this assumes `db.setReplaying` is a plain db-wide flag; its
 * implementation is not visible here — confirm.
 *
 * @param client Connected client used to fetch pages.
 * @param opts.filter Optional predicate; events it rejects are neither applied nor counted.
 * @param opts.source Optional source forwarded to `applyEvent`.
 * @returns Number of events applied.
 */
private async bootstrapFromStream(client: BodClient, opts?: { filter?: (ev: ReplEvent) => boolean; source?: ReplicationSource }): Promise<number> {
  const filter = opts?.filter;
  const source = opts?.source;
  let cursor: string | undefined;
  let applied = 0;
  do {
    const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: BOOTSTRAP_BATCH_SIZE, cursor });
    if (page.data) {
      // applyEvent is synchronous, so the flag is only up while events are being applied.
      this.db.setReplaying(true);
      try {
        for (const event of Object.values(page.data)) {
          const ev = event as ReplEvent;
          if (!filter || filter(ev)) {
            this.applyEvent(ev, source);
            applied++;
          }
        }
      } finally {
        this.db.setReplaying(false);
      }
    }
    // An undefined cursor ends the pagination.
    cursor = page.nextCursor;
  } while (cursor);
  return applied;
}
|
|
581
|
+
|
|
580
582
|
private applyEvent(ev: ReplEvent, source?: ReplicationSource): void {
|
|
581
583
|
const path = source ? this.remapPath(ev.path, source) : ev.path;
|
|
582
584
|
// Defense-in-depth: skip events for paths we shouldn't apply (primary/writeonly)
|
package/src/server/Transport.ts
CHANGED
|
@@ -706,7 +706,11 @@ export class Transport {
|
|
|
706
706
|
if (self.rules && !self.rules.check('read', msg.path, ws.data.auth)) {
|
|
707
707
|
return error('Permission denied', Errors.PERMISSION_DENIED);
|
|
708
708
|
}
|
|
709
|
-
|
|
709
|
+
const matOpts: { keepKey?: string; batchSize?: number; cursor?: string } = {};
|
|
710
|
+
if (msg.keepKey) matOpts.keepKey = msg.keepKey;
|
|
711
|
+
if (msg.batchSize) matOpts.batchSize = msg.batchSize;
|
|
712
|
+
if (msg.cursor) matOpts.cursor = msg.cursor;
|
|
713
|
+
return reply(self.db.stream.materialize(msg.path, Object.keys(matOpts).length ? matOpts : undefined));
|
|
710
714
|
}
|
|
711
715
|
case 'stream-compact': {
|
|
712
716
|
if (self.rules && !self.rules.check('write', msg.path, ws.data.auth)) {
|
package/src/shared/protocol.ts
CHANGED
|
@@ -26,7 +26,7 @@ export type ClientMessage =
|
|
|
26
26
|
| { id: string; op: 'vector-search'; query: number[]; path?: string; limit?: number; threshold?: number }
|
|
27
27
|
| { id: string; op: 'vector-store'; path: string; embedding: number[] }
|
|
28
28
|
| { id: string; op: 'stream-snapshot'; path: string }
|
|
29
|
-
| { id: string; op: 'stream-materialize'; path: string; keepKey?: string }
|
|
29
|
+
| { id: string; op: 'stream-materialize'; path: string; keepKey?: string; batchSize?: number; cursor?: string }
|
|
30
30
|
| { id: string; op: 'stream-compact'; path: string; maxAge?: number; maxCount?: number; keepKey?: string }
|
|
31
31
|
| { id: string; op: 'stream-reset'; path: string }
|
|
32
32
|
// VFS ops
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
import { describe, it, expect, afterEach } from 'bun:test';
|
|
2
|
+
import { BodDB } from '../src/server/BodDB.ts';
|
|
3
|
+
import { BodClient } from '../src/client/BodClient.ts';
|
|
4
|
+
|
|
5
|
+
/** Resolve after `ms` milliseconds. */
const wait = (ms: number) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
|
|
6
|
+
let nextPort = 27400 + Math.floor(Math.random() * 1000);
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Massive load tests — battle-test cursor-based bootstrap + threshold compact
|
|
10
|
+
* under realistic and extreme conditions.
|
|
11
|
+
*/
|
|
12
|
+
describe('repl load test', () => {
|
|
13
|
+
const instances: BodDB[] = [];
|
|
14
|
+
const clients: BodClient[] = [];
|
|
15
|
+
|
|
16
|
+
// Tear down after every test: disconnect clients first, then close databases
// in reverse creation order, and empty both registries.
afterEach(() => {
  clients.forEach((c) => c.disconnect());
  clients.length = 0;
  for (let i = instances.length - 1; i >= 0; i--) {
    instances[i].close();
  }
  instances.length = 0;
});
|
|
22
|
+
|
|
23
|
+
/** Hand out the next unused port number and advance the counter. */
function port() {
  const assigned = nextPort;
  nextPort += 1;
  return assigned;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Create an in-memory primary, start its replication, serve it on a fresh
 * port and register it for cleanup.
 *
 * `compact` defaults to `{}` and `autoCompactThreshold` to 0 when not given.
 *
 * @returns The database and the port it listens on.
 */
function primary(opts?: { compact?: any; autoCompactThreshold?: number }) {
  const listenPort = port();
  const db = new BodDB({
    path: ':memory:',
    sweepInterval: 0,
    replication: {
      role: 'primary',
      compact: opts?.compact ?? {},
      autoCompactThreshold: opts?.autoCompactThreshold ?? 0,
    },
  });
  db.replication!.start();
  db.serve({ port: listenPort });
  instances.push(db);
  return { db, port: listenPort };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Create an in-memory replica pointed at the primary on `primaryPort`, serve
 * it on a fresh port and register it for cleanup. Replication is NOT started
 * here; callers invoke `db.replication!.start()` themselves.
 *
 * @param primaryPort Port of the primary to replicate from.
 * @param opts Optional `replicaId` (defaults to `load-replica-<port>`) and
 *             `fullBootstrap` (defaults to true).
 * @returns The database and the port it listens on.
 */
function replica(primaryPort: number, opts?: Partial<{ replicaId: string; fullBootstrap: boolean }>) {
  const listenPort = port();
  const replicaId = opts?.replicaId ?? `load-replica-${listenPort}`;
  const fullBootstrap = opts?.fullBootstrap ?? true;
  const db = new BodDB({
    path: ':memory:',
    sweepInterval: 0,
    replication: {
      role: 'replica',
      primaryUrl: `ws://localhost:${primaryPort}`,
      replicaId,
      fullBootstrap,
    },
  });
  db.serve({ port: listenPort });
  instances.push(db);
  return { db, port: listenPort };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Build a client for `ws://localhost:<p>` and register it for cleanup.
 * The client is returned unconnected; callers await `connect()` themselves.
 */
function connect(p: number, opts?: any) {
  const url = `ws://localhost:${p}`;
  const created = new BodClient({ url, ...opts });
  clients.push(created);
  return created;
}
|
|
60
|
+
|
|
61
|
+
// ─── 1. 20k entries: cursor pagination end-to-end ───
|
|
62
|
+
|
|
63
|
+
// Writes 20k distinct paths on the primary, then pages through `_repl`
// 500 entries at a time and checks every key shows up.
it('20k entries: cursor pagination collects every key', async () => {
  const { db, port: p } = primary();
  let written = 0;
  while (written < 20_000) {
    db.set(`items/i${written}`, { v: written, ts: Date.now() });
    written++;
  }

  const client = connect(p);
  await client.connect();

  const seen = new Set<string>();
  let pages = 0;
  let cursor: string | undefined;
  const startedAt = Date.now();
  for (;;) {
    const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 500, cursor });
    Object.keys(page.data).forEach((k) => seen.add(k));
    cursor = page.nextCursor;
    pages += 1;
    if (!cursor) break;
  }
  const elapsed = Date.now() - startedAt;

  console.log(` 20k entries: ${pages} pages, ${seen.size} unique keys, ${elapsed}ms`);
  expect(seen.size).toBe(20_000);
  expect(pages).toBeGreaterThanOrEqual(40); // 20k / 500
}, 30_000);
|
|
88
|
+
|
|
89
|
+
// ─── 2. Cursor vs monolithic: per-page response stays small ───
|
|
90
|
+
|
|
91
|
+
// Compares the serialized size of a single monolithic materialize against
// the largest page seen when paging with batchSize 200.
it('cursor pages stay under 1MB each while monolithic is huge', async () => {
  const { db, port: p } = primary();
  const payload = 'z'.repeat(500);
  let i = 0;
  while (i < 5000) {
    db.set(`big/p${i}`, { data: payload, i });
    i++;
  }

  const client = connect(p);
  await client.connect();

  // Monolithic
  const mono = await client.streamMaterialize('_repl', { keepKey: 'path' });
  const monoSize = JSON.stringify(mono).length;

  // Cursor-based
  let maxPageSize = 0;
  let cursor: string | undefined;
  for (;;) {
    const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 200, cursor });
    maxPageSize = Math.max(maxPageSize, JSON.stringify(page.data).length);
    cursor = page.nextCursor;
    if (!cursor) break;
  }

  console.log(` monolithic: ${(monoSize / 1024 / 1024).toFixed(2)}MB, max page: ${(maxPageSize / 1024).toFixed(0)}KB`);
  expect(monoSize).toBeGreaterThan(2 * 1024 * 1024); // >2MB total
  expect(maxPageSize).toBeLessThan(1024 * 1024); // each page <1MB
}, 15_000);
|
|
119
|
+
|
|
120
|
+
// ─── 3. Auto-compact under sustained write load ───
|
|
121
|
+
|
|
122
|
+
// 10k writes cycling over 300 paths, with compaction triggered every 500
// emitted writes; the `_repl` log must stay far below the raw write count.
it('auto-compact keeps _repl bounded under sustained 10k writes', () => {
  const { db } = primary({ compact: { maxCount: 200, keepKey: 'path' }, autoCompactThreshold: 500 });

  let i = 0;
  while (i < 10_000) {
    db.set(`stream/key${i % 300}`, { round: Math.floor(i / 300), i });
    i++;
  }

  const repl = db.get('_repl') as Record<string, any>;
  let count = 0;
  if (repl) {
    count = Object.keys(repl).length;
  }
  console.log(` 10k writes (300 unique paths), threshold=500, maxCount=200: ${count} _repl entries`);
  // Without compact: 10k. With compact every 500 writes keeping 200: should be way under 1000.
  expect(count).toBeLessThan(1000);
}, 15_000);
|
|
135
|
+
|
|
136
|
+
// ─── 4. Replica bootstrap with 10k entries via cursor ───
|
|
137
|
+
|
|
138
|
+
// Fills the primary with 10k nodes, starts a replica, then spot-checks
// four entries on the replica.
it('replica bootstraps 10k entries via cursor without timeout', async () => {
  const { db: p, port: pp } = primary();
  let i = 0;
  while (i < 10_000) {
    p.set(`data/node${i}`, { value: i, name: `node-${i}`, tags: ['a', 'b'] });
    i++;
  }

  const { db: r } = replica(pp);
  const startedAt = Date.now();
  await r.replication!.start();
  const elapsed = Date.now() - startedAt;

  console.log(` 10k entry replica bootstrap: ${elapsed}ms`);

  await wait(500);
  // Spot-check
  [0, 999, 5000, 9999].forEach((idx) => {
    const val = r.get(`data/node${idx}`) as any;
    expect(val?.value).toBe(idx);
  });
}, 30_000);
|
|
158
|
+
|
|
159
|
+
// ─── 5. Multiple replicas bootstrap concurrently ───
|
|
160
|
+
|
|
161
|
+
it('3 replicas bootstrap concurrently from same primary (10k entries)', async () => {
|
|
162
|
+
const { db: p, port: pp } = primary();
|
|
163
|
+
for (let i = 0; i < 10_000; i++) {
|
|
164
|
+
p.set(`shared/item${i}`, { v: i });
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
const replicas = [replica(pp), replica(pp), replica(pp)];
|
|
168
|
+
const t0 = Date.now();
|
|
169
|
+
await Promise.all(replicas.map(r => r.db.replication!.start()));
|
|
170
|
+
const elapsed = Date.now() - t0;
|
|
171
|
+
|
|
172
|
+
console.log(` 3 concurrent replica bootstraps (10k): ${elapsed}ms`);
|
|
173
|
+
|
|
174
|
+
await wait(500);
|
|
175
|
+
for (const r of replicas) {
|
|
176
|
+
const v0 = r.db.get('shared/item0') as any;
|
|
177
|
+
const v9999 = r.db.get('shared/item9999') as any;
|
|
178
|
+
expect(v0?.v).toBe(0);
|
|
179
|
+
expect(v9999?.v).toBe(9999);
|
|
180
|
+
}
|
|
181
|
+
}, 45_000);
|
|
182
|
+
|
|
183
|
+
// ─── 6. Writes during bootstrap: replica catches up via stream sub ───
|
|
184
|
+
|
|
185
|
+
it('writes during bootstrap are caught via ongoing stream subscription', async () => {
|
|
186
|
+
const { db: p, port: pp } = primary();
|
|
187
|
+
// Pre-fill
|
|
188
|
+
for (let i = 0; i < 5000; i++) {
|
|
189
|
+
p.set(`pre/item${i}`, { v: i });
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
const { db: r } = replica(pp);
|
|
193
|
+
// Start replica (bootstrap starts)
|
|
194
|
+
const startPromise = r.replication!.start();
|
|
195
|
+
|
|
196
|
+
// Write more to primary while bootstrap is in progress
|
|
197
|
+
for (let i = 0; i < 500; i++) {
|
|
198
|
+
p.set(`live/item${i}`, { v: i + 100_000 });
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
await startPromise;
|
|
202
|
+
// Give stream sub time to deliver live writes
|
|
203
|
+
await wait(1000);
|
|
204
|
+
|
|
205
|
+
// Pre-fill data should be there
|
|
206
|
+
const pre0 = r.get('pre/item0') as any;
|
|
207
|
+
expect(pre0?.v).toBe(0);
|
|
208
|
+
const pre4999 = r.get('pre/item4999') as any;
|
|
209
|
+
expect(pre4999?.v).toBe(4999);
|
|
210
|
+
|
|
211
|
+
// Live writes should eventually arrive
|
|
212
|
+
const live499 = r.get('live/item499') as any;
|
|
213
|
+
expect(live499?.v).toBe(100_499);
|
|
214
|
+
}, 30_000);
|
|
215
|
+
|
|
216
|
+
// ─── 7. Heavy overwrite scenario: same 50 paths written 1000× each ───
|
|
217
|
+
|
|
218
|
+
it('50 paths × 1000 overwrites: compact deduplicates correctly', () => {
|
|
219
|
+
const { db } = primary({ compact: { maxCount: 100, keepKey: 'path' }, autoCompactThreshold: 1000 });
|
|
220
|
+
|
|
221
|
+
for (let round = 0; round < 1000; round++) {
|
|
222
|
+
for (let i = 0; i < 50; i++) {
|
|
223
|
+
db.set(`hot/key${i}`, { round, value: round * 50 + i });
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
const repl = db.get('_repl') as Record<string, any>;
|
|
228
|
+
const count = repl ? Object.keys(repl).length : 0;
|
|
229
|
+
console.log(` 50×1000 overwrites: ${count} _repl entries (expect ≤ ~1100)`);
|
|
230
|
+
// 50k total writes. Compact every 1k writes keeping 100. Should be bounded.
|
|
231
|
+
expect(count).toBeLessThan(1500);
|
|
232
|
+
|
|
233
|
+
// Verify every one of the 50 hot paths is still present after compaction (materialize folds _repl by path)
|
|
234
|
+
const materialized = db.stream.materialize('_repl', { keepKey: 'path' });
|
|
235
|
+
const paths = Object.keys(materialized);
|
|
236
|
+
expect(paths.length).toBe(50);
|
|
237
|
+
}, 30_000);
|
|
238
|
+
|
|
239
|
+
// ─── 8. Cursor pagination with snapshot (post-compact) ───
|
|
240
|
+
|
|
241
|
+
it('cursor pagination works correctly after compaction (snapshot + live events)', async () => {
|
|
242
|
+
const { db, port: p } = primary();
|
|
243
|
+
// Write 2000, compact to 200, write 800 more
|
|
244
|
+
for (let i = 0; i < 2000; i++) db.set(`a/item${i}`, { v: i });
|
|
245
|
+
db.stream.compact('_repl', { maxCount: 200, keepKey: 'path' });
|
|
246
|
+
for (let i = 2000; i < 2800; i++) db.set(`a/item${i}`, { v: i });
|
|
247
|
+
|
|
248
|
+
const client = connect(p);
|
|
249
|
+
await client.connect();
|
|
250
|
+
|
|
251
|
+
const keys = new Set<string>();
|
|
252
|
+
let cursor: string | undefined;
|
|
253
|
+
let pages = 0;
|
|
254
|
+
do {
|
|
255
|
+
const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 100, cursor });
|
|
256
|
+
for (const k of Object.keys(page.data)) keys.add(k);
|
|
257
|
+
cursor = page.nextCursor;
|
|
258
|
+
pages++;
|
|
259
|
+
} while (cursor);
|
|
260
|
+
|
|
261
|
+
console.log(` 2000 + compact + 800 more: ${keys.size} keys in ${pages} pages`);
|
|
262
|
+
// All 2800 unique paths should be present (snapshot has older ones, events have newer)
|
|
263
|
+
expect(keys.size).toBe(2800);
|
|
264
|
+
}, 15_000);
|
|
265
|
+
|
|
266
|
+
// ─── 9. Replica with auto-compact primary: data integrity ───
|
|
267
|
+
|
|
268
|
+
it('replica gets correct data when primary auto-compacts during heavy writes', async () => {
|
|
269
|
+
const { db: p, port: pp } = primary({ compact: { maxCount: 300, keepKey: 'path' }, autoCompactThreshold: 500 });
|
|
270
|
+
|
|
271
|
+
// 5000 writes — auto-compact fires multiple times
|
|
272
|
+
for (let i = 0; i < 5000; i++) {
|
|
273
|
+
p.set(`verified/item${i}`, { value: i * 7, tag: 'check' });
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
const { db: r } = replica(pp);
|
|
277
|
+
await r.replication!.start();
|
|
278
|
+
await wait(500);
|
|
279
|
+
|
|
280
|
+
// Spot-check integrity (value and tag) on a sample of indices spread across the range
|
|
281
|
+
const sample = [0, 100, 999, 2500, 4000, 4999];
|
|
282
|
+
for (const idx of sample) {
|
|
283
|
+
const val = r.get(`verified/item${idx}`) as any;
|
|
284
|
+
expect(val?.value).toBe(idx * 7);
|
|
285
|
+
expect(val?.tag).toBe('check');
|
|
286
|
+
}
|
|
287
|
+
}, 30_000);
|
|
288
|
+
|
|
289
|
+
// ─── 10. Mixed deletes + sets under load ───
|
|
290
|
+
|
|
291
|
+
it('deletes replicate correctly through cursor-based bootstrap (no fullBootstrap)', async () => {
|
|
292
|
+
const { db: p, port: pp } = primary();
|
|
293
|
+
|
|
294
|
+
// Create 1000, then delete half
|
|
295
|
+
for (let i = 0; i < 1000; i++) {
|
|
296
|
+
p.set(`mix/item${i}`, { v: i });
|
|
297
|
+
}
|
|
298
|
+
for (let i = 0; i < 1000; i += 2) {
|
|
299
|
+
p.delete(`mix/item${i}`);
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
// Disable fullBootstrap so only _repl stream materialize is used
|
|
303
|
+
const { db: r } = replica(pp, { fullBootstrap: false });
|
|
304
|
+
await r.replication!.start();
|
|
305
|
+
await wait(500);
|
|
306
|
+
|
|
307
|
+
// Even indices: _repl has set then delete — materialize with keepKey=path keeps last op (delete)
|
|
308
|
+
// But materialize folds by keepKey, and delete events have op:'delete' — they apply as db.delete()
|
|
309
|
+
// Odd indices should exist from set events
|
|
310
|
+
for (const i of [1, 3, 99, 999]) {
|
|
311
|
+
const val = r.get(`mix/item${i}`) as any;
|
|
312
|
+
expect(val?.v).toBe(i);
|
|
313
|
+
}
|
|
314
|
+
// Verify the primary has the even-indexed items deleted (note: the replica's view of these keys is not asserted here)
|
|
315
|
+
for (const i of [0, 2, 100, 998]) {
|
|
316
|
+
expect(p.get(`mix/item${i}`)).toBeNull();
|
|
317
|
+
}
|
|
318
|
+
}, 15_000);
|
|
319
|
+
|
|
320
|
+
// ─── 11. Rapid batchSize=1 pagination (worst case) ───
|
|
321
|
+
|
|
322
|
+
it('batchSize=1 pagination still completes for 500 entries', async () => {
|
|
323
|
+
const { db, port: p } = primary();
|
|
324
|
+
for (let i = 0; i < 500; i++) db.set(`tiny/k${i}`, { i });
|
|
325
|
+
|
|
326
|
+
const client = connect(p);
|
|
327
|
+
await client.connect();
|
|
328
|
+
|
|
329
|
+
const keys = new Set<string>();
|
|
330
|
+
let cursor: string | undefined;
|
|
331
|
+
let pages = 0;
|
|
332
|
+
const t0 = Date.now();
|
|
333
|
+
do {
|
|
334
|
+
const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 1, cursor });
|
|
335
|
+
for (const k of Object.keys(page.data)) keys.add(k);
|
|
336
|
+
cursor = page.nextCursor;
|
|
337
|
+
pages++;
|
|
338
|
+
} while (cursor);
|
|
339
|
+
const elapsed = Date.now() - t0;
|
|
340
|
+
|
|
341
|
+
console.log(` batchSize=1 over 500 entries: ${pages} pages, ${elapsed}ms`);
|
|
342
|
+
expect(keys.size).toBe(500);
|
|
343
|
+
expect(pages).toBeGreaterThanOrEqual(500);
|
|
344
|
+
}, 30_000);
|
|
345
|
+
|
|
346
|
+
// ─── 12. Throughput benchmark: writes/sec with auto-compact ───
|
|
347
|
+
|
|
348
|
+
it('write throughput with auto-compact enabled', () => {
|
|
349
|
+
const { db: dbCompact } = primary({ compact: { maxCount: 200, keepKey: 'path' }, autoCompactThreshold: 500 });
|
|
350
|
+
const { db: dbPlain } = primary();
|
|
351
|
+
|
|
352
|
+
const N = 10_000;
|
|
353
|
+
|
|
354
|
+
const t0 = Date.now();
|
|
355
|
+
for (let i = 0; i < N; i++) dbPlain.set(`bench/k${i}`, { i });
|
|
356
|
+
const plainMs = Date.now() - t0;
|
|
357
|
+
|
|
358
|
+
const t1 = Date.now();
|
|
359
|
+
for (let i = 0; i < N; i++) dbCompact.set(`bench/k${i}`, { i });
|
|
360
|
+
const compactMs = Date.now() - t1;
|
|
361
|
+
|
|
362
|
+
const plainWps = Math.round(N / (plainMs / 1000));
|
|
363
|
+
const compactWps = Math.round(N / (compactMs / 1000));
|
|
364
|
+
const overhead = ((compactMs - plainMs) / plainMs * 100).toFixed(1);
|
|
365
|
+
|
|
366
|
+
console.log(` ${N} writes — plain: ${plainMs}ms (${plainWps} w/s), compact: ${compactMs}ms (${compactWps} w/s), overhead: ${overhead}%`);
|
|
367
|
+
// Auto-compact overhead should stay below 200% (compactMs < 3 × plainMs); compact is cheap relative to N writes
|
|
368
|
+
expect(compactMs).toBeLessThan(plainMs * 3);
|
|
369
|
+
}, 30_000);
|
|
370
|
+
});
|
|
@@ -5,18 +5,6 @@ import { BodClient } from '../src/client/BodClient.ts';
|
|
|
5
5
|
const wait = (ms: number) => new Promise(r => setTimeout(r, ms));
|
|
6
6
|
let nextPort = 26400 + Math.floor(Math.random() * 1000);
|
|
7
7
|
|
|
8
|
-
/**
|
|
9
|
-
* Stress tests for _repl stream bloat.
|
|
10
|
-
*
|
|
11
|
-
* The real-world issue: _repl grows unbounded → streamMaterialize produces
|
|
12
|
-
* a massive WS response → client requestTimeout (30s) fires or WS chokes.
|
|
13
|
-
*
|
|
14
|
-
* Locally we can't easily reproduce network latency, but we CAN:
|
|
15
|
-
* 1. Push the entry count + payload size to stress serialization/parsing
|
|
16
|
-
* 2. Use short requestTimeout on the replica client to simulate the real failure
|
|
17
|
-
* 3. Measure materialization time scaling (O(n) proof)
|
|
18
|
-
* 4. Verify compaction actually fixes it
|
|
19
|
-
*/
|
|
20
8
|
describe('_repl stream bloat', () => {
|
|
21
9
|
const instances: BodDB[] = [];
|
|
22
10
|
const clients: BodClient[] = [];
|
|
@@ -31,12 +19,12 @@ describe('_repl stream bloat', () => {
|
|
|
31
19
|
function getPort() { return nextPort++; }
|
|
32
20
|
|
|
33
21
|
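/** Primary with auto-compact DISABLED by default (autoCompactThreshold falls back to 0) so _repl grows unbounded; pass opts.compact / opts.autoCompactThreshold to enable it */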
|
|
34
|
-
function createPrimary(opts?: { maxMessageSize?: number }) {
|
|
22
|
+
function createPrimary(opts?: { maxMessageSize?: number; compact?: any; autoCompactThreshold?: number }) {
|
|
35
23
|
const port = getPort();
|
|
36
24
|
const db = new BodDB({
|
|
37
25
|
path: ':memory:',
|
|
38
26
|
sweepInterval: 0,
|
|
39
|
-
replication: { role: 'primary', compact: {} },
|
|
27
|
+
replication: { role: 'primary', compact: opts?.compact ?? {}, autoCompactThreshold: opts?.autoCompactThreshold ?? 0 },
|
|
40
28
|
});
|
|
41
29
|
db.replication!.start();
|
|
42
30
|
db.serve({ port, maxMessageSize: opts?.maxMessageSize });
|
|
@@ -117,6 +105,51 @@ describe('_repl stream bloat', () => {
|
|
|
117
105
|
expect(ratio).toBeGreaterThan(5); // 5000/500 = 10x, allow some dedup
|
|
118
106
|
});
|
|
119
107
|
|
|
108
|
+
// --- Cursor-based materialize ---
|
|
109
|
+
|
|
110
|
+
it('cursor-based materialize pages correctly over large _repl', async () => {
|
|
111
|
+
const { db, port } = createPrimary();
|
|
112
|
+
fillRepl(db, 1000, 200);
|
|
113
|
+
|
|
114
|
+
const client = new BodClient({ url: `ws://localhost:${port}` });
|
|
115
|
+
clients.push(client);
|
|
116
|
+
await client.connect();
|
|
117
|
+
|
|
118
|
+
// Page through with batchSize=200
|
|
119
|
+
const allKeys = new Set<string>();
|
|
120
|
+
let cursor: string | undefined;
|
|
121
|
+
let pages = 0;
|
|
122
|
+
do {
|
|
123
|
+
const page = await client.streamMaterialize('_repl', { keepKey: 'path', batchSize: 200, cursor });
|
|
124
|
+
if (page.data) {
|
|
125
|
+
for (const key of Object.keys(page.data)) allKeys.add(key);
|
|
126
|
+
}
|
|
127
|
+
cursor = page.nextCursor;
|
|
128
|
+
pages++;
|
|
129
|
+
} while (cursor);
|
|
130
|
+
|
|
131
|
+
console.log(` 1000 entries paged in ${pages} pages, got ${allKeys.size} unique keys`);
|
|
132
|
+
expect(allKeys.size).toBe(1000);
|
|
133
|
+
expect(pages).toBeGreaterThanOrEqual(5); // 1000/200 = 5 pages minimum
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
// --- Auto-compact on write threshold ---
|
|
137
|
+
|
|
138
|
+
it('auto-compact triggers after N writes', () => {
|
|
139
|
+
const { db } = createPrimary({ compact: { maxCount: 50, keepKey: 'path' }, autoCompactThreshold: 100 });
|
|
140
|
+
|
|
141
|
+
// Write 250 entries — compact triggers at 100, 200; maxCount=50 keeps only 50 each time
|
|
142
|
+
for (let i = 0; i < 250; i++) {
|
|
143
|
+
db.set(`data/item${i}`, { value: i });
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
const repl = db.get('_repl') as Record<string, any>;
|
|
147
|
+
const count = repl ? Object.keys(repl).length : 0;
|
|
148
|
+
console.log(` 250 writes with threshold=100, maxCount=50: ${count} _repl entries`);
|
|
149
|
+
// After compact at 200 (keeps 50), then 50 more → ~100. Way less than 250.
|
|
150
|
+
expect(count).toBeLessThan(150);
|
|
151
|
+
});
|
|
152
|
+
|
|
120
153
|
// --- Timeout reproduction: short requestTimeout simulates real-world failure ---
|
|
121
154
|
|
|
122
155
|
it('short requestTimeout causes streamMaterialize to fail on bloated _repl', async () => {
|
|
@@ -126,8 +159,6 @@ describe('_repl stream bloat', () => {
|
|
|
126
159
|
const replCount = Object.keys(primary.get('_repl') as Record<string, any>).length;
|
|
127
160
|
expect(replCount).toBe(10000);
|
|
128
161
|
|
|
129
|
-
// Direct client with 1ms timeout — guaranteed to fail, proving the
|
|
130
|
-
// timeout path exists and that materialize has no retry/fallback
|
|
131
162
|
const client = new BodClient({
|
|
132
163
|
url: `ws://localhost:${primaryPort}`,
|
|
133
164
|
requestTimeout: 1,
|
|
@@ -161,16 +192,14 @@ describe('_repl stream bloat', () => {
|
|
|
161
192
|
const responseSize = JSON.stringify(result).length;
|
|
162
193
|
|
|
163
194
|
console.log(` 10k entries × 1KB: ${(responseSize / 1024 / 1024).toFixed(1)}MB response in ${elapsed}ms`);
|
|
164
|
-
// On real networks with latency, this 10MB+ response would easily exceed 30s timeout
|
|
165
195
|
expect(responseSize).toBeGreaterThan(5 * 1024 * 1024); // >5MB
|
|
166
196
|
});
|
|
167
197
|
|
|
168
|
-
// --- Payload size bomb
|
|
198
|
+
// --- Payload size bomb ---
|
|
169
199
|
|
|
170
200
|
it('large payloads per entry amplify the problem', async () => {
|
|
171
201
|
const { db, port } = createPrimary();
|
|
172
202
|
|
|
173
|
-
// 1000 entries but 2KB each → ~2MB+ materialize response
|
|
174
203
|
const bigPadding = 'y'.repeat(2000);
|
|
175
204
|
for (let i = 0; i < 1000; i++) {
|
|
176
205
|
db.set(`data/big${i}`, {
|
|
@@ -190,7 +219,7 @@ describe('_repl stream bloat', () => {
|
|
|
190
219
|
const responseSize = JSON.stringify(result).length;
|
|
191
220
|
|
|
192
221
|
console.log(` 1000 entries × 2KB = ${(responseSize / 1024 / 1024).toFixed(2)}MB, ${elapsed}ms`);
|
|
193
|
-
expect(responseSize).toBeGreaterThan(2 * 1024 * 1024);
|
|
222
|
+
expect(responseSize).toBeGreaterThan(2 * 1024 * 1024);
|
|
194
223
|
});
|
|
195
224
|
|
|
196
225
|
// --- Compaction fixes it ---
|
|
@@ -202,14 +231,12 @@ describe('_repl stream bloat', () => {
|
|
|
202
231
|
const beforeCount = Object.keys(primary.get('_repl') as Record<string, any>).length;
|
|
203
232
|
expect(beforeCount).toBe(5000);
|
|
204
233
|
|
|
205
|
-
// Compact down to 500
|
|
206
234
|
primary.stream.compact('_repl', { maxCount: 500, keepKey: 'path' });
|
|
207
235
|
const afterRepl = primary.get('_repl') as Record<string, any>;
|
|
208
236
|
const afterCount = afterRepl ? Object.keys(afterRepl).length : 0;
|
|
209
237
|
console.log(` Compacted: ${beforeCount} → ${afterCount} entries`);
|
|
210
238
|
expect(afterCount).toBeLessThanOrEqual(500);
|
|
211
239
|
|
|
212
|
-
// Bootstrap should now work fast with compacted stream
|
|
213
240
|
const { db: replica } = createReplica(primaryPort);
|
|
214
241
|
const start = Date.now();
|
|
215
242
|
await replica.replication!.start();
|
|
@@ -219,17 +246,15 @@ describe('_repl stream bloat', () => {
|
|
|
219
246
|
expect(elapsed).toBeLessThan(3000);
|
|
220
247
|
|
|
221
248
|
await wait(300);
|
|
222
|
-
// Verify latest writes are present (compaction keeps newest by keepKey)
|
|
223
249
|
const val = replica.get('vfs/files/project/src/deep/nested/path/module4999/component.tsx') as any;
|
|
224
250
|
expect(val?.size).toBe(1024 + 4999);
|
|
225
251
|
});
|
|
226
252
|
|
|
227
|
-
// --- Repeated writes
|
|
253
|
+
// --- Repeated writes ---
|
|
228
254
|
|
|
229
255
|
it('repeated writes to same paths bloat _repl with duplicates', () => {
|
|
230
256
|
const { db } = createPrimary();
|
|
231
257
|
|
|
232
|
-
// 100 paths × 50 writes each = 5000 _repl entries, but only 100 unique paths
|
|
233
258
|
for (let round = 0; round < 50; round++) {
|
|
234
259
|
for (let i = 0; i < 100; i++) {
|
|
235
260
|
db.set(`config/setting${i}`, { value: round, updated: Date.now() });
|
|
@@ -241,30 +266,30 @@ describe('_repl stream bloat', () => {
|
|
|
241
266
|
console.log(` 100 paths × 50 writes = ${totalEntries} _repl entries`);
|
|
242
267
|
expect(totalEntries).toBe(5000);
|
|
243
268
|
|
|
244
|
-
// Compact with keepKey deduplicates to 100
|
|
245
269
|
db.stream.compact('_repl', { keepKey: 'path' });
|
|
246
270
|
const after = db.get('_repl') as Record<string, any>;
|
|
247
271
|
const afterCount = after ? Object.keys(after).length : 0;
|
|
248
|
-
// snapshot (1) + remaining entries
|
|
249
272
|
console.log(` After compact: ${afterCount} entries (expect ~100 unique paths)`);
|
|
250
273
|
expect(afterCount).toBeLessThanOrEqual(150);
|
|
251
274
|
});
|
|
252
275
|
|
|
253
|
-
// ---
|
|
276
|
+
// --- Cursor-based bootstrap works for replica ---
|
|
254
277
|
|
|
255
|
-
it('replica
|
|
278
|
+
it('replica bootstrap uses cursor-based pagination (no timeout on large _repl)', async () => {
|
|
256
279
|
const { db: primary, port: primaryPort } = createPrimary();
|
|
257
280
|
fillRepl(primary, 3000, 300);
|
|
258
281
|
|
|
259
282
|
const { db: replica } = createReplica(primaryPort);
|
|
260
283
|
|
|
261
|
-
// Measure: start() blocks until materialize completes — no internal timeout
|
|
262
284
|
const start = Date.now();
|
|
263
285
|
await replica.replication!.start();
|
|
264
286
|
const elapsed = Date.now() - start;
|
|
265
287
|
|
|
266
|
-
console.log(` replica
|
|
267
|
-
|
|
268
|
-
|
|
288
|
+
console.log(` Cursor-based replica bootstrap: ${elapsed}ms for 3000 entries`);
|
|
289
|
+
|
|
290
|
+
await wait(300);
|
|
291
|
+
// Verify data arrived
|
|
292
|
+
const val = replica.get('vfs/files/project/src/deep/nested/path/module2999/component.tsx') as any;
|
|
293
|
+
expect(val?.size).toBe(1024 + 2999);
|
|
269
294
|
});
|
|
270
295
|
});
|