@bod.ee/db 0.12.1 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/config-file.md +6 -0
- package/.claude/skills/deploying-bod-db.md +21 -0
- package/.claude/skills/developing-bod-db.md +1 -1
- package/.claude/skills/using-bod-db.md +42 -0
- package/CLAUDE.md +2 -1
- package/README.md +14 -0
- package/admin/ui.html +7 -6
- package/docs/para-chat-integration.md +198 -0
- package/docs/repl-stream-bloat-spec.md +48 -0
- package/package.json +1 -1
- package/src/client/BodClient.ts +4 -2
- package/src/server/BodDB.ts +8 -4
- package/src/server/ReplicationEngine.ts +283 -45
- package/src/server/Transport.ts +62 -30
- package/src/server/VFSEngine.ts +13 -2
- package/src/shared/protocol.ts +2 -1
- package/tests/repl-load.test.ts +370 -0
- package/tests/repl-stream-bloat.test.ts +295 -0
- package/tests/replication-topology.test.ts +835 -0
|
@@ -47,6 +47,12 @@ export default {
|
|
|
47
47
|
log: { enabled: true, level: 'info', components: ['transport', 'stats', 'storage'] }, // or components: '*' for all
|
|
48
48
|
replication: {
|
|
49
49
|
role: 'primary',
|
|
50
|
+
primaryUrl: 'ws://remote:4400', // needed for replica/sync paths
|
|
51
|
+
paths: [
|
|
52
|
+
{ path: '_vfs', mode: 'primary' },
|
|
53
|
+
{ path: '_auth', mode: 'replica' },
|
|
54
|
+
{ path: 'config', mode: 'sync' },
|
|
55
|
+
],
|
|
50
56
|
sources: [
|
|
51
57
|
{ url: 'ws://other-db:4400', paths: ['catalog'], localPrefix: 'ext', id: 'my-source' },
|
|
52
58
|
],
|
|
@@ -44,6 +44,27 @@ export default {
|
|
|
44
44
|
```
|
|
45
45
|
Primary config just needs `replication: { role: 'primary' }`. Replicas auto-bootstrap on startup.
|
|
46
46
|
|
|
47
|
+
## Per-Path Topology
|
|
48
|
+
Mixed ownership — each path prefix gets independent replication mode:
|
|
49
|
+
```typescript
|
|
50
|
+
// deploy/prod-edge.config.ts
|
|
51
|
+
export default {
|
|
52
|
+
path: './data-edge.db',
|
|
53
|
+
port: 4401,
|
|
54
|
+
replication: {
|
|
55
|
+
role: 'primary',
|
|
56
|
+
primaryUrl: 'ws://central:4400',
|
|
57
|
+
paths: [
|
|
58
|
+
{ path: '_vfs', mode: 'primary' }, // local files → push to central
|
|
59
|
+
{ path: '_auth', mode: 'replica' }, // auth from central → pull
|
|
60
|
+
{ path: 'config', mode: 'sync' }, // bidirectional
|
|
61
|
+
{ path: 'telemetry', mode: 'writeonly' }, // push metrics, ignore remote
|
|
62
|
+
],
|
|
63
|
+
},
|
|
64
|
+
};
|
|
65
|
+
```
|
|
66
|
+
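Modes: `primary` (local authoritative, emits), `replica` (remote authoritative, proxies writes), `sync` (bidirectional — emits and applies), `readonly` (pull-only, rejects writes), `writeonly` (push-only, ignores remote). Unconfigured paths fall back to `role`.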
|
|
67
|
+
|
|
47
68
|
## Multi-Source Feed Subscriptions
|
|
48
69
|
Pull specific paths from multiple remote DBs:
|
|
49
70
|
```typescript
|
|
@@ -86,7 +86,7 @@ Push paths are append-only logs. `StreamEngine` adds consumer group offsets (`_s
|
|
|
86
86
|
`MQEngine` owns all MQ SQL via `storage.db.prepare()` — same pattern as StreamEngine. Columns: `mq_status` (pending/inflight), `mq_inflight_until` (Unix ms), `mq_delivery_count`. `fetch()` uses SQLite transaction with TOCTOU guard (`changes > 0`). Ack = DELETE. Sweep reclaims expired inflight; exhausted messages move to DLQ at `<queue>/_dlq/<key>`. Per-queue options via longest prefix match on `queues` config.
|
|
87
87
|
|
|
88
88
|
### Replication
|
|
89
|
-
`ReplicationEngine` — primary/replica + multi-source feed subscriptions via `_repl` stream. Primary: `onWrite` hooks emit events to `_repl` stream (updates flattened to per-path sets). Replica: bootstraps via `streamMaterialize
|
|
89
|
+
`ReplicationEngine` — primary/replica + multi-source feed subscriptions via `_repl` stream. Primary: `onWrite` hooks emit events to `_repl` stream (updates flattened to per-path sets). Auto-compact on write threshold (`autoCompactThreshold`, default 500) + on startup keeps `_repl` bounded. Replica: bootstraps via cursor-based `streamMaterialize` pagination (`batchSize: 200`), subscribes for ongoing events, proxies writes to primary. `bootstrapFromStream()` helper handles all 3 bootstrap sites (replica, router-based, sources). Guards: `_replaying` prevents re-emission, `_emitting` prevents recursion from `db.push('_repl')`. Sweep deletes are replicated. Transport checks `isReplica` and forwards write ops.
|
|
90
90
|
|
|
91
91
|
**Sources** (`ReplicationSource[]`): independent of role. Each source creates a `BodClient`, bootstraps filtered `_repl` snapshot, subscribes for ongoing events. `matchesSourcePaths()` filters by path prefix. `remapPath()` prepends `localPrefix`. Events applied with `_replaying=true`. Sources connect via `Promise.allSettled` — individual failures logged, others continue. Deterministic `groupId` default: `source_${url}_${paths.join('+')}`.
|
|
92
92
|
|
|
@@ -301,6 +301,11 @@ ws.send(JSON.stringify({ id: '20', op: 'batch-sub', subscriptions: [
|
|
|
301
301
|
// Stream extended ops
|
|
302
302
|
ws.send(JSON.stringify({ id: '21', op: 'stream-snapshot', path: 'events/orders' }));
|
|
303
303
|
ws.send(JSON.stringify({ id: '21', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId' }));
|
|
304
|
+
// Cursor-based pagination (for large streams):
|
|
305
|
+
ws.send(JSON.stringify({ id: '21b', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId', batchSize: 200 }));
|
|
306
|
+
// → { id: '21b', ok: true, data: { data: {...}, nextCursor: 'abc123' } }
|
|
307
|
+
// Follow-up page:
|
|
308
|
+
ws.send(JSON.stringify({ id: '21c', op: 'stream-materialize', path: 'events/orders', keepKey: 'orderId', batchSize: 200, cursor: 'abc123' }));
|
|
304
309
|
ws.send(JSON.stringify({ id: '22', op: 'stream-compact', path: 'events/orders', maxAge: 86400 }));
|
|
305
310
|
ws.send(JSON.stringify({ id: '23', op: 'stream-reset', path: 'events/orders' }));
|
|
306
311
|
```
|
|
@@ -450,6 +455,13 @@ const similar = await client.vectorSearch({ query: [0.1, 0.2, 0.3], path: 'docs'
|
|
|
450
455
|
// Stream snapshot, materialize, compact, reset
|
|
451
456
|
const snap = await client.streamSnapshot('events/orders');
|
|
452
457
|
const view = await client.streamMaterialize('events/orders', { keepKey: 'orderId' });
|
|
458
|
+
// Cursor-based materialize for large streams (avoids huge single response):
|
|
459
|
+
let cursor: string | undefined;
|
|
460
|
+
do {
|
|
461
|
+
const page = await client.streamMaterialize('events/orders', { keepKey: 'orderId', batchSize: 200, cursor });
|
|
462
|
+
// page.data contains this batch, page.nextCursor is undefined when done
|
|
463
|
+
cursor = page.nextCursor;
|
|
464
|
+
} while (cursor);
|
|
453
465
|
await client.streamCompact('events/orders', { maxAge: 86400 });
|
|
454
466
|
await client.streamReset('events/orders');
|
|
455
467
|
```
|
|
@@ -488,6 +500,36 @@ await replica.replication!.start();
|
|
|
488
500
|
- Auto-compaction on `_repl` stream (keepKey: 'path', maxCount: 10000)
|
|
489
501
|
- Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth` (internal data not replicated)
|
|
490
502
|
|
|
503
|
+
### Per-Path Topology
|
|
504
|
+
|
|
505
|
+
Mixed ownership per path — some paths local-authoritative, others remote-authoritative.
|
|
506
|
+
|
|
507
|
+
```typescript
|
|
508
|
+
const db = new BodDB({
|
|
509
|
+
path: './data.db',
|
|
510
|
+
replication: {
|
|
511
|
+
role: 'primary', // fallback for unconfigured paths
|
|
512
|
+
primaryUrl: 'ws://remote:4400',
|
|
513
|
+
paths: [
|
|
514
|
+
{ path: '_vfs', mode: 'primary' }, // local authoritative, emits to remote
|
|
515
|
+
{ path: '_auth', mode: 'replica' }, // remote authoritative, proxies writes
|
|
516
|
+
{ path: 'config', mode: 'sync' }, // bidirectional
|
|
517
|
+
{ path: 'feeds', mode: 'readonly' }, // pull-only, rejects writes
|
|
518
|
+
{ path: 'telemetry', mode: 'writeonly' }, // push-only, ignores remote
|
|
519
|
+
],
|
|
520
|
+
},
|
|
521
|
+
});
|
|
522
|
+
await db.replication!.start();
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
- 5 modes: `primary`, `replica`, `sync`, `readonly`, `writeonly`
|
|
526
|
+
- Longest-prefix match: `_auth/nonces` (primary) overrides `_auth` (replica)
|
|
527
|
+
- String paths default to `sync`: `paths: ['_vfs', '_auth']` = both sync
|
|
528
|
+
- `writeProxy: 'reject'` on replica paths blocks writes instead of proxying
|
|
529
|
+
- Bootstrap pulls only `replica`/`readonly` paths; `sync` gets ongoing stream only
|
|
530
|
+
- Unconfigured paths fall back to `role`
|
|
531
|
+
- Backward compatible: no `paths` = role-based (existing behavior)
|
|
532
|
+
|
|
491
533
|
### Multi-Source Feed Subscriptions
|
|
492
534
|
|
|
493
535
|
Subscribe to specific paths from multiple remote BodDB instances — like RSS feeds for database paths.
|
package/CLAUDE.md
CHANGED
|
@@ -71,8 +71,9 @@ config.ts — demo instance config (open rules, indexes, fts, v
|
|
|
71
71
|
- **BodClientCached**: two-tier cache wrapper around BodClient. Memory (Map, LRU eviction) + IndexedDB persistence. Stale-while-revalidate: subscribed paths always fresh, unsubscribed return stale + background refetch. Writes (`set/update/delete`) invalidate path + ancestors. `init()` opens IDB + sweeps expired. `warmup(paths[])` bulk-loads from IDB. Passthrough for `push/batch/query/search/mq/stream/vfs` via `cachedClient.client`.
|
|
72
72
|
- **MCP**: `MCPAdapter` wraps a `BodClient` as a JSON-RPC MCP server (stdio + HTTP). Connects to a running BodDB instance over WebSocket — no embedded DB. Entry point: `mcp.ts`. Tools: CRUD (6), FTS (2), vectors (2), streams (4), MQ (7) = 21 tools. Use `--stdio` for Claude Code/Desktop, `--http` for remote agents.
|
|
73
73
|
- **VFS (Virtual File System)**: `VFSEngine` — files stored outside SQLite via pluggable `VFSBackend` interface. `LocalBackend` stores at `<storageRoot>/<fileId>` using `Bun.file`/`Bun.write`. Metadata at `_vfs/<virtualPath>/` (size, mime, mtime, fileId, isDir) — gets subs/rules/replication for free. `fileId = pushId` so move/rename is metadata-only. REST: `POST/GET/DELETE /files/<path>`, `?stat=1`, `?list=1`, `?mkdir=1`, `PUT ?move=<dst>`. WS chunked fallback: base64-encoded `vfs-upload-init/chunk/done`, `vfs-download-init` → `vfs-download-chunk` push messages. Client: `VFSClient` via `client.vfs()` — `upload/download` (REST) + `uploadWS/downloadWS` (WS) + `stat/list/mkdir/delete/move`.
|
|
74
|
-
- **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via `streamMaterialize('_repl', { keepKey: 'path' })
|
|
74
|
+
- **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via cursor-based `streamMaterialize('_repl', { keepKey: 'path', batchSize: 200 })` pagination (avoids huge single WS frame), then subscribe for ongoing events. Auto-compact on write threshold (`autoCompactThreshold`, default 500) + on startup keeps `_repl` bounded. Write proxy: replica forwards writes to primary via BodClient, primary applies + emits, replica consumes. `_replaying` flag prevents re-emission loops. `_emitting` guard prevents recursion from `db.push('_repl')`. Updates flattened to per-path set events for correct compaction keying. Sweep delete events replicated. Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth`. **Sources**: `ReplicationSource[]` — subscribe to specific paths from multiple remote DBs. Each source is an independent BodClient that filters `_repl` events by path prefix, with optional `localPrefix` remapping (e.g. remote `users/u1` → local `db-a/users/u1`). Sources connect in parallel; individual failures don't block others. Sources are independent of role — a DB can be primary AND consume sources. **Per-path topology**: `PathTopologyRouter` — when `paths` config is set, each path prefix gets an independent mode: `primary` (local authoritative, emits), `replica` (remote authoritative, proxies writes), `sync` (bidirectional, both emit+apply), `readonly` (pull-only, rejects writes), `writeonly` (push-only, ignores remote). Longest-prefix match resolves mode. `writeProxy: 'proxy'|'reject'` overrides replica write behavior. Bootstrap skips sync paths (ongoing stream only). Auth/rules checked before proxy in all handlers. `shouldProxyPath(path)`/`shouldRejectPath(path)` replace `isReplica` checks. `emitsToRepl`/`pullsFromPrimary` getters for compact/bootstrap decisions. Stable `replicaId` from config hash. 
Falls back to `role` when `paths` absent (backward compat).
|
|
75
75
|
- **KeyAuth integration guide**: `docs/keyauth-integration.md` — flows for signup, signin, new device, autoAuth, IAM roles, common mistakes.
|
|
76
|
+
- **Para-chat integration guide**: `docs/para-chat-integration.md` — how para-chat uses BodDB: per-path topology, VFS, KeyAuth, caching, file sync.
|
|
76
77
|
- **KeyAuth**: `KeyAuthEngine` — portable Ed25519 identity & IAM. Identity hierarchy: Root (server-level, key on filesystem), Account (portable, password-encrypted private key in DB or device-generated), Device (delegate, linked via password unlock). Challenge-response auth: server sends nonce → client signs with Ed25519 → server verifies + creates session. Self-signed tokens (no JWT lib): `base64url(payload).base64url(Ed25519_sign)`. Data model at `_auth/` prefix (protected from external writes). Device reverse-index at `_auth/deviceIndex/{dfp}` for O(1) lookup. Password change is atomic (single `db.update()`). IAM: roles with path-based permissions, account role assignment. `_auth/` excluded from replication. Transport guards: `auth-link-device` and `auth-change-password` require authenticated session; non-root users can only change own password. **Device registration**: `registerDevice(publicKey)` — client-generated keypair, no password, idempotent; `allowOpenRegistration: false` requires authenticated session. **Browser crypto**: `keyAuth.browser.ts` uses `@noble/ed25519` with DER↔raw key bridge for server compatibility. **BodClient autoAuth**: `autoAuth: true` auto-generates keypair (localStorage), registers, authenticates — zero-config device identity. `client.auth.*` convenience methods for all auth ops. **IAM transport ops**: `auth-create-role`, `auth-delete-role`, `auth-update-roles` (root only), `auth-list-accounts`, `auth-list-roles`. Device accounts (no encrypted key) safely reject `linkDevice`/`changePassword`.
|
|
77
78
|
|
|
78
79
|
## MCP Server
|
package/README.md
CHANGED
|
@@ -214,6 +214,20 @@ const aggregator = new BodDB({
|
|
|
214
214
|
});
|
|
215
215
|
await aggregator.replication!.start();
|
|
216
216
|
// Remote catalog/item → local a/catalog/item
|
|
217
|
+
|
|
218
|
+
// Per-path topology — mixed ownership per path prefix
|
|
219
|
+
const edge = new BodDB({
|
|
220
|
+
replication: {
|
|
221
|
+
role: 'primary',
|
|
222
|
+
primaryUrl: 'ws://central:4400',
|
|
223
|
+
paths: [
|
|
224
|
+
{ path: '_vfs', mode: 'primary' }, // local authoritative
|
|
225
|
+
{ path: '_auth', mode: 'replica' }, // remote authoritative
|
|
226
|
+
{ path: 'config', mode: 'sync' }, // bidirectional
|
|
227
|
+
{ path: 'telemetry', mode: 'writeonly' }, // push-only
|
|
228
|
+
],
|
|
229
|
+
},
|
|
230
|
+
});
|
|
217
231
|
```
|
|
218
232
|
|
|
219
233
|
## Rules
|
package/admin/ui.html
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
body { font-family: monospace; font-size: 13px; background: #0d0d0d; color: #d4d4d4; display: flex; flex-direction: column; height: 100vh; overflow: hidden; }
|
|
9
9
|
|
|
10
10
|
/* Metrics bar */
|
|
11
|
-
#metrics-bar { display: flex; background: #0a0a0a; border-bottom: 1px solid #2a2a2a; flex-shrink: 0;
|
|
11
|
+
#metrics-bar { display: flex; background: #0a0a0a; border-bottom: 1px solid #2a2a2a; flex-shrink: 0; align-items: stretch; width: 100%; }
|
|
12
12
|
.metric-card { display: flex; flex-direction: column; padding: 5px 10px 4px; border-right: 1px solid #181818; min-width: 140px; flex-shrink: 0; gap: 1px; overflow: hidden; }
|
|
13
13
|
.metric-card:last-child { border-right: none; width: auto; }
|
|
14
14
|
.metric-right { margin-left: auto; }
|
|
@@ -127,15 +127,15 @@
|
|
|
127
127
|
<div class="metric-top"><span class="metric-label">Ping</span><span class="metric-value" id="s-ping">—</span></div>
|
|
128
128
|
<canvas class="metric-canvas" id="g-ping" width="100" height="28"></canvas>
|
|
129
129
|
</div>
|
|
130
|
-
<div class="metric-card
|
|
130
|
+
<div class="metric-card" id="repl-card" style="border-left:1px solid #282828;display:none;width:180px">
|
|
131
131
|
<div class="metric-top"><span class="metric-label">Replication</span><span class="metric-value dim" id="s-repl-role">—</span></div>
|
|
132
132
|
<div style="margin-top:4px;font-size:10px" id="s-repl-sources"></div>
|
|
133
133
|
</div>
|
|
134
|
-
<div class="metric-card" style="border-left:1px solid #282828">
|
|
134
|
+
<div class="metric-card metric-right" style="border-left:1px solid #282828;justify-content:space-between">
|
|
135
135
|
<div class="metric-top"><span class="metric-label">Uptime</span><span class="metric-value dim" id="s-uptime">—</span></div>
|
|
136
|
-
<div style="
|
|
137
|
-
<div
|
|
138
|
-
<div
|
|
136
|
+
<div style="font-size:10px;color:#555;display:flex;justify-content:space-between"><span id="s-ts">—</span><span>v<span id="s-version">—</span></span></div>
|
|
137
|
+
<div><span class="metric-label">WS<span id="ws-dot"></span></span> <span style="font-size:10px;color:#555"><span id="s-clients">0</span> clients · <span id="s-subs">0</span> subs</span></div>
|
|
138
|
+
<div><button id="stats-toggle" class="sm" onclick="toggleStats()" title="Toggle server stats collection">Stats: ON</button></div>
|
|
139
139
|
</div>
|
|
140
140
|
</div>
|
|
141
141
|
|
|
@@ -1257,6 +1257,7 @@ db.on('_admin/stats', (snap) => {
|
|
|
1257
1257
|
document.getElementById('s-subs').textContent = s.subs ?? 0;
|
|
1258
1258
|
document.getElementById('s-uptime').textContent = fmtUptime(s.process.uptimeSec);
|
|
1259
1259
|
document.getElementById('s-ts').textContent = new Date(s.ts).toLocaleTimeString();
|
|
1260
|
+
if (s.version) document.getElementById('s-version').textContent = s.version;
|
|
1260
1261
|
|
|
1261
1262
|
// Replication stats
|
|
1262
1263
|
if (s.repl) {
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Para-Chat — BodDB Integration Guide
|
|
2
|
+
|
|
3
|
+
Para-chat is an AI coder companion that uses BodDB for real-time data, auth, file system, and cross-device sync.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
para-chat instance
|
|
11
|
+
├── Bun.serve (shared port)
|
|
12
|
+
│ ├── /api/* — REST API
|
|
13
|
+
│ ├── /db (WS) — BodDB transport
|
|
14
|
+
│ ├── /db (REST) — BodDB REST endpoints
|
|
15
|
+
│ └── /files/* — VFS file upload/download
|
|
16
|
+
├── BodDB (.internal/boddb.sqlite)
|
|
17
|
+
│ ├── StorageEngine — collections (notifications, terminal-sessions, etc.)
|
|
18
|
+
│ ├── VFSEngine — file metadata at _vfs/, files on disk via DiskBackend
|
|
19
|
+
│ ├── KeyAuthEngine — Ed25519 device identity, sessions, IAM
|
|
20
|
+
│ └── ReplicationEngine — sync with bod.ee (sources-based)
|
|
21
|
+
└── Frontend (BodClientCached)
|
|
22
|
+
├── BodClient (WS) — CRUD, subscriptions, auth
|
|
23
|
+
└── Memory + IndexedDB cache (300 entries, 24h TTL)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Current Replication Setup
|
|
29
|
+
|
|
30
|
+
Para-chat uses **sources** to pull `_vfs` and `_auth` from a remote bod.ee instance:
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
// api/lib/boddb.ts (current)
|
|
34
|
+
replEngine = new ReplicationEngine(instance, {
|
|
35
|
+
role: 'primary',
|
|
36
|
+
sources: [{
|
|
37
|
+
url: repl.remoteUrl, // wss://bod.ee/db
|
|
38
|
+
auth: () => token,
|
|
39
|
+
paths: repl.paths || ['_vfs', '_auth'],
|
|
40
|
+
excludePrefixes: ['_auth/sessions', '_auth/server'],
|
|
41
|
+
}],
|
|
42
|
+
});
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Config file (`.internal/boddb.config.json`):
|
|
46
|
+
```json
|
|
47
|
+
{
|
|
48
|
+
"keyAuth": { "allowOpenRegistration": true, "sessionTtl": 604800 },
|
|
49
|
+
"replication": { "remoteUrl": "wss://bod.ee/db", "paths": ["_vfs", "_auth"] }
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Limitation**: Sources are one-way pull. Local writes to `_vfs` don't automatically push to bod.ee — that's handled by a separate HTTP upload flow with dedup guards.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Recommended: Per-Path Topology
|
|
58
|
+
|
|
59
|
+
With per-path topology, each prefix gets explicit ownership semantics:
|
|
60
|
+
|
|
61
|
+
```typescript
|
|
62
|
+
// api/lib/boddb.ts (recommended)
|
|
63
|
+
const db = new BodDB({
|
|
64
|
+
path: dbPath,
|
|
65
|
+
vfs: { backend: new DiskBackend(vfsRoot), pathAsFileId: true },
|
|
66
|
+
keyAuth: { allowOpenRegistration: true, sessionTtl: 604800 },
|
|
67
|
+
replication: {
|
|
68
|
+
role: 'primary', // fallback for unconfigured paths
|
|
69
|
+
primaryUrl: repl.remoteUrl, // wss://bod.ee/db
|
|
70
|
+
paths: [
|
|
71
|
+
{ path: '_vfs', mode: 'primary' }, // local files are authoritative
|
|
72
|
+
{ path: '_auth', mode: 'replica' }, // bod.ee is auth authority
|
|
73
|
+
{ path: 'config', mode: 'sync' }, // bidirectional app config
|
|
74
|
+
{ path: 'storage', mode: 'primary' }, // local collections (notifications, etc.)
|
|
75
|
+
],
|
|
76
|
+
excludePrefixes: ['_repl', '_streams', '_mq'],
|
|
77
|
+
},
|
|
78
|
+
});
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Mode mapping for para-chat
|
|
82
|
+
|
|
83
|
+
| Path | Mode | Rationale |
|
|
84
|
+
|------|------|-----------|
|
|
85
|
+
| `_vfs` | `primary` | Local files on disk are source of truth. Emits metadata to remote. |
|
|
86
|
+
| `_auth` | `replica` | bod.ee is the identity provider. Writes proxied. Sessions/server excluded automatically. |
|
|
87
|
+
| `config` | `sync` | App config (e.g. `config/app`) editable from either side. |
|
|
88
|
+
| `storage/*` | `primary` | Notifications, terminal-sessions are local. |
|
|
89
|
+
| `telemetry` | `writeonly` | Push metrics to bod.ee, never pull. |
|
|
90
|
+
|
|
91
|
+
### What changes
|
|
92
|
+
|
|
93
|
+
1. **`_auth` writes are proxied** — `createAccount`, `linkDevice` go through bod.ee automatically. No separate HTTP call needed.
|
|
94
|
+
2. **`_vfs` emits to remote** — file metadata changes push to bod.ee via `_repl` stream. No manual upload sync.
|
|
95
|
+
3. **Bootstrap is selective** — only `_auth` (replica) pulls from remote on connect. `_vfs` (primary) and `storage` (primary) keep local state.
|
|
96
|
+
4. **`_auth/sessions` and `_auth/server` stay local** — both are automatically excluded from replication. The `_auth` prefix is excluded by default; in `replica` mode the rest of `_auth` is pulled from the remote, while these two internal paths remain local-only.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Data Paths
|
|
101
|
+
|
|
102
|
+
| Path | Type | Description |
|
|
103
|
+
|------|------|-------------|
|
|
104
|
+
| `storage/notifications` | Collection | User notifications (id, type, title, body, timestamp) |
|
|
105
|
+
| `storage/terminal-sessions` | Collection | PTY session persistence |
|
|
106
|
+
| `storage/{collection}/{id}` | Collection | Generic app collections |
|
|
107
|
+
| `config/app` | Object | App configuration |
|
|
108
|
+
| `auth/tokens/{name}` | TokenEntry | Workspace auth tokens |
|
|
109
|
+
| `_vfs/{path}` | VFS metadata | File size, mime, mtime, fileId, isDir |
|
|
110
|
+
| `_auth/accounts/{fp}` | KeyAuth | User accounts (replicated from bod.ee) |
|
|
111
|
+
| `_auth/devices/{fp}` | KeyAuth | Device registrations |
|
|
112
|
+
| `_auth/sessions/{token}` | KeyAuth | Session tokens (local only) |
|
|
113
|
+
| `_auth/server` | KeyAuth | Server keypair (local only) |
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Client Setup
|
|
118
|
+
|
|
119
|
+
```typescript
|
|
120
|
+
// src/lib/boddbClient.ts
|
|
121
|
+
import { BodClient, BodClientCached } from '@bod.ee/db/client';
|
|
122
|
+
|
|
123
|
+
const client = new BodClient({
|
|
124
|
+
url: `${wsProtocol}//${host}/db`,
|
|
125
|
+
auth: () => sessionToken,
|
|
126
|
+
reconnect: true,
|
|
127
|
+
reconnectInterval: 1000,
|
|
128
|
+
maxReconnectInterval: 30000,
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
const cached = new BodClientCached(client, {
|
|
132
|
+
maxMemoryEntries: 300,
|
|
133
|
+
maxAge: 24 * 60 * 60 * 1000,
|
|
134
|
+
});
|
|
135
|
+
await cached.init();
|
|
136
|
+
await cached.warmup(['config/app', 'storage/notifications']);
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Real-time subscriptions
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
// Notifications — real-time via WS
|
|
143
|
+
cached.onChild('storage/notifications', (event) => {
|
|
144
|
+
dispatch({ type: 'notification', payload: event.val() });
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
// Fallback to SSE when WS unavailable
|
|
148
|
+
const sse = new EventSource('/api/v1/notifications/stream');
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### VFS (file operations)
|
|
152
|
+
|
|
153
|
+
```typescript
|
|
154
|
+
const vfs = cached.vfs();
|
|
155
|
+
await vfs.upload('docs/readme.md', file);
|
|
156
|
+
const data = await vfs.download('docs/readme.md');
|
|
157
|
+
const files = await vfs.list('docs');
|
|
158
|
+
await vfs.move('docs/old.md', 'archive/old.md');
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Auth Flow
|
|
164
|
+
|
|
165
|
+
Para-chat uses **device-based identity** (KeyAuth):
|
|
166
|
+
|
|
167
|
+
1. Client generates Ed25519 keypair → stored in `localStorage`
|
|
168
|
+
2. Device registered with bod.ee via `client.auth.authenticate(pubKey, signFn)`
|
|
169
|
+
3. Challenge-response: server sends nonce → client signs → session token issued
|
|
170
|
+
4. Session token auto-injected on all subsequent requests
|
|
171
|
+
|
|
172
|
+
With `_auth` as `replica`, auth operations proxy to bod.ee automatically — accounts, devices, and roles managed centrally.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## File Sync
|
|
177
|
+
|
|
178
|
+
Current flow uses HTTP-based file sync with dedup:
|
|
179
|
+
1. VFS metadata replicated via `_repl` stream
|
|
180
|
+
2. On metadata change → check if file exists locally
|
|
181
|
+
3. If missing → HTTP GET from remote `/files/{path}`
|
|
182
|
+
4. Dedup guard: 5s TTL on recently downloaded files
|
|
183
|
+
|
|
184
|
+
With per-path topology (`_vfs: primary`):
|
|
185
|
+
- Local file changes emit metadata to remote via `_repl`
|
|
186
|
+
- Remote instances pull metadata + download files via HTTP
|
|
187
|
+
- No manual sync code needed for the metadata layer
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Common Mistakes
|
|
192
|
+
|
|
193
|
+
| Mistake | Problem | Fix |
|
|
194
|
+
|---------|---------|-----|
|
|
195
|
+
| Using `sources` + manual HTTP sync | Duplicated sync logic, race conditions | Use per-path topology with `primaryUrl` |
|
|
196
|
+
| Not excluding `_auth/sessions` | Remote sessions overwrite local on bootstrap | `replica` mode + default `excludePrefixes` handles this |
|
|
197
|
+
| `autoAuth: true` for user accounts | Identity lost on localStorage clear | Use `createAccount` + `linkDevice` for persistent identity |
|
|
198
|
+
| Starting replication before `serve()` | Transport not ready for proxy writes | Call `serve()` first, then `replication.start()` |
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# _repl Stream Bloat: Root Cause & Fixes
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
`streamMaterialize('_repl', { keepKey: 'path' })` hangs when `_repl` has >2000 entries. This blocks `ReplicationEngine.start()` on replicas indefinitely — the replica never connects and no sync happens. No timeout, no error, just a hung promise.
|
|
5
|
+
|
|
6
|
+
## Root Cause
|
|
7
|
+
1. **No auto-compact on _repl**: Every `db.set()` on a primary-mode path pushes to `_repl`. With 400+ VFS files and frequent saves, `_repl` grows unbounded.
|
|
8
|
+
2. **`streamMaterialize` is O(n) over WS**: Server reads all entries, deduplicates by `keepKey`, serializes as one JSON message. At ~2300 entries the WS message is too large / slow for BodClient to handle within the 30s `requestTimeout`.
|
|
9
|
+
3. **No backpressure or pagination**: The entire materialized result is sent as a single WS frame.
|
|
10
|
+
|
|
11
|
+
## Current Workaround (applied)
|
|
12
|
+
- Auto-compact in `ReplicationEngine.startPrimary()`: compacts `_repl` on startup and every 5min with `{ maxCount: 500, keepKey: 'path' }`.
|
|
13
|
+
- Manual compact via `db.stream.compact('_repl', { maxCount: 50 })` to unblock stuck replicas.
|
|
14
|
+
|
|
15
|
+
## Proper Fixes (prioritized)
|
|
16
|
+
|
|
17
|
+
### P0: Timeout + retry in replica bootstrap
|
|
18
|
+
**File:** `ReplicationEngine.ts` → `startReplicaForPaths()` / `startReplica()`
|
|
19
|
+
- Wrap `streamMaterialize` call in a timeout (e.g. 15s)
|
|
20
|
+
- On timeout: log warning, skip bootstrap, proceed to stream subscription
|
|
21
|
+
- Replica will catch up via ongoing `_repl` events (eventually consistent)
|
|
22
|
+
- This ensures a hung materialize never blocks the entire replication lifecycle
|
|
23
|
+
|
|
24
|
+
### P1: Paginated streamMaterialize
|
|
25
|
+
**File:** `StreamEngine.ts`, `Transport.ts`, `BodClient.ts`
|
|
26
|
+
- Add `batchSize` + `cursor` params to `stream-materialize` op
|
|
27
|
+
- Server returns pages of N entries + a cursor for the next page
|
|
28
|
+
- Client iterates pages until done
|
|
29
|
+
- Avoids single massive WS frame; works for any stream size
|
|
30
|
+
|
|
31
|
+
### P2: Compact-on-materialize (server-side)
|
|
32
|
+
**File:** `StreamEngine.ts`
|
|
33
|
+
- When `streamMaterialize` is called with `keepKey`, the server already deduplicates
|
|
34
|
+
- After dedup: auto-delete the redundant entries that were collapsed
|
|
35
|
+
- Result: materialize is self-healing — each call prunes the stream
|
|
36
|
+
- Trade-off: adds write I/O during a read op; should be opt-in via flag
|
|
37
|
+
|
|
38
|
+
### P3: Delta-based bootstrap
|
|
39
|
+
**File:** `ReplicationEngine.ts`
|
|
40
|
+
- Replica persists its last-seen `_repl` key/timestamp locally
|
|
41
|
+
- On reconnect: request only entries after the last-seen key (`startAt` param)
|
|
42
|
+
- Eliminates the need to materialize the full stream on every connect
|
|
43
|
+
- Requires `stream-read` with `startAfter` support (already exists via `startAt`)
|
|
44
|
+
|
|
45
|
+
## Metrics to Add
|
|
46
|
+
- Log `_repl` entry count at ReplicationEngine start
|
|
47
|
+
- Log `streamMaterialize` duration
|
|
48
|
+
- Alert/warn when `_repl` exceeds 1000 entries before compact
|
package/package.json
CHANGED
package/src/client/BodClient.ts
CHANGED
|
@@ -456,8 +456,10 @@ export class BodClient {
|
|
|
456
456
|
return this.send('stream-snapshot', { path });
|
|
457
457
|
}
|
|
458
458
|
|
|
459
|
-
async streamMaterialize(path: string, opts?: { keepKey?: string }): Promise<Record<string, unknown
|
|
460
|
-
|
|
459
|
+
async streamMaterialize(path: string, opts?: { keepKey?: string }): Promise<Record<string, unknown>>;
|
|
460
|
+
async streamMaterialize(path: string, opts: { keepKey?: string; batchSize: number; cursor?: string }): Promise<{ data: Record<string, unknown>; nextCursor?: string }>;
|
|
461
|
+
async streamMaterialize(path: string, opts?: { keepKey?: string; batchSize?: number; cursor?: string }): Promise<Record<string, unknown> | { data: Record<string, unknown>; nextCursor?: string }> {
|
|
462
|
+
return this.send('stream-materialize', { path, ...opts }) as any;
|
|
461
463
|
}
|
|
462
464
|
|
|
463
465
|
async streamCompact(path: string, opts?: { maxAge?: number; maxCount?: number; keepKey?: string }): Promise<unknown> {
|
package/src/server/BodDB.ts
CHANGED
|
@@ -12,6 +12,8 @@ import { VFSEngine, type VFSEngineOptions } from './VFSEngine.ts';
|
|
|
12
12
|
import { KeyAuthEngine, type KeyAuthEngineOptions } from './KeyAuthEngine.ts';
|
|
13
13
|
import { validatePath } from '../shared/pathUtils.ts';
|
|
14
14
|
import { Logger, type LogConfig } from '../shared/logger.ts';
|
|
15
|
+
import pkg from '../../package.json' with { type: 'json' };
|
|
16
|
+
const PKG_VERSION: string = pkg.version ?? 'unknown';
|
|
15
17
|
|
|
16
18
|
export interface TransactionProxy {
|
|
17
19
|
get(path: string): unknown;
|
|
@@ -79,7 +81,8 @@ export class BodDB {
|
|
|
79
81
|
this.options = { ...new BodDBOptions(), ...options };
|
|
80
82
|
this.log = new Logger(this.options.log);
|
|
81
83
|
const _log = this.log.forComponent('db');
|
|
82
|
-
|
|
84
|
+
console.log(`[BodDB] v${PKG_VERSION} (path: ${this.options.path})`);
|
|
85
|
+
_log.info(`Initializing BodDB v${PKG_VERSION} (path: ${this.options.path})`);
|
|
83
86
|
this.storage = new StorageEngine({ path: this.options.path });
|
|
84
87
|
this.subs = new SubscriptionEngine();
|
|
85
88
|
this.stream = new StreamEngine(this.storage, this.subs, { compact: this.options.compact });
|
|
@@ -145,12 +148,12 @@ export class BodDB {
|
|
|
145
148
|
// Init replication
|
|
146
149
|
if (this.options.replication) {
|
|
147
150
|
this.replication = new ReplicationEngine(this, this.options.replication);
|
|
148
|
-
// Auto-add _repl compaction for
|
|
149
|
-
if (this.replication.
|
|
151
|
+
// Auto-add _repl compaction for any node that emits to _repl
|
|
152
|
+
if (this.replication.emitsToRepl) {
|
|
150
153
|
const compactOpts = this.options.replication.compact ?? { keepKey: 'path', maxCount: 10000 };
|
|
151
154
|
this.stream.options.compact = { ...this.stream.options.compact, _repl: compactOpts };
|
|
152
155
|
}
|
|
153
|
-
_log.info(`Replication enabled (role: ${this.replication.
|
|
156
|
+
_log.info(`Replication enabled (role: ${this.options.replication.role}${this.replication.router ? ', per-path topology' : ''})`);
|
|
154
157
|
}
|
|
155
158
|
|
|
156
159
|
_log.info('BodDB ready');
|
|
@@ -409,6 +412,7 @@ export class BodDB {
|
|
|
409
412
|
|
|
410
413
|
// Reuse a single stats object to minimize allocations
|
|
411
414
|
const statsData: Record<string, unknown> = {
|
|
415
|
+
version: PKG_VERSION,
|
|
412
416
|
process: {}, db: {}, system: {},
|
|
413
417
|
subs: 0, clients: 0, repl: null, ts: 0,
|
|
414
418
|
};
|