@bod.ee/db 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/config-file.md +6 -0
- package/.claude/skills/deploying-bod-db.md +21 -0
- package/.claude/skills/using-bod-db.md +30 -0
- package/CLAUDE.md +2 -1
- package/README.md +14 -0
- package/docs/para-chat-integration.md +198 -0
- package/docs/repl-stream-bloat-spec.md +48 -0
- package/package.json +1 -1
- package/src/server/BodDB.ts +3 -3
- package/src/server/ReplicationEngine.ts +254 -18
- package/src/server/Transport.ts +57 -29
- package/src/server/VFSEngine.ts +13 -2
- package/src/shared/protocol.ts +1 -0
- package/tests/repl-stream-bloat.test.ts +270 -0
- package/tests/replication-topology.test.ts +835 -0
|
@@ -47,6 +47,12 @@ export default {
|
|
|
47
47
|
log: { enabled: true, level: 'info', components: ['transport', 'stats', 'storage'] }, // or components: '*' for all
|
|
48
48
|
replication: {
|
|
49
49
|
role: 'primary',
|
|
50
|
+
primaryUrl: 'ws://remote:4400', // needed for replica/sync paths
|
|
51
|
+
paths: [
|
|
52
|
+
{ path: '_vfs', mode: 'primary' },
|
|
53
|
+
{ path: '_auth', mode: 'replica' },
|
|
54
|
+
{ path: 'config', mode: 'sync' },
|
|
55
|
+
],
|
|
50
56
|
sources: [
|
|
51
57
|
{ url: 'ws://other-db:4400', paths: ['catalog'], localPrefix: 'ext', id: 'my-source' },
|
|
52
58
|
],
|
|
@@ -44,6 +44,27 @@ export default {
|
|
|
44
44
|
```
|
|
45
45
|
Primary config just needs `replication: { role: 'primary' }`. Replicas auto-bootstrap on startup.
|
|
46
46
|
|
|
47
|
+
## Per-Path Topology
|
|
48
|
+
Mixed ownership — each path prefix gets independent replication mode:
|
|
49
|
+
```typescript
|
|
50
|
+
// deploy/prod-edge.config.ts
|
|
51
|
+
export default {
|
|
52
|
+
path: './data-edge.db',
|
|
53
|
+
port: 4401,
|
|
54
|
+
replication: {
|
|
55
|
+
role: 'primary',
|
|
56
|
+
primaryUrl: 'ws://central:4400',
|
|
57
|
+
paths: [
|
|
58
|
+
{ path: '_vfs', mode: 'primary' }, // local files → push to central
|
|
59
|
+
{ path: '_auth', mode: 'replica' }, // auth from central → pull
|
|
60
|
+
{ path: 'config', mode: 'sync' }, // bidirectional
|
|
61
|
+
{ path: 'telemetry', mode: 'writeonly' }, // push metrics, ignore remote
|
|
62
|
+
],
|
|
63
|
+
},
|
|
64
|
+
};
|
|
65
|
+
```
|
|
66
|
+
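Modes: `primary` (local authoritative, emits), `replica` (remote authoritative, proxies writes), `sync` (bidirectional, emits and applies), `readonly` (pull-only, rejects writes), `writeonly` (push-only, ignores remote). Unconfigured paths fall back to `role`.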
|
|
67
|
+
|
|
47
68
|
## Multi-Source Feed Subscriptions
|
|
48
69
|
Pull specific paths from multiple remote DBs:
|
|
49
70
|
```typescript
|
|
@@ -488,6 +488,36 @@ await replica.replication!.start();
|
|
|
488
488
|
- Auto-compaction on `_repl` stream (keepKey: 'path', maxCount: 10000)
|
|
489
489
|
- Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth` (internal data not replicated)
|
|
490
490
|
|
|
491
|
+
### Per-Path Topology
|
|
492
|
+
|
|
493
|
+
Mixed ownership per path — some paths local-authoritative, others remote-authoritative.
|
|
494
|
+
|
|
495
|
+
```typescript
|
|
496
|
+
const db = new BodDB({
|
|
497
|
+
path: './data.db',
|
|
498
|
+
replication: {
|
|
499
|
+
role: 'primary', // fallback for unconfigured paths
|
|
500
|
+
primaryUrl: 'ws://remote:4400',
|
|
501
|
+
paths: [
|
|
502
|
+
{ path: '_vfs', mode: 'primary' }, // local authoritative, emits to remote
|
|
503
|
+
{ path: '_auth', mode: 'replica' }, // remote authoritative, proxies writes
|
|
504
|
+
{ path: 'config', mode: 'sync' }, // bidirectional
|
|
505
|
+
{ path: 'feeds', mode: 'readonly' }, // pull-only, rejects writes
|
|
506
|
+
{ path: 'telemetry', mode: 'writeonly' }, // push-only, ignores remote
|
|
507
|
+
],
|
|
508
|
+
},
|
|
509
|
+
});
|
|
510
|
+
await db.replication!.start();
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
- 5 modes: `primary`, `replica`, `sync`, `readonly`, `writeonly`
|
|
514
|
+
- Longest-prefix match: `_auth/nonces` (primary) overrides `_auth` (replica)
|
|
515
|
+
- String paths default to `sync`: `paths: ['_vfs', '_auth']` = both sync
|
|
516
|
+
- `writeProxy: 'reject'` on replica paths blocks writes instead of proxying
|
|
517
|
+
- Bootstrap pulls only `replica`/`readonly` paths; `sync` gets ongoing stream only
|
|
518
|
+
- Unconfigured paths fall back to `role`
|
|
519
|
+
- Backward compatible: no `paths` = role-based (existing behavior)
|
|
520
|
+
|
|
491
521
|
### Multi-Source Feed Subscriptions
|
|
492
522
|
|
|
493
523
|
Subscribe to specific paths from multiple remote BodDB instances — like RSS feeds for database paths.
|
package/CLAUDE.md
CHANGED
|
@@ -71,8 +71,9 @@ config.ts — demo instance config (open rules, indexes, fts, v
|
|
|
71
71
|
- **BodClientCached**: two-tier cache wrapper around BodClient. Memory (Map, LRU eviction) + IndexedDB persistence. Stale-while-revalidate: subscribed paths always fresh, unsubscribed return stale + background refetch. Writes (`set/update/delete`) invalidate path + ancestors. `init()` opens IDB + sweeps expired. `warmup(paths[])` bulk-loads from IDB. Passthrough for `push/batch/query/search/mq/stream/vfs` via `cachedClient.client`.
|
|
72
72
|
- **MCP**: `MCPAdapter` wraps a `BodClient` as a JSON-RPC MCP server (stdio + HTTP). Connects to a running BodDB instance over WebSocket — no embedded DB. Entry point: `mcp.ts`. Tools: CRUD (6), FTS (2), vectors (2), streams (4), MQ (7) = 21 tools. Use `--stdio` for Claude Code/Desktop, `--http` for remote agents.
|
|
73
73
|
- **VFS (Virtual File System)**: `VFSEngine` — files stored outside SQLite via pluggable `VFSBackend` interface. `LocalBackend` stores at `<storageRoot>/<fileId>` using `Bun.file`/`Bun.write`. Metadata at `_vfs/<virtualPath>/` (size, mime, mtime, fileId, isDir) — gets subs/rules/replication for free. `fileId = pushId` so move/rename is metadata-only. REST: `POST/GET/DELETE /files/<path>`, `?stat=1`, `?list=1`, `?mkdir=1`, `PUT ?move=<dst>`. WS chunked fallback: base64-encoded `vfs-upload-init/chunk/done`, `vfs-download-init` → `vfs-download-chunk` push messages. Client: `VFSClient` via `client.vfs()` — `upload/download` (REST) + `uploadWS/downloadWS` (WS) + `stat/list/mkdir/delete/move`.
|
|
74
|
-
- **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via `streamMaterialize('_repl', { keepKey: 'path' })`, then subscribe for ongoing events. Write proxy: replica forwards writes to primary via BodClient, primary applies + emits, replica consumes. `_replaying` flag prevents re-emission loops. `_emitting` guard prevents recursion from `db.push('_repl')`. Updates flattened to per-path set events for correct compaction keying. Sweep delete events replicated. Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth`. **Sources**: `ReplicationSource[]` — subscribe to specific paths from multiple remote DBs. Each source is an independent BodClient that filters `_repl` events by path prefix, with optional `localPrefix` remapping (e.g. remote `users/u1` → local `db-a/users/u1`). Sources connect in parallel; individual failures don't block others. Sources are independent of role — a DB can be primary AND consume sources.
|
|
74
|
+
- **Replication**: `ReplicationEngine` — single primary + N read replicas + multi-source feed subscriptions. Star topology. Primary emits write events to `_repl` stream via `onWrite` hooks. Replicas bootstrap via `streamMaterialize('_repl', { keepKey: 'path' })`, then subscribe for ongoing events. Write proxy: replica forwards writes to primary via BodClient, primary applies + emits, replica consumes. `_replaying` flag prevents re-emission loops. `_emitting` guard prevents recursion from `db.push('_repl')`. Updates flattened to per-path set events for correct compaction keying. Sweep delete events replicated. Excluded prefixes: `_repl`, `_streams`, `_mq`, `_auth`. **Sources**: `ReplicationSource[]` — subscribe to specific paths from multiple remote DBs. Each source is an independent BodClient that filters `_repl` events by path prefix, with optional `localPrefix` remapping (e.g. remote `users/u1` → local `db-a/users/u1`). Sources connect in parallel; individual failures don't block others. Sources are independent of role — a DB can be primary AND consume sources. **Per-path topology**: `PathTopologyRouter` — when `paths` config is set, each path prefix gets an independent mode: `primary` (local authoritative, emits), `replica` (remote authoritative, proxies writes), `sync` (bidirectional, both emit+apply), `readonly` (pull-only, rejects writes), `writeonly` (push-only, ignores remote). Longest-prefix match resolves mode. `writeProxy: 'proxy'|'reject'` overrides replica write behavior. Bootstrap skips sync paths (ongoing stream only). Auth/rules checked before proxy in all handlers. `shouldProxyPath(path)`/`shouldRejectPath(path)` replace `isReplica` checks. `emitsToRepl`/`pullsFromPrimary` getters for compact/bootstrap decisions. Stable `replicaId` from config hash. Falls back to `role` when `paths` absent (backward compat).
|
|
75
75
|
- **KeyAuth integration guide**: `docs/keyauth-integration.md` — flows for signup, signin, new device, autoAuth, IAM roles, common mistakes.
|
|
76
|
+
- **Para-chat integration guide**: `docs/para-chat-integration.md` — how para-chat uses BodDB: per-path topology, VFS, KeyAuth, caching, file sync.
|
|
76
77
|
- **KeyAuth**: `KeyAuthEngine` — portable Ed25519 identity & IAM. Identity hierarchy: Root (server-level, key on filesystem), Account (portable, password-encrypted private key in DB or device-generated), Device (delegate, linked via password unlock). Challenge-response auth: server sends nonce → client signs with Ed25519 → server verifies + creates session. Self-signed tokens (no JWT lib): `base64url(payload).base64url(Ed25519_sign)`. Data model at `_auth/` prefix (protected from external writes). Device reverse-index at `_auth/deviceIndex/{dfp}` for O(1) lookup. Password change is atomic (single `db.update()`). IAM: roles with path-based permissions, account role assignment. `_auth/` excluded from replication. Transport guards: `auth-link-device` and `auth-change-password` require authenticated session; non-root users can only change own password. **Device registration**: `registerDevice(publicKey)` — client-generated keypair, no password, idempotent; `allowOpenRegistration: false` requires authenticated session. **Browser crypto**: `keyAuth.browser.ts` uses `@noble/ed25519` with DER↔raw key bridge for server compatibility. **BodClient autoAuth**: `autoAuth: true` auto-generates keypair (localStorage), registers, authenticates — zero-config device identity. `client.auth.*` convenience methods for all auth ops. **IAM transport ops**: `auth-create-role`, `auth-delete-role`, `auth-update-roles` (root only), `auth-list-accounts`, `auth-list-roles`. Device accounts (no encrypted key) safely reject `linkDevice`/`changePassword`.
|
|
77
78
|
|
|
78
79
|
## MCP Server
|
package/README.md
CHANGED
|
@@ -214,6 +214,20 @@ const aggregator = new BodDB({
|
|
|
214
214
|
});
|
|
215
215
|
await aggregator.replication!.start();
|
|
216
216
|
// Remote catalog/item → local a/catalog/item
|
|
217
|
+
|
|
218
|
+
// Per-path topology — mixed ownership per path prefix
|
|
219
|
+
const edge = new BodDB({
|
|
220
|
+
replication: {
|
|
221
|
+
role: 'primary',
|
|
222
|
+
primaryUrl: 'ws://central:4400',
|
|
223
|
+
paths: [
|
|
224
|
+
{ path: '_vfs', mode: 'primary' }, // local authoritative
|
|
225
|
+
{ path: '_auth', mode: 'replica' }, // remote authoritative
|
|
226
|
+
{ path: 'config', mode: 'sync' }, // bidirectional
|
|
227
|
+
{ path: 'telemetry', mode: 'writeonly' }, // push-only
|
|
228
|
+
],
|
|
229
|
+
},
|
|
230
|
+
});
|
|
217
231
|
```
|
|
218
232
|
|
|
219
233
|
## Rules
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Para-Chat — BodDB Integration Guide
|
|
2
|
+
|
|
3
|
+
Para-chat is an AI coder companion that uses BodDB for real-time data, auth, file system, and cross-device sync.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Architecture
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
para-chat instance
|
|
11
|
+
├── Bun.serve (shared port)
|
|
12
|
+
│ ├── /api/* — REST API
|
|
13
|
+
│ ├── /db (WS) — BodDB transport
|
|
14
|
+
│ ├── /db (REST) — BodDB REST endpoints
|
|
15
|
+
│ └── /files/* — VFS file upload/download
|
|
16
|
+
├── BodDB (.internal/boddb.sqlite)
|
|
17
|
+
│ ├── StorageEngine — collections (notifications, terminal-sessions, etc.)
|
|
18
|
+
│ ├── VFSEngine — file metadata at _vfs/, files on disk via DiskBackend
|
|
19
|
+
│ ├── KeyAuthEngine — Ed25519 device identity, sessions, IAM
|
|
20
|
+
│ └── ReplicationEngine — sync with bod.ee (sources-based)
|
|
21
|
+
└── Frontend (BodClientCached)
|
|
22
|
+
├── BodClient (WS) — CRUD, subscriptions, auth
|
|
23
|
+
└── Memory + IndexedDB cache (300 entries, 24h TTL)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Current Replication Setup
|
|
29
|
+
|
|
30
|
+
Para-chat uses **sources** to pull `_vfs` and `_auth` from a remote bod.ee instance:
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
// api/lib/boddb.ts (current)
|
|
34
|
+
replEngine = new ReplicationEngine(instance, {
|
|
35
|
+
role: 'primary',
|
|
36
|
+
sources: [{
|
|
37
|
+
url: repl.remoteUrl, // wss://bod.ee/db
|
|
38
|
+
auth: () => token,
|
|
39
|
+
paths: repl.paths || ['_vfs', '_auth'],
|
|
40
|
+
excludePrefixes: ['_auth/sessions', '_auth/server'],
|
|
41
|
+
}],
|
|
42
|
+
});
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Config file (`.internal/boddb.config.json`):
|
|
46
|
+
```json
|
|
47
|
+
{
|
|
48
|
+
"keyAuth": { "allowOpenRegistration": true, "sessionTtl": 604800 },
|
|
49
|
+
"replication": { "remoteUrl": "wss://bod.ee/db", "paths": ["_vfs", "_auth"] }
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Limitation**: Sources are one-way pull. Local writes to `_vfs` don't automatically push to bod.ee — that's handled by a separate HTTP upload flow with dedup guards.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Recommended: Per-Path Topology
|
|
58
|
+
|
|
59
|
+
With per-path topology, each prefix gets explicit ownership semantics:
|
|
60
|
+
|
|
61
|
+
```typescript
|
|
62
|
+
// api/lib/boddb.ts (recommended)
|
|
63
|
+
const db = new BodDB({
|
|
64
|
+
path: dbPath,
|
|
65
|
+
vfs: { backend: new DiskBackend(vfsRoot), pathAsFileId: true },
|
|
66
|
+
keyAuth: { allowOpenRegistration: true, sessionTtl: 604800 },
|
|
67
|
+
replication: {
|
|
68
|
+
role: 'primary', // fallback for unconfigured paths
|
|
69
|
+
primaryUrl: repl.remoteUrl, // wss://bod.ee/db
|
|
70
|
+
paths: [
|
|
71
|
+
{ path: '_vfs', mode: 'primary' }, // local files are authoritative
|
|
72
|
+
{ path: '_auth', mode: 'replica' }, // bod.ee is auth authority
|
|
73
|
+
{ path: 'config', mode: 'sync' }, // bidirectional app config
|
|
74
|
+
{ path: 'storage', mode: 'primary' }, // local collections (notifications, etc.)
|
|
75
|
+
],
|
|
76
|
+
excludePrefixes: ['_repl', '_streams', '_mq'],
|
|
77
|
+
},
|
|
78
|
+
});
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Mode mapping for para-chat
|
|
82
|
+
|
|
83
|
+
| Path | Mode | Rationale |
|
|
84
|
+
|------|------|-----------|
|
|
85
|
+
| `_vfs` | `primary` | Local files on disk are source of truth. Emits metadata to remote. |
|
|
86
|
+
| `_auth` | `replica` | bod.ee is the identity provider. Writes proxied. Sessions/server excluded automatically. |
|
|
87
|
+
| `config/app` | `sync` | App config editable from either side. |
|
|
88
|
+
| `storage/*` | `primary` | Notifications, terminal-sessions are local. |
|
|
89
|
+
| `telemetry` | `writeonly` | Push metrics to bod.ee, never pull. |
|
|
90
|
+
|
|
91
|
+
### What changes
|
|
92
|
+
|
|
93
|
+
1. **`_auth` writes are proxied** — `createAccount`, `linkDevice` go through bod.ee automatically. No separate HTTP call needed.
|
|
94
|
+
2. **`_vfs` emits to remote** — file metadata changes push to bod.ee via `_repl` stream. No manual upload sync.
|
|
95
|
+
3. **Bootstrap is selective** — only `_auth` (replica) pulls from remote on connect. `_vfs` (primary) and `storage` (primary) keep local state.
|
|
96
|
+
4. **`_auth/sessions` and `_auth/server` stay local** — they are automatically excluded from replication: the `_auth` prefix is excluded by default, and although replica mode pulls `_auth` data from the remote, these internal paths remain local-only.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Data Paths
|
|
101
|
+
|
|
102
|
+
| Path | Type | Description |
|
|
103
|
+
|------|------|-------------|
|
|
104
|
+
| `storage/notifications` | Collection | User notifications (id, type, title, body, timestamp) |
|
|
105
|
+
| `storage/terminal-sessions` | Collection | PTY session persistence |
|
|
106
|
+
| `storage/{collection}/{id}` | Collection | Generic app collections |
|
|
107
|
+
| `config/app` | Object | App configuration |
|
|
108
|
+
| `auth/tokens/{name}` | TokenEntry | Workspace auth tokens |
|
|
109
|
+
| `_vfs/{path}` | VFS metadata | File size, mime, mtime, fileId, isDir |
|
|
110
|
+
| `_auth/accounts/{fp}` | KeyAuth | User accounts (replicated from bod.ee) |
|
|
111
|
+
| `_auth/devices/{fp}` | KeyAuth | Device registrations |
|
|
112
|
+
| `_auth/sessions/{token}` | KeyAuth | Session tokens (local only) |
|
|
113
|
+
| `_auth/server` | KeyAuth | Server keypair (local only) |
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Client Setup
|
|
118
|
+
|
|
119
|
+
```typescript
|
|
120
|
+
// src/lib/boddbClient.ts
|
|
121
|
+
import { BodClient, BodClientCached } from '@bod.ee/db/client';
|
|
122
|
+
|
|
123
|
+
const client = new BodClient({
|
|
124
|
+
url: `${wsProtocol}//${host}/db`,
|
|
125
|
+
auth: () => sessionToken,
|
|
126
|
+
reconnect: true,
|
|
127
|
+
reconnectInterval: 1000,
|
|
128
|
+
maxReconnectInterval: 30000,
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
const cached = new BodClientCached(client, {
|
|
132
|
+
maxMemoryEntries: 300,
|
|
133
|
+
maxAge: 24 * 60 * 60 * 1000,
|
|
134
|
+
});
|
|
135
|
+
await cached.init();
|
|
136
|
+
await cached.warmup(['config/app', 'storage/notifications']);
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Real-time subscriptions
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
// Notifications — real-time via WS
|
|
143
|
+
cached.onChild('storage/notifications', (event) => {
|
|
144
|
+
dispatch({ type: 'notification', payload: event.val() });
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
// Fallback to SSE when WS unavailable
|
|
148
|
+
const sse = new EventSource('/api/v1/notifications/stream');
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### VFS (file operations)
|
|
152
|
+
|
|
153
|
+
```typescript
|
|
154
|
+
const vfs = cached.vfs();
|
|
155
|
+
await vfs.upload('docs/readme.md', file);
|
|
156
|
+
const data = await vfs.download('docs/readme.md');
|
|
157
|
+
const files = await vfs.list('docs');
|
|
158
|
+
await vfs.move('docs/old.md', 'archive/old.md');
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Auth Flow
|
|
164
|
+
|
|
165
|
+
Para-chat uses **device-based identity** (KeyAuth):
|
|
166
|
+
|
|
167
|
+
1. Client generates Ed25519 keypair → stored in `localStorage`
|
|
168
|
+
2. Device registered with bod.ee via `client.auth.authenticate(pubKey, signFn)`
|
|
169
|
+
3. Challenge-response: server sends nonce → client signs → session token issued
|
|
170
|
+
4. Session token auto-injected on all subsequent requests
|
|
171
|
+
|
|
172
|
+
With `_auth` as `replica`, auth operations proxy to bod.ee automatically — accounts, devices, and roles managed centrally.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## File Sync
|
|
177
|
+
|
|
178
|
+
Current flow uses HTTP-based file sync with dedup:
|
|
179
|
+
1. VFS metadata replicated via `_repl` stream
|
|
180
|
+
2. On metadata change → check if file exists locally
|
|
181
|
+
3. If missing → HTTP GET from remote `/files/{path}`
|
|
182
|
+
4. Dedup guard: 5s TTL on recently downloaded files
|
|
183
|
+
|
|
184
|
+
With per-path topology (`_vfs: primary`):
|
|
185
|
+
- Local file changes emit metadata to remote via `_repl`
|
|
186
|
+
- Remote instances pull metadata + download files via HTTP
|
|
187
|
+
- No manual sync code needed for the metadata layer
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Common Mistakes
|
|
192
|
+
|
|
193
|
+
| Mistake | Problem | Fix |
|
|
194
|
+
|---------|---------|-----|
|
|
195
|
+
| Using `sources` + manual HTTP sync | Duplicated sync logic, race conditions | Use per-path topology with `primaryUrl` |
|
|
196
|
+
| Not excluding `_auth/sessions` | Remote sessions overwrite local on bootstrap | `replica` mode plus the default `excludePrefixes` handle this |
|
|
197
|
+
| `autoAuth: true` for user accounts | Identity lost on localStorage clear | Use `createAccount` + `linkDevice` for persistent identity |
|
|
198
|
+
| Starting replication before `serve()` | Transport not ready for proxy writes | Call `serve()` first, then `replication.start()` |
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# _repl Stream Bloat: Root Cause & Fixes
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
`streamMaterialize('_repl', { keepKey: 'path' })` hangs when `_repl` has >2000 entries. This blocks `ReplicationEngine.start()` on replicas indefinitely — the replica never connects and no sync happens. No timeout, no error, just a hung promise.
|
|
5
|
+
|
|
6
|
+
## Root Cause
|
|
7
|
+
1. **No auto-compact on `_repl`**: Every `db.set()` on a primary-mode path pushes to `_repl`. With 400+ VFS files and frequent saves, `_repl` grows unbounded.
|
|
8
|
+
2. **`streamMaterialize` is O(n) over WS**: Server reads all entries, deduplicates by `keepKey`, serializes as one JSON message. At ~2300 entries the WS message is too large or too slow for BodClient to handle within the 30s `requestTimeout`.
|
|
9
|
+
3. **No backpressure or pagination**: The entire materialized result is sent as a single WS frame.
|
|
10
|
+
|
|
11
|
+
## Current Workaround (applied)
|
|
12
|
+
- Auto-compact in `ReplicationEngine.startPrimary()`: compacts `_repl` on startup and every 5min with `{ maxCount: 500, keepKey: 'path' }`.
|
|
13
|
+
- Manual compact via `db.stream.compact('_repl', { maxCount: 50 })` to unblock stuck replicas.
|
|
14
|
+
|
|
15
|
+
## Proper Fixes (prioritized)
|
|
16
|
+
|
|
17
|
+
### P0: Timeout + retry in replica bootstrap
|
|
18
|
+
**File:** `ReplicationEngine.ts` → `startReplicaForPaths()` / `startReplica()`
|
|
19
|
+
- Wrap `streamMaterialize` call in a timeout (e.g. 15s)
|
|
20
|
+
- On timeout: log warning, skip bootstrap, proceed to stream subscription
|
|
21
|
+
- Replica will catch up via ongoing `_repl` events (eventually consistent)
|
|
22
|
+
- This ensures a hung materialize never blocks the entire replication lifecycle
|
|
23
|
+
|
|
24
|
+
### P1: Paginated streamMaterialize
|
|
25
|
+
**File:** `StreamEngine.ts`, `Transport.ts`, `BodClient.ts`
|
|
26
|
+
- Add `limit` + `cursor` params to `stream-materialize` op
|
|
27
|
+
- Server returns pages of N entries + a cursor for the next page
|
|
28
|
+
- Client iterates pages until done
|
|
29
|
+
- Avoids single massive WS frame; works for any stream size
|
|
30
|
+
|
|
31
|
+
### P2: Compact-on-materialize (server-side)
|
|
32
|
+
**File:** `StreamEngine.ts`
|
|
33
|
+
- When `streamMaterialize` is called with `keepKey`, the server already deduplicates
|
|
34
|
+
- After dedup: auto-delete the redundant entries that were collapsed
|
|
35
|
+
- Result: materialize is self-healing — each call prunes the stream
|
|
36
|
+
- Trade-off: adds write I/O during a read op; should be opt-in via flag
|
|
37
|
+
|
|
38
|
+
### P3: Delta-based bootstrap
|
|
39
|
+
**File:** `ReplicationEngine.ts`
|
|
40
|
+
- Replica persists its last-seen `_repl` key/timestamp locally
|
|
41
|
+
- On reconnect: request only entries after the last-seen key (`startAt` param)
|
|
42
|
+
- Eliminates the need to materialize the full stream on every connect
|
|
43
|
+
- Requires `stream-read` to support resuming from a given key (already exists via the `startAt` param)
|
|
44
|
+
|
|
45
|
+
## Metrics to Add
|
|
46
|
+
- Log `_repl` entry count at ReplicationEngine start
|
|
47
|
+
- Log `streamMaterialize` duration
|
|
48
|
+
- Alert/warn when `_repl` exceeds 1000 entries before compact
|
package/package.json
CHANGED
package/src/server/BodDB.ts
CHANGED
|
@@ -145,12 +145,12 @@ export class BodDB {
|
|
|
145
145
|
// Init replication
|
|
146
146
|
if (this.options.replication) {
|
|
147
147
|
this.replication = new ReplicationEngine(this, this.options.replication);
|
|
148
|
-
// Auto-add _repl compaction for
|
|
149
|
-
if (this.replication.
|
|
148
|
+
// Auto-add _repl compaction for any node that emits to _repl
|
|
149
|
+
if (this.replication.emitsToRepl) {
|
|
150
150
|
const compactOpts = this.options.replication.compact ?? { keepKey: 'path', maxCount: 10000 };
|
|
151
151
|
this.stream.options.compact = { ...this.stream.options.compact, _repl: compactOpts };
|
|
152
152
|
}
|
|
153
|
-
_log.info(`Replication enabled (role: ${this.replication.
|
|
153
|
+
_log.info(`Replication enabled (role: ${this.options.replication.role}${this.replication.router ? ', per-path topology' : ''})`);
|
|
154
154
|
}
|
|
155
155
|
|
|
156
156
|
_log.info('BodDB ready');
|