orez 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53):
  1. package/README.md +185 -225
  2. package/dist/admin/log-store.d.ts.map +1 -1
  3. package/dist/admin/log-store.js +17 -6
  4. package/dist/admin/log-store.js.map +1 -1
  5. package/dist/admin/server.d.ts +1 -0
  6. package/dist/admin/server.d.ts.map +1 -1
  7. package/dist/admin/server.js +10 -0
  8. package/dist/admin/server.js.map +1 -1
  9. package/dist/cli.d.ts.map +1 -1
  10. package/dist/cli.js +89 -45
  11. package/dist/cli.js.map +1 -1
  12. package/dist/index.d.ts +1 -0
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +104 -17
  15. package/dist/index.js.map +1 -1
  16. package/dist/integration/test-permissions.d.ts +5 -0
  17. package/dist/integration/test-permissions.d.ts.map +1 -0
  18. package/dist/integration/test-permissions.js +89 -0
  19. package/dist/integration/test-permissions.js.map +1 -0
  20. package/dist/pg-proxy.js +2 -2
  21. package/dist/pg-proxy.js.map +1 -1
  22. package/dist/replication/change-tracker.d.ts.map +1 -1
  23. package/dist/replication/change-tracker.js +15 -13
  24. package/dist/replication/change-tracker.js.map +1 -1
  25. package/dist/replication/handler.d.ts.map +1 -1
  26. package/dist/replication/handler.js +27 -2
  27. package/dist/replication/handler.js.map +1 -1
  28. package/dist/sqlite-mode/index.d.ts +1 -0
  29. package/dist/sqlite-mode/index.d.ts.map +1 -1
  30. package/dist/sqlite-mode/index.js +1 -0
  31. package/dist/sqlite-mode/index.js.map +1 -1
  32. package/dist/sqlite-mode/native-binary.d.ts +11 -0
  33. package/dist/sqlite-mode/native-binary.d.ts.map +1 -0
  34. package/dist/sqlite-mode/native-binary.js +67 -0
  35. package/dist/sqlite-mode/native-binary.js.map +1 -0
  36. package/package.json +8 -2
  37. package/src/admin/log-store.ts +19 -9
  38. package/src/admin/server.ts +12 -0
  39. package/src/cli.ts +92 -43
  40. package/src/index.ts +117 -18
  41. package/src/integration/integration.test.ts +86 -15
  42. package/src/integration/native-binary.guard.test.ts +13 -0
  43. package/src/integration/native-startup.test.ts +44 -0
  44. package/src/integration/restore-live-stress.test.ts +437 -0
  45. package/src/integration/restore-reset.test.ts +135 -16
  46. package/src/integration/test-permissions.ts +111 -0
  47. package/src/pg-proxy.ts +2 -2
  48. package/src/replication/change-tracker.test.ts +1 -1
  49. package/src/replication/change-tracker.ts +16 -13
  50. package/src/replication/handler.test.ts +2 -2
  51. package/src/replication/handler.ts +30 -2
  52. package/src/sqlite-mode/index.ts +1 -0
  53. package/src/sqlite-mode/native-binary.ts +89 -0
@@ -0,0 +1,437 @@
1
+ /**
2
+ * live restore stress test.
3
+ *
4
+ * keeps a frontend-like websocket connection active while a large restore runs,
5
+ * then triggers the same full reset path used by pg_restore (SIGUSR1) and
6
+ * verifies sync still works after restart.
7
+ */
8
+
9
+ import { readFileSync, rmSync, unlinkSync, writeFileSync } from 'node:fs'
10
+ import { tmpdir } from 'node:os'
11
+ import { join } from 'node:path'
12
+
13
+ import { loadModule } from 'pgsql-parser'
14
+ import postgres from 'postgres'
15
+ import { afterAll, beforeAll, describe, expect, test } from 'vitest'
16
+ import WebSocket from 'ws'
17
+
18
+ import { execDumpFile } from '../cli.js'
19
+ import { startZeroLite } from '../index.js'
20
+ import { installChangeTracking } from '../replication/change-tracker.js'
21
+ import { installAllowAllPermissions } from './test-permissions.js'
22
+
23
+ import type { PGlite } from '@electric-sql/pglite'
24
+
25
+ const SYNC_PROTOCOL_VERSION = 45
26
+
27
/**
 * packs the initConnection message and the optional auth token into the
 * string handed to the websocket as its sub-protocol.
 *
 * the pair is serialised as JSON, base64-encoded, then escaped with
 * encodeURIComponent. an undefined authToken is dropped by JSON.stringify.
 */
function encodeSecProtocols(
  initConnectionMessage: unknown,
  authToken: string | undefined
): string {
  const json = JSON.stringify({ initConnectionMessage, authToken })
  const base64 = Buffer.from(json, 'utf-8').toString('base64')
  return encodeURIComponent(base64)
}
34
+
35
/**
 * small async fifo used to hand websocket messages to the test body.
 *
 * items enqueued while nobody is waiting are buffered in order; otherwise the
 * oldest pending dequeue() is resolved directly with the new item.
 */
class Queue<T> {
  private items: T[] = []
  private waiters: Array<{
    resolve: (v: T) => void
    timer?: ReturnType<typeof setTimeout>
  }> = []

  /** hands `item` to the oldest pending waiter, or buffers it when none is waiting. */
  enqueue(item: T) {
    const oldest = this.waiters.shift()
    if (!oldest) {
      this.items.push(item)
      return
    }
    // the waiter is being served, so its fallback timer must not fire later
    if (oldest.timer) clearTimeout(oldest.timer)
    oldest.resolve(item)
  }

  /**
   * resolves with the next item.
   *
   * when `fallback` is given (anything but undefined) the promise resolves
   * with it after `timeoutMs`; without a fallback it waits until an item
   * is enqueued.
   */
  dequeue(fallback?: T, timeoutMs = 10_000): Promise<T> {
    if (this.items.length === 0) {
      return new Promise<T>((resolve) => {
        const pending: { resolve: (v: T) => void; timer?: ReturnType<typeof setTimeout> } = {
          resolve,
        }
        if (fallback !== undefined) {
          pending.timer = setTimeout(() => {
            // unregister first so a later enqueue() is buffered instead of lost
            const position = this.waiters.indexOf(pending)
            if (position >= 0) this.waiters.splice(position, 1)
            resolve(fallback)
          }, timeoutMs)
        }
        this.waiters.push(pending)
      })
    }
    return Promise.resolve(this.items.shift()!)
  }
}
71
+
72
/**
 * reads a positive integer from the environment.
 *
 * @param name environment variable to read
 * @param fallback returned when the variable is unset, empty, not a finite
 *   number, or floors to less than 1
 * @returns the floored value (always >= 1) when the variable is usable,
 *   otherwise `fallback`
 */
function envInt(name: string, fallback: number): number {
  const raw = process.env[name]
  if (!raw) return fallback
  // floor before the range check: "0.5" is > 0 but floors to 0, which would
  // silently configure zero tables/rows/columns instead of using the fallback
  const floored = Math.floor(Number(raw))
  return Number.isFinite(floored) && floored >= 1 ? floored : fallback
}
78
+
79
/**
 * escapes one column value for a `COPY ... FROM stdin` text row.
 *
 * backslash, tab, newline and carriage return are replaced by their
 * two-character backslash sequences; every other character is kept as is.
 */
function escapeCopy(val: string): string {
  let escaped = ''
  for (const ch of val) {
    switch (ch) {
      case '\\':
        escaped += '\\\\'
        break
      case '\t':
        escaped += '\\t'
        break
      case '\n':
        escaped += '\\n'
        break
      case '\r':
        escaped += '\\r'
        break
      default:
        escaped += ch
    }
  }
  return escaped
}
86
+
87
/**
 * builds a synthetic SQL dump made of `opts.tables` tables named
 * `stress_restore_<n>`, each created with a BIGINT `id` primary key plus
 * `opts.columnsPerTable` TEXT columns (`c_0`, `c_1`, ...) and filled through
 * a `COPY ... FROM stdin` section of `opts.rowsPerTable` rows.
 *
 * cell values start with a `t<table>_r<row>_c<col>_` prefix and are padded
 * with `x` up to roughly `opts.payloadBytes` (at least one `x` is always
 * appended). column `c_0` of every 97th row is emitted as `\N` (NULL).
 *
 * @returns the dump as a single newline-joined string
 */
function generateStressDump(opts: {
  tables: number
  rowsPerTable: number
  columnsPerTable: number
  payloadBytes: number
}): string {
  const { tables, rowsPerTable, columnsPerTable, payloadBytes } = opts
  const columnNames = Array.from({ length: columnsPerTable }, (_, i) => `c_${i}`)
  const columnDefs = columnNames.map((name) => `${name} TEXT`).join(', ')
  const columnList = columnNames.join(', ')

  const out: string[] = [
    'SET statement_timeout = 0;',
    "SET client_encoding = 'UTF8';",
    'SET standard_conforming_strings = on;',
    '',
  ]

  for (let tableIdx = 0; tableIdx < tables; tableIdx++) {
    const tableName = `stress_restore_${tableIdx}`
    out.push(
      `CREATE TABLE IF NOT EXISTS ${tableName} (id BIGINT PRIMARY KEY, ${columnDefs});`
    )
    out.push(`COPY ${tableName} (id, ${columnList}) FROM stdin;`)

    for (let rowIdx = 0; rowIdx < rowsPerTable; rowIdx++) {
      const cells = columnNames.map((_, colIdx) => {
        // sprinkle NULLs into the first column so the restore path sees \N
        if (colIdx === 0 && rowIdx % 97 === 0) return '\\N'
        const prefix = `t${tableIdx}_r${rowIdx}_c${colIdx}_`
        const fillerLength = Math.max(1, payloadBytes - prefix.length)
        return escapeCopy(prefix + 'x'.repeat(fillerLength))
      })
      const rowId = tableIdx * 1_000_000 + rowIdx + 1
      out.push(`${rowId}\t${cells.join('\t')}`)
    }

    // end-of-data marker for the COPY section, then a blank separator line
    out.push('\\.', '')
  }

  return out.join('\n')
}
124
+
125
/**
 * opens a zero-cache sync websocket subscribed to `query` and resolves once
 * its first downstream message has arrived.
 *
 * a throw-away "bootstrap" socket (wsid=bootstrap, empty desiredQueriesPatch)
 * is opened first; only after it has received a message is the real socket
 * (wsid=ws1) opened with the query in its initConnection message. every
 * message received on the real socket is JSON-parsed and pushed onto
 * `downstream`, including messages that arrive after the promise resolved.
 *
 * a failed attempt closes its sockets and detaches its message listener so
 * frames from an abandoned attempt cannot leak into a queue that a retry
 * reuses.
 *
 * @param port zero-cache port on 127.0.0.1
 * @param downstream queue receiving every parsed message from the query socket
 * @param query AST sent as the `put` entry (hash `q1`) of desiredQueriesPatch
 * @returns the still-open query socket once its first message has been queued
 * @throws rejects on a socket error, when the bootstrap socket gets no message
 *   within 7s, when the query socket gets no message within 7s, or when the
 *   query socket closes before its first message
 */
function connectAndSubscribe(
  port: number,
  downstream: Queue<unknown>,
  query: Record<string, unknown>
): Promise<WebSocket> {
  return new Promise((resolve, reject) => {
    // one timestamp for both the client group id and the ts= parameter
    const ts = Date.now()
    const clientGroupID = `restore-live-cg-${ts}`
    const urlBase =
      `ws://127.0.0.1:${port}/sync/v${SYNC_PROTOCOL_VERSION}/connect` +
      `?clientGroupID=${clientGroupID}` +
      `&clientID=restore-live-client` +
      `&schemaVersion=1&baseCookie=&ts=${ts}&lmid=0`

    const closeQuietly = (socket: WebSocket) => {
      try {
        socket.close()
      } catch {}
    }

    const bootstrapProtocol = encodeSecProtocols(
      ['initConnection', { desiredQueriesPatch: [] }],
      undefined
    )
    const bootstrapWs = new WebSocket(`${urlBase}&wsid=bootstrap`, bootstrapProtocol)

    // set once the bootstrap phase has either failed or handed over to the
    // query socket; guards against a late message/error acting a second time
    let bootstrapDone = false

    const failBootstrap = (err: unknown) => {
      if (bootstrapDone) return
      bootstrapDone = true
      clearTimeout(bootstrapTimer)
      closeQuietly(bootstrapWs)
      reject(err)
    }

    const bootstrapTimer = setTimeout(() => {
      failBootstrap(new Error('bootstrap websocket timeout'))
    }, 7000)

    // `on` rather than `once`: closing a socket can itself emit 'error', and
    // an 'error' event without a listener would be thrown as uncaught
    bootstrapWs.on('error', failBootstrap)
    bootstrapWs.once('message', () => {
      // a message that arrives after the timeout already rejected must not
      // open a query socket nobody owns
      if (bootstrapDone) return
      bootstrapDone = true
      clearTimeout(bootstrapTimer)
      closeQuietly(bootstrapWs)

      const initConnectionMessage: [string, Record<string, unknown>] = [
        'initConnection',
        {
          desiredQueriesPatch: [{ op: 'put', hash: 'q1', ast: query }],
        },
      ]
      const secProtocol = encodeSecProtocols(initConnectionMessage, undefined)
      const ws = new WebSocket(`${urlBase}&wsid=ws1`, secProtocol)

      let settled = false

      // rejects the attempt and makes sure the abandoned socket can no longer
      // feed `downstream`
      const abandon = (err: unknown) => {
        if (settled) return
        settled = true
        clearTimeout(failTimer)
        ws.removeAllListeners('message')
        closeQuietly(ws)
        reject(err)
      }

      const failTimer = setTimeout(() => {
        abandon(new Error('websocket connected but no downstream messages'))
      }, 7000)

      ws.on('message', (data) => {
        const msg = JSON.parse(data.toString())
        downstream.enqueue(msg)
        if (settled) return
        settled = true
        clearTimeout(failTimer)
        resolve(ws)
      })
      // stays attached after settling so later socket errors are absorbed
      ws.on('error', abandon)
      ws.once('close', () => {
        abandon(new Error('websocket closed before initial downstream message'))
      })
    })
  })
}
206
+
207
/**
 * keeps calling connectAndSubscribe() until it succeeds, pausing 300ms after
 * each failed attempt.
 *
 * the deadline is only checked before an attempt starts, so an attempt that
 * began just before it may still run to completion.
 *
 * @param timeoutMs how long new attempts may be started
 * @returns the socket from the first successful attempt
 * @throws an Error carrying the last attempt's failure once the deadline passed
 */
async function connectAndSubscribeWithRetry(
  port: number,
  downstream: Queue<unknown>,
  query: Record<string, unknown>,
  timeoutMs = 30_000
): Promise<WebSocket> {
  const giveUpAt = Date.now() + timeoutMs
  let lastErr: unknown
  for (;;) {
    if (Date.now() >= giveUpAt) break
    try {
      return await connectAndSubscribe(port, downstream, query)
    } catch (err) {
      lastErr = err
    }
    await new Promise((wake) => setTimeout(wake, 300))
  }
  const reason = lastErr instanceof Error ? lastErr.message : String(lastErr)
  throw new Error(`timed out connecting websocket after reset: ${reason}`)
}
227
+
228
/**
 * consumes messages from `downstream` until a `pokeEnd` message is seen, the
 * queue stays silent for 3s, or 30s have passed overall.
 *
 * never throws on timeout; it simply returns.
 */
async function drainInitialPokes(downstream: Queue<unknown>) {
  const stopAt = Date.now() + 30_000
  for (;;) {
    if (Date.now() >= stopAt) return
    // 'timeout' is the sentinel the queue hands back after 3s of silence
    const frame: unknown = await downstream.dequeue('timeout', 3000)
    const wentQuiet = frame === 'timeout'
    const pokeEnded = Array.isArray(frame) && frame[0] === 'pokeEnd'
    if (wentQuiet || pokeEnded) return
  }
}
236
+
237
/**
 * waits for a `pokePart` message whose rowsPatch contains a `put` on
 * `restore_live_probe` with `value.value === expectedValue`.
 *
 * @param downstream queue of parsed downstream messages
 * @param expectedValue value the probe row must carry
 * @param timeoutMs overall budget; each individual wait lasts at least 1s
 * @throws when the queue stays silent for the remaining budget, or when the
 *   budget runs out without a match. both errors include the last 8 messages
 *   seen.
 */
async function waitForPokeWithValue(
  downstream: Queue<unknown>,
  expectedValue: string,
  timeoutMs = 20_000
): Promise<void> {
  const deadline = Date.now() + timeoutMs
  const seen: unknown[] = []
  const recentMessages = () => JSON.stringify(seen.slice(-8))
  const carriesExpectedValue = (patch: Record<string, any>) =>
    patch.op === 'put' &&
    patch.tableName === 'restore_live_probe' &&
    patch.value?.value === expectedValue

  while (Date.now() < deadline) {
    const waitMs = Math.max(1000, deadline - Date.now())
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped wire message
    const msg = (await downstream.dequeue('timeout', waitMs)) as any
    if (msg === 'timeout') {
      throw new Error(
        `timed out waiting for pokePart; recent messages: ${recentMessages()}`
      )
    }
    seen.push(msg)

    const isPokePart = Array.isArray(msg) && msg[0] === 'pokePart'
    if (!isPokePart || !msg[1]?.rowsPatch) continue

    const rowsPatch = msg[1].rowsPatch as Array<Record<string, any>>
    if (rowsPatch.some((patch) => carriesExpectedValue(patch))) return
  }

  throw new Error(
    `timed out waiting for restore_live_probe value "${expectedValue}"; recent messages: ${recentMessages()}`
  )
}
270
+
271
/**
 * polls 127.0.0.1:`port` with plain TCP connects until one succeeds.
 *
 * each probe gives up after 1s; failed probes are followed by a 500ms pause.
 *
 * @throws when no connect succeeded before `timeoutMs` elapsed
 */
async function waitForZero(port: number, timeoutMs = 60_000) {
  const { Socket } = await import('node:net')
  const deadline = Date.now() + timeoutMs

  // resolves true when a TCP connection could be established, false otherwise
  const probeOnce = () =>
    new Promise<boolean>((resolve) => {
      const probe = new Socket()
      const finish = (reachable: boolean) => {
        probe.removeAllListeners()
        try {
          probe.destroy()
        } catch {}
        resolve(reachable)
      }
      probe.setTimeout(1000)
      probe.once('connect', () => finish(true))
      probe.once('timeout', () => finish(false))
      probe.once('error', () => finish(false))
      probe.connect(port, '127.0.0.1')
    })

  while (Date.now() < deadline) {
    if (await probeOnce()) return
    await new Promise((wake) => setTimeout(wake, 500))
  }
  throw new Error(`zero-cache not ready on port ${port} after ${timeoutMs}ms`)
}
295
+
296
/**
 * end-to-end stress test: restores a generated dump over the postgres wire
 * while a websocket subscriber is connected, signals the running instance
 * with SIGUSR1, then checks that a fresh subscriber still receives new writes.
 *
 * dump size is tunable through OREZ_STRESS_TABLES / OREZ_STRESS_ROWS /
 * OREZ_STRESS_COLS / OREZ_STRESS_PAYLOAD.
 */
describe('live restore stress with connected frontend', { timeout: 360_000 }, () => {
  let db: PGlite
  let pgPort: number
  let zeroPort: number
  let shutdown: () => Promise<void>
  let restartZero: (() => Promise<void>) | undefined
  let dataDir: string
  let dumpFile: string

  // adds the probe table to the given publication; failures are ignored on
  // purpose (best effort, e.g. when the table is already a member)
  const addProbeToPublication = async (pubName: string) => {
    const quotedPub = '"' + pubName.replace(/"/g, '""') + '"'
    await db
      .exec(`ALTER PUBLICATION ${quotedPub} ADD TABLE "public"."restore_live_probe"`)
      .catch(() => {})
  }

  beforeAll(async () => {
    await loadModule()

    const tables = envInt('OREZ_STRESS_TABLES', 6)
    const rowsPerTable = envInt('OREZ_STRESS_ROWS', 1800)
    const columnsPerTable = envInt('OREZ_STRESS_COLS', 8)
    const payloadBytes = envInt('OREZ_STRESS_PAYLOAD', 96)

    dumpFile = join(tmpdir(), `orez-live-stress-${Date.now()}.sql`)
    writeFileSync(
      dumpFile,
      generateStressDump({ tables, rowsPerTable, columnsPerTable, payloadBytes })
    )

    dataDir = `.orez-live-stress-test-${Date.now()}`
    const started = await startZeroLite({
      // randomised ports within fixed 1000-wide ranges
      pgPort: 29000 + Math.floor(Math.random() * 1000),
      zeroPort: 30000 + Math.floor(Math.random() * 1000),
      dataDir,
      logLevel: 'warn',
      skipZeroCache: false,
    })

    db = started.db
    pgPort = started.pgPort
    zeroPort = started.zeroPort
    shutdown = started.stop
    restartZero = started.restartZero
    await waitForZero(zeroPort, 90_000)
  }, 180_000)

  afterAll(async () => {
    if (shutdown) await shutdown()
    try {
      unlinkSync(dumpFile)
    } catch {}
    if (dataDir) {
      try {
        rmSync(dataDir, { recursive: true, force: true })
      } catch {}
    }
  })

  test('frontend stays connected through restore lifecycle and syncs after reset', async () => {
    await db.exec(`
      CREATE TABLE IF NOT EXISTS restore_live_probe (
        id TEXT PRIMARY KEY,
        value TEXT NOT NULL
      )
    `)
    await installAllowAllPermissions(db, ['restore_live_probe'])
    if (restartZero) {
      await restartZero()
      await waitForZero(zeroPort, 60_000)
    }
    const pubName = process.env.ZERO_APP_PUBLICATIONS?.trim()
    if (pubName) {
      await addProbeToPublication(pubName)
      await installChangeTracking(db)
    }
    await db.query(`INSERT INTO restore_live_probe (id, value) VALUES ($1, $2)`, [
      'before-restore',
      'before',
    ])

    // whichever socket is currently open; closed in `finally` so a failing
    // step does not leave a live websocket behind
    let ws: WebSocket | undefined
    try {
      const downstream = new Queue<unknown>()
      ws = await connectAndSubscribeWithRetry(zeroPort, downstream, {
        table: 'restore_live_probe',
        orderBy: [['id', 'asc']],
      })
      await drainInitialPokes(downstream)

      // restore while websocket is connected (frontend simulation)
      const sql = postgres({
        host: '127.0.0.1',
        port: pgPort,
        user: 'user',
        password: 'password',
        database: 'postgres',
        max: 1,
        onnotice: () => {},
      })
      try {
        const wireDb = { exec: (query: string) => sql.unsafe(query) as Promise<unknown> }
        await execDumpFile(wireDb, dumpFile)
      } finally {
        await sql.end({ timeout: 1 }).catch(() => {})
      }

      // trigger the reset by signalling the pid recorded in the data dir
      const pid = Number(readFileSync(join(dataDir, 'orez.pid'), 'utf-8').trim())
      expect(pid).toBeGreaterThan(0)
      process.kill(pid, 'SIGUSR1')
      await waitForZero(zeroPort, 90_000)
      if (pubName) {
        await addProbeToPublication(pubName)
      }

      // drop the pre-reset socket and subscribe again with a fresh queue
      try {
        ws.close()
      } catch {}
      const downstreamAfterReset = new Queue<unknown>()
      ws = await connectAndSubscribeWithRetry(zeroPort, downstreamAfterReset, {
        table: 'restore_live_probe',
        orderBy: [['id', 'asc']],
      })
      await drainInitialPokes(downstreamAfterReset)

      // verify write is captured in change tracking after reset
      const marker = `after-${Date.now()}`
      await db.query(`INSERT INTO restore_live_probe (id, value) VALUES ($1, $2)`, [
        `post-restore-${Date.now()}`,
        marker,
      ])
      const tracked = await db.query<{ count: string }>(
        `SELECT count(*)::text as count
         FROM _orez._zero_changes
         WHERE table_name = 'public.restore_live_probe'`
      )
      if (Number(tracked.rows[0]?.count || '0') === 0) {
        throw new Error('post-reset write was not captured in _orez._zero_changes')
      }

      await waitForPokeWithValue(downstreamAfterReset, marker, 30_000)
    } finally {
      // cleanup must never mask the real test failure
      try {
        ws?.close()
      } catch {}
    }
  })
})
@@ -18,6 +18,20 @@ import WebSocket from 'ws'
18
18
 
19
19
  import { execDumpFile } from '../cli.js'
20
20
  import { startZeroLite } from '../index.js'
21
+ import { installAllowAllPermissions } from './test-permissions.js'
22
+
23
+ // zero-cache protocol version (from @rocicorp/zero/out/zero-protocol/src/protocol-version.js)
24
+ const PROTOCOL_VERSION = 45
25
+
26
/**
 * encodes the initConnection message (and optional auth token) for the
 * sec-websocket-protocol header; matches zero-protocol's encodeSecProtocols
 * implementation.
 *
 * the pair is serialised as JSON, base64-encoded, then escaped with
 * encodeURIComponent. an undefined authToken is dropped by JSON.stringify.
 */
function encodeSecProtocols(
  initConnectionMessage: unknown,
  authToken: string | undefined
): string {
  const json = JSON.stringify({ initConnectionMessage, authToken })
  const base64 = Buffer.from(json, 'utf-8').toString('base64')
  return encodeURIComponent(base64)
}
21
35
 
22
36
  import type { PGlite } from '@electric-sql/pglite'
23
37
 
@@ -102,6 +116,7 @@ describe('restore/reset integration regression', { timeout: 150_000 }, () => {
102
116
  let pgPort: number
103
117
  let zeroPort: number
104
118
  let shutdown: () => Promise<void>
119
+ let restartZero: (() => Promise<void>) | undefined
105
120
  let dataDir: string
106
121
  let dumpFile: string
107
122
  let dumpFileIsTemp = false
@@ -127,6 +142,7 @@ describe('restore/reset integration regression', { timeout: 150_000 }, () => {
127
142
  pgPort = started.pgPort
128
143
  zeroPort = started.zeroPort
129
144
  shutdown = started.stop
145
+ restartZero = started.restartZero
130
146
 
131
147
  await waitForZero(zeroPort, 90_000)
132
148
  }, 120_000)
@@ -175,11 +191,35 @@ describe('restore/reset integration regression', { timeout: 150_000 }, () => {
175
191
  CREATE TABLE IF NOT EXISTS reset_probe (
176
192
  id text PRIMARY KEY,
177
193
  value text NOT NULL
178
- )
194
+ );
195
+
196
+ -- install change tracking trigger on the new table
197
+ DROP TRIGGER IF EXISTS _zero_change_trigger ON public.reset_probe;
198
+ CREATE TRIGGER _zero_change_trigger
199
+ AFTER INSERT OR UPDATE OR DELETE ON public.reset_probe
200
+ FOR EACH ROW EXECUTE FUNCTION public._zero_track_change();
201
+
202
+ -- install notify trigger for real-time notifications
203
+ DROP TRIGGER IF EXISTS _zero_notify_trigger ON public.reset_probe;
204
+ CREATE TRIGGER _zero_notify_trigger
205
+ AFTER INSERT OR UPDATE OR DELETE ON public.reset_probe
206
+ FOR EACH STATEMENT EXECUTE FUNCTION public._zero_notify_change();
179
207
  `)
208
+ const pubName = process.env.ZERO_APP_PUBLICATIONS?.trim()
209
+ if (pubName) {
210
+ const quotedPub = '"' + pubName.replace(/"/g, '""') + '"'
211
+ await db
212
+ .exec(`ALTER PUBLICATION ${quotedPub} ADD TABLE "public"."reset_probe"`)
213
+ .catch(() => {})
214
+ }
215
+ await installAllowAllPermissions(db, ['reset_probe'])
216
+ if (restartZero) {
217
+ await restartZero()
218
+ await waitForZero(zeroPort, 60_000)
219
+ }
180
220
 
181
221
  const downstream = new Queue<unknown>()
182
- const ws = connectAndSubscribe(zeroPort, downstream, {
222
+ const ws = await connectAndSubscribeWithRetry(zeroPort, downstream, {
183
223
  table: 'reset_probe',
184
224
  orderBy: [['id', 'asc']],
185
225
  })
@@ -214,28 +254,107 @@ function connectAndSubscribe(
214
254
  port: number,
215
255
  downstream: Queue<unknown>,
216
256
  query: Record<string, unknown>
217
- ): WebSocket {
218
- const ws = new WebSocket(
219
- `ws://localhost:${port}/sync/v4/connect` +
220
- `?clientGroupID=restore-reset-cg-${Date.now()}&clientID=restore-reset-client&wsid=ws1&schemaVersion=1&baseCookie=&ts=${Date.now()}&lmid=0`
221
- )
257
+ ): Promise<WebSocket> {
258
+ return new Promise((resolve, reject) => {
259
+ const ts = Date.now()
260
+ const clientGroupID = `restore-reset-cg-${ts}`
261
+ const clientID = 'restore-reset-client'
262
+ const urlBase =
263
+ `ws://127.0.0.1:${port}/sync/v${PROTOCOL_VERSION}/connect` +
264
+ `?clientGroupID=${clientGroupID}&clientID=${clientID}&schemaVersion=1&baseCookie=&ts=${ts}&lmid=0`
265
+
266
+ // bootstrap the client group first so the query connection is not "new group"
267
+ const bootstrapProtocol = encodeSecProtocols(
268
+ ['initConnection', { desiredQueriesPatch: [] }],
269
+ undefined
270
+ )
271
+ const bootstrapWs = new WebSocket(`${urlBase}&wsid=bootstrap`, bootstrapProtocol)
272
+ const bootstrapTimer = setTimeout(() => {
273
+ fail(new Error('bootstrap websocket timeout'))
274
+ }, 7000)
222
275
 
223
- ws.on('message', (data) => {
224
- downstream.enqueue(JSON.parse(data.toString()))
225
- })
276
+ const fail = (err: unknown) => {
277
+ clearTimeout(bootstrapTimer)
278
+ try {
279
+ bootstrapWs.close()
280
+ } catch {}
281
+ reject(err)
282
+ }
226
283
 
227
- ws.on('open', () => {
228
- ws.send(
229
- JSON.stringify([
284
+ bootstrapWs.once('error', fail)
285
+ bootstrapWs.once('message', () => {
286
+ clearTimeout(bootstrapTimer)
287
+ try {
288
+ bootstrapWs.close()
289
+ } catch {}
290
+
291
+ const initConnectionMessage: [string, Record<string, unknown>] = [
230
292
  'initConnection',
231
293
  {
232
294
  desiredQueriesPatch: [{ op: 'put', hash: 'q1', ast: query }],
233
295
  },
234
- ])
235
- )
296
+ ]
297
+ const secProtocol = encodeSecProtocols(initConnectionMessage, undefined)
298
+ const ws = new WebSocket(`${urlBase}&wsid=ws1`, secProtocol)
299
+
300
+ let settled = false
301
+ let sawMessage = false
302
+ const failTimer = setTimeout(() => {
303
+ if (settled) return
304
+ settled = true
305
+ try {
306
+ ws.close()
307
+ } catch {}
308
+ reject(new Error('websocket connected but no downstream messages'))
309
+ }, 7000)
310
+
311
+ ws.on('message', (data) => {
312
+ const msg = JSON.parse(data.toString())
313
+ downstream.enqueue(msg)
314
+ if (!sawMessage && !settled) {
315
+ sawMessage = true
316
+ settled = true
317
+ clearTimeout(failTimer)
318
+ resolve(ws)
319
+ }
320
+ })
321
+
322
+ ws.once('error', (err) => {
323
+ if (settled) return
324
+ settled = true
325
+ clearTimeout(failTimer)
326
+ reject(err)
327
+ })
328
+
329
+ ws.once('close', () => {
330
+ if (settled) return
331
+ settled = true
332
+ clearTimeout(failTimer)
333
+ reject(new Error('websocket closed before initial downstream message'))
334
+ })
335
+ })
236
336
  })
337
+ }
237
338
 
238
- return ws
339
/**
 * keeps calling connectAndSubscribe() until it succeeds, pausing 300ms after
 * each failed attempt.
 *
 * the deadline is only checked before an attempt starts, so an attempt that
 * began just before it may still run to completion.
 *
 * @param timeoutMs how long new attempts may be started
 * @returns the socket from the first successful attempt
 * @throws an Error carrying the last attempt's failure once the deadline passed
 */
async function connectAndSubscribeWithRetry(
  port: number,
  downstream: Queue<unknown>,
  query: Record<string, unknown>,
  timeoutMs = 30_000
): Promise<WebSocket> {
  const giveUpAt = Date.now() + timeoutMs
  let lastErr: unknown
  for (;;) {
    if (Date.now() >= giveUpAt) break
    try {
      return await connectAndSubscribe(port, downstream, query)
    } catch (err) {
      lastErr = err
    }
    await new Promise((wake) => setTimeout(wake, 300))
  }
  const reason = lastErr instanceof Error ? lastErr.message : String(lastErr)
  throw new Error(`timed out connecting websocket after reset: ${reason}`)
}
240
359
 
241
360
  async function drainInitialPokes(downstream: Queue<unknown>) {