orez 0.1.21 → 0.1.22

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/cli-entry.js +2 -0
  2. package/dist/cli-entry.js.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +8 -0
  5. package/dist/cli.js.map +1 -1
  6. package/dist/config.d.ts +1 -0
  7. package/dist/config.d.ts.map +1 -1
  8. package/dist/config.js +1 -0
  9. package/dist/config.js.map +1 -1
  10. package/dist/index.d.ts.map +1 -1
  11. package/dist/index.js +43 -18
  12. package/dist/index.js.map +1 -1
  13. package/dist/pg-proxy.d.ts.map +1 -1
  14. package/dist/pg-proxy.js +93 -37
  15. package/dist/pg-proxy.js.map +1 -1
  16. package/dist/pglite-ipc.d.ts +37 -0
  17. package/dist/pglite-ipc.d.ts.map +1 -0
  18. package/dist/pglite-ipc.js +182 -0
  19. package/dist/pglite-ipc.js.map +1 -0
  20. package/dist/pglite-manager.d.ts +11 -0
  21. package/dist/pglite-manager.d.ts.map +1 -1
  22. package/dist/pglite-manager.js +72 -0
  23. package/dist/pglite-manager.js.map +1 -1
  24. package/dist/pglite-worker-thread.d.ts +15 -0
  25. package/dist/pglite-worker-thread.d.ts.map +1 -0
  26. package/dist/pglite-worker-thread.js +147 -0
  27. package/dist/pglite-worker-thread.js.map +1 -0
  28. package/dist/process-title.d.ts +2 -0
  29. package/dist/process-title.d.ts.map +1 -0
  30. package/dist/process-title.js +9 -0
  31. package/dist/process-title.js.map +1 -0
  32. package/dist/recovery.d.ts +1 -0
  33. package/dist/recovery.d.ts.map +1 -1
  34. package/dist/recovery.js +23 -13
  35. package/dist/recovery.js.map +1 -1
  36. package/dist/replication/change-tracker.d.ts +2 -0
  37. package/dist/replication/change-tracker.d.ts.map +1 -1
  38. package/dist/replication/change-tracker.js +4 -0
  39. package/dist/replication/change-tracker.js.map +1 -1
  40. package/dist/replication/handler.d.ts.map +1 -1
  41. package/dist/replication/handler.js +85 -20
  42. package/dist/replication/handler.js.map +1 -1
  43. package/package.json +2 -2
  44. package/src/cli-entry.ts +4 -0
  45. package/src/cli.ts +10 -0
  46. package/src/config.ts +2 -0
  47. package/src/index.ts +55 -19
  48. package/src/integration/replication-latency.test.ts +428 -0
  49. package/src/pg-proxy.ts +106 -39
  50. package/src/pglite-ipc.test.ts +99 -0
  51. package/src/pglite-ipc.ts +214 -0
  52. package/src/pglite-manager.ts +93 -0
  53. package/src/pglite-worker-thread.ts +172 -0
  54. package/src/process-title.ts +9 -0
  55. package/src/recovery.ts +23 -14
  56. package/src/replication/change-tracker.test.ts +2 -0
  57. package/src/replication/change-tracker.ts +5 -0
  58. package/src/replication/handler.test.ts +14 -1
  59. package/src/replication/handler.ts +83 -22
  60. package/src/replication/tcp-replication.test.ts +9 -1
  61. package/src/replication/zero-compat.test.ts +17 -1
@@ -7,6 +7,7 @@ import {
7
7
  handleReplicationQuery,
8
8
  handleStartReplication,
9
9
  resetReplicationState,
10
+ signalReplicationChange,
10
11
  type ReplicationWriter,
11
12
  } from './handler'
12
13
 
@@ -137,6 +138,8 @@ describe('handleStartReplication', () => {
137
138
  afterEach(async () => {
138
139
  // closing db causes poll loop to exit with 'closed' error
139
140
  await db.close()
141
+ // wake handler from idle sleep so it hits the closed db and exits
142
+ signalReplicationChange()
140
143
  // wait for the replication promise to settle
141
144
  await replicationPromise?.catch(() => {})
142
145
  })
@@ -213,6 +216,7 @@ describe('handleStartReplication', () => {
213
216
 
214
217
  await new Promise((r) => setTimeout(r, 100))
215
218
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('streamed', 123)`)
219
+ signalReplicationChange()
216
220
  await new Promise((r) => setTimeout(r, 700))
217
221
 
218
222
  const types = written.flatMap(extractPayloadTypes)
@@ -245,12 +249,15 @@ describe('handleStartReplication', () => {
245
249
  await new Promise((r) => setTimeout(r, 100))
246
250
 
247
251
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('mut', 1)`)
252
+ signalReplicationChange()
248
253
  await new Promise((r) => setTimeout(r, 700))
249
254
 
250
255
  await db.exec(`UPDATE public.items SET value = 2 WHERE name = 'mut'`)
256
+ signalReplicationChange()
251
257
  await new Promise((r) => setTimeout(r, 700))
252
258
 
253
259
  await db.exec(`DELETE FROM public.items WHERE name = 'mut'`)
260
+ signalReplicationChange()
254
261
  await new Promise((r) => setTimeout(r, 700))
255
262
 
256
263
  const types = written.flatMap(extractPayloadTypes)
@@ -272,9 +279,11 @@ describe('handleStartReplication', () => {
272
279
  await new Promise((r) => setTimeout(r, 100))
273
280
 
274
281
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('a', 1)`)
282
+ signalReplicationChange()
275
283
  await new Promise((r) => setTimeout(r, 700))
276
284
 
277
285
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('b', 2)`)
286
+ signalReplicationChange()
278
287
  await new Promise((r) => setTimeout(r, 700))
279
288
 
280
289
  const types = written.flatMap(extractPayloadTypes)
@@ -299,6 +308,7 @@ describe('handleStartReplication', () => {
299
308
 
300
309
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('a', 1)`)
301
310
  await db.exec(`INSERT INTO public.other (label) VALUES ('b')`)
311
+ signalReplicationChange()
302
312
  await new Promise((r) => setTimeout(r, 700))
303
313
 
304
314
  const types = written.flatMap(extractPayloadTypes)
@@ -321,8 +331,9 @@ describe('handleStartReplication', () => {
321
331
  for (let i = 0; i < 20; i++) {
322
332
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('r${i}', ${i})`)
323
333
  }
334
+ signalReplicationChange()
324
335
 
325
- // wait multiple poll cycles
336
+ // wait for handler to process
326
337
  await new Promise((r) => setTimeout(r, 1500))
327
338
 
328
339
  const inserts = written.flatMap(extractPayloadTypes).filter((t) => t === 0x49)
@@ -342,9 +353,11 @@ describe('handleStartReplication', () => {
342
353
  await new Promise((r) => setTimeout(r, 100))
343
354
 
344
355
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('tx1', 1)`)
356
+ signalReplicationChange()
345
357
  await new Promise((r) => setTimeout(r, 700))
346
358
 
347
359
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('tx2', 2)`)
360
+ signalReplicationChange()
348
361
  await new Promise((r) => setTimeout(r, 700))
349
362
 
350
363
  const types = written.flatMap(extractPayloadTypes)
@@ -11,6 +11,7 @@ import { log } from '../log.js'
11
11
  const textEncoder = new TextEncoder()
12
12
  import {
13
13
  getChangesSince,
14
+ getCurrentWatermark,
14
15
  purgeConsumedChanges,
15
16
  installTriggersOnShardTables,
16
17
  type ChangeRecord,
@@ -237,8 +238,16 @@ export async function handleReplicationQuery(
237
238
  const lsn = lsnToString(nextLsn())
238
239
  const snapshotName = `00000003-00000001-1`
239
240
 
240
- // fresh slot = fresh zero-cache instance, reset watermark
241
- lastStreamedWatermark = 0
241
+ // set watermark to current DB state so replication only delivers changes
242
+ // that happen AFTER this point. this mirrors real postgres behavior where
243
+ // CREATE_REPLICATION_SLOT creates a consistent snapshot — the initial copy
244
+ // captures everything up to this point, and replication picks up from here.
245
+ // on reconnect this is effectively a no-op since the watermark is already
246
+ // at or past the current DB state.
247
+ const currentWm = await getCurrentWatermark(db)
248
+ if (currentWm > lastStreamedWatermark) {
249
+ lastStreamedWatermark = currentWm
250
+ }
242
251
 
243
252
  // persist slot so pg_replication_slots queries find it
244
253
  await db.query(
@@ -527,20 +536,24 @@ export async function handleStartReplication(
527
536
 
528
537
  // event-driven replication: proxy signals changes directly via signalReplicationChange(),
529
538
  // pg_notify as secondary signal, polling as final fallback.
530
- const pollIntervalIdle = 100
539
+ const pollIntervalIdle = 5000
531
540
  const batchSize = 50000
532
- const purgeEveryN = 5
533
- const shardRescanEveryN = 40
541
+ const purgeEveryN = 1
542
+ const shardRescanIntervalMs = 10_000
534
543
  let running = true
535
544
  let pollsSincePurge = 0
536
545
  let tryAcquireFailures = 0
537
- let pollsSinceShardRescan = 0
546
+ let lastShardRescan = -shardRescanIntervalMs
538
547
  let hasStreamedOnce = false
539
548
 
540
- // promise-based wakeup mechanism
549
+ // promise-based wakeup mechanism.
550
+ // signalPending captures signals that arrive while the handler is
551
+ // processing (not in waitForWakeup), preventing signal loss.
541
552
  let wakeupResolve: (() => void) | null = null
553
+ let signalPending = false
542
554
  let lastWakeupTime = 0
543
555
  const wakeup = () => {
556
+ signalPending = true
544
557
  if (wakeupResolve) {
545
558
  lastWakeupTime = performance.now()
546
559
  log.debug.repl('signal received, waking up')
@@ -548,15 +561,15 @@ export async function handleStartReplication(
548
561
  wakeupResolve = null
549
562
  }
550
563
  }
551
- const waitForWakeup = (timeoutMs: number): Promise<void> => {
564
+ const waitForWakeup = (timeoutMs: number): Promise<boolean> => {
552
565
  return new Promise((resolve) => {
553
566
  const timer = setTimeout(() => {
554
567
  wakeupResolve = null
555
- resolve()
568
+ resolve(false)
556
569
  }, timeoutMs)
557
570
  wakeupResolve = () => {
558
571
  clearTimeout(timer)
559
- resolve()
572
+ resolve(true)
560
573
  }
561
574
  })
562
575
  }
@@ -574,6 +587,9 @@ export async function handleStartReplication(
574
587
  }
575
588
 
576
589
  const poll = async () => {
590
+ let queryPending = true // query immediately on first iteration
591
+ let idleTimeoutCount = 0
592
+
577
593
  while (running) {
578
594
  // check if the connection was closed
579
595
  if (writer.closed) {
@@ -583,16 +599,59 @@ export async function handleStartReplication(
583
599
  }
584
600
 
585
601
  try {
602
+ // when no query is pending, wait for a signal or timeout.
603
+ // signals fire instantly when the proxy processes a write,
604
+ // so we only hit the timeout when truly idle.
605
+ if (!queryPending) {
606
+ // check if a signal arrived while we were processing
607
+ if (!signalPending) {
608
+ const wasSignaled = await waitForWakeup(pollIntervalIdle)
609
+ if (writer.closed) {
610
+ running = false
611
+ break
612
+ }
613
+ if (!wasSignaled) {
614
+ idleTimeoutCount++
615
+ // send keepalive on every timeout
616
+ writer.write(encodeKeepalive(currentLsn, nowMicros(), false))
617
+ log.debug.repl(`idle keepalive (lastWatermark=${lastWatermark})`)
618
+ // re-scan for new shard schemas during idle
619
+ if (performance.now() - lastShardRescan > shardRescanIntervalMs) {
620
+ if (mutex.tryAcquire()) {
621
+ lastShardRescan = performance.now()
622
+ try {
623
+ await installTriggersOnShardTables(db)
624
+ } finally {
625
+ mutex.release()
626
+ }
627
+ }
628
+ }
629
+ // safety poll every ~30s to catch edge cases (6 * 5000ms)
630
+ if (idleTimeoutCount < 6) continue
631
+ idleTimeoutCount = 0
632
+ log.debug.repl('safety poll')
633
+ // fall through to query
634
+ } else {
635
+ idleTimeoutCount = 0
636
+ }
637
+ } else {
638
+ idleTimeoutCount = 0
639
+ }
640
+ signalPending = false
641
+ }
642
+ queryPending = false
643
+
586
644
  // periodically re-scan for new shard schemas (e.g. chat_0 created by zero-cache)
587
- pollsSinceShardRescan++
588
- if (pollsSinceShardRescan >= shardRescanEveryN) {
645
+ if (performance.now() - lastShardRescan > shardRescanIntervalMs) {
589
646
  if (mutex.tryAcquire()) {
590
- pollsSinceShardRescan = 0
647
+ lastShardRescan = performance.now()
591
648
  try {
592
649
  await installTriggersOnShardTables(db)
593
650
  } finally {
594
651
  mutex.release()
595
652
  }
653
+ } else {
654
+ log.debug.repl('shard rescan skipped: mutex busy')
596
655
  }
597
656
  }
598
657
 
@@ -610,6 +669,7 @@ export async function handleStartReplication(
610
669
  if (tryAcquireFailures < 10) {
611
670
  // pre-sync: yield so zero-cache initial copy can finish
612
671
  await waitForWakeup(Math.min(10 * tryAcquireFailures, 100))
672
+ queryPending = true
613
673
  continue
614
674
  }
615
675
  await mutex.acquire()
@@ -673,9 +733,9 @@ export async function handleStartReplication(
673
733
  lastWatermark = batchEnd
674
734
  lastStreamedWatermark = batchEnd
675
735
  // all changes were filtered out (e.g. shard internal tables).
676
- // sleep briefly to avoid a tight loop when zero-cache is
677
- // continuously writing internal state.
678
- await waitForWakeup(pollIntervalIdle)
736
+ // brief wait to avoid tight loop, then recheck.
737
+ await waitForWakeup(200)
738
+ queryPending = true
679
739
  continue
680
740
  }
681
741
 
@@ -707,17 +767,15 @@ export async function handleStartReplication(
707
767
  }
708
768
 
709
769
  // got changes - continue immediately to check for more
770
+ queryPending = true
710
771
  continue
711
772
  }
712
773
 
713
- // send keepalive
774
+ // no changes: send keepalive
714
775
  const ts = nowMicros()
715
776
  writer.write(encodeKeepalive(currentLsn, ts, false))
716
-
717
777
  log.debug.repl(`idle (lastWatermark=${lastWatermark})`)
718
-
719
- // no changes: wait for notify signal or poll interval
720
- await waitForWakeup(pollIntervalIdle)
778
+ // next iteration will wait for signal at the top
721
779
  } catch (err: unknown) {
722
780
  const msg = err instanceof Error ? err.message : String(err)
723
781
  log.repl(`replication poll error: ${msg}`)
@@ -734,7 +792,10 @@ export async function handleStartReplication(
734
792
  try {
735
793
  await poll()
736
794
  } finally {
737
- _replicationWakeup = null
795
+ // only clear if still pointing to our wakeup (a new handler may have replaced it)
796
+ if (_replicationWakeup === wakeup) {
797
+ _replicationWakeup = null
798
+ }
738
799
  if (unsubscribe) {
739
800
  await unsubscribe().catch(() => {})
740
801
  }
@@ -17,6 +17,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'
17
17
  import { getConfig } from '../config'
18
18
  import { startPgProxy } from '../pg-proxy'
19
19
  import { installChangeTracking } from './change-tracker'
20
+ import { signalReplicationChange } from './handler'
20
21
 
21
22
  import type { Server, AddressInfo } from 'node:net'
22
23
 
@@ -559,8 +560,9 @@ describe('tcp replication', () => {
559
560
  "START_REPLICATION SLOT \"stream_test\" LOGICAL 0/0 (proto_version '1', publication_names 'zero_takeout')"
560
561
  )
561
562
 
562
- // insert data right away - the poll loop will pick it up once it starts
563
+ // insert data right away - signal so the handler picks it up immediately
563
564
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('tcp_streamed', 42)`)
565
+ signalReplicationChange()
564
566
 
565
567
  // collect everything for long enough to catch the change
566
568
  const allRaw = await replClient.collectStream(3000)
@@ -626,9 +628,11 @@ describe('tcp replication', () => {
626
628
  await replClient.collectStream(200) // skip CopyBothResponse
627
629
 
628
630
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('upd_target', 10)`)
631
+ signalReplicationChange()
629
632
  await replClient.collectStream(1500)
630
633
 
631
634
  await db.exec(`UPDATE public.items SET value = 20 WHERE name = 'upd_target'`)
635
+ signalReplicationChange()
632
636
  const stream = await replClient.collectStream(1500)
633
637
 
634
638
  const decoded: PgOutputMessage[] = []
@@ -670,9 +674,11 @@ describe('tcp replication', () => {
670
674
  await replClient.collectStream(200)
671
675
 
672
676
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('del_target', 99)`)
677
+ signalReplicationChange()
673
678
  await replClient.collectStream(1500)
674
679
 
675
680
  await db.exec(`DELETE FROM public.items WHERE name = 'del_target'`)
681
+ signalReplicationChange()
676
682
  const stream = await replClient.collectStream(1500)
677
683
 
678
684
  const decoded: PgOutputMessage[] = []
@@ -715,6 +721,7 @@ describe('tcp replication', () => {
715
721
  // insert into both tables
716
722
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('multi1', 1)`)
717
723
  await db.exec(`INSERT INTO public.products (label) VALUES ('multi2')`)
724
+ signalReplicationChange()
718
725
 
719
726
  // collect until we see both relations (with timeout)
720
727
  const allDecoded: PgOutputMessage[] = []
@@ -763,6 +770,7 @@ describe('tcp replication', () => {
763
770
  for (let i = 0; i < count; i++) {
764
771
  await db.exec(`INSERT INTO public.items (name, value) VALUES ('rapid${i}', ${i})`)
765
772
  }
773
+ signalReplicationChange()
766
774
 
767
775
  // give enough time for all changes to stream
768
776
  const stream = await replClient.collectStream(2000)
@@ -17,7 +17,8 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'
17
17
 
18
18
  import { getConfig } from '../config'
19
19
  import { startPgProxy } from '../pg-proxy'
20
- import { installChangeTracking } from './change-tracker'
20
+ import { installChangeTracking, resetShardSchemaCache } from './change-tracker'
21
+ import { signalReplicationChange } from './handler'
21
22
 
22
23
  import type { Server, AddressInfo } from 'node:net'
23
24
 
@@ -447,6 +448,7 @@ describe('zero-cache pgoutput compatibility', { timeout: 30000 }, () => {
447
448
  let port: number
448
449
 
449
450
  beforeEach(async () => {
451
+ resetShardSchemaCache()
450
452
  db = new PGlite()
451
453
  await db.waitReady
452
454
  await db.exec(`
@@ -467,12 +469,23 @@ describe('zero-cache pgoutput compatibility', { timeout: 30000 }, () => {
467
469
  await db.exec(`CREATE PUBLICATION zero_pub FOR ALL TABLES`)
468
470
  await installChangeTracking(db)
469
471
 
472
+ // auto-signal the replication handler after every db.exec() call.
473
+ // in production, writes go through the TCP proxy which signals automatically.
474
+ // in tests, db.exec() bypasses the proxy, so we signal explicitly.
475
+ const origExec = db.exec.bind(db)
476
+ ;(db as any).exec = async (sql: string) => {
477
+ const result = await origExec(sql)
478
+ signalReplicationChange()
479
+ return result
480
+ }
481
+
470
482
  const config = { ...getConfig(), pgPort: 0 }
471
483
  server = await startPgProxy(db, config)
472
484
  port = (server.address() as AddressInfo).port
473
485
  })
474
486
 
475
487
  afterEach(async () => {
488
+ signalReplicationChange()
476
489
  server?.close()
477
490
  await db?.close()
478
491
  })
@@ -968,6 +981,9 @@ describe('zero-cache pgoutput compatibility', { timeout: 30000 }, () => {
968
981
  const s = await stream()
969
982
  const q = s.messages
970
983
 
984
+ // give handler time to finish setup (trigger installation)
985
+ await new Promise((r) => setTimeout(r, 300))
986
+
971
987
  // insert into all three shard tables + a public table
972
988
  await db.exec(
973
989
  `INSERT INTO chat_0.clients ("clientGroupID", "clientID", "lastMutationID") VALUES ('cg1', 'c1', 1)`