orez 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -18,6 +18,7 @@ import {
18
18
  import { createLogStore, type LogStore } from './admin/log-store.js'
19
19
  import {
20
20
  isChildProcessRunning,
21
+ isPidRunning,
21
22
  killProcessTree,
22
23
  waitForChildProcessExit,
23
24
  } from './child-process.js'
@@ -270,8 +271,34 @@ export async function startZeroLite(overrides: Partial<ZeroLiteConfig> = {}) {
270
271
 
271
272
  mkdirSync(config.dataDir, { recursive: true })
272
273
 
273
- // write pid file for IPC (pg_restore uses this to signal restart)
274
+ // write pid file for IPC (pg_restore uses this to signal restart).
275
+ // before overwriting, check for orphaned zero-cache processes from a
276
+ // previous orez run that didn't shut down cleanly (e.g. SIGKILL'd before
277
+ // the in-process watchdog could notice). sweep anything still holding
278
+ // the zero port so the new run can bind.
274
279
  const pidFile = resolve(config.dataDir, 'orez.pid')
280
+ if (!config.skipZeroCache && process.platform !== 'win32') {
281
+ try {
282
+ const priorPid = Number(readFileSync(pidFile, 'utf8').trim())
283
+ if (priorPid > 0 && priorPid !== process.pid && !isPidRunning(priorPid)) {
284
+ const result = spawnSync('lsof', ['-ti', `:${config.zeroPort}`], {
285
+ encoding: 'utf8',
286
+ })
287
+ const orphans = (result.stdout || '')
288
+ .split(/\s+/)
289
+ .map((v) => Number(v.trim()))
290
+ .filter((v) => Number.isInteger(v) && v > 0 && v !== process.pid)
291
+ for (const pid of orphans) {
292
+ log.orez(
293
+ `killing orphan pid ${pid} holding zero port ${config.zeroPort} from previous orez run`
294
+ )
295
+ try {
296
+ killProcessTree(pid, 'SIGKILL')
297
+ } catch {}
298
+ }
299
+ }
300
+ } catch {}
301
+ }
275
302
  writeFileSync(pidFile, String(process.pid))
276
303
 
277
304
  // write admin port file so pg_restore can find it
@@ -895,6 +922,17 @@ async function startZeroCache(
895
922
  ...(sqliteMode === 'wasm' ? { ZERO_NUM_SYNC_WORKERS: '1' } : {}),
896
923
  }
897
924
 
925
+ // high worker counts multiply the blast radius of any sync-worker bug
926
+ // (e.g. orphaned workers busy-looping on EOF'd sibling pipes). dev rarely
927
+ // benefits from more than a couple; warn so it's obvious where the CPU
928
+ // went.
929
+ const workerCount = Number(env.ZERO_NUM_SYNC_WORKERS)
930
+ if (Number.isFinite(workerCount) && workerCount > 4) {
931
+ log.orez(
932
+ `warning: ZERO_NUM_SYNC_WORKERS=${workerCount} is high for development — each worker consumes CPU/memory and amplifies any sync-loop bug. consider 2.`
933
+ )
934
+ }
935
+
898
936
  const zeroCacheBin = resolve(zeroEntry, '..', 'cli.js')
899
937
  if (!existsSync(zeroCacheBin)) {
900
938
  throw new Error('zero-cache cli.js not found. install @rocicorp/zero')
@@ -908,10 +946,23 @@ async function startZeroCache(
908
946
  }
909
947
  }
910
948
 
911
- // preload script to label the zero-cache child process
949
+ // preload script to label the zero-cache child process AND self-destruct
950
+ // if the orez parent dies. macOS has no PR_SET_PDEATHSIG, so on a hard
951
+ // parent kill (SIGKILL) or a crash that skips the `stop()` path, zero-cache
952
+ // workers get reparented to init and can busy-loop on EOF'd sibling pipes
953
+ // at 100% CPU indefinitely. every forked zero-cache worker inherits
954
+ // NODE_OPTIONS, so the --require below runs in each one; they independently
955
+ // poll the captured orez pid and exit when it disappears.
912
956
  const preloadPath = resolve(config.dataDir, '.orez-zero-title.cjs')
913
957
  const zeroTitle = orezTitle('orez [zero]')
914
- writeFileSync(preloadPath, `process.title = ${JSON.stringify(zeroTitle)}\n`)
958
+ writeFileSync(
959
+ preloadPath,
960
+ `process.title = ${JSON.stringify(zeroTitle)};\n` +
961
+ `const __orezPid = ${process.pid};\n` +
962
+ `setInterval(() => {\n` +
963
+ ` try { process.kill(__orezPid, 0); } catch { process.exit(0); }\n` +
964
+ `}, 1000).unref();\n`
965
+ )
915
966
 
916
967
  const nodeOptions = [
917
968
  sqliteMode === 'wasm' ? '--max-old-space-size=16384' : '',
@@ -925,7 +976,10 @@ async function startZeroCache(
925
976
  const nodeBinary = resolveNodeBinary()
926
977
  const child = spawn(nodeBinary, [zeroCacheBin], {
927
978
  env,
928
- stdio: ['ignore', 'pipe', 'pipe'],
979
+ // stdin piped (not 'ignore') so zero-cache's pipe fd to orez closes with
980
+ // EOF on parent death — belt-and-suspenders alongside the ppid watchdog
981
+ // in the --require preload above.
982
+ stdio: ['pipe', 'pipe', 'pipe'],
929
983
  }) as ZeroChildProcess
930
984
  child.__orezTail = []
931
985
 
package/src/pg-proxy.ts CHANGED
@@ -644,17 +644,29 @@ export async function startPgProxy(
644
644
  // prevents other connections from interleaving and corrupting PGlite's
645
645
  // unnamed portal/statement state during the pipeline.
646
646
  let pipelineMutexHeld = false
647
- // clean up pglite transaction state when a client disconnects
647
+ // clean up pglite transaction state when a client disconnects.
648
+ // CRITICAL: only ROLLBACK if this socket owns the current pglite
649
+ // transaction. pglite is single-session, so an unconditional ROLLBACK
650
+ // here clobbers any OTHER socket's active transaction. that was the
651
+ // fresh-boot race: migrate.ts's idle pool sockets closed after exit,
652
+ // ran ROLLBACK while zero-cache had just sent BEGIN, and zero-cache's
653
+ // next SAVEPOINT failed with "25P01: not in a transaction block".
648
654
  socket.on('close', async () => {
649
655
  // replication sockets don't own a transaction — skip ROLLBACK
650
656
  if (isReplicationConnection) return
651
657
  try {
652
- const { db, mutex } = getDbContext(dbName)
658
+ const { db, mutex, txState } = getDbContext(dbName)
653
659
  await mutex.acquire()
654
660
  try {
655
- await db.exec('ROLLBACK')
661
+ // only rollback OUR transaction. if idle (owner=null) there's
662
+ // nothing to do; if another socket owns it, leave theirs alone.
663
+ if (txState.owner === socket && txState.status !== 0x49) {
664
+ await db.exec('ROLLBACK')
665
+ txState.status = 0x49
666
+ txState.owner = null
667
+ }
656
668
  } catch {
657
- // no transaction to rollback, or db is closed
669
+ // db is closed or rollback failed — ignore
658
670
  } finally {
659
671
  mutex.release()
660
672
  }