omegon 0.6.9 → 0.6.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ import { truncateTail, DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize } from "
20
20
  import { Text } from "@styrene-lab/pi-tui";
21
21
  import { Type } from "@sinclair/typebox";
22
22
  import { spawn, execFile } from "node:child_process";
23
- import { registerCleaveProc, deregisterCleaveProc, killCleaveProc, killAllCleaveSubprocesses } from "./subprocess-tracker.ts";
23
+ import { registerCleaveProc, deregisterCleaveProc, killCleaveProc, killAllCleaveSubprocesses, cleanupOrphanedProcesses } from "./subprocess-tracker.ts";
24
24
  import * as fs from "node:fs";
25
25
  import * as path from "node:path";
26
26
  import { promisify } from "node:util";
@@ -1647,6 +1647,21 @@ export function createAssessStructuredExecutors(pi: ExtensionAPI, overrides?: As
1647
1647
  // ─── Extension ──────────────────────────────────────────────────────────────
1648
1648
 
1649
1649
  export default function cleaveExtension(pi: ExtensionAPI) {
1650
+ // ── Guard: skip cleave in child processes ───────────────────────
1651
+ // Cleave children are spawned with PI_CHILD=1. If we load cleave
1652
+ // in children, they can spawn NESTED children — exponential process
1653
+ // growth. Children should never invoke cleave tools.
1654
+ if (process.env.PI_CHILD) return;
1655
+
1656
+ // ── Kill orphaned children from previous sessions ───────────────
1657
+ // If a previous omegon session was killed (SIGKILL, crash, machine
1658
+ // reboot), its detached children may still be alive. Clean them up
1659
+ // before doing anything else.
1660
+ const orphansKilled = cleanupOrphanedProcesses();
1661
+ if (orphansKilled > 0) {
1662
+ console.warn(`[cleave] killed ${orphansKilled} orphaned subprocess(es) from a previous session`);
1663
+ }
1664
+
1650
1665
  // ── Initialize dashboard state ──────────────────────────────────
1651
1666
  emitCleaveState(pi, "idle");
1652
1667
 
@@ -1,16 +1,113 @@
1
1
  /**
2
2
  * cleave/subprocess-tracker — Process registry for cleave subprocesses.
3
3
  *
4
- * Mirrors the extraction-v2 pattern: all spawned child processes are tracked
5
- * in a Set, killed by process group (SIGTERM to -pid), and cleaned up on
6
- * session_shutdown. Prevents orphaned `pi` processes when assessments time
7
- * out or sessions exit mid-dispatch.
4
+ * All spawned child processes are tracked in a Set and killed on:
5
+ * 1. Explicit call to killAllCleaveSubprocesses() (from session_shutdown)
6
+ * 2. process.on('exit') safety net (catches crashes, SIGTERM, SIGINT,
7
+ * uncaught exceptions — anything session_shutdown misses)
8
+ * 3. PID file scan on startup (catches SIGKILL to parent, machine reboot
9
+ * with processes still running)
10
+ *
11
+ * Children are spawned with `detached: true` so we can kill their entire
12
+ * process group via `kill(-pid)`. The downside: detached children survive
13
+ * parent death by default. The exit handler and PID file compensate for this.
8
14
  */
9
15
 
10
16
  import type { ChildProcess } from "node:child_process";
17
+ import { existsSync, readFileSync, writeFileSync, unlinkSync, readdirSync } from "node:fs";
18
+ import { tmpdir } from "node:os";
19
+ import { join } from "node:path";
11
20
 
12
21
  const allCleaveProcs = new Set<ChildProcess>();
13
22
 
23
+ // ── PID file ────────────────────────────────────────────────────────────────
24
+ // Each parent process writes its tracked child PIDs to a temp file.
25
+ // On startup, cleanupOrphanedProcesses() scans for files whose parent PID
26
+ // is dead and kills the orphaned children.
27
+
28
+ const PID_FILE_PREFIX = "omegon-cleave-";
29
+ const PID_FILE_SUFFIX = ".pids";
30
+
31
+ function pidFilePath(): string {
32
+ return join(tmpdir(), `${PID_FILE_PREFIX}${process.pid}${PID_FILE_SUFFIX}`);
33
+ }
34
+
35
+ /** Write current tracked PIDs to the PID file. */
36
+ function syncPidFile(): void {
37
+ const pids = [...allCleaveProcs]
38
+ .map(p => p.pid)
39
+ .filter((pid): pid is number => pid !== undefined && pid > 0);
40
+ if (pids.length === 0) {
41
+ // No tracked children — remove the file
42
+ try { unlinkSync(pidFilePath()); } catch { /* ok */ }
43
+ return;
44
+ }
45
+ try {
46
+ writeFileSync(pidFilePath(), JSON.stringify({ parentPid: process.pid, childPids: pids }));
47
+ } catch { /* best effort */ }
48
+ }
49
+
50
+ /**
51
+ * Scan for PID files from dead parents and kill their orphaned children.
52
+ * Call this during extension initialization (before any new spawns).
53
+ * Returns the number of orphaned processes killed.
54
+ */
55
+ export function cleanupOrphanedProcesses(): number {
56
+ let killed = 0;
57
+ try {
58
+ const dir = tmpdir();
59
+ const files = readdirSync(dir).filter(
60
+ f => f.startsWith(PID_FILE_PREFIX) && f.endsWith(PID_FILE_SUFFIX),
61
+ );
62
+ for (const file of files) {
63
+ const filepath = join(dir, file);
64
+ try {
65
+ const data = JSON.parse(readFileSync(filepath, "utf-8"));
66
+ const parentPid = data?.parentPid;
67
+
68
+ // Check if the parent that wrote this file is still alive
69
+ if (parentPid && parentPid !== process.pid) {
70
+ try {
71
+ process.kill(parentPid, 0); // signal 0 = existence check
72
+ continue; // Parent alive — not orphans, skip
73
+ } catch {
74
+ // Parent dead — these are orphans, kill them
75
+ }
76
+ } else if (parentPid === process.pid) {
77
+ // Our own file from a previous lifecycle (shouldn't happen), clean up
78
+ try { unlinkSync(filepath); } catch { /* ok */ }
79
+ continue;
80
+ }
81
+
82
+ const childPids = data?.childPids;
83
+ if (Array.isArray(childPids)) {
84
+ for (const pid of childPids) {
85
+ if (typeof pid !== "number" || pid <= 0) continue;
86
+ try {
87
+ // Kill the process group (detached children have their own group)
88
+ process.kill(-pid, "SIGKILL");
89
+ killed++;
90
+ } catch {
91
+ try {
92
+ process.kill(pid, "SIGKILL");
93
+ killed++;
94
+ } catch { /* already dead */ }
95
+ }
96
+ }
97
+ }
98
+ // Remove the stale PID file
99
+ try { unlinkSync(filepath); } catch { /* ok */ }
100
+ } catch {
101
+ // Malformed file — remove it
102
+ try { unlinkSync(filepath); } catch { /* ok */ }
103
+ }
104
+ }
105
+ } catch { /* best effort — tmpdir unreadable is non-fatal */ }
106
+ return killed;
107
+ }
108
+
109
+ // ── Core tracking ───────────────────────────────────────────────────────────
110
+
14
111
  /** Kill a single subprocess by process group, with fallback to direct kill. */
15
112
  export function killCleaveProc(proc: ChildProcess): void {
16
113
  try {
@@ -20,14 +117,16 @@ export function killCleaveProc(proc: ChildProcess): void {
20
117
  }
21
118
  }
22
119
 
23
- /** Add a subprocess to the tracked set. */
120
+ /** Add a subprocess to the tracked set and update the PID file. */
24
121
  export function registerCleaveProc(proc: ChildProcess): void {
25
122
  allCleaveProcs.add(proc);
123
+ syncPidFile();
26
124
  }
27
125
 
28
- /** Remove a subprocess from the tracked set. */
126
+ /** Remove a subprocess from the tracked set and update the PID file. */
29
127
  export function deregisterCleaveProc(proc: ChildProcess): void {
30
128
  allCleaveProcs.delete(proc);
129
+ syncPidFile();
31
130
  }
32
131
 
33
132
  /**
@@ -44,7 +143,7 @@ function forceKillCleaveProc(proc: ChildProcess): void {
44
143
 
45
144
  /**
46
145
  * Kill all tracked cleave subprocesses and clear the registry.
47
- * Sends SIGTERM immediately, then SIGKILL after 5 seconds to any survivors.
146
+ * Sends SIGTERM immediately, then SIGKILL after 2 seconds to any survivors.
48
147
  * Because cleave subprocesses are spawned with `detached: true`, they will
49
148
  * NOT receive SIGHUP when the parent exits — SIGKILL escalation is required.
50
149
  */
@@ -53,20 +152,54 @@ export function killAllCleaveSubprocesses(): void {
53
152
  for (const proc of snapshot) {
54
153
  killCleaveProc(proc);
55
154
  }
56
- // Escalate: SIGKILL after 5s for any process that ignored SIGTERM.
57
- // The timer is unref'd so it does not keep the Node.js event loop alive.
155
+ // Escalate: SIGKILL after 2s for any process that ignored SIGTERM.
156
+ // NOT unref'd — we MUST keep the event loop alive long enough for this
157
+ // to fire, otherwise children may survive. 2s (not 5s) because at shutdown
158
+ // speed matters more than grace.
58
159
  if (snapshot.length > 0) {
59
160
  const escalation = setTimeout(() => {
60
161
  for (const proc of snapshot) {
61
162
  if (!proc.killed) forceKillCleaveProc(proc);
62
163
  }
63
- }, 5_000);
64
- escalation.unref();
164
+ }, 2_000);
165
+ // Do NOT unref — this timer must fire even during shutdown.
166
+ // The previous implementation used .unref() which allowed the process
167
+ // to exit before SIGKILL was sent, leaving orphaned children alive.
168
+ void escalation;
65
169
  }
66
170
  allCleaveProcs.clear();
171
+ syncPidFile();
67
172
  }
68
173
 
69
174
  /** Number of currently tracked subprocesses (for diagnostics). */
70
175
  export function cleaveTrackedProcCount(): number {
71
176
  return allCleaveProcs.size;
72
177
  }
178
+
179
+ // ── Process exit safety net ─────────────────────────────────────────────────
180
+ //
181
+ // This is the critical fix for orphaned `pi` processes.
182
+ //
183
+ // `process.on('exit')` fires synchronously when the parent exits for ANY
184
+ // reason: normal exit, uncaught exception, SIGTERM, SIGINT. It does NOT
185
+ // fire on SIGKILL (which is why we also have the PID file mechanism).
186
+ //
187
+ // `process.kill()` is synchronous — safe to call inside an exit handler.
188
+ // We send SIGKILL (not SIGTERM) because at this point the parent is dying
189
+ // and we can't wait for graceful shutdown.
190
+ //
191
+ // This handler fires AFTER session_shutdown (which sends SIGTERM).
192
+ // If children are already dead from SIGTERM, the SIGKILL throws ESRCH
193
+ // and we catch it — no harm done.
194
+
195
+ process.on("exit", () => {
196
+ for (const proc of allCleaveProcs) {
197
+ try {
198
+ if (proc.pid) process.kill(-proc.pid, "SIGKILL");
199
+ } catch {
200
+ try { proc.kill("SIGKILL"); } catch { /* already dead */ }
201
+ }
202
+ }
203
+ // Clean up PID file — no orphans to track if we killed everything
204
+ try { unlinkSync(pidFilePath()); } catch { /* ok */ }
205
+ });