@jhizzard/termdeck 0.10.4 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/packages/cli/src/init-rumen.js +153 -83
- package/packages/client/public/app.js +207 -4
- package/packages/client/public/flashback-history.html +331 -0
- package/packages/client/public/flashback-history.js +258 -0
- package/packages/client/public/graph-controls.js +217 -0
- package/packages/client/public/graph.html +36 -0
- package/packages/client/public/graph.js +131 -15
- package/packages/client/public/index.html +25 -0
- package/packages/client/public/style.css +230 -0
- package/packages/server/src/config.js +49 -0
- package/packages/server/src/database.js +49 -1
- package/packages/server/src/flashback-diag.js +187 -13
- package/packages/server/src/index.js +132 -19
- package/packages/server/src/projects-routes.js +119 -0
- package/packages/server/src/pty-reaper.js +297 -0
- package/packages/server/src/setup/index.js +1 -0
- package/packages/server/src/setup/migration-templating.js +76 -0
- package/packages/server/src/setup/migrations.js +44 -4
- package/packages/server/src/setup/rumen/functions/graph-inference/index.ts +381 -0
- package/packages/server/src/setup/rumen/functions/graph-inference/tsconfig.json +14 -0
|
@@ -62,11 +62,13 @@ const { TranscriptWriter } = require('./transcripts');
|
|
|
62
62
|
const { createHealthHandler, runPreflight } = require('./preflight');
|
|
63
63
|
const { getFullHealth } = require('./health');
|
|
64
64
|
const { themes, statusColors } = require('./themes');
|
|
65
|
-
const { loadConfig, addProject, updateConfig } = require('./config');
|
|
65
|
+
const { loadConfig, addProject, removeProject, updateConfig } = require('./config');
|
|
66
66
|
const { createAuthMiddleware, verifyWebSocketUpgrade, hasAuth } = require('./auth');
|
|
67
67
|
const { createSprintRoutes } = require('./sprint-routes');
|
|
68
68
|
const { createGraphRoutes } = require('./graph-routes');
|
|
69
|
+
const { createProjectsRoutes } = require('./projects-routes');
|
|
69
70
|
const orchestrationPreview = require('./orchestration-preview');
|
|
71
|
+
const { createPtyReaper } = require('./pty-reaper');
|
|
70
72
|
|
|
71
73
|
// Sprint 37 T3 — lazy resolution of T2's CLI modules. The orchestration-preview
|
|
72
74
|
// helper is decoupled from T2's templates.js / init-project.js; we resolve
|
|
@@ -167,6 +169,33 @@ function createServer(config) {
|
|
|
167
169
|
// Initialize session manager
|
|
168
170
|
const sessions = new SessionManager(db);
|
|
169
171
|
|
|
172
|
+
// PTY orphan reaper (Sprint 42 T2). Periodically walks the live process
|
|
173
|
+
// tree, tracks descendants of each session's shell PTY, and SIGTERMs any
|
|
174
|
+
// that survive the leader's death — closing the kern.tty.ptmx_max leak
|
|
175
|
+
// path that bit Joshua on 2026-04-28 (forkpty: Device not configured).
|
|
176
|
+
// Skipped when node-pty is unavailable (no PTYs to reap) and when the
|
|
177
|
+
// explicit kill switch is set (tests / opt-out).
|
|
178
|
+
const ptyReaperEnabled = pty
|
|
179
|
+
&& process.env.TERMDECK_PTY_REAPER !== 'off'
|
|
180
|
+
&& config.ptyReaper?.enabled !== false;
|
|
181
|
+
const ptyReaperIntervalMs = Number.parseInt(
|
|
182
|
+
process.env.TERMDECK_PTY_REAPER_INTERVAL_MS
|
|
183
|
+
|| config.ptyReaper?.intervalMs
|
|
184
|
+
|| 30000,
|
|
185
|
+
10
|
|
186
|
+
);
|
|
187
|
+
const ptyReaper = ptyReaperEnabled
|
|
188
|
+
? createPtyReaper({ sessions, intervalMs: ptyReaperIntervalMs })
|
|
189
|
+
: null;
|
|
190
|
+
if (ptyReaper) {
|
|
191
|
+
ptyReaper.start();
|
|
192
|
+
console.log(`[pty-reaper] enabled (interval ${ptyReaperIntervalMs}ms)`);
|
|
193
|
+
} else if (!pty) {
|
|
194
|
+
console.log('[pty-reaper] disabled (node-pty unavailable)');
|
|
195
|
+
} else {
|
|
196
|
+
console.log('[pty-reaper] disabled by config');
|
|
197
|
+
}
|
|
198
|
+
|
|
170
199
|
// Initialize RAG + Mnestra bridge
|
|
171
200
|
const rag = new RAGIntegration(config, db);
|
|
172
201
|
const mnestraBridge = createBridge(config);
|
|
@@ -882,10 +911,22 @@ function createServer(config) {
|
|
|
882
911
|
return;
|
|
883
912
|
}
|
|
884
913
|
if (sess.ws && sess.ws.readyState === 1) {
|
|
885
|
-
|
|
914
|
+
// Sprint 43 T2: persist the fire to flashback_events BEFORE
|
|
915
|
+
// serializing the WS frame so we can include the row id. The
|
|
916
|
+
// client uses flashback_event_id to POST dismiss/click-through
|
|
917
|
+
// updates back to the audit dashboard.
|
|
918
|
+
const flashback_event_id = flashbackDiag.recordFlashback(db, {
|
|
919
|
+
sessionId: sess.id,
|
|
920
|
+
project: sess.meta.project || null,
|
|
921
|
+
error_text: question,
|
|
922
|
+
hits_count: count,
|
|
923
|
+
top_hit_id: hit.id || null,
|
|
924
|
+
top_hit_score: typeof hit.similarity === 'number' ? hit.similarity : null,
|
|
925
|
+
});
|
|
926
|
+
const frame = JSON.stringify({ type: 'proactive_memory', hit, flashback_event_id });
|
|
886
927
|
try {
|
|
887
928
|
sess.ws.send(frame);
|
|
888
|
-
console.log(`[flashback] proactive_memory sent to session ${sess.id} (source_type=${hit.source_type}, project=${hit.project})`);
|
|
929
|
+
console.log(`[flashback] proactive_memory sent to session ${sess.id} (source_type=${hit.source_type}, project=${hit.project}, event_id=${flashback_event_id})`);
|
|
889
930
|
flashbackDiag.log({
|
|
890
931
|
sessionId: sess.id,
|
|
891
932
|
event: 'proactive_memory_emit',
|
|
@@ -893,6 +934,7 @@ function createServer(config) {
|
|
|
893
934
|
frame_size_bytes: Buffer.byteLength(frame, 'utf8'),
|
|
894
935
|
result_count_in_frame: 1,
|
|
895
936
|
outcome: 'emitted',
|
|
937
|
+
flashback_event_id,
|
|
896
938
|
});
|
|
897
939
|
} catch (err) {
|
|
898
940
|
console.error('[flashback] proactive_memory send failed:', err);
|
|
@@ -1262,20 +1304,29 @@ function createServer(config) {
|
|
|
1262
1304
|
res.json(payload);
|
|
1263
1305
|
});
|
|
1264
1306
|
|
|
1265
|
-
// POST /api/projects
|
|
1266
|
-
//
|
|
1267
|
-
//
|
|
1268
|
-
//
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1307
|
+
// POST /api/projects (add) + DELETE /api/projects/:name (remove) — Sprint 42
|
|
1308
|
+
// T4 extracted both into projects-routes.js so tests can drive them without
|
|
1309
|
+
// bootstrapping the full server. Sessions are passed via getSessions() so
|
|
1310
|
+
// DELETE can enforce the 409 live-PTY guard. Files on disk at the project's
|
|
1311
|
+
// `path` are NEVER touched by remove — only the YAML entry is rewritten.
|
|
1312
|
+
createProjectsRoutes({
|
|
1313
|
+
app,
|
|
1314
|
+
config,
|
|
1315
|
+
getSessions: () => sessions.getAll(),
|
|
1316
|
+
addProject,
|
|
1317
|
+
removeProject,
|
|
1318
|
+
broadcast: (payload) => {
|
|
1319
|
+
try {
|
|
1320
|
+
const wsPayload = JSON.stringify(payload);
|
|
1321
|
+
wss.clients.forEach((client) => {
|
|
1322
|
+
if (client.readyState === 1) {
|
|
1323
|
+
try { client.send(wsPayload); } catch (err) { console.error('[ws] projects_changed send failed:', err); }
|
|
1324
|
+
}
|
|
1325
|
+
});
|
|
1326
|
+
} catch (err) {
|
|
1327
|
+
console.error('[ws] projects_changed broadcast failed:', err);
|
|
1328
|
+
}
|
|
1329
|
+
},
|
|
1279
1330
|
});
|
|
1280
1331
|
|
|
1281
1332
|
// GET /api/projects/:name/orchestration-preview — Sprint 37 T3.
|
|
@@ -1404,6 +1455,63 @@ function createServer(config) {
|
|
|
1404
1455
|
res.json({ count: events.length, events });
|
|
1405
1456
|
});
|
|
1406
1457
|
|
|
1458
|
+
// GET /api/flashback/history - Sprint 43 T2 durable audit dashboard.
|
|
1459
|
+
// Returns the most-recent flashback fires from SQLite (survives restart)
|
|
1460
|
+
// plus the click-through funnel aggregate. The dashboard uses one fetch
|
|
1461
|
+
// for both so it can render the table and the funnel in lockstep.
|
|
1462
|
+
// Optional filters: ?since=<ISO8601>, ?limit=N (default 100, max 500).
|
|
1463
|
+
app.get('/api/flashback/history', (req, res) => {
|
|
1464
|
+
const rawSince = req.query && req.query.since;
|
|
1465
|
+
const since = (typeof rawSince === 'string' && rawSince.length) ? rawSince : undefined;
|
|
1466
|
+
const rawLimit = req.query && req.query.limit;
|
|
1467
|
+
const limit = rawLimit != null ? parseInt(rawLimit, 10) : undefined;
|
|
1468
|
+
const events = flashbackDiag.getRecentFlashbacks(db, {
|
|
1469
|
+
since,
|
|
1470
|
+
limit: Number.isFinite(limit) && limit > 0 ? limit : undefined,
|
|
1471
|
+
});
|
|
1472
|
+
const funnel = flashbackDiag.getFunnelStats(db, { since });
|
|
1473
|
+
res.json({ count: events.length, events, funnel });
|
|
1474
|
+
});
|
|
1475
|
+
|
|
1476
|
+
// POST /api/flashback/:id/dismissed - mark a flashback toast as dismissed.
|
|
1477
|
+
// Called by the client when the user clicks ×, presses Escape, lets the
|
|
1478
|
+
// 30s auto-timer fire, OR clicks "Not relevant" / "Dismiss" in the modal.
|
|
1479
|
+
// Idempotent: subsequent calls are no-ops (first dismiss timestamp wins).
|
|
1480
|
+
app.post('/api/flashback/:id/dismissed', (req, res) => {
|
|
1481
|
+
const id = parseInt(req.params.id, 10);
|
|
1482
|
+
if (!Number.isFinite(id) || id <= 0) {
|
|
1483
|
+
return res.status(400).json({ error: 'Invalid id' });
|
|
1484
|
+
}
|
|
1485
|
+
const updated = flashbackDiag.markDismissed(db, id);
|
|
1486
|
+
res.json({ ok: true, updated });
|
|
1487
|
+
});
|
|
1488
|
+
|
|
1489
|
+
// POST /api/flashback/:id/clicked - mark a flashback toast as clicked-
|
|
1490
|
+
// through (user opened the modal). Click-through is also an implicit
|
|
1491
|
+
// dismiss, so this updates dismissed_at if it's still NULL. Idempotent.
|
|
1492
|
+
app.post('/api/flashback/:id/clicked', (req, res) => {
|
|
1493
|
+
const id = parseInt(req.params.id, 10);
|
|
1494
|
+
if (!Number.isFinite(id) || id <= 0) {
|
|
1495
|
+
return res.status(400).json({ error: 'Invalid id' });
|
|
1496
|
+
}
|
|
1497
|
+
const updated = flashbackDiag.markClickedThrough(db, id);
|
|
1498
|
+
res.json({ ok: true, updated });
|
|
1499
|
+
});
|
|
1500
|
+
|
|
1501
|
+
// GET /api/pty-reaper/status — Sprint 42 T2 observability surface.
|
|
1502
|
+
// Returns the live registry (per-session PTY pid + tracked descendants) and
|
|
1503
|
+
// the reaped-history ring buffer so heavy-use installs can tell whether the
|
|
1504
|
+
// reaper is firing and what it's killing. Read-only.
|
|
1505
|
+
app.get('/api/pty-reaper/status', (req, res) => {
|
|
1506
|
+
if (!ptyReaper) {
|
|
1507
|
+
return res.json({
|
|
1508
|
+
enabled: false,
|
|
1509
|
+
reason: !pty ? 'node-pty-unavailable' : 'disabled-by-config',
|
|
1510
|
+
});
|
|
1511
|
+
}
|
|
1512
|
+
res.json({ enabled: true, ...ptyReaper.status() });
|
|
1513
|
+
});
|
|
1514
|
+
|
|
1407
1515
|
// ==================== Transcript endpoints (Sprint 6 T3) ====================
|
|
1408
1516
|
|
|
1409
1517
|
// GET /api/transcripts/search - FTS across all sessions
|
|
@@ -1757,7 +1865,7 @@ function createServer(config) {
|
|
|
1757
1865
|
res.sendFile(path.join(clientDir, 'index.html'));
|
|
1758
1866
|
});
|
|
1759
1867
|
|
|
1760
|
-
return { app, server, wss, sessions, rag, db, transcriptWriter };
|
|
1868
|
+
return { app, server, wss, sessions, rag, db, transcriptWriter, ptyReaper };
|
|
1761
1869
|
}
|
|
1762
1870
|
|
|
1763
1871
|
// ==================== Setup-configure helpers (Sprint 23 T2) ====================
|
|
@@ -1975,7 +2083,7 @@ if (require.main === module) {
|
|
|
1975
2083
|
}
|
|
1976
2084
|
}
|
|
1977
2085
|
|
|
1978
|
-
const { server, transcriptWriter } = createServer(config);
|
|
2086
|
+
const { server, transcriptWriter, ptyReaper } = createServer(config);
|
|
1979
2087
|
|
|
1980
2088
|
// Graceful shutdown — flush transcript buffer before exit
|
|
1981
2089
|
let shutdownInProgress = false;
|
|
@@ -1983,6 +2091,11 @@ if (require.main === module) {
|
|
|
1983
2091
|
if (shutdownInProgress) return;
|
|
1984
2092
|
shutdownInProgress = true;
|
|
1985
2093
|
console.log(`\n[server] ${signal} received, shutting down...`);
|
|
2094
|
+
if (ptyReaper) {
|
|
2095
|
+
try { ptyReaper.stop(); } catch (err) {
|
|
2096
|
+
console.error('[pty-reaper] stop failed:', err.message);
|
|
2097
|
+
}
|
|
2098
|
+
}
|
|
1986
2099
|
if (transcriptWriter) {
|
|
1987
2100
|
console.log('[transcript] Flushing buffer before exit...');
|
|
1988
2101
|
try { await transcriptWriter.close(); } catch (err) {
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
// Projects routes — POST /api/projects (add) + DELETE /api/projects/:name
|
|
2
|
+
// (remove) extracted into a small factory so tests can drive them without
|
|
3
|
+
// bootstrapping the full server. Sprint 42 T4.
|
|
4
|
+
//
|
|
5
|
+
// Surface contract:
|
|
6
|
+
//
|
|
7
|
+
// POST /api/projects → add (existing v0.2 behavior)
|
|
8
|
+
// DELETE /api/projects/:name[?force=true] → remove
|
|
9
|
+
//
|
|
10
|
+
// DELETE semantics:
|
|
11
|
+
// - 404 if the project is not in config.yaml
|
|
12
|
+
// - 409 if any live PTY session has meta.project === name (i.e.
|
|
13
|
+
// meta.status !== 'exited'), unless ?force=true is set
|
|
14
|
+
// - On success: rewrites ~/.termdeck/config.yaml (with .bak), updates the
|
|
15
|
+
// in-memory config map, broadcasts `projects_changed` to all WS clients,
|
|
16
|
+
// and returns { ok, removed, projects, files_on_disk: 'untouched' }
|
|
17
|
+
//
|
|
18
|
+
// File contents at the project's `path` are NEVER touched here — the user's
|
|
19
|
+
// source code stays put. The dashboard modal copy reflects this so users
|
|
20
|
+
// don't fear data loss.
|
|
21
|
+
|
|
22
|
+
/**
 * Register the project add/remove HTTP routes on an Express-style `app`.
 *
 * Routes registered:
 *   POST   /api/projects                      → add a project
 *   DELETE /api/projects/:name[?force=true]   → remove a project
 *
 * @param {object}   deps
 * @param {object}   deps.app            Router exposing `.post()` and `.delete()`.
 * @param {object}   [deps.config]       Live config object; its `projects` map is
 *                                       replaced after every successful mutation.
 * @param {Function} [deps.getSessions]  () => array of sessions carrying
 *                                       `.id` and `.meta.{project,status}`.
 * @param {Function} deps.addProject     (opts) => updated projects map.
 * @param {Function} deps.removeProject  (name) => updated projects map. May throw
 *                                       errors tagged `code: 'NOT_FOUND'` or
 *                                       `code: 'BAD_NAME'`.
 * @param {Function} [deps.broadcast]    ({ type, projects }) => void.
 * @throws {Error} When `app`, `addProject` or `removeProject` is missing.
 */
function createProjectsRoutes({
  app,
  config,
  getSessions,   // () => array of session objects with .meta.{project,status}
  addProject,    // (opts) => updated projects map (mutates config.yaml)
  removeProject, // (name) => updated projects map (mutates config.yaml)
  broadcast,     // ({ type, projects }) => void  (optional)
}) {
  if (!app) throw new Error('createProjectsRoutes: app is required');
  if (typeof addProject !== 'function') throw new Error('createProjectsRoutes: addProject is required');
  if (typeof removeProject !== 'function') throw new Error('createProjectsRoutes: removeProject is required');

  // Broadcasting is best-effort: a failing listener must never turn a
  // successful config mutation into an HTTP error.
  const safeBroadcast = (payload) => {
    if (typeof broadcast !== 'function') return;
    try { broadcast(payload); }
    catch (err) { console.error('[projects-routes] broadcast failed:', err); }
  };

  // Publish the new projects map to the in-memory config (when one was
  // supplied) and to every connected client.
  const publishProjects = (updatedProjects) => {
    // `config` is optional elsewhere in this module, so guard the write:
    // the mutation has already been persisted and must not be reported as a
    // failure just because there is no in-memory config to refresh.
    if (config) config.projects = updatedProjects;
    safeBroadcast({ type: 'projects_changed', projects: updatedProjects });
  };

  // POST /api/projects — add a project, persist to config.yaml, broadcast.
  // Body: { name, path, defaultTheme?, defaultCommand? }
  app.post('/api/projects', (req, res) => {
    const { name, path: projectPath, defaultTheme, defaultCommand } = req.body || {};
    try {
      const updatedProjects = addProject({ name, path: projectPath, defaultTheme, defaultCommand });
      publishProjects(updatedProjects);
      res.json({ ok: true, projects: updatedProjects });
    } catch (err) {
      console.error('[config] addProject failed:', err.message);
      res.status(400).json({ error: err.message });
    }
  });

  // DELETE /api/projects/:name — remove a project. ?force=true to override
  // the live-session 409 guard. Files on disk are untouched.
  app.delete('/api/projects/:name', (req, res) => {
    const name = req.params.name;
    if (!name || !/^[A-Za-z0-9_.-]+$/.test(name)) {
      return res.status(400).json({ error: 'Project name must be non-empty and contain only letters, digits, . _ or -' });
    }

    const projects = (config && config.projects) || {};
    // Own-property check: names such as "constructor" or "toString" satisfy
    // the regex above and would otherwise resolve to inherited members of
    // Object.prototype, slipping past the 404 guard.
    const isKnownProject = Object.prototype.hasOwnProperty.call(projects, name) && Boolean(projects[name]);
    if (!isKnownProject) {
      return res.status(404).json({ error: `Project "${name}" not found` });
    }

    const force = req.query && (req.query.force === 'true' || req.query.force === '1');

    // A session counts as live for this project unless its status is 'exited'.
    // If the session lookup itself fails the guard is skipped (logged below)
    // rather than blocking the removal.
    let liveSessions = [];
    try {
      const all = (typeof getSessions === 'function' ? getSessions() : []) || [];
      liveSessions = all.filter((s) => {
        if (!s || !s.meta) return false;
        return s.meta.project === name && s.meta.status !== 'exited';
      });
    } catch (err) {
      console.error('[projects-routes] getSessions failed:', err);
      liveSessions = [];
    }

    if (liveSessions.length > 0 && !force) {
      return res.status(409).json({
        error: `Project "${name}" has ${liveSessions.length} live PTY session${liveSessions.length === 1 ? '' : 's'}. Close them first, or pass ?force=true.`,
        liveSessions: liveSessions.length,
        sessionIds: liveSessions.map((s) => s.id).filter(Boolean),
      });
    }

    let updatedProjects;
    try {
      updatedProjects = removeProject(name);
    } catch (err) {
      // Map the tagged errors onto their HTTP equivalents; anything else is
      // an unexpected server-side failure.
      if (err && err.code === 'NOT_FOUND') {
        return res.status(404).json({ error: err.message });
      }
      if (err && err.code === 'BAD_NAME') {
        return res.status(400).json({ error: err.message });
      }
      console.error('[config] removeProject failed:', err.message);
      return res.status(500).json({ error: err.message });
    }

    publishProjects(updatedProjects);

    res.json({
      ok: true,
      removed: name,
      forced: !!force,
      projects: updatedProjects,
      files_on_disk: 'untouched',
    });
  });
}
|
|
116
|
+
|
|
117
|
+
module.exports = {
|
|
118
|
+
createProjectsRoutes,
|
|
119
|
+
};
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
// PTY orphan reaper (Sprint 42 T2).
|
|
2
|
+
//
|
|
3
|
+
// Each TermDeck session spawns one shell PTY (`term.pid` from node-pty). That
|
|
4
|
+
// shell typically forks Claude Code, which in turn forks MCP children
|
|
5
|
+
// (rag-system, imessage-mcp, …). When the user closes a panel TermDeck calls
|
|
6
|
+
// `term.kill()`, which delivers SIGHUP to the leader's process group — but
|
|
7
|
+
// some MCPs `setsid` to detach, escape the pgroup, and survive the parent.
|
|
8
|
+
// Reparented to launchd, those processes keep holding their PTY file
|
|
9
|
+
// descriptors, and on macOS that drains `kern.tty.ptmx_max` (511 by default).
|
|
10
|
+
// Joshua's 2026-04-28 morning incident: 585 PTY refs, `forkpty: Device not
|
|
11
|
+
// configured` blocking new terminals.
|
|
12
|
+
//
|
|
13
|
+
// This module periodically (every 30s by default) walks the live process tree
|
|
14
|
+
// and, for each known session, tracks descendants of its PTY leader. When the
|
|
15
|
+
// leader is gone or the session has transitioned to `exited`, any descendants
|
|
16
|
+
// that survived get SIGTERM'd and recorded to a ring buffer surfaced via
|
|
17
|
+
// /api/pty-reaper/status.
|
|
18
|
+
//
|
|
19
|
+
// All side-effects (`ps`, `kill`, `now`, the timer) are injectable so the
|
|
20
|
+
// tests in tests/pty-reaper.test.js can drive deterministic orphan scenarios
|
|
21
|
+
// without forking real processes.
|
|
22
|
+
//
|
|
23
|
+
// Public surface:
|
|
24
|
+
// createPtyReaper({ sessions, intervalMs?, ps?, kill?, now?, logger? })
|
|
25
|
+
// → { start(), stop(), tick(), status(), _resetForTest() }
|
|
26
|
+
|
|
27
|
+
const { execFileSync } = require('child_process');
|
|
28
|
+
|
|
29
|
+
const RING_SIZE = 200;
|
|
30
|
+
const DEFAULT_INTERVAL_MS = 30000;
|
|
31
|
+
|
|
32
|
+
// Upper bound on how long one `ps` invocation may block. execFileSync runs on
// the event loop, so without a limit a hung `ps` would stall the whole server.
const PS_TIMEOUT_MS = 5000;

/**
 * Default `ps` boundary — execFileSync is used directly (no shell).
 *
 * `-e` lists every process; the trailing `=` on each column header suppresses
 * the header row, so the output is one process per line: "<pid> <ppid> <cmd>".
 *
 * @returns {Array<{pid: number, ppid: number, command: string}>} Parsed rows.
 * @throws {Error} When `ps` cannot be spawned, exits non-zero, overflows
 *   `maxBuffer`, or exceeds PS_TIMEOUT_MS.
 */
function defaultPs() {
  const stdout = execFileSync('ps', ['-e', '-o', 'pid=,ppid=,command='], {
    encoding: 'utf8',
    maxBuffer: 8 * 1024 * 1024,
    timeout: PS_TIMEOUT_MS,
  });
  return parsePsOutput(stdout);
}
|
|
42
|
+
|
|
43
|
+
/**
 * Parse the header-less output of `ps -o pid=,ppid=,command=` into records.
 *
 * Each non-blank line must start with two whitespace-separated integers (pid
 * and ppid); whatever follows is kept verbatim as the command. Lines that do
 * not fit that shape are skipped.
 *
 * @param {string|Buffer|null|undefined} stdout Raw `ps` output.
 * @returns {Array<{pid: number, ppid: number, command: string}>} One entry per
 *   parsed line, in input order. Empty for null/undefined/empty input.
 */
function parsePsOutput(stdout) {
  if (stdout == null) return [];

  const out = [];
  for (const raw of String(stdout).split('\n')) {
    const line = raw.trim();
    if (!line) continue;
    // Two leading integers, then an OPTIONAL command. A row whose command
    // column is empty must still be kept: dropping it would cut the
    // parent → child chain that callers rebuild from pid/ppid pairs.
    const m = line.match(/^(\d+)\s+(\d+)(?:\s+(.*))?$/);
    if (!m) continue;
    const pid = Number.parseInt(m[1], 10);
    const ppid = Number.parseInt(m[2], 10);
    if (!Number.isFinite(pid) || !Number.isFinite(ppid)) continue;
    out.push({ pid, ppid, command: m[3] ?? '' });
  }
  return out;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Default `kill` boundary: forwards straight to `process.kill`.
 *
 * @param {number} pid Target process id.
 * @param {string|number} signal Signal to deliver (e.g. 'SIGTERM').
 * @returns {void} The boolean returned by `process.kill` is discarded.
 * @throws {Error} Whatever `process.kill` throws (e.g. code 'ESRCH').
 */
function defaultKill(pid, signal) {
  void process.kill(pid, signal);
}
|
|
63
|
+
|
|
64
|
+
function createPtyReaper({
|
|
65
|
+
sessions,
|
|
66
|
+
intervalMs = DEFAULT_INTERVAL_MS,
|
|
67
|
+
ps = defaultPs,
|
|
68
|
+
kill = defaultKill,
|
|
69
|
+
now = Date.now,
|
|
70
|
+
logger = console,
|
|
71
|
+
} = {}) {
|
|
72
|
+
if (!sessions) {
|
|
73
|
+
throw new Error('createPtyReaper: sessions (SessionManager) is required');
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Per-session registry: sessionId → { ptyPid, descendants:Set<pid>,
|
|
77
|
+
// firstSeenAt, lastSeenAliveAt }. Refreshed each tick while the leader is
|
|
78
|
+
// alive so when it dies we still know which descendants to chase.
|
|
79
|
+
const registry = new Map();
|
|
80
|
+
let reapedHistory = [];
|
|
81
|
+
let tickCount = 0;
|
|
82
|
+
let lastTickAt = null;
|
|
83
|
+
let lastError = null;
|
|
84
|
+
let timer = null;
|
|
85
|
+
|
|
86
|
+
function isoNow() {
|
|
87
|
+
return new Date(now()).toISOString();
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
function recordReap(entry) {
|
|
91
|
+
reapedHistory.push(entry);
|
|
92
|
+
if (reapedHistory.length > RING_SIZE) {
|
|
93
|
+
reapedHistory = reapedHistory.slice(-RING_SIZE);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
function bfsDescendants(rootPid, childrenByPpid) {
|
|
98
|
+
const out = new Set();
|
|
99
|
+
const stack = [rootPid];
|
|
100
|
+
const seen = new Set([rootPid]);
|
|
101
|
+
while (stack.length) {
|
|
102
|
+
const cur = stack.pop();
|
|
103
|
+
const kids = childrenByPpid.get(cur);
|
|
104
|
+
if (!kids) continue;
|
|
105
|
+
for (const kid of kids) {
|
|
106
|
+
if (seen.has(kid.pid)) continue;
|
|
107
|
+
seen.add(kid.pid);
|
|
108
|
+
out.add(kid.pid);
|
|
109
|
+
stack.push(kid.pid);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
return out;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
function iterSessions() {
|
|
116
|
+
// SessionManager.sessions is a Map<id, Session>; iterate the values
|
|
117
|
+
// directly so we get the live Session instances (not toJSON copies).
|
|
118
|
+
if (sessions.sessions && typeof sessions.sessions.values === 'function') {
|
|
119
|
+
return Array.from(sessions.sessions.values());
|
|
120
|
+
}
|
|
121
|
+
return [];
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
function tick() {
|
|
125
|
+
tickCount += 1;
|
|
126
|
+
lastTickAt = isoNow();
|
|
127
|
+
|
|
128
|
+
let snapshot;
|
|
129
|
+
try {
|
|
130
|
+
snapshot = ps();
|
|
131
|
+
} catch (err) {
|
|
132
|
+
lastError = err && err.message ? err.message : String(err);
|
|
133
|
+
if (logger && logger.error) {
|
|
134
|
+
logger.error('[pty-reaper] ps() failed:', lastError);
|
|
135
|
+
}
|
|
136
|
+
return { reaped: 0, refreshed: 0, error: lastError };
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
if (!Array.isArray(snapshot)) snapshot = [];
|
|
140
|
+
const livePids = new Set();
|
|
141
|
+
const procByPid = new Map();
|
|
142
|
+
const childrenByPpid = new Map();
|
|
143
|
+
for (const proc of snapshot) {
|
|
144
|
+
if (!proc || !Number.isFinite(proc.pid)) continue;
|
|
145
|
+
livePids.add(proc.pid);
|
|
146
|
+
procByPid.set(proc.pid, proc);
|
|
147
|
+
const kids = childrenByPpid.get(proc.ppid);
|
|
148
|
+
if (kids) kids.push(proc);
|
|
149
|
+
else childrenByPpid.set(proc.ppid, [proc]);
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
let refreshed = 0;
|
|
153
|
+
let reaped = 0;
|
|
154
|
+
const liveSessionIds = new Set();
|
|
155
|
+
|
|
156
|
+
// Pass 1: refresh registry for every known session whose leader is alive.
|
|
157
|
+
for (const session of iterSessions()) {
|
|
158
|
+
if (!session || !session.id) continue;
|
|
159
|
+
liveSessionIds.add(session.id);
|
|
160
|
+
const ptyPid = session.pid;
|
|
161
|
+
if (!Number.isFinite(ptyPid)) continue;
|
|
162
|
+
|
|
163
|
+
const leaderAlive = livePids.has(ptyPid);
|
|
164
|
+
const exited = session.meta && session.meta.status === 'exited';
|
|
165
|
+
|
|
166
|
+
if (leaderAlive && !exited) {
|
|
167
|
+
const descendants = bfsDescendants(ptyPid, childrenByPpid);
|
|
168
|
+
const existing = registry.get(session.id);
|
|
169
|
+
registry.set(session.id, {
|
|
170
|
+
ptyPid,
|
|
171
|
+
descendants,
|
|
172
|
+
firstSeenAt: existing ? existing.firstSeenAt : isoNow(),
|
|
173
|
+
lastSeenAliveAt: isoNow(),
|
|
174
|
+
});
|
|
175
|
+
refreshed += 1;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// Pass 2: for each registry entry whose leader has died OR whose session
|
|
180
|
+
// has transitioned to 'exited' (or whose Session has been removed from
|
|
181
|
+
// the manager entirely), kill any descendants still alive and drop the
|
|
182
|
+
// entry. We rely on the descendant snapshot captured by the most recent
|
|
183
|
+
// refresh — once the leader is reaped we can't BFS from a dead pid.
|
|
184
|
+
for (const [sessionId, entry] of Array.from(registry.entries())) {
|
|
185
|
+
const session = sessions.get ? sessions.get(sessionId) : null;
|
|
186
|
+
const stillRegistered = liveSessionIds.has(sessionId);
|
|
187
|
+
const leaderAlive = livePids.has(entry.ptyPid);
|
|
188
|
+
const exited = session && session.meta && session.meta.status === 'exited';
|
|
189
|
+
|
|
190
|
+
if (stillRegistered && leaderAlive && !exited) continue;
|
|
191
|
+
|
|
192
|
+
const reason = !leaderAlive
|
|
193
|
+
? 'leader_dead'
|
|
194
|
+
: exited
|
|
195
|
+
? 'session_exited'
|
|
196
|
+
: 'session_removed';
|
|
197
|
+
|
|
198
|
+
for (const descPid of entry.descendants) {
|
|
199
|
+
if (!livePids.has(descPid)) continue;
|
|
200
|
+
const meta = procByPid.get(descPid) || { pid: descPid, ppid: null, command: '' };
|
|
201
|
+
try {
|
|
202
|
+
kill(descPid, 'SIGTERM');
|
|
203
|
+
recordReap({
|
|
204
|
+
ts: isoNow(),
|
|
205
|
+
sessionId,
|
|
206
|
+
ptyPid: entry.ptyPid,
|
|
207
|
+
pid: descPid,
|
|
208
|
+
ppid: meta.ppid,
|
|
209
|
+
command: (meta.command || '').slice(0, 200),
|
|
210
|
+
reason,
|
|
211
|
+
outcome: 'signaled',
|
|
212
|
+
});
|
|
213
|
+
reaped += 1;
|
|
214
|
+
} catch (err) {
|
|
215
|
+
// ESRCH = already dead; anything else we record but don't throw.
|
|
216
|
+
const code = err && err.code ? err.code : null;
|
|
217
|
+
recordReap({
|
|
218
|
+
ts: isoNow(),
|
|
219
|
+
sessionId,
|
|
220
|
+
ptyPid: entry.ptyPid,
|
|
221
|
+
pid: descPid,
|
|
222
|
+
ppid: meta.ppid,
|
|
223
|
+
command: (meta.command || '').slice(0, 200),
|
|
224
|
+
reason,
|
|
225
|
+
outcome: code === 'ESRCH' ? 'already_dead' : 'kill_failed',
|
|
226
|
+
error: err && err.message ? err.message : String(err),
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
registry.delete(sessionId);
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
return { reaped, refreshed, error: null };
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
function start() {
|
|
237
|
+
if (timer) return;
|
|
238
|
+
timer = setInterval(() => {
|
|
239
|
+
try {
|
|
240
|
+
tick();
|
|
241
|
+
} catch (err) {
|
|
242
|
+
lastError = err && err.message ? err.message : String(err);
|
|
243
|
+
if (logger && logger.error) {
|
|
244
|
+
logger.error('[pty-reaper] tick() threw:', lastError);
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}, intervalMs);
|
|
248
|
+
if (typeof timer.unref === 'function') timer.unref();
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
function stop() {
|
|
252
|
+
if (timer) {
|
|
253
|
+
clearInterval(timer);
|
|
254
|
+
timer = null;
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
function status() {
|
|
259
|
+
const registrySnapshot = [];
|
|
260
|
+
for (const [sessionId, entry] of registry) {
|
|
261
|
+
registrySnapshot.push({
|
|
262
|
+
sessionId,
|
|
263
|
+
ptyPid: entry.ptyPid,
|
|
264
|
+
descendantPids: Array.from(entry.descendants),
|
|
265
|
+
firstSeenAt: entry.firstSeenAt,
|
|
266
|
+
lastSeenAliveAt: entry.lastSeenAliveAt,
|
|
267
|
+
});
|
|
268
|
+
}
|
|
269
|
+
return {
|
|
270
|
+
tickCount,
|
|
271
|
+
lastTickAt,
|
|
272
|
+
intervalMs,
|
|
273
|
+
lastError,
|
|
274
|
+
registry: registrySnapshot,
|
|
275
|
+
reapedCount: reapedHistory.length,
|
|
276
|
+
reapedHistory: reapedHistory.slice(),
|
|
277
|
+
};
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
function _resetForTest() {
|
|
281
|
+
stop();
|
|
282
|
+
registry.clear();
|
|
283
|
+
reapedHistory = [];
|
|
284
|
+
tickCount = 0;
|
|
285
|
+
lastTickAt = null;
|
|
286
|
+
lastError = null;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
return { start, stop, tick, status, _resetForTest };
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
module.exports = {
|
|
293
|
+
createPtyReaper,
|
|
294
|
+
parsePsOutput,
|
|
295
|
+
RING_SIZE,
|
|
296
|
+
DEFAULT_INTERVAL_MS,
|
|
297
|
+
};
|
|
@@ -10,6 +10,7 @@ module.exports = {
|
|
|
10
10
|
yaml: require('./yaml-io'),
|
|
11
11
|
supabaseUrl: require('./supabase-url'),
|
|
12
12
|
migrations: require('./migrations'),
|
|
13
|
+
migrationTemplating: require('./migration-templating'),
|
|
13
14
|
pgRunner: require('./pg-runner'),
|
|
14
15
|
migrationRunner: require('./migration-runner'),
|
|
15
16
|
preconditions: require('./preconditions')
|