agentflow-core 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-VF4FSBXR.js → chunk-BOSYI5YM.js} +322 -268
- package/dist/cli.cjs +326 -270
- package/dist/cli.js +1 -1
- package/dist/index.cjs +324 -270
- package/dist/index.js +1 -1
- package/package.json +1 -1
|
@@ -206,9 +206,265 @@ function getTraceTree(trace) {
|
|
|
206
206
|
return result;
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
+
// src/process-audit.ts
|
|
210
|
+
import { execSync } from "child_process";
|
|
211
|
+
import { existsSync, readdirSync, readFileSync, statSync } from "fs";
|
|
212
|
+
import { basename, join } from "path";
|
|
213
|
+
function isPidAlive(pid) {
|
|
214
|
+
try {
|
|
215
|
+
process.kill(pid, 0);
|
|
216
|
+
return true;
|
|
217
|
+
} catch {
|
|
218
|
+
return false;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
function pidMatchesName(pid, name) {
|
|
222
|
+
try {
|
|
223
|
+
const cmdline = readFileSync(`/proc/${pid}/cmdline`, "utf8");
|
|
224
|
+
return cmdline.includes(name);
|
|
225
|
+
} catch {
|
|
226
|
+
return false;
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
function readPidFile(path) {
|
|
230
|
+
try {
|
|
231
|
+
const pid = parseInt(readFileSync(path, "utf8").trim(), 10);
|
|
232
|
+
return isNaN(pid) ? null : pid;
|
|
233
|
+
} catch {
|
|
234
|
+
return null;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
function auditPidFile(config) {
|
|
238
|
+
if (!config.pidFile) return null;
|
|
239
|
+
const pid = readPidFile(config.pidFile);
|
|
240
|
+
if (pid === null) {
|
|
241
|
+
return {
|
|
242
|
+
path: config.pidFile,
|
|
243
|
+
pid: null,
|
|
244
|
+
alive: false,
|
|
245
|
+
matchesProcess: false,
|
|
246
|
+
stale: !existsSync(config.pidFile),
|
|
247
|
+
reason: existsSync(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
const alive = isPidAlive(pid);
|
|
251
|
+
const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
|
|
252
|
+
const stale = !alive || alive && !matchesProcess;
|
|
253
|
+
let reason;
|
|
254
|
+
if (alive && matchesProcess) {
|
|
255
|
+
reason = `PID ${pid} alive and matches ${config.processName}`;
|
|
256
|
+
} else if (alive && !matchesProcess) {
|
|
257
|
+
reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
|
|
258
|
+
} else {
|
|
259
|
+
reason = `PID ${pid} no longer exists`;
|
|
260
|
+
}
|
|
261
|
+
return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
|
|
262
|
+
}
|
|
263
|
+
function auditSystemd(config) {
|
|
264
|
+
if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
|
|
265
|
+
const unit = config.systemdUnit;
|
|
266
|
+
try {
|
|
267
|
+
const raw = execSync(
|
|
268
|
+
`systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
|
|
269
|
+
{ encoding: "utf8", timeout: 5e3 }
|
|
270
|
+
);
|
|
271
|
+
const props = {};
|
|
272
|
+
for (const line of raw.trim().split("\n")) {
|
|
273
|
+
const [k, ...v] = line.split("=");
|
|
274
|
+
if (k) props[k.trim()] = v.join("=").trim();
|
|
275
|
+
}
|
|
276
|
+
const activeState = props["ActiveState"] ?? "unknown";
|
|
277
|
+
const subState = props["SubState"] ?? "unknown";
|
|
278
|
+
const mainPid = parseInt(props["MainPID"] ?? "0", 10);
|
|
279
|
+
const restarts = parseInt(props["NRestarts"] ?? "0", 10);
|
|
280
|
+
const result = props["Result"] ?? "unknown";
|
|
281
|
+
return {
|
|
282
|
+
unit,
|
|
283
|
+
activeState,
|
|
284
|
+
subState,
|
|
285
|
+
mainPid,
|
|
286
|
+
restarts,
|
|
287
|
+
result,
|
|
288
|
+
crashLooping: activeState === "activating" && subState === "auto-restart",
|
|
289
|
+
failed: activeState === "failed"
|
|
290
|
+
};
|
|
291
|
+
} catch {
|
|
292
|
+
return null;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
function auditWorkers(config) {
|
|
296
|
+
if (!config.workersFile || !existsSync(config.workersFile)) return null;
|
|
297
|
+
try {
|
|
298
|
+
const data = JSON.parse(readFileSync(config.workersFile, "utf8"));
|
|
299
|
+
const orchPid = data.pid ?? null;
|
|
300
|
+
const orchAlive = orchPid ? isPidAlive(orchPid) : false;
|
|
301
|
+
const workers = [];
|
|
302
|
+
for (const [name, info] of Object.entries(data.tools ?? {})) {
|
|
303
|
+
const w = info;
|
|
304
|
+
const wPid = w.pid ?? null;
|
|
305
|
+
const wAlive = wPid ? isPidAlive(wPid) : false;
|
|
306
|
+
workers.push({
|
|
307
|
+
name,
|
|
308
|
+
pid: wPid,
|
|
309
|
+
declaredStatus: w.status ?? "unknown",
|
|
310
|
+
alive: wAlive,
|
|
311
|
+
stale: w.status === "running" && !wAlive
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
return {
|
|
315
|
+
orchestratorPid: orchPid,
|
|
316
|
+
orchestratorAlive: orchAlive,
|
|
317
|
+
startedAt: data.started_at ?? "",
|
|
318
|
+
workers
|
|
319
|
+
};
|
|
320
|
+
} catch {
|
|
321
|
+
return null;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
function getOsProcesses(processName) {
|
|
325
|
+
try {
|
|
326
|
+
const raw = execSync(`ps aux`, { encoding: "utf8", timeout: 5e3 });
|
|
327
|
+
return raw.split("\n").filter((line) => line.includes(processName) && !line.includes("process-audit") && !line.includes("grep")).map((line) => {
|
|
328
|
+
const parts = line.trim().split(/\s+/);
|
|
329
|
+
return {
|
|
330
|
+
pid: parseInt(parts[1] ?? "0", 10),
|
|
331
|
+
cpu: parts[2] ?? "0",
|
|
332
|
+
mem: parts[3] ?? "0",
|
|
333
|
+
command: parts.slice(10).join(" ")
|
|
334
|
+
};
|
|
335
|
+
}).filter((p) => !isNaN(p.pid) && p.pid > 0);
|
|
336
|
+
} catch {
|
|
337
|
+
return [];
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
function discoverProcessConfig(dirs) {
|
|
341
|
+
let pidFile;
|
|
342
|
+
let workersFile;
|
|
343
|
+
let processName = "";
|
|
344
|
+
for (const dir of dirs) {
|
|
345
|
+
if (!existsSync(dir)) continue;
|
|
346
|
+
let entries;
|
|
347
|
+
try {
|
|
348
|
+
entries = readdirSync(dir);
|
|
349
|
+
} catch {
|
|
350
|
+
continue;
|
|
351
|
+
}
|
|
352
|
+
for (const f of entries) {
|
|
353
|
+
const fp = join(dir, f);
|
|
354
|
+
try {
|
|
355
|
+
if (!statSync(fp).isFile()) continue;
|
|
356
|
+
} catch {
|
|
357
|
+
continue;
|
|
358
|
+
}
|
|
359
|
+
if (f.endsWith(".pid") && !pidFile) {
|
|
360
|
+
pidFile = fp;
|
|
361
|
+
if (!processName) {
|
|
362
|
+
processName = basename(f, ".pid");
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
|
|
366
|
+
workersFile = fp;
|
|
367
|
+
if (!processName && f !== "workers.json") {
|
|
368
|
+
processName = basename(f, "-workers.json");
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
if (!processName && !pidFile && !workersFile) return null;
|
|
374
|
+
if (!processName) processName = "agent";
|
|
375
|
+
return { processName, pidFile, workersFile };
|
|
376
|
+
}
|
|
377
|
+
function auditProcesses(config) {
|
|
378
|
+
const pidFile = auditPidFile(config);
|
|
379
|
+
const systemd = auditSystemd(config);
|
|
380
|
+
const workers = auditWorkers(config);
|
|
381
|
+
const osProcesses = getOsProcesses(config.processName);
|
|
382
|
+
const knownPids = /* @__PURE__ */ new Set();
|
|
383
|
+
if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
|
|
384
|
+
if (workers) {
|
|
385
|
+
if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
|
|
386
|
+
for (const w of workers.workers) {
|
|
387
|
+
if (w.pid) knownPids.add(w.pid);
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
if (systemd?.mainPid) knownPids.add(systemd.mainPid);
|
|
391
|
+
const orphans = osProcesses.filter((p) => !knownPids.has(p.pid));
|
|
392
|
+
const problems = [];
|
|
393
|
+
if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
|
|
394
|
+
if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
|
|
395
|
+
if (systemd?.failed) problems.push("Systemd unit has failed");
|
|
396
|
+
if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
|
|
397
|
+
if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
|
|
398
|
+
problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
|
|
399
|
+
}
|
|
400
|
+
if (workers) {
|
|
401
|
+
for (const w of workers.workers) {
|
|
402
|
+
if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
|
|
406
|
+
return { pidFile, systemd, workers, osProcesses, orphans, problems };
|
|
407
|
+
}
|
|
408
|
+
function formatAuditReport(result) {
|
|
409
|
+
const lines = [];
|
|
410
|
+
lines.push("");
|
|
411
|
+
lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
412
|
+
lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
|
|
413
|
+
lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
414
|
+
if (result.pidFile) {
|
|
415
|
+
const pf = result.pidFile;
|
|
416
|
+
const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
|
|
417
|
+
lines.push(`
|
|
418
|
+
PID File: ${pf.path}`);
|
|
419
|
+
lines.push(` ${icon} ${pf.reason}`);
|
|
420
|
+
}
|
|
421
|
+
if (result.systemd) {
|
|
422
|
+
const sd = result.systemd;
|
|
423
|
+
const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
|
|
424
|
+
lines.push(`
|
|
425
|
+
Systemd: ${sd.unit}`);
|
|
426
|
+
lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
|
|
427
|
+
lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
|
|
428
|
+
}
|
|
429
|
+
if (result.workers) {
|
|
430
|
+
const w = result.workers;
|
|
431
|
+
lines.push(`
|
|
432
|
+
Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
|
|
433
|
+
for (const worker of w.workers) {
|
|
434
|
+
const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
|
|
435
|
+
lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
if (result.osProcesses.length > 0) {
|
|
439
|
+
lines.push(`
|
|
440
|
+
OS Processes (${result.osProcesses.length} total)`);
|
|
441
|
+
for (const p of result.osProcesses) {
|
|
442
|
+
lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
if (result.orphans.length > 0) {
|
|
446
|
+
lines.push(`
|
|
447
|
+
\u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
|
|
448
|
+
for (const p of result.orphans) {
|
|
449
|
+
lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
lines.push("");
|
|
453
|
+
if (result.problems.length === 0) {
|
|
454
|
+
lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
|
|
455
|
+
} else {
|
|
456
|
+
lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
|
|
457
|
+
for (const p of result.problems) {
|
|
458
|
+
lines.push(` \u2022 ${p}`);
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
lines.push("");
|
|
462
|
+
return lines.join("\n");
|
|
463
|
+
}
|
|
464
|
+
|
|
209
465
|
// src/live.ts
|
|
210
|
-
import { existsSync, readdirSync, readFileSync, statSync, watch } from "fs";
|
|
211
|
-
import { basename, join, resolve } from "path";
|
|
466
|
+
import { existsSync as existsSync2, readdirSync as readdirSync2, readFileSync as readFileSync2, statSync as statSync2, watch } from "fs";
|
|
467
|
+
import { basename as basename2, join as join2, resolve } from "path";
|
|
212
468
|
var C = {
|
|
213
469
|
reset: "\x1B[0m",
|
|
214
470
|
bold: "\x1B[1m",
|
|
@@ -283,7 +539,7 @@ function scanFiles(dirs, recursive) {
|
|
|
283
539
|
const seen = /* @__PURE__ */ new Set();
|
|
284
540
|
function scanDir(d, topLevel) {
|
|
285
541
|
try {
|
|
286
|
-
const dirStat =
|
|
542
|
+
const dirStat = statSync2(d);
|
|
287
543
|
const dirMtime = dirStat.mtime.getTime();
|
|
288
544
|
const cachedMtime = dirMtimeCache.get(d);
|
|
289
545
|
if (cachedMtime === dirMtime) {
|
|
@@ -299,13 +555,13 @@ function scanFiles(dirs, recursive) {
|
|
|
299
555
|
}
|
|
300
556
|
}
|
|
301
557
|
const dirResults = [];
|
|
302
|
-
for (const f of
|
|
558
|
+
for (const f of readdirSync2(d)) {
|
|
303
559
|
if (f.startsWith(".")) continue;
|
|
304
|
-
const fp =
|
|
560
|
+
const fp = join2(d, f);
|
|
305
561
|
if (seen.has(fp)) continue;
|
|
306
562
|
let stat;
|
|
307
563
|
try {
|
|
308
|
-
stat =
|
|
564
|
+
stat = statSync2(fp);
|
|
309
565
|
} catch {
|
|
310
566
|
continue;
|
|
311
567
|
}
|
|
@@ -337,13 +593,13 @@ function scanFiles(dirs, recursive) {
|
|
|
337
593
|
}
|
|
338
594
|
function safeReadJson(fp) {
|
|
339
595
|
try {
|
|
340
|
-
return JSON.parse(
|
|
596
|
+
return JSON.parse(readFileSync2(fp, "utf8"));
|
|
341
597
|
} catch {
|
|
342
598
|
return null;
|
|
343
599
|
}
|
|
344
600
|
}
|
|
345
601
|
function nameFromFile(filename) {
|
|
346
|
-
return
|
|
602
|
+
return basename2(filename).replace(/\.(json|jsonl)$/, "").replace(/-state$/, "");
|
|
347
603
|
}
|
|
348
604
|
function normalizeStatus(val) {
|
|
349
605
|
if (typeof val !== "string") return "unknown";
|
|
@@ -521,7 +777,7 @@ function processJsonFile(file) {
|
|
|
521
777
|
}
|
|
522
778
|
function processJsonlFile(file) {
|
|
523
779
|
try {
|
|
524
|
-
const content =
|
|
780
|
+
const content = readFileSync2(file.path, "utf8").trim();
|
|
525
781
|
if (!content) return [];
|
|
526
782
|
const lines = content.split("\n");
|
|
527
783
|
const lineCount = lines.length;
|
|
@@ -673,6 +929,9 @@ var prevFileCount = 0;
|
|
|
673
929
|
var newExecCount = 0;
|
|
674
930
|
var sessionStart = Date.now();
|
|
675
931
|
var firstRender = true;
|
|
932
|
+
var cachedAuditConfig = null;
|
|
933
|
+
var cachedAuditResult = null;
|
|
934
|
+
var lastAuditTime = 0;
|
|
676
935
|
var fileCache = /* @__PURE__ */ new Map();
|
|
677
936
|
function getRecordsCached(f) {
|
|
678
937
|
const cached = fileCache.get(f.path);
|
|
@@ -792,6 +1051,22 @@ function render(config) {
|
|
|
792
1051
|
const level = Math.round(v / maxBucket * 8);
|
|
793
1052
|
return (failBuckets[i] > 0 ? C.red : C.green) + sparkChars[level] + C.reset;
|
|
794
1053
|
}).join("");
|
|
1054
|
+
let auditResult = null;
|
|
1055
|
+
if (now - lastAuditTime > 1e4) {
|
|
1056
|
+
if (!cachedAuditConfig) {
|
|
1057
|
+
cachedAuditConfig = discoverProcessConfig(config.dirs);
|
|
1058
|
+
}
|
|
1059
|
+
if (cachedAuditConfig) {
|
|
1060
|
+
try {
|
|
1061
|
+
auditResult = auditProcesses(cachedAuditConfig);
|
|
1062
|
+
cachedAuditResult = auditResult;
|
|
1063
|
+
lastAuditTime = now;
|
|
1064
|
+
} catch {
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
1067
|
+
} else {
|
|
1068
|
+
auditResult = cachedAuditResult;
|
|
1069
|
+
}
|
|
795
1070
|
const distributedTraces = [];
|
|
796
1071
|
if (allTraces.length > 1) {
|
|
797
1072
|
const traceGroups = groupByTraceId(allTraces);
|
|
@@ -872,6 +1147,41 @@ function render(config) {
|
|
|
872
1147
|
);
|
|
873
1148
|
writeLine(L, "");
|
|
874
1149
|
writeLine(L, ` ${C.bold}Activity (1h)${C.reset} ${spark} ${C.dim}\u2190 now${C.reset}`);
|
|
1150
|
+
if (auditResult) {
|
|
1151
|
+
const ar = auditResult;
|
|
1152
|
+
const healthy = ar.problems.length === 0;
|
|
1153
|
+
const healthIcon = healthy ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
|
|
1154
|
+
const healthLabel = healthy ? `${C.green}healthy${C.reset}` : `${C.red}${ar.problems.length} issue(s)${C.reset}`;
|
|
1155
|
+
const workerParts = [];
|
|
1156
|
+
if (ar.workers) {
|
|
1157
|
+
for (const w of ar.workers.workers) {
|
|
1158
|
+
const wIcon = w.declaredStatus === "running" && w.alive ? `${C.green}\u25CF${C.reset}` : w.stale ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
|
|
1159
|
+
workerParts.push(`${wIcon} ${w.name}`);
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
1162
|
+
let sysdLabel = "";
|
|
1163
|
+
if (ar.systemd) {
|
|
1164
|
+
const si = ar.systemd.activeState === "active" ? `${C.green}\u25CF${C.reset}` : ar.systemd.crashLooping ? `${C.yellow}\u25CF${C.reset}` : ar.systemd.failed ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
|
|
1165
|
+
sysdLabel = ` ${C.bold}Systemd${C.reset} ${si} ${ar.systemd.activeState}`;
|
|
1166
|
+
if (ar.systemd.restarts > 0) sysdLabel += ` ${C.dim}(${ar.systemd.restarts} restarts)${C.reset}`;
|
|
1167
|
+
}
|
|
1168
|
+
let pidLabel = "";
|
|
1169
|
+
if (ar.pidFile?.pid) {
|
|
1170
|
+
const pi = ar.pidFile.alive && ar.pidFile.matchesProcess ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
|
|
1171
|
+
pidLabel = ` ${C.bold}PID${C.reset} ${pi} ${ar.pidFile.pid}`;
|
|
1172
|
+
}
|
|
1173
|
+
writeLine(L, "");
|
|
1174
|
+
writeLine(L, ` ${C.bold}${C.under}Process Health${C.reset}`);
|
|
1175
|
+
writeLine(L, ` ${healthIcon} ${healthLabel}${pidLabel}${sysdLabel} ${C.bold}Procs${C.reset} ${C.dim}${ar.osProcesses.length}${C.reset} ${ar.orphans.length > 0 ? `${C.red}Orphans ${ar.orphans.length}${C.reset}` : `${C.dim}Orphans 0${C.reset}`}`);
|
|
1176
|
+
if (workerParts.length > 0) {
|
|
1177
|
+
writeLine(L, ` ${C.dim}Workers${C.reset} ${workerParts.join(" ")}`);
|
|
1178
|
+
}
|
|
1179
|
+
if (!healthy) {
|
|
1180
|
+
for (const p of ar.problems.slice(0, 3)) {
|
|
1181
|
+
writeLine(L, ` ${C.red}\u2022${C.reset} ${C.dim}${p}${C.reset}`);
|
|
1182
|
+
}
|
|
1183
|
+
}
|
|
1184
|
+
}
|
|
875
1185
|
writeLine(L, "");
|
|
876
1186
|
writeLine(
|
|
877
1187
|
L,
|
|
@@ -990,13 +1300,13 @@ function getDistDepth(dt, spanId, visited) {
|
|
|
990
1300
|
}
|
|
991
1301
|
function startLive(argv) {
|
|
992
1302
|
const config = parseArgs(argv);
|
|
993
|
-
const valid = config.dirs.filter((d) =>
|
|
1303
|
+
const valid = config.dirs.filter((d) => existsSync2(d));
|
|
994
1304
|
if (valid.length === 0) {
|
|
995
1305
|
console.error(`No valid directories found: ${config.dirs.join(", ")}`);
|
|
996
1306
|
console.error("Specify directories containing JSON/JSONL files: agentflow live <dir> [dir...]");
|
|
997
1307
|
process.exit(1);
|
|
998
1308
|
}
|
|
999
|
-
const invalid = config.dirs.filter((d) => !
|
|
1309
|
+
const invalid = config.dirs.filter((d) => !existsSync2(d));
|
|
1000
1310
|
if (invalid.length > 0) {
|
|
1001
1311
|
console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
|
|
1002
1312
|
}
|
|
@@ -1019,262 +1329,6 @@ function startLive(argv) {
|
|
|
1019
1329
|
});
|
|
1020
1330
|
}
|
|
1021
1331
|
|
|
1022
|
-
// src/process-audit.ts
|
|
1023
|
-
import { execSync } from "child_process";
|
|
1024
|
-
import { existsSync as existsSync2, readdirSync as readdirSync2, readFileSync as readFileSync2, statSync as statSync2 } from "fs";
|
|
1025
|
-
import { basename as basename2, join as join2 } from "path";
|
|
1026
|
-
function isPidAlive(pid) {
|
|
1027
|
-
try {
|
|
1028
|
-
process.kill(pid, 0);
|
|
1029
|
-
return true;
|
|
1030
|
-
} catch {
|
|
1031
|
-
return false;
|
|
1032
|
-
}
|
|
1033
|
-
}
|
|
1034
|
-
function pidMatchesName(pid, name) {
|
|
1035
|
-
try {
|
|
1036
|
-
const cmdline = readFileSync2(`/proc/${pid}/cmdline`, "utf8");
|
|
1037
|
-
return cmdline.includes(name);
|
|
1038
|
-
} catch {
|
|
1039
|
-
return false;
|
|
1040
|
-
}
|
|
1041
|
-
}
|
|
1042
|
-
function readPidFile(path) {
|
|
1043
|
-
try {
|
|
1044
|
-
const pid = parseInt(readFileSync2(path, "utf8").trim(), 10);
|
|
1045
|
-
return isNaN(pid) ? null : pid;
|
|
1046
|
-
} catch {
|
|
1047
|
-
return null;
|
|
1048
|
-
}
|
|
1049
|
-
}
|
|
1050
|
-
function auditPidFile(config) {
|
|
1051
|
-
if (!config.pidFile) return null;
|
|
1052
|
-
const pid = readPidFile(config.pidFile);
|
|
1053
|
-
if (pid === null) {
|
|
1054
|
-
return {
|
|
1055
|
-
path: config.pidFile,
|
|
1056
|
-
pid: null,
|
|
1057
|
-
alive: false,
|
|
1058
|
-
matchesProcess: false,
|
|
1059
|
-
stale: !existsSync2(config.pidFile),
|
|
1060
|
-
reason: existsSync2(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
|
|
1061
|
-
};
|
|
1062
|
-
}
|
|
1063
|
-
const alive = isPidAlive(pid);
|
|
1064
|
-
const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
|
|
1065
|
-
const stale = !alive || alive && !matchesProcess;
|
|
1066
|
-
let reason;
|
|
1067
|
-
if (alive && matchesProcess) {
|
|
1068
|
-
reason = `PID ${pid} alive and matches ${config.processName}`;
|
|
1069
|
-
} else if (alive && !matchesProcess) {
|
|
1070
|
-
reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
|
|
1071
|
-
} else {
|
|
1072
|
-
reason = `PID ${pid} no longer exists`;
|
|
1073
|
-
}
|
|
1074
|
-
return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
|
|
1075
|
-
}
|
|
1076
|
-
function auditSystemd(config) {
|
|
1077
|
-
if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
|
|
1078
|
-
const unit = config.systemdUnit;
|
|
1079
|
-
try {
|
|
1080
|
-
const raw = execSync(
|
|
1081
|
-
`systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
|
|
1082
|
-
{ encoding: "utf8", timeout: 5e3 }
|
|
1083
|
-
);
|
|
1084
|
-
const props = {};
|
|
1085
|
-
for (const line of raw.trim().split("\n")) {
|
|
1086
|
-
const [k, ...v] = line.split("=");
|
|
1087
|
-
if (k) props[k.trim()] = v.join("=").trim();
|
|
1088
|
-
}
|
|
1089
|
-
const activeState = props["ActiveState"] ?? "unknown";
|
|
1090
|
-
const subState = props["SubState"] ?? "unknown";
|
|
1091
|
-
const mainPid = parseInt(props["MainPID"] ?? "0", 10);
|
|
1092
|
-
const restarts = parseInt(props["NRestarts"] ?? "0", 10);
|
|
1093
|
-
const result = props["Result"] ?? "unknown";
|
|
1094
|
-
return {
|
|
1095
|
-
unit,
|
|
1096
|
-
activeState,
|
|
1097
|
-
subState,
|
|
1098
|
-
mainPid,
|
|
1099
|
-
restarts,
|
|
1100
|
-
result,
|
|
1101
|
-
crashLooping: activeState === "activating" && subState === "auto-restart",
|
|
1102
|
-
failed: activeState === "failed"
|
|
1103
|
-
};
|
|
1104
|
-
} catch {
|
|
1105
|
-
return null;
|
|
1106
|
-
}
|
|
1107
|
-
}
|
|
1108
|
-
function auditWorkers(config) {
|
|
1109
|
-
if (!config.workersFile || !existsSync2(config.workersFile)) return null;
|
|
1110
|
-
try {
|
|
1111
|
-
const data = JSON.parse(readFileSync2(config.workersFile, "utf8"));
|
|
1112
|
-
const orchPid = data.pid ?? null;
|
|
1113
|
-
const orchAlive = orchPid ? isPidAlive(orchPid) : false;
|
|
1114
|
-
const workers = [];
|
|
1115
|
-
for (const [name, info] of Object.entries(data.tools ?? {})) {
|
|
1116
|
-
const w = info;
|
|
1117
|
-
const wPid = w.pid ?? null;
|
|
1118
|
-
const wAlive = wPid ? isPidAlive(wPid) : false;
|
|
1119
|
-
workers.push({
|
|
1120
|
-
name,
|
|
1121
|
-
pid: wPid,
|
|
1122
|
-
declaredStatus: w.status ?? "unknown",
|
|
1123
|
-
alive: wAlive,
|
|
1124
|
-
stale: w.status === "running" && !wAlive
|
|
1125
|
-
});
|
|
1126
|
-
}
|
|
1127
|
-
return {
|
|
1128
|
-
orchestratorPid: orchPid,
|
|
1129
|
-
orchestratorAlive: orchAlive,
|
|
1130
|
-
startedAt: data.started_at ?? "",
|
|
1131
|
-
workers
|
|
1132
|
-
};
|
|
1133
|
-
} catch {
|
|
1134
|
-
return null;
|
|
1135
|
-
}
|
|
1136
|
-
}
|
|
1137
|
-
function getOsProcesses(processName) {
|
|
1138
|
-
try {
|
|
1139
|
-
const raw = execSync(`ps aux`, { encoding: "utf8", timeout: 5e3 });
|
|
1140
|
-
return raw.split("\n").filter((line) => line.includes(processName) && !line.includes("process-audit") && !line.includes("grep")).map((line) => {
|
|
1141
|
-
const parts = line.trim().split(/\s+/);
|
|
1142
|
-
return {
|
|
1143
|
-
pid: parseInt(parts[1] ?? "0", 10),
|
|
1144
|
-
cpu: parts[2] ?? "0",
|
|
1145
|
-
mem: parts[3] ?? "0",
|
|
1146
|
-
command: parts.slice(10).join(" ")
|
|
1147
|
-
};
|
|
1148
|
-
}).filter((p) => !isNaN(p.pid) && p.pid > 0);
|
|
1149
|
-
} catch {
|
|
1150
|
-
return [];
|
|
1151
|
-
}
|
|
1152
|
-
}
|
|
1153
|
-
function discoverProcessConfig(dirs) {
|
|
1154
|
-
let pidFile;
|
|
1155
|
-
let workersFile;
|
|
1156
|
-
let processName = "";
|
|
1157
|
-
for (const dir of dirs) {
|
|
1158
|
-
if (!existsSync2(dir)) continue;
|
|
1159
|
-
let entries;
|
|
1160
|
-
try {
|
|
1161
|
-
entries = readdirSync2(dir);
|
|
1162
|
-
} catch {
|
|
1163
|
-
continue;
|
|
1164
|
-
}
|
|
1165
|
-
for (const f of entries) {
|
|
1166
|
-
const fp = join2(dir, f);
|
|
1167
|
-
try {
|
|
1168
|
-
if (!statSync2(fp).isFile()) continue;
|
|
1169
|
-
} catch {
|
|
1170
|
-
continue;
|
|
1171
|
-
}
|
|
1172
|
-
if (f.endsWith(".pid") && !pidFile) {
|
|
1173
|
-
pidFile = fp;
|
|
1174
|
-
if (!processName) {
|
|
1175
|
-
processName = basename2(f, ".pid");
|
|
1176
|
-
}
|
|
1177
|
-
}
|
|
1178
|
-
if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
|
|
1179
|
-
workersFile = fp;
|
|
1180
|
-
if (!processName && f !== "workers.json") {
|
|
1181
|
-
processName = basename2(f, "-workers.json");
|
|
1182
|
-
}
|
|
1183
|
-
}
|
|
1184
|
-
}
|
|
1185
|
-
}
|
|
1186
|
-
if (!processName && !pidFile && !workersFile) return null;
|
|
1187
|
-
if (!processName) processName = "agent";
|
|
1188
|
-
return { processName, pidFile, workersFile };
|
|
1189
|
-
}
|
|
1190
|
-
function auditProcesses(config) {
|
|
1191
|
-
const pidFile = auditPidFile(config);
|
|
1192
|
-
const systemd = auditSystemd(config);
|
|
1193
|
-
const workers = auditWorkers(config);
|
|
1194
|
-
const osProcesses = getOsProcesses(config.processName);
|
|
1195
|
-
const knownPids = /* @__PURE__ */ new Set();
|
|
1196
|
-
if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
|
|
1197
|
-
if (workers) {
|
|
1198
|
-
if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
|
|
1199
|
-
for (const w of workers.workers) {
|
|
1200
|
-
if (w.pid) knownPids.add(w.pid);
|
|
1201
|
-
}
|
|
1202
|
-
}
|
|
1203
|
-
if (systemd?.mainPid) knownPids.add(systemd.mainPid);
|
|
1204
|
-
const orphans = osProcesses.filter((p) => !knownPids.has(p.pid));
|
|
1205
|
-
const problems = [];
|
|
1206
|
-
if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
|
|
1207
|
-
if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
|
|
1208
|
-
if (systemd?.failed) problems.push("Systemd unit has failed");
|
|
1209
|
-
if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
|
|
1210
|
-
if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
|
|
1211
|
-
problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
|
|
1212
|
-
}
|
|
1213
|
-
if (workers) {
|
|
1214
|
-
for (const w of workers.workers) {
|
|
1215
|
-
if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
|
|
1216
|
-
}
|
|
1217
|
-
}
|
|
1218
|
-
if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
|
|
1219
|
-
return { pidFile, systemd, workers, osProcesses, orphans, problems };
|
|
1220
|
-
}
|
|
1221
|
-
function formatAuditReport(result) {
|
|
1222
|
-
const lines = [];
|
|
1223
|
-
lines.push("");
|
|
1224
|
-
lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
1225
|
-
lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
|
|
1226
|
-
lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
1227
|
-
if (result.pidFile) {
|
|
1228
|
-
const pf = result.pidFile;
|
|
1229
|
-
const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
|
|
1230
|
-
lines.push(`
|
|
1231
|
-
PID File: ${pf.path}`);
|
|
1232
|
-
lines.push(` ${icon} ${pf.reason}`);
|
|
1233
|
-
}
|
|
1234
|
-
if (result.systemd) {
|
|
1235
|
-
const sd = result.systemd;
|
|
1236
|
-
const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
|
|
1237
|
-
lines.push(`
|
|
1238
|
-
Systemd: ${sd.unit}`);
|
|
1239
|
-
lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
|
|
1240
|
-
lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
|
|
1241
|
-
}
|
|
1242
|
-
if (result.workers) {
|
|
1243
|
-
const w = result.workers;
|
|
1244
|
-
lines.push(`
|
|
1245
|
-
Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
|
|
1246
|
-
for (const worker of w.workers) {
|
|
1247
|
-
const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
|
|
1248
|
-
lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
|
|
1249
|
-
}
|
|
1250
|
-
}
|
|
1251
|
-
if (result.osProcesses.length > 0) {
|
|
1252
|
-
lines.push(`
|
|
1253
|
-
OS Processes (${result.osProcesses.length} total)`);
|
|
1254
|
-
for (const p of result.osProcesses) {
|
|
1255
|
-
lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
|
|
1256
|
-
}
|
|
1257
|
-
}
|
|
1258
|
-
if (result.orphans.length > 0) {
|
|
1259
|
-
lines.push(`
|
|
1260
|
-
\u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
|
|
1261
|
-
for (const p of result.orphans) {
|
|
1262
|
-
lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
|
|
1263
|
-
}
|
|
1264
|
-
}
|
|
1265
|
-
lines.push("");
|
|
1266
|
-
if (result.problems.length === 0) {
|
|
1267
|
-
lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
|
|
1268
|
-
} else {
|
|
1269
|
-
lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
|
|
1270
|
-
for (const p of result.problems) {
|
|
1271
|
-
lines.push(` \u2022 ${p}`);
|
|
1272
|
-
}
|
|
1273
|
-
}
|
|
1274
|
-
lines.push("");
|
|
1275
|
-
return lines.join("\n");
|
|
1276
|
-
}
|
|
1277
|
-
|
|
1278
1332
|
// src/graph-builder.ts
|
|
1279
1333
|
import { randomUUID } from "crypto";
|
|
1280
1334
|
function deepFreeze(obj) {
|
|
@@ -2459,10 +2513,10 @@ export {
|
|
|
2459
2513
|
groupByTraceId,
|
|
2460
2514
|
stitchTrace,
|
|
2461
2515
|
getTraceTree,
|
|
2462
|
-
startLive,
|
|
2463
2516
|
discoverProcessConfig,
|
|
2464
2517
|
auditProcesses,
|
|
2465
2518
|
formatAuditReport,
|
|
2519
|
+
startLive,
|
|
2466
2520
|
createGraphBuilder,
|
|
2467
2521
|
runTraced,
|
|
2468
2522
|
createTraceStore,
|