agentflow-core 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-VF4FSBXR.js → chunk-BXZC5ZMJ.js} +357 -268
- package/dist/cli.cjs +361 -270
- package/dist/cli.js +1 -1
- package/dist/index.cjs +359 -270
- package/dist/index.d.cts +6 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +1 -1
- package/package.json +1 -1
|
@@ -206,9 +206,289 @@ function getTraceTree(trace) {
|
|
|
206
206
|
return result;
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
+
// src/process-audit.ts
|
|
210
|
+
import { execSync } from "child_process";
|
|
211
|
+
import { existsSync, readdirSync, readFileSync, statSync } from "fs";
|
|
212
|
+
import { basename, join } from "path";
|
|
213
|
+
function isPidAlive(pid) {
|
|
214
|
+
try {
|
|
215
|
+
process.kill(pid, 0);
|
|
216
|
+
return true;
|
|
217
|
+
} catch {
|
|
218
|
+
return false;
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
function pidMatchesName(pid, name) {
|
|
222
|
+
try {
|
|
223
|
+
const cmdline = readFileSync(`/proc/${pid}/cmdline`, "utf8");
|
|
224
|
+
return cmdline.includes(name);
|
|
225
|
+
} catch {
|
|
226
|
+
return false;
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
function readPidFile(path) {
|
|
230
|
+
try {
|
|
231
|
+
const pid = parseInt(readFileSync(path, "utf8").trim(), 10);
|
|
232
|
+
return isNaN(pid) ? null : pid;
|
|
233
|
+
} catch {
|
|
234
|
+
return null;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
function auditPidFile(config) {
|
|
238
|
+
if (!config.pidFile) return null;
|
|
239
|
+
const pid = readPidFile(config.pidFile);
|
|
240
|
+
if (pid === null) {
|
|
241
|
+
return {
|
|
242
|
+
path: config.pidFile,
|
|
243
|
+
pid: null,
|
|
244
|
+
alive: false,
|
|
245
|
+
matchesProcess: false,
|
|
246
|
+
stale: !existsSync(config.pidFile),
|
|
247
|
+
reason: existsSync(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
const alive = isPidAlive(pid);
|
|
251
|
+
const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
|
|
252
|
+
const stale = !alive || alive && !matchesProcess;
|
|
253
|
+
let reason;
|
|
254
|
+
if (alive && matchesProcess) {
|
|
255
|
+
reason = `PID ${pid} alive and matches ${config.processName}`;
|
|
256
|
+
} else if (alive && !matchesProcess) {
|
|
257
|
+
reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
|
|
258
|
+
} else {
|
|
259
|
+
reason = `PID ${pid} no longer exists`;
|
|
260
|
+
}
|
|
261
|
+
return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
|
|
262
|
+
}
|
|
263
|
+
function auditSystemd(config) {
|
|
264
|
+
if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
|
|
265
|
+
const unit = config.systemdUnit;
|
|
266
|
+
try {
|
|
267
|
+
const raw = execSync(
|
|
268
|
+
`systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
|
|
269
|
+
{ encoding: "utf8", timeout: 5e3 }
|
|
270
|
+
);
|
|
271
|
+
const props = {};
|
|
272
|
+
for (const line of raw.trim().split("\n")) {
|
|
273
|
+
const [k, ...v] = line.split("=");
|
|
274
|
+
if (k) props[k.trim()] = v.join("=").trim();
|
|
275
|
+
}
|
|
276
|
+
const activeState = props["ActiveState"] ?? "unknown";
|
|
277
|
+
const subState = props["SubState"] ?? "unknown";
|
|
278
|
+
const mainPid = parseInt(props["MainPID"] ?? "0", 10);
|
|
279
|
+
const restarts = parseInt(props["NRestarts"] ?? "0", 10);
|
|
280
|
+
const result = props["Result"] ?? "unknown";
|
|
281
|
+
return {
|
|
282
|
+
unit,
|
|
283
|
+
activeState,
|
|
284
|
+
subState,
|
|
285
|
+
mainPid,
|
|
286
|
+
restarts,
|
|
287
|
+
result,
|
|
288
|
+
crashLooping: activeState === "activating" && subState === "auto-restart",
|
|
289
|
+
failed: activeState === "failed"
|
|
290
|
+
};
|
|
291
|
+
} catch {
|
|
292
|
+
return null;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
function auditWorkers(config) {
|
|
296
|
+
if (!config.workersFile || !existsSync(config.workersFile)) return null;
|
|
297
|
+
try {
|
|
298
|
+
const data = JSON.parse(readFileSync(config.workersFile, "utf8"));
|
|
299
|
+
const orchPid = data.pid ?? null;
|
|
300
|
+
const orchAlive = orchPid ? isPidAlive(orchPid) : false;
|
|
301
|
+
const workers = [];
|
|
302
|
+
for (const [name, info] of Object.entries(data.tools ?? {})) {
|
|
303
|
+
const w = info;
|
|
304
|
+
const wPid = w.pid ?? null;
|
|
305
|
+
const wAlive = wPid ? isPidAlive(wPid) : false;
|
|
306
|
+
workers.push({
|
|
307
|
+
name,
|
|
308
|
+
pid: wPid,
|
|
309
|
+
declaredStatus: w.status ?? "unknown",
|
|
310
|
+
alive: wAlive,
|
|
311
|
+
stale: w.status === "running" && !wAlive
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
return {
|
|
315
|
+
orchestratorPid: orchPid,
|
|
316
|
+
orchestratorAlive: orchAlive,
|
|
317
|
+
startedAt: data.started_at ?? "",
|
|
318
|
+
workers
|
|
319
|
+
};
|
|
320
|
+
} catch {
|
|
321
|
+
return null;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
function readCmdline(pid) {
|
|
325
|
+
try {
|
|
326
|
+
return readFileSync(`/proc/${pid}/cmdline`, "utf8").replace(/\0/g, " ").trim();
|
|
327
|
+
} catch {
|
|
328
|
+
return "";
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
function getOsProcesses(processName) {
|
|
332
|
+
try {
|
|
333
|
+
const raw = execSync(
|
|
334
|
+
`ps -eo pid,pcpu,pmem,etime,lstart,args --no-headers`,
|
|
335
|
+
{ encoding: "utf8", timeout: 5e3 }
|
|
336
|
+
);
|
|
337
|
+
const results = [];
|
|
338
|
+
for (const line of raw.split("\n")) {
|
|
339
|
+
if (!line.includes(processName)) continue;
|
|
340
|
+
if (line.includes("process-audit") || line.includes(" grep ")) continue;
|
|
341
|
+
const trimmed = line.trim();
|
|
342
|
+
const parts = trimmed.split(/\s+/);
|
|
343
|
+
const pid = parseInt(parts[0] ?? "0", 10);
|
|
344
|
+
if (isNaN(pid) || pid <= 0) continue;
|
|
345
|
+
const cpu = parts[1] ?? "0";
|
|
346
|
+
const mem = parts[2] ?? "0";
|
|
347
|
+
const elapsed = parts[3] ?? "";
|
|
348
|
+
const started = parts.slice(4, 9).join(" ");
|
|
349
|
+
const command = parts.slice(9).join(" ");
|
|
350
|
+
const cmdline = readCmdline(pid);
|
|
351
|
+
results.push({ pid, cpu, mem, elapsed, started, command, cmdline });
|
|
352
|
+
}
|
|
353
|
+
return results;
|
|
354
|
+
} catch {
|
|
355
|
+
return [];
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
function discoverProcessConfig(dirs) {
|
|
359
|
+
let pidFile;
|
|
360
|
+
let workersFile;
|
|
361
|
+
let processName = "";
|
|
362
|
+
for (const dir of dirs) {
|
|
363
|
+
if (!existsSync(dir)) continue;
|
|
364
|
+
let entries;
|
|
365
|
+
try {
|
|
366
|
+
entries = readdirSync(dir);
|
|
367
|
+
} catch {
|
|
368
|
+
continue;
|
|
369
|
+
}
|
|
370
|
+
for (const f of entries) {
|
|
371
|
+
const fp = join(dir, f);
|
|
372
|
+
try {
|
|
373
|
+
if (!statSync(fp).isFile()) continue;
|
|
374
|
+
} catch {
|
|
375
|
+
continue;
|
|
376
|
+
}
|
|
377
|
+
if (f.endsWith(".pid") && !pidFile) {
|
|
378
|
+
pidFile = fp;
|
|
379
|
+
if (!processName) {
|
|
380
|
+
processName = basename(f, ".pid");
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
|
|
384
|
+
workersFile = fp;
|
|
385
|
+
if (!processName && f !== "workers.json") {
|
|
386
|
+
processName = basename(f, "-workers.json");
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
if (!processName && !pidFile && !workersFile) return null;
|
|
392
|
+
if (!processName) processName = "agent";
|
|
393
|
+
return { processName, pidFile, workersFile };
|
|
394
|
+
}
|
|
395
|
+
function auditProcesses(config) {
|
|
396
|
+
const pidFile = auditPidFile(config);
|
|
397
|
+
const systemd = auditSystemd(config);
|
|
398
|
+
const workers = auditWorkers(config);
|
|
399
|
+
const osProcesses = getOsProcesses(config.processName);
|
|
400
|
+
const knownPids = /* @__PURE__ */ new Set();
|
|
401
|
+
if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
|
|
402
|
+
if (workers) {
|
|
403
|
+
if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
|
|
404
|
+
for (const w of workers.workers) {
|
|
405
|
+
if (w.pid) knownPids.add(w.pid);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
if (systemd?.mainPid) knownPids.add(systemd.mainPid);
|
|
409
|
+
const selfPid = process.pid;
|
|
410
|
+
const selfPpid = process.ppid;
|
|
411
|
+
const orphans = osProcesses.filter(
|
|
412
|
+
(p) => !knownPids.has(p.pid) && p.pid !== selfPid && p.pid !== selfPpid
|
|
413
|
+
);
|
|
414
|
+
const problems = [];
|
|
415
|
+
if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
|
|
416
|
+
if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
|
|
417
|
+
if (systemd?.failed) problems.push("Systemd unit has failed");
|
|
418
|
+
if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
|
|
419
|
+
if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
|
|
420
|
+
problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
|
|
421
|
+
}
|
|
422
|
+
if (workers) {
|
|
423
|
+
for (const w of workers.workers) {
|
|
424
|
+
if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
|
|
428
|
+
return { pidFile, systemd, workers, osProcesses, orphans, problems };
|
|
429
|
+
}
|
|
430
|
+
function formatAuditReport(result) {
|
|
431
|
+
const lines = [];
|
|
432
|
+
lines.push("");
|
|
433
|
+
lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
434
|
+
lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
|
|
435
|
+
lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
436
|
+
if (result.pidFile) {
|
|
437
|
+
const pf = result.pidFile;
|
|
438
|
+
const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
|
|
439
|
+
lines.push(`
|
|
440
|
+
PID File: ${pf.path}`);
|
|
441
|
+
lines.push(` ${icon} ${pf.reason}`);
|
|
442
|
+
}
|
|
443
|
+
if (result.systemd) {
|
|
444
|
+
const sd = result.systemd;
|
|
445
|
+
const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
|
|
446
|
+
lines.push(`
|
|
447
|
+
Systemd: ${sd.unit}`);
|
|
448
|
+
lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
|
|
449
|
+
lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
|
|
450
|
+
}
|
|
451
|
+
if (result.workers) {
|
|
452
|
+
const w = result.workers;
|
|
453
|
+
lines.push(`
|
|
454
|
+
Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
|
|
455
|
+
for (const worker of w.workers) {
|
|
456
|
+
const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
|
|
457
|
+
lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
if (result.osProcesses.length > 0) {
|
|
461
|
+
lines.push(`
|
|
462
|
+
OS Processes (${result.osProcesses.length} total)`);
|
|
463
|
+
for (const p of result.osProcesses) {
|
|
464
|
+
lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} Up=${p.elapsed.padEnd(10)} ${p.command.substring(0, 50)}`);
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
if (result.orphans.length > 0) {
|
|
468
|
+
lines.push(`
|
|
469
|
+
\u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
|
|
470
|
+
for (const p of result.orphans) {
|
|
471
|
+
lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} Up=${p.elapsed}`);
|
|
472
|
+
lines.push(` Started: ${p.started}`);
|
|
473
|
+
lines.push(` Command: ${p.cmdline || p.command}`);
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
lines.push("");
|
|
477
|
+
if (result.problems.length === 0) {
|
|
478
|
+
lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
|
|
479
|
+
} else {
|
|
480
|
+
lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
|
|
481
|
+
for (const p of result.problems) {
|
|
482
|
+
lines.push(` \u2022 ${p}`);
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
lines.push("");
|
|
486
|
+
return lines.join("\n");
|
|
487
|
+
}
|
|
488
|
+
|
|
209
489
|
// src/live.ts
|
|
210
|
-
import { existsSync, readdirSync, readFileSync, statSync, watch } from "fs";
|
|
211
|
-
import { basename, join, resolve } from "path";
|
|
490
|
+
import { existsSync as existsSync2, readdirSync as readdirSync2, readFileSync as readFileSync2, statSync as statSync2, watch } from "fs";
|
|
491
|
+
import { basename as basename2, join as join2, resolve } from "path";
|
|
212
492
|
var C = {
|
|
213
493
|
reset: "\x1B[0m",
|
|
214
494
|
bold: "\x1B[1m",
|
|
@@ -283,7 +563,7 @@ function scanFiles(dirs, recursive) {
|
|
|
283
563
|
const seen = /* @__PURE__ */ new Set();
|
|
284
564
|
function scanDir(d, topLevel) {
|
|
285
565
|
try {
|
|
286
|
-
const dirStat = statSync(d);
|
|
566
|
+
const dirStat = statSync2(d);
|
|
287
567
|
const dirMtime = dirStat.mtime.getTime();
|
|
288
568
|
const cachedMtime = dirMtimeCache.get(d);
|
|
289
569
|
if (cachedMtime === dirMtime) {
|
|
@@ -299,13 +579,13 @@ function scanFiles(dirs, recursive) {
|
|
|
299
579
|
}
|
|
300
580
|
}
|
|
301
581
|
const dirResults = [];
|
|
302
|
-
for (const f of readdirSync(d)) {
|
|
582
|
+
for (const f of readdirSync2(d)) {
|
|
303
583
|
if (f.startsWith(".")) continue;
|
|
304
|
-
const fp = join(d, f);
|
|
584
|
+
const fp = join2(d, f);
|
|
305
585
|
if (seen.has(fp)) continue;
|
|
306
586
|
let stat;
|
|
307
587
|
try {
|
|
308
|
-
stat = statSync(fp);
|
|
588
|
+
stat = statSync2(fp);
|
|
309
589
|
} catch {
|
|
310
590
|
continue;
|
|
311
591
|
}
|
|
@@ -337,13 +617,13 @@ function scanFiles(dirs, recursive) {
|
|
|
337
617
|
}
|
|
338
618
|
function safeReadJson(fp) {
|
|
339
619
|
try {
|
|
340
|
-
return JSON.parse(readFileSync(fp, "utf8"));
|
|
620
|
+
return JSON.parse(readFileSync2(fp, "utf8"));
|
|
341
621
|
} catch {
|
|
342
622
|
return null;
|
|
343
623
|
}
|
|
344
624
|
}
|
|
345
625
|
function nameFromFile(filename) {
|
|
346
|
-
return basename(filename).replace(/\.(json|jsonl)$/, "").replace(/-state$/, "");
|
|
626
|
+
return basename2(filename).replace(/\.(json|jsonl)$/, "").replace(/-state$/, "");
|
|
347
627
|
}
|
|
348
628
|
function normalizeStatus(val) {
|
|
349
629
|
if (typeof val !== "string") return "unknown";
|
|
@@ -521,7 +801,7 @@ function processJsonFile(file) {
|
|
|
521
801
|
}
|
|
522
802
|
function processJsonlFile(file) {
|
|
523
803
|
try {
|
|
524
|
-
const content = readFileSync(file.path, "utf8").trim();
|
|
804
|
+
const content = readFileSync2(file.path, "utf8").trim();
|
|
525
805
|
if (!content) return [];
|
|
526
806
|
const lines = content.split("\n");
|
|
527
807
|
const lineCount = lines.length;
|
|
@@ -673,6 +953,9 @@ var prevFileCount = 0;
|
|
|
673
953
|
var newExecCount = 0;
|
|
674
954
|
var sessionStart = Date.now();
|
|
675
955
|
var firstRender = true;
|
|
956
|
+
var cachedAuditConfig = null;
|
|
957
|
+
var cachedAuditResult = null;
|
|
958
|
+
var lastAuditTime = 0;
|
|
676
959
|
var fileCache = /* @__PURE__ */ new Map();
|
|
677
960
|
function getRecordsCached(f) {
|
|
678
961
|
const cached = fileCache.get(f.path);
|
|
@@ -792,6 +1075,24 @@ function render(config) {
|
|
|
792
1075
|
const level = Math.round(v / maxBucket * 8);
|
|
793
1076
|
return (failBuckets[i] > 0 ? C.red : C.green) + sparkChars[level] + C.reset;
|
|
794
1077
|
}).join("");
|
|
1078
|
+
let auditResult = null;
|
|
1079
|
+
if (now - lastAuditTime > 1e4) {
|
|
1080
|
+
if (!cachedAuditConfig) {
|
|
1081
|
+
cachedAuditConfig = discoverProcessConfig(config.dirs);
|
|
1082
|
+
}
|
|
1083
|
+
if (cachedAuditConfig) {
|
|
1084
|
+
try {
|
|
1085
|
+
auditResult = auditProcesses(cachedAuditConfig);
|
|
1086
|
+
cachedAuditResult = auditResult;
|
|
1087
|
+
lastAuditTime = now;
|
|
1088
|
+
} catch (err) {
|
|
1089
|
+
process.stderr.write(`[agentflow] process audit error: ${err instanceof Error ? err.message : err}
|
|
1090
|
+
`);
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
} else {
|
|
1094
|
+
auditResult = cachedAuditResult;
|
|
1095
|
+
}
|
|
795
1096
|
const distributedTraces = [];
|
|
796
1097
|
if (allTraces.length > 1) {
|
|
797
1098
|
const traceGroups = groupByTraceId(allTraces);
|
|
@@ -872,6 +1173,50 @@ function render(config) {
|
|
|
872
1173
|
);
|
|
873
1174
|
writeLine(L, "");
|
|
874
1175
|
writeLine(L, ` ${C.bold}Activity (1h)${C.reset} ${spark} ${C.dim}\u2190 now${C.reset}`);
|
|
1176
|
+
if (auditResult) {
|
|
1177
|
+
const ar = auditResult;
|
|
1178
|
+
const healthy = ar.problems.length === 0;
|
|
1179
|
+
const healthIcon = healthy ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
|
|
1180
|
+
const healthLabel = healthy ? `${C.green}healthy${C.reset}` : `${C.red}${ar.problems.length} issue(s)${C.reset}`;
|
|
1181
|
+
const workerParts = [];
|
|
1182
|
+
if (ar.workers) {
|
|
1183
|
+
for (const w of ar.workers.workers) {
|
|
1184
|
+
const wIcon = w.declaredStatus === "running" && w.alive ? `${C.green}\u25CF${C.reset}` : w.stale ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
|
|
1185
|
+
workerParts.push(`${wIcon} ${w.name}`);
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
1188
|
+
let sysdLabel = "";
|
|
1189
|
+
if (ar.systemd) {
|
|
1190
|
+
const si = ar.systemd.activeState === "active" ? `${C.green}\u25CF${C.reset}` : ar.systemd.crashLooping ? `${C.yellow}\u25CF${C.reset}` : ar.systemd.failed ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
|
|
1191
|
+
sysdLabel = ` ${C.bold}Systemd${C.reset} ${si} ${ar.systemd.activeState}`;
|
|
1192
|
+
if (ar.systemd.restarts > 0) sysdLabel += ` ${C.dim}(${ar.systemd.restarts} restarts)${C.reset}`;
|
|
1193
|
+
}
|
|
1194
|
+
let pidLabel = "";
|
|
1195
|
+
if (ar.pidFile?.pid) {
|
|
1196
|
+
const pi = ar.pidFile.alive && ar.pidFile.matchesProcess ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
|
|
1197
|
+
pidLabel = ` ${C.bold}PID${C.reset} ${pi} ${ar.pidFile.pid}`;
|
|
1198
|
+
}
|
|
1199
|
+
writeLine(L, "");
|
|
1200
|
+
writeLine(L, ` ${C.bold}${C.under}Process Health${C.reset}`);
|
|
1201
|
+
writeLine(L, ` ${healthIcon} ${healthLabel}${pidLabel}${sysdLabel} ${C.bold}Procs${C.reset} ${C.dim}${ar.osProcesses.length}${C.reset} ${ar.orphans.length > 0 ? `${C.red}Orphans ${ar.orphans.length}${C.reset}` : `${C.dim}Orphans 0${C.reset}`}`);
|
|
1202
|
+
if (workerParts.length > 0) {
|
|
1203
|
+
writeLine(L, ` ${C.dim}Workers${C.reset} ${workerParts.join(" ")}`);
|
|
1204
|
+
}
|
|
1205
|
+
if (!healthy) {
|
|
1206
|
+
for (const p of ar.problems.slice(0, 3)) {
|
|
1207
|
+
writeLine(L, ` ${C.red}\u2022${C.reset} ${C.dim}${p}${C.reset}`);
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
if (ar.orphans.length > 0) {
|
|
1211
|
+
for (const o of ar.orphans.slice(0, 5)) {
|
|
1212
|
+
const cmd = (o.cmdline || o.command).substring(0, detailWidth);
|
|
1213
|
+
writeLine(L, ` ${C.red}?${C.reset} ${C.dim}pid=${o.pid} cpu=${o.cpu} mem=${o.mem} up=${o.elapsed}${C.reset} ${C.dim}${cmd}${C.reset}`);
|
|
1214
|
+
}
|
|
1215
|
+
if (ar.orphans.length > 5) {
|
|
1216
|
+
writeLine(L, ` ${C.dim}... +${ar.orphans.length - 5} more orphans${C.reset}`);
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
}
|
|
875
1220
|
writeLine(L, "");
|
|
876
1221
|
writeLine(
|
|
877
1222
|
L,
|
|
@@ -990,13 +1335,13 @@ function getDistDepth(dt, spanId, visited) {
|
|
|
990
1335
|
}
|
|
991
1336
|
function startLive(argv) {
|
|
992
1337
|
const config = parseArgs(argv);
|
|
993
|
-
const valid = config.dirs.filter((d) => existsSync(d));
|
|
1338
|
+
const valid = config.dirs.filter((d) => existsSync2(d));
|
|
994
1339
|
if (valid.length === 0) {
|
|
995
1340
|
console.error(`No valid directories found: ${config.dirs.join(", ")}`);
|
|
996
1341
|
console.error("Specify directories containing JSON/JSONL files: agentflow live <dir> [dir...]");
|
|
997
1342
|
process.exit(1);
|
|
998
1343
|
}
|
|
999
|
-
const invalid = config.dirs.filter((d) => !existsSync(d));
|
|
1344
|
+
const invalid = config.dirs.filter((d) => !existsSync2(d));
|
|
1000
1345
|
if (invalid.length > 0) {
|
|
1001
1346
|
console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
|
|
1002
1347
|
}
|
|
@@ -1019,262 +1364,6 @@ function startLive(argv) {
|
|
|
1019
1364
|
});
|
|
1020
1365
|
}
|
|
1021
1366
|
|
|
1022
|
-
// src/process-audit.ts
|
|
1023
|
-
import { execSync } from "child_process";
|
|
1024
|
-
import { existsSync as existsSync2, readdirSync as readdirSync2, readFileSync as readFileSync2, statSync as statSync2 } from "fs";
|
|
1025
|
-
import { basename as basename2, join as join2 } from "path";
|
|
1026
|
-
function isPidAlive(pid) {
|
|
1027
|
-
try {
|
|
1028
|
-
process.kill(pid, 0);
|
|
1029
|
-
return true;
|
|
1030
|
-
} catch {
|
|
1031
|
-
return false;
|
|
1032
|
-
}
|
|
1033
|
-
}
|
|
1034
|
-
function pidMatchesName(pid, name) {
|
|
1035
|
-
try {
|
|
1036
|
-
const cmdline = readFileSync2(`/proc/${pid}/cmdline`, "utf8");
|
|
1037
|
-
return cmdline.includes(name);
|
|
1038
|
-
} catch {
|
|
1039
|
-
return false;
|
|
1040
|
-
}
|
|
1041
|
-
}
|
|
1042
|
-
function readPidFile(path) {
|
|
1043
|
-
try {
|
|
1044
|
-
const pid = parseInt(readFileSync2(path, "utf8").trim(), 10);
|
|
1045
|
-
return isNaN(pid) ? null : pid;
|
|
1046
|
-
} catch {
|
|
1047
|
-
return null;
|
|
1048
|
-
}
|
|
1049
|
-
}
|
|
1050
|
-
function auditPidFile(config) {
|
|
1051
|
-
if (!config.pidFile) return null;
|
|
1052
|
-
const pid = readPidFile(config.pidFile);
|
|
1053
|
-
if (pid === null) {
|
|
1054
|
-
return {
|
|
1055
|
-
path: config.pidFile,
|
|
1056
|
-
pid: null,
|
|
1057
|
-
alive: false,
|
|
1058
|
-
matchesProcess: false,
|
|
1059
|
-
stale: !existsSync2(config.pidFile),
|
|
1060
|
-
reason: existsSync2(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
|
|
1061
|
-
};
|
|
1062
|
-
}
|
|
1063
|
-
const alive = isPidAlive(pid);
|
|
1064
|
-
const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
|
|
1065
|
-
const stale = !alive || alive && !matchesProcess;
|
|
1066
|
-
let reason;
|
|
1067
|
-
if (alive && matchesProcess) {
|
|
1068
|
-
reason = `PID ${pid} alive and matches ${config.processName}`;
|
|
1069
|
-
} else if (alive && !matchesProcess) {
|
|
1070
|
-
reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
|
|
1071
|
-
} else {
|
|
1072
|
-
reason = `PID ${pid} no longer exists`;
|
|
1073
|
-
}
|
|
1074
|
-
return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
|
|
1075
|
-
}
|
|
1076
|
-
function auditSystemd(config) {
|
|
1077
|
-
if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
|
|
1078
|
-
const unit = config.systemdUnit;
|
|
1079
|
-
try {
|
|
1080
|
-
const raw = execSync(
|
|
1081
|
-
`systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
|
|
1082
|
-
{ encoding: "utf8", timeout: 5e3 }
|
|
1083
|
-
);
|
|
1084
|
-
const props = {};
|
|
1085
|
-
for (const line of raw.trim().split("\n")) {
|
|
1086
|
-
const [k, ...v] = line.split("=");
|
|
1087
|
-
if (k) props[k.trim()] = v.join("=").trim();
|
|
1088
|
-
}
|
|
1089
|
-
const activeState = props["ActiveState"] ?? "unknown";
|
|
1090
|
-
const subState = props["SubState"] ?? "unknown";
|
|
1091
|
-
const mainPid = parseInt(props["MainPID"] ?? "0", 10);
|
|
1092
|
-
const restarts = parseInt(props["NRestarts"] ?? "0", 10);
|
|
1093
|
-
const result = props["Result"] ?? "unknown";
|
|
1094
|
-
return {
|
|
1095
|
-
unit,
|
|
1096
|
-
activeState,
|
|
1097
|
-
subState,
|
|
1098
|
-
mainPid,
|
|
1099
|
-
restarts,
|
|
1100
|
-
result,
|
|
1101
|
-
crashLooping: activeState === "activating" && subState === "auto-restart",
|
|
1102
|
-
failed: activeState === "failed"
|
|
1103
|
-
};
|
|
1104
|
-
} catch {
|
|
1105
|
-
return null;
|
|
1106
|
-
}
|
|
1107
|
-
}
|
|
1108
|
-
function auditWorkers(config) {
|
|
1109
|
-
if (!config.workersFile || !existsSync2(config.workersFile)) return null;
|
|
1110
|
-
try {
|
|
1111
|
-
const data = JSON.parse(readFileSync2(config.workersFile, "utf8"));
|
|
1112
|
-
const orchPid = data.pid ?? null;
|
|
1113
|
-
const orchAlive = orchPid ? isPidAlive(orchPid) : false;
|
|
1114
|
-
const workers = [];
|
|
1115
|
-
for (const [name, info] of Object.entries(data.tools ?? {})) {
|
|
1116
|
-
const w = info;
|
|
1117
|
-
const wPid = w.pid ?? null;
|
|
1118
|
-
const wAlive = wPid ? isPidAlive(wPid) : false;
|
|
1119
|
-
workers.push({
|
|
1120
|
-
name,
|
|
1121
|
-
pid: wPid,
|
|
1122
|
-
declaredStatus: w.status ?? "unknown",
|
|
1123
|
-
alive: wAlive,
|
|
1124
|
-
stale: w.status === "running" && !wAlive
|
|
1125
|
-
});
|
|
1126
|
-
}
|
|
1127
|
-
return {
|
|
1128
|
-
orchestratorPid: orchPid,
|
|
1129
|
-
orchestratorAlive: orchAlive,
|
|
1130
|
-
startedAt: data.started_at ?? "",
|
|
1131
|
-
workers
|
|
1132
|
-
};
|
|
1133
|
-
} catch {
|
|
1134
|
-
return null;
|
|
1135
|
-
}
|
|
1136
|
-
}
|
|
1137
|
-
function getOsProcesses(processName) {
|
|
1138
|
-
try {
|
|
1139
|
-
const raw = execSync(`ps aux`, { encoding: "utf8", timeout: 5e3 });
|
|
1140
|
-
return raw.split("\n").filter((line) => line.includes(processName) && !line.includes("process-audit") && !line.includes("grep")).map((line) => {
|
|
1141
|
-
const parts = line.trim().split(/\s+/);
|
|
1142
|
-
return {
|
|
1143
|
-
pid: parseInt(parts[1] ?? "0", 10),
|
|
1144
|
-
cpu: parts[2] ?? "0",
|
|
1145
|
-
mem: parts[3] ?? "0",
|
|
1146
|
-
command: parts.slice(10).join(" ")
|
|
1147
|
-
};
|
|
1148
|
-
}).filter((p) => !isNaN(p.pid) && p.pid > 0);
|
|
1149
|
-
} catch {
|
|
1150
|
-
return [];
|
|
1151
|
-
}
|
|
1152
|
-
}
|
|
1153
|
-
function discoverProcessConfig(dirs) {
|
|
1154
|
-
let pidFile;
|
|
1155
|
-
let workersFile;
|
|
1156
|
-
let processName = "";
|
|
1157
|
-
for (const dir of dirs) {
|
|
1158
|
-
if (!existsSync2(dir)) continue;
|
|
1159
|
-
let entries;
|
|
1160
|
-
try {
|
|
1161
|
-
entries = readdirSync2(dir);
|
|
1162
|
-
} catch {
|
|
1163
|
-
continue;
|
|
1164
|
-
}
|
|
1165
|
-
for (const f of entries) {
|
|
1166
|
-
const fp = join2(dir, f);
|
|
1167
|
-
try {
|
|
1168
|
-
if (!statSync2(fp).isFile()) continue;
|
|
1169
|
-
} catch {
|
|
1170
|
-
continue;
|
|
1171
|
-
}
|
|
1172
|
-
if (f.endsWith(".pid") && !pidFile) {
|
|
1173
|
-
pidFile = fp;
|
|
1174
|
-
if (!processName) {
|
|
1175
|
-
processName = basename2(f, ".pid");
|
|
1176
|
-
}
|
|
1177
|
-
}
|
|
1178
|
-
if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
|
|
1179
|
-
workersFile = fp;
|
|
1180
|
-
if (!processName && f !== "workers.json") {
|
|
1181
|
-
processName = basename2(f, "-workers.json");
|
|
1182
|
-
}
|
|
1183
|
-
}
|
|
1184
|
-
}
|
|
1185
|
-
}
|
|
1186
|
-
if (!processName && !pidFile && !workersFile) return null;
|
|
1187
|
-
if (!processName) processName = "agent";
|
|
1188
|
-
return { processName, pidFile, workersFile };
|
|
1189
|
-
}
|
|
1190
|
-
function auditProcesses(config) {
|
|
1191
|
-
const pidFile = auditPidFile(config);
|
|
1192
|
-
const systemd = auditSystemd(config);
|
|
1193
|
-
const workers = auditWorkers(config);
|
|
1194
|
-
const osProcesses = getOsProcesses(config.processName);
|
|
1195
|
-
const knownPids = /* @__PURE__ */ new Set();
|
|
1196
|
-
if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
|
|
1197
|
-
if (workers) {
|
|
1198
|
-
if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
|
|
1199
|
-
for (const w of workers.workers) {
|
|
1200
|
-
if (w.pid) knownPids.add(w.pid);
|
|
1201
|
-
}
|
|
1202
|
-
}
|
|
1203
|
-
if (systemd?.mainPid) knownPids.add(systemd.mainPid);
|
|
1204
|
-
const orphans = osProcesses.filter((p) => !knownPids.has(p.pid));
|
|
1205
|
-
const problems = [];
|
|
1206
|
-
if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
|
|
1207
|
-
if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
|
|
1208
|
-
if (systemd?.failed) problems.push("Systemd unit has failed");
|
|
1209
|
-
if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
|
|
1210
|
-
if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
|
|
1211
|
-
problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
|
|
1212
|
-
}
|
|
1213
|
-
if (workers) {
|
|
1214
|
-
for (const w of workers.workers) {
|
|
1215
|
-
if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
|
|
1216
|
-
}
|
|
1217
|
-
}
|
|
1218
|
-
if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
|
|
1219
|
-
return { pidFile, systemd, workers, osProcesses, orphans, problems };
|
|
1220
|
-
}
|
|
1221
|
-
function formatAuditReport(result) {
|
|
1222
|
-
const lines = [];
|
|
1223
|
-
lines.push("");
|
|
1224
|
-
lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
1225
|
-
lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
|
|
1226
|
-
lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
1227
|
-
if (result.pidFile) {
|
|
1228
|
-
const pf = result.pidFile;
|
|
1229
|
-
const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
|
|
1230
|
-
lines.push(`
|
|
1231
|
-
PID File: ${pf.path}`);
|
|
1232
|
-
lines.push(` ${icon} ${pf.reason}`);
|
|
1233
|
-
}
|
|
1234
|
-
if (result.systemd) {
|
|
1235
|
-
const sd = result.systemd;
|
|
1236
|
-
const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
|
|
1237
|
-
lines.push(`
|
|
1238
|
-
Systemd: ${sd.unit}`);
|
|
1239
|
-
lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
|
|
1240
|
-
lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
|
|
1241
|
-
}
|
|
1242
|
-
if (result.workers) {
|
|
1243
|
-
const w = result.workers;
|
|
1244
|
-
lines.push(`
|
|
1245
|
-
Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
|
|
1246
|
-
for (const worker of w.workers) {
|
|
1247
|
-
const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
|
|
1248
|
-
lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
|
|
1249
|
-
}
|
|
1250
|
-
}
|
|
1251
|
-
if (result.osProcesses.length > 0) {
|
|
1252
|
-
lines.push(`
|
|
1253
|
-
OS Processes (${result.osProcesses.length} total)`);
|
|
1254
|
-
for (const p of result.osProcesses) {
|
|
1255
|
-
lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
|
|
1256
|
-
}
|
|
1257
|
-
}
|
|
1258
|
-
if (result.orphans.length > 0) {
|
|
1259
|
-
lines.push(`
|
|
1260
|
-
\u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
|
|
1261
|
-
for (const p of result.orphans) {
|
|
1262
|
-
lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
|
|
1263
|
-
}
|
|
1264
|
-
}
|
|
1265
|
-
lines.push("");
|
|
1266
|
-
if (result.problems.length === 0) {
|
|
1267
|
-
lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
|
|
1268
|
-
} else {
|
|
1269
|
-
lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
|
|
1270
|
-
for (const p of result.problems) {
|
|
1271
|
-
lines.push(` \u2022 ${p}`);
|
|
1272
|
-
}
|
|
1273
|
-
}
|
|
1274
|
-
lines.push("");
|
|
1275
|
-
return lines.join("\n");
|
|
1276
|
-
}
|
|
1277
|
-
|
|
1278
1367
|
// src/graph-builder.ts
|
|
1279
1368
|
import { randomUUID } from "crypto";
|
|
1280
1369
|
function deepFreeze(obj) {
|
|
@@ -2459,10 +2548,10 @@ export {
|
|
|
2459
2548
|
groupByTraceId,
|
|
2460
2549
|
stitchTrace,
|
|
2461
2550
|
getTraceTree,
|
|
2462
|
-
startLive,
|
|
2463
2551
|
discoverProcessConfig,
|
|
2464
2552
|
auditProcesses,
|
|
2465
2553
|
formatAuditReport,
|
|
2554
|
+
startLive,
|
|
2466
2555
|
createGraphBuilder,
|
|
2467
2556
|
runTraced,
|
|
2468
2557
|
createTraceStore,
|