agentflow-core 0.5.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -96,9 +96,8 @@ var init_loader = __esm({
96
96
  var import_path3 = require("path");
97
97
 
98
98
  // src/live.ts
99
- var import_node_fs = require("fs");
100
- var import_node_path = require("path");
101
- var import_node_child_process = require("child_process");
99
+ var import_node_fs2 = require("fs");
100
+ var import_node_path2 = require("path");
102
101
 
103
102
  // src/graph-query.ts
104
103
  function getChildren(graph, nodeId) {
@@ -245,6 +244,264 @@ function getTraceTree(trace) {
245
244
 
246
245
  // src/live.ts
247
246
  init_loader();
247
+
248
+ // src/process-audit.ts
249
+ var import_node_child_process = require("child_process");
250
+ var import_node_fs = require("fs");
251
+ var import_node_path = require("path");
252
/**
 * Report whether a single process with the given PID currently exists.
 *
 * Probes with signal 0 via `process.kill`, which performs the existence and
 * permission checks without delivering a signal.
 *
 * @param {number|string} pid - Process ID to probe (numeric strings are accepted).
 * @returns {boolean} `true` when the process exists, including when it is
 *   owned by another user; `false` otherwise or when `pid` is not a positive integer.
 */
function isPidAlive(pid) {
  const target = typeof pid === "string" ? Number(pid) : pid;
  // PID 0 and negative PIDs address process groups rather than one process,
  // so signalling them would "succeed" and report a bogus live process.
  if (!Number.isInteger(target) || target <= 0) return false;
  try {
    process.kill(target, 0);
    return true;
  } catch (err) {
    // EPERM means the process exists but we may not signal it: still alive.
    return err?.code === "EPERM";
  }
}
260
/**
 * Check whether the command line recorded for `pid` contains `name`.
 *
 * The command line is read from `/proc/<pid>/cmdline`; any failure while
 * reading or matching is reported as "no match".
 *
 * @param {number} pid - Process ID whose command line is inspected.
 * @param {string} name - Substring to look for.
 * @returns {boolean} `true` if the command line includes `name`, else `false`.
 */
function pidMatchesName(pid, name) {
  try {
    const procEntry = `/proc/${pid}/cmdline`;
    const commandLine = (0, import_node_fs.readFileSync)(procEntry, "utf8");
    return commandLine.includes(name);
  } catch {
    return false;
  }
}
268
/**
 * Read a PID from a PID file.
 *
 * The file content is trimmed and parsed as a base-10 integer; trailing
 * content after the leading number is tolerated, as `parseInt` stops at the
 * first non-digit.
 *
 * @param {string} path - Location of the PID file.
 * @returns {number|null} The positive PID, or `null` when the file cannot be
 *   read or does not start with a positive integer.
 */
function readPidFile(path) {
  let content;
  try {
    content = (0, import_node_fs.readFileSync)(path, "utf8");
  } catch {
    return null;
  }
  const pid = Number.parseInt(content.trim(), 10);
  // 0 and negative values are not process IDs; when passed to kill() they
  // address process groups, so they must never be reported as a valid PID.
  return Number.isSafeInteger(pid) && pid > 0 ? pid : null;
}
276
/**
 * Audit the PID file named in `config`.
 *
 * @param {{pidFile?: string, processName: string}} config - Audit settings.
 * @returns {null|{path: string, pid: number|null, alive: boolean,
 *   matchesProcess: boolean, stale: boolean, reason: string}}
 *   `null` when no PID file is configured; otherwise a status record. When no
 *   PID can be read, `stale` is `true` only if the file is missing.
 */
function auditPidFile(config) {
  if (!config.pidFile) return null;
  const pid = readPidFile(config.pidFile);
  if (pid === null) {
    // Check existence once so `stale` and `reason` always describe the same
    // observation, even if the file appears or disappears concurrently.
    const fileExists = (0, import_node_fs.existsSync)(config.pidFile);
    return {
      path: config.pidFile,
      pid: null,
      alive: false,
      matchesProcess: false,
      stale: !fileExists,
      reason: fileExists ? "PID file exists but content is invalid" : "No PID file found"
    };
  }
  const alive = isPidAlive(pid);
  // Only a live PID can match; a dead one is never compared against the name.
  const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
  const stale = !(alive && matchesProcess);
  let reason;
  if (!alive) {
    reason = `PID ${pid} no longer exists`;
  } else if (matchesProcess) {
    reason = `PID ${pid} alive and matches ${config.processName}`;
  } else {
    reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
  }
  return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
}
302
/**
 * Query `systemctl --user show` for the unit named in `config`.
 *
 * @param {{systemdUnit?: string|null}} config - Audit settings.
 * @returns {null|{unit: string, activeState: string, subState: string,
 *   mainPid: number, restarts: number, result: string,
 *   crashLooping: boolean, failed: boolean}}
 *   `null` when no unit is configured or the `systemctl` call fails.
 */
function auditSystemd(config) {
  if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
  const unit = config.systemdUnit;
  let raw;
  try {
    // Run systemctl directly with an argv array: the unit name is never
    // parsed by a shell, so metacharacters in it cannot inject commands.
    // "--" stops option parsing so a unit starting with "-" stays a unit.
    raw = (0, import_node_child_process.execFileSync)(
      "systemctl",
      [
        "--user",
        "show",
        "--property=ActiveState,SubState,MainPID,NRestarts,Result",
        "--no-pager",
        "--",
        String(unit)
      ],
      // stderr is discarded, matching the former `2>/dev/null`.
      { encoding: "utf8", timeout: 5e3, stdio: ["ignore", "pipe", "ignore"] }
    );
  } catch {
    return null;
  }
  // Output is one `Key=Value` pair per line; values may contain "=".
  const props = new Map();
  for (const line of raw.trim().split("\n")) {
    const [key, ...rest] = line.split("=");
    if (key) props.set(key.trim(), rest.join("=").trim());
  }
  // Missing or non-numeric counters fall back to 0 instead of NaN.
  const toCount = (value) => {
    const parsed = Number.parseInt(value ?? "0", 10);
    return Number.isNaN(parsed) ? 0 : parsed;
  };
  const activeState = props.get("ActiveState") ?? "unknown";
  const subState = props.get("SubState") ?? "unknown";
  return {
    unit,
    activeState,
    subState,
    mainPid: toCount(props.get("MainPID")),
    restarts: toCount(props.get("NRestarts")),
    result: props.get("Result") ?? "unknown",
    crashLooping: activeState === "activating" && subState === "auto-restart",
    failed: activeState === "failed"
  };
}
334
/**
 * Audit the workers registry file named in `config`.
 *
 * The registry is JSON with an optional orchestrator `pid`, an optional
 * `started_at`, and a `tools` object mapping worker names to entries that
 * may carry `pid` and `status`.
 *
 * @param {{workersFile?: string}} config - Audit settings.
 * @returns {null|{orchestratorPid: number|null, orchestratorAlive: boolean,
 *   startedAt: string, workers: Array<object>}}
 *   `null` when no registry is configured, it does not exist, or it cannot
 *   be read or parsed.
 */
function auditWorkers(config) {
  const registryPath = config.workersFile;
  if (!registryPath || !(0, import_node_fs.existsSync)(registryPath)) return null;
  try {
    const registry = JSON.parse((0, import_node_fs.readFileSync)(registryPath, "utf8"));
    const orchestratorPid = registry.pid ?? null;
    const orchestratorAlive = orchestratorPid ? isPidAlive(orchestratorPid) : false;
    const workers = Object.entries(registry.tools ?? {}).map(([name, entry]) => {
      const pid = entry.pid ?? null;
      const alive = pid ? isPidAlive(pid) : false;
      return {
        name,
        pid,
        declaredStatus: entry.status ?? "unknown",
        alive,
        // A worker is stale when it claims to be running but its PID is dead.
        stale: entry.status === "running" && !alive
      };
    });
    return {
      orchestratorPid,
      orchestratorAlive,
      startedAt: registry.started_at ?? "",
      workers
    };
  } catch {
    return null;
  }
}
363
/**
 * List OS processes whose `ps aux` row mentions `processName`.
 *
 * Rows that mention "process-audit" or "grep" are skipped, as are rows whose
 * second column is not a positive PID (which drops the header row).
 *
 * @param {string} processName - Substring to look for in each `ps` row.
 * @returns {Array<{pid: number, cpu: string, mem: string, command: string}>}
 *   Matching processes; an empty array if `ps` fails.
 */
function getOsProcesses(processName) {
  try {
    const psOutput = (0, import_node_child_process.execSync)(`ps aux`, { encoding: "utf8", timeout: 5e3 });
    const matches = [];
    for (const row of psOutput.split("\n")) {
      if (!row.includes(processName)) continue;
      if (row.includes("process-audit") || row.includes("grep")) continue;
      // `ps aux` columns: USER PID %CPU %MEM ... COMMAND (from column 11 on).
      const cols = row.trim().split(/\s+/);
      const pid = parseInt(cols[1] ?? "0", 10);
      if (isNaN(pid) || !(pid > 0)) continue;
      matches.push({
        pid,
        cpu: cols[2] ?? "0",
        mem: cols[3] ?? "0",
        command: cols.slice(10).join(" ")
      });
    }
    return matches;
  } catch {
    return [];
  }
}
379
/**
 * Scan directories for a PID file and a workers registry to audit.
 *
 * The first regular file ending in `.pid` becomes the PID file, and the first
 * regular file named `workers.json` or ending in `-workers.json` becomes the
 * workers registry. The process name is taken from whichever file name first
 * supplies one (`<name>.pid` or `<name>-workers.json`), else "agent".
 *
 * @param {string[]} dirs - Directories to scan, in priority order.
 * @returns {null|{processName: string, pidFile: (string|undefined),
 *   workersFile: (string|undefined)}} `null` when nothing was discovered.
 */
function discoverProcessConfig(dirs) {
  let processName = "";
  let pidFile;
  let workersFile;

  // Unreadable or missing directories simply contribute no entries.
  const listDir = (dir) => {
    if (!(0, import_node_fs.existsSync)(dir)) return [];
    try {
      return (0, import_node_fs.readdirSync)(dir);
    } catch {
      return [];
    }
  };
  // Entries that cannot be stat'ed are treated as "not a file".
  const isRegularFile = (candidate) => {
    try {
      return (0, import_node_fs.statSync)(candidate).isFile();
    } catch {
      return false;
    }
  };

  for (const dir of dirs) {
    for (const entry of listDir(dir)) {
      const fullPath = (0, import_node_path.join)(dir, entry);
      if (!isRegularFile(fullPath)) continue;
      if (!pidFile && entry.endsWith(".pid")) {
        pidFile = fullPath;
        if (!processName) {
          processName = (0, import_node_path.basename)(entry, ".pid");
        }
      }
      const isRegistry = entry === "workers.json" || entry.endsWith("-workers.json");
      if (!workersFile && isRegistry) {
        workersFile = fullPath;
        // A bare "workers.json" carries no process name.
        if (!processName && entry !== "workers.json") {
          processName = (0, import_node_path.basename)(entry, "-workers.json");
        }
      }
    }
  }

  if (!processName && !pidFile && !workersFile) return null;
  return { processName: processName || "agent", pidFile, workersFile };
}
416
/**
 * Run every process check and collect the findings.
 *
 * Combines the PID-file, systemd and workers-registry audits with the OS
 * process list. A listed process is an "orphan" when its PID is not the
 * (non-stale) PID-file PID, the orchestrator PID, a worker PID, or the
 * systemd main PID.
 *
 * @param {object} config - Audit settings passed to each individual audit;
 *   `config.processName` selects the OS processes to list.
 * @returns {{pidFile: object|null, systemd: object|null, workers: object|null,
 *   osProcesses: object[], orphans: object[], problems: string[]}}
 */
function auditProcesses(config) {
  const pidFile = auditPidFile(config);
  const systemd = auditSystemd(config);
  const workers = auditWorkers(config);
  const osProcesses = getOsProcesses(config.processName);

  // Every PID some source accounts for; falsy entries (null/0) are dropped.
  const trackedPids = new Set(
    [
      pidFile?.pid && !pidFile.stale ? pidFile.pid : null,
      workers ? workers.orchestratorPid : null,
      ...(workers ? workers.workers.map((w) => w.pid) : []),
      systemd?.mainPid
    ].filter(Boolean)
  );
  const orphans = osProcesses.filter((proc) => !trackedPids.has(proc.pid));

  const problems = [];
  const report = (condition, message) => {
    if (condition) problems.push(message());
  };
  report(pidFile?.stale, () => `Stale PID file: ${pidFile.reason}`);
  report(systemd?.crashLooping, () => "Systemd unit is crash-looping (auto-restart)");
  report(systemd?.failed, () => "Systemd unit has failed");
  report(systemd && systemd.restarts > 10, () => `High systemd restart count: ${systemd.restarts}`);
  report(
    pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid,
    () => `PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`
  );
  if (workers) {
    for (const w of workers.workers) {
      report(w.stale, () => `Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
    }
  }
  report(
    orphans.length > 0,
    () => `${orphans.length} orphan process(es) not tracked by PID file or workers registry`
  );

  return { pidFile, systemd, workers, osProcesses, orphans, problems };
}
447
+ function formatAuditReport(result) {
448
+ const lines = [];
449
+ lines.push("");
450
+ lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
451
+ lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
452
+ lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
453
+ if (result.pidFile) {
454
+ const pf = result.pidFile;
455
+ const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
456
+ lines.push(`
457
+ PID File: ${pf.path}`);
458
+ lines.push(` ${icon} ${pf.reason}`);
459
+ }
460
+ if (result.systemd) {
461
+ const sd = result.systemd;
462
+ const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
463
+ lines.push(`
464
+ Systemd: ${sd.unit}`);
465
+ lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
466
+ lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
467
+ }
468
+ if (result.workers) {
469
+ const w = result.workers;
470
+ lines.push(`
471
+ Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
472
+ for (const worker of w.workers) {
473
+ const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
474
+ lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
475
+ }
476
+ }
477
+ if (result.osProcesses.length > 0) {
478
+ lines.push(`
479
+ OS Processes (${result.osProcesses.length} total)`);
480
+ for (const p of result.osProcesses) {
481
+ lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
482
+ }
483
+ }
484
+ if (result.orphans.length > 0) {
485
+ lines.push(`
486
+ \u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
487
+ for (const p of result.orphans) {
488
+ lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
489
+ }
490
+ }
491
+ lines.push("");
492
+ if (result.problems.length === 0) {
493
+ lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
494
+ } else {
495
+ lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
496
+ for (const p of result.problems) {
497
+ lines.push(` \u2022 ${p}`);
498
+ }
499
+ }
500
+ lines.push("");
501
+ return lines.join("\n");
502
+ }
503
+
504
+ // src/live.ts
248
505
  var C = {
249
506
  reset: "\x1B[0m",
250
507
  bold: "\x1B[1m",
@@ -277,13 +534,13 @@ function parseArgs(argv) {
277
534
  config.recursive = true;
278
535
  i++;
279
536
  } else if (!arg.startsWith("-")) {
280
- config.dirs.push((0, import_node_path.resolve)(arg));
537
+ config.dirs.push((0, import_node_path2.resolve)(arg));
281
538
  i++;
282
539
  } else {
283
540
  i++;
284
541
  }
285
542
  }
286
- if (config.dirs.length === 0) config.dirs.push((0, import_node_path.resolve)("."));
543
+ if (config.dirs.length === 0) config.dirs.push((0, import_node_path2.resolve)("."));
287
544
  return config;
288
545
  }
289
546
  function printUsage() {
@@ -319,7 +576,7 @@ function scanFiles(dirs, recursive) {
319
576
  const seen = /* @__PURE__ */ new Set();
320
577
  function scanDir(d, topLevel) {
321
578
  try {
322
- const dirStat = (0, import_node_fs.statSync)(d);
579
+ const dirStat = (0, import_node_fs2.statSync)(d);
323
580
  const dirMtime = dirStat.mtime.getTime();
324
581
  const cachedMtime = dirMtimeCache.get(d);
325
582
  if (cachedMtime === dirMtime) {
@@ -335,13 +592,13 @@ function scanFiles(dirs, recursive) {
335
592
  }
336
593
  }
337
594
  const dirResults = [];
338
- for (const f of (0, import_node_fs.readdirSync)(d)) {
595
+ for (const f of (0, import_node_fs2.readdirSync)(d)) {
339
596
  if (f.startsWith(".")) continue;
340
- const fp = (0, import_node_path.join)(d, f);
597
+ const fp = (0, import_node_path2.join)(d, f);
341
598
  if (seen.has(fp)) continue;
342
599
  let stat;
343
600
  try {
344
- stat = (0, import_node_fs.statSync)(fp);
601
+ stat = (0, import_node_fs2.statSync)(fp);
345
602
  } catch {
346
603
  continue;
347
604
  }
@@ -373,13 +630,13 @@ function scanFiles(dirs, recursive) {
373
630
  }
374
631
  function safeReadJson(fp) {
375
632
  try {
376
- return JSON.parse((0, import_node_fs.readFileSync)(fp, "utf8"));
633
+ return JSON.parse((0, import_node_fs2.readFileSync)(fp, "utf8"));
377
634
  } catch {
378
635
  return null;
379
636
  }
380
637
  }
381
638
  function nameFromFile(filename) {
382
- return (0, import_node_path.basename)(filename).replace(/\.(json|jsonl)$/, "").replace(/-state$/, "");
639
+ return (0, import_node_path2.basename)(filename).replace(/\.(json|jsonl)$/, "").replace(/-state$/, "");
383
640
  }
384
641
  function normalizeStatus(val) {
385
642
  if (typeof val !== "string") return "unknown";
@@ -515,18 +772,20 @@ function processJsonFile(file) {
515
772
  const w = info;
516
773
  const status2 = findStatus(w);
517
774
  const ts2 = findTimestamp(w) || findTimestamp(obj) || file.mtime;
518
- const pid = w.pid;
775
+ const rawPid = w.pid;
776
+ const pid = typeof rawPid === "number" ? rawPid : Number(rawPid);
777
+ const validPid = Number.isFinite(pid) && pid > 0;
519
778
  let validatedStatus = status2;
520
779
  let pidAlive = true;
521
- if (pid && (status2 === "running" || status2 === "ok")) {
780
+ if (validPid && (status2 === "running" || status2 === "ok")) {
522
781
  try {
523
- (0, import_node_child_process.execSync)(`kill -0 ${pid} 2>/dev/null`, { stdio: "ignore" });
782
+ process.kill(pid, 0);
524
783
  } catch {
525
784
  pidAlive = false;
526
785
  validatedStatus = "error";
527
786
  }
528
787
  }
529
- const pidLabel = pid ? pidAlive ? `pid: ${pid}` : `pid: ${pid} (dead)` : "";
788
+ const pidLabel = validPid ? pidAlive ? `pid: ${pid}` : `pid: ${pid} (dead)` : "";
530
789
  const detail2 = pidLabel || extractDetail(w);
531
790
  records.push({
532
791
  id: name,
@@ -555,7 +814,7 @@ function processJsonFile(file) {
555
814
  }
556
815
  function processJsonlFile(file) {
557
816
  try {
558
- const content = (0, import_node_fs.readFileSync)(file.path, "utf8").trim();
817
+ const content = (0, import_node_fs2.readFileSync)(file.path, "utf8").trim();
559
818
  if (!content) return [];
560
819
  const lines = content.split("\n");
561
820
  const lineCount = lines.length;
@@ -707,6 +966,9 @@ var prevFileCount = 0;
707
966
  var newExecCount = 0;
708
967
  var sessionStart = Date.now();
709
968
  var firstRender = true;
969
+ var cachedAuditConfig = null;
970
+ var cachedAuditResult = null;
971
+ var lastAuditTime = 0;
710
972
  var fileCache = /* @__PURE__ */ new Map();
711
973
  function getRecordsCached(f) {
712
974
  const cached = fileCache.get(f.path);
@@ -826,6 +1088,22 @@ function render(config) {
826
1088
  const level = Math.round(v / maxBucket * 8);
827
1089
  return (failBuckets[i] > 0 ? C.red : C.green) + sparkChars[level] + C.reset;
828
1090
  }).join("");
1091
+ let auditResult = null;
1092
+ if (now - lastAuditTime > 1e4) {
1093
+ if (!cachedAuditConfig) {
1094
+ cachedAuditConfig = discoverProcessConfig(config.dirs);
1095
+ }
1096
+ if (cachedAuditConfig) {
1097
+ try {
1098
+ auditResult = auditProcesses(cachedAuditConfig);
1099
+ cachedAuditResult = auditResult;
1100
+ lastAuditTime = now;
1101
+ } catch {
1102
+ }
1103
+ }
1104
+ } else {
1105
+ auditResult = cachedAuditResult;
1106
+ }
829
1107
  const distributedTraces = [];
830
1108
  if (allTraces.length > 1) {
831
1109
  const traceGroups = groupByTraceId(allTraces);
@@ -906,6 +1184,41 @@ function render(config) {
906
1184
  );
907
1185
  writeLine(L, "");
908
1186
  writeLine(L, ` ${C.bold}Activity (1h)${C.reset} ${spark} ${C.dim}\u2190 now${C.reset}`);
1187
+ if (auditResult) {
1188
+ const ar = auditResult;
1189
+ const healthy = ar.problems.length === 0;
1190
+ const healthIcon = healthy ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
1191
+ const healthLabel = healthy ? `${C.green}healthy${C.reset}` : `${C.red}${ar.problems.length} issue(s)${C.reset}`;
1192
+ const workerParts = [];
1193
+ if (ar.workers) {
1194
+ for (const w of ar.workers.workers) {
1195
+ const wIcon = w.declaredStatus === "running" && w.alive ? `${C.green}\u25CF${C.reset}` : w.stale ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
1196
+ workerParts.push(`${wIcon} ${w.name}`);
1197
+ }
1198
+ }
1199
+ let sysdLabel = "";
1200
+ if (ar.systemd) {
1201
+ const si = ar.systemd.activeState === "active" ? `${C.green}\u25CF${C.reset}` : ar.systemd.crashLooping ? `${C.yellow}\u25CF${C.reset}` : ar.systemd.failed ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
1202
+ sysdLabel = ` ${C.bold}Systemd${C.reset} ${si} ${ar.systemd.activeState}`;
1203
+ if (ar.systemd.restarts > 0) sysdLabel += ` ${C.dim}(${ar.systemd.restarts} restarts)${C.reset}`;
1204
+ }
1205
+ let pidLabel = "";
1206
+ if (ar.pidFile?.pid) {
1207
+ const pi = ar.pidFile.alive && ar.pidFile.matchesProcess ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
1208
+ pidLabel = ` ${C.bold}PID${C.reset} ${pi} ${ar.pidFile.pid}`;
1209
+ }
1210
+ writeLine(L, "");
1211
+ writeLine(L, ` ${C.bold}${C.under}Process Health${C.reset}`);
1212
+ writeLine(L, ` ${healthIcon} ${healthLabel}${pidLabel}${sysdLabel} ${C.bold}Procs${C.reset} ${C.dim}${ar.osProcesses.length}${C.reset} ${ar.orphans.length > 0 ? `${C.red}Orphans ${ar.orphans.length}${C.reset}` : `${C.dim}Orphans 0${C.reset}`}`);
1213
+ if (workerParts.length > 0) {
1214
+ writeLine(L, ` ${C.dim}Workers${C.reset} ${workerParts.join(" ")}`);
1215
+ }
1216
+ if (!healthy) {
1217
+ for (const p of ar.problems.slice(0, 3)) {
1218
+ writeLine(L, ` ${C.red}\u2022${C.reset} ${C.dim}${p}${C.reset}`);
1219
+ }
1220
+ }
1221
+ }
909
1222
  writeLine(L, "");
910
1223
  writeLine(
911
1224
  L,
@@ -1013,21 +1326,24 @@ function render(config) {
1013
1326
  writeLine(L, ` ${C.dim}Press Ctrl+C to exit${C.reset}`);
1014
1327
  flushLines(L);
1015
1328
  }
1016
/**
 * Count how many parent links lead upward from `spanId` in a distributed trace.
 *
 * The walk stops at a span with no graph, a graph with no `parentSpanId`, or
 * a span already visited (so cyclic parent references terminate).
 *
 * @param {{graphs: Map<string, {parentSpanId?: string}>}} dt - Distributed
 *   trace whose `graphs` map is keyed by span id.
 * @param {string} [spanId] - Span to start from.
 * @param {Set<string>} [visited] - Span ids already seen; extended in place.
 * @returns {number} Number of parent hops followed.
 */
function getDistDepth(dt, spanId, visited) {
  if (!spanId) return 0;
  const seen = visited ?? /* @__PURE__ */ new Set();
  let depth = 0;
  let current = spanId;
  while (current && !seen.has(current)) {
    seen.add(current);
    const graph = dt.graphs.get(current);
    if (!graph || !graph.parentSpanId) break;
    depth += 1;
    current = graph.parentSpanId;
  }
  return depth;
}
1022
1338
  function startLive(argv) {
1023
1339
  const config = parseArgs(argv);
1024
- const valid = config.dirs.filter((d) => (0, import_node_fs.existsSync)(d));
1340
+ const valid = config.dirs.filter((d) => (0, import_node_fs2.existsSync)(d));
1025
1341
  if (valid.length === 0) {
1026
1342
  console.error(`No valid directories found: ${config.dirs.join(", ")}`);
1027
1343
  console.error("Specify directories containing JSON/JSONL files: agentflow live <dir> [dir...]");
1028
1344
  process.exit(1);
1029
1345
  }
1030
- const invalid = config.dirs.filter((d) => !(0, import_node_fs.existsSync)(d));
1346
+ const invalid = config.dirs.filter((d) => !(0, import_node_fs2.existsSync)(d));
1031
1347
  if (invalid.length > 0) {
1032
1348
  console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
1033
1349
  }
@@ -1036,7 +1352,7 @@ function startLive(argv) {
1036
1352
  let debounce = null;
1037
1353
  for (const dir of config.dirs) {
1038
1354
  try {
1039
- (0, import_node_fs.watch)(dir, { recursive: config.recursive }, () => {
1355
+ (0, import_node_fs2.watch)(dir, { recursive: config.recursive }, () => {
1040
1356
  if (debounce) clearTimeout(debounce);
1041
1357
  debounce = setTimeout(() => render(config), 500);
1042
1358
  });
@@ -1052,8 +1368,8 @@ function startLive(argv) {
1052
1368
 
1053
1369
  // src/runner.ts
1054
1370
  var import_node_child_process2 = require("child_process");
1055
- var import_node_fs2 = require("fs");
1056
- var import_node_path2 = require("path");
1371
+ var import_node_fs3 = require("fs");
1372
+ var import_node_path3 = require("path");
1057
1373
 
1058
1374
  // src/graph-builder.ts
1059
1375
  var import_crypto = require("crypto");
@@ -1289,12 +1605,12 @@ function globToRegex(pattern) {
1289
1605
  }
1290
1606
  function snapshotDir(dir, patterns) {
1291
1607
  const result = /* @__PURE__ */ new Map();
1292
- if (!(0, import_node_fs2.existsSync)(dir)) return result;
1293
- for (const entry of (0, import_node_fs2.readdirSync)(dir)) {
1608
+ if (!(0, import_node_fs3.existsSync)(dir)) return result;
1609
+ for (const entry of (0, import_node_fs3.readdirSync)(dir)) {
1294
1610
  if (!patterns.some((re) => re.test(entry))) continue;
1295
- const full = (0, import_node_path2.join)(dir, entry);
1611
+ const full = (0, import_node_path3.join)(dir, entry);
1296
1612
  try {
1297
- const stat = (0, import_node_fs2.statSync)(full);
1613
+ const stat = (0, import_node_fs3.statSync)(full);
1298
1614
  if (stat.isFile()) {
1299
1615
  result.set(full, stat.mtimeMs);
1300
1616
  }
@@ -1304,7 +1620,7 @@ function snapshotDir(dir, patterns) {
1304
1620
  return result;
1305
1621
  }
1306
1622
  function agentIdFromFilename(filePath) {
1307
- const base = (0, import_node_path2.basename)(filePath, ".json");
1623
+ const base = (0, import_node_path3.basename)(filePath, ".json");
1308
1624
  const cleaned = base.replace(/-state$/, "");
1309
1625
  return `alfred-${cleaned}`;
1310
1626
  }
@@ -1326,7 +1642,7 @@ async function runTraced(config) {
1326
1642
  if (command.length === 0) {
1327
1643
  throw new Error("runTraced: command must not be empty");
1328
1644
  }
1329
- const resolvedTracesDir = (0, import_node_path2.resolve)(tracesDir);
1645
+ const resolvedTracesDir = (0, import_node_path3.resolve)(tracesDir);
1330
1646
  const patterns = watchPatterns.map(globToRegex);
1331
1647
  const orchestrator = createGraphBuilder({ agentId, trigger });
1332
1648
  const { traceId, spanId } = orchestrator.traceContext;
@@ -1409,15 +1725,19 @@ async function runTraced(config) {
1409
1725
  childBuilder.endNode(childRootId);
1410
1726
  allGraphs.push(childBuilder.build());
1411
1727
  }
1412
- if (!(0, import_node_fs2.existsSync)(resolvedTracesDir)) {
1413
- (0, import_node_fs2.mkdirSync)(resolvedTracesDir, { recursive: true });
1728
+ if (!(0, import_node_fs3.existsSync)(resolvedTracesDir)) {
1729
+ (0, import_node_fs3.mkdirSync)(resolvedTracesDir, { recursive: true });
1414
1730
  }
1415
1731
  const ts = fileTimestamp();
1416
1732
  const tracePaths = [];
1417
1733
  for (const graph of allGraphs) {
1418
1734
  const filename = `${graph.agentId}-${ts}.json`;
1419
- const outPath = (0, import_node_path2.join)(resolvedTracesDir, filename);
1420
- (0, import_node_fs2.writeFileSync)(outPath, JSON.stringify(graphToJson(graph), null, 2), "utf-8");
1735
+ const outPath = (0, import_node_path3.join)(resolvedTracesDir, filename);
1736
+ const resolvedOut = (0, import_node_path3.resolve)(outPath);
1737
+ if (!resolvedOut.startsWith(resolvedTracesDir + "/") && resolvedOut !== resolvedTracesDir) {
1738
+ throw new Error(`Path traversal detected: agentId "${graph.agentId}" escapes traces directory`);
1739
+ }
1740
+ (0, import_node_fs3.writeFileSync)(outPath, JSON.stringify(graphToJson(graph), null, 2), "utf-8");
1421
1741
  tracePaths.push(outPath);
1422
1742
  }
1423
1743
  if (tracePaths.length > 0) {
@@ -1469,6 +1789,11 @@ function createTraceStore(dir) {
1469
1789
  await ensureDir();
1470
1790
  const json = graphToJson(graph);
1471
1791
  const filePath = (0, import_path.join)(dir, `${graph.id}.json`);
1792
+ const resolvedBase = (0, import_path.resolve)(dir);
1793
+ const resolvedPath = (0, import_path.resolve)(filePath);
1794
+ if (!resolvedPath.startsWith(resolvedBase + "/") && resolvedPath !== resolvedBase) {
1795
+ throw new Error(`Path traversal detected: "${graph.id}" escapes base directory`);
1796
+ }
1472
1797
  await (0, import_promises.writeFile)(filePath, JSON.stringify(json, null, 2), "utf-8");
1473
1798
  return filePath;
1474
1799
  },
@@ -1742,11 +2067,11 @@ async function traceShow(argv) {
1742
2067
  let graph = await store.get(graphId);
1743
2068
  if (!graph) {
1744
2069
  const { readFile: readFile2 } = await import("fs/promises");
1745
- const { join: join5 } = await import("path");
2070
+ const { join: join6 } = await import("path");
1746
2071
  const fname = graphId.endsWith(".json") ? graphId : `${graphId}.json`;
1747
2072
  try {
1748
2073
  const { loadGraph: loadGraph2 } = await Promise.resolve().then(() => (init_loader(), loader_exports));
1749
- const content = await readFile2(join5(dir, fname), "utf-8");
2074
+ const content = await readFile2(join6(dir, fname), "utf-8");
1750
2075
  graph = loadGraph2(content);
1751
2076
  } catch {
1752
2077
  }
@@ -1771,11 +2096,11 @@ async function traceTimeline(argv) {
1771
2096
  let graph = await store.get(graphId);
1772
2097
  if (!graph) {
1773
2098
  const { readFile: readFile2 } = await import("fs/promises");
1774
- const { join: join5 } = await import("path");
2099
+ const { join: join6 } = await import("path");
1775
2100
  const fname = graphId.endsWith(".json") ? graphId : `${graphId}.json`;
1776
2101
  try {
1777
2102
  const { loadGraph: loadGraph2 } = await Promise.resolve().then(() => (init_loader(), loader_exports));
1778
- const content = await readFile2(join5(dir, fname), "utf-8");
2103
+ const content = await readFile2(join6(dir, fname), "utf-8");
1779
2104
  graph = loadGraph2(content);
1780
2105
  } catch {
1781
2106
  }
@@ -1856,9 +2181,9 @@ async function handleTrace(argv) {
1856
2181
  }
1857
2182
 
1858
2183
  // src/watch.ts
1859
- var import_node_fs4 = require("fs");
2184
+ var import_node_fs5 = require("fs");
1860
2185
  var import_node_os = require("os");
1861
- var import_node_path3 = require("path");
2186
+ var import_node_path4 = require("path");
1862
2187
 
1863
2188
  // src/watch-alerts.ts
1864
2189
  var import_node_child_process3 = require("child_process");
@@ -1916,7 +2241,7 @@ function sendTelegram(payload, botToken, chatId) {
1916
2241
  text: formatTelegram(payload),
1917
2242
  parse_mode: "Markdown"
1918
2243
  });
1919
- return new Promise((resolve6, reject) => {
2244
+ return new Promise((resolve7, reject) => {
1920
2245
  const req = (0, import_node_https.request)(
1921
2246
  `https://api.telegram.org/bot${botToken}/sendMessage`,
1922
2247
  {
@@ -1925,7 +2250,7 @@ function sendTelegram(payload, botToken, chatId) {
1925
2250
  },
1926
2251
  (res) => {
1927
2252
  res.resume();
1928
- if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve6();
2253
+ if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve7();
1929
2254
  else reject(new Error(`Telegram API returned ${res.statusCode}`));
1930
2255
  }
1931
2256
  );
@@ -1938,7 +2263,7 @@ function sendWebhook(payload, url) {
1938
2263
  const body = JSON.stringify(payload);
1939
2264
  const isHttps = url.startsWith("https");
1940
2265
  const doRequest = isHttps ? import_node_https.request : import_node_http.request;
1941
- return new Promise((resolve6, reject) => {
2266
+ return new Promise((resolve7, reject) => {
1942
2267
  const req = doRequest(
1943
2268
  url,
1944
2269
  {
@@ -1947,7 +2272,7 @@ function sendWebhook(payload, url) {
1947
2272
  },
1948
2273
  (res) => {
1949
2274
  res.resume();
1950
- if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve6();
2275
+ if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve7();
1951
2276
  else reject(new Error(`Webhook returned ${res.statusCode}`));
1952
2277
  }
1953
2278
  );
@@ -1960,7 +2285,7 @@ function sendWebhook(payload, url) {
1960
2285
  });
1961
2286
  }
1962
2287
  function sendCommand(payload, cmd) {
1963
- return new Promise((resolve6, reject) => {
2288
+ return new Promise((resolve7, reject) => {
1964
2289
  const env = {
1965
2290
  ...process.env,
1966
2291
  AGENTFLOW_ALERT_AGENT: payload.agentId,
@@ -1973,13 +2298,13 @@ function sendCommand(payload, cmd) {
1973
2298
  };
1974
2299
  (0, import_node_child_process3.exec)(cmd, { env, timeout: 3e4 }, (err) => {
1975
2300
  if (err) reject(err);
1976
- else resolve6();
2301
+ else resolve7();
1977
2302
  });
1978
2303
  });
1979
2304
  }
1980
2305
 
1981
2306
  // src/watch-state.ts
1982
- var import_node_fs3 = require("fs");
2307
+ var import_node_fs4 = require("fs");
1983
2308
  function parseDuration(input) {
1984
2309
  const match = input.match(/^(\d+(?:\.\d+)?)\s*(s|m|h|d)$/i);
1985
2310
  if (!match) {
@@ -2004,9 +2329,9 @@ function emptyState() {
2004
2329
  return { version: 1, agents: {}, lastPollTime: 0 };
2005
2330
  }
2006
2331
  function loadWatchState(filePath) {
2007
- if (!(0, import_node_fs3.existsSync)(filePath)) return emptyState();
2332
+ if (!(0, import_node_fs4.existsSync)(filePath)) return emptyState();
2008
2333
  try {
2009
- const raw = JSON.parse((0, import_node_fs3.readFileSync)(filePath, "utf8"));
2334
+ const raw = JSON.parse((0, import_node_fs4.readFileSync)(filePath, "utf8"));
2010
2335
  if (raw.version !== 1 || typeof raw.agents !== "object") return emptyState();
2011
2336
  return raw;
2012
2337
  } catch {
@@ -2016,11 +2341,11 @@ function loadWatchState(filePath) {
2016
2341
  function saveWatchState(filePath, state) {
2017
2342
  const tmp = filePath + ".tmp";
2018
2343
  try {
2019
- (0, import_node_fs3.writeFileSync)(tmp, JSON.stringify(state, null, 2), "utf8");
2020
- (0, import_node_fs3.renameSync)(tmp, filePath);
2344
+ (0, import_node_fs4.writeFileSync)(tmp, JSON.stringify(state, null, 2), "utf8");
2345
+ (0, import_node_fs4.renameSync)(tmp, filePath);
2021
2346
  } catch {
2022
2347
  try {
2023
- (0, import_node_fs3.writeFileSync)(filePath, JSON.stringify(state, null, 2), "utf8");
2348
+ (0, import_node_fs4.writeFileSync)(filePath, JSON.stringify(state, null, 2), "utf8");
2024
2349
  } catch {
2025
2350
  }
2026
2351
  }
@@ -2248,20 +2573,20 @@ function parseWatchArgs(argv) {
2248
2573
  recursive = true;
2249
2574
  i++;
2250
2575
  } else if (!arg.startsWith("-")) {
2251
- dirs.push((0, import_node_path3.resolve)(arg));
2576
+ dirs.push((0, import_node_path4.resolve)(arg));
2252
2577
  i++;
2253
2578
  } else {
2254
2579
  i++;
2255
2580
  }
2256
2581
  }
2257
- if (dirs.length === 0) dirs.push((0, import_node_path3.resolve)("."));
2582
+ if (dirs.length === 0) dirs.push((0, import_node_path4.resolve)("."));
2258
2583
  if (alertConditions.length === 0) {
2259
2584
  alertConditions.push({ type: "error" });
2260
2585
  alertConditions.push({ type: "recovery" });
2261
2586
  }
2262
2587
  notifyChannels.unshift({ type: "stdout" });
2263
2588
  if (!stateFilePath) {
2264
- stateFilePath = (0, import_node_path3.join)(dirs[0], ".agentflow-watch-state.json");
2589
+ stateFilePath = (0, import_node_path4.join)(dirs[0], ".agentflow-watch-state.json");
2265
2590
  }
2266
2591
  return {
2267
2592
  dirs,
@@ -2269,7 +2594,7 @@ function parseWatchArgs(argv) {
2269
2594
  pollIntervalMs,
2270
2595
  alertConditions,
2271
2596
  notifyChannels,
2272
- stateFilePath: (0, import_node_path3.resolve)(stateFilePath),
2597
+ stateFilePath: (0, import_node_path4.resolve)(stateFilePath),
2273
2598
  cooldownMs
2274
2599
  };
2275
2600
  }
@@ -2323,12 +2648,12 @@ Examples:
2323
2648
  }
2324
2649
  function startWatch(argv) {
2325
2650
  const config = parseWatchArgs(argv);
2326
- const valid = config.dirs.filter((d) => (0, import_node_fs4.existsSync)(d));
2651
+ const valid = config.dirs.filter((d) => (0, import_node_fs5.existsSync)(d));
2327
2652
  if (valid.length === 0) {
2328
2653
  console.error(`No valid directories found: ${config.dirs.join(", ")}`);
2329
2654
  process.exit(1);
2330
2655
  }
2331
- const invalid = config.dirs.filter((d) => !(0, import_node_fs4.existsSync)(d));
2656
+ const invalid = config.dirs.filter((d) => !(0, import_node_fs5.existsSync)(d));
2332
2657
  if (invalid.length > 0) {
2333
2658
  console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
2334
2659
  }
@@ -2420,6 +2745,7 @@ Commands:
2420
2745
  live [dir...] [options] Real-time terminal monitor (auto-detects any JSON/JSONL)
2421
2746
  watch [dir...] [options] Headless alert system \u2014 detects failures, sends notifications
2422
2747
  trace <command> [options] Inspect saved execution traces (list, show, timeline, stuck, loops)
2748
+ audit [options] Audit OS processes \u2014 detect stale PIDs, orphans, systemd issues
2423
2749
 
2424
2750
  Run \`agentflow <command> --help\` for command-specific options.
2425
2751
 
@@ -2571,9 +2897,102 @@ async function runCommand(argv) {
2571
2897
  process.exit(1);
2572
2898
  }
2573
2899
  }
2900
+ function parseAuditArgs(argv) {
2901
+ let processName = "";
2902
+ let pidFile;
2903
+ let workersFile;
2904
+ let systemdUnit;
2905
+ const discoverDirs = [];
2906
+ const args = argv.slice(0);
2907
+ if (args[0] === "audit") args.shift();
2908
+ let i = 0;
2909
+ while (i < args.length) {
2910
+ const arg = args[i];
2911
+ if (arg === "--help" || arg === "-h") {
2912
+ printAuditUsage();
2913
+ process.exit(0);
2914
+ } else if (arg === "--process" || arg === "-p") {
2915
+ i++;
2916
+ processName = args[i] ?? "";
2917
+ i++;
2918
+ } else if (arg === "--pid-file") {
2919
+ i++;
2920
+ pidFile = args[i];
2921
+ i++;
2922
+ } else if (arg === "--workers-file") {
2923
+ i++;
2924
+ workersFile = args[i];
2925
+ i++;
2926
+ } else if (arg === "--systemd") {
2927
+ i++;
2928
+ systemdUnit = args[i];
2929
+ i++;
2930
+ } else if (arg === "--no-systemd") {
2931
+ systemdUnit = null;
2932
+ i++;
2933
+ } else if (!arg.startsWith("-")) {
2934
+ discoverDirs.push((0, import_path3.resolve)(arg));
2935
+ i++;
2936
+ } else {
2937
+ i++;
2938
+ }
2939
+ }
2940
+ if (!processName && !pidFile && !workersFile && discoverDirs.length > 0) {
2941
+ const discovered = discoverProcessConfig(discoverDirs);
2942
+ if (discovered) {
2943
+ console.log(`Auto-discovered: process="${discovered.processName}"${discovered.pidFile ? ` pid-file=${discovered.pidFile}` : ""}${discovered.workersFile ? ` workers=${discovered.workersFile}` : ""}`);
2944
+ return { ...discovered, systemdUnit };
2945
+ }
2946
+ }
2947
+ if (!processName) {
2948
+ console.error("Error: --process <name> is required, or provide directories for auto-discovery.");
2949
+ console.error("Examples:");
2950
+ console.error(" agentflow audit --process alfred --pid-file ./data/alfred.pid");
2951
+ console.error(" agentflow audit ./data # auto-discovers *.pid and workers.json");
2952
+ process.exit(1);
2953
+ }
2954
+ return { processName, pidFile, workersFile, systemdUnit };
2955
+ }
2956
+ function printAuditUsage() {
2957
+ console.log(
2958
+ `
2959
+ AgentFlow Audit \u2014 OS-level process health check for agent systems.
2960
+
2961
+ Detects stale PID files, orphan processes, systemd crash loops, and
2962
+ mismatches between declared state and actual OS process state.
2963
+
2964
+ Usage:
2965
+ agentflow audit [dir...] [options]
2966
+ agentflow audit --process <name> [options]
2967
+
2968
+ Arguments:
2969
+ dir Directories to scan for auto-discovery of *.pid and workers.json
2970
+
2971
+ Options:
2972
+ -p, --process <name> Process name to search for (e.g. "alfred", "myagent")
2973
+ --pid-file <path> Path to PID file
2974
+ --workers-file <path> Path to workers.json or process registry
2975
+ --systemd <unit> Systemd user unit name (e.g. "alfred.service")
2976
+ --no-systemd Skip systemd checks
2977
+ -h, --help Show this help message
2978
+
2979
+ Examples:
2980
+ agentflow audit ./data # auto-discover from data directory
2981
+ agentflow audit --process alfred --systemd alfred.service
2982
+ agentflow audit --process myagent --pid-file /var/run/myagent.pid --workers-file ./workers.json
2983
+ agentflow audit --process crewai --no-systemd
2984
+ `.trim()
2985
+ );
2986
+ }
2987
+ function runAudit(argv) {
2988
+ const config = parseAuditArgs(argv);
2989
+ const result = auditProcesses(config);
2990
+ console.log(formatAuditReport(result));
2991
+ process.exit(result.problems.length > 0 ? 1 : 0);
2992
+ }
2574
2993
  async function main() {
2575
2994
  const argv = process.argv.slice(2);
2576
- const knownCommands = ["run", "live", "watch", "trace"];
2995
+ const knownCommands = ["run", "live", "watch", "trace", "audit"];
2577
2996
  if (argv.length === 0 || !knownCommands.includes(argv[0]) && (argv.includes("--help") || argv.includes("-h"))) {
2578
2997
  printHelp();
2579
2998
  process.exit(0);
@@ -2592,6 +3011,9 @@ async function main() {
2592
3011
  case "trace":
2593
3012
  await handleTrace(argv);
2594
3013
  break;
3014
+ case "audit":
3015
+ runAudit(argv);
3016
+ break;
2595
3017
  default:
2596
3018
  if (!subcommand?.startsWith("-")) {
2597
3019
  startLive(["live", ...argv]);