agentflow-core 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -639,8 +639,8 @@ function withGuards(builder, config) {
639
639
  }
640
640
 
641
641
  // src/live.ts
642
- var import_node_fs = require("fs");
643
- var import_node_path = require("path");
642
+ var import_node_fs2 = require("fs");
643
+ var import_node_path2 = require("path");
644
644
 
645
645
  // src/loader.ts
646
646
  function toNodesMap(raw) {
@@ -694,6 +694,262 @@ function graphToJson(graph) {
694
694
  };
695
695
  }
696
696
 
697
+ // src/process-audit.ts
698
+ var import_node_child_process = require("child_process");
699
+ var import_node_fs = require("fs");
700
+ var import_node_path = require("path");
701
+ function isPidAlive(pid) {
702
+ try {
703
+ process.kill(pid, 0);
704
+ return true;
705
+ } catch {
706
+ return false;
707
+ }
708
+ }
709
+ function pidMatchesName(pid, name) {
710
+ try {
711
+ const cmdline = (0, import_node_fs.readFileSync)(`/proc/${pid}/cmdline`, "utf8");
712
+ return cmdline.includes(name);
713
+ } catch {
714
+ return false;
715
+ }
716
+ }
717
+ function readPidFile(path) {
718
+ try {
719
+ const pid = parseInt((0, import_node_fs.readFileSync)(path, "utf8").trim(), 10);
720
+ return isNaN(pid) ? null : pid;
721
+ } catch {
722
+ return null;
723
+ }
724
+ }
725
+ function auditPidFile(config) {
726
+ if (!config.pidFile) return null;
727
+ const pid = readPidFile(config.pidFile);
728
+ if (pid === null) {
729
+ return {
730
+ path: config.pidFile,
731
+ pid: null,
732
+ alive: false,
733
+ matchesProcess: false,
734
+ stale: !(0, import_node_fs.existsSync)(config.pidFile),
735
+ reason: (0, import_node_fs.existsSync)(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
736
+ };
737
+ }
738
+ const alive = isPidAlive(pid);
739
+ const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
740
+ const stale = !alive || alive && !matchesProcess;
741
+ let reason;
742
+ if (alive && matchesProcess) {
743
+ reason = `PID ${pid} alive and matches ${config.processName}`;
744
+ } else if (alive && !matchesProcess) {
745
+ reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
746
+ } else {
747
+ reason = `PID ${pid} no longer exists`;
748
+ }
749
+ return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
750
+ }
751
+ function auditSystemd(config) {
752
+ if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
753
+ const unit = config.systemdUnit;
754
+ try {
755
+ const raw = (0, import_node_child_process.execSync)(
756
+ `systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
757
+ { encoding: "utf8", timeout: 5e3 }
758
+ );
759
+ const props = {};
760
+ for (const line of raw.trim().split("\n")) {
761
+ const [k, ...v] = line.split("=");
762
+ if (k) props[k.trim()] = v.join("=").trim();
763
+ }
764
+ const activeState = props["ActiveState"] ?? "unknown";
765
+ const subState = props["SubState"] ?? "unknown";
766
+ const mainPid = parseInt(props["MainPID"] ?? "0", 10);
767
+ const restarts = parseInt(props["NRestarts"] ?? "0", 10);
768
+ const result = props["Result"] ?? "unknown";
769
+ return {
770
+ unit,
771
+ activeState,
772
+ subState,
773
+ mainPid,
774
+ restarts,
775
+ result,
776
+ crashLooping: activeState === "activating" && subState === "auto-restart",
777
+ failed: activeState === "failed"
778
+ };
779
+ } catch {
780
+ return null;
781
+ }
782
+ }
783
+ function auditWorkers(config) {
784
+ if (!config.workersFile || !(0, import_node_fs.existsSync)(config.workersFile)) return null;
785
+ try {
786
+ const data = JSON.parse((0, import_node_fs.readFileSync)(config.workersFile, "utf8"));
787
+ const orchPid = data.pid ?? null;
788
+ const orchAlive = orchPid ? isPidAlive(orchPid) : false;
789
+ const workers = [];
790
+ for (const [name, info] of Object.entries(data.tools ?? {})) {
791
+ const w = info;
792
+ const wPid = w.pid ?? null;
793
+ const wAlive = wPid ? isPidAlive(wPid) : false;
794
+ workers.push({
795
+ name,
796
+ pid: wPid,
797
+ declaredStatus: w.status ?? "unknown",
798
+ alive: wAlive,
799
+ stale: w.status === "running" && !wAlive
800
+ });
801
+ }
802
+ return {
803
+ orchestratorPid: orchPid,
804
+ orchestratorAlive: orchAlive,
805
+ startedAt: data.started_at ?? "",
806
+ workers
807
+ };
808
+ } catch {
809
+ return null;
810
+ }
811
+ }
812
+ function getOsProcesses(processName) {
813
+ try {
814
+ const raw = (0, import_node_child_process.execSync)(`ps aux`, { encoding: "utf8", timeout: 5e3 });
815
+ return raw.split("\n").filter((line) => line.includes(processName) && !line.includes("process-audit") && !line.includes("grep")).map((line) => {
816
+ const parts = line.trim().split(/\s+/);
817
+ return {
818
+ pid: parseInt(parts[1] ?? "0", 10),
819
+ cpu: parts[2] ?? "0",
820
+ mem: parts[3] ?? "0",
821
+ command: parts.slice(10).join(" ")
822
+ };
823
+ }).filter((p) => !isNaN(p.pid) && p.pid > 0);
824
+ } catch {
825
+ return [];
826
+ }
827
+ }
828
+ function discoverProcessConfig(dirs) {
829
+ let pidFile;
830
+ let workersFile;
831
+ let processName = "";
832
+ for (const dir of dirs) {
833
+ if (!(0, import_node_fs.existsSync)(dir)) continue;
834
+ let entries;
835
+ try {
836
+ entries = (0, import_node_fs.readdirSync)(dir);
837
+ } catch {
838
+ continue;
839
+ }
840
+ for (const f of entries) {
841
+ const fp = (0, import_node_path.join)(dir, f);
842
+ try {
843
+ if (!(0, import_node_fs.statSync)(fp).isFile()) continue;
844
+ } catch {
845
+ continue;
846
+ }
847
+ if (f.endsWith(".pid") && !pidFile) {
848
+ pidFile = fp;
849
+ if (!processName) {
850
+ processName = (0, import_node_path.basename)(f, ".pid");
851
+ }
852
+ }
853
+ if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
854
+ workersFile = fp;
855
+ if (!processName && f !== "workers.json") {
856
+ processName = (0, import_node_path.basename)(f, "-workers.json");
857
+ }
858
+ }
859
+ }
860
+ }
861
+ if (!processName && !pidFile && !workersFile) return null;
862
+ if (!processName) processName = "agent";
863
+ return { processName, pidFile, workersFile };
864
+ }
865
+ function auditProcesses(config) {
866
+ const pidFile = auditPidFile(config);
867
+ const systemd = auditSystemd(config);
868
+ const workers = auditWorkers(config);
869
+ const osProcesses = getOsProcesses(config.processName);
870
+ const knownPids = /* @__PURE__ */ new Set();
871
+ if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
872
+ if (workers) {
873
+ if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
874
+ for (const w of workers.workers) {
875
+ if (w.pid) knownPids.add(w.pid);
876
+ }
877
+ }
878
+ if (systemd?.mainPid) knownPids.add(systemd.mainPid);
879
+ const orphans = osProcesses.filter((p) => !knownPids.has(p.pid));
880
+ const problems = [];
881
+ if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
882
+ if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
883
+ if (systemd?.failed) problems.push("Systemd unit has failed");
884
+ if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
885
+ if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
886
+ problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
887
+ }
888
+ if (workers) {
889
+ for (const w of workers.workers) {
890
+ if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
891
+ }
892
+ }
893
+ if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
894
+ return { pidFile, systemd, workers, osProcesses, orphans, problems };
895
+ }
896
+ function formatAuditReport(result) {
897
+ const lines = [];
898
+ lines.push("");
899
+ lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
900
+ lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
901
+ lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
902
+ if (result.pidFile) {
903
+ const pf = result.pidFile;
904
+ const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
905
+ lines.push(`
906
+ PID File: ${pf.path}`);
907
+ lines.push(` ${icon} ${pf.reason}`);
908
+ }
909
+ if (result.systemd) {
910
+ const sd = result.systemd;
911
+ const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
912
+ lines.push(`
913
+ Systemd: ${sd.unit}`);
914
+ lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
915
+ lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
916
+ }
917
+ if (result.workers) {
918
+ const w = result.workers;
919
+ lines.push(`
920
+ Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
921
+ for (const worker of w.workers) {
922
+ const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
923
+ lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
924
+ }
925
+ }
926
+ if (result.osProcesses.length > 0) {
927
+ lines.push(`
928
+ OS Processes (${result.osProcesses.length} total)`);
929
+ for (const p of result.osProcesses) {
930
+ lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
931
+ }
932
+ }
933
+ if (result.orphans.length > 0) {
934
+ lines.push(`
935
+ \u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
936
+ for (const p of result.orphans) {
937
+ lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
938
+ }
939
+ }
940
+ lines.push("");
941
+ if (result.problems.length === 0) {
942
+ lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
943
+ } else {
944
+ lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
945
+ for (const p of result.problems) {
946
+ lines.push(` \u2022 ${p}`);
947
+ }
948
+ }
949
+ lines.push("");
950
+ return lines.join("\n");
951
+ }
952
+
697
953
  // src/live.ts
698
954
  var C = {
699
955
  reset: "\x1B[0m",
@@ -727,13 +983,13 @@ function parseArgs(argv) {
727
983
  config.recursive = true;
728
984
  i++;
729
985
  } else if (!arg.startsWith("-")) {
730
- config.dirs.push((0, import_node_path.resolve)(arg));
986
+ config.dirs.push((0, import_node_path2.resolve)(arg));
731
987
  i++;
732
988
  } else {
733
989
  i++;
734
990
  }
735
991
  }
736
- if (config.dirs.length === 0) config.dirs.push((0, import_node_path.resolve)("."));
992
+ if (config.dirs.length === 0) config.dirs.push((0, import_node_path2.resolve)("."));
737
993
  return config;
738
994
  }
739
995
  function printUsage() {
@@ -769,7 +1025,7 @@ function scanFiles(dirs, recursive) {
769
1025
  const seen = /* @__PURE__ */ new Set();
770
1026
  function scanDir(d, topLevel) {
771
1027
  try {
772
- const dirStat = (0, import_node_fs.statSync)(d);
1028
+ const dirStat = (0, import_node_fs2.statSync)(d);
773
1029
  const dirMtime = dirStat.mtime.getTime();
774
1030
  const cachedMtime = dirMtimeCache.get(d);
775
1031
  if (cachedMtime === dirMtime) {
@@ -785,13 +1041,13 @@ function scanFiles(dirs, recursive) {
785
1041
  }
786
1042
  }
787
1043
  const dirResults = [];
788
- for (const f of (0, import_node_fs.readdirSync)(d)) {
1044
+ for (const f of (0, import_node_fs2.readdirSync)(d)) {
789
1045
  if (f.startsWith(".")) continue;
790
- const fp = (0, import_node_path.join)(d, f);
1046
+ const fp = (0, import_node_path2.join)(d, f);
791
1047
  if (seen.has(fp)) continue;
792
1048
  let stat;
793
1049
  try {
794
- stat = (0, import_node_fs.statSync)(fp);
1050
+ stat = (0, import_node_fs2.statSync)(fp);
795
1051
  } catch {
796
1052
  continue;
797
1053
  }
@@ -823,13 +1079,13 @@ function scanFiles(dirs, recursive) {
823
1079
  }
824
1080
  function safeReadJson(fp) {
825
1081
  try {
826
- return JSON.parse((0, import_node_fs.readFileSync)(fp, "utf8"));
1082
+ return JSON.parse((0, import_node_fs2.readFileSync)(fp, "utf8"));
827
1083
  } catch {
828
1084
  return null;
829
1085
  }
830
1086
  }
831
1087
  function nameFromFile(filename) {
832
- return (0, import_node_path.basename)(filename).replace(/\.(json|jsonl)$/, "").replace(/-state$/, "");
1088
+ return (0, import_node_path2.basename)(filename).replace(/\.(json|jsonl)$/, "").replace(/-state$/, "");
833
1089
  }
834
1090
  function normalizeStatus(val) {
835
1091
  if (typeof val !== "string") return "unknown";
@@ -1007,7 +1263,7 @@ function processJsonFile(file) {
1007
1263
  }
1008
1264
  function processJsonlFile(file) {
1009
1265
  try {
1010
- const content = (0, import_node_fs.readFileSync)(file.path, "utf8").trim();
1266
+ const content = (0, import_node_fs2.readFileSync)(file.path, "utf8").trim();
1011
1267
  if (!content) return [];
1012
1268
  const lines = content.split("\n");
1013
1269
  const lineCount = lines.length;
@@ -1159,6 +1415,9 @@ var prevFileCount = 0;
1159
1415
  var newExecCount = 0;
1160
1416
  var sessionStart = Date.now();
1161
1417
  var firstRender = true;
1418
+ var cachedAuditConfig = null;
1419
+ var cachedAuditResult = null;
1420
+ var lastAuditTime = 0;
1162
1421
  var fileCache = /* @__PURE__ */ new Map();
1163
1422
  function getRecordsCached(f) {
1164
1423
  const cached = fileCache.get(f.path);
@@ -1278,6 +1537,22 @@ function render(config) {
1278
1537
  const level = Math.round(v / maxBucket * 8);
1279
1538
  return (failBuckets[i] > 0 ? C.red : C.green) + sparkChars[level] + C.reset;
1280
1539
  }).join("");
1540
+ let auditResult = null;
1541
+ if (now - lastAuditTime > 1e4) {
1542
+ if (!cachedAuditConfig) {
1543
+ cachedAuditConfig = discoverProcessConfig(config.dirs);
1544
+ }
1545
+ if (cachedAuditConfig) {
1546
+ try {
1547
+ auditResult = auditProcesses(cachedAuditConfig);
1548
+ cachedAuditResult = auditResult;
1549
+ lastAuditTime = now;
1550
+ } catch {
1551
+ }
1552
+ }
1553
+ } else {
1554
+ auditResult = cachedAuditResult;
1555
+ }
1281
1556
  const distributedTraces = [];
1282
1557
  if (allTraces.length > 1) {
1283
1558
  const traceGroups = groupByTraceId(allTraces);
@@ -1358,6 +1633,41 @@ function render(config) {
1358
1633
  );
1359
1634
  writeLine(L, "");
1360
1635
  writeLine(L, ` ${C.bold}Activity (1h)${C.reset} ${spark} ${C.dim}\u2190 now${C.reset}`);
1636
+ if (auditResult) {
1637
+ const ar = auditResult;
1638
+ const healthy = ar.problems.length === 0;
1639
+ const healthIcon = healthy ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
1640
+ const healthLabel = healthy ? `${C.green}healthy${C.reset}` : `${C.red}${ar.problems.length} issue(s)${C.reset}`;
1641
+ const workerParts = [];
1642
+ if (ar.workers) {
1643
+ for (const w of ar.workers.workers) {
1644
+ const wIcon = w.declaredStatus === "running" && w.alive ? `${C.green}\u25CF${C.reset}` : w.stale ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
1645
+ workerParts.push(`${wIcon} ${w.name}`);
1646
+ }
1647
+ }
1648
+ let sysdLabel = "";
1649
+ if (ar.systemd) {
1650
+ const si = ar.systemd.activeState === "active" ? `${C.green}\u25CF${C.reset}` : ar.systemd.crashLooping ? `${C.yellow}\u25CF${C.reset}` : ar.systemd.failed ? `${C.red}\u25CF${C.reset}` : `${C.dim}\u25CB${C.reset}`;
1651
+ sysdLabel = ` ${C.bold}Systemd${C.reset} ${si} ${ar.systemd.activeState}`;
1652
+ if (ar.systemd.restarts > 0) sysdLabel += ` ${C.dim}(${ar.systemd.restarts} restarts)${C.reset}`;
1653
+ }
1654
+ let pidLabel = "";
1655
+ if (ar.pidFile?.pid) {
1656
+ const pi = ar.pidFile.alive && ar.pidFile.matchesProcess ? `${C.green}\u25CF${C.reset}` : `${C.red}\u25CF${C.reset}`;
1657
+ pidLabel = ` ${C.bold}PID${C.reset} ${pi} ${ar.pidFile.pid}`;
1658
+ }
1659
+ writeLine(L, "");
1660
+ writeLine(L, ` ${C.bold}${C.under}Process Health${C.reset}`);
1661
+ writeLine(L, ` ${healthIcon} ${healthLabel}${pidLabel}${sysdLabel} ${C.bold}Procs${C.reset} ${C.dim}${ar.osProcesses.length}${C.reset} ${ar.orphans.length > 0 ? `${C.red}Orphans ${ar.orphans.length}${C.reset}` : `${C.dim}Orphans 0${C.reset}`}`);
1662
+ if (workerParts.length > 0) {
1663
+ writeLine(L, ` ${C.dim}Workers${C.reset} ${workerParts.join(" ")}`);
1664
+ }
1665
+ if (!healthy) {
1666
+ for (const p of ar.problems.slice(0, 3)) {
1667
+ writeLine(L, ` ${C.red}\u2022${C.reset} ${C.dim}${p}${C.reset}`);
1668
+ }
1669
+ }
1670
+ }
1361
1671
  writeLine(L, "");
1362
1672
  writeLine(
1363
1673
  L,
@@ -1476,13 +1786,13 @@ function getDistDepth(dt, spanId, visited) {
1476
1786
  }
1477
1787
  function startLive(argv) {
1478
1788
  const config = parseArgs(argv);
1479
- const valid = config.dirs.filter((d) => (0, import_node_fs.existsSync)(d));
1789
+ const valid = config.dirs.filter((d) => (0, import_node_fs2.existsSync)(d));
1480
1790
  if (valid.length === 0) {
1481
1791
  console.error(`No valid directories found: ${config.dirs.join(", ")}`);
1482
1792
  console.error("Specify directories containing JSON/JSONL files: agentflow live <dir> [dir...]");
1483
1793
  process.exit(1);
1484
1794
  }
1485
- const invalid = config.dirs.filter((d) => !(0, import_node_fs.existsSync)(d));
1795
+ const invalid = config.dirs.filter((d) => !(0, import_node_fs2.existsSync)(d));
1486
1796
  if (invalid.length > 0) {
1487
1797
  console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
1488
1798
  }
@@ -1491,7 +1801,7 @@ function startLive(argv) {
1491
1801
  let debounce = null;
1492
1802
  for (const dir of config.dirs) {
1493
1803
  try {
1494
- (0, import_node_fs.watch)(dir, { recursive: config.recursive }, () => {
1804
+ (0, import_node_fs2.watch)(dir, { recursive: config.recursive }, () => {
1495
1805
  if (debounce) clearTimeout(debounce);
1496
1806
  debounce = setTimeout(() => render(config), 500);
1497
1807
  });
@@ -1505,262 +1815,6 @@ function startLive(argv) {
1505
1815
  });
1506
1816
  }
1507
1817
 
1508
- // src/process-audit.ts
1509
- var import_node_child_process = require("child_process");
1510
- var import_node_fs2 = require("fs");
1511
- var import_node_path2 = require("path");
1512
- function isPidAlive(pid) {
1513
- try {
1514
- process.kill(pid, 0);
1515
- return true;
1516
- } catch {
1517
- return false;
1518
- }
1519
- }
1520
- function pidMatchesName(pid, name) {
1521
- try {
1522
- const cmdline = (0, import_node_fs2.readFileSync)(`/proc/${pid}/cmdline`, "utf8");
1523
- return cmdline.includes(name);
1524
- } catch {
1525
- return false;
1526
- }
1527
- }
1528
- function readPidFile(path) {
1529
- try {
1530
- const pid = parseInt((0, import_node_fs2.readFileSync)(path, "utf8").trim(), 10);
1531
- return isNaN(pid) ? null : pid;
1532
- } catch {
1533
- return null;
1534
- }
1535
- }
1536
- function auditPidFile(config) {
1537
- if (!config.pidFile) return null;
1538
- const pid = readPidFile(config.pidFile);
1539
- if (pid === null) {
1540
- return {
1541
- path: config.pidFile,
1542
- pid: null,
1543
- alive: false,
1544
- matchesProcess: false,
1545
- stale: !(0, import_node_fs2.existsSync)(config.pidFile),
1546
- reason: (0, import_node_fs2.existsSync)(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
1547
- };
1548
- }
1549
- const alive = isPidAlive(pid);
1550
- const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
1551
- const stale = !alive || alive && !matchesProcess;
1552
- let reason;
1553
- if (alive && matchesProcess) {
1554
- reason = `PID ${pid} alive and matches ${config.processName}`;
1555
- } else if (alive && !matchesProcess) {
1556
- reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
1557
- } else {
1558
- reason = `PID ${pid} no longer exists`;
1559
- }
1560
- return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
1561
- }
1562
- function auditSystemd(config) {
1563
- if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
1564
- const unit = config.systemdUnit;
1565
- try {
1566
- const raw = (0, import_node_child_process.execSync)(
1567
- `systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
1568
- { encoding: "utf8", timeout: 5e3 }
1569
- );
1570
- const props = {};
1571
- for (const line of raw.trim().split("\n")) {
1572
- const [k, ...v] = line.split("=");
1573
- if (k) props[k.trim()] = v.join("=").trim();
1574
- }
1575
- const activeState = props["ActiveState"] ?? "unknown";
1576
- const subState = props["SubState"] ?? "unknown";
1577
- const mainPid = parseInt(props["MainPID"] ?? "0", 10);
1578
- const restarts = parseInt(props["NRestarts"] ?? "0", 10);
1579
- const result = props["Result"] ?? "unknown";
1580
- return {
1581
- unit,
1582
- activeState,
1583
- subState,
1584
- mainPid,
1585
- restarts,
1586
- result,
1587
- crashLooping: activeState === "activating" && subState === "auto-restart",
1588
- failed: activeState === "failed"
1589
- };
1590
- } catch {
1591
- return null;
1592
- }
1593
- }
1594
- function auditWorkers(config) {
1595
- if (!config.workersFile || !(0, import_node_fs2.existsSync)(config.workersFile)) return null;
1596
- try {
1597
- const data = JSON.parse((0, import_node_fs2.readFileSync)(config.workersFile, "utf8"));
1598
- const orchPid = data.pid ?? null;
1599
- const orchAlive = orchPid ? isPidAlive(orchPid) : false;
1600
- const workers = [];
1601
- for (const [name, info] of Object.entries(data.tools ?? {})) {
1602
- const w = info;
1603
- const wPid = w.pid ?? null;
1604
- const wAlive = wPid ? isPidAlive(wPid) : false;
1605
- workers.push({
1606
- name,
1607
- pid: wPid,
1608
- declaredStatus: w.status ?? "unknown",
1609
- alive: wAlive,
1610
- stale: w.status === "running" && !wAlive
1611
- });
1612
- }
1613
- return {
1614
- orchestratorPid: orchPid,
1615
- orchestratorAlive: orchAlive,
1616
- startedAt: data.started_at ?? "",
1617
- workers
1618
- };
1619
- } catch {
1620
- return null;
1621
- }
1622
- }
1623
- function getOsProcesses(processName) {
1624
- try {
1625
- const raw = (0, import_node_child_process.execSync)(`ps aux`, { encoding: "utf8", timeout: 5e3 });
1626
- return raw.split("\n").filter((line) => line.includes(processName) && !line.includes("process-audit") && !line.includes("grep")).map((line) => {
1627
- const parts = line.trim().split(/\s+/);
1628
- return {
1629
- pid: parseInt(parts[1] ?? "0", 10),
1630
- cpu: parts[2] ?? "0",
1631
- mem: parts[3] ?? "0",
1632
- command: parts.slice(10).join(" ")
1633
- };
1634
- }).filter((p) => !isNaN(p.pid) && p.pid > 0);
1635
- } catch {
1636
- return [];
1637
- }
1638
- }
1639
- function discoverProcessConfig(dirs) {
1640
- let pidFile;
1641
- let workersFile;
1642
- let processName = "";
1643
- for (const dir of dirs) {
1644
- if (!(0, import_node_fs2.existsSync)(dir)) continue;
1645
- let entries;
1646
- try {
1647
- entries = (0, import_node_fs2.readdirSync)(dir);
1648
- } catch {
1649
- continue;
1650
- }
1651
- for (const f of entries) {
1652
- const fp = (0, import_node_path2.join)(dir, f);
1653
- try {
1654
- if (!(0, import_node_fs2.statSync)(fp).isFile()) continue;
1655
- } catch {
1656
- continue;
1657
- }
1658
- if (f.endsWith(".pid") && !pidFile) {
1659
- pidFile = fp;
1660
- if (!processName) {
1661
- processName = (0, import_node_path2.basename)(f, ".pid");
1662
- }
1663
- }
1664
- if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
1665
- workersFile = fp;
1666
- if (!processName && f !== "workers.json") {
1667
- processName = (0, import_node_path2.basename)(f, "-workers.json");
1668
- }
1669
- }
1670
- }
1671
- }
1672
- if (!processName && !pidFile && !workersFile) return null;
1673
- if (!processName) processName = "agent";
1674
- return { processName, pidFile, workersFile };
1675
- }
1676
- function auditProcesses(config) {
1677
- const pidFile = auditPidFile(config);
1678
- const systemd = auditSystemd(config);
1679
- const workers = auditWorkers(config);
1680
- const osProcesses = getOsProcesses(config.processName);
1681
- const knownPids = /* @__PURE__ */ new Set();
1682
- if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
1683
- if (workers) {
1684
- if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
1685
- for (const w of workers.workers) {
1686
- if (w.pid) knownPids.add(w.pid);
1687
- }
1688
- }
1689
- if (systemd?.mainPid) knownPids.add(systemd.mainPid);
1690
- const orphans = osProcesses.filter((p) => !knownPids.has(p.pid));
1691
- const problems = [];
1692
- if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
1693
- if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
1694
- if (systemd?.failed) problems.push("Systemd unit has failed");
1695
- if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
1696
- if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
1697
- problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
1698
- }
1699
- if (workers) {
1700
- for (const w of workers.workers) {
1701
- if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
1702
- }
1703
- }
1704
- if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
1705
- return { pidFile, systemd, workers, osProcesses, orphans, problems };
1706
- }
1707
- function formatAuditReport(result) {
1708
- const lines = [];
1709
- lines.push("");
1710
- lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
1711
- lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
1712
- lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
1713
- if (result.pidFile) {
1714
- const pf = result.pidFile;
1715
- const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
1716
- lines.push(`
1717
- PID File: ${pf.path}`);
1718
- lines.push(` ${icon} ${pf.reason}`);
1719
- }
1720
- if (result.systemd) {
1721
- const sd = result.systemd;
1722
- const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
1723
- lines.push(`
1724
- Systemd: ${sd.unit}`);
1725
- lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
1726
- lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
1727
- }
1728
- if (result.workers) {
1729
- const w = result.workers;
1730
- lines.push(`
1731
- Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
1732
- for (const worker of w.workers) {
1733
- const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
1734
- lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
1735
- }
1736
- }
1737
- if (result.osProcesses.length > 0) {
1738
- lines.push(`
1739
- OS Processes (${result.osProcesses.length} total)`);
1740
- for (const p of result.osProcesses) {
1741
- lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
1742
- }
1743
- }
1744
- if (result.orphans.length > 0) {
1745
- lines.push(`
1746
- \u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
1747
- for (const p of result.orphans) {
1748
- lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
1749
- }
1750
- }
1751
- lines.push("");
1752
- if (result.problems.length === 0) {
1753
- lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
1754
- } else {
1755
- lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
1756
- for (const p of result.problems) {
1757
- lines.push(` \u2022 ${p}`);
1758
- }
1759
- }
1760
- lines.push("");
1761
- return lines.join("\n");
1762
- }
1763
-
1764
1818
  // src/runner.ts
1765
1819
  var import_node_child_process2 = require("child_process");
1766
1820
  var import_node_fs3 = require("fs");