agentflow-core 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -20,10 +20,13 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
+ auditProcesses: () => auditProcesses,
23
24
  checkGuards: () => checkGuards,
24
25
  createGraphBuilder: () => createGraphBuilder,
25
26
  createTraceStore: () => createTraceStore,
27
+ discoverProcessConfig: () => discoverProcessConfig,
26
28
  findWaitingOn: () => findWaitingOn,
29
+ formatAuditReport: () => formatAuditReport,
27
30
  getChildren: () => getChildren,
28
31
  getCriticalPath: () => getCriticalPath,
29
32
  getDepth: () => getDepth,
@@ -638,7 +641,6 @@ function withGuards(builder, config) {
638
641
  // src/live.ts
639
642
  var import_node_fs = require("fs");
640
643
  var import_node_path = require("path");
641
- var import_node_child_process = require("child_process");
642
644
 
643
645
  // src/loader.ts
644
646
  function toNodesMap(raw) {
@@ -963,18 +965,20 @@ function processJsonFile(file) {
963
965
  const w = info;
964
966
  const status2 = findStatus(w);
965
967
  const ts2 = findTimestamp(w) || findTimestamp(obj) || file.mtime;
966
- const pid = w.pid;
968
+ const rawPid = w.pid;
969
+ const pid = typeof rawPid === "number" ? rawPid : Number(rawPid);
970
+ const validPid = Number.isFinite(pid) && pid > 0;
967
971
  let validatedStatus = status2;
968
972
  let pidAlive = true;
969
- if (pid && (status2 === "running" || status2 === "ok")) {
973
+ if (validPid && (status2 === "running" || status2 === "ok")) {
970
974
  try {
971
- (0, import_node_child_process.execSync)(`kill -0 ${pid} 2>/dev/null`, { stdio: "ignore" });
975
+ process.kill(pid, 0);
972
976
  } catch {
973
977
  pidAlive = false;
974
978
  validatedStatus = "error";
975
979
  }
976
980
  }
977
- const pidLabel = pid ? pidAlive ? `pid: ${pid}` : `pid: ${pid} (dead)` : "";
981
+ const pidLabel = validPid ? pidAlive ? `pid: ${pid}` : `pid: ${pid} (dead)` : "";
978
982
  const detail2 = pidLabel || extractDetail(w);
979
983
  records.push({
980
984
  id: name,
@@ -1461,11 +1465,14 @@ function render(config) {
1461
1465
  writeLine(L, ` ${C.dim}Press Ctrl+C to exit${C.reset}`);
1462
1466
  flushLines(L);
1463
1467
  }
1464
- function getDistDepth(dt, spanId) {
1468
+ function getDistDepth(dt, spanId, visited) {
1465
1469
  if (!spanId) return 0;
1470
+ const seen = visited ?? /* @__PURE__ */ new Set();
1471
+ if (seen.has(spanId)) return 0;
1472
+ seen.add(spanId);
1466
1473
  const g = dt.graphs.get(spanId);
1467
1474
  if (!g || !g.parentSpanId) return 0;
1468
- return 1 + getDistDepth(dt, g.parentSpanId);
1475
+ return 1 + getDistDepth(dt, g.parentSpanId, seen);
1469
1476
  }
1470
1477
  function startLive(argv) {
1471
1478
  const config = parseArgs(argv);
@@ -1498,22 +1505,278 @@ function startLive(argv) {
1498
1505
  });
1499
1506
  }
1500
1507
 
1501
- // src/runner.ts
1502
- var import_node_child_process2 = require("child_process");
1508
+ // src/process-audit.ts
1509
+ var import_node_child_process = require("child_process");
1503
1510
  var import_node_fs2 = require("fs");
1504
1511
  var import_node_path2 = require("path");
1512
+ function isPidAlive(pid) {
1513
+ try {
1514
+ process.kill(pid, 0);
1515
+ return true;
1516
+ } catch {
1517
+ return false;
1518
+ }
1519
+ }
1520
+ function pidMatchesName(pid, name) {
1521
+ try {
1522
+ const cmdline = (0, import_node_fs2.readFileSync)(`/proc/${pid}/cmdline`, "utf8");
1523
+ return cmdline.includes(name);
1524
+ } catch {
1525
+ return false;
1526
+ }
1527
+ }
1528
+ function readPidFile(path) {
1529
+ try {
1530
+ const pid = parseInt((0, import_node_fs2.readFileSync)(path, "utf8").trim(), 10);
1531
+ return isNaN(pid) ? null : pid;
1532
+ } catch {
1533
+ return null;
1534
+ }
1535
+ }
1536
+ function auditPidFile(config) {
1537
+ if (!config.pidFile) return null;
1538
+ const pid = readPidFile(config.pidFile);
1539
+ if (pid === null) {
1540
+ return {
1541
+ path: config.pidFile,
1542
+ pid: null,
1543
+ alive: false,
1544
+ matchesProcess: false,
1545
+ stale: !(0, import_node_fs2.existsSync)(config.pidFile),
1546
+ reason: (0, import_node_fs2.existsSync)(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
1547
+ };
1548
+ }
1549
+ const alive = isPidAlive(pid);
1550
+ const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
1551
+ const stale = !alive || alive && !matchesProcess;
1552
+ let reason;
1553
+ if (alive && matchesProcess) {
1554
+ reason = `PID ${pid} alive and matches ${config.processName}`;
1555
+ } else if (alive && !matchesProcess) {
1556
+ reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
1557
+ } else {
1558
+ reason = `PID ${pid} no longer exists`;
1559
+ }
1560
+ return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
1561
+ }
1562
+ function auditSystemd(config) {
1563
+ if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
1564
+ const unit = config.systemdUnit;
1565
+ try {
1566
+ const raw = (0, import_node_child_process.execSync)(
1567
+ `systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
1568
+ { encoding: "utf8", timeout: 5e3 }
1569
+ );
1570
+ const props = {};
1571
+ for (const line of raw.trim().split("\n")) {
1572
+ const [k, ...v] = line.split("=");
1573
+ if (k) props[k.trim()] = v.join("=").trim();
1574
+ }
1575
+ const activeState = props["ActiveState"] ?? "unknown";
1576
+ const subState = props["SubState"] ?? "unknown";
1577
+ const mainPid = parseInt(props["MainPID"] ?? "0", 10);
1578
+ const restarts = parseInt(props["NRestarts"] ?? "0", 10);
1579
+ const result = props["Result"] ?? "unknown";
1580
+ return {
1581
+ unit,
1582
+ activeState,
1583
+ subState,
1584
+ mainPid,
1585
+ restarts,
1586
+ result,
1587
+ crashLooping: activeState === "activating" && subState === "auto-restart",
1588
+ failed: activeState === "failed"
1589
+ };
1590
+ } catch {
1591
+ return null;
1592
+ }
1593
+ }
1594
+ function auditWorkers(config) {
1595
+ if (!config.workersFile || !(0, import_node_fs2.existsSync)(config.workersFile)) return null;
1596
+ try {
1597
+ const data = JSON.parse((0, import_node_fs2.readFileSync)(config.workersFile, "utf8"));
1598
+ const orchPid = data.pid ?? null;
1599
+ const orchAlive = orchPid ? isPidAlive(orchPid) : false;
1600
+ const workers = [];
1601
+ for (const [name, info] of Object.entries(data.tools ?? {})) {
1602
+ const w = info;
1603
+ const wPid = w.pid ?? null;
1604
+ const wAlive = wPid ? isPidAlive(wPid) : false;
1605
+ workers.push({
1606
+ name,
1607
+ pid: wPid,
1608
+ declaredStatus: w.status ?? "unknown",
1609
+ alive: wAlive,
1610
+ stale: w.status === "running" && !wAlive
1611
+ });
1612
+ }
1613
+ return {
1614
+ orchestratorPid: orchPid,
1615
+ orchestratorAlive: orchAlive,
1616
+ startedAt: data.started_at ?? "",
1617
+ workers
1618
+ };
1619
+ } catch {
1620
+ return null;
1621
+ }
1622
+ }
1623
+ function getOsProcesses(processName) {
1624
+ try {
1625
+ const raw = (0, import_node_child_process.execSync)(`ps aux`, { encoding: "utf8", timeout: 5e3 });
1626
+ return raw.split("\n").filter((line) => line.includes(processName) && !line.includes("process-audit") && !line.includes("grep")).map((line) => {
1627
+ const parts = line.trim().split(/\s+/);
1628
+ return {
1629
+ pid: parseInt(parts[1] ?? "0", 10),
1630
+ cpu: parts[2] ?? "0",
1631
+ mem: parts[3] ?? "0",
1632
+ command: parts.slice(10).join(" ")
1633
+ };
1634
+ }).filter((p) => !isNaN(p.pid) && p.pid > 0);
1635
+ } catch {
1636
+ return [];
1637
+ }
1638
+ }
1639
+ function discoverProcessConfig(dirs) {
1640
+ let pidFile;
1641
+ let workersFile;
1642
+ let processName = "";
1643
+ for (const dir of dirs) {
1644
+ if (!(0, import_node_fs2.existsSync)(dir)) continue;
1645
+ let entries;
1646
+ try {
1647
+ entries = (0, import_node_fs2.readdirSync)(dir);
1648
+ } catch {
1649
+ continue;
1650
+ }
1651
+ for (const f of entries) {
1652
+ const fp = (0, import_node_path2.join)(dir, f);
1653
+ try {
1654
+ if (!(0, import_node_fs2.statSync)(fp).isFile()) continue;
1655
+ } catch {
1656
+ continue;
1657
+ }
1658
+ if (f.endsWith(".pid") && !pidFile) {
1659
+ pidFile = fp;
1660
+ if (!processName) {
1661
+ processName = (0, import_node_path2.basename)(f, ".pid");
1662
+ }
1663
+ }
1664
+ if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
1665
+ workersFile = fp;
1666
+ if (!processName && f !== "workers.json") {
1667
+ processName = (0, import_node_path2.basename)(f, "-workers.json");
1668
+ }
1669
+ }
1670
+ }
1671
+ }
1672
+ if (!processName && !pidFile && !workersFile) return null;
1673
+ if (!processName) processName = "agent";
1674
+ return { processName, pidFile, workersFile };
1675
+ }
1676
+ function auditProcesses(config) {
1677
+ const pidFile = auditPidFile(config);
1678
+ const systemd = auditSystemd(config);
1679
+ const workers = auditWorkers(config);
1680
+ const osProcesses = getOsProcesses(config.processName);
1681
+ const knownPids = /* @__PURE__ */ new Set();
1682
+ if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
1683
+ if (workers) {
1684
+ if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
1685
+ for (const w of workers.workers) {
1686
+ if (w.pid) knownPids.add(w.pid);
1687
+ }
1688
+ }
1689
+ if (systemd?.mainPid) knownPids.add(systemd.mainPid);
1690
+ const orphans = osProcesses.filter((p) => !knownPids.has(p.pid));
1691
+ const problems = [];
1692
+ if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
1693
+ if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
1694
+ if (systemd?.failed) problems.push("Systemd unit has failed");
1695
+ if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
1696
+ if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
1697
+ problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
1698
+ }
1699
+ if (workers) {
1700
+ for (const w of workers.workers) {
1701
+ if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
1702
+ }
1703
+ }
1704
+ if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
1705
+ return { pidFile, systemd, workers, osProcesses, orphans, problems };
1706
+ }
1707
+ function formatAuditReport(result) {
1708
+ const lines = [];
1709
+ lines.push("");
1710
+ lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
1711
+ lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
1712
+ lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
1713
+ if (result.pidFile) {
1714
+ const pf = result.pidFile;
1715
+ const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
1716
+ lines.push(`
1717
+ PID File: ${pf.path}`);
1718
+ lines.push(` ${icon} ${pf.reason}`);
1719
+ }
1720
+ if (result.systemd) {
1721
+ const sd = result.systemd;
1722
+ const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
1723
+ lines.push(`
1724
+ Systemd: ${sd.unit}`);
1725
+ lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
1726
+ lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
1727
+ }
1728
+ if (result.workers) {
1729
+ const w = result.workers;
1730
+ lines.push(`
1731
+ Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
1732
+ for (const worker of w.workers) {
1733
+ const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
1734
+ lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
1735
+ }
1736
+ }
1737
+ if (result.osProcesses.length > 0) {
1738
+ lines.push(`
1739
+ OS Processes (${result.osProcesses.length} total)`);
1740
+ for (const p of result.osProcesses) {
1741
+ lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
1742
+ }
1743
+ }
1744
+ if (result.orphans.length > 0) {
1745
+ lines.push(`
1746
+ \u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
1747
+ for (const p of result.orphans) {
1748
+ lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
1749
+ }
1750
+ }
1751
+ lines.push("");
1752
+ if (result.problems.length === 0) {
1753
+ lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
1754
+ } else {
1755
+ lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
1756
+ for (const p of result.problems) {
1757
+ lines.push(` \u2022 ${p}`);
1758
+ }
1759
+ }
1760
+ lines.push("");
1761
+ return lines.join("\n");
1762
+ }
1763
+
1764
+ // src/runner.ts
1765
+ var import_node_child_process2 = require("child_process");
1766
+ var import_node_fs3 = require("fs");
1767
+ var import_node_path3 = require("path");
1505
1768
  function globToRegex(pattern) {
1506
1769
  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*").replace(/\?/g, ".");
1507
1770
  return new RegExp(`^${escaped}$`);
1508
1771
  }
1509
1772
  function snapshotDir(dir, patterns) {
1510
1773
  const result = /* @__PURE__ */ new Map();
1511
- if (!(0, import_node_fs2.existsSync)(dir)) return result;
1512
- for (const entry of (0, import_node_fs2.readdirSync)(dir)) {
1774
+ if (!(0, import_node_fs3.existsSync)(dir)) return result;
1775
+ for (const entry of (0, import_node_fs3.readdirSync)(dir)) {
1513
1776
  if (!patterns.some((re) => re.test(entry))) continue;
1514
- const full = (0, import_node_path2.join)(dir, entry);
1777
+ const full = (0, import_node_path3.join)(dir, entry);
1515
1778
  try {
1516
- const stat = (0, import_node_fs2.statSync)(full);
1779
+ const stat = (0, import_node_fs3.statSync)(full);
1517
1780
  if (stat.isFile()) {
1518
1781
  result.set(full, stat.mtimeMs);
1519
1782
  }
@@ -1523,7 +1786,7 @@ function snapshotDir(dir, patterns) {
1523
1786
  return result;
1524
1787
  }
1525
1788
  function agentIdFromFilename(filePath) {
1526
- const base = (0, import_node_path2.basename)(filePath, ".json");
1789
+ const base = (0, import_node_path3.basename)(filePath, ".json");
1527
1790
  const cleaned = base.replace(/-state$/, "");
1528
1791
  return `alfred-${cleaned}`;
1529
1792
  }
@@ -1545,7 +1808,7 @@ async function runTraced(config) {
1545
1808
  if (command.length === 0) {
1546
1809
  throw new Error("runTraced: command must not be empty");
1547
1810
  }
1548
- const resolvedTracesDir = (0, import_node_path2.resolve)(tracesDir);
1811
+ const resolvedTracesDir = (0, import_node_path3.resolve)(tracesDir);
1549
1812
  const patterns = watchPatterns.map(globToRegex);
1550
1813
  const orchestrator = createGraphBuilder({ agentId, trigger });
1551
1814
  const { traceId, spanId } = orchestrator.traceContext;
@@ -1628,15 +1891,19 @@ async function runTraced(config) {
1628
1891
  childBuilder.endNode(childRootId);
1629
1892
  allGraphs.push(childBuilder.build());
1630
1893
  }
1631
- if (!(0, import_node_fs2.existsSync)(resolvedTracesDir)) {
1632
- (0, import_node_fs2.mkdirSync)(resolvedTracesDir, { recursive: true });
1894
+ if (!(0, import_node_fs3.existsSync)(resolvedTracesDir)) {
1895
+ (0, import_node_fs3.mkdirSync)(resolvedTracesDir, { recursive: true });
1633
1896
  }
1634
1897
  const ts = fileTimestamp();
1635
1898
  const tracePaths = [];
1636
1899
  for (const graph of allGraphs) {
1637
1900
  const filename = `${graph.agentId}-${ts}.json`;
1638
- const outPath = (0, import_node_path2.join)(resolvedTracesDir, filename);
1639
- (0, import_node_fs2.writeFileSync)(outPath, JSON.stringify(graphToJson(graph), null, 2), "utf-8");
1901
+ const outPath = (0, import_node_path3.join)(resolvedTracesDir, filename);
1902
+ const resolvedOut = (0, import_node_path3.resolve)(outPath);
1903
+ if (!resolvedOut.startsWith(resolvedTracesDir + "/") && resolvedOut !== resolvedTracesDir) {
1904
+ throw new Error(`Path traversal detected: agentId "${graph.agentId}" escapes traces directory`);
1905
+ }
1906
+ (0, import_node_fs3.writeFileSync)(outPath, JSON.stringify(graphToJson(graph), null, 2), "utf-8");
1640
1907
  tracePaths.push(outPath);
1641
1908
  }
1642
1909
  if (tracePaths.length > 0) {
@@ -1684,6 +1951,11 @@ function createTraceStore(dir) {
1684
1951
  await ensureDir();
1685
1952
  const json = graphToJson(graph);
1686
1953
  const filePath = (0, import_path.join)(dir, `${graph.id}.json`);
1954
+ const resolvedBase = (0, import_path.resolve)(dir);
1955
+ const resolvedPath = (0, import_path.resolve)(filePath);
1956
+ if (!resolvedPath.startsWith(resolvedBase + "/") && resolvedPath !== resolvedBase) {
1957
+ throw new Error(`Path traversal detected: "${graph.id}" escapes base directory`);
1958
+ }
1687
1959
  await (0, import_promises.writeFile)(filePath, JSON.stringify(json, null, 2), "utf-8");
1688
1960
  return filePath;
1689
1961
  },
@@ -1882,9 +2154,9 @@ function toTimeline(graph) {
1882
2154
  }
1883
2155
 
1884
2156
  // src/watch.ts
1885
- var import_node_fs4 = require("fs");
2157
+ var import_node_fs5 = require("fs");
1886
2158
  var import_node_os = require("os");
1887
- var import_node_path3 = require("path");
2159
+ var import_node_path4 = require("path");
1888
2160
 
1889
2161
  // src/watch-alerts.ts
1890
2162
  var import_node_child_process3 = require("child_process");
@@ -1942,7 +2214,7 @@ function sendTelegram(payload, botToken, chatId) {
1942
2214
  text: formatTelegram(payload),
1943
2215
  parse_mode: "Markdown"
1944
2216
  });
1945
- return new Promise((resolve4, reject) => {
2217
+ return new Promise((resolve5, reject) => {
1946
2218
  const req = (0, import_node_https.request)(
1947
2219
  `https://api.telegram.org/bot${botToken}/sendMessage`,
1948
2220
  {
@@ -1951,7 +2223,7 @@ function sendTelegram(payload, botToken, chatId) {
1951
2223
  },
1952
2224
  (res) => {
1953
2225
  res.resume();
1954
- if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
2226
+ if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve5();
1955
2227
  else reject(new Error(`Telegram API returned ${res.statusCode}`));
1956
2228
  }
1957
2229
  );
@@ -1964,7 +2236,7 @@ function sendWebhook(payload, url) {
1964
2236
  const body = JSON.stringify(payload);
1965
2237
  const isHttps = url.startsWith("https");
1966
2238
  const doRequest = isHttps ? import_node_https.request : import_node_http.request;
1967
- return new Promise((resolve4, reject) => {
2239
+ return new Promise((resolve5, reject) => {
1968
2240
  const req = doRequest(
1969
2241
  url,
1970
2242
  {
@@ -1973,7 +2245,7 @@ function sendWebhook(payload, url) {
1973
2245
  },
1974
2246
  (res) => {
1975
2247
  res.resume();
1976
- if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
2248
+ if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve5();
1977
2249
  else reject(new Error(`Webhook returned ${res.statusCode}`));
1978
2250
  }
1979
2251
  );
@@ -1986,7 +2258,7 @@ function sendWebhook(payload, url) {
1986
2258
  });
1987
2259
  }
1988
2260
  function sendCommand(payload, cmd) {
1989
- return new Promise((resolve4, reject) => {
2261
+ return new Promise((resolve5, reject) => {
1990
2262
  const env = {
1991
2263
  ...process.env,
1992
2264
  AGENTFLOW_ALERT_AGENT: payload.agentId,
@@ -1999,13 +2271,13 @@ function sendCommand(payload, cmd) {
1999
2271
  };
2000
2272
  (0, import_node_child_process3.exec)(cmd, { env, timeout: 3e4 }, (err) => {
2001
2273
  if (err) reject(err);
2002
- else resolve4();
2274
+ else resolve5();
2003
2275
  });
2004
2276
  });
2005
2277
  }
2006
2278
 
2007
2279
  // src/watch-state.ts
2008
- var import_node_fs3 = require("fs");
2280
+ var import_node_fs4 = require("fs");
2009
2281
  function parseDuration(input) {
2010
2282
  const match = input.match(/^(\d+(?:\.\d+)?)\s*(s|m|h|d)$/i);
2011
2283
  if (!match) {
@@ -2030,9 +2302,9 @@ function emptyState() {
2030
2302
  return { version: 1, agents: {}, lastPollTime: 0 };
2031
2303
  }
2032
2304
  function loadWatchState(filePath) {
2033
- if (!(0, import_node_fs3.existsSync)(filePath)) return emptyState();
2305
+ if (!(0, import_node_fs4.existsSync)(filePath)) return emptyState();
2034
2306
  try {
2035
- const raw = JSON.parse((0, import_node_fs3.readFileSync)(filePath, "utf8"));
2307
+ const raw = JSON.parse((0, import_node_fs4.readFileSync)(filePath, "utf8"));
2036
2308
  if (raw.version !== 1 || typeof raw.agents !== "object") return emptyState();
2037
2309
  return raw;
2038
2310
  } catch {
@@ -2042,11 +2314,11 @@ function loadWatchState(filePath) {
2042
2314
  function saveWatchState(filePath, state) {
2043
2315
  const tmp = filePath + ".tmp";
2044
2316
  try {
2045
- (0, import_node_fs3.writeFileSync)(tmp, JSON.stringify(state, null, 2), "utf8");
2046
- (0, import_node_fs3.renameSync)(tmp, filePath);
2317
+ (0, import_node_fs4.writeFileSync)(tmp, JSON.stringify(state, null, 2), "utf8");
2318
+ (0, import_node_fs4.renameSync)(tmp, filePath);
2047
2319
  } catch {
2048
2320
  try {
2049
- (0, import_node_fs3.writeFileSync)(filePath, JSON.stringify(state, null, 2), "utf8");
2321
+ (0, import_node_fs4.writeFileSync)(filePath, JSON.stringify(state, null, 2), "utf8");
2050
2322
  } catch {
2051
2323
  }
2052
2324
  }
@@ -2274,20 +2546,20 @@ function parseWatchArgs(argv) {
2274
2546
  recursive = true;
2275
2547
  i++;
2276
2548
  } else if (!arg.startsWith("-")) {
2277
- dirs.push((0, import_node_path3.resolve)(arg));
2549
+ dirs.push((0, import_node_path4.resolve)(arg));
2278
2550
  i++;
2279
2551
  } else {
2280
2552
  i++;
2281
2553
  }
2282
2554
  }
2283
- if (dirs.length === 0) dirs.push((0, import_node_path3.resolve)("."));
2555
+ if (dirs.length === 0) dirs.push((0, import_node_path4.resolve)("."));
2284
2556
  if (alertConditions.length === 0) {
2285
2557
  alertConditions.push({ type: "error" });
2286
2558
  alertConditions.push({ type: "recovery" });
2287
2559
  }
2288
2560
  notifyChannels.unshift({ type: "stdout" });
2289
2561
  if (!stateFilePath) {
2290
- stateFilePath = (0, import_node_path3.join)(dirs[0], ".agentflow-watch-state.json");
2562
+ stateFilePath = (0, import_node_path4.join)(dirs[0], ".agentflow-watch-state.json");
2291
2563
  }
2292
2564
  return {
2293
2565
  dirs,
@@ -2295,7 +2567,7 @@ function parseWatchArgs(argv) {
2295
2567
  pollIntervalMs,
2296
2568
  alertConditions,
2297
2569
  notifyChannels,
2298
- stateFilePath: (0, import_node_path3.resolve)(stateFilePath),
2570
+ stateFilePath: (0, import_node_path4.resolve)(stateFilePath),
2299
2571
  cooldownMs
2300
2572
  };
2301
2573
  }
@@ -2349,12 +2621,12 @@ Examples:
2349
2621
  }
2350
2622
  function startWatch(argv) {
2351
2623
  const config = parseWatchArgs(argv);
2352
- const valid = config.dirs.filter((d) => (0, import_node_fs4.existsSync)(d));
2624
+ const valid = config.dirs.filter((d) => (0, import_node_fs5.existsSync)(d));
2353
2625
  if (valid.length === 0) {
2354
2626
  console.error(`No valid directories found: ${config.dirs.join(", ")}`);
2355
2627
  process.exit(1);
2356
2628
  }
2357
- const invalid = config.dirs.filter((d) => !(0, import_node_fs4.existsSync)(d));
2629
+ const invalid = config.dirs.filter((d) => !(0, import_node_fs5.existsSync)(d));
2358
2630
  if (invalid.length > 0) {
2359
2631
  console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
2360
2632
  }
@@ -2433,10 +2705,13 @@ agentflow watch started`);
2433
2705
  }
2434
2706
  // Annotate the CommonJS export names for ESM import in node:
2435
2707
  0 && (module.exports = {
2708
+ auditProcesses,
2436
2709
  checkGuards,
2437
2710
  createGraphBuilder,
2438
2711
  createTraceStore,
2712
+ discoverProcessConfig,
2439
2713
  findWaitingOn,
2714
+ formatAuditReport,
2440
2715
  getChildren,
2441
2716
  getCriticalPath,
2442
2717
  getDepth,
package/dist/index.d.cts CHANGED
@@ -494,6 +494,114 @@ declare function withGuards(builder: GraphBuilder, config?: GuardConfig): GraphB
494
494
 
495
495
  declare function startLive(argv: string[]): void;
496
496
 
497
+ /**
498
+ * AgentFlow Process Audit — OS-level process health checks for agent systems.
499
+ *
500
+ * Detects stale PID files, orphan processes, systemd unit issues,
501
+ * and mismatches between declared state (PID files, workers.json)
502
+ * and actual OS process state.
503
+ *
504
+ * Linux-only (reads /proc). Returns structured results for programmatic
505
+ * use or terminal display.
506
+ *
507
+ * @module
508
+ */
509
+ interface PidFileResult {
510
+ path: string;
511
+ pid: number | null;
512
+ alive: boolean;
513
+ /** Whether /proc/<pid>/cmdline contains the expected process name. */
514
+ matchesProcess: boolean;
515
+ stale: boolean;
516
+ reason: string;
517
+ }
518
+ interface SystemdUnitResult {
519
+ unit: string;
520
+ activeState: string;
521
+ subState: string;
522
+ mainPid: number;
523
+ restarts: number;
524
+ result: string;
525
+ crashLooping: boolean;
526
+ failed: boolean;
527
+ }
528
+ interface WorkerEntry {
529
+ name: string;
530
+ pid: number | null;
531
+ declaredStatus: string;
532
+ alive: boolean;
533
+ stale: boolean;
534
+ }
535
+ interface WorkersResult {
536
+ orchestratorPid: number | null;
537
+ orchestratorAlive: boolean;
538
+ startedAt: string;
539
+ workers: WorkerEntry[];
540
+ }
541
+ interface OsProcess {
542
+ pid: number;
543
+ cpu: string;
544
+ mem: string;
545
+ command: string;
546
+ }
547
+ interface ProcessAuditResult {
548
+ pidFile: PidFileResult | null;
549
+ systemd: SystemdUnitResult | null;
550
+ workers: WorkersResult | null;
551
+ osProcesses: OsProcess[];
552
+ orphans: OsProcess[];
553
+ problems: string[];
554
+ }
555
+ interface ProcessAuditConfig {
556
+ /** Path to the PID file (e.g. /home/user/.myapp/data/app.pid). */
557
+ pidFile?: string;
558
+ /** Path to workers.json or equivalent process registry. */
559
+ workersFile?: string;
560
+ /** Systemd unit name (e.g. "myapp.service"). Use `null` to skip. */
561
+ systemdUnit?: string | null;
562
+ /** Process name to match in `ps aux` output and /proc/<pid>/cmdline (e.g. "alfred", "myagent"). */
563
+ processName: string;
564
+ }
565
+ /**
566
+ * Scan directories for PID files (`*.pid`), worker registries (`workers.json`,
567
+ * `*-workers.json`), and infer a process name from the PID file name (or, failing that, from a `*-workers.json` file name; defaults to "agent").
568
+ *
569
+ * Returns a config suitable for `auditProcesses()`, or null if nothing found.
570
+ *
571
+ * @example
572
+ * ```ts
573
+ * const config = discoverProcessConfig(['./data', '/var/run/myagent']);
574
+ * if (config) console.log(formatAuditReport(auditProcesses(config)));
575
+ * ```
576
+ */
577
+ declare function discoverProcessConfig(dirs: string[]): ProcessAuditConfig | null;
578
+ /**
579
+ * Run a full process health audit.
580
+ *
581
+ * Checks PID files, systemd units, worker registries, and OS process tables
582
+ * to detect stale processes, orphans, and state mismatches.
583
+ *
584
+ * @example
585
+ * ```ts
586
+ * import { auditProcesses, formatAuditReport } from 'agentflow-core';
587
+ *
588
+ * const result = auditProcesses({
589
+ * processName: 'alfred',
590
+ * pidFile: '/home/user/.alfred/data/alfred.pid',
591
+ * workersFile: '/home/user/.alfred/data/workers.json',
592
+ * systemdUnit: 'alfred.service',
593
+ * });
594
+ *
595
+ * console.log(formatAuditReport(result));
596
+ * ```
597
+ */
598
+ declare function auditProcesses(config: ProcessAuditConfig): ProcessAuditResult;
599
+ /**
600
+ * Format an audit result as a human-readable terminal report.
601
+ * Uses Unicode box-drawing characters and status icons.
602
+ */
603
+ declare function formatAuditReport(result: ProcessAuditResult): string;
604
+
497
605
  /**
498
606
  * Load and deserialize execution graphs from JSON.
499
607
  *
@@ -743,4 +851,4 @@ interface AlertPayload {
743
851
  readonly dirs: readonly string[];
744
852
  }
745
853
 
746
- export { type Adapter, type AgentFlowConfig, type AlertCondition, type AlertPayload, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type GuardConfig, type GuardViolation, type MutableExecutionNode, type NodeStatus, type NodeType, type NotifyChannel, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type TraceStore, type WatchConfig, type Writer, checkGuards, createGraphBuilder, createTraceStore, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, startWatch, stitchTrace, toAsciiTree, toTimeline, withGuards };
854
+ export { type Adapter, type AgentFlowConfig, type AlertCondition, type AlertPayload, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type GuardConfig, type GuardViolation, type MutableExecutionNode, type NodeStatus, type NodeType, type NotifyChannel, type OsProcess, type PidFileResult, type ProcessAuditConfig, type ProcessAuditResult, type RunConfig, type RunResult, type StartNodeOptions, type SystemdUnitResult, type TraceEvent, type TraceEventType, type TraceStore, type WatchConfig, type WorkerEntry, type WorkersResult, type Writer, auditProcesses, checkGuards, createGraphBuilder, createTraceStore, discoverProcessConfig, findWaitingOn, formatAuditReport, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, startWatch, stitchTrace, toAsciiTree, toTimeline, withGuards };