agentflow-core 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -98,7 +98,6 @@ var import_path3 = require("path");
98
98
  // src/live.ts
99
99
  var import_node_fs = require("fs");
100
100
  var import_node_path = require("path");
101
- var import_node_child_process = require("child_process");
102
101
 
103
102
  // src/graph-query.ts
104
103
  function getChildren(graph, nodeId) {
@@ -515,18 +514,20 @@ function processJsonFile(file) {
515
514
  const w = info;
516
515
  const status2 = findStatus(w);
517
516
  const ts2 = findTimestamp(w) || findTimestamp(obj) || file.mtime;
518
- const pid = w.pid;
517
+ const rawPid = w.pid;
518
+ const pid = typeof rawPid === "number" ? rawPid : Number(rawPid);
519
+ const validPid = Number.isFinite(pid) && pid > 0;
519
520
  let validatedStatus = status2;
520
521
  let pidAlive = true;
521
- if (pid && (status2 === "running" || status2 === "ok")) {
522
+ if (validPid && (status2 === "running" || status2 === "ok")) {
522
523
  try {
523
- (0, import_node_child_process.execSync)(`kill -0 ${pid} 2>/dev/null`, { stdio: "ignore" });
524
+ process.kill(pid, 0);
524
525
  } catch {
525
526
  pidAlive = false;
526
527
  validatedStatus = "error";
527
528
  }
528
529
  }
529
- const pidLabel = pid ? pidAlive ? `pid: ${pid}` : `pid: ${pid} (dead)` : "";
530
+ const pidLabel = validPid ? pidAlive ? `pid: ${pid}` : `pid: ${pid} (dead)` : "";
530
531
  const detail2 = pidLabel || extractDetail(w);
531
532
  records.push({
532
533
  id: name,
@@ -1013,11 +1014,14 @@ function render(config) {
1013
1014
  writeLine(L, ` ${C.dim}Press Ctrl+C to exit${C.reset}`);
1014
1015
  flushLines(L);
1015
1016
  }
1016
- function getDistDepth(dt, spanId) {
1017
+ function getDistDepth(dt, spanId, visited) {
1017
1018
  if (!spanId) return 0;
1019
+ const seen = visited ?? /* @__PURE__ */ new Set();
1020
+ if (seen.has(spanId)) return 0;
1021
+ seen.add(spanId);
1018
1022
  const g = dt.graphs.get(spanId);
1019
1023
  if (!g || !g.parentSpanId) return 0;
1020
- return 1 + getDistDepth(dt, g.parentSpanId);
1024
+ return 1 + getDistDepth(dt, g.parentSpanId, seen);
1021
1025
  }
1022
1026
  function startLive(argv) {
1023
1027
  const config = parseArgs(argv);
@@ -1050,10 +1054,266 @@ function startLive(argv) {
1050
1054
  });
1051
1055
  }
1052
1056
 
1053
- // src/runner.ts
1054
- var import_node_child_process2 = require("child_process");
1057
+ // src/process-audit.ts
1058
+ var import_node_child_process = require("child_process");
1055
1059
  var import_node_fs2 = require("fs");
1056
1060
  var import_node_path2 = require("path");
1061
+ function isPidAlive(pid) {
1062
+ try {
1063
+ process.kill(pid, 0);
1064
+ return true;
1065
+ } catch {
1066
+ return false;
1067
+ }
1068
+ }
1069
+ function pidMatchesName(pid, name) {
1070
+ try {
1071
+ const cmdline = (0, import_node_fs2.readFileSync)(`/proc/${pid}/cmdline`, "utf8");
1072
+ return cmdline.includes(name);
1073
+ } catch {
1074
+ return false;
1075
+ }
1076
+ }
1077
+ function readPidFile(path) {
1078
+ try {
1079
+ const pid = parseInt((0, import_node_fs2.readFileSync)(path, "utf8").trim(), 10);
1080
+ return isNaN(pid) ? null : pid;
1081
+ } catch {
1082
+ return null;
1083
+ }
1084
+ }
1085
+ function auditPidFile(config) {
1086
+ if (!config.pidFile) return null;
1087
+ const pid = readPidFile(config.pidFile);
1088
+ if (pid === null) {
1089
+ return {
1090
+ path: config.pidFile,
1091
+ pid: null,
1092
+ alive: false,
1093
+ matchesProcess: false,
1094
+ stale: !(0, import_node_fs2.existsSync)(config.pidFile),
1095
+ reason: (0, import_node_fs2.existsSync)(config.pidFile) ? "PID file exists but content is invalid" : "No PID file found"
1096
+ };
1097
+ }
1098
+ const alive = isPidAlive(pid);
1099
+ const matchesProcess = alive ? pidMatchesName(pid, config.processName) : false;
1100
+ const stale = !alive || alive && !matchesProcess;
1101
+ let reason;
1102
+ if (alive && matchesProcess) {
1103
+ reason = `PID ${pid} alive and matches ${config.processName}`;
1104
+ } else if (alive && !matchesProcess) {
1105
+ reason = `PID ${pid} alive but is NOT ${config.processName} (PID reused by another process)`;
1106
+ } else {
1107
+ reason = `PID ${pid} no longer exists`;
1108
+ }
1109
+ return { path: config.pidFile, pid, alive, matchesProcess, stale, reason };
1110
+ }
1111
+ function auditSystemd(config) {
1112
+ if (config.systemdUnit === null || config.systemdUnit === void 0) return null;
1113
+ const unit = config.systemdUnit;
1114
+ try {
1115
+ const raw = (0, import_node_child_process.execSync)(
1116
+ `systemctl --user show ${unit} --property=ActiveState,SubState,MainPID,NRestarts,Result --no-pager 2>/dev/null`,
1117
+ { encoding: "utf8", timeout: 5e3 }
1118
+ );
1119
+ const props = {};
1120
+ for (const line of raw.trim().split("\n")) {
1121
+ const [k, ...v] = line.split("=");
1122
+ if (k) props[k.trim()] = v.join("=").trim();
1123
+ }
1124
+ const activeState = props["ActiveState"] ?? "unknown";
1125
+ const subState = props["SubState"] ?? "unknown";
1126
+ const mainPid = parseInt(props["MainPID"] ?? "0", 10);
1127
+ const restarts = parseInt(props["NRestarts"] ?? "0", 10);
1128
+ const result = props["Result"] ?? "unknown";
1129
+ return {
1130
+ unit,
1131
+ activeState,
1132
+ subState,
1133
+ mainPid,
1134
+ restarts,
1135
+ result,
1136
+ crashLooping: activeState === "activating" && subState === "auto-restart",
1137
+ failed: activeState === "failed"
1138
+ };
1139
+ } catch {
1140
+ return null;
1141
+ }
1142
+ }
1143
+ function auditWorkers(config) {
1144
+ if (!config.workersFile || !(0, import_node_fs2.existsSync)(config.workersFile)) return null;
1145
+ try {
1146
+ const data = JSON.parse((0, import_node_fs2.readFileSync)(config.workersFile, "utf8"));
1147
+ const orchPid = data.pid ?? null;
1148
+ const orchAlive = orchPid ? isPidAlive(orchPid) : false;
1149
+ const workers = [];
1150
+ for (const [name, info] of Object.entries(data.tools ?? {})) {
1151
+ const w = info;
1152
+ const wPid = w.pid ?? null;
1153
+ const wAlive = wPid ? isPidAlive(wPid) : false;
1154
+ workers.push({
1155
+ name,
1156
+ pid: wPid,
1157
+ declaredStatus: w.status ?? "unknown",
1158
+ alive: wAlive,
1159
+ stale: w.status === "running" && !wAlive
1160
+ });
1161
+ }
1162
+ return {
1163
+ orchestratorPid: orchPid,
1164
+ orchestratorAlive: orchAlive,
1165
+ startedAt: data.started_at ?? "",
1166
+ workers
1167
+ };
1168
+ } catch {
1169
+ return null;
1170
+ }
1171
+ }
1172
+ function getOsProcesses(processName) {
1173
+ try {
1174
+ const raw = (0, import_node_child_process.execSync)(`ps aux`, { encoding: "utf8", timeout: 5e3 });
1175
+ return raw.split("\n").filter((line) => line.includes(processName) && !line.includes("process-audit") && !line.includes("grep")).map((line) => {
1176
+ const parts = line.trim().split(/\s+/);
1177
+ return {
1178
+ pid: parseInt(parts[1] ?? "0", 10),
1179
+ cpu: parts[2] ?? "0",
1180
+ mem: parts[3] ?? "0",
1181
+ command: parts.slice(10).join(" ")
1182
+ };
1183
+ }).filter((p) => !isNaN(p.pid) && p.pid > 0);
1184
+ } catch {
1185
+ return [];
1186
+ }
1187
+ }
1188
+ function discoverProcessConfig(dirs) {
1189
+ let pidFile;
1190
+ let workersFile;
1191
+ let processName = "";
1192
+ for (const dir of dirs) {
1193
+ if (!(0, import_node_fs2.existsSync)(dir)) continue;
1194
+ let entries;
1195
+ try {
1196
+ entries = (0, import_node_fs2.readdirSync)(dir);
1197
+ } catch {
1198
+ continue;
1199
+ }
1200
+ for (const f of entries) {
1201
+ const fp = (0, import_node_path2.join)(dir, f);
1202
+ try {
1203
+ if (!(0, import_node_fs2.statSync)(fp).isFile()) continue;
1204
+ } catch {
1205
+ continue;
1206
+ }
1207
+ if (f.endsWith(".pid") && !pidFile) {
1208
+ pidFile = fp;
1209
+ if (!processName) {
1210
+ processName = (0, import_node_path2.basename)(f, ".pid");
1211
+ }
1212
+ }
1213
+ if ((f === "workers.json" || f.endsWith("-workers.json")) && !workersFile) {
1214
+ workersFile = fp;
1215
+ if (!processName && f !== "workers.json") {
1216
+ processName = (0, import_node_path2.basename)(f, "-workers.json");
1217
+ }
1218
+ }
1219
+ }
1220
+ }
1221
+ if (!processName && !pidFile && !workersFile) return null;
1222
+ if (!processName) processName = "agent";
1223
+ return { processName, pidFile, workersFile };
1224
+ }
1225
+ function auditProcesses(config) {
1226
+ const pidFile = auditPidFile(config);
1227
+ const systemd = auditSystemd(config);
1228
+ const workers = auditWorkers(config);
1229
+ const osProcesses = getOsProcesses(config.processName);
1230
+ const knownPids = /* @__PURE__ */ new Set();
1231
+ if (pidFile?.pid && !pidFile.stale) knownPids.add(pidFile.pid);
1232
+ if (workers) {
1233
+ if (workers.orchestratorPid) knownPids.add(workers.orchestratorPid);
1234
+ for (const w of workers.workers) {
1235
+ if (w.pid) knownPids.add(w.pid);
1236
+ }
1237
+ }
1238
+ if (systemd?.mainPid) knownPids.add(systemd.mainPid);
1239
+ const orphans = osProcesses.filter((p) => !knownPids.has(p.pid));
1240
+ const problems = [];
1241
+ if (pidFile?.stale) problems.push(`Stale PID file: ${pidFile.reason}`);
1242
+ if (systemd?.crashLooping) problems.push("Systemd unit is crash-looping (auto-restart)");
1243
+ if (systemd?.failed) problems.push("Systemd unit has failed");
1244
+ if (systemd && systemd.restarts > 10) problems.push(`High systemd restart count: ${systemd.restarts}`);
1245
+ if (pidFile?.pid && systemd?.mainPid && pidFile.pid !== systemd.mainPid) {
1246
+ problems.push(`PID mismatch: file says ${pidFile.pid}, systemd says ${systemd.mainPid}`);
1247
+ }
1248
+ if (workers) {
1249
+ for (const w of workers.workers) {
1250
+ if (w.stale) problems.push(`Worker "${w.name}" (pid ${w.pid}) declares running but is dead`);
1251
+ }
1252
+ }
1253
+ if (orphans.length > 0) problems.push(`${orphans.length} orphan process(es) not tracked by PID file or workers registry`);
1254
+ return { pidFile, systemd, workers, osProcesses, orphans, problems };
1255
+ }
1256
+ function formatAuditReport(result) {
1257
+ const lines = [];
1258
+ lines.push("");
1259
+ lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
1260
+ lines.push("\u2551 \u{1F50D} P R O C E S S A U D I T \u2551");
1261
+ lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
1262
+ if (result.pidFile) {
1263
+ const pf = result.pidFile;
1264
+ const icon = pf.pid && pf.alive && pf.matchesProcess ? "\u2705" : pf.stale ? "\u26A0\uFE0F " : "\u2139\uFE0F ";
1265
+ lines.push(`
1266
+ PID File: ${pf.path}`);
1267
+ lines.push(` ${icon} ${pf.reason}`);
1268
+ }
1269
+ if (result.systemd) {
1270
+ const sd = result.systemd;
1271
+ const icon = sd.activeState === "active" ? "\u{1F7E2}" : sd.crashLooping ? "\u{1F7E1}" : sd.failed ? "\u{1F534}" : "\u26AA";
1272
+ lines.push(`
1273
+ Systemd: ${sd.unit}`);
1274
+ lines.push(` ${icon} State: ${sd.activeState} (${sd.subState}) Result: ${sd.result}`);
1275
+ lines.push(` Main PID: ${sd.mainPid || "none"} Restarts: ${sd.restarts}`);
1276
+ }
1277
+ if (result.workers) {
1278
+ const w = result.workers;
1279
+ lines.push(`
1280
+ Workers (orchestrator pid ${w.orchestratorPid ?? "unknown"} ${w.orchestratorAlive ? "\u2705" : "\u274C"})`);
1281
+ for (const worker of w.workers) {
1282
+ const icon = worker.declaredStatus === "running" && worker.alive ? "\u{1F7E2}" : worker.stale ? "\u{1F534} STALE" : "\u26AA";
1283
+ lines.push(` ${icon} ${worker.name.padEnd(14)} pid=${String(worker.pid ?? "-").padEnd(8)} status=${worker.declaredStatus}`);
1284
+ }
1285
+ }
1286
+ if (result.osProcesses.length > 0) {
1287
+ lines.push(`
1288
+ OS Processes (${result.osProcesses.length} total)`);
1289
+ for (const p of result.osProcesses) {
1290
+ lines.push(` PID ${String(p.pid).padEnd(8)} CPU=${p.cpu.padEnd(6)} MEM=${p.mem.padEnd(6)} ${p.command.substring(0, 55)}`);
1291
+ }
1292
+ }
1293
+ if (result.orphans.length > 0) {
1294
+ lines.push(`
1295
+ \u26A0\uFE0F ${result.orphans.length} ORPHAN PROCESS(ES):`);
1296
+ for (const p of result.orphans) {
1297
+ lines.push(` PID ${p.pid} \u2014 not tracked by PID file or workers registry`);
1298
+ }
1299
+ }
1300
+ lines.push("");
1301
+ if (result.problems.length === 0) {
1302
+ lines.push(" \u2705 All checks passed \u2014 no process issues detected.");
1303
+ } else {
1304
+ lines.push(` \u26A0\uFE0F ${result.problems.length} issue(s):`);
1305
+ for (const p of result.problems) {
1306
+ lines.push(` \u2022 ${p}`);
1307
+ }
1308
+ }
1309
+ lines.push("");
1310
+ return lines.join("\n");
1311
+ }
1312
+
1313
+ // src/runner.ts
1314
+ var import_node_child_process2 = require("child_process");
1315
+ var import_node_fs3 = require("fs");
1316
+ var import_node_path3 = require("path");
1057
1317
 
1058
1318
  // src/graph-builder.ts
1059
1319
  var import_crypto = require("crypto");
@@ -1289,12 +1549,12 @@ function globToRegex(pattern) {
1289
1549
  }
1290
1550
  function snapshotDir(dir, patterns) {
1291
1551
  const result = /* @__PURE__ */ new Map();
1292
- if (!(0, import_node_fs2.existsSync)(dir)) return result;
1293
- for (const entry of (0, import_node_fs2.readdirSync)(dir)) {
1552
+ if (!(0, import_node_fs3.existsSync)(dir)) return result;
1553
+ for (const entry of (0, import_node_fs3.readdirSync)(dir)) {
1294
1554
  if (!patterns.some((re) => re.test(entry))) continue;
1295
- const full = (0, import_node_path2.join)(dir, entry);
1555
+ const full = (0, import_node_path3.join)(dir, entry);
1296
1556
  try {
1297
- const stat = (0, import_node_fs2.statSync)(full);
1557
+ const stat = (0, import_node_fs3.statSync)(full);
1298
1558
  if (stat.isFile()) {
1299
1559
  result.set(full, stat.mtimeMs);
1300
1560
  }
@@ -1304,7 +1564,7 @@ function snapshotDir(dir, patterns) {
1304
1564
  return result;
1305
1565
  }
1306
1566
  function agentIdFromFilename(filePath) {
1307
- const base = (0, import_node_path2.basename)(filePath, ".json");
1567
+ const base = (0, import_node_path3.basename)(filePath, ".json");
1308
1568
  const cleaned = base.replace(/-state$/, "");
1309
1569
  return `alfred-${cleaned}`;
1310
1570
  }
@@ -1326,7 +1586,7 @@ async function runTraced(config) {
1326
1586
  if (command.length === 0) {
1327
1587
  throw new Error("runTraced: command must not be empty");
1328
1588
  }
1329
- const resolvedTracesDir = (0, import_node_path2.resolve)(tracesDir);
1589
+ const resolvedTracesDir = (0, import_node_path3.resolve)(tracesDir);
1330
1590
  const patterns = watchPatterns.map(globToRegex);
1331
1591
  const orchestrator = createGraphBuilder({ agentId, trigger });
1332
1592
  const { traceId, spanId } = orchestrator.traceContext;
@@ -1409,15 +1669,19 @@ async function runTraced(config) {
1409
1669
  childBuilder.endNode(childRootId);
1410
1670
  allGraphs.push(childBuilder.build());
1411
1671
  }
1412
- if (!(0, import_node_fs2.existsSync)(resolvedTracesDir)) {
1413
- (0, import_node_fs2.mkdirSync)(resolvedTracesDir, { recursive: true });
1672
+ if (!(0, import_node_fs3.existsSync)(resolvedTracesDir)) {
1673
+ (0, import_node_fs3.mkdirSync)(resolvedTracesDir, { recursive: true });
1414
1674
  }
1415
1675
  const ts = fileTimestamp();
1416
1676
  const tracePaths = [];
1417
1677
  for (const graph of allGraphs) {
1418
1678
  const filename = `${graph.agentId}-${ts}.json`;
1419
- const outPath = (0, import_node_path2.join)(resolvedTracesDir, filename);
1420
- (0, import_node_fs2.writeFileSync)(outPath, JSON.stringify(graphToJson(graph), null, 2), "utf-8");
1679
+ const outPath = (0, import_node_path3.join)(resolvedTracesDir, filename);
1680
+ const resolvedOut = (0, import_node_path3.resolve)(outPath);
1681
+ if (!resolvedOut.startsWith(resolvedTracesDir + "/") && resolvedOut !== resolvedTracesDir) {
1682
+ throw new Error(`Path traversal detected: agentId "${graph.agentId}" escapes traces directory`);
1683
+ }
1684
+ (0, import_node_fs3.writeFileSync)(outPath, JSON.stringify(graphToJson(graph), null, 2), "utf-8");
1421
1685
  tracePaths.push(outPath);
1422
1686
  }
1423
1687
  if (tracePaths.length > 0) {
@@ -1469,6 +1733,11 @@ function createTraceStore(dir) {
1469
1733
  await ensureDir();
1470
1734
  const json = graphToJson(graph);
1471
1735
  const filePath = (0, import_path.join)(dir, `${graph.id}.json`);
1736
+ const resolvedBase = (0, import_path.resolve)(dir);
1737
+ const resolvedPath = (0, import_path.resolve)(filePath);
1738
+ if (!resolvedPath.startsWith(resolvedBase + "/") && resolvedPath !== resolvedBase) {
1739
+ throw new Error(`Path traversal detected: "${graph.id}" escapes base directory`);
1740
+ }
1472
1741
  await (0, import_promises.writeFile)(filePath, JSON.stringify(json, null, 2), "utf-8");
1473
1742
  return filePath;
1474
1743
  },
@@ -1742,11 +2011,11 @@ async function traceShow(argv) {
1742
2011
  let graph = await store.get(graphId);
1743
2012
  if (!graph) {
1744
2013
  const { readFile: readFile2 } = await import("fs/promises");
1745
- const { join: join5 } = await import("path");
2014
+ const { join: join6 } = await import("path");
1746
2015
  const fname = graphId.endsWith(".json") ? graphId : `${graphId}.json`;
1747
2016
  try {
1748
2017
  const { loadGraph: loadGraph2 } = await Promise.resolve().then(() => (init_loader(), loader_exports));
1749
- const content = await readFile2(join5(dir, fname), "utf-8");
2018
+ const content = await readFile2(join6(dir, fname), "utf-8");
1750
2019
  graph = loadGraph2(content);
1751
2020
  } catch {
1752
2021
  }
@@ -1771,11 +2040,11 @@ async function traceTimeline(argv) {
1771
2040
  let graph = await store.get(graphId);
1772
2041
  if (!graph) {
1773
2042
  const { readFile: readFile2 } = await import("fs/promises");
1774
- const { join: join5 } = await import("path");
2043
+ const { join: join6 } = await import("path");
1775
2044
  const fname = graphId.endsWith(".json") ? graphId : `${graphId}.json`;
1776
2045
  try {
1777
2046
  const { loadGraph: loadGraph2 } = await Promise.resolve().then(() => (init_loader(), loader_exports));
1778
- const content = await readFile2(join5(dir, fname), "utf-8");
2047
+ const content = await readFile2(join6(dir, fname), "utf-8");
1779
2048
  graph = loadGraph2(content);
1780
2049
  } catch {
1781
2050
  }
@@ -1856,9 +2125,9 @@ async function handleTrace(argv) {
1856
2125
  }
1857
2126
 
1858
2127
  // src/watch.ts
1859
- var import_node_fs4 = require("fs");
2128
+ var import_node_fs5 = require("fs");
1860
2129
  var import_node_os = require("os");
1861
- var import_node_path3 = require("path");
2130
+ var import_node_path4 = require("path");
1862
2131
 
1863
2132
  // src/watch-alerts.ts
1864
2133
  var import_node_child_process3 = require("child_process");
@@ -1916,7 +2185,7 @@ function sendTelegram(payload, botToken, chatId) {
1916
2185
  text: formatTelegram(payload),
1917
2186
  parse_mode: "Markdown"
1918
2187
  });
1919
- return new Promise((resolve6, reject) => {
2188
+ return new Promise((resolve7, reject) => {
1920
2189
  const req = (0, import_node_https.request)(
1921
2190
  `https://api.telegram.org/bot${botToken}/sendMessage`,
1922
2191
  {
@@ -1925,7 +2194,7 @@ function sendTelegram(payload, botToken, chatId) {
1925
2194
  },
1926
2195
  (res) => {
1927
2196
  res.resume();
1928
- if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve6();
2197
+ if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve7();
1929
2198
  else reject(new Error(`Telegram API returned ${res.statusCode}`));
1930
2199
  }
1931
2200
  );
@@ -1938,7 +2207,7 @@ function sendWebhook(payload, url) {
1938
2207
  const body = JSON.stringify(payload);
1939
2208
  const isHttps = url.startsWith("https");
1940
2209
  const doRequest = isHttps ? import_node_https.request : import_node_http.request;
1941
- return new Promise((resolve6, reject) => {
2210
+ return new Promise((resolve7, reject) => {
1942
2211
  const req = doRequest(
1943
2212
  url,
1944
2213
  {
@@ -1947,7 +2216,7 @@ function sendWebhook(payload, url) {
1947
2216
  },
1948
2217
  (res) => {
1949
2218
  res.resume();
1950
- if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve6();
2219
+ if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve7();
1951
2220
  else reject(new Error(`Webhook returned ${res.statusCode}`));
1952
2221
  }
1953
2222
  );
@@ -1960,7 +2229,7 @@ function sendWebhook(payload, url) {
1960
2229
  });
1961
2230
  }
1962
2231
  function sendCommand(payload, cmd) {
1963
- return new Promise((resolve6, reject) => {
2232
+ return new Promise((resolve7, reject) => {
1964
2233
  const env = {
1965
2234
  ...process.env,
1966
2235
  AGENTFLOW_ALERT_AGENT: payload.agentId,
@@ -1973,13 +2242,13 @@ function sendCommand(payload, cmd) {
1973
2242
  };
1974
2243
  (0, import_node_child_process3.exec)(cmd, { env, timeout: 3e4 }, (err) => {
1975
2244
  if (err) reject(err);
1976
- else resolve6();
2245
+ else resolve7();
1977
2246
  });
1978
2247
  });
1979
2248
  }
1980
2249
 
1981
2250
  // src/watch-state.ts
1982
- var import_node_fs3 = require("fs");
2251
+ var import_node_fs4 = require("fs");
1983
2252
  function parseDuration(input) {
1984
2253
  const match = input.match(/^(\d+(?:\.\d+)?)\s*(s|m|h|d)$/i);
1985
2254
  if (!match) {
@@ -2004,9 +2273,9 @@ function emptyState() {
2004
2273
  return { version: 1, agents: {}, lastPollTime: 0 };
2005
2274
  }
2006
2275
  function loadWatchState(filePath) {
2007
- if (!(0, import_node_fs3.existsSync)(filePath)) return emptyState();
2276
+ if (!(0, import_node_fs4.existsSync)(filePath)) return emptyState();
2008
2277
  try {
2009
- const raw = JSON.parse((0, import_node_fs3.readFileSync)(filePath, "utf8"));
2278
+ const raw = JSON.parse((0, import_node_fs4.readFileSync)(filePath, "utf8"));
2010
2279
  if (raw.version !== 1 || typeof raw.agents !== "object") return emptyState();
2011
2280
  return raw;
2012
2281
  } catch {
@@ -2016,11 +2285,11 @@ function loadWatchState(filePath) {
2016
2285
  function saveWatchState(filePath, state) {
2017
2286
  const tmp = filePath + ".tmp";
2018
2287
  try {
2019
- (0, import_node_fs3.writeFileSync)(tmp, JSON.stringify(state, null, 2), "utf8");
2020
- (0, import_node_fs3.renameSync)(tmp, filePath);
2288
+ (0, import_node_fs4.writeFileSync)(tmp, JSON.stringify(state, null, 2), "utf8");
2289
+ (0, import_node_fs4.renameSync)(tmp, filePath);
2021
2290
  } catch {
2022
2291
  try {
2023
- (0, import_node_fs3.writeFileSync)(filePath, JSON.stringify(state, null, 2), "utf8");
2292
+ (0, import_node_fs4.writeFileSync)(filePath, JSON.stringify(state, null, 2), "utf8");
2024
2293
  } catch {
2025
2294
  }
2026
2295
  }
@@ -2248,20 +2517,20 @@ function parseWatchArgs(argv) {
2248
2517
  recursive = true;
2249
2518
  i++;
2250
2519
  } else if (!arg.startsWith("-")) {
2251
- dirs.push((0, import_node_path3.resolve)(arg));
2520
+ dirs.push((0, import_node_path4.resolve)(arg));
2252
2521
  i++;
2253
2522
  } else {
2254
2523
  i++;
2255
2524
  }
2256
2525
  }
2257
- if (dirs.length === 0) dirs.push((0, import_node_path3.resolve)("."));
2526
+ if (dirs.length === 0) dirs.push((0, import_node_path4.resolve)("."));
2258
2527
  if (alertConditions.length === 0) {
2259
2528
  alertConditions.push({ type: "error" });
2260
2529
  alertConditions.push({ type: "recovery" });
2261
2530
  }
2262
2531
  notifyChannels.unshift({ type: "stdout" });
2263
2532
  if (!stateFilePath) {
2264
- stateFilePath = (0, import_node_path3.join)(dirs[0], ".agentflow-watch-state.json");
2533
+ stateFilePath = (0, import_node_path4.join)(dirs[0], ".agentflow-watch-state.json");
2265
2534
  }
2266
2535
  return {
2267
2536
  dirs,
@@ -2269,7 +2538,7 @@ function parseWatchArgs(argv) {
2269
2538
  pollIntervalMs,
2270
2539
  alertConditions,
2271
2540
  notifyChannels,
2272
- stateFilePath: (0, import_node_path3.resolve)(stateFilePath),
2541
+ stateFilePath: (0, import_node_path4.resolve)(stateFilePath),
2273
2542
  cooldownMs
2274
2543
  };
2275
2544
  }
@@ -2323,12 +2592,12 @@ Examples:
2323
2592
  }
2324
2593
  function startWatch(argv) {
2325
2594
  const config = parseWatchArgs(argv);
2326
- const valid = config.dirs.filter((d) => (0, import_node_fs4.existsSync)(d));
2595
+ const valid = config.dirs.filter((d) => (0, import_node_fs5.existsSync)(d));
2327
2596
  if (valid.length === 0) {
2328
2597
  console.error(`No valid directories found: ${config.dirs.join(", ")}`);
2329
2598
  process.exit(1);
2330
2599
  }
2331
- const invalid = config.dirs.filter((d) => !(0, import_node_fs4.existsSync)(d));
2600
+ const invalid = config.dirs.filter((d) => !(0, import_node_fs5.existsSync)(d));
2332
2601
  if (invalid.length > 0) {
2333
2602
  console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
2334
2603
  }
@@ -2366,9 +2635,17 @@ agentflow watch started`);
2366
2635
  records.push(...recs);
2367
2636
  }
2368
2637
  const alerts = detectTransitions(state, records, config, now);
2369
- for (const alert of alerts) {
2370
- for (const channel of config.notifyChannels) {
2371
- await sendAlert(alert, channel);
2638
+ const isBootstrap = pollCount === 1 && Object.keys(state.agents).length === 0;
2639
+ if (isBootstrap) {
2640
+ const suppressed = alerts.length;
2641
+ if (suppressed > 0) {
2642
+ console.log(`[bootstrap] Suppressed ${suppressed} initial alerts (baseline scan)`);
2643
+ }
2644
+ } else {
2645
+ for (const alert of alerts) {
2646
+ for (const channel of config.notifyChannels) {
2647
+ await sendAlert(alert, channel);
2648
+ }
2372
2649
  }
2373
2650
  }
2374
2651
  state = updateWatchState(state, records, alerts, now);
@@ -2412,6 +2689,7 @@ Commands:
2412
2689
  live [dir...] [options] Real-time terminal monitor (auto-detects any JSON/JSONL)
2413
2690
  watch [dir...] [options] Headless alert system \u2014 detects failures, sends notifications
2414
2691
  trace <command> [options] Inspect saved execution traces (list, show, timeline, stuck, loops)
2692
+ audit [options] Audit OS processes \u2014 detect stale PIDs, orphans, systemd issues
2415
2693
 
2416
2694
  Run \`agentflow <command> --help\` for command-specific options.
2417
2695
 
@@ -2563,9 +2841,102 @@ async function runCommand(argv) {
2563
2841
  process.exit(1);
2564
2842
  }
2565
2843
  }
2844
+ function parseAuditArgs(argv) {
2845
+ let processName = "";
2846
+ let pidFile;
2847
+ let workersFile;
2848
+ let systemdUnit;
2849
+ const discoverDirs = [];
2850
+ const args = argv.slice(0);
2851
+ if (args[0] === "audit") args.shift();
2852
+ let i = 0;
2853
+ while (i < args.length) {
2854
+ const arg = args[i];
2855
+ if (arg === "--help" || arg === "-h") {
2856
+ printAuditUsage();
2857
+ process.exit(0);
2858
+ } else if (arg === "--process" || arg === "-p") {
2859
+ i++;
2860
+ processName = args[i] ?? "";
2861
+ i++;
2862
+ } else if (arg === "--pid-file") {
2863
+ i++;
2864
+ pidFile = args[i];
2865
+ i++;
2866
+ } else if (arg === "--workers-file") {
2867
+ i++;
2868
+ workersFile = args[i];
2869
+ i++;
2870
+ } else if (arg === "--systemd") {
2871
+ i++;
2872
+ systemdUnit = args[i];
2873
+ i++;
2874
+ } else if (arg === "--no-systemd") {
2875
+ systemdUnit = null;
2876
+ i++;
2877
+ } else if (!arg.startsWith("-")) {
2878
+ discoverDirs.push((0, import_path3.resolve)(arg));
2879
+ i++;
2880
+ } else {
2881
+ i++;
2882
+ }
2883
+ }
2884
+ if (!processName && !pidFile && !workersFile && discoverDirs.length > 0) {
2885
+ const discovered = discoverProcessConfig(discoverDirs);
2886
+ if (discovered) {
2887
+ console.log(`Auto-discovered: process="${discovered.processName}"${discovered.pidFile ? ` pid-file=${discovered.pidFile}` : ""}${discovered.workersFile ? ` workers=${discovered.workersFile}` : ""}`);
2888
+ return { ...discovered, systemdUnit };
2889
+ }
2890
+ }
2891
+ if (!processName) {
2892
+ console.error("Error: --process <name> is required, or provide directories for auto-discovery.");
2893
+ console.error("Examples:");
2894
+ console.error(" agentflow audit --process alfred --pid-file ./data/alfred.pid");
2895
+ console.error(" agentflow audit ./data # auto-discovers *.pid and workers.json");
2896
+ process.exit(1);
2897
+ }
2898
+ return { processName, pidFile, workersFile, systemdUnit };
2899
+ }
2900
+ function printAuditUsage() {
2901
+ console.log(
2902
+ `
2903
+ AgentFlow Audit \u2014 OS-level process health check for agent systems.
2904
+
2905
+ Detects stale PID files, orphan processes, systemd crash loops, and
2906
+ mismatches between declared state and actual OS process state.
2907
+
2908
+ Usage:
2909
+ agentflow audit [dir...] [options]
2910
+ agentflow audit --process <name> [options]
2911
+
2912
+ Arguments:
2913
+ dir Directories to scan for auto-discovery of *.pid and workers.json
2914
+
2915
+ Options:
2916
+ -p, --process <name> Process name to search for (e.g. "alfred", "myagent")
2917
+ --pid-file <path> Path to PID file
2918
+ --workers-file <path> Path to workers.json or process registry
2919
+ --systemd <unit> Systemd user unit name (e.g. "alfred.service")
2920
+ --no-systemd Skip systemd checks
2921
+ -h, --help Show this help message
2922
+
2923
+ Examples:
2924
+ agentflow audit ./data # auto-discover from data directory
2925
+ agentflow audit --process alfred --systemd alfred.service
2926
+ agentflow audit --process myagent --pid-file /var/run/myagent.pid --workers-file ./workers.json
2927
+ agentflow audit --process crewai --no-systemd
2928
+ `.trim()
2929
+ );
2930
+ }
2931
+ function runAudit(argv) {
2932
+ const config = parseAuditArgs(argv);
2933
+ const result = auditProcesses(config);
2934
+ console.log(formatAuditReport(result));
2935
+ process.exit(result.problems.length > 0 ? 1 : 0);
2936
+ }
2566
2937
  async function main() {
2567
2938
  const argv = process.argv.slice(2);
2568
- const knownCommands = ["run", "live", "watch", "trace"];
2939
+ const knownCommands = ["run", "live", "watch", "trace", "audit"];
2569
2940
  if (argv.length === 0 || !knownCommands.includes(argv[0]) && (argv.includes("--help") || argv.includes("-h"))) {
2570
2941
  printHelp();
2571
2942
  process.exit(0);
@@ -2584,6 +2955,9 @@ async function main() {
2584
2955
  case "trace":
2585
2956
  await handleTrace(argv);
2586
2957
  break;
2958
+ case "audit":
2959
+ runAudit(argv);
2960
+ break;
2587
2961
  default:
2588
2962
  if (!subcommand?.startsWith("-")) {
2589
2963
  startLive(["live", ...argv]);