agentflow-core 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -38,6 +38,7 @@ __export(index_exports, {
38
38
  loadGraph: () => loadGraph,
39
39
  runTraced: () => runTraced,
40
40
  startLive: () => startLive,
41
+ startWatch: () => startWatch,
41
42
  stitchTrace: () => stitchTrace
42
43
  });
43
44
  module.exports = __toCommonJS(index_exports);
@@ -1214,6 +1215,522 @@ function startLive(argv) {
1214
1215
  process.exit(0);
1215
1216
  });
1216
1217
  }
1218
+
1219
+ // src/watch.ts
1220
+ var import_node_fs4 = require("fs");
1221
+ var import_node_path3 = require("path");
1222
+ var import_node_os = require("os");
1223
+
1224
+ // src/watch-state.ts
1225
+ var import_node_fs3 = require("fs");
1226
/**
 * Convert a duration such as "15m", "1.5h" or "30 s" into milliseconds.
 *
 * Accepted unit suffixes (case-insensitive) are s, m, h and d. Text that is
 * not `<number><unit>` is read as a whole number of seconds via `parseInt`;
 * when that yields NaN the result is 0.
 *
 * @param input Raw duration text.
 * @returns The duration in milliseconds, or 0 when nothing numeric is found.
 */
function parseDuration(input) {
  const msPerUnit = { s: 1e3, m: 6e4, h: 36e5, d: 864e5 };
  const parts = input.match(/^(\d+(?:\.\d+)?)\s*(s|m|h|d)$/i);
  if (parts) {
    const amount = parseFloat(parts[1]);
    const unit = parts[2].toLowerCase();
    return amount * msPerUnit[unit];
  }
  // No unit suffix: interpret the leading integer as seconds.
  const seconds = parseInt(input, 10);
  if (isNaN(seconds)) return 0;
  return seconds * 1e3;
}
1246
+ function emptyState() {
1247
+ return { version: 1, agents: {}, lastPollTime: 0 };
1248
+ }
1249
/**
 * Read the persisted watch state from disk.
 *
 * Falls back to an empty state when the file is missing, unreadable, not
 * valid JSON, has a version other than 1, or does not carry a plain-object
 * `agents` map.
 *
 * @param filePath Path of the JSON state file.
 * @returns The parsed state, or the result of `emptyState()` on any problem.
 */
function loadWatchState(filePath) {
  if (!(0, import_node_fs3.existsSync)(filePath)) return emptyState();
  try {
    const raw = JSON.parse((0, import_node_fs3.readFileSync)(filePath, "utf8"));
    // `typeof null` and `typeof []` are both "object", so they need explicit
    // rejection; otherwise later `agents[agentId]` lookups would throw.
    const hasAgentMap =
      raw !== null &&
      typeof raw === "object" &&
      raw.agents !== null &&
      typeof raw.agents === "object" &&
      !Array.isArray(raw.agents);
    if (!hasAgentMap || raw.version !== 1) return emptyState();
    return raw;
  } catch {
    // Unreadable or corrupt state is treated as "no state yet".
    return emptyState();
  }
}
1259
+ function saveWatchState(filePath, state) {
1260
+ const tmp = filePath + ".tmp";
1261
+ try {
1262
+ (0, import_node_fs3.writeFileSync)(tmp, JSON.stringify(state, null, 2), "utf8");
1263
+ (0, import_node_fs3.renameSync)(tmp, filePath);
1264
+ } catch {
1265
+ try {
1266
+ (0, import_node_fs3.writeFileSync)(filePath, JSON.stringify(state, null, 2), "utf8");
1267
+ } catch {
1268
+ }
1269
+ }
1270
+ }
1271
/**
 * Estimate how often an agent updates, from its recorded activity timestamps.
 *
 * The timestamps are sorted on a copy, the positive gaps between neighbours
 * are collected, and the median gap is returned (the upper of the two middle
 * values when the count is even).
 *
 * @param history Activity timestamps; fewer than three yields 0.
 * @returns The median positive gap, or 0 when no estimate is possible.
 */
function estimateInterval(history) {
  if (history.length < 3) return 0;
  const ascending = [...history].sort((x, y) => x - y);
  const gaps = ascending
    .slice(1)
    .map((stamp, idx) => stamp - ascending[idx])
    .filter((gap) => gap > 0)
    .sort((x, y) => x - y);
  return gaps.length === 0 ? 0 : gaps[Math.floor(gaps.length / 2)];
}
1283
/**
 * Compare the freshly scanned records with the persisted watch state and
 * build the alerts to send for this poll.
 *
 * Only the most recently active record per agent id is evaluated. Depending
 * on `config.alertConditions` the function emits:
 *  - "error" when an agent's status becomes "error" from anything else,
 *  - "recovery" when it goes from "error" to "ok" (never rate-limited),
 *  - "consecutive-errors (N)" when the error streak reaches a threshold,
 *  - "stale" when `now - lastActive` exceeds a configured duration,
 *  - "stale (auto)" when no stale condition is configured and the agent has
 *    been silent for more than three times its estimated update interval.
 *
 * @param previous Watch state from the previous poll (`agents` keyed by id).
 * @param currentRecords Records from the current scan; `id`, `status`,
 *   `lastActive` and `detail` are read from each.
 * @param config Watch configuration; `alertConditions`, `cooldownMs` and
 *   `dirs` are used.
 * @param now Current time in epoch milliseconds.
 * @returns Alert payloads in detection order.
 */
function detectTransitions(previous, currentRecords, config, now) {
  const alerts = [];
  const hasError = config.alertConditions.some((c) => c.type === "error");
  const hasRecovery = config.alertConditions.some((c) => c.type === "recovery");
  const staleConditions = config.alertConditions.filter((c) => c.type === "stale");
  const consecutiveConditions = config.alertConditions.filter((c) => c.type === "consecutive-errors");

  // Keep only the most recently active record for each agent.
  const byAgent = new Map();
  for (const r of currentRecords) {
    const existing = byAgent.get(r.id);
    if (!existing || r.lastActive > existing.lastActive) {
      byAgent.set(r.id, r);
    }
  }

  for (const [agentId, record] of byAgent) {
    const prev = previous.agents[agentId];
    const prevStatus = prev?.lastStatus ?? "unknown";
    const currStatus = record.status;

    // The cooldown key MUST be the payload's condition string:
    // updateWatchState persists `alert.condition` as `lastAlertReason`, and
    // canAlert only applies the cooldown when the two strings are equal.
    const pushIfAllowed = (condition, alertRecord) => {
      if (!canAlert(prev, condition, config.cooldownMs, now)) return;
      alerts.push(makePayload(agentId, condition, prevStatus, currStatus, alertRecord, config.dirs));
    };

    if (hasError && currStatus === "error" && prevStatus !== "error") {
      pushIfAllowed("error", record);
    }
    if (hasRecovery && currStatus === "ok" && prevStatus === "error") {
      // Recoveries bypass the cooldown so they are always reported.
      alerts.push(makePayload(agentId, "recovery", prevStatus, currStatus, record, config.dirs));
    }

    const newConsec = currStatus === "error" ? (prev?.consecutiveErrors ?? 0) + 1 : 0;
    for (const cond of consecutiveConditions) {
      if (newConsec === cond.threshold) {
        pushIfAllowed(
          `consecutive-errors (${cond.threshold})`,
          { ...record, detail: `${newConsec} consecutive errors. ${record.detail}` }
        );
      }
    }

    const sinceActive = now - record.lastActive;
    // A non-positive lastActive gives no usable age, so staleness is skipped.
    const hasTimestamp = record.lastActive > 0;

    for (const cond of staleConditions) {
      if (hasTimestamp && sinceActive > cond.durationMs) {
        const mins = Math.floor(sinceActive / 6e4);
        pushIfAllowed("stale", { ...record, detail: `No update for ${mins}m. ${record.detail}` });
      }
    }

    // Without an explicit stale condition, infer one from the update history.
    if (staleConditions.length === 0 && hasTimestamp) {
      const expectedInterval = estimateInterval(prev?.mtimeHistory ?? []);
      if (expectedInterval > 0 && sinceActive > expectedInterval * 3) {
        const mins = Math.floor(sinceActive / 6e4);
        const expectedMins = Math.floor(expectedInterval / 6e4);
        pushIfAllowed(
          "stale (auto)",
          { ...record, detail: `No update for ${mins}m (expected every ~${expectedMins}m). ${record.detail}` }
        );
      }
    }
  }
  return alerts;
}
1363
+ function updateWatchState(state, records, alerts, now) {
1364
+ const agents = { ...state.agents };
1365
+ const alertsByAgent = /* @__PURE__ */ new Map();
1366
+ for (const a of alerts) alertsByAgent.set(a.agentId, a);
1367
+ const byAgent = /* @__PURE__ */ new Map();
1368
+ for (const r of records) {
1369
+ const existing = byAgent.get(r.id);
1370
+ if (!existing || r.lastActive > existing.lastActive) {
1371
+ byAgent.set(r.id, r);
1372
+ }
1373
+ }
1374
+ for (const [agentId, record] of byAgent) {
1375
+ const prev = agents[agentId];
1376
+ const history = prev?.mtimeHistory ?? [];
1377
+ const newHistory = [...history];
1378
+ if (newHistory.length === 0 || newHistory[newHistory.length - 1] !== record.lastActive) {
1379
+ newHistory.push(record.lastActive);
1380
+ }
1381
+ while (newHistory.length > 10) newHistory.shift();
1382
+ const alert = alertsByAgent.get(agentId);
1383
+ const consecutiveErrors = record.status === "error" ? (prev?.consecutiveErrors ?? 0) + 1 : 0;
1384
+ agents[agentId] = {
1385
+ id: agentId,
1386
+ lastStatus: record.status,
1387
+ lastActive: record.lastActive,
1388
+ lastAlertTime: alert ? now : prev?.lastAlertTime ?? 0,
1389
+ lastAlertReason: alert ? alert.condition : prev?.lastAlertReason ?? "",
1390
+ consecutiveErrors,
1391
+ mtimeHistory: newHistory
1392
+ };
1393
+ }
1394
+ return { version: 1, agents, lastPollTime: now };
1395
+ }
1396
+ function canAlert(prev, reason, cooldownMs, now) {
1397
+ if (!prev) return true;
1398
+ if (prev.lastAlertReason !== reason) return true;
1399
+ return now - prev.lastAlertTime > cooldownMs;
1400
+ }
1401
+ function makePayload(agentId, condition, previousStatus, currentStatus, record, dirs) {
1402
+ return {
1403
+ agentId,
1404
+ condition,
1405
+ previousStatus,
1406
+ currentStatus,
1407
+ detail: record.detail,
1408
+ file: record.file,
1409
+ timestamp: Date.now(),
1410
+ dirs
1411
+ };
1412
+ }
1413
+
1414
+ // src/watch-alerts.ts
1415
+ var import_node_https = require("https");
1416
+ var import_node_http = require("http");
1417
+ var import_node_child_process2 = require("child_process");
1418
+ function formatAlertMessage(payload) {
1419
+ const time = new Date(payload.timestamp).toISOString();
1420
+ const arrow = `${payload.previousStatus} \u2192 ${payload.currentStatus}`;
1421
+ return [
1422
+ `[ALERT] ${payload.condition}: "${payload.agentId}"`,
1423
+ ` Status: ${arrow}`,
1424
+ payload.detail ? ` Detail: ${payload.detail}` : null,
1425
+ ` File: ${payload.file}`,
1426
+ ` Time: ${time}`
1427
+ ].filter(Boolean).join("\n");
1428
+ }
1429
+ function formatTelegram(payload) {
1430
+ const icon = payload.condition === "recovery" ? "\u2705" : "\u26A0\uFE0F";
1431
+ const time = new Date(payload.timestamp).toLocaleTimeString();
1432
+ return [
1433
+ `${icon} *AgentFlow Alert*`,
1434
+ `*${payload.condition}*: \`${payload.agentId}\``,
1435
+ `Status: ${payload.previousStatus} \u2192 ${payload.currentStatus}`,
1436
+ payload.detail ? `Detail: ${payload.detail.slice(0, 200)}` : null,
1437
+ `Time: ${time}`
1438
+ ].filter(Boolean).join("\n");
1439
+ }
1440
/**
 * Deliver one alert through one notification channel.
 *
 * Dispatches on `channel.type` ("stdout", "telegram", "webhook", "command");
 * other types are ignored. Delivery failures are logged to stderr and never
 * propagate to the caller.
 *
 * @param payload Alert payload to deliver.
 * @param channel Channel descriptor with a `type` and its type-specific fields.
 * @returns A promise that settles once delivery has been attempted.
 */
async function sendAlert(payload, channel) {
  try {
    const kind = channel.type;
    if (kind === "stdout") {
      sendStdout(payload);
    } else if (kind === "telegram") {
      await sendTelegram(payload, channel.botToken, channel.chatId);
    } else if (kind === "webhook") {
      await sendWebhook(payload, channel.url);
    } else if (kind === "command") {
      await sendCommand(payload, channel.cmd);
    }
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    console.error(`[agentflow] Failed to send ${channel.type} alert: ${reason}`);
  }
}
1461
/**
 * Print an alert to standard output using the plain-text alert format.
 *
 * @param payload Alert payload to print.
 */
function sendStdout(payload) {
  const message = formatAlertMessage(payload);
  console.log(message);
}
1464
/**
 * Send an alert to a Telegram chat through the Bot API `sendMessage` method.
 *
 * The message is formatted with `formatTelegram` and sent with
 * `parse_mode: "Markdown"`. The request is abandoned after 10 seconds of
 * socket inactivity, matching the webhook channel, so an unresponsive
 * endpoint cannot stall the caller indefinitely.
 *
 * @param payload Alert payload to send.
 * @param botToken Telegram bot token (embedded in the request URL).
 * @param chatId Target chat id.
 * @returns A promise that resolves on a 2xx response and rejects on any
 *   other status, a network error, or a timeout.
 */
function sendTelegram(payload, botToken, chatId) {
  const body = JSON.stringify({
    chat_id: chatId,
    text: formatTelegram(payload),
    parse_mode: "Markdown"
  });
  return new Promise((resolve4, reject) => {
    const req = (0, import_node_https.request)(
      `https://api.telegram.org/bot${botToken}/sendMessage`,
      { method: "POST", headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) } },
      (res) => {
        // Drain the response so the socket is released.
        res.resume();
        if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
        else reject(new Error(`Telegram API returned ${res.statusCode}`));
      }
    );
    req.on("error", reject);
    // Destroying the request emits "error", which rejects the promise.
    req.setTimeout(1e4, () => {
      req.destroy(new Error("Telegram timeout"));
    });
    req.write(body);
    req.end();
  });
}
1485
/**
 * POST an alert payload as JSON to a webhook URL.
 *
 * The transport (HTTPS or HTTP) is chosen from the parsed URL's protocol,
 * which is normalised to lower case, so `HTTPS://...` is handled like
 * `https://...`. The request is abandoned after 10 seconds of socket
 * inactivity.
 *
 * @param payload Alert payload; serialised with `JSON.stringify`.
 * @param url Absolute webhook URL.
 * @returns A promise that resolves on a 2xx response and rejects on any
 *   other status, a network error, or a timeout.
 * @throws TypeError synchronously when `url` is not a valid absolute URL.
 */
function sendWebhook(payload, url) {
  const body = JSON.stringify(payload);
  const isHttps = new URL(url).protocol === "https:";
  const doRequest = isHttps ? import_node_https.request : import_node_http.request;
  return new Promise((resolve4, reject) => {
    const req = doRequest(
      url,
      { method: "POST", headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) } },
      (res) => {
        // Drain the response so the socket is released.
        res.resume();
        if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
        else reject(new Error(`Webhook returned ${res.statusCode}`));
      }
    );
    req.on("error", reject);
    // Destroying the request emits "error", which rejects the promise.
    req.setTimeout(1e4, () => {
      req.destroy(new Error("Webhook timeout"));
    });
    req.write(body);
    req.end();
  });
}
1507
/**
 * Run a user-supplied command for an alert via `child_process.exec`.
 *
 * The command inherits the current environment plus `AGENTFLOW_ALERT_*`
 * variables describing the alert, and is given a 30-second timeout.
 *
 * @param payload Alert payload exposed to the command through the environment.
 * @param cmd Command line to execute.
 * @returns A promise that resolves when the command succeeds and rejects
 *   with the `exec` error otherwise.
 */
function sendCommand(payload, cmd) {
  return new Promise((resolve4, reject) => {
    const alertVars = {
      AGENTFLOW_ALERT_AGENT: payload.agentId,
      AGENTFLOW_ALERT_CONDITION: payload.condition,
      AGENTFLOW_ALERT_STATUS: payload.currentStatus,
      AGENTFLOW_ALERT_PREVIOUS_STATUS: payload.previousStatus,
      AGENTFLOW_ALERT_DETAIL: payload.detail,
      AGENTFLOW_ALERT_FILE: payload.file,
      AGENTFLOW_ALERT_TIMESTAMP: String(payload.timestamp)
    };
    const options = { env: { ...process.env, ...alertVars }, timeout: 3e4 };
    const onDone = (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve4();
    };
    (0, import_node_child_process2.exec)(cmd, options, onDone);
  });
}
1525
+
1526
+ // src/watch.ts
1527
/**
 * Parse the command-line arguments of `agentflow watch` into a config object.
 *
 * A leading "watch" token is dropped. Positional arguments are directories
 * (resolved to absolute paths; default: the current directory). When no
 * `--alert-on` condition is accepted, "error" and "recovery" are used. A
 * stdout channel is always placed first in `notifyChannels`. The state file
 * defaults to `.agentflow-watch-state.json` inside the first directory.
 *
 * Values that cannot be used (bad durations, unknown condition or channel
 * names, empty webhook/command targets, unknown flags) are skipped with a
 * "Warning: ..." line on stderr instead of being dropped silently.
 *
 * `--help` / `-h` prints the usage text and exits the process with code 0.
 *
 * @param argv Argument list, optionally starting with "watch".
 * @returns The resolved watch configuration.
 */
function parseWatchArgs(argv) {
  const dirs = [];
  const alertConditions = [];
  const notifyChannels = [];
  let recursive = false;
  let pollIntervalMs = 3e4;
  let cooldownMs = 30 * 6e4;
  let stateFilePath = "";
  const warn = (message) => console.error(`Warning: ${message}`);
  const args = argv.slice(0);
  if (args[0] === "watch") args.shift();
  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    if (arg === "--help" || arg === "-h") {
      printWatchUsage();
      process.exit(0);
    } else if (arg === "--alert-on") {
      i++;
      const val = args[i] ?? "";
      if (val === "error") {
        alertConditions.push({ type: "error" });
      } else if (val === "recovery") {
        alertConditions.push({ type: "recovery" });
      } else if (val.startsWith("stale:")) {
        const dur = parseDuration(val.slice(6));
        if (dur > 0) alertConditions.push({ type: "stale", durationMs: dur });
        else warn(`ignoring --alert-on ${val}: duration must be positive (e.g. 15m, 1h)`);
      } else if (val.startsWith("consecutive-errors:")) {
        const n = parseInt(val.slice(19), 10);
        if (n > 0) alertConditions.push({ type: "consecutive-errors", threshold: n });
        else warn(`ignoring --alert-on ${val}: threshold must be a positive integer`);
      } else {
        warn(`ignoring unrecognised --alert-on value "${val}"`);
      }
      i++;
    } else if (arg === "--notify") {
      i++;
      const val = args[i] ?? "";
      if (val === "telegram") {
        const botToken = process.env["AGENTFLOW_TELEGRAM_BOT_TOKEN"] ?? "";
        const chatId = process.env["AGENTFLOW_TELEGRAM_CHAT_ID"] ?? "";
        if (botToken && chatId) {
          notifyChannels.push({ type: "telegram", botToken, chatId });
        } else {
          console.error("Warning: --notify telegram requires AGENTFLOW_TELEGRAM_BOT_TOKEN and AGENTFLOW_TELEGRAM_CHAT_ID env vars");
        }
      } else if (val.startsWith("webhook:")) {
        const url = val.slice(8);
        if (url) notifyChannels.push({ type: "webhook", url });
        else warn("ignoring --notify webhook: with an empty URL");
      } else if (val.startsWith("command:")) {
        const cmd = val.slice(8);
        if (cmd) notifyChannels.push({ type: "command", cmd });
        else warn("ignoring --notify command: with an empty command");
      } else if (val !== "stdout") {
        // "stdout" is accepted quietly: that channel is always enabled below.
        warn(`ignoring unrecognised --notify value "${val}"`);
      }
      i++;
    } else if (arg === "--poll") {
      i++;
      const v = parseInt(args[i] ?? "", 10);
      if (!isNaN(v) && v > 0) pollIntervalMs = v * 1e3;
      else warn(`ignoring --poll ${args[i] ?? ""}: expected a positive number of seconds`);
      i++;
    } else if (arg === "--cooldown") {
      i++;
      const dur = parseDuration(args[i] ?? "30m");
      if (dur > 0) cooldownMs = dur;
      else warn(`ignoring --cooldown ${args[i] ?? ""}: expected a positive duration (e.g. 30m)`);
      i++;
    } else if (arg === "--state-file") {
      i++;
      stateFilePath = args[i] ?? "";
      i++;
    } else if (arg === "--recursive" || arg === "-R") {
      recursive = true;
      i++;
    } else if (!arg.startsWith("-")) {
      dirs.push((0, import_node_path3.resolve)(arg));
      i++;
    } else {
      warn(`ignoring unknown option ${arg}`);
      i++;
    }
  }
  if (dirs.length === 0) dirs.push((0, import_node_path3.resolve)("."));
  if (alertConditions.length === 0) {
    alertConditions.push({ type: "error" });
    alertConditions.push({ type: "recovery" });
  }
  // Stdout is always the first channel, regardless of --notify flags.
  notifyChannels.unshift({ type: "stdout" });
  if (!stateFilePath) {
    stateFilePath = (0, import_node_path3.join)(dirs[0], ".agentflow-watch-state.json");
  }
  return {
    dirs,
    recursive,
    pollIntervalMs,
    alertConditions,
    notifyChannels,
    stateFilePath: (0, import_node_path3.resolve)(stateFilePath),
    cooldownMs
  };
}
1618
+ function printWatchUsage() {
1619
+ console.log(`
1620
+ AgentFlow Watch \u2014 headless alert system for agent infrastructure.
1621
+
1622
+ Polls directories for JSON/JSONL files, detects failures and stale
1623
+ agents, sends alerts. Same auto-detection as \`agentflow live\`.
1624
+
1625
+ Usage:
1626
+ agentflow watch [dir...] [options]
1627
+
1628
+ Arguments:
1629
+ dir One or more directories to watch (default: .)
1630
+
1631
+ Alert conditions (--alert-on, repeatable):
1632
+ error Agent transitions to error status
1633
+ recovery Agent recovers from error to ok
1634
+ stale:DURATION No file update within duration (e.g. 15m, 1h)
1635
+ consecutive-errors:N N consecutive error observations
1636
+
1637
+ Default (if none specified): error + recovery
1638
+
1639
+ Notification channels (--notify, repeatable):
1640
+ telegram Telegram Bot API (needs env vars)
1641
+ webhook:URL POST JSON to any URL
1642
+ command:CMD Run shell command with alert env vars
1643
+
1644
+ Stdout alerts are always printed regardless of --notify flags.
1645
+
1646
+ Options:
1647
+ --poll <secs> Poll interval in seconds (default: 30)
1648
+ --cooldown <duration> Alert dedup cooldown (default: 30m)
1649
+ --state-file <path> Persistence file (default: <dir>/.agentflow-watch-state.json)
1650
+ -R, --recursive Scan subdirectories (1 level deep)
1651
+ -h, --help Show this help message
1652
+
1653
+ Environment variables:
1654
+ AGENTFLOW_TELEGRAM_BOT_TOKEN Telegram bot token (for --notify telegram)
1655
+ AGENTFLOW_TELEGRAM_CHAT_ID Telegram chat ID (for --notify telegram)
1656
+
1657
+ Examples:
1658
+ agentflow watch ./data --alert-on error --alert-on stale:15m
1659
+ agentflow watch ./data ./cron --notify telegram --poll 60
1660
+ agentflow watch ./traces --notify webhook:https://hooks.slack.com/... --alert-on consecutive-errors:3
1661
+ agentflow watch ./data --notify "command:curl -X POST https://my-pagerduty/alert"
1662
+ `.trim());
1663
+ }
1664
/**
 * Entry point of `agentflow watch`: parse arguments, then poll the watched
 * directories on a timer and send alerts for detected conditions.
 *
 * Exits the process with code 1 when none of the requested directories
 * exists. Each poll scans the directories (at most 500 files are processed),
 * detects alerts against the persisted state, delivers them to every
 * channel in turn, and saves the updated state. Every tenth poll prints a
 * heartbeat line. SIGINT and SIGTERM save the state and exit with code 0.
 *
 * A poll that is still running when the timer fires again is not restarted,
 * and an exception inside a poll is logged rather than left as an unhandled
 * promise rejection, so one bad cycle cannot take the watcher down or cause
 * overlapping state updates.
 *
 * @param argv Command-line arguments, optionally starting with "watch".
 */
function startWatch(argv) {
  const config = parseWatchArgs(argv);
  const valid = config.dirs.filter((d) => (0, import_node_fs4.existsSync)(d));
  if (valid.length === 0) {
    console.error(`No valid directories found: ${config.dirs.join(", ")}`);
    process.exit(1);
  }
  const invalid = config.dirs.filter((d) => !(0, import_node_fs4.existsSync)(d));
  if (invalid.length > 0) {
    console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
  }
  let state = loadWatchState(config.stateFilePath);
  const condLabels = config.alertConditions.map((c) => {
    if (c.type === "stale") return `stale:${Math.floor(c.durationMs / 6e4)}m`;
    if (c.type === "consecutive-errors") return `consecutive-errors:${c.threshold}`;
    return c.type;
  });
  const channelLabels = config.notifyChannels.filter((c) => c.type !== "stdout").map((c) => {
    if (c.type === "webhook") return `webhook:${c.url.slice(0, 40)}...`;
    if (c.type === "command") return `command:${c.cmd.slice(0, 40)}`;
    return c.type;
  });
  console.log("\nagentflow watch started");
  console.log(` Directories: ${valid.join(", ")}`);
  console.log(` Poll: ${config.pollIntervalMs / 1e3}s`);
  console.log(` Alert on: ${condLabels.join(", ")}`);
  console.log(` Notify: stdout${channelLabels.length > 0 ? ", " + channelLabels.join(", ") : ""}`);
  console.log(` Cooldown: ${Math.floor(config.cooldownMs / 6e4)}m`);
  console.log(` State: ${config.stateFilePath}`);
  console.log(` Hostname: ${(0, import_node_os.hostname)()}`);
  console.log("");
  let pollCount = 0;
  // True while a poll is in flight; prevents the timer from starting a
  // second poll while alerts from the previous one are still being sent.
  let polling = false;
  async function poll() {
    if (polling) return;
    polling = true;
    try {
      const now = Date.now();
      pollCount++;
      const files = scanFiles(valid, config.recursive);
      const records = [];
      for (const f of files.slice(0, 500)) {
        const recs = f.ext === ".jsonl" ? processJsonlFile(f) : processJsonFile(f);
        records.push(...recs);
      }
      const alerts = detectTransitions(state, records, config, now);
      for (const alert of alerts) {
        for (const channel of config.notifyChannels) {
          await sendAlert(alert, channel);
        }
      }
      state = updateWatchState(state, records, alerts, now);
      saveWatchState(config.stateFilePath, state);
      if (pollCount % 10 === 0) {
        const agentCount = Object.keys(state.agents).length;
        const errorCount = Object.values(state.agents).filter((a) => a.lastStatus === "error").length;
        const runningCount = Object.values(state.agents).filter((a) => a.lastStatus === "running").length;
        const time = new Date().toLocaleTimeString();
        console.log(`[${time}] heartbeat: ${agentCount} agents, ${runningCount} running, ${errorCount} errors, ${files.length} files`);
      }
    } catch (err) {
      // Keep the watcher alive; the next timer tick retries.
      const msg = err instanceof Error ? err.message : String(err);
      console.error(`[agentflow] Poll failed: ${msg}`);
    } finally {
      polling = false;
    }
  }
  // poll() handles its own errors, so the returned promise is ignored.
  void poll();
  setInterval(() => {
    void poll();
  }, config.pollIntervalMs);
  function shutdown() {
    console.log("\nagentflow watch stopped.");
    saveWatchState(config.stateFilePath, state);
    process.exit(0);
  }
  process.on("SIGINT", shutdown);
  process.on("SIGTERM", shutdown);
}
1217
1734
  // Annotate the CommonJS export names for ESM import in node:
1218
1735
  0 && (module.exports = {
1219
1736
  createGraphBuilder,
@@ -1234,5 +1751,6 @@ function startLive(argv) {
1234
1751
  loadGraph,
1235
1752
  runTraced,
1236
1753
  startLive,
1754
+ startWatch,
1237
1755
  stitchTrace
1238
1756
  });
package/dist/index.d.cts CHANGED
@@ -393,8 +393,72 @@ declare function runTraced(config: RunConfig): Promise<RunResult>;
393
393
  *
394
394
  * @module
395
395
  */
396
+
396
397
  declare function startLive(argv: string[]): void;
397
398
 
399
+ /**
400
+ * Start AgentFlow Watch, the headless alert system for agent infrastructure.
401
+ *
402
+ * Polls directories for JSON/JSONL state files, detects status transitions
403
+ * (ok→error, stale, recovery), and sends alerts via Telegram, webhooks,
404
+ * shell commands, or stdout.
405
+ *
406
+ * @param argv - CLI arguments, optionally starting with the `watch` subcommand.
407
+ */
408
+ declare function startWatch(argv: string[]): void;
409
+
410
+ /**
411
+ * Type definitions for the `agentflow watch` alert system.
412
+ * @module
413
+ */
414
+ /** Alert condition parsed from an `--alert-on` flag; `durationMs` is in milliseconds and `threshold` is a count of consecutive error observations. */
415
+ type AlertCondition = {
416
+ readonly type: 'error';
417
+ } | {
418
+ readonly type: 'stale';
419
+ readonly durationMs: number;
420
+ } | {
421
+ readonly type: 'recovery';
422
+ } | {
423
+ readonly type: 'consecutive-errors';
424
+ readonly threshold: number;
425
+ };
426
+ /** Notification channel parsed from a `--notify` flag; the `stdout` channel is always added by the argument parser. */
427
+ type NotifyChannel = {
428
+ readonly type: 'stdout';
429
+ } | {
430
+ readonly type: 'telegram';
431
+ readonly botToken: string;
432
+ readonly chatId: string;
433
+ } | {
434
+ readonly type: 'webhook';
435
+ readonly url: string;
436
+ } | {
437
+ readonly type: 'command';
438
+ readonly cmd: string;
439
+ };
440
+ /** Resolved configuration for the watch command; `pollIntervalMs` and `cooldownMs` are in milliseconds, and `dirs` and `stateFilePath` are resolved paths. */
441
+ interface WatchConfig {
442
+ readonly dirs: string[];
443
+ readonly recursive: boolean;
444
+ readonly pollIntervalMs: number;
445
+ readonly alertConditions: AlertCondition[];
446
+ readonly notifyChannels: NotifyChannel[];
447
+ readonly stateFilePath: string;
448
+ readonly cooldownMs: number;
449
+ }
450
+ /** Alert payload passed to notification channels; `timestamp` is epoch milliseconds taken when the payload is built, and `dirs` is the configured directory list. */
451
+ interface AlertPayload {
452
+ readonly agentId: string;
453
+ readonly condition: string;
454
+ readonly previousStatus: string;
455
+ readonly currentStatus: string;
456
+ readonly detail: string;
457
+ readonly file: string;
458
+ readonly timestamp: number;
459
+ readonly dirs: readonly string[];
460
+ }
461
+
398
462
  declare function groupByTraceId(graphs: ExecutionGraph[]): Map<string, ExecutionGraph[]>;
399
463
  declare function stitchTrace(graphs: ExecutionGraph[]): DistributedTrace;
400
464
  declare function getTraceTree(trace: DistributedTrace): ExecutionGraph[];
@@ -515,4 +579,4 @@ declare function getDepth(graph: ExecutionGraph): number;
515
579
  */
516
580
  declare function getStats(graph: ExecutionGraph): GraphStats;
517
581
 
518
- export { type Adapter, type AgentFlowConfig, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, stitchTrace };
582
+ export { type Adapter, type AgentFlowConfig, type AlertCondition, type AlertPayload, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type NotifyChannel, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type WatchConfig, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, startWatch, stitchTrace };
package/dist/index.d.ts CHANGED
@@ -393,8 +393,72 @@ declare function runTraced(config: RunConfig): Promise<RunResult>;
393
393
  *
394
394
  * @module
395
395
  */
396
+
396
397
  declare function startLive(argv: string[]): void;
397
398
 
399
+ /**
400
+ * Start AgentFlow Watch, the headless alert system for agent infrastructure.
401
+ *
402
+ * Polls directories for JSON/JSONL state files, detects status transitions
403
+ * (ok→error, stale, recovery), and sends alerts via Telegram, webhooks,
404
+ * shell commands, or stdout.
405
+ *
406
+ * @param argv - CLI arguments, optionally starting with the `watch` subcommand.
407
+ */
408
+ declare function startWatch(argv: string[]): void;
409
+
410
+ /**
411
+ * Type definitions for the `agentflow watch` alert system.
412
+ * @module
413
+ */
414
+ /** Alert condition parsed from an `--alert-on` flag; `durationMs` is in milliseconds and `threshold` is a count of consecutive error observations. */
415
+ type AlertCondition = {
416
+ readonly type: 'error';
417
+ } | {
418
+ readonly type: 'stale';
419
+ readonly durationMs: number;
420
+ } | {
421
+ readonly type: 'recovery';
422
+ } | {
423
+ readonly type: 'consecutive-errors';
424
+ readonly threshold: number;
425
+ };
426
+ /** Notification channel parsed from a `--notify` flag; the `stdout` channel is always added by the argument parser. */
427
+ type NotifyChannel = {
428
+ readonly type: 'stdout';
429
+ } | {
430
+ readonly type: 'telegram';
431
+ readonly botToken: string;
432
+ readonly chatId: string;
433
+ } | {
434
+ readonly type: 'webhook';
435
+ readonly url: string;
436
+ } | {
437
+ readonly type: 'command';
438
+ readonly cmd: string;
439
+ };
440
+ /** Resolved configuration for the watch command; `pollIntervalMs` and `cooldownMs` are in milliseconds, and `dirs` and `stateFilePath` are resolved paths. */
441
+ interface WatchConfig {
442
+ readonly dirs: string[];
443
+ readonly recursive: boolean;
444
+ readonly pollIntervalMs: number;
445
+ readonly alertConditions: AlertCondition[];
446
+ readonly notifyChannels: NotifyChannel[];
447
+ readonly stateFilePath: string;
448
+ readonly cooldownMs: number;
449
+ }
450
+ /** Alert payload passed to notification channels; `timestamp` is epoch milliseconds taken when the payload is built, and `dirs` is the configured directory list. */
451
+ interface AlertPayload {
452
+ readonly agentId: string;
453
+ readonly condition: string;
454
+ readonly previousStatus: string;
455
+ readonly currentStatus: string;
456
+ readonly detail: string;
457
+ readonly file: string;
458
+ readonly timestamp: number;
459
+ readonly dirs: readonly string[];
460
+ }
461
+
398
462
  declare function groupByTraceId(graphs: ExecutionGraph[]): Map<string, ExecutionGraph[]>;
399
463
  declare function stitchTrace(graphs: ExecutionGraph[]): DistributedTrace;
400
464
  declare function getTraceTree(trace: DistributedTrace): ExecutionGraph[];
@@ -515,4 +579,4 @@ declare function getDepth(graph: ExecutionGraph): number;
515
579
  */
516
580
  declare function getStats(graph: ExecutionGraph): GraphStats;
517
581
 
518
- export { type Adapter, type AgentFlowConfig, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, stitchTrace };
582
+ export { type Adapter, type AgentFlowConfig, type AlertCondition, type AlertPayload, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type NotifyChannel, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type WatchConfig, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, startWatch, stitchTrace };