agentflow-core 0.2.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5SPZPOFN.js → chunk-NPH34CAL.js} +518 -1
- package/dist/cli.cjs +526 -4
- package/dist/cli.js +13 -6
- package/dist/index.cjs +518 -0
- package/dist/index.d.cts +65 -1
- package/dist/index.d.ts +65 -1
- package/dist/index.js +3 -1
- package/package.json +24 -3
package/dist/index.cjs
CHANGED
|
@@ -38,6 +38,7 @@ __export(index_exports, {
|
|
|
38
38
|
loadGraph: () => loadGraph,
|
|
39
39
|
runTraced: () => runTraced,
|
|
40
40
|
startLive: () => startLive,
|
|
41
|
+
startWatch: () => startWatch,
|
|
41
42
|
stitchTrace: () => stitchTrace
|
|
42
43
|
});
|
|
43
44
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -1214,6 +1215,522 @@ function startLive(argv) {
|
|
|
1214
1215
|
process.exit(0);
|
|
1215
1216
|
});
|
|
1216
1217
|
}
|
|
1218
|
+
|
|
1219
|
+
// src/watch.ts
|
|
1220
|
+
var import_node_fs4 = require("fs");
|
|
1221
|
+
var import_node_path3 = require("path");
|
|
1222
|
+
var import_node_os = require("os");
|
|
1223
|
+
|
|
1224
|
+
// src/watch-state.ts
|
|
1225
|
+
var import_node_fs3 = require("fs");
|
|
1226
|
+
/**
 * Converts a human-readable duration into milliseconds.
 *
 * Recognized input is a non-negative decimal number followed by one of the
 * units `s` (seconds), `m` (minutes), `h` (hours) or `d` (days); the unit is
 * matched case-insensitively and may be separated from the number by
 * whitespace (e.g. `"15m"`, `"1.5h"`, `"30 s"`). Surrounding whitespace is
 * ignored.
 *
 * Anything that does not match that pattern is read as a plain number of
 * seconds via `parseFloat`, so `"30"` is 30 000 ms and `"1.5"` is 1 500 ms.
 * Input with no leading number yields 0, which callers treat as "invalid".
 *
 * @param input Duration text, typically taken from a CLI flag.
 * @returns The duration in milliseconds, or 0 when no number can be read.
 */
function parseDuration(input) {
  const UNIT_MS = { s: 1e3, m: 6e4, h: 36e5, d: 864e5 };
  const text = String(input).trim();
  const match = text.match(/^(\d+(?:\.\d+)?)\s*(s|m|h|d)$/i);
  if (match) {
    // The regex only admits the four keys of UNIT_MS, so the lookup cannot miss.
    return parseFloat(match[1]) * UNIT_MS[match[2].toLowerCase()];
  }
  // No unit: interpret as seconds. parseFloat (not parseInt) keeps the
  // fractional part so "1.5" and "1.5s" agree.
  const seconds = parseFloat(text);
  return Number.isNaN(seconds) ? 0 : seconds * 1e3;
}
|
|
1246
|
+
/**
 * Creates a blank watch state: schema version 1, no known agents, and a
 * last-poll time of 0. A new object is returned on every call.
 */
function emptyState() {
  const blank = {
    version: 1,
    agents: {},
    lastPollTime: 0
  };
  return blank;
}
|
|
1249
|
+
/**
 * Loads the persisted watch state from disk.
 *
 * Falls back to a blank state (see `emptyState`) when the file does not
 * exist, cannot be read or parsed, or does not look like a version-1 state
 * object. The parsed value is otherwise returned as-is; individual agent
 * entries are not validated.
 *
 * @param filePath Path of the JSON state file.
 * @returns The parsed state, or a blank state on any problem.
 */
function loadWatchState(filePath) {
  if (!(0, import_node_fs3.existsSync)(filePath)) return emptyState();
  try {
    const raw = JSON.parse((0, import_node_fs3.readFileSync)(filePath, "utf8"));
    // `typeof null` and `typeof []` are both "object", so they are rejected
    // explicitly: later code indexes `state.agents[agentId]` and would throw
    // on null.
    const looksValid =
      raw !== null &&
      typeof raw === "object" &&
      raw.version === 1 &&
      raw.agents !== null &&
      typeof raw.agents === "object" &&
      !Array.isArray(raw.agents);
    return looksValid ? raw : emptyState();
  } catch {
    // Unreadable or malformed file: start over rather than abort the watcher.
    return emptyState();
  }
}
|
|
1259
|
+
/**
 * Persists the watch state as pretty-printed JSON.
 *
 * The state is first written to `<filePath>.tmp` and then renamed over the
 * target. If that fails, a direct write to `filePath` is attempted. Saving is
 * best-effort: this function never throws, but a final failure is reported on
 * stderr so it does not go unnoticed.
 *
 * @param filePath Destination of the state file.
 * @param state State object to serialize.
 */
function saveWatchState(filePath, state) {
  const describe = (err) => (err instanceof Error ? err.message : String(err));
  let serialized;
  try {
    serialized = JSON.stringify(state, null, 2);
  } catch (err) {
    console.error(`[agentflow] Failed to serialize watch state: ${describe(err)}`);
    return;
  }
  const tmp = filePath + ".tmp";
  try {
    (0, import_node_fs3.writeFileSync)(tmp, serialized, "utf8");
    (0, import_node_fs3.renameSync)(tmp, filePath);
    return;
  } catch {
    // Fall through to the direct write below.
  }
  try {
    (0, import_node_fs3.writeFileSync)(filePath, serialized, "utf8");
  } catch (err) {
    console.error(`[agentflow] Failed to save watch state to ${filePath}: ${describe(err)}`);
  }
}
|
|
1271
|
+
/**
 * Estimates how often an agent normally updates, from a list of timestamps.
 *
 * The timestamps are sorted, the strictly positive gaps between neighbours
 * are collected, and the middle gap (the upper-middle one when the count is
 * even) is returned.
 *
 * @param history Observed timestamps; the input array is not modified.
 * @returns The typical gap, or 0 when there are fewer than three timestamps
 *   or no positive gap exists.
 */
function estimateInterval(history) {
  if (history.length < 3) return 0;
  const ordered = history.slice().sort((x, y) => x - y);
  const gaps = ordered
    .slice(1)
    .map((stamp, idx) => stamp - ordered[idx])
    .filter((gap) => gap > 0)
    .sort((x, y) => x - y);
  return gaps.length === 0 ? 0 : gaps[Math.floor(gaps.length / 2)];
}
|
|
1283
|
+
/**
 * Compares the freshly scanned records against the previous watch state and
 * returns the alerts that should be sent for this poll.
 *
 * Only the most recently active record per agent id is considered. For each
 * agent the following checks run, depending on `config.alertConditions`:
 *
 * - `error`: status is now "error" and was not "error" before.
 * - `recovery`: status is now "ok" and was "error" before (not rate-limited).
 * - `consecutive-errors`: the error streak reaches exactly the threshold.
 * - `stale`: the record has been inactive for longer than the configured
 *   duration.
 * - automatic staleness (only when no `stale` condition is configured): the
 *   record has been inactive for more than three times the interval
 *   estimated from the agent's stored `mtimeHistory`.
 *
 * All but `recovery` are gated by `canAlert`, keyed on the same label that is
 * written into the alert payload. That label is what `updateWatchState`
 * stores as `lastAlertReason`, so the key and the stored reason must match
 * for the cooldown to take effect.
 *
 * @param previous Watch state from the previous poll.
 * @param currentRecords Records produced by the current scan.
 * @param config Watch configuration (`alertConditions`, `cooldownMs`, `dirs`).
 * @param now Current time in epoch milliseconds.
 * @returns Alert payloads, in detection order.
 */
function detectTransitions(previous, currentRecords, config, now) {
  const alerts = [];
  const hasError = config.alertConditions.some((c) => c.type === "error");
  const hasRecovery = config.alertConditions.some((c) => c.type === "recovery");
  const staleConditions = config.alertConditions.filter((c) => c.type === "stale");
  const consecutiveConditions = config.alertConditions.filter((c) => c.type === "consecutive-errors");

  // Keep only the newest record for every agent id.
  const byAgent = new Map();
  for (const r of currentRecords) {
    const existing = byAgent.get(r.id);
    if (!existing || r.lastActive > existing.lastActive) {
      byAgent.set(r.id, r);
    }
  }

  for (const [agentId, record] of byAgent) {
    const prev = previous.agents[agentId];
    const prevStatus = prev?.lastStatus ?? "unknown";
    const currStatus = record.status;

    if (hasError && currStatus === "error" && prevStatus !== "error") {
      if (canAlert(prev, "error", config.cooldownMs, now)) {
        alerts.push(makePayload(agentId, "error", prevStatus, currStatus, record, config.dirs));
      }
    }

    if (hasRecovery && currStatus === "ok" && prevStatus === "error") {
      alerts.push(makePayload(agentId, "recovery", prevStatus, currStatus, record, config.dirs));
    }

    const newConsec = currStatus === "error" ? (prev?.consecutiveErrors ?? 0) + 1 : 0;
    for (const cond of consecutiveConditions) {
      // The label doubles as cooldown key and payload condition so that the
      // reason persisted in the state matches the key checked next time.
      const label = `consecutive-errors (${cond.threshold})`;
      if (newConsec === cond.threshold && canAlert(prev, label, config.cooldownMs, now)) {
        alerts.push(makePayload(
          agentId,
          label,
          prevStatus,
          currStatus,
          { ...record, detail: `${newConsec} consecutive errors. ${record.detail}` },
          config.dirs
        ));
      }
    }

    for (const cond of staleConditions) {
      const sinceActive = now - record.lastActive;
      if (sinceActive > cond.durationMs && record.lastActive > 0) {
        if (canAlert(prev, "stale", config.cooldownMs, now)) {
          const mins = Math.floor(sinceActive / 6e4);
          alerts.push(makePayload(
            agentId,
            "stale",
            prevStatus,
            currStatus,
            { ...record, detail: `No update for ${mins}m. ${record.detail}` },
            config.dirs
          ));
        }
      }
    }

    if (staleConditions.length === 0) {
      const history = prev?.mtimeHistory ?? [];
      const expectedInterval = estimateInterval(history);
      if (expectedInterval > 0) {
        const sinceActive = now - record.lastActive;
        if (sinceActive > expectedInterval * 3) {
          const label = "stale (auto)";
          if (canAlert(prev, label, config.cooldownMs, now)) {
            const mins = Math.floor(sinceActive / 6e4);
            const expectedMins = Math.floor(expectedInterval / 6e4);
            alerts.push(makePayload(
              agentId,
              label,
              prevStatus,
              currStatus,
              { ...record, detail: `No update for ${mins}m (expected every ~${expectedMins}m). ${record.detail}` },
              config.dirs
            ));
          }
        }
      }
    }
  }
  return alerts;
}
|
|
1363
|
+
/**
 * Produces the next watch state from the previous one, the records seen in
 * this poll, and the alerts that were raised.
 *
 * For every agent id present in `records` (using its most recently active
 * record) the entry is rebuilt: status and activity time are refreshed, the
 * error streak is incremented or reset, the activity time is appended to
 * `mtimeHistory` when it differs from the last stored value (history is
 * capped at 10 entries), and the alert time/reason are overwritten when an
 * alert for that agent is present in `alerts` (the last one wins). Agents
 * absent from `records` keep their previous entry. The input state is not
 * mutated.
 *
 * @param state Previous watch state.
 * @param records Records produced by the current scan.
 * @param alerts Alerts raised during this poll.
 * @param now Current time in epoch milliseconds.
 * @returns A new state object with `lastPollTime` set to `now`.
 */
function updateWatchState(state, records, alerts, now) {
  const HISTORY_LIMIT = 10;

  // Later alerts for the same agent replace earlier ones.
  const firedFor = new Map();
  for (const fired of alerts) firedFor.set(fired.agentId, fired);

  // Newest record per agent id.
  const newest = new Map();
  for (const rec of records) {
    const seen = newest.get(rec.id);
    if (seen && !(rec.lastActive > seen.lastActive)) continue;
    newest.set(rec.id, rec);
  }

  const nextAgents = { ...state.agents };
  for (const [agentId, rec] of newest) {
    const before = nextAgents[agentId];

    let timeline = (before?.mtimeHistory ?? []).slice();
    const isNewStamp = timeline.length === 0 || timeline[timeline.length - 1] !== rec.lastActive;
    if (isNewStamp) timeline.push(rec.lastActive);
    if (timeline.length > HISTORY_LIMIT) {
      timeline = timeline.slice(timeline.length - HISTORY_LIMIT);
    }

    let streak = 0;
    if (rec.status === "error") streak = (before?.consecutiveErrors ?? 0) + 1;

    let lastAlertTime = before?.lastAlertTime ?? 0;
    let lastAlertReason = before?.lastAlertReason ?? "";
    const fired = firedFor.get(agentId);
    if (fired) {
      lastAlertTime = now;
      lastAlertReason = fired.condition;
    }

    nextAgents[agentId] = {
      id: agentId,
      lastStatus: rec.status,
      lastActive: rec.lastActive,
      lastAlertTime,
      lastAlertReason,
      consecutiveErrors: streak,
      mtimeHistory: timeline
    };
  }
  return { version: 1, agents: nextAgents, lastPollTime: now };
}
|
|
1396
|
+
/**
 * Decides whether an alert with the given reason may be sent for an agent.
 *
 * An alert is allowed when the agent has no stored state, when the last
 * alert it received had a different reason, or when strictly more than
 * `cooldownMs` has elapsed since that last alert.
 *
 * @param prev Stored agent state, or undefined for an unseen agent.
 * @param reason Reason key of the alert being considered.
 * @param cooldownMs Minimum spacing between alerts with the same reason.
 * @param now Current time in epoch milliseconds.
 * @returns `true` when the alert may be sent.
 */
function canAlert(prev, reason, cooldownMs, now) {
  return !prev || prev.lastAlertReason !== reason || now - prev.lastAlertTime > cooldownMs;
}
|
|
1401
|
+
/**
 * Assembles the alert payload handed to the notification channels.
 *
 * `detail` and `file` are copied from the record; `timestamp` is the wall
 * clock time (`Date.now()`) at which the payload is built.
 *
 * @param agentId Id of the agent the alert is about.
 * @param condition Label of the condition that fired.
 * @param previousStatus Status recorded at the previous poll.
 * @param currentStatus Status observed in this poll.
 * @param record Source record supplying `detail` and `file`.
 * @param dirs Watched directories, passed through unchanged.
 * @returns The alert payload object.
 */
function makePayload(agentId, condition, previousStatus, currentStatus, record, dirs) {
  const { detail, file } = record;
  const timestamp = Date.now();
  return { agentId, condition, previousStatus, currentStatus, detail, file, timestamp, dirs };
}
|
|
1413
|
+
|
|
1414
|
+
// src/watch-alerts.ts
|
|
1415
|
+
var import_node_https = require("https");
|
|
1416
|
+
var import_node_http = require("http");
|
|
1417
|
+
var import_node_child_process2 = require("child_process");
|
|
1418
|
+
/**
 * Renders an alert payload as a multi-line plain-text message.
 *
 * The message has a header line, the status transition, an optional detail
 * line (omitted when `payload.detail` is empty), the file, and the timestamp
 * in ISO-8601 form.
 *
 * @param payload Alert payload to render.
 * @returns The message, with lines joined by "\n".
 */
function formatAlertMessage(payload) {
  const isoTime = new Date(payload.timestamp).toISOString();
  const lines = [`[ALERT] ${payload.condition}: "${payload.agentId}"`];
  lines.push(` Status: ${payload.previousStatus} \u2192 ${payload.currentStatus}`);
  if (payload.detail) {
    lines.push(` Detail: ${payload.detail}`);
  }
  lines.push(` File: ${payload.file}`);
  lines.push(` Time: ${isoTime}`);
  return lines.join("\n");
}
|
|
1429
|
+
/**
 * Renders an alert payload as a Telegram message in the legacy "Markdown"
 * parse mode (the mode `sendTelegram` requests).
 *
 * Free-form values (statuses and detail) are escaped so that characters with
 * Markdown meaning (`_`, `*`, backtick, `[`) cannot produce an unparsable
 * message. The agent id is shown inside a code span, where escaping is not
 * possible, so any backtick in it is replaced by an apostrophe. The detail
 * is truncated to 200 characters before escaping so that an escape sequence
 * is never cut in half. The time is formatted with `toLocaleTimeString()`
 * and therefore depends on the process locale.
 *
 * @param payload Alert payload to render.
 * @returns The message text, with lines joined by "\n".
 */
function formatTelegram(payload) {
  const escapeMarkdown = (text) => String(text).replace(/([_*`\[])/g, "\\$1");
  const icon = payload.condition === "recovery" ? "\u2705" : "\u26A0\uFE0F";
  const time = new Date(payload.timestamp).toLocaleTimeString();
  const agentLabel = String(payload.agentId).replace(/`/g, "'");
  return [
    `${icon} *AgentFlow Alert*`,
    `*${payload.condition}*: \`${agentLabel}\``,
    `Status: ${escapeMarkdown(payload.previousStatus)} \u2192 ${escapeMarkdown(payload.currentStatus)}`,
    payload.detail ? `Detail: ${escapeMarkdown(payload.detail.slice(0, 200))}` : null,
    `Time: ${time}`
  ].filter(Boolean).join("\n");
}
|
|
1440
|
+
/**
 * Delivers one alert through one notification channel.
 *
 * The channel's `type` selects the sender: "stdout", "telegram", "webhook"
 * or "command"; any other type is ignored. Delivery failures never
 * propagate: they are caught and reported on stderr.
 *
 * @param payload Alert payload to deliver.
 * @param channel Notification channel descriptor.
 * @returns A promise that resolves once delivery has succeeded or failed.
 */
async function sendAlert(payload, channel) {
  try {
    const kind = channel.type;
    if (kind === "stdout") {
      sendStdout(payload);
    } else if (kind === "telegram") {
      await sendTelegram(payload, channel.botToken, channel.chatId);
    } else if (kind === "webhook") {
      await sendWebhook(payload, channel.url);
    } else if (kind === "command") {
      await sendCommand(payload, channel.cmd);
    }
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`[agentflow] Failed to send ${channel.type} alert: ${msg}`);
  }
}
|
|
1461
|
+
/**
 * Prints the plain-text rendering of an alert to standard output.
 *
 * @param payload Alert payload to print.
 */
function sendStdout(payload) {
  const text = formatAlertMessage(payload);
  console.log(text);
}
|
|
1464
|
+
/**
 * Sends an alert to a Telegram chat through the Bot API `sendMessage` method.
 *
 * The request is aborted after 10 seconds, the same limit `sendWebhook`
 * uses, so that an unresponsive connection cannot stall the caller
 * indefinitely.
 *
 * @param payload Alert payload; rendered with `formatTelegram`.
 * @param botToken Telegram bot token.
 * @param chatId Target chat id.
 * @returns A promise that resolves on a 2xx response and rejects on any
 *   other status, on a network error, or on timeout.
 */
function sendTelegram(payload, botToken, chatId) {
  const body = JSON.stringify({
    chat_id: chatId,
    text: formatTelegram(payload),
    parse_mode: "Markdown"
  });
  return new Promise((resolve4, reject) => {
    const req = (0, import_node_https.request)(
      `https://api.telegram.org/bot${botToken}/sendMessage`,
      { method: "POST", headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) } },
      (res) => {
        // Drain the response so the socket is released.
        res.resume();
        if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
        else reject(new Error(`Telegram API returned ${res.statusCode}`));
      }
    );
    req.on("error", reject);
    // Destroying the request emits "error", which rejects the promise.
    req.setTimeout(1e4, () => {
      req.destroy(new Error("Telegram request timeout"));
    });
    req.write(body);
    req.end();
  });
}
|
|
1485
|
+
/**
 * POSTs the alert payload as JSON to a webhook URL.
 *
 * The `https` module is used when the URL scheme is `https:` (matched
 * case-insensitively, since URL schemes are case-insensitive); otherwise the
 * `http` module is used. The request is aborted after 10 seconds.
 *
 * @param payload Alert payload; sent as the JSON request body.
 * @param url Webhook endpoint.
 * @returns A promise that resolves on a 2xx response and rejects on any
 *   other status, on a network error, or on timeout.
 */
function sendWebhook(payload, url) {
  const body = JSON.stringify(payload);
  const isHttps = /^https:/i.test(url);
  const doRequest = isHttps ? import_node_https.request : import_node_http.request;
  return new Promise((resolve4, reject) => {
    const req = doRequest(
      url,
      { method: "POST", headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) } },
      (res) => {
        // Drain the response so the socket is released.
        res.resume();
        if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
        else reject(new Error(`Webhook returned ${res.statusCode}`));
      }
    );
    req.on("error", reject);
    // Destroying the request emits "error", which rejects the promise.
    req.setTimeout(1e4, () => {
      req.destroy(new Error("Webhook timeout"));
    });
    req.write(body);
    req.end();
  });
}
|
|
1507
|
+
/**
 * Runs a shell command to deliver an alert.
 *
 * The command inherits the current environment plus `AGENTFLOW_ALERT_*`
 * variables describing the alert (agent, condition, current and previous
 * status, detail, file, timestamp). It is run via `exec` with a 30-second
 * timeout.
 *
 * @param payload Alert payload exposed to the command through env vars.
 * @param cmd Shell command line to execute.
 * @returns A promise that resolves when the command succeeds and rejects
 *   with the `exec` error otherwise.
 */
function sendCommand(payload, cmd) {
  return new Promise((resolve4, reject) => {
    const alertVars = {
      AGENTFLOW_ALERT_AGENT: payload.agentId,
      AGENTFLOW_ALERT_CONDITION: payload.condition,
      AGENTFLOW_ALERT_STATUS: payload.currentStatus,
      AGENTFLOW_ALERT_PREVIOUS_STATUS: payload.previousStatus,
      AGENTFLOW_ALERT_DETAIL: payload.detail,
      AGENTFLOW_ALERT_FILE: payload.file,
      AGENTFLOW_ALERT_TIMESTAMP: String(payload.timestamp)
    };
    const options = { env: { ...process.env, ...alertVars }, timeout: 3e4 };
    (0, import_node_child_process2.exec)(cmd, options, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve4();
    });
  });
}
|
|
1525
|
+
|
|
1526
|
+
// src/watch.ts
|
|
1527
|
+
/**
 * Parses the command-line arguments of `agentflow watch` into a config object.
 *
 * A leading "watch" token is skipped. Positional arguments are directories
 * (resolved to absolute paths). Recognized options are `--alert-on`,
 * `--notify`, `--poll`, `--cooldown`, `--state-file`, `-R/--recursive` and
 * `-h/--help` (which prints usage and exits the process with code 0).
 *
 * Defaults: current directory, alert on error + recovery, 30 s poll
 * interval, 30 min cooldown, and a state file named
 * `.agentflow-watch-state.json` inside the first directory. A stdout channel
 * is always placed first in `notifyChannels`.
 *
 * Values that cannot be used (unknown options, unrecognized or malformed
 * option values) are skipped as before, but each one is now reported with a
 * "Warning: ..." line on stderr instead of being dropped silently.
 *
 * @param argv Raw argument list.
 * @returns The resolved watch configuration.
 */
function parseWatchArgs(argv) {
  const dirs = [];
  const alertConditions = [];
  const notifyChannels = [];
  let recursive = false;
  let pollIntervalMs = 3e4;
  let cooldownMs = 30 * 6e4;
  let stateFilePath = "";
  const warn = (message) => console.error(`Warning: ${message}`);
  const args = argv.slice(0);
  if (args[0] === "watch") args.shift();
  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    if (arg === "--help" || arg === "-h") {
      printWatchUsage();
      process.exit(0);
    } else if (arg === "--alert-on") {
      i++;
      const val = args[i] ?? "";
      if (val === "error") {
        alertConditions.push({ type: "error" });
      } else if (val === "recovery") {
        alertConditions.push({ type: "recovery" });
      } else if (val.startsWith("stale:")) {
        const dur = parseDuration(val.slice(6));
        if (dur > 0) alertConditions.push({ type: "stale", durationMs: dur });
        else warn(`ignoring --alert-on ${val}: invalid duration`);
      } else if (val.startsWith("consecutive-errors:")) {
        const n = parseInt(val.slice(19), 10);
        if (n > 0) alertConditions.push({ type: "consecutive-errors", threshold: n });
        else warn(`ignoring --alert-on ${val}: threshold must be a positive integer`);
      } else {
        warn(`ignoring unrecognized --alert-on value "${val}"`);
      }
      i++;
    } else if (arg === "--notify") {
      i++;
      const val = args[i] ?? "";
      if (val === "telegram") {
        const botToken = process.env["AGENTFLOW_TELEGRAM_BOT_TOKEN"] ?? "";
        const chatId = process.env["AGENTFLOW_TELEGRAM_CHAT_ID"] ?? "";
        if (botToken && chatId) {
          notifyChannels.push({ type: "telegram", botToken, chatId });
        } else {
          warn("--notify telegram requires AGENTFLOW_TELEGRAM_BOT_TOKEN and AGENTFLOW_TELEGRAM_CHAT_ID env vars");
        }
      } else if (val.startsWith("webhook:")) {
        notifyChannels.push({ type: "webhook", url: val.slice(8) });
      } else if (val.startsWith("command:")) {
        notifyChannels.push({ type: "command", cmd: val.slice(8) });
      } else {
        warn(`ignoring unrecognized --notify value "${val}"`);
      }
      i++;
    } else if (arg === "--poll") {
      i++;
      const v = parseInt(args[i] ?? "", 10);
      if (!isNaN(v) && v > 0) pollIntervalMs = v * 1e3;
      else warn(`ignoring invalid --poll value "${args[i] ?? ""}"`);
      i++;
    } else if (arg === "--cooldown") {
      i++;
      // A missing value falls back to the documented default of 30m.
      const dur = parseDuration(args[i] ?? "30m");
      if (dur > 0) cooldownMs = dur;
      else warn(`ignoring invalid --cooldown value "${args[i] ?? ""}"`);
      i++;
    } else if (arg === "--state-file") {
      i++;
      stateFilePath = args[i] ?? "";
      i++;
    } else if (arg === "--recursive" || arg === "-R") {
      recursive = true;
      i++;
    } else if (!arg.startsWith("-")) {
      dirs.push((0, import_node_path3.resolve)(arg));
      i++;
    } else {
      warn(`ignoring unknown option "${arg}"`);
      i++;
    }
  }
  if (dirs.length === 0) dirs.push((0, import_node_path3.resolve)("."));
  if (alertConditions.length === 0) {
    alertConditions.push({ type: "error" });
    alertConditions.push({ type: "recovery" });
  }
  // Stdout is always the first channel, regardless of --notify flags.
  notifyChannels.unshift({ type: "stdout" });
  if (!stateFilePath) {
    stateFilePath = (0, import_node_path3.join)(dirs[0], ".agentflow-watch-state.json");
  }
  return {
    dirs,
    recursive,
    pollIntervalMs,
    alertConditions,
    notifyChannels,
    stateFilePath: (0, import_node_path3.resolve)(stateFilePath),
    cooldownMs
  };
}
|
|
1618
|
+
/**
 * Prints the help text of the `agentflow watch` command to standard output
 * in a single `console.log` call.
 */
function printWatchUsage() {
  const usageLines = [
    "AgentFlow Watch \u2014 headless alert system for agent infrastructure.",
    "",
    "Polls directories for JSON/JSONL files, detects failures and stale",
    "agents, sends alerts. Same auto-detection as `agentflow live`.",
    "",
    "Usage:",
    "agentflow watch [dir...] [options]",
    "",
    "Arguments:",
    "dir One or more directories to watch (default: .)",
    "",
    "Alert conditions (--alert-on, repeatable):",
    "error Agent transitions to error status",
    "recovery Agent recovers from error to ok",
    "stale:DURATION No file update within duration (e.g. 15m, 1h)",
    "consecutive-errors:N N consecutive error observations",
    "",
    "Default (if none specified): error + recovery",
    "",
    "Notification channels (--notify, repeatable):",
    "telegram Telegram Bot API (needs env vars)",
    "webhook:URL POST JSON to any URL",
    "command:CMD Run shell command with alert env vars",
    "",
    "Stdout alerts are always printed regardless of --notify flags.",
    "",
    "Options:",
    "--poll <secs> Poll interval in seconds (default: 30)",
    "--cooldown <duration> Alert dedup cooldown (default: 30m)",
    "--state-file <path> Persistence file (default: <dir>/.agentflow-watch-state.json)",
    "-R, --recursive Scan subdirectories (1 level deep)",
    "-h, --help Show this help message",
    "",
    "Environment variables:",
    "AGENTFLOW_TELEGRAM_BOT_TOKEN Telegram bot token (for --notify telegram)",
    "AGENTFLOW_TELEGRAM_CHAT_ID Telegram chat ID (for --notify telegram)",
    "",
    "Examples:",
    "agentflow watch ./data --alert-on error --alert-on stale:15m",
    "agentflow watch ./data ./cron --notify telegram --poll 60",
    "agentflow watch ./traces --notify webhook:https://hooks.slack.com/... --alert-on consecutive-errors:3",
    'agentflow watch ./data --notify "command:curl -X POST https://my-pagerduty/alert"'
  ];
  console.log(usageLines.join("\n"));
}
|
|
1664
|
+
/**
 * Entry point of the `agentflow watch` command.
 *
 * Parses the arguments, drops directories that do not exist (exiting with
 * code 1 when none remain), loads the persisted watch state, prints a
 * startup banner, and then polls: once immediately and afterwards every
 * `pollIntervalMs`. Each poll scans the directories, processes at most 500
 * files, detects alerts, sends each alert to every channel in order, and
 * saves the updated state. Every tenth poll prints a heartbeat line.
 * SIGINT/SIGTERM save the state and exit with code 0.
 *
 * Polls never overlap: if the previous poll is still running when the timer
 * fires, that tick is skipped, so two polls cannot race on the shared state
 * or send the same alert twice. A failure inside a poll is logged to stderr
 * and the watcher keeps running instead of dying on an unhandled rejection.
 *
 * @param argv Raw command-line arguments.
 */
function startWatch(argv) {
  const config = parseWatchArgs(argv);
  const valid = config.dirs.filter((d) => (0, import_node_fs4.existsSync)(d));
  if (valid.length === 0) {
    console.error(`No valid directories found: ${config.dirs.join(", ")}`);
    process.exit(1);
  }
  const invalid = config.dirs.filter((d) => !(0, import_node_fs4.existsSync)(d));
  if (invalid.length > 0) {
    console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
  }
  let state = loadWatchState(config.stateFilePath);
  const condLabels = config.alertConditions.map((c) => {
    if (c.type === "stale") return `stale:${Math.floor(c.durationMs / 6e4)}m`;
    if (c.type === "consecutive-errors") return `consecutive-errors:${c.threshold}`;
    return c.type;
  });
  const channelLabels = config.notifyChannels.filter((c) => c.type !== "stdout").map((c) => {
    if (c.type === "webhook") return `webhook:${c.url.slice(0, 40)}...`;
    if (c.type === "command") return `command:${c.cmd.slice(0, 40)}`;
    return c.type;
  });
  console.log(`
agentflow watch started`);
  console.log(` Directories: ${valid.join(", ")}`);
  console.log(` Poll: ${config.pollIntervalMs / 1e3}s`);
  console.log(` Alert on: ${condLabels.join(", ")}`);
  console.log(` Notify: stdout${channelLabels.length > 0 ? ", " + channelLabels.join(", ") : ""}`);
  console.log(` Cooldown: ${Math.floor(config.cooldownMs / 6e4)}m`);
  console.log(` State: ${config.stateFilePath}`);
  console.log(` Hostname: ${(0, import_node_os.hostname)()}`);
  console.log("");
  let pollCount = 0;
  let pollInFlight = false;
  async function poll() {
    // Skip this tick if the previous poll has not finished yet.
    if (pollInFlight) return;
    pollInFlight = true;
    try {
      const now = Date.now();
      pollCount++;
      const files = scanFiles(valid, config.recursive);
      const records = [];
      for (const f of files.slice(0, 500)) {
        const recs = f.ext === ".jsonl" ? processJsonlFile(f) : processJsonFile(f);
        records.push(...recs);
      }
      const alerts = detectTransitions(state, records, config, now);
      for (const alert of alerts) {
        for (const channel of config.notifyChannels) {
          await sendAlert(alert, channel);
        }
      }
      state = updateWatchState(state, records, alerts, now);
      saveWatchState(config.stateFilePath, state);
      if (pollCount % 10 === 0) {
        const agentCount = Object.keys(state.agents).length;
        const errorCount = Object.values(state.agents).filter((a) => a.lastStatus === "error").length;
        const runningCount = Object.values(state.agents).filter((a) => a.lastStatus === "running").length;
        const time = new Date().toLocaleTimeString();
        console.log(`[${time}] heartbeat: ${agentCount} agents, ${runningCount} running, ${errorCount} errors, ${files.length} files`);
      }
    } catch (err) {
      // Keep the watcher alive; the next tick will try again.
      const msg = err instanceof Error ? err.message : String(err);
      console.error(`[agentflow] Poll failed: ${msg}`);
    } finally {
      pollInFlight = false;
    }
  }
  poll();
  setInterval(() => {
    poll();
  }, config.pollIntervalMs);
  function shutdown() {
    console.log("\nagentflow watch stopped.");
    saveWatchState(config.stateFilePath, state);
    process.exit(0);
  }
  process.on("SIGINT", shutdown);
  process.on("SIGTERM", shutdown);
}
|
|
1217
1734
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1218
1735
|
0 && (module.exports = {
|
|
1219
1736
|
createGraphBuilder,
|
|
@@ -1234,5 +1751,6 @@ function startLive(argv) {
|
|
|
1234
1751
|
loadGraph,
|
|
1235
1752
|
runTraced,
|
|
1236
1753
|
startLive,
|
|
1754
|
+
startWatch,
|
|
1237
1755
|
stitchTrace
|
|
1238
1756
|
});
|
package/dist/index.d.cts
CHANGED
|
@@ -393,8 +393,72 @@ declare function runTraced(config: RunConfig): Promise<RunResult>;
|
|
|
393
393
|
*
|
|
394
394
|
* @module
|
|
395
395
|
*/
|
|
396
|
+
|
|
396
397
|
declare function startLive(argv: string[]): void;
|
|
397
398
|
|
|
399
|
+
/**
|
|
400
|
+
* AgentFlow Watch — headless alert system for agent infrastructure.
|
|
401
|
+
*
|
|
402
|
+
* Polls directories for JSON/JSONL state files, detects status transitions
|
|
403
|
+
* (ok→error, stale, recovery), and sends alerts via Telegram, webhooks,
|
|
404
|
+
* shell commands, or stdout.
|
|
405
|
+
*
|
|
406
|
+
* @module
|
|
407
|
+
*/
|
|
408
|
+
/**
 * Starts the `agentflow watch` command with the given command-line arguments.
 *
 * Returns nothing; per the bundled implementation it keeps polling on a timer
 * and may terminate the process (help flag, no existing directory, or
 * SIGINT/SIGTERM).
 *
 * @param argv Command-line arguments; a leading "watch" token is accepted.
 */
declare function startWatch(argv: string[]): void;
|
|
409
|
+
|
|
410
|
+
/**
|
|
411
|
+
* Type definitions for the `agentflow watch` alert system.
|
|
412
|
+
* @module
|
|
413
|
+
*/
|
|
414
|
+
/**
 * A condition that triggers an alert, as parsed from an `--alert-on` flag.
 *
 * The `type` tag selects the variant; `stale` carries the inactivity limit in
 * milliseconds and `consecutive-errors` carries the error-streak threshold.
 */
type AlertCondition =
    | { readonly type: 'error' }
    | { readonly type: 'stale'; readonly durationMs: number }
    | { readonly type: 'recovery' }
    | { readonly type: 'consecutive-errors'; readonly threshold: number };
|
|
426
|
+
/**
 * A destination for alerts, as parsed from a `--notify` flag.
 *
 * The `type` tag selects the variant: `stdout` needs no settings, `telegram`
 * carries the bot token and chat id, `webhook` carries the target URL, and
 * `command` carries the shell command line.
 */
type NotifyChannel =
    | { readonly type: 'stdout' }
    | { readonly type: 'telegram'; readonly botToken: string; readonly chatId: string }
    | { readonly type: 'webhook'; readonly url: string }
    | { readonly type: 'command'; readonly cmd: string };
|
|
440
|
+
/** Resolved settings for one run of the watch command. */
interface WatchConfig {
    /** Directories to scan. */
    readonly dirs: string[];
    /** Whether subdirectories are scanned as well. */
    readonly recursive: boolean;
    /** Time between polls, in milliseconds. */
    readonly pollIntervalMs: number;
    /** Conditions that raise an alert. */
    readonly alertConditions: AlertCondition[];
    /** Channels every alert is delivered to. */
    readonly notifyChannels: NotifyChannel[];
    /** Path of the file in which watch state is persisted. */
    readonly stateFilePath: string;
    /** Minimum spacing between repeated alerts, in milliseconds. */
    readonly cooldownMs: number;
}
|
|
450
|
+
/** Data describing a single alert, handed to every notification channel. */
interface AlertPayload {
    /** Id of the agent the alert is about. */
    readonly agentId: string;
    /** Label of the condition that fired. */
    readonly condition: string;
    /** Agent status before the transition. */
    readonly previousStatus: string;
    /** Agent status that was just observed. */
    readonly currentStatus: string;
    /** Free-form description accompanying the alert. */
    readonly detail: string;
    /** File the triggering record came from. */
    readonly file: string;
    /** Creation time of the alert, in epoch milliseconds. */
    readonly timestamp: number;
    /** Directories being watched. */
    readonly dirs: readonly string[];
}
|
|
461
|
+
|
|
398
462
|
declare function groupByTraceId(graphs: ExecutionGraph[]): Map<string, ExecutionGraph[]>;
|
|
399
463
|
declare function stitchTrace(graphs: ExecutionGraph[]): DistributedTrace;
|
|
400
464
|
declare function getTraceTree(trace: DistributedTrace): ExecutionGraph[];
|
|
@@ -515,4 +579,4 @@ declare function getDepth(graph: ExecutionGraph): number;
|
|
|
515
579
|
*/
|
|
516
580
|
declare function getStats(graph: ExecutionGraph): GraphStats;
|
|
517
581
|
|
|
518
|
-
export { type Adapter, type AgentFlowConfig, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, stitchTrace };
|
|
582
|
+
export { type Adapter, type AgentFlowConfig, type AlertCondition, type AlertPayload, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type NotifyChannel, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type WatchConfig, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, startWatch, stitchTrace };
|
package/dist/index.d.ts
CHANGED
|
@@ -393,8 +393,72 @@ declare function runTraced(config: RunConfig): Promise<RunResult>;
|
|
|
393
393
|
*
|
|
394
394
|
* @module
|
|
395
395
|
*/
|
|
396
|
+
|
|
396
397
|
declare function startLive(argv: string[]): void;
|
|
397
398
|
|
|
399
|
+
/**
|
|
400
|
+
* AgentFlow Watch — headless alert system for agent infrastructure.
|
|
401
|
+
*
|
|
402
|
+
* Polls directories for JSON/JSONL state files, detects status transitions
|
|
403
|
+
* (ok→error, stale, recovery), and sends alerts via Telegram, webhooks,
|
|
404
|
+
* shell commands, or stdout.
|
|
405
|
+
*
|
|
406
|
+
* @module
|
|
407
|
+
*/
|
|
408
|
+
/**
 * Starts the `agentflow watch` command with the given command-line arguments.
 *
 * Returns nothing; per the bundled implementation it keeps polling on a timer
 * and may terminate the process (help flag, no existing directory, or
 * SIGINT/SIGTERM).
 *
 * @param argv Command-line arguments; a leading "watch" token is accepted.
 */
declare function startWatch(argv: string[]): void;
|
|
409
|
+
|
|
410
|
+
/**
|
|
411
|
+
* Type definitions for the `agentflow watch` alert system.
|
|
412
|
+
* @module
|
|
413
|
+
*/
|
|
414
|
+
/**
 * A condition that triggers an alert, as parsed from an `--alert-on` flag.
 *
 * The `type` tag selects the variant; `stale` carries the inactivity limit in
 * milliseconds and `consecutive-errors` carries the error-streak threshold.
 */
type AlertCondition =
    | { readonly type: 'error' }
    | { readonly type: 'stale'; readonly durationMs: number }
    | { readonly type: 'recovery' }
    | { readonly type: 'consecutive-errors'; readonly threshold: number };
|
|
426
|
+
/**
 * A destination for alerts, as parsed from a `--notify` flag.
 *
 * The `type` tag selects the variant: `stdout` needs no settings, `telegram`
 * carries the bot token and chat id, `webhook` carries the target URL, and
 * `command` carries the shell command line.
 */
type NotifyChannel =
    | { readonly type: 'stdout' }
    | { readonly type: 'telegram'; readonly botToken: string; readonly chatId: string }
    | { readonly type: 'webhook'; readonly url: string }
    | { readonly type: 'command'; readonly cmd: string };
|
|
440
|
+
/** Resolved settings for one run of the watch command. */
interface WatchConfig {
    /** Directories to scan. */
    readonly dirs: string[];
    /** Whether subdirectories are scanned as well. */
    readonly recursive: boolean;
    /** Time between polls, in milliseconds. */
    readonly pollIntervalMs: number;
    /** Conditions that raise an alert. */
    readonly alertConditions: AlertCondition[];
    /** Channels every alert is delivered to. */
    readonly notifyChannels: NotifyChannel[];
    /** Path of the file in which watch state is persisted. */
    readonly stateFilePath: string;
    /** Minimum spacing between repeated alerts, in milliseconds. */
    readonly cooldownMs: number;
}
|
|
450
|
+
/** Data describing a single alert, handed to every notification channel. */
interface AlertPayload {
    /** Id of the agent the alert is about. */
    readonly agentId: string;
    /** Label of the condition that fired. */
    readonly condition: string;
    /** Agent status before the transition. */
    readonly previousStatus: string;
    /** Agent status that was just observed. */
    readonly currentStatus: string;
    /** Free-form description accompanying the alert. */
    readonly detail: string;
    /** File the triggering record came from. */
    readonly file: string;
    /** Creation time of the alert, in epoch milliseconds. */
    readonly timestamp: number;
    /** Directories being watched. */
    readonly dirs: readonly string[];
}
|
|
461
|
+
|
|
398
462
|
declare function groupByTraceId(graphs: ExecutionGraph[]): Map<string, ExecutionGraph[]>;
|
|
399
463
|
declare function stitchTrace(graphs: ExecutionGraph[]): DistributedTrace;
|
|
400
464
|
declare function getTraceTree(trace: DistributedTrace): ExecutionGraph[];
|
|
@@ -515,4 +579,4 @@ declare function getDepth(graph: ExecutionGraph): number;
|
|
|
515
579
|
*/
|
|
516
580
|
declare function getStats(graph: ExecutionGraph): GraphStats;
|
|
517
581
|
|
|
518
|
-
export { type Adapter, type AgentFlowConfig, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, stitchTrace };
|
|
582
|
+
export { type Adapter, type AgentFlowConfig, type AlertCondition, type AlertPayload, type DistributedTrace, type EdgeType, type ExecutionEdge, type ExecutionGraph, type ExecutionNode, type GraphBuilder, type GraphStats, type GraphStatus, type MutableExecutionNode, type NodeStatus, type NodeType, type NotifyChannel, type RunConfig, type RunResult, type StartNodeOptions, type TraceEvent, type TraceEventType, type WatchConfig, type Writer, createGraphBuilder, findWaitingOn, getChildren, getCriticalPath, getDepth, getDuration, getFailures, getHungNodes, getNode, getParent, getStats, getSubtree, getTraceTree, graphToJson, groupByTraceId, loadGraph, runTraced, startLive, startWatch, stitchTrace };
|