agentflow-core 0.2.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5SPZPOFN.js → chunk-NPH34CAL.js} +518 -1
- package/dist/cli.cjs +526 -4
- package/dist/cli.js +13 -6
- package/dist/index.cjs +518 -0
- package/dist/index.d.cts +65 -1
- package/dist/index.d.ts +65 -1
- package/dist/index.js +3 -1
- package/package.json +24 -3
|
@@ -1169,6 +1169,522 @@ function startLive(argv) {
|
|
|
1169
1169
|
});
|
|
1170
1170
|
}
|
|
1171
1171
|
|
|
1172
|
+
// src/watch.ts
|
|
1173
|
+
import { existsSync as existsSync4 } from "fs";
|
|
1174
|
+
import { resolve as resolve3, join as join3 } from "path";
|
|
1175
|
+
import { hostname } from "os";
|
|
1176
|
+
|
|
1177
|
+
// src/watch-state.ts
|
|
1178
|
+
import { existsSync as existsSync3, readFileSync as readFileSync2, writeFileSync as writeFileSync2, renameSync } from "fs";
|
|
1179
|
+
/**
 * Convert a human-readable duration into milliseconds.
 *
 * Accepted form: a non-negative decimal number optionally followed by a unit
 * (`s`, `m`, `h` or `d`, case-insensitive), e.g. "90", "1.5s", "15m", "2 h".
 * A number without a unit is interpreted as seconds. Surrounding whitespace
 * is ignored; anything else (trailing words, signs, unknown units) is
 * rejected instead of being partially parsed.
 *
 * @param {string} input - Duration text, typically taken from the command line.
 * @returns {number} Duration in milliseconds, or 0 when the input is not a valid duration.
 */
function parseDuration(input) {
  const UNIT_MS = { s: 1e3, m: 6e4, h: 36e5, d: 864e5 };
  const match = /^(\d+(?:\.\d+)?)\s*([smhd])?$/i.exec(String(input ?? "").trim());
  // Callers treat 0 as "invalid / not set", so that is the failure value.
  if (!match) return 0;
  const value = Number.parseFloat(match[1]);
  const unit = (match[2] ?? "s").toLowerCase();
  return value * UNIT_MS[unit];
}
|
|
1199
|
+
/**
 * Build the initial watch state used when nothing usable has been persisted.
 *
 * @returns {{version: number, agents: Object, lastPollTime: number}} A fresh,
 *   unshared state object with no known agents.
 */
function emptyState() {
  const agents = {};
  const lastPollTime = 0;
  return { version: 1, agents, lastPollTime };
}
|
|
1202
|
+
/**
 * Load the persisted watch state from disk.
 *
 * Any problem (missing file, unreadable file, invalid JSON, unexpected shape
 * or version) yields a fresh empty state instead of an error, so a damaged
 * state file can never prevent the watcher from starting.
 *
 * @param {string} filePath - Path of the JSON state file.
 * @returns {Object} The parsed state, or the result of `emptyState()`.
 */
function loadWatchState(filePath) {
  if (!existsSync3(filePath)) return emptyState();
  let raw;
  try {
    raw = JSON.parse(readFileSync2(filePath, "utf8"));
  } catch {
    // Unreadable or corrupt state is not fatal: start over from scratch.
    return emptyState();
  }
  // `typeof null` and `typeof []` are both "object", so check explicitly;
  // downstream code indexes `state.agents[agentId]` and needs a real object.
  const isPlainObject = (v) => typeof v === "object" && v !== null && !Array.isArray(v);
  if (!isPlainObject(raw) || raw.version !== 1 || !isPlainObject(raw.agents)) {
    return emptyState();
  }
  return raw;
}
|
|
1212
|
+
/**
 * Persist the watch state as pretty-printed JSON.
 *
 * The state is first written to `<filePath>.tmp` and renamed into place so a
 * reader never observes a half-written file. If that fails, a direct write to
 * `filePath` is attempted. Persistence is best-effort: this function never
 * throws, but a final failure is reported on stderr rather than hidden.
 *
 * @param {string} filePath - Destination path of the state file.
 * @param {Object} state - JSON-serialisable watch state.
 * @returns {void}
 */
function saveWatchState(filePath, state) {
  const report = (err) => {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`[agentflow] Failed to save watch state to ${filePath}: ${msg}`);
  };

  let serialized;
  try {
    serialized = JSON.stringify(state, null, 2);
  } catch (err) {
    report(err);
    return;
  }

  const tmp = `${filePath}.tmp`;
  try {
    writeFileSync2(tmp, serialized, "utf8");
    renameSync(tmp, filePath);
    return;
  } catch {
    // Write-then-rename failed; fall back to writing the target directly.
  }

  try {
    writeFileSync2(filePath, serialized, "utf8");
  } catch (err) {
    report(err);
  }
}
|
|
1224
|
+
/**
 * Estimate how often an agent normally updates, from its recorded timestamps.
 *
 * The estimate is the median (upper-middle for an even count) of the positive
 * gaps between consecutive timestamps. Entries that are not finite numbers
 * are ignored, because the history is read back from a state file on disk
 * that may have been edited or damaged.
 *
 * @param {number[]} history - Activity timestamps in milliseconds, in any order.
 * @returns {number} Estimated interval in milliseconds, or 0 when there are
 *   fewer than three usable timestamps or no positive gap between them.
 */
function estimateInterval(history) {
  if (!Array.isArray(history)) return 0;
  // filter() returns a new array, so sorting does not touch the caller's data.
  const sorted = history.filter((t) => Number.isFinite(t)).sort((a, b) => a - b);
  if (sorted.length < 3) return 0;

  const deltas = [];
  for (let i = 1; i < sorted.length; i++) {
    const gap = sorted[i] - sorted[i - 1];
    // Repeated timestamps carry no interval information.
    if (gap > 0) deltas.push(gap);
  }
  if (deltas.length === 0) return 0;

  deltas.sort((a, b) => a - b);
  return deltas[Math.floor(deltas.length / 2)];
}
|
|
1236
|
+
/**
 * Compare the freshly scanned agent records against the previously persisted
 * state and produce the alerts that should be sent for this poll.
 *
 * Per agent (judged by its most recently active record) the checks are, in
 * order: transition into "error", recovery from "error" to "ok", reaching a
 * configured number of consecutive errors, exceeding an explicit staleness
 * duration, and - only when no explicit stale condition is configured -
 * exceeding three times the agent's estimated update interval.
 *
 * Every alert except "recovery" is subject to the cooldown. The cooldown is
 * keyed on the alert's `condition` label, because that label is what gets
 * stored as `lastAlertReason` after an alert is sent; using any other key
 * would never match and the alert would repeat on every poll.
 *
 * @param {Object} previous - Persisted watch state; `previous.agents` maps agent id to its last known entry.
 * @param {Array<Object>} currentRecords - Records from this poll; reads `id`, `status`, `lastActive`, `detail`.
 * @param {Object} config - Watch configuration; reads `alertConditions`, `cooldownMs`, `dirs`.
 * @param {number} now - Current time in milliseconds.
 * @returns {Array<Object>} Alert payloads built by `makePayload`, possibly empty.
 */
function detectTransitions(previous, currentRecords, config, now) {
  const alerts = [];
  const { alertConditions, cooldownMs, dirs } = config;
  const hasError = alertConditions.some((c) => c.type === "error");
  const hasRecovery = alertConditions.some((c) => c.type === "recovery");
  const staleConditions = alertConditions.filter((c) => c.type === "stale");
  const consecutiveConditions = alertConditions.filter((c) => c.type === "consecutive-errors");

  // Several records may describe the same agent; keep the most recently active one.
  const byAgent = new Map();
  for (const r of currentRecords) {
    const existing = byAgent.get(r.id);
    if (!existing || r.lastActive > existing.lastActive) {
      byAgent.set(r.id, r);
    }
  }

  for (const [agentId, record] of byAgent) {
    const prev = previous.agents[agentId];
    const prevStatus = prev?.lastStatus ?? "unknown";
    const currStatus = record.status;
    const sinceActive = now - record.lastActive;

    // Single place where the cooldown key and the payload condition are set,
    // so the two can never drift apart.
    const alertWithCooldown = (condition, detail) => {
      if (!canAlert(prev, condition, cooldownMs, now)) return;
      const rec = detail === undefined ? record : { ...record, detail };
      alerts.push(makePayload(agentId, condition, prevStatus, currStatus, rec, dirs));
    };

    if (hasError && currStatus === "error" && prevStatus !== "error") {
      alertWithCooldown("error");
    }

    // Recovery is reported immediately, without any cooldown.
    if (hasRecovery && currStatus === "ok" && prevStatus === "error") {
      alerts.push(makePayload(agentId, "recovery", prevStatus, currStatus, record, dirs));
    }

    const newConsec = currStatus === "error" ? (prev?.consecutiveErrors ?? 0) + 1 : 0;
    for (const cond of consecutiveConditions) {
      // Strict equality: fire when the streak reaches the threshold, not on every later error.
      if (newConsec === cond.threshold) {
        alertWithCooldown(
          `consecutive-errors (${cond.threshold})`,
          `${newConsec} consecutive errors. ${record.detail}`
        );
      }
    }

    for (const cond of staleConditions) {
      if (sinceActive > cond.durationMs && record.lastActive > 0) {
        const mins = Math.floor(sinceActive / 6e4);
        alertWithCooldown("stale", `No update for ${mins}m. ${record.detail}`);
      }
    }

    // No explicit stale rule: fall back to a threshold learned from past activity.
    if (staleConditions.length === 0) {
      const expectedInterval = estimateInterval(prev?.mtimeHistory ?? []);
      if (expectedInterval > 0 && record.lastActive > 0 && sinceActive > expectedInterval * 3) {
        const mins = Math.floor(sinceActive / 6e4);
        const expectedMins = Math.floor(expectedInterval / 6e4);
        alertWithCooldown(
          "stale (auto)",
          `No update for ${mins}m (expected every ~${expectedMins}m). ${record.detail}`
        );
      }
    }
  }
  return alerts;
}
|
|
1316
|
+
/**
 * Produce the next watch state after a poll, without mutating the input state.
 *
 * For every agent present in `records` (represented by its most recently
 * active record) the entry is rebuilt: status and activity time are
 * refreshed, the error streak is extended or reset, the activity timestamp
 * is appended to the history unless it equals the last entry (history is
 * capped at the 10 newest values), and the last-alert fields are updated when
 * an alert for that agent was raised in this poll. Agents that do not appear
 * in `records` keep their previous entry.
 *
 * @param {Object} state - Previous watch state; `state.agents` is read.
 * @param {Array<Object>} records - Records from this poll; reads `id`, `status`, `lastActive`.
 * @param {Array<Object>} alerts - Alerts raised in this poll; reads `agentId`, `condition`.
 * @param {number} now - Current time in milliseconds.
 * @returns {{version: number, agents: Object, lastPollTime: number}} The new state.
 */
function updateWatchState(state, records, alerts, now) {
  const HISTORY_LIMIT = 10;

  // When one agent raised several alerts, the last one in the list is remembered.
  const lastAlertFor = new Map(alerts.map((entry) => [entry.agentId, entry]));

  const newest = new Map();
  for (const candidate of records) {
    const seen = newest.get(candidate.id);
    if (seen && !(candidate.lastActive > seen.lastActive)) continue;
    newest.set(candidate.id, candidate);
  }

  const agents = { ...state.agents };
  for (const [agentId, record] of newest) {
    const before = agents[agentId];

    const timeline = [...(before?.mtimeHistory ?? [])];
    const lastSeen = timeline[timeline.length - 1];
    if (timeline.length === 0 || lastSeen !== record.lastActive) {
      timeline.push(record.lastActive);
    }
    const mtimeHistory = timeline.length > HISTORY_LIMIT ? timeline.slice(-HISTORY_LIMIT) : timeline;

    let consecutiveErrors = 0;
    if (record.status === "error") {
      consecutiveErrors = (before?.consecutiveErrors ?? 0) + 1;
    }

    const raised = lastAlertFor.get(agentId);
    let lastAlertTime;
    let lastAlertReason;
    if (raised) {
      lastAlertTime = now;
      lastAlertReason = raised.condition;
    } else {
      lastAlertTime = before?.lastAlertTime ?? 0;
      lastAlertReason = before?.lastAlertReason ?? "";
    }

    agents[agentId] = {
      id: agentId,
      lastStatus: record.status,
      lastActive: record.lastActive,
      lastAlertTime,
      lastAlertReason,
      consecutiveErrors,
      mtimeHistory
    };
  }

  return { version: 1, agents, lastPollTime: now };
}
|
|
1349
|
+
/**
 * Decide whether an alert may be raised for an agent right now.
 *
 * An alert is allowed when the agent has no previous entry, when the last
 * alert recorded for it had a different reason, or when more than
 * `cooldownMs` has passed since that last alert.
 *
 * @param {Object|undefined} prev - Previous state entry of the agent; reads `lastAlertReason`, `lastAlertTime`.
 * @param {string} reason - Reason key of the alert about to be raised.
 * @param {number} cooldownMs - Minimum time between two alerts with the same reason.
 * @param {number} now - Current time in milliseconds.
 * @returns {boolean} True when the alert may be sent.
 */
function canAlert(prev, reason, cooldownMs, now) {
  const repeatsLastReason = Boolean(prev) && prev.lastAlertReason === reason;
  if (!repeatsLastReason) {
    return true;
  }
  const elapsed = now - prev.lastAlertTime;
  return elapsed > cooldownMs;
}
|
|
1354
|
+
/**
 * Assemble the alert payload handed to the notification channels.
 *
 * @param {string} agentId - Identifier of the agent the alert is about.
 * @param {string} condition - Label of the condition that fired.
 * @param {string} previousStatus - Status recorded before this poll.
 * @param {string} currentStatus - Status observed in this poll.
 * @param {Object} record - Source record; its `detail` and `file` are copied.
 * @param {Array<string>} dirs - Watched directories, passed through unchanged.
 * @returns {Object} Payload with `agentId`, `condition`, `previousStatus`,
 *   `currentStatus`, `detail`, `file`, `timestamp` (time of creation) and `dirs`.
 */
function makePayload(agentId, condition, previousStatus, currentStatus, record, dirs) {
  const { detail, file } = record;
  const timestamp = Date.now();
  return { agentId, condition, previousStatus, currentStatus, detail, file, timestamp, dirs };
}
|
|
1366
|
+
|
|
1367
|
+
// src/watch-alerts.ts
|
|
1368
|
+
import { request as httpsRequest } from "https";
|
|
1369
|
+
import { request as httpRequest } from "http";
|
|
1370
|
+
import { exec } from "child_process";
|
|
1371
|
+
/**
 * Render an alert payload as a multi-line plain-text message.
 *
 * The message has a headline, the status transition, the detail line (left
 * out when the payload has no detail), the source file and an ISO-8601 time.
 *
 * @param {Object} payload - Alert payload; reads `condition`, `agentId`,
 *   `previousStatus`, `currentStatus`, `detail`, `file`, `timestamp`.
 * @returns {string} Lines joined with "\n".
 */
function formatAlertMessage(payload) {
  const isoTime = new Date(payload.timestamp).toISOString();
  const transition = `${payload.previousStatus} \u2192 ${payload.currentStatus}`;

  const lines = [];
  lines.push(`[ALERT] ${payload.condition}: "${payload.agentId}"`);
  lines.push(` Status: ${transition}`);
  if (payload.detail) {
    lines.push(` Detail: ${payload.detail}`);
  }
  lines.push(` File: ${payload.file}`);
  lines.push(` Time: ${isoTime}`);
  return lines.join("\n");
}
|
|
1382
|
+
/**
 * Render an alert payload as a Telegram message in legacy "Markdown" style.
 *
 * The message is sent with `parse_mode: "Markdown"`, where an unbalanced
 * `_`, `*`, backtick or `[` in free text makes Telegram reject the message.
 * Free-text values (statuses, detail) are therefore backslash-escaped. The
 * agent id is shown as inline code, where escaping is not possible, so any
 * backtick in it is replaced by an apostrophe.
 *
 * @param {Object} payload - Alert payload; reads `condition`, `agentId`,
 *   `previousStatus`, `currentStatus`, `detail`, `timestamp`.
 * @returns {string} Message text, lines joined with "\n". The detail is
 *   limited to its first 200 characters (before escaping).
 */
function formatTelegram(payload) {
  const escapeMarkdown = (text) => String(text).replace(/[_*`\[]/g, "\\$&");

  const icon = payload.condition === "recovery" ? "\u2705" : "\u26A0\uFE0F";
  const time = new Date(payload.timestamp).toLocaleTimeString();
  const agent = String(payload.agentId).replace(/`/g, "'");
  // Truncate first, then escape, so an escape sequence is never cut in half.
  const detail = payload.detail ? escapeMarkdown(String(payload.detail).slice(0, 200)) : "";

  return [
    `${icon} *AgentFlow Alert*`,
    `*${payload.condition}*: \`${agent}\``,
    `Status: ${escapeMarkdown(payload.previousStatus)} \u2192 ${escapeMarkdown(payload.currentStatus)}`,
    detail ? `Detail: ${detail}` : null,
    `Time: ${time}`
  ].filter(Boolean).join("\n");
}
|
|
1393
|
+
/**
 * Deliver one alert through one notification channel.
 *
 * Dispatches on `channel.type` ("stdout", "telegram", "webhook", "command");
 * any other type is ignored. A delivery failure is logged to stderr and not
 * propagated, so one failing channel does not stop the others.
 *
 * @param {Object} payload - Alert payload to deliver.
 * @param {Object} channel - Channel descriptor; reads `type` plus the
 *   type-specific fields `botToken`/`chatId`, `url` or `cmd`.
 * @returns {Promise<void>} Resolves once delivery has succeeded or failed.
 */
async function sendAlert(payload, channel) {
  try {
    const kind = channel.type;
    if (kind === "stdout") {
      sendStdout(payload);
    } else if (kind === "telegram") {
      await sendTelegram(payload, channel.botToken, channel.chatId);
    } else if (kind === "webhook") {
      await sendWebhook(payload, channel.url);
    } else if (kind === "command") {
      await sendCommand(payload, channel.cmd);
    }
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    console.error(`[agentflow] Failed to send ${channel.type} alert: ${reason}`);
  }
}
|
|
1414
|
+
/**
 * Print an alert to standard output using the plain-text alert format.
 *
 * @param {Object} payload - Alert payload to print.
 * @returns {void}
 */
function sendStdout(payload) {
  const message = formatAlertMessage(payload);
  console.log(message);
}
|
|
1417
|
+
/**
 * Send an alert to a Telegram chat through the Bot API `sendMessage` method.
 *
 * @param {Object} payload - Alert payload, rendered with `formatTelegram`.
 * @param {string} botToken - Telegram bot token.
 * @param {string} chatId - Target chat identifier.
 * @returns {Promise<void>} Resolves on a 2xx response; rejects on any other
 *   status, on a network error, or when the request is idle for 10 seconds.
 */
function sendTelegram(payload, botToken, chatId) {
  const body = JSON.stringify({
    chat_id: chatId,
    text: formatTelegram(payload),
    parse_mode: "Markdown"
  });
  return new Promise((resolve4, reject) => {
    const req = httpsRequest(
      `https://api.telegram.org/bot${botToken}/sendMessage`,
      { method: "POST", headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) } },
      (res) => {
        // Drain the response so the socket is released.
        res.resume();
        if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
        else reject(new Error(`Telegram API returned ${res.statusCode}`));
      }
    );
    req.on("error", reject);
    // Same limit as the webhook channel: without it a stalled connection
    // would leave this promise, and the poll awaiting it, pending forever.
    req.setTimeout(1e4, () => {
      req.destroy(new Error("Telegram request timeout"));
    });
    req.write(body);
    req.end();
  });
}
|
|
1438
|
+
/**
 * POST an alert payload as JSON to a webhook URL.
 *
 * The HTTP client is chosen from the parsed URL scheme, so the scheme is
 * matched case-insensitively and only `http:` and `https:` are accepted.
 *
 * @param {Object} payload - Alert payload; sent as the JSON request body.
 * @param {string} url - Absolute http(s) URL of the webhook.
 * @returns {Promise<void>} Resolves on a 2xx response; rejects on any other
 *   status, an invalid or unsupported URL, a network error, or when the
 *   request is idle for 10 seconds.
 */
function sendWebhook(payload, url) {
  const body = JSON.stringify(payload);
  return new Promise((resolve4, reject) => {
    // An invalid URL throws here, which rejects the promise.
    const target = new URL(url);
    let doRequest;
    if (target.protocol === "https:") {
      doRequest = httpsRequest;
    } else if (target.protocol === "http:") {
      doRequest = httpRequest;
    } else {
      reject(new Error(`Unsupported webhook protocol: ${target.protocol}`));
      return;
    }
    const req = doRequest(
      target,
      { method: "POST", headers: { "Content-Type": "application/json", "Content-Length": Buffer.byteLength(body) } },
      (res) => {
        // Drain the response so the socket is released.
        res.resume();
        if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) resolve4();
        else reject(new Error(`Webhook returned ${res.statusCode}`));
      }
    );
    req.on("error", reject);
    req.setTimeout(1e4, () => {
      req.destroy(new Error("Webhook timeout"));
    });
    req.write(body);
    req.end();
  });
}
|
|
1460
|
+
/**
 * Run a shell command to deliver an alert.
 *
 * The command inherits the current environment, extended with
 * `AGENTFLOW_ALERT_*` variables that describe the alert, and is given a
 * 30-second time limit.
 *
 * @param {Object} payload - Alert payload; reads `agentId`, `condition`,
 *   `currentStatus`, `previousStatus`, `detail`, `file`, `timestamp`.
 * @param {string} cmd - Command line passed to `exec`.
 * @returns {Promise<void>} Resolves when the command succeeds; rejects with
 *   the error reported by `exec` otherwise.
 */
function sendCommand(payload, cmd) {
  const alertVars = {
    AGENTFLOW_ALERT_AGENT: payload.agentId,
    AGENTFLOW_ALERT_CONDITION: payload.condition,
    AGENTFLOW_ALERT_STATUS: payload.currentStatus,
    AGENTFLOW_ALERT_PREVIOUS_STATUS: payload.previousStatus,
    AGENTFLOW_ALERT_DETAIL: payload.detail,
    AGENTFLOW_ALERT_FILE: payload.file,
    AGENTFLOW_ALERT_TIMESTAMP: String(payload.timestamp)
  };
  return new Promise((resolve4, reject) => {
    const env = { ...process.env, ...alertVars };
    const onDone = (failure) => {
      if (failure) {
        reject(failure);
        return;
      }
      resolve4();
    };
    exec(cmd, { env, timeout: 3e4 }, onDone);
  });
}
|
|
1478
|
+
|
|
1479
|
+
// src/watch.ts
|
|
1480
|
+
/**
 * Parse the command-line arguments of `agentflow watch` into a configuration.
 *
 * Positional arguments are directories (resolved to absolute paths; default
 * is the current directory). Recognised options are `--alert-on`, `--notify`,
 * `--poll`, `--cooldown`, `--state-file`, `-R/--recursive` and `-h/--help`
 * (prints usage and exits). Unknown options are skipped. Option values that
 * cannot be used are ignored with a warning on stderr, so that a typo does
 * not silently change what is being monitored.
 *
 * Defaults: alert on "error" and "recovery" when no valid condition is
 * given; 30 s poll interval; 30 min cooldown; state file
 * `.agentflow-watch-state.json` inside the first directory. The "stdout"
 * channel is always first in `notifyChannels`.
 *
 * @param {string[]} argv - Arguments, optionally starting with "watch".
 * @returns {{dirs: string[], recursive: boolean, pollIntervalMs: number,
 *   alertConditions: Object[], notifyChannels: Object[], stateFilePath: string,
 *   cooldownMs: number}} The watch configuration.
 */
function parseWatchArgs(argv) {
  const STALE_PREFIX = "stale:";
  const CONSECUTIVE_PREFIX = "consecutive-errors:";
  const WEBHOOK_PREFIX = "webhook:";
  const COMMAND_PREFIX = "command:";
  const warn = (message) => console.error(`Warning: ${message}`);

  const dirs = [];
  const alertConditions = [];
  const notifyChannels = [];
  let recursive = false;
  let pollIntervalMs = 3e4;
  let cooldownMs = 30 * 6e4;
  let stateFilePath = "";

  const args = argv.slice(0);
  if (args[0] === "watch") args.shift();

  let i = 0;
  while (i < args.length) {
    const arg = args[i];
    if (arg === "--help" || arg === "-h") {
      printWatchUsage();
      process.exit(0);
    } else if (arg === "--alert-on") {
      const val = args[i + 1] ?? "";
      if (val === "error" || val === "recovery") {
        alertConditions.push({ type: val });
      } else if (val.startsWith(STALE_PREFIX)) {
        const dur = parseDuration(val.slice(STALE_PREFIX.length));
        if (dur > 0) alertConditions.push({ type: "stale", durationMs: dur });
        else warn(`ignoring --alert-on "${val}": invalid duration`);
      } else if (val.startsWith(CONSECUTIVE_PREFIX)) {
        const n = Number.parseInt(val.slice(CONSECUTIVE_PREFIX.length), 10);
        if (n > 0) alertConditions.push({ type: "consecutive-errors", threshold: n });
        else warn(`ignoring --alert-on "${val}": threshold must be a positive integer`);
      } else {
        warn(`ignoring unrecognized --alert-on value "${val}"`);
      }
      i += 2;
    } else if (arg === "--notify") {
      const val = args[i + 1] ?? "";
      if (val === "telegram") {
        const botToken = process.env["AGENTFLOW_TELEGRAM_BOT_TOKEN"] ?? "";
        const chatId = process.env["AGENTFLOW_TELEGRAM_CHAT_ID"] ?? "";
        if (botToken && chatId) {
          notifyChannels.push({ type: "telegram", botToken, chatId });
        } else {
          warn("--notify telegram requires AGENTFLOW_TELEGRAM_BOT_TOKEN and AGENTFLOW_TELEGRAM_CHAT_ID env vars");
        }
      } else if (val.startsWith(WEBHOOK_PREFIX)) {
        const url = val.slice(WEBHOOK_PREFIX.length);
        if (url) notifyChannels.push({ type: "webhook", url });
        else warn("ignoring --notify webhook: with an empty URL");
      } else if (val.startsWith(COMMAND_PREFIX)) {
        const cmd = val.slice(COMMAND_PREFIX.length);
        if (cmd) notifyChannels.push({ type: "command", cmd });
        else warn("ignoring --notify command: with an empty command");
      } else if (val !== "stdout") {
        // "stdout" is accepted silently because that channel is always enabled.
        warn(`ignoring unrecognized --notify value "${val}"`);
      }
      i += 2;
    } else if (arg === "--poll") {
      const v = Number.parseInt(args[i + 1] ?? "", 10);
      if (!Number.isNaN(v) && v > 0) pollIntervalMs = v * 1e3;
      else warn(`ignoring invalid --poll value "${args[i + 1] ?? ""}"`);
      i += 2;
    } else if (arg === "--cooldown") {
      const dur = parseDuration(args[i + 1] ?? "30m");
      if (dur > 0) cooldownMs = dur;
      else warn(`ignoring invalid --cooldown value "${args[i + 1] ?? ""}"`);
      i += 2;
    } else if (arg === "--state-file") {
      stateFilePath = args[i + 1] ?? "";
      i += 2;
    } else if (arg === "--recursive" || arg === "-R") {
      recursive = true;
      i++;
    } else if (!arg.startsWith("-")) {
      dirs.push(resolve3(arg));
      i++;
    } else {
      // Unknown option: skip it.
      i++;
    }
  }

  if (dirs.length === 0) dirs.push(resolve3("."));
  if (alertConditions.length === 0) {
    alertConditions.push({ type: "error" });
    alertConditions.push({ type: "recovery" });
  }
  notifyChannels.unshift({ type: "stdout" });
  if (!stateFilePath) {
    stateFilePath = join3(dirs[0], ".agentflow-watch-state.json");
  }
  return {
    dirs,
    recursive,
    pollIntervalMs,
    alertConditions,
    notifyChannels,
    stateFilePath: resolve3(stateFilePath),
    cooldownMs
  };
}
|
|
1571
|
+
/**
 * Print the help text of the `agentflow watch` command to standard output.
 *
 * NOTE(review): the help text below was taken from a whitespace-collapsed
 * view of the file; confirm indentation and column alignment of the text
 * against the original source before merging.
 *
 * @returns {void}
 */
function printWatchUsage() {
  const usage = `
AgentFlow Watch \u2014 headless alert system for agent infrastructure.

Polls directories for JSON/JSONL files, detects failures and stale
agents, sends alerts. Same auto-detection as \`agentflow live\`.

Usage:
agentflow watch [dir...] [options]

Arguments:
dir One or more directories to watch (default: .)

Alert conditions (--alert-on, repeatable):
error Agent transitions to error status
recovery Agent recovers from error to ok
stale:DURATION No file update within duration (e.g. 15m, 1h)
consecutive-errors:N N consecutive error observations

Default (if none specified): error + recovery

Notification channels (--notify, repeatable):
telegram Telegram Bot API (needs env vars)
webhook:URL POST JSON to any URL
command:CMD Run shell command with alert env vars

Stdout alerts are always printed regardless of --notify flags.

Options:
--poll <secs> Poll interval in seconds (default: 30)
--cooldown <duration> Alert dedup cooldown (default: 30m)
--state-file <path> Persistence file (default: <dir>/.agentflow-watch-state.json)
-R, --recursive Scan subdirectories (1 level deep)
-h, --help Show this help message

Environment variables:
AGENTFLOW_TELEGRAM_BOT_TOKEN Telegram bot token (for --notify telegram)
AGENTFLOW_TELEGRAM_CHAT_ID Telegram chat ID (for --notify telegram)

Examples:
agentflow watch ./data --alert-on error --alert-on stale:15m
agentflow watch ./data ./cron --notify telegram --poll 60
agentflow watch ./traces --notify webhook:https://hooks.slack.com/... --alert-on consecutive-errors:3
agentflow watch ./data --notify "command:curl -X POST https://my-pagerduty/alert"
`;
  // The surrounding blank lines only exist to keep the literal readable.
  console.log(usage.trim());
}
|
|
1617
|
+
/**
 * Entry point of `agentflow watch`: parse the arguments, print a start-up
 * summary, then poll the watched directories until the process is stopped.
 *
 * Each poll scans the directories, parses at most 500 of the files found,
 * derives alerts by comparing against the persisted state, sends every alert
 * through every configured channel, and saves the updated state. Every tenth
 * poll prints a heartbeat line.
 *
 * A poll that throws is logged and does not stop the watcher, and a new poll
 * is skipped while the previous one is still running so that two polls never
 * work on the shared state at the same time.
 *
 * Exits with status 1 when none of the directories exist. On SIGINT or
 * SIGTERM the state is saved and the process exits with status 0.
 *
 * @param {string[]} argv - Command-line arguments, see `parseWatchArgs`.
 * @returns {void}
 */
function startWatch(argv) {
  const config = parseWatchArgs(argv);

  const valid = [];
  const invalid = [];
  for (const d of config.dirs) {
    (existsSync4(d) ? valid : invalid).push(d);
  }
  if (valid.length === 0) {
    console.error(`No valid directories found: ${config.dirs.join(", ")}`);
    process.exit(1);
  }
  if (invalid.length > 0) {
    console.warn(`Skipping non-existent: ${invalid.join(", ")}`);
  }

  let state = loadWatchState(config.stateFilePath);

  const condLabels = config.alertConditions.map((c) => {
    if (c.type === "stale") return `stale:${Math.floor(c.durationMs / 6e4)}m`;
    if (c.type === "consecutive-errors") return `consecutive-errors:${c.threshold}`;
    return c.type;
  });
  const channelLabels = config.notifyChannels.filter((c) => c.type !== "stdout").map((c) => {
    if (c.type === "webhook") return `webhook:${c.url.slice(0, 40)}...`;
    if (c.type === "command") return `command:${c.cmd.slice(0, 40)}`;
    return c.type;
  });

  console.log("\nagentflow watch started");
  console.log(` Directories: ${valid.join(", ")}`);
  console.log(` Poll: ${config.pollIntervalMs / 1e3}s`);
  console.log(` Alert on: ${condLabels.join(", ")}`);
  console.log(` Notify: stdout${channelLabels.length > 0 ? ", " + channelLabels.join(", ") : ""}`);
  console.log(` Cooldown: ${Math.floor(config.cooldownMs / 6e4)}m`);
  console.log(` State: ${config.stateFilePath}`);
  console.log(` Hostname: ${hostname()}`);
  console.log("");

  let pollCount = 0;
  let pollInFlight = false;

  async function poll() {
    const now = Date.now();
    pollCount++;
    const files = scanFiles(valid, config.recursive);
    const records = [];
    // Cap the work per poll at 500 files.
    for (const f of files.slice(0, 500)) {
      const recs = f.ext === ".jsonl" ? processJsonlFile(f) : processJsonFile(f);
      records.push(...recs);
    }
    const alerts = detectTransitions(state, records, config, now);
    // Sent one at a time so channels see alerts in the order they were detected.
    for (const alert of alerts) {
      for (const channel of config.notifyChannels) {
        await sendAlert(alert, channel);
      }
    }
    state = updateWatchState(state, records, alerts, now);
    saveWatchState(config.stateFilePath, state);
    if (pollCount % 10 === 0) {
      const agentCount = Object.keys(state.agents).length;
      const errorCount = Object.values(state.agents).filter((a) => a.lastStatus === "error").length;
      const runningCount = Object.values(state.agents).filter((a) => a.lastStatus === "running").length;
      const time = new Date().toLocaleTimeString();
      console.log(`[${time}] heartbeat: ${agentCount} agents, ${runningCount} running, ${errorCount} errors, ${files.length} files`);
    }
  }

  // Runs one poll unless one is already in progress; never rejects.
  async function safePoll() {
    if (pollInFlight) return;
    pollInFlight = true;
    try {
      await poll();
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.error(`[agentflow] Poll failed: ${msg}`);
    } finally {
      pollInFlight = false;
    }
  }

  // safePoll() handles its own errors, so these promises are intentionally not awaited.
  void safePoll();
  setInterval(() => {
    void safePoll();
  }, config.pollIntervalMs);

  function shutdown() {
    console.log("\nagentflow watch stopped.");
    saveWatchState(config.stateFilePath, state);
    process.exit(0);
  }
  process.on("SIGINT", shutdown);
  process.on("SIGTERM", shutdown);
}
|
|
1687
|
+
|
|
1172
1688
|
export {
|
|
1173
1689
|
createGraphBuilder,
|
|
1174
1690
|
loadGraph,
|
|
@@ -1188,5 +1704,6 @@ export {
|
|
|
1188
1704
|
groupByTraceId,
|
|
1189
1705
|
stitchTrace,
|
|
1190
1706
|
getTraceTree,
|
|
1191
|
-
startLive
|
|
1707
|
+
startLive,
|
|
1708
|
+
startWatch
|
|
1192
1709
|
};
|