sentinelayer-cli 0.6.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +996 -996
- package/bin/create-sentinelayer.js +5 -5
- package/bin/sentinelayer-cli.js +4 -4
- package/bin/sl.js +5 -5
- package/package.json +64 -63
- package/src/agents/jules/config/definition.js +160 -160
- package/src/agents/jules/config/system-prompt.js +182 -182
- package/src/agents/jules/error-intake.js +51 -51
- package/src/agents/jules/fix-cycle.js +17 -17
- package/src/agents/jules/loop.js +457 -450
- package/src/agents/jules/pulse.js +10 -10
- package/src/agents/jules/stream.js +187 -186
- package/src/agents/jules/swarm/file-scanner.js +74 -74
- package/src/agents/jules/swarm/index.js +11 -11
- package/src/agents/jules/swarm/orchestrator.js +362 -362
- package/src/agents/jules/swarm/pattern-hunter.js +123 -123
- package/src/agents/jules/swarm/sub-agent.js +311 -309
- package/src/agents/jules/tools/aidenid-email.js +189 -189
- package/src/agents/jules/tools/auth-audit.js +1699 -1691
- package/src/agents/jules/tools/dispatch.js +340 -335
- package/src/agents/jules/tools/file-edit.js +2 -2
- package/src/agents/jules/tools/file-read.js +2 -2
- package/src/agents/jules/tools/frontend-analyze.js +570 -570
- package/src/agents/jules/tools/glob.js +2 -2
- package/src/agents/jules/tools/grep.js +2 -2
- package/src/agents/jules/tools/index.js +29 -29
- package/src/agents/jules/tools/path-guards.js +2 -2
- package/src/agents/jules/tools/runtime-audit.js +507 -507
- package/src/agents/jules/tools/shell.js +2 -2
- package/src/agents/jules/tools/url-policy.js +100 -100
- package/src/agents/persona-visuals.js +64 -61
- package/src/agents/shared-tools/dispatch-core.js +320 -315
- package/src/agents/shared-tools/file-edit.js +180 -180
- package/src/agents/shared-tools/file-read.js +100 -100
- package/src/agents/shared-tools/glob.js +168 -168
- package/src/agents/shared-tools/grep.js +228 -228
- package/src/agents/shared-tools/index.js +46 -46
- package/src/agents/shared-tools/path-guards.js +161 -161
- package/src/agents/shared-tools/shell.js +383 -383
- package/src/ai/aidenid.js +1021 -1009
- package/src/ai/client.js +553 -553
- package/src/ai/domain-target-store.js +268 -268
- package/src/ai/identity-store.js +270 -270
- package/src/ai/proxy.js +137 -137
- package/src/ai/site-store.js +145 -145
- package/src/audit/agents/architecture.js +180 -180
- package/src/audit/agents/compliance.js +179 -179
- package/src/audit/agents/documentation.js +165 -165
- package/src/audit/agents/performance.js +145 -145
- package/src/audit/agents/security.js +215 -215
- package/src/audit/agents/testing.js +172 -172
- package/src/audit/orchestrator.js +557 -557
- package/src/audit/package.js +204 -204
- package/src/audit/registry.js +284 -284
- package/src/audit/replay.js +103 -103
- package/src/auth/gate.js +400 -371
- package/src/auth/http.js +681 -611
- package/src/auth/service.js +1106 -1106
- package/src/auth/session-store.js +813 -813
- package/src/cli.js +257 -252
- package/src/commands/ai/identity-lifecycle.js +1338 -1338
- package/src/commands/ai/provision-governance.js +1272 -1272
- package/src/commands/ai/shared.js +147 -147
- package/src/commands/ai.js +11 -11
- package/src/commands/apply.js +12 -12
- package/src/commands/audit.js +1171 -1166
- package/src/commands/auth.js +419 -419
- package/src/commands/chat.js +191 -191
- package/src/commands/config.js +184 -184
- package/src/commands/cost.js +311 -311
- package/src/commands/daemon/core.js +850 -850
- package/src/commands/daemon/extended.js +1048 -1048
- package/src/commands/daemon/shared.js +213 -213
- package/src/commands/daemon.js +11 -11
- package/src/commands/guide.js +174 -174
- package/src/commands/ingest.js +58 -58
- package/src/commands/init.js +55 -55
- package/src/commands/legacy-args.js +10 -10
- package/src/commands/mcp.js +461 -461
- package/src/commands/omargate.js +29 -29
- package/src/commands/persona.js +20 -20
- package/src/commands/plugin.js +260 -260
- package/src/commands/policy.js +132 -132
- package/src/commands/prompt.js +238 -238
- package/src/commands/review.js +704 -704
- package/src/commands/scan.js +872 -872
- package/src/commands/session.js +590 -0
- package/src/commands/spec.js +778 -716
- package/src/commands/swarm.js +651 -651
- package/src/commands/telemetry.js +202 -202
- package/src/commands/watch.js +511 -511
- package/src/config/agent-dictionary.js +182 -182
- package/src/config/io.js +56 -56
- package/src/config/paths.js +18 -18
- package/src/config/schema.js +55 -55
- package/src/config/service.js +184 -184
- package/src/cost/budget.js +235 -235
- package/src/cost/history.js +188 -188
- package/src/cost/tracker.js +171 -171
- package/src/daemon/artifact-lineage.js +534 -534
- package/src/daemon/assignment-ledger.js +966 -770
- package/src/daemon/ast-parser-layer.js +258 -258
- package/src/daemon/budget-governor.js +633 -633
- package/src/daemon/callgraph-overlay.js +646 -646
- package/src/daemon/error-worker.js +1209 -626
- package/src/daemon/fix-cycle.js +384 -377
- package/src/daemon/hybrid-mapper.js +929 -929
- package/src/daemon/ingest-refresh.js +10 -9
- package/src/daemon/jira-lifecycle.js +767 -632
- package/src/daemon/operator-control.js +657 -657
- package/src/daemon/pulse.js +327 -327
- package/src/daemon/reliability-lane.js +471 -471
- package/src/daemon/scope-engine.js +1068 -0
- package/src/daemon/watchdog.js +971 -971
- package/src/events/schema.js +190 -0
- package/src/guide/generator.js +316 -316
- package/src/ingest/engine.js +918 -918
- package/src/interactive/index.js +97 -97
- package/src/legacy-cli.js +3161 -2994
- package/src/mcp/registry.js +695 -695
- package/src/memory/blackboard.js +301 -301
- package/src/memory/retrieval.js +581 -581
- package/src/plugin/manifest.js +553 -553
- package/src/policy/packs.js +144 -144
- package/src/prompt/generator.js +136 -118
- package/src/review/ai-review.js +679 -679
- package/src/review/local-review.js +1351 -1305
- package/src/review/omargate-interactive.js +68 -68
- package/src/review/omargate-orchestrator.js +404 -300
- package/src/review/persona-prompts.js +296 -296
- package/src/review/replay.js +235 -235
- package/src/review/report.js +664 -664
- package/src/review/scan-modes.js +48 -42
- package/src/review/spec-binding.js +487 -487
- package/src/scaffold/generator.js +67 -67
- package/src/scaffold/templates.js +150 -150
- package/src/scan/generator.js +418 -418
- package/src/scan/gh-secrets.js +107 -107
- package/src/session/agent-registry.js +352 -0
- package/src/session/daemon.js +801 -0
- package/src/session/paths.js +33 -0
- package/src/session/runtime-bridge.js +739 -0
- package/src/session/store.js +388 -0
- package/src/session/stream.js +325 -0
- package/src/spec/generator.js +619 -519
- package/src/spec/regenerate.js +237 -237
- package/src/spec/templates.js +91 -91
- package/src/swarm/dashboard.js +247 -247
- package/src/swarm/factory.js +363 -363
- package/src/swarm/pentest.js +934 -934
- package/src/swarm/registry.js +419 -419
- package/src/swarm/report.js +158 -158
- package/src/swarm/runtime.js +576 -576
- package/src/swarm/scenario-dsl.js +272 -272
- package/src/telemetry/ledger.js +302 -302
- package/src/telemetry/session-tracker.js +234 -234
- package/src/telemetry/sync.js +203 -203
- package/src/ui/command-hints.js +13 -13
- package/src/ui/markdown.js +220 -220
package/src/daemon/watchdog.js
CHANGED
|
@@ -1,971 +1,971 @@
|
|
|
1
|
-
import fsp from "node:fs/promises";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
|
|
4
|
-
import { parse as parseYaml } from "yaml";
|
|
5
|
-
|
|
6
|
-
import { listAssignments, resolveAssignmentLedgerStorage } from "./assignment-ledger.js";
|
|
7
|
-
import { listBudgetStates } from "./budget-governor.js";
|
|
8
|
-
import { listErrorQueue, resolveErrorDaemonStorage } from "./error-worker.js";
|
|
9
|
-
|
|
10
|
-
// Version tag for watchdog output; its consumer is not in the visible part of this file.
const WATCHDOG_SCHEMA_VERSION = "1.0.0";
// Written as `schemaVersion` on every watchdog state object built in this module.
const STATE_SCHEMA_VERSION = "1.0.0";

// Assignment statuses considered active (NOTE(review): consumer not visible here — confirm usage).
const ACTIVE_ASSIGNMENT_STATUSES = new Set(["CLAIMED", "IN_PROGRESS", "BLOCKED"]);

/** Event types the watchdog recognises; frozen so importers cannot mutate the list. */
export const WATCHDOG_EVENT_TYPES = Object.freeze([
  "agent_stuck",
  "budget_warning",
  "alert_recovered",
  "pr_merged",
  "audit_complete",
  "kill_switch_activated",
]);
// Membership lookup for validating event types read from state or config.
const WATCHDOG_EVENT_SET = new Set(WATCHDOG_EVENT_TYPES);

/** Signal codes a detection can carry; frozen so importers cannot mutate the list. */
export const WATCHDOG_SIGNAL_CODES = Object.freeze([
  "NO_TOOL_CALL",
  "REPEATED_FILE_READ",
  "BUDGET_WARNING_NO_FINDINGS",
  "TURN_STALL",
]);
// Membership lookup for validating signal codes read from persisted state.
const WATCHDOG_SIGNAL_SET = new Set(WATCHDOG_SIGNAL_CODES);
|
|
33
|
-
|
|
34
|
-
/**
 * Coerce a value to a trimmed string.
 *
 * Every falsy input (undefined, null, "", 0, false, NaN) becomes "".
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeString(value) {
  const text = value ? String(value) : "";
  return text.trim();
}
|
|
37
|
-
|
|
38
|
-
/**
 * Re-serialize a timestamp as an ISO-8601 string.
 *
 * @param {unknown} value - Anything `Date.parse` may understand once stringified.
 * @param {string} [fallbackIso] - Returned as-is (not validated) when `value`
 *   is blank or unparseable; defaults to the current time.
 * @returns {string}
 */
function normalizeIsoTimestamp(value, fallbackIso = new Date().toISOString()) {
  const text = normalizeString(value);
  const epochMs = text ? Date.parse(text) : Number.NaN;
  return Number.isFinite(epochMs) ? new Date(epochMs).toISOString() : fallbackIso;
}
|
|
49
|
-
|
|
50
|
-
/**
 * Parse a value as a finite number, returning `fallback` when it is not one.
 *
 * @param {unknown} value
 * @param {number} [fallback=0]
 * @returns {number}
 */
function normalizeNumber(value, fallback = 0) {
  // Number(null) and Number("") are both 0, which would make an absent value
  // indistinguishable from a real zero. Treat null and blank strings like
  // undefined so the caller's fallback applies.
  if (value === null || (typeof value === "string" && value.trim() === "")) {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
57
|
-
|
|
58
|
-
/**
 * Coerce a value to an integer of at least 1.
 *
 * Positive fractions are floored and then clamped up to 1. Anything that is
 * not a finite number greater than zero yields `fallbackValue` unchanged.
 *
 * @param {unknown} value
 * @param {number} fallbackValue
 * @returns {number}
 */
function normalizePositiveInteger(value, fallbackValue) {
  const candidate = normalizeNumber(value, fallbackValue);
  const isUsable = Number.isFinite(candidate) && candidate > 0;
  return isUsable ? Math.max(1, Math.floor(candidate)) : fallbackValue;
}
|
|
65
|
-
|
|
66
|
-
/**
 * Coerce a value to a finite number that is zero or greater.
 *
 * @param {unknown} value
 * @param {number} [fallbackValue=0] - Returned when the value is non-finite or negative.
 * @returns {number}
 */
function normalizeNonNegativeNumber(value, fallbackValue = 0) {
  const candidate = normalizeNumber(value, fallbackValue);
  return Number.isFinite(candidate) && candidate >= 0 ? candidate : fallbackValue;
}
|
|
73
|
-
|
|
74
|
-
/**
 * Return a shallow copy of `value` when it is a non-null, non-array object;
 * otherwise return a fresh empty object.
 *
 * @param {unknown} value
 * @returns {object}
 */
function normalizeObject(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  return isRecord ? { ...value } : {};
}
|
|
80
|
-
|
|
81
|
-
/**
 * Interpret common boolean spellings.
 *
 * Booleans pass through. "true"/"1"/"yes" and "false"/"0"/"no" (any case,
 * surrounding whitespace ignored) map to true/false, as do the numbers 1 and
 * 0. Everything else yields `fallbackValue`.
 *
 * @param {unknown} value
 * @param {boolean} [fallbackValue=false]
 * @returns {boolean}
 */
function normalizeBoolean(value, fallbackValue = false) {
  if (typeof value === "boolean") {
    return value;
  }
  // Stringify with `??` rather than a truthiness check: a truthiness-based
  // coercion turns numeric 0 into "" and loses it, while numeric 1 survives
  // as "1". Keeping 0 as "0" makes both numbers behave symmetrically.
  const normalized = String(value ?? "").trim().toLowerCase();
  switch (normalized) {
    case "true":
    case "1":
    case "yes":
      return true;
    case "false":
    case "0":
    case "no":
      return false;
    default:
      return fallbackValue;
  }
}
|
|
97
|
-
|
|
98
|
-
/**
 * Expand `${NAME}` placeholders in a string using values from `env`.
 *
 * Only names made of A-Z, 0-9 and underscore are recognised. A placeholder
 * whose variable is missing or blank expands to the empty string.
 *
 * @param {unknown} value - Template text; blank input returns "".
 * @param {Record<string, unknown>} env - Lookup table for placeholder names.
 * @returns {string}
 */
function resolveEnvTemplate(value, env) {
  const template = normalizeString(value);
  if (template === "") {
    return "";
  }
  const substitute = (_match, variableName) => normalizeString(env[variableName]);
  return template.replace(/\$\{([A-Z0-9_]+)\}/g, substitute);
}
|
|
105
|
-
|
|
106
|
-
/**
 * Whole seconds elapsed between two timestamps, never negative.
 *
 * A blank or unparseable `previousIso` is replaced by `nowIso`, which makes
 * the elapsed time 0.
 *
 * @param {unknown} previousIso
 * @param {unknown} nowIso
 * @returns {number|null} `null` when either side still fails to parse.
 */
function computeSecondsSince(previousIso, nowIso) {
  const startMs = Date.parse(normalizeIsoTimestamp(previousIso, nowIso));
  const endMs = Date.parse(normalizeIsoTimestamp(nowIso, new Date().toISOString()));
  const bothValid = Number.isFinite(startMs) && Number.isFinite(endMs);
  if (!bothValid) {
    return null;
  }
  const elapsedSeconds = Math.floor((endMs - startMs) / 1000);
  return Math.max(0, elapsedSeconds);
}
|
|
114
|
-
|
|
115
|
-
/**
 * Pick the best available "last activity" timestamp for an assignment.
 *
 * Preference order: budgetSnapshot.lastToolCallAt, budgetSnapshot.lastActionAt,
 * assignment.heartbeatAt, assignment.updatedAt. The first non-blank one wins.
 *
 * @param {object} [assignment]
 * @returns {string} The chosen value as a trimmed string, or "" if none is set.
 */
function pickLastToolCallAt(assignment = {}) {
  const snapshot = normalizeObject(assignment.budgetSnapshot);
  const candidates = [
    snapshot.lastToolCallAt,
    snapshot.lastActionAt,
    assignment.heartbeatAt,
    assignment.updatedAt,
  ];
  for (const candidate of candidates) {
    const text = normalizeString(candidate);
    if (text) {
      return text;
    }
  }
  return "";
}
|
|
125
|
-
|
|
126
|
-
/**
 * Collect file paths recorded in a budget snapshot's read history.
 *
 * Reads the array-valued keys `recentFileReads`, `fileReadHistory` and
 * `fileReads` (in that order) and concatenates their entries. An entry may be
 * a string, or an object carrying `path`, `file` or `filePath`. Blank or
 * unrecognised entries are dropped.
 *
 * @param {object} [snapshot]
 * @returns {string[]}
 */
function extractRecentFileReads(snapshot = {}) {
  const source = normalizeObject(snapshot);

  // Resolve one history entry to a path, or "" when it carries none.
  const toPath = (entry) => {
    if (typeof entry === "string") {
      return normalizeString(entry);
    }
    if (entry && typeof entry === "object") {
      return (
        normalizeString(entry.path) ||
        normalizeString(entry.file) ||
        normalizeString(entry.filePath)
      );
    }
    return "";
  };

  return ["recentFileReads", "fileReadHistory", "fileReads"]
    .map((key) => source[key])
    .filter((entries) => Array.isArray(entries))
    .flatMap((entries) => entries.map(toPath).filter(Boolean));
}
|
|
154
|
-
|
|
155
|
-
/**
 * Measure how many times the final value repeats consecutively at the end of
 * a list. Blank entries are discarded before counting.
 *
 * @param {unknown[]} [values]
 * @returns {{repeatedValue: string, repeatCount: number}} The trailing value
 *   and its run length; `{ repeatedValue: "", repeatCount: 0 }` when there is
 *   nothing to count.
 */
function computeRepeatedTailCount(values = []) {
  const cleaned = Array.isArray(values)
    ? values.map((entry) => normalizeString(entry)).filter(Boolean)
    : [];
  if (cleaned.length === 0) {
    return { repeatedValue: "", repeatCount: 0 };
  }

  const lastIndex = cleaned.length - 1;
  const tail = cleaned[lastIndex];
  // Walk backwards until the run of identical trailing values ends.
  let cursor = lastIndex;
  while (cursor >= 0 && cleaned[cursor] === tail) {
    cursor -= 1;
  }
  return { repeatedValue: tail, repeatCount: lastIndex - cursor };
}
|
|
182
|
-
|
|
183
|
-
/**
 * Highest used/limit ratio across the budget dimensions of a record.
 *
 * Compares `record.usage` against `record.budget` for tokens, cost, runtime
 * and tool calls. Dimensions whose limit is missing or not positive are
 * ignored.
 *
 * @param {object} [record]
 * @returns {number} The largest ratio, or 0 when no dimension has a limit.
 */
function computeBudgetUsageRatio(record = {}) {
  const usage = normalizeObject(record.usage);
  const budget = normalizeObject(record.budget);
  const dimensions = [
    { usedKey: "tokensUsed", limitKey: "maxTokens" },
    { usedKey: "costUsd", limitKey: "maxCostUsd" },
    { usedKey: "runtimeMs", limitKey: "maxRuntimeMs" },
    { usedKey: "toolCalls", limitKey: "maxToolCalls" },
  ];

  // Ratios are never negative, so starting the fold at 0 also covers the
  // "no limits configured" case.
  return dimensions.reduce((highest, { usedKey, limitKey }) => {
    const used = normalizeNonNegativeNumber(usage[usedKey], 0);
    const limit = normalizeNonNegativeNumber(budget[limitKey], 0);
    return limit > 0 ? Math.max(highest, used / limit) : highest;
  }, 0);
}
|
|
205
|
-
|
|
206
|
-
/**
 * Normalize a severity label to one of "P0".."P3" (case-insensitive).
 * Anything unrecognised becomes "P3".
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeSeverity(value) {
  const level = normalizeString(value).toUpperCase();
  return ["P0", "P1", "P2", "P3"].includes(level) ? level : "P3";
}
|
|
213
|
-
|
|
214
|
-
/**
 * Build an empty watchdog state: no active alerts and no recorded runs.
 *
 * @param {string} nowIso - Timestamp stored as `generatedAt`.
 * @returns {object}
 */
function createInitialState(nowIso) {
  const generatedAt = normalizeIsoTimestamp(nowIso, nowIso);
  return {
    schemaVersion: STATE_SCHEMA_VERSION,
    generatedAt,
    activeAlerts: {},
    runCount: 0,
    lastRunId: null,
    lastRunAt: null,
  };
}
|
|
224
|
-
|
|
225
|
-
/**
 * Sanitize a watchdog state object into the current shape.
 *
 * Unknown event types fall back to "agent_stuck" and unknown signal codes to
 * "NO_TOOL_CALL". Timestamps are re-serialized, with `nowIso` standing in for
 * missing or unparseable ones. Alerts with a blank id, or whose value is not
 * an object, are dropped.
 *
 * @param {object} [state] - Raw state; non-object input is treated as empty.
 * @param {string} [nowIso] - Fallback timestamp; defaults to the current time.
 * @returns {object} State with `schemaVersion`, `generatedAt`, `activeAlerts`,
 *   `runCount`, `lastRunId` and `lastRunAt`.
 */
function normalizeState(state = {}, nowIso = new Date().toISOString()) {
  // The default parameter only covers `undefined`. A state that is literally
  // `null` (valid JSON) would otherwise throw on the first property access.
  const source = normalizeObject(state);
  const rawAlerts =
    source.activeAlerts && typeof source.activeAlerts === "object" ? source.activeAlerts : {};

  const activeAlerts = {};
  for (const [alertId, rawAlert] of Object.entries(rawAlerts)) {
    if (!normalizeString(alertId)) {
      continue;
    }
    // A null or primitive entry carries no alert data. Skip it instead of
    // crashing on it or inventing a default alert with an empty work item.
    if (!rawAlert || typeof rawAlert !== "object" || Array.isArray(rawAlert)) {
      continue;
    }
    const eventType = normalizeString(rawAlert.eventType);
    const signalCode = normalizeString(rawAlert.signalCode);
    activeAlerts[alertId] = {
      alertId,
      eventType: WATCHDOG_EVENT_SET.has(eventType) ? eventType : "agent_stuck",
      signalCode: WATCHDOG_SIGNAL_SET.has(signalCode) ? signalCode : "NO_TOOL_CALL",
      workItemId: normalizeString(rawAlert.workItemId),
      agentIdentity: normalizeString(rawAlert.agentIdentity),
      firstSeenAt: normalizeIsoTimestamp(rawAlert.firstSeenAt, nowIso),
      lastSeenAt: normalizeIsoTimestamp(rawAlert.lastSeenAt, nowIso),
      message: normalizeString(rawAlert.message),
      severity: normalizeSeverity(rawAlert.severity),
    };
  }

  return {
    schemaVersion: STATE_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(source.generatedAt, nowIso),
    activeAlerts,
    runCount: Math.max(0, Math.floor(normalizeNumber(source.runCount, 0))),
    lastRunId: normalizeString(source.lastRunId) || null,
    lastRunAt: source.lastRunAt ? normalizeIsoTimestamp(source.lastRunAt, nowIso) : null,
  };
}
|
|
257
|
-
|
|
258
|
-
/**
 * Read and parse a UTF-8 JSON file.
 *
 * @param {string} filePath
 * @param {() => unknown} defaultFactory - Called to produce the result when
 *   the file does not exist (ENOENT).
 * @returns {Promise<unknown>} Parsed JSON, or the factory's value.
 * @throws Any other read error, and JSON syntax errors, propagate unchanged.
 */
async function readJsonFile(filePath, defaultFactory) {
  let raw;
  try {
    raw = await fsp.readFile(filePath, "utf-8");
  } catch (error) {
    const isMissingFile = Boolean(error) && typeof error === "object" && error.code === "ENOENT";
    if (isMissingFile) {
      return defaultFactory();
    }
    throw error;
  }
  return JSON.parse(raw);
}
|
|
269
|
-
|
|
270
|
-
/**
 * Write a value as pretty-printed (2-space) JSON with a trailing newline,
 * creating the parent directory first if needed.
 *
 * @param {string} filePath
 * @param {unknown} payload
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, payload) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const serialized = `${JSON.stringify(payload, null, 2)}\n`;
  await fsp.writeFile(filePath, serialized, "utf-8");
}
|
|
274
|
-
|
|
275
|
-
/**
 * Append one value to a file as a single compact JSON line, creating the
 * parent directory first if needed.
 *
 * @param {string} filePath
 * @param {unknown} payload
 * @returns {Promise<void>}
 */
async function appendEvent(filePath, payload) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const line = `${JSON.stringify(payload)}\n`;
  await fsp.appendFile(filePath, line, "utf-8");
}
|
|
279
|
-
|
|
280
|
-
/**
 * Build a run identifier of the form `watchdog-<timestamp>-<count>`.
 *
 * The timestamp is the normalized ISO string with every ":" and "." replaced
 * by "-". The count is left-padded with zeros to at least four characters.
 *
 * @param {unknown} nowIso - Run timestamp; the current time is used if invalid.
 * @param {unknown} count - Sequence number, stringified as-is.
 * @returns {string}
 */
function buildRunId(nowIso, count) {
  const isoStamp = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const stamp = isoStamp.replace(/[:.]/g, "-");
  const sequence = String(count).padStart(4, "0");
  return `watchdog-${stamp}-${sequence}`;
}
|
|
284
|
-
|
|
285
|
-
/**
 * Validate one alert-channel config entry and resolve its `${VAR}` templates.
 *
 * Supported types (case-insensitive):
 * - "slack": needs a webhook URL from `webhook_url`, `webhookUrl` or `url`.
 * - "telegram": needs a bot token (`bot_token` / `botToken`) and a chat id
 *   (`chat_id` / `chatId`).
 *
 * @param {object} [channel] - Raw channel entry; non-object input is rejected.
 * @param {Record<string, unknown>} [env=process.env] - Source for template variables.
 * @returns {object|null} The normalized channel, or `null` when the type is
 *   unsupported or a required field resolves to blank.
 */
function normalizeChannel(channel = {}, env = process.env) {
  // The default parameter only covers `undefined`. A `null` entry (for
  // example an empty YAML list item) must be rejected like any other
  // unusable channel instead of throwing and aborting config loading.
  const source = normalizeObject(channel);
  const type = normalizeString(source.type).toLowerCase();

  if (type === "slack") {
    const webhookUrl = resolveEnvTemplate(
      source.webhook_url || source.webhookUrl || source.url || "",
      env
    );
    return webhookUrl ? { type: "slack", webhookUrl } : null;
  }

  if (type === "telegram") {
    const botToken = resolveEnvTemplate(source.bot_token || source.botToken || "", env);
    const chatId = resolveEnvTemplate(source.chat_id || source.chatId || "", env);
    return botToken && chatId ? { type: "telegram", botToken, chatId } : null;
  }

  return null;
}
|
|
312
|
-
|
|
313
|
-
/**
 * Load the `alerts` section of `<targetPath>/.sentinelayer.yml`.
 *
 * Defaults: no channels, frequency "smart", and the events agent_stuck,
 * budget_warning and alert_recovered. Configured events are filtered to the
 * known event types, and an empty result falls back to the default events.
 * Channels that fail validation are dropped.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Directory containing the config file.
 * @param {Record<string, unknown>} [options.env=process.env] - Used to resolve
 *   `${VAR}` templates in channel settings.
 * @returns {Promise<{configPath: string, exists: boolean, channels: object[],
 *   frequency: string, events: string[]}>}
 * @throws Read errors other than a missing file, and YAML parse errors, propagate.
 */
async function loadWatchdogConfig({ targetPath = ".", env = process.env } = {}) {
  const rootDir = path.resolve(String(targetPath || "."));
  const configPath = path.join(rootDir, ".sentinelayer.yml");
  const defaults = {
    channels: [],
    frequency: "smart",
    events: ["agent_stuck", "budget_warning", "alert_recovered"],
  };

  let rawYaml;
  try {
    rawYaml = await fsp.readFile(configPath, "utf-8");
  } catch (error) {
    const isMissingFile = Boolean(error) && typeof error === "object" && error.code === "ENOENT";
    if (!isMissingFile) {
      throw error;
    }
    // No config file: report defaults and flag that nothing was found.
    return {
      configPath,
      exists: false,
      channels: [],
      frequency: defaults.frequency,
      events: defaults.events,
    };
  }

  const parsed = parseYaml(rawYaml) || {};
  const alerts = typeof parsed === "object" ? normalizeObject(parsed.alerts) : {};

  let channels = [];
  if (Array.isArray(alerts.channels)) {
    channels = alerts.channels
      .map((channel) => normalizeChannel(channel, env))
      .filter(Boolean);
  }

  let events = defaults.events;
  if (Array.isArray(alerts.events)) {
    events = alerts.events
      .map((eventType) => normalizeString(eventType))
      .filter((eventType) => WATCHDOG_EVENT_SET.has(eventType));
  }

  const frequency = normalizeString(alerts.frequency).toLowerCase() || defaults.frequency;

  return {
    configPath,
    exists: true,
    channels,
    frequency,
    events: events.length > 0 ? events : defaults.events,
  };
}
|
|
352
|
-
|
|
353
|
-
/**
 * Assemble a detection record. Its `alertId` is `<workItemId>:<signalCode>`,
 * so the same signal on the same work item always maps to the same alert.
 *
 * @param {object} fields
 * @param {string} fields.eventType
 * @param {string} fields.signalCode
 * @param {string} fields.workItemId
 * @param {string} fields.agentIdentity
 * @param {string} fields.severity
 * @param {string} fields.message
 * @param {object} [fields.details={}] - Signal-specific measurements.
 * @returns {object}
 */
function buildDetection(fields) {
  const {
    eventType,
    signalCode,
    workItemId,
    agentIdentity,
    severity,
    message,
    details = {},
  } = fields;
  const alertId = `${workItemId}:${signalCode}`;
  return {
    alertId,
    eventType,
    signalCode,
    workItemId,
    agentIdentity,
    severity,
    message,
    details,
  };
}
|
|
373
|
-
|
|
374
|
-
/**
 * Evaluate one assignment against the four watchdog signals and return a
 * detection for each that fires, in this order:
 *
 * 1. NO_TOOL_CALL: seconds since the last recorded activity reach `noToolCallSeconds`.
 * 2. REPEATED_FILE_READ: the same file ends the read history at least
 *    `repeatedFileReadsThreshold` times in a row.
 * 3. TURN_STALL: turns since the last progress turn reach `turnStallTurns`.
 * 4. BUDGET_WARNING_NO_FINDINGS: budget usage ratio reaches
 *    `budgetWarningThreshold` while no findings have been produced.
 *
 * @param {object} params
 * @param {object} params.assignment - Supplies workItemId, assignedAgentIdentity and budgetSnapshot.
 * @param {object} [params.queueItem] - Supplies severity and metadata finding counts.
 * @param {object} [params.budgetRecord] - Supplies usage, budget and lifecycleState.
 * @param {string} params.nowIso - Evaluation time.
 * @param {number} params.noToolCallSeconds
 * @param {number} params.repeatedFileReadsThreshold
 * @param {number} params.budgetWarningThreshold
 * @param {number} params.turnStallTurns
 * @returns {object[]} Detections built by `buildDetection`.
 */
function evaluateWatchdogSignals({
  assignment,
  queueItem,
  budgetRecord,
  nowIso,
  noToolCallSeconds,
  repeatedFileReadsThreshold,
  budgetWarningThreshold,
  turnStallTurns,
}) {
  const workItemId = normalizeString(assignment.workItemId);
  const agentIdentity = normalizeString(assignment.assignedAgentIdentity) || "unassigned";
  const severity = normalizeSeverity(queueItem?.severity);
  const budgetSnapshot = normalizeObject(assignment.budgetSnapshot);

  const detections = [];
  // Every detection for this assignment shares the same identity fields.
  const report = (eventType, signalCode, message, details) => {
    detections.push(
      buildDetection({
        eventType,
        signalCode,
        workItemId,
        agentIdentity,
        severity,
        message,
        details,
      })
    );
  };

  // Signal 1: no recorded activity for too long.
  const lastToolCallAt = pickLastToolCallAt(assignment);
  const idleSeconds = computeSecondsSince(lastToolCallAt, nowIso);
  if (idleSeconds !== null && idleSeconds >= noToolCallSeconds) {
    report(
      "agent_stuck",
      "NO_TOOL_CALL",
      `No tool calls observed for ${idleSeconds}s (threshold ${noToolCallSeconds}s).`,
      {
        idleSeconds,
        thresholdSeconds: noToolCallSeconds,
        lastToolCallAt: normalizeIsoTimestamp(lastToolCallAt, nowIso),
      }
    );
  }

  // Signal 2: the same file read over and over at the end of the history.
  const { repeatedValue, repeatCount } = computeRepeatedTailCount(
    extractRecentFileReads(budgetSnapshot)
  );
  if (repeatCount >= repeatedFileReadsThreshold) {
    report(
      "agent_stuck",
      "REPEATED_FILE_READ",
      `Repeated file read detected (${repeatCount}x): ${repeatedValue}`,
      {
        filePath: repeatedValue,
        repeatCount,
        threshold: repeatedFileReadsThreshold,
      }
    );
  }

  // Signal 3: turns keep advancing without progress. When the snapshot has
  // neither lastProgressTurn nor lastFindingTurn, the current turn is used,
  // so the stall length is 0 and nothing fires.
  const turnCount = Math.floor(normalizeNonNegativeNumber(budgetSnapshot.turnCount, 0));
  const progressSource =
    budgetSnapshot.lastProgressTurn ?? budgetSnapshot.lastFindingTurn ?? turnCount;
  const lastProgressTurn = Math.floor(normalizeNonNegativeNumber(progressSource, turnCount));
  const stalledTurns = Math.max(0, turnCount - lastProgressTurn);
  if (turnCount > 0 && stalledTurns >= turnStallTurns) {
    report(
      "agent_stuck",
      "TURN_STALL",
      `Turn progression stalled for ${stalledTurns} turns (threshold ${turnStallTurns}).`,
      {
        turnCount,
        lastProgressTurn,
        stalledTurns,
        threshold: turnStallTurns,
      }
    );
  }

  // Signal 4: budget is being consumed but nothing has been found.
  const usageRatio = computeBudgetUsageRatio(budgetRecord || {});
  const findingsSource =
    budgetSnapshot.findingsProduced ??
    queueItem?.metadata?.findingsProduced ??
    queueItem?.metadata?.findingsCount ??
    0;
  const findingsProduced = Math.floor(normalizeNonNegativeNumber(findingsSource, 0));
  if (usageRatio >= budgetWarningThreshold && findingsProduced <= 0) {
    report(
      "budget_warning",
      "BUDGET_WARNING_NO_FINDINGS",
      `Budget usage ${(usageRatio * 100).toFixed(1)}% with no findings produced.`,
      {
        usageRatio: Number(usageRatio.toFixed(6)),
        threshold: budgetWarningThreshold,
        findingsProduced,
        lifecycleState: normalizeString(budgetRecord?.lifecycleState) || "WITHIN_BUDGET",
      }
    );
  }

  return detections;
}
|
|
488
|
-
|
|
489
|
-
/**
 * Convert an alert into the record stored under `activeAlerts`.
 *
 * `lastSeenAt` is always set to `nowIso`. `firstSeenAt` keeps the alert's own
 * value when present and otherwise also becomes `nowIso`. The alert's
 * `details` are not carried over.
 *
 * @param {object} [alert]
 * @param {string} [nowIso] - Defaults to the current time.
 * @returns {object}
 */
function toActiveAlertRecord(alert = {}, nowIso = new Date().toISOString()) {
  const { alertId, eventType, signalCode, workItemId, agentIdentity } = alert;
  const firstSeenAt = normalizeIsoTimestamp(alert.firstSeenAt || nowIso, nowIso);
  const lastSeenAt = normalizeIsoTimestamp(nowIso, nowIso);
  return {
    alertId,
    eventType,
    signalCode,
    workItemId,
    agentIdentity,
    firstSeenAt,
    lastSeenAt,
    message: normalizeString(alert.message),
    severity: normalizeSeverity(alert.severity),
  };
}
|
|
502
|
-
|
|
503
|
-
/**
 * Compare this run's detections with the previously active alerts.
 *
 * @param {object} params
 * @param {object[]} [params.detections=[]] - Detections from the current run.
 * @param {object} [params.previousState={}] - State whose `activeAlerts` map is the baseline.
 * @param {string} [params.nowIso] - Current run time; defaults to now.
 * @returns {{activeAlerts: object, activated: object[], stillActive: object[], recovered: object[]}}
 *   - `activeAlerts`: storable records for every current detection, keyed by alert id.
 *   - `activated`: detections whose alert id was not active before.
 *   - `stillActive`: detections already active, keeping their earlier `firstSeenAt`.
 *   - `recovered`: previously active alerts with no matching detection,
 *     reported as "alert_recovered" events.
 */
function buildAlertTransitions({
  detections = [],
  previousState = {},
  nowIso = new Date().toISOString(),
}) {
  const previousAlerts = normalizeObject(previousState.activeAlerts);
  const activeAlerts = {};
  const activated = [];
  const stillActive = [];
  const detectedIds = new Set();

  for (const detection of detections) {
    const { alertId } = detection;
    detectedIds.add(alertId);
    const previous = previousAlerts[alertId] || null;

    activeAlerts[alertId] = toActiveAlertRecord(
      { ...detection, firstSeenAt: previous?.firstSeenAt || nowIso },
      nowIso
    );

    const transition = {
      ...detection,
      firstSeenAt: previous ? previous.firstSeenAt : nowIso,
      lastSeenAt: nowIso,
    };
    const bucket = previous ? stillActive : activated;
    bucket.push(transition);
  }

  // Whatever was active before but was not detected this run has recovered.
  const recovered = Object.entries(previousAlerts)
    .filter(([alertId]) => !detectedIds.has(alertId))
    .map(([alertId, previous]) => ({
      alertId,
      eventType: "alert_recovered",
      signalCode: normalizeString(previous.signalCode),
      workItemId: normalizeString(previous.workItemId),
      agentIdentity: normalizeString(previous.agentIdentity),
      severity: normalizeSeverity(previous.severity),
      message: `Recovered: ${normalizeString(previous.message) || "watchdog signal cleared"}`,
      firstSeenAt: normalizeIsoTimestamp(previous.firstSeenAt, nowIso),
      lastSeenAt: normalizeIsoTimestamp(previous.lastSeenAt, nowIso),
      recoveredAt: normalizeIsoTimestamp(nowIso, nowIso),
    }));

  return { activeAlerts, activated, stillActive, recovered };
}
|
|
565
|
-
|
|
566
|
-
/**
 * Render an alert as a two-line plain-text notification: a headline, then
 * the alert's own message.
 *
 * "agent_stuck" headlines add idle time and budget percentage when those
 * details are positive. "budget_warning" headlines always include the usage
 * percentage. Any other event type gets a generic headline.
 *
 * @param {object} [alert]
 * @returns {string}
 */
function formatAlertMessage(alert = {}) {
  switch (normalizeString(alert.eventType)) {
    case "agent_stuck": {
      const idleSeconds = normalizeNumber(alert.details?.idleSeconds, 0);
      const usageRatio = normalizeNumber(alert.details?.usageRatio, 0);
      const idleSuffix = idleSeconds > 0 ? ` | idle=${idleSeconds}s` : "";
      const budgetSuffix = usageRatio > 0 ? ` | budget=${(usageRatio * 100).toFixed(1)}%` : "";
      return `[SentinelLayer] Agent "${alert.agentIdentity}" stuck (${alert.signalCode}) on ${alert.workItemId}${idleSuffix}${budgetSuffix}\n${alert.message}`;
    }
    case "budget_warning": {
      const usageRatio = normalizeNumber(alert.details?.usageRatio, 0);
      const usagePercent = (usageRatio * 100).toFixed(1);
      return `[SentinelLayer] Budget warning for ${alert.workItemId} (${alert.agentIdentity}) | usage=${usagePercent}%\n${alert.message}`;
    }
    default:
      return `[SentinelLayer] ${alert.eventType} ${alert.workItemId || ""} ${alert.agentIdentity || ""}\n${alert.message}`;
  }
}
|
|
580
|
-
|
|
581
|
-
/**
 * POST a text message to a Slack webhook as JSON (`{ text }`).
 *
 * @param {{webhookUrl: string}} channel
 * @param {string} message
 * @param {Function} fetchImpl - fetch-compatible function.
 * @returns {Promise<void>}
 * @throws {Error} When the response is not `ok`; the message includes the HTTP status.
 */
async function sendSlackAlert(channel, message, fetchImpl) {
  const url = channel.webhookUrl;
  const request = {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({ text: message }),
  };
  const response = await fetchImpl(url, request);
  if (response.ok) {
    return;
  }
  throw new Error(`Slack webhook returned ${response.status}.`);
}
|
|
595
|
-
|
|
596
|
-
/**
 * Send a text message through the Telegram Bot API `sendMessage` endpoint,
 * with link previews disabled.
 *
 * @param {{botToken: string, chatId: string}} channel - The bot token becomes part of the request URL.
 * @param {string} message
 * @param {Function} fetchImpl - fetch-compatible function.
 * @returns {Promise<void>}
 * @throws {Error} When the response is not `ok`; the message includes the HTTP status.
 */
async function sendTelegramAlert(channel, message, fetchImpl) {
  const endpoint = `https://api.telegram.org/bot${channel.botToken}/sendMessage`;
  const payload = {
    chat_id: channel.chatId,
    text: message,
    disable_web_page_preview: true,
  };
  const request = {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify(payload),
  };
  const response = await fetchImpl(endpoint, request);
  if (response.ok) {
    return;
  }
  throw new Error(`Telegram sendMessage returned ${response.status}.`);
}
|
|
613
|
-
|
|
614
|
-
/**
 * Deliver one alert to one channel and report the outcome. Delivery failures
 * are captured in the result's `error` field instead of being thrown.
 *
 * @param {object} params
 * @param {object} params.channel - Normalized channel; `type` selects the transport.
 * @param {object} params.alert - Alert to format and send.
 * @param {boolean} [params.execute=false] - When false nothing is sent and the
 *   result is flagged `dryRun: true`.
 * @param {Function} [params.fetchImpl=globalThis.fetch] - fetch-compatible function.
 * @returns {Promise<{channelType: string, alertId: string, eventType: string,
 *   sent: boolean, dryRun: boolean, message: string, error: string}>}
 */
async function dispatchAlertToChannel({
  channel,
  alert,
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const message = formatAlertMessage(alert);
  // All outcomes share the same shape and differ only in these three fields.
  const outcome = (sent, dryRun, error) => ({
    channelType: channel.type,
    alertId: alert.alertId,
    eventType: alert.eventType,
    sent,
    dryRun,
    message,
    error,
  });

  if (!execute) {
    return outcome(false, true, "");
  }
  if (typeof fetchImpl !== "function") {
    return outcome(false, false, "Fetch implementation is unavailable.");
  }

  try {
    switch (channel.type) {
      case "slack":
        await sendSlackAlert(channel, message, fetchImpl);
        break;
      case "telegram":
        await sendTelegramAlert(channel, message, fetchImpl);
        break;
      default:
        throw new Error(`Unsupported alert channel type '${channel.type}'.`);
    }
    return outcome(true, false, "");
  } catch (error) {
    return outcome(false, false, normalizeString(error?.message || error));
  }
}
|
|
672
|
-
|
|
673
|
-
/**
 * Fan alerts out to every configured channel.
 *
 * When `config.events` is a non-empty array, alerts whose `eventType` is not
 * listed are skipped; an empty or missing list lets every alert through.
 *
 * @param {object} options
 * @param {Iterable<object>} [options.alerts=[]] - Alerts to deliver.
 * @param {object} [options.config={}] - Reads `channels` and `events` (arrays).
 * @param {boolean} [options.execute=false] - Forwarded to each dispatch.
 * @param {Function} [options.fetchImpl=globalThis.fetch] - Forwarded to each dispatch.
 * @returns {Promise<object[]>} One result per (alert, channel) pair, alert-major order.
 */
async function dispatchAlerts({
  alerts = [],
  config = {},
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const channels = Array.isArray(config.channels) ? config.channels : [];
  const allowedEvents = new Set(Array.isArray(config.events) ? config.events : []);
  const pending = [];
  for (const alert of alerts) {
    const permitted = allowedEvents.size === 0 || allowedEvents.has(alert.eventType);
    if (permitted) {
      pending.push(
        ...channels.map((channel) =>
          dispatchAlertToChannel({
            channel,
            alert,
            execute,
            fetchImpl,
          })
        )
      );
    }
  }
  return Promise.all(pending);
}
|
|
699
|
-
|
|
700
|
-
/**
 * Resolve the error-daemon storage layout and add the watchdog's own paths.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to `resolveErrorDaemonStorage`.
 * @param {string} [options.outputDir=""] - Forwarded to `resolveErrorDaemonStorage`.
 * @param {object} [options.env] - Forwarded to `resolveErrorDaemonStorage`.
 * @param {string} [options.homeDir] - Forwarded to `resolveErrorDaemonStorage`.
 * @returns {Promise<object>} The daemon storage record plus `watchdogStatePath`,
 *   `watchdogEventsPath` and `watchdogRunsDir`, all located under its `baseDir`.
 */
export async function resolveWatchdogStorage({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
} = {}) {
  const daemonStorage = await resolveErrorDaemonStorage({
    targetPath,
    outputDir,
    env,
    homeDir,
  });
  const underBaseDir = (name) => path.join(daemonStorage.baseDir, name);
  return {
    ...daemonStorage,
    watchdogStatePath: underBaseDir("watchdog-state.json"),
    watchdogEventsPath: underBaseDir("watchdog-events.ndjson"),
    watchdogRunsDir: underBaseDir("watchdog-runs"),
  };
}
|
|
719
|
-
|
|
720
|
-
/**
 * Run one watchdog evaluation pass.
 *
 * Steps, in order: normalize the inputs, resolve storage and load the alert
 * config, load assignments / error queue / budget states / previous watchdog
 * state in parallel, evaluate every active assignment for stuck signals,
 * diff the detections against the previously active alerts, dispatch
 * notifications for newly activated and recovered alerts, then persist the
 * run artifact, the new state and an event-log line.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."]
 * @param {string} [options.outputDir=""]
 * @param {number} [options.noToolCallSeconds=60]
 * @param {number} [options.repeatedFileReadsThreshold=3]
 * @param {number} [options.budgetWarningThreshold=0.9] - Clamped to [0, 1].
 * @param {number} [options.turnStallTurns=5]
 * @param {boolean|string} [options.execute=false] - When falsy, notifications are dry-run.
 * @param {number} [options.limit=200] - Page size for the three listing calls.
 * @param {object} [options.env=process.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso] - Clock override; defaults to the current time.
 * @param {Function} [options.fetchImpl=globalThis.fetch]
 * @returns {Promise<object>} Storage paths, config location, run id/path, the
 *   new state, detections, activated/recovered alerts, notification results and
 *   the run summary.
 */
export async function runWatchdogTick({
  targetPath = ".",
  outputDir = "",
  noToolCallSeconds = 60,
  repeatedFileReadsThreshold = 3,
  budgetWarningThreshold = 0.9,
  turnStallTurns = 5,
  execute = false,
  limit = 200,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
  fetchImpl = globalThis.fetch,
} = {}) {
  const tickIso = normalizeIsoTimestamp(nowIso, new Date().toISOString());

  // Key order matters: this object is spread into the persisted run config.
  const thresholds = {
    noToolCallSeconds: normalizePositiveInteger(noToolCallSeconds, 60),
    repeatedFileReadsThreshold: normalizePositiveInteger(repeatedFileReadsThreshold, 3),
    budgetWarningThreshold: Math.max(
      0,
      Math.min(1, normalizeNonNegativeNumber(budgetWarningThreshold, 0.9))
    ),
    turnStallTurns: normalizePositiveInteger(turnStallTurns, 5),
  };
  const pageSize = normalizePositiveInteger(limit, 200);
  const shouldExecute = normalizeBoolean(execute, false);

  const location = { targetPath, outputDir, env, homeDir };
  const storage = await resolveWatchdogStorage(location);
  const config = await loadWatchdogConfig({ targetPath, env });

  const loadPreviousState = async () => {
    const stored = await readJsonFile(storage.watchdogStatePath, () =>
      createInitialState(tickIso)
    );
    return normalizeState(stored, tickIso);
  };

  const [assignments, queue, budgets, previousState] = await Promise.all([
    listAssignments({
      ...location,
      includeExpired: true,
      limit: pageSize,
      nowIso: tickIso,
    }),
    listErrorQueue({ ...location, limit: pageSize }),
    listBudgetStates({ ...location, limit: pageSize, nowIso: tickIso }),
    loadPreviousState(),
  ]);

  const queueByWorkItem = new Map(queue.items.map((item) => [item.workItemId, item]));
  const budgetByWorkItem = new Map(
    budgets.records.map((record) => [record.workItemId, record])
  );
  const activeAssignments = assignments.assignments.filter((assignment) =>
    ACTIVE_ASSIGNMENT_STATUSES.has(normalizeString(assignment.status).toUpperCase())
  );

  const detections = activeAssignments.flatMap((assignment) =>
    evaluateWatchdogSignals({
      assignment,
      queueItem: queueByWorkItem.get(assignment.workItemId) || null,
      budgetRecord: budgetByWorkItem.get(assignment.workItemId) || null,
      nowIso: tickIso,
      ...thresholds,
    })
  );

  const transitions = buildAlertTransitions({
    detections,
    previousState,
    nowIso: tickIso,
  });
  // Only state changes are notified; alerts that stay active are not re-sent.
  const notifications = await dispatchAlerts({
    alerts: [...transitions.activated, ...transitions.recovered],
    config,
    execute: shouldExecute,
    fetchImpl,
  });

  const runNumber = previousState.runCount + 1;
  const nextState = normalizeState(
    {
      ...previousState,
      generatedAt: tickIso,
      activeAlerts: transitions.activeAlerts,
      runCount: runNumber,
      lastRunId: buildRunId(tickIso, runNumber),
      lastRunAt: tickIso,
    },
    tickIso
  );

  await fsp.mkdir(storage.watchdogRunsDir, { recursive: true });
  const runId = nextState.lastRunId;
  const runPath = path.join(storage.watchdogRunsDir, `${runId}.json`);

  const sentNotificationCount = notifications.filter((item) => item.sent).length;
  // Dry-run results are neither sent nor failed.
  const failedNotificationCount = notifications.filter(
    (item) => !item.sent && !item.dryRun
  ).length;

  const runPayload = {
    schemaVersion: WATCHDOG_SCHEMA_VERSION,
    generatedAt: tickIso,
    runId,
    config: {
      ...thresholds,
      execute: shouldExecute,
      channelCount: config.channels.length,
      events: config.events,
      frequency: config.frequency,
    },
    summary: {
      assignmentCount: activeAssignments.length,
      detectionCount: detections.length,
      activeAlertCount: Object.keys(transitions.activeAlerts).length,
      activatedCount: transitions.activated.length,
      recoveredCount: transitions.recovered.length,
      notificationCount: notifications.length,
      sentNotificationCount,
      failedNotificationCount,
    },
    detections,
    activatedAlerts: transitions.activated,
    recoveredAlerts: transitions.recovered,
    notifications,
  };

  await Promise.all([
    writeJsonFile(runPath, runPayload),
    writeJsonFile(storage.watchdogStatePath, nextState),
    appendEvent(storage.watchdogEventsPath, {
      timestamp: tickIso,
      eventType: "watchdog_tick",
      runId,
      detectionCount: detections.length,
      activatedCount: transitions.activated.length,
      recoveredCount: transitions.recovered.length,
      notificationCount: notifications.length,
      sentNotificationCount,
      failedNotificationCount,
    }),
  ]);

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    runId,
    runPath,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state: nextState,
    detections,
    activatedAlerts: transitions.activated,
    recoveredAlerts: transitions.recovered,
    notifications,
    summary: runPayload.summary,
  };
}
|
|
899
|
-
|
|
900
|
-
/**
 * Report the persisted watchdog state together with a summary of recent runs.
 *
 * Run artifacts are the `.json` files in the watchdog runs directory, ordered
 * by file name descending and capped at `limit`. A missing runs directory
 * yields an empty list; artifacts that cannot be read or parsed are skipped.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."]
 * @param {string} [options.outputDir=""]
 * @param {number} [options.limit=10] - Maximum number of run artifacts to summarize.
 * @param {object} [options.env=process.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso] - Clock override; defaults to the current time.
 * @returns {Promise<object>} Storage paths, config, normalized state, active
 *   alerts (list and count), run count and `recentRuns`.
 * @throws Any `readdir` error other than `ENOENT`.
 */
export async function getWatchdogStatus({
  targetPath = ".",
  outputDir = "",
  limit = 10,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const referenceIso = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const maxRuns = normalizePositiveInteger(limit, 10);
  const storage = await resolveWatchdogStorage({
    targetPath,
    outputDir,
    env,
    homeDir,
  });
  const config = await loadWatchdogConfig({ targetPath, env });
  const storedState = await readJsonFile(storage.watchdogStatePath, () =>
    createInitialState(referenceIso)
  );
  const state = normalizeState(storedState, referenceIso);

  let runEntries = [];
  try {
    runEntries = await fsp.readdir(storage.watchdogRunsDir, { withFileTypes: true });
  } catch (error) {
    const runsDirMissing =
      Boolean(error) && typeof error === "object" && error.code === "ENOENT";
    if (!runsDirMissing) {
      throw error;
    }
  }

  const newestRunFiles = runEntries
    .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
    .map((entry) => entry.name)
    .sort((left, right) => right.localeCompare(left))
    .slice(0, maxRuns);

  const countOf = (value) => normalizeNonNegativeNumber(value, 0);
  const recentRuns = [];
  for (const fileName of newestRunFiles) {
    const runPath = path.join(storage.watchdogRunsDir, fileName);
    try {
      const artifact = JSON.parse(await fsp.readFile(runPath, "utf-8"));
      recentRuns.push({
        runId: normalizeString(artifact.runId),
        generatedAt: normalizeIsoTimestamp(artifact.generatedAt, referenceIso),
        detectionCount: countOf(artifact.summary?.detectionCount),
        activatedCount: countOf(artifact.summary?.activatedCount),
        recoveredCount: countOf(artifact.summary?.recoveredCount),
        notificationCount: countOf(artifact.summary?.notificationCount),
        runPath,
      });
    } catch {
      // Ignore malformed run artifacts.
    }
  }

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    config,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state,
    activeAlerts: Object.values(state.activeAlerts),
    activeAlertCount: Object.keys(state.activeAlerts).length,
    runCount: state.runCount,
    recentRuns,
  };
}
|
|
1
|
+
import fsp from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
import { parse as parseYaml } from "yaml";
|
|
5
|
+
|
|
6
|
+
import { listAssignments, resolveAssignmentLedgerStorage } from "./assignment-ledger.js";
|
|
7
|
+
import { listBudgetStates } from "./budget-governor.js";
|
|
8
|
+
import { listErrorQueue, resolveErrorDaemonStorage } from "./error-worker.js";
|
|
9
|
+
|
|
10
|
+
// Schema version written into each per-run artifact.
const WATCHDOG_SCHEMA_VERSION = "1.0.0";
// Schema version written into the persisted watchdog state.
const STATE_SCHEMA_VERSION = "1.0.0";

// Assignment statuses (upper-case) that the watchdog evaluates for stuck signals.
const ACTIVE_ASSIGNMENT_STATUSES = new Set(["CLAIMED", "IN_PROGRESS", "BLOCKED"]);

/** Event types accepted in alert configuration and persisted alert records. */
export const WATCHDOG_EVENT_TYPES = Object.freeze([
  "agent_stuck",
  "budget_warning",
  "alert_recovered",
  "pr_merged",
  "audit_complete",
  "kill_switch_activated",
]);

/** Signal codes a detection can carry. */
export const WATCHDOG_SIGNAL_CODES = Object.freeze([
  "NO_TOOL_CALL",
  "REPEATED_FILE_READ",
  "BUDGET_WARNING_NO_FINDINGS",
  "TURN_STALL",
]);

// Set views of the two lists above, used for membership checks.
const WATCHDOG_SIGNAL_SET = new Set(WATCHDOG_SIGNAL_CODES);
const WATCHDOG_EVENT_SET = new Set(WATCHDOG_EVENT_TYPES);
|
|
33
|
+
|
|
34
|
+
/**
 * Coerce a value to a trimmed string.
 *
 * Any falsy input (including `0` and `false`) becomes the empty string.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeString(value) {
  const text = String(value || "");
  return text.trim();
}
|
|
37
|
+
|
|
38
|
+
/**
 * Canonicalize a timestamp to `Date#toISOString()` form.
 *
 * @param {*} value - Candidate timestamp; normalized to a string, then given to `Date.parse`.
 * @param {string} [fallbackIso] - Returned unchanged when `value` is blank or
 *   unparseable. Defaults to the current time.
 * @returns {string}
 */
function normalizeIsoTimestamp(value, fallbackIso = new Date().toISOString()) {
  const text = normalizeString(value);
  const epoch = text ? Date.parse(text) : Number.NaN;
  return Number.isFinite(epoch) ? new Date(epoch).toISOString() : fallbackIso;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Coerce a value to a finite number, or return the fallback.
 *
 * `null`, `undefined` and blank strings count as "no value" and yield the
 * fallback. Without this guard `Number(null)` and `Number("")` both evaluate
 * to 0, so a missing setting would be read as an explicit zero.
 *
 * @param {*} value - Candidate value.
 * @param {number} [fallback=0] - Returned when `value` is missing or not finite.
 * @returns {number}
 */
function normalizeNumber(value, fallback = 0) {
  if (value === null || value === undefined) {
    return fallback;
  }
  if (typeof value === "string" && value.trim() === "") {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Coerce a value to an integer >= 1.
 *
 * Values that are non-finite or <= 0 yield `fallbackValue`; positive values
 * are floored, with a minimum of 1 (so 0.5 becomes 1).
 *
 * @param {*} value
 * @param {number} fallbackValue
 * @returns {number}
 */
function normalizePositiveInteger(value, fallbackValue) {
  const candidate = normalizeNumber(value, fallbackValue);
  const usable = Number.isFinite(candidate) && candidate > 0;
  return usable ? Math.max(1, Math.floor(candidate)) : fallbackValue;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Coerce a value to a finite number >= 0.
 *
 * @param {*} value
 * @param {number} [fallbackValue=0] - Returned for non-finite or negative input.
 * @returns {number}
 */
function normalizeNonNegativeNumber(value, fallbackValue = 0) {
  const candidate = normalizeNumber(value, fallbackValue);
  const usable = Number.isFinite(candidate) && candidate >= 0;
  return usable ? candidate : fallbackValue;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Return a shallow copy of a non-array object, or `{}` for anything else.
 *
 * @param {*} value
 * @returns {object} Always a fresh object; the input is never returned as-is.
 */
function normalizeObject(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  return isRecord ? { ...value } : {};
}
|
|
80
|
+
|
|
81
|
+
/**
 * Interpret a loosely typed flag.
 *
 * Booleans pass through. Otherwise the value is normalized to a lower-case
 * string: "true" / "1" / "yes" map to true, "false" / "0" / "no" map to
 * false, and anything else (including blank) yields `fallbackValue`.
 *
 * @param {*} value
 * @param {boolean} [fallbackValue=false]
 * @returns {boolean}
 */
function normalizeBoolean(value, fallbackValue = false) {
  if (typeof value === "boolean") {
    return value;
  }
  switch (normalizeString(value).toLowerCase()) {
    case "true":
    case "1":
    case "yes":
      return true;
    case "false":
    case "0":
    case "no":
      return false;
    default:
      return fallbackValue;
  }
}
|
|
97
|
+
|
|
98
|
+
/**
 * Expand `${NAME}` placeholders using values from `env`.
 *
 * Only names made of upper-case letters, digits and underscores are
 * recognized. A placeholder whose variable is unset expands to "".
 *
 * @param {*} value - Template; normalized to a trimmed string first.
 * @param {object} env - Lookup table for placeholder names.
 * @returns {string} The expanded string, or "" for blank input.
 */
function resolveEnvTemplate(value, env) {
  const template = normalizeString(value);
  if (template === "") {
    return "";
  }
  const expand = (_placeholder, name) => normalizeString(env[name]);
  return template.replace(/\$\{([A-Z0-9_]+)\}/g, expand);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Whole seconds elapsed from `previousIso` to `nowIso`, never negative.
 *
 * A blank or unparseable `previousIso` is replaced by `nowIso`, which makes
 * the result 0.
 *
 * @param {*} previousIso - Earlier timestamp.
 * @param {*} nowIso - Later timestamp; falls back to the current time when invalid.
 * @returns {number|null} Elapsed seconds, or null if either epoch is not finite.
 */
function computeSecondsSince(previousIso, nowIso) {
  const startMs = Date.parse(normalizeIsoTimestamp(previousIso, nowIso));
  const endMs = Date.parse(normalizeIsoTimestamp(nowIso, new Date().toISOString()));
  const comparable = Number.isFinite(startMs) && Number.isFinite(endMs);
  if (!comparable) {
    return null;
  }
  const elapsedSeconds = Math.floor((endMs - startMs) / 1000);
  return elapsedSeconds > 0 ? elapsedSeconds : 0;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Pick the best available "last activity" timestamp for an assignment.
 *
 * Preference order: `budgetSnapshot.lastToolCallAt`,
 * `budgetSnapshot.lastActionAt`, `heartbeatAt`, `updatedAt`. The first
 * non-blank value wins.
 *
 * @param {object} [assignment={}]
 * @returns {string} The chosen timestamp string, or "" when none is set.
 */
function pickLastToolCallAt(assignment = {}) {
  const snapshot = normalizeObject(assignment.budgetSnapshot);
  const candidates = [
    snapshot.lastToolCallAt,
    snapshot.lastActionAt,
    assignment.heartbeatAt,
    assignment.updatedAt,
  ];
  for (const candidate of candidates) {
    const text = normalizeString(candidate);
    if (text) {
      return text;
    }
  }
  return "";
}
|
|
125
|
+
|
|
126
|
+
/**
 * Collect file paths recorded in a budget snapshot's read history.
 *
 * Looks at `recentFileReads`, `fileReadHistory` and `fileReads`, in that
 * order, and concatenates the entries of every one that is an array. Entries
 * may be strings or objects carrying `path`, `file` or `filePath`; blank and
 * unrecognized entries are dropped.
 *
 * @param {object} [snapshot={}]
 * @returns {string[]} Normalized paths in encounter order.
 */
function extractRecentFileReads(snapshot = {}) {
  const source = normalizeObject(snapshot);

  const toPath = (entry) => {
    if (typeof entry === "string") {
      return normalizeString(entry);
    }
    if (entry && typeof entry === "object") {
      return (
        normalizeString(entry.path) ||
        normalizeString(entry.file) ||
        normalizeString(entry.filePath)
      );
    }
    return "";
  };

  return ["recentFileReads", "fileReadHistory", "fileReads"]
    .map((key) => source[key])
    .filter((entries) => Array.isArray(entries))
    .flatMap((entries) => entries.map(toPath).filter(Boolean));
}
|
|
154
|
+
|
|
155
|
+
/**
 * Measure how many times the final value repeats consecutively at the end of a list.
 *
 * Values are normalized to trimmed strings and blank ones are discarded
 * before counting.
 *
 * @param {Array} [values=[]]
 * @returns {{repeatedValue: string, repeatCount: number}} The last value and
 *   the length of its trailing run; `{ repeatedValue: "", repeatCount: 0 }`
 *   when the input is not an array or has no non-blank values.
 */
function computeRepeatedTailCount(values = []) {
  const cleaned = Array.isArray(values)
    ? values.map((value) => normalizeString(value)).filter(Boolean)
    : [];
  if (cleaned.length === 0) {
    return {
      repeatedValue: "",
      repeatCount: 0,
    };
  }
  const lastIndex = cleaned.length - 1;
  const repeatedValue = cleaned[lastIndex];
  // The final element always counts; extend the run backwards while it matches.
  let repeatCount = 1;
  while (repeatCount <= lastIndex && cleaned[lastIndex - repeatCount] === repeatedValue) {
    repeatCount += 1;
  }
  return {
    repeatedValue,
    repeatCount,
  };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Compute the highest used/limit ratio across a budget record's dimensions.
 *
 * Dimensions compared (usage key vs. budget key): tokens, cost in USD,
 * runtime in ms and tool calls. A dimension whose limit is missing or not
 * positive is ignored.
 *
 * @param {object} [record={}] - Reads `record.usage` and `record.budget`.
 * @returns {number} The largest ratio, or 0 when no dimension has a positive limit.
 */
function computeBudgetUsageRatio(record = {}) {
  const usage = normalizeObject(record.usage);
  const budget = normalizeObject(record.budget);
  const dimensions = [
    ["tokensUsed", "maxTokens"],
    ["costUsd", "maxCostUsd"],
    ["runtimeMs", "maxRuntimeMs"],
    ["toolCalls", "maxToolCalls"],
  ];
  const ratios = dimensions
    .map(([usageKey, budgetKey]) => ({
      used: normalizeNonNegativeNumber(usage[usageKey], 0),
      limit: normalizeNonNegativeNumber(budget[budgetKey], 0),
    }))
    .filter(({ limit }) => limit > 0)
    .map(({ used, limit }) => used / limit);
  return ratios.length === 0 ? 0 : Math.max(...ratios);
}
|
|
205
|
+
|
|
206
|
+
/**
 * Normalize a severity label to one of "P0", "P1", "P2" or "P3".
 *
 * Matching is case-insensitive; anything unrecognized becomes "P3".
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeSeverity(value) {
  const label = normalizeString(value).toUpperCase();
  return ["P0", "P1", "P2", "P3"].includes(label) ? label : "P3";
}
|
|
213
|
+
|
|
214
|
+
/**
 * Build the state used when no watchdog state file exists yet.
 *
 * @param {string} nowIso - Timestamp recorded as `generatedAt`.
 * @returns {object} State with no active alerts, a run count of 0 and null
 *   `lastRunId` / `lastRunAt`.
 */
function createInitialState(nowIso) {
  const generatedAt = normalizeIsoTimestamp(nowIso, nowIso);
  return {
    schemaVersion: STATE_SCHEMA_VERSION,
    generatedAt,
    activeAlerts: {},
    runCount: 0,
    lastRunId: null,
    lastRunAt: null,
  };
}
|
|
224
|
+
|
|
225
|
+
/**
 * Rebuild a watchdog state object with every field validated.
 *
 * Tolerates damaged input: a non-object `state` (for example a state file
 * containing `null`) is treated as empty, a non-object or array
 * `activeAlerts` is treated as having no alerts, and alert entries that are
 * not plain objects are dropped instead of raising a TypeError.
 *
 * @param {object} [state={}] - Parsed state, typically read from the state file.
 * @param {string} [nowIso] - Fallback for missing or invalid timestamps.
 * @returns {object} `{ schemaVersion, generatedAt, activeAlerts, runCount, lastRunId, lastRunAt }`.
 */
function normalizeState(state = {}, nowIso = new Date().toISOString()) {
  const source = normalizeObject(state);
  const activeAlerts = {};
  for (const [alertId, rawAlert] of Object.entries(normalizeObject(source.activeAlerts))) {
    if (!normalizeString(alertId)) {
      continue;
    }
    // A record with no fields carries nothing worth keeping, so skip it.
    if (!rawAlert || typeof rawAlert !== "object" || Array.isArray(rawAlert)) {
      continue;
    }
    const eventType = normalizeString(rawAlert.eventType);
    const signalCode = normalizeString(rawAlert.signalCode);
    activeAlerts[alertId] = {
      alertId,
      eventType: WATCHDOG_EVENT_SET.has(eventType) ? eventType : "agent_stuck",
      signalCode: WATCHDOG_SIGNAL_SET.has(signalCode) ? signalCode : "NO_TOOL_CALL",
      workItemId: normalizeString(rawAlert.workItemId),
      agentIdentity: normalizeString(rawAlert.agentIdentity),
      firstSeenAt: normalizeIsoTimestamp(rawAlert.firstSeenAt, nowIso),
      lastSeenAt: normalizeIsoTimestamp(rawAlert.lastSeenAt, nowIso),
      message: normalizeString(rawAlert.message),
      severity: normalizeSeverity(rawAlert.severity),
    };
  }
  return {
    schemaVersion: STATE_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(source.generatedAt, nowIso),
    activeAlerts,
    runCount: Math.max(0, Math.floor(normalizeNumber(source.runCount, 0))),
    lastRunId: normalizeString(source.lastRunId) || null,
    lastRunAt: source.lastRunAt ? normalizeIsoTimestamp(source.lastRunAt, nowIso) : null,
  };
}
|
|
257
|
+
|
|
258
|
+
/**
 * Read and parse a JSON file, substituting a default when the file is absent.
 *
 * @param {string} filePath
 * @param {Function} defaultFactory - Called with no arguments when the read
 *   fails with `ENOENT`; its result is returned.
 * @returns {Promise<*>} The parsed JSON, or the factory's result.
 * @throws Any other read error, and any JSON parse error.
 */
async function readJsonFile(filePath, defaultFactory) {
  try {
    const text = await fsp.readFile(filePath, "utf-8");
    return JSON.parse(text);
  } catch (error) {
    const fileMissing = Boolean(error) && typeof error === "object" && error.code === "ENOENT";
    if (!fileMissing) {
      throw error;
    }
    return defaultFactory();
  }
}
|
|
269
|
+
|
|
270
|
+
/**
 * Write a value as pretty-printed JSON (2-space indent, trailing newline),
 * creating the parent directory when needed.
 *
 * @param {string} filePath
 * @param {*} payload - Value passed to `JSON.stringify`.
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, payload) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const serialized = `${JSON.stringify(payload, null, 2)}\n`;
  await fsp.writeFile(filePath, serialized, "utf-8");
}
|
|
274
|
+
|
|
275
|
+
/**
 * Append one event to a newline-delimited JSON log, creating the parent
 * directory when needed.
 *
 * @param {string} filePath
 * @param {*} payload - Value serialized onto a single line.
 * @returns {Promise<void>}
 */
async function appendEvent(filePath, payload) {
  const parentDir = path.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const line = `${JSON.stringify(payload)}\n`;
  await fsp.appendFile(filePath, line, "utf-8");
}
|
|
279
|
+
|
|
280
|
+
/**
 * Build a run identifier of the form `watchdog-<timestamp>-<NNNN>`.
 *
 * The timestamp is the normalized ISO string with ":" and "." replaced by
 * "-"; the counter is left-padded with zeros to at least four characters.
 *
 * @param {string} nowIso
 * @param {number} count - Run sequence number.
 * @returns {string}
 */
function buildRunId(nowIso, count) {
  const isoStamp = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const stampToken = isoStamp.split(/[:.]/).join("-");
  const sequence = String(count).padStart(4, "0");
  return `watchdog-${stampToken}-${sequence}`;
}
|
|
284
|
+
|
|
285
|
+
/**
 * Validate one configured alert channel and resolve its `${ENV}` placeholders.
 *
 * Supported types (case-insensitive): "slack" and "telegram". Both
 * snake_case and camelCase key spellings are accepted. Non-object entries
 * (for example the `null` that an empty YAML list item parses to) are
 * rejected like any other invalid channel instead of raising a TypeError.
 *
 * @param {object} [channel={}] - Raw channel entry from the config file.
 * @param {object} [env=process.env] - Source for placeholder values.
 * @returns {object|null} `{ type: "slack", webhookUrl }`,
 *   `{ type: "telegram", botToken, chatId }`, or null when the type is
 *   unsupported or a required value resolves to blank.
 */
function normalizeChannel(channel = {}, env = process.env) {
  const source = normalizeObject(channel);
  const type = normalizeString(source.type).toLowerCase();
  if (type === "slack") {
    const webhookUrl = resolveEnvTemplate(
      source.webhook_url || source.webhookUrl || source.url || "",
      env
    );
    return webhookUrl
      ? {
          type: "slack",
          webhookUrl,
        }
      : null;
  }
  if (type === "telegram") {
    const botToken = resolveEnvTemplate(source.bot_token || source.botToken || "", env);
    const chatId = resolveEnvTemplate(source.chat_id || source.chatId || "", env);
    return botToken && chatId
      ? {
          type: "telegram",
          botToken,
          chatId,
        }
      : null;
  }
  return null;
}
|
|
312
|
+
|
|
313
|
+
/**
 * Load the `alerts` section of `<targetPath>/.sentinelayer.yml`.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Directory containing the config file.
 * @param {object} [options.env=process.env] - Used to resolve channel placeholders.
 * @returns {Promise<object>} `{ configPath, exists, channels, frequency, events }`.
 *   When the file is missing, `exists` is false and defaults are returned.
 *   `events` falls back to the default list when it is not configured or when
 *   none of the configured names is a known event type.
 * @throws Any read error other than `ENOENT`, and any error raised while parsing.
 */
async function loadWatchdogConfig({ targetPath = ".", env = process.env } = {}) {
  const targetDir = path.resolve(String(targetPath || "."));
  const configPath = path.join(targetDir, ".sentinelayer.yml");
  const fallback = {
    channels: [],
    frequency: "smart",
    events: ["agent_stuck", "budget_warning", "alert_recovered"],
  };
  try {
    const parsed = parseYaml(await fsp.readFile(configPath, "utf-8")) || {};
    const alerts = parsed && typeof parsed === "object" ? normalizeObject(parsed.alerts) : {};

    let channels = [];
    if (Array.isArray(alerts.channels)) {
      channels = alerts.channels
        .map((channel) => normalizeChannel(channel, env))
        .filter(Boolean);
    }

    let events = fallback.events;
    if (Array.isArray(alerts.events)) {
      events = alerts.events
        .map((eventType) => normalizeString(eventType))
        .filter((eventType) => WATCHDOG_EVENT_SET.has(eventType));
    }

    const frequency = normalizeString(alerts.frequency).toLowerCase() || fallback.frequency;
    return {
      configPath,
      exists: true,
      channels,
      frequency,
      events: events.length > 0 ? events : fallback.events,
    };
  } catch (error) {
    const fileMissing = Boolean(error) && typeof error === "object" && error.code === "ENOENT";
    if (!fileMissing) {
      throw error;
    }
    return {
      configPath,
      exists: false,
      channels: [],
      frequency: fallback.frequency,
      events: fallback.events,
    };
  }
}
|
|
352
|
+
|
|
353
|
+
/**
 * Assemble a detection record.
 *
 * The `alertId` is `<workItemId>:<signalCode>`, so the same signal on the
 * same work item always maps to the same alert.
 *
 * @param {object} fields
 * @param {string} fields.eventType
 * @param {string} fields.signalCode
 * @param {string} fields.workItemId
 * @param {string} fields.agentIdentity
 * @param {string} fields.severity
 * @param {string} fields.message
 * @param {object} [fields.details={}] - Signal-specific measurements.
 * @returns {object} The fields above plus `alertId`.
 */
function buildDetection({
  eventType,
  signalCode,
  workItemId,
  agentIdentity,
  severity,
  message,
  details = {},
}) {
  const alertId = `${workItemId}:${signalCode}`;
  return { alertId, eventType, signalCode, workItemId, agentIdentity, severity, message, details };
}
|
|
373
|
+
|
|
374
|
+
/**
 * Evaluate one active assignment against the four watchdog signals.
 *
 * Signals, in the order they are reported:
 *  - NO_TOOL_CALL: seconds since the last recorded activity reach `noToolCallSeconds`.
 *  - REPEATED_FILE_READ: the most recent file read repeats consecutively at
 *    least `repeatedFileReadsThreshold` times.
 *  - TURN_STALL: `turnCount` is positive and is ahead of the last progress
 *    turn by at least `turnStallTurns`.
 *  - BUDGET_WARNING_NO_FINDINGS: budget usage ratio reaches
 *    `budgetWarningThreshold` while no findings have been produced.
 *
 * @param {object} options
 * @param {object} options.assignment - Assignment record; reads `workItemId`,
 *   `assignedAgentIdentity` and `budgetSnapshot`.
 * @param {object|null} options.queueItem - Matching queue item, if any; supplies
 *   severity and fallback finding counts.
 * @param {object|null} options.budgetRecord - Matching budget record, if any.
 * @param {string} options.nowIso - Evaluation time.
 * @param {number} options.noToolCallSeconds
 * @param {number} options.repeatedFileReadsThreshold
 * @param {number} options.budgetWarningThreshold
 * @param {number} options.turnStallTurns
 * @returns {object[]} Zero or more detection records.
 */
function evaluateWatchdogSignals({
  assignment,
  queueItem,
  budgetRecord,
  nowIso,
  noToolCallSeconds,
  repeatedFileReadsThreshold,
  budgetWarningThreshold,
  turnStallTurns,
}) {
  const snapshot = normalizeObject(assignment.budgetSnapshot);
  // Identity fields shared by every detection raised for this assignment.
  const subject = {
    workItemId: normalizeString(assignment.workItemId),
    agentIdentity: normalizeString(assignment.assignedAgentIdentity) || "unassigned",
    severity: normalizeSeverity(queueItem?.severity),
  };
  const found = [];
  const report = (eventType, signalCode, message, details) => {
    found.push(buildDetection({ eventType, signalCode, ...subject, message, details }));
  };

  // Signal: no recent tool activity.
  const lastToolCallAt = pickLastToolCallAt(assignment);
  const idleSeconds = computeSecondsSince(lastToolCallAt, nowIso);
  if (idleSeconds !== null && idleSeconds >= noToolCallSeconds) {
    report(
      "agent_stuck",
      "NO_TOOL_CALL",
      `No tool calls observed for ${idleSeconds}s (threshold ${noToolCallSeconds}s).`,
      {
        idleSeconds,
        thresholdSeconds: noToolCallSeconds,
        lastToolCallAt: normalizeIsoTimestamp(lastToolCallAt, nowIso),
      }
    );
  }

  // Signal: the same file read over and over.
  const { repeatedValue, repeatCount } = computeRepeatedTailCount(
    extractRecentFileReads(snapshot)
  );
  if (repeatCount >= repeatedFileReadsThreshold) {
    report(
      "agent_stuck",
      "REPEATED_FILE_READ",
      `Repeated file read detected (${repeatCount}x): ${repeatedValue}`,
      {
        filePath: repeatedValue,
        repeatCount,
        threshold: repeatedFileReadsThreshold,
      }
    );
  }

  // Signal: turns advancing without progress. When neither progress marker is
  // recorded, the last progress turn defaults to the current turn (no stall).
  const turnCount = Math.floor(normalizeNonNegativeNumber(snapshot.turnCount, 0));
  const recordedProgressTurn =
    snapshot.lastProgressTurn ?? snapshot.lastFindingTurn ?? turnCount;
  const lastProgressTurn = Math.floor(
    normalizeNonNegativeNumber(recordedProgressTurn, turnCount)
  );
  const stalledTurns = Math.max(0, turnCount - lastProgressTurn);
  if (turnCount > 0 && stalledTurns >= turnStallTurns) {
    report(
      "agent_stuck",
      "TURN_STALL",
      `Turn progression stalled for ${stalledTurns} turns (threshold ${turnStallTurns}).`,
      {
        turnCount,
        lastProgressTurn,
        stalledTurns,
        threshold: turnStallTurns,
      }
    );
  }

  // Signal: budget nearly spent with nothing to show for it.
  const usageRatio = computeBudgetUsageRatio(budgetRecord || {});
  const recordedFindings =
    snapshot.findingsProduced ??
    queueItem?.metadata?.findingsProduced ??
    queueItem?.metadata?.findingsCount ??
    0;
  const findingsProduced = Math.floor(normalizeNonNegativeNumber(recordedFindings, 0));
  if (usageRatio >= budgetWarningThreshold && findingsProduced <= 0) {
    report(
      "budget_warning",
      "BUDGET_WARNING_NO_FINDINGS",
      `Budget usage ${(usageRatio * 100).toFixed(1)}% with no findings produced.`,
      {
        usageRatio: Number(usageRatio.toFixed(6)),
        threshold: budgetWarningThreshold,
        findingsProduced,
        lifecycleState: normalizeString(budgetRecord?.lifecycleState) || "WITHIN_BUDGET",
      }
    );
  }

  return found;
}
|
|
488
|
+
|
|
489
|
+
/**
 * Project an alert/detection onto the compact record kept in the watchdog
 * state file under `activeAlerts`.
 *
 * Only the identity fields, the two timestamps, the message and the severity
 * are carried over; any other property on `alert` is left out of the record.
 *
 * @param {object} [alert] - Alert or detection to persist.
 * @param {string} [nowIso] - Timestamp of the current tick; used as
 *   `lastSeenAt` and as the fallback for a missing `firstSeenAt`.
 * @returns {object} Record with alertId, eventType, signalCode, workItemId,
 *   agentIdentity, firstSeenAt, lastSeenAt, message and severity.
 */
function toActiveAlertRecord(alert = {}, nowIso = new Date().toISOString()) {
  const { alertId, eventType, signalCode, workItemId, agentIdentity } = alert;

  // A falsy firstSeenAt means the alert is new: it starts "now".
  const firstSeenAt = normalizeIsoTimestamp(alert.firstSeenAt || nowIso, nowIso);
  const lastSeenAt = normalizeIsoTimestamp(nowIso, nowIso);
  const message = normalizeString(alert.message);
  const severity = normalizeSeverity(alert.severity);

  return {
    alertId,
    eventType,
    signalCode,
    workItemId,
    agentIdentity,
    firstSeenAt,
    lastSeenAt,
    message,
    severity,
  };
}
|
|
502
|
+
|
|
503
|
+
/**
 * Compare the detections of the current tick with the alerts that were active
 * after the previous tick and classify every alert.
 *
 * - `activated`: detected now, absent from the previous state.
 * - `stillActive`: detected now and already present in the previous state
 *   (keeps the previous `firstSeenAt`).
 * - `recovered`: present in the previous state but not detected now; emitted
 *   with `eventType: "alert_recovered"` and a `recoveredAt` timestamp.
 *
 * @param {object} options
 * @param {Array<object>} [options.detections] - Detections of this tick; each
 *   is expected to carry an `alertId`.
 * @param {object} [options.previousState] - Previously persisted state; only
 *   its `activeAlerts` map is read.
 * @param {string} [options.nowIso] - Timestamp of the current tick.
 * @returns {{activeAlerts: object, activated: Array<object>,
 *   stillActive: Array<object>, recovered: Array<object>}}
 */
function buildAlertTransitions({
  detections = [],
  previousState = {},
  nowIso = new Date().toISOString(),
}) {
  const priorAlerts = normalizeObject(previousState.activeAlerts);
  const activeAlerts = {};
  const activated = [];
  const stillActive = [];
  const detectedIds = new Set();

  for (const detection of detections) {
    const { alertId } = detection;
    detectedIds.add(alertId);

    const prior = priorAlerts[alertId] || null;
    activeAlerts[alertId] = toActiveAlertRecord(
      { ...detection, firstSeenAt: prior?.firstSeenAt || nowIso },
      nowIso
    );

    if (!prior) {
      activated.push({ ...detection, firstSeenAt: nowIso, lastSeenAt: nowIso });
      continue;
    }
    stillActive.push({
      ...detection,
      firstSeenAt: prior.firstSeenAt,
      lastSeenAt: nowIso,
    });
  }

  // Whatever was active before and was not detected this tick has recovered.
  const recovered = Object.entries(priorAlerts)
    .filter(([alertId]) => !detectedIds.has(alertId))
    .map(([alertId, prior]) => ({
      alertId,
      eventType: "alert_recovered",
      signalCode: normalizeString(prior.signalCode),
      workItemId: normalizeString(prior.workItemId),
      agentIdentity: normalizeString(prior.agentIdentity),
      severity: normalizeSeverity(prior.severity),
      message: `Recovered: ${normalizeString(prior.message) || "watchdog signal cleared"}`,
      firstSeenAt: normalizeIsoTimestamp(prior.firstSeenAt, nowIso),
      lastSeenAt: normalizeIsoTimestamp(prior.lastSeenAt, nowIso),
      recoveredAt: normalizeIsoTimestamp(nowIso, nowIso),
    }));

  return { activeAlerts, activated, stillActive, recovered };
}
|
|
565
|
+
|
|
566
|
+
/**
 * Render an alert as the plain-text message sent to notification channels.
 *
 * `agent_stuck` and `budget_warning` alerts get dedicated headlines; every
 * other event type (for example `alert_recovered`) uses a generic headline.
 * The alert's own `message` always follows on a second line.
 *
 * @param {object} [alert] - Alert to describe; `details.idleSeconds` and
 *   `details.usageRatio` are read when present.
 * @returns {string} Two-line notification text.
 */
function formatAlertMessage(alert = {}) {
  const kind = normalizeString(alert.eventType);

  switch (kind) {
    case "agent_stuck": {
      const idle = normalizeNumber(alert.details?.idleSeconds, 0);
      const ratio = normalizeNumber(alert.details?.usageRatio, 0);
      let headline = `[SentinelLayer] Agent "${alert.agentIdentity}" stuck (${alert.signalCode}) on ${alert.workItemId}`;
      // Idle time and budget usage are appended only when they are positive.
      if (idle > 0) {
        headline += ` | idle=${idle}s`;
      }
      if (ratio > 0) {
        headline += ` | budget=${(ratio * 100).toFixed(1)}%`;
      }
      return `${headline}\n${alert.message}`;
    }
    case "budget_warning": {
      const ratio = normalizeNumber(alert.details?.usageRatio, 0);
      const usage = (ratio * 100).toFixed(1);
      return `[SentinelLayer] Budget warning for ${alert.workItemId} (${alert.agentIdentity}) | usage=${usage}%\n${alert.message}`;
    }
    default: {
      const workItem = alert.workItemId || "";
      const agent = alert.agentIdentity || "";
      return `[SentinelLayer] ${alert.eventType} ${workItem} ${agent}\n${alert.message}`;
    }
  }
}
|
|
580
|
+
|
|
581
|
+
/**
 * POST a text message to a Slack incoming webhook.
 *
 * @param {{webhookUrl: string}} channel - Channel config holding the webhook URL.
 * @param {string} message - Text placed in the `text` field of the JSON body.
 * @param {Function} fetchImpl - fetch-compatible function used for the request.
 * @returns {Promise<void>} Resolves when the response reports `ok`.
 * @throws {Error} When the response is not `ok`; the message carries the
 *   HTTP status.
 */
async function sendSlackAlert(channel, message, fetchImpl) {
  const url = channel.webhookUrl;
  const request = {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ text: message }),
  };

  const response = await fetchImpl(url, request);
  if (response.ok) {
    return;
  }
  throw new Error(`Slack webhook returned ${response.status}.`);
}
|
|
595
|
+
|
|
596
|
+
/**
 * Send a text message through the Telegram Bot API `sendMessage` method.
 *
 * @param {{botToken: string, chatId: (string|number)}} channel - Channel
 *   config; the bot token is placed in the request URL, the chat id in the body.
 * @param {string} message - Message text.
 * @param {Function} fetchImpl - fetch-compatible function used for the request.
 * @returns {Promise<void>} Resolves when the response reports `ok`.
 * @throws {Error} When the response is not `ok`; the message carries the
 *   HTTP status.
 */
async function sendTelegramAlert(channel, message, fetchImpl) {
  const url = `https://api.telegram.org/bot${channel.botToken}/sendMessage`;
  const payload = {
    chat_id: channel.chatId,
    text: message,
    disable_web_page_preview: true,
  };
  const request = {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(payload),
  };

  const response = await fetchImpl(url, request);
  if (response.ok) {
    return;
  }
  throw new Error(`Telegram sendMessage returned ${response.status}.`);
}
|
|
613
|
+
|
|
614
|
+
/**
 * Deliver one alert to one notification channel and report the outcome.
 *
 * Delivery failures are captured in the returned report rather than thrown.
 *
 * @param {object} options
 * @param {object} options.channel - Channel config; `type` selects the
 *   transport ("slack" or "telegram").
 * @param {object} options.alert - Alert to deliver.
 * @param {boolean} [options.execute=false] - When false nothing is sent and
 *   the report is flagged `dryRun: true`.
 * @param {Function} [options.fetchImpl=globalThis.fetch] - fetch-compatible
 *   function used by the transports.
 * @returns {Promise<{channelType: *, alertId: *, eventType: *, sent: boolean,
 *   dryRun: boolean, message: string, error: string}>} Delivery report.
 */
async function dispatchAlertToChannel({
  channel,
  alert,
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const message = formatAlertMessage(alert);

  // Every outcome shares the same report shape; only these three fields vary.
  const report = (sent, dryRun, error) => ({
    channelType: channel.type,
    alertId: alert.alertId,
    eventType: alert.eventType,
    sent,
    dryRun,
    message,
    error,
  });

  if (!execute) {
    return report(false, true, "");
  }
  if (typeof fetchImpl !== "function") {
    return report(false, false, "Fetch implementation is unavailable.");
  }

  try {
    switch (channel.type) {
      case "slack":
        await sendSlackAlert(channel, message, fetchImpl);
        break;
      case "telegram":
        await sendTelegramAlert(channel, message, fetchImpl);
        break;
      default:
        throw new Error(`Unsupported alert channel type '${channel.type}'.`);
    }
    return report(true, false, "");
  } catch (error) {
    return report(false, false, normalizeString(error?.message || error));
  }
}
|
|
672
|
+
|
|
673
|
+
/**
 * Fan a list of alerts out to every configured channel.
 *
 * `config.events` acts as an allow-list of event types; when it is missing,
 * not an array, or empty, every alert is dispatched. A `config.channels`
 * value that is not an array is treated as "no channels".
 *
 * @param {object} options
 * @param {Array<object>} [options.alerts] - Alerts to dispatch.
 * @param {object} [options.config] - Watchdog config (`channels`, `events`).
 * @param {boolean} [options.execute=false] - Forwarded to each delivery;
 *   false means dry run.
 * @param {Function} [options.fetchImpl=globalThis.fetch] - fetch-compatible
 *   function forwarded to each delivery.
 * @returns {Promise<Array<object>>} One delivery report per (alert, channel)
 *   pair, alerts in input order and channels in config order.
 */
async function dispatchAlerts({
  alerts = [],
  config = {},
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const channels = Array.isArray(config.channels) ? config.channels : [];
  const eventAllowList = new Set(Array.isArray(config.events) ? config.events : []);
  const pending = [];

  for (const alert of alerts) {
    const permitted = eventAllowList.size === 0 || eventAllowList.has(alert.eventType);
    if (permitted) {
      for (const channel of channels) {
        pending.push(dispatchAlertToChannel({ channel, alert, execute, fetchImpl }));
      }
    }
  }

  return Promise.all(pending);
}
|
|
699
|
+
|
|
700
|
+
/**
 * Resolve the on-disk locations used by the watchdog.
 *
 * Delegates to `resolveErrorDaemonStorage` and adds three watchdog paths
 * located directly under the returned `baseDir`.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."] - Forwarded to the daemon storage resolver.
 * @param {string} [options.outputDir=""] - Forwarded to the daemon storage resolver.
 * @param {object} [options.env] - Forwarded to the daemon storage resolver.
 * @param {string} [options.homeDir] - Forwarded to the daemon storage resolver.
 * @returns {Promise<object>} The daemon storage descriptor plus
 *   `watchdogStatePath`, `watchdogEventsPath` and `watchdogRunsDir`.
 */
export async function resolveWatchdogStorage({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
} = {}) {
  const daemonStorage = await resolveErrorDaemonStorage({ targetPath, outputDir, env, homeDir });
  const underBase = (name) => path.join(daemonStorage.baseDir, name);

  return {
    ...daemonStorage,
    watchdogStatePath: underBase("watchdog-state.json"),
    watchdogEventsPath: underBase("watchdog-events.ndjson"),
    watchdogRunsDir: underBase("watchdog-runs"),
  };
}
|
|
719
|
+
|
|
720
|
+
/**
 * Run one watchdog evaluation pass.
 *
 * Steps, in order:
 *  1. Normalize the thresholds and resolve storage and config.
 *  2. Load assignments, the error queue, budget states and the previous
 *     watchdog state concurrently.
 *  3. Evaluate watchdog signals for every assignment whose status is in
 *     `ACTIVE_ASSIGNMENT_STATUSES`.
 *  4. Diff the detections against the previous active alerts.
 *  5. Dispatch notifications for newly activated and recovered alerts only;
 *     alerts that merely stay active are not dispatched again.
 *  6. Write the run artifact and the next state, and append a
 *     `watchdog_tick` event.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."]
 * @param {string} [options.outputDir=""]
 * @param {number} [options.noToolCallSeconds=60]
 * @param {number} [options.repeatedFileReadsThreshold=3]
 * @param {number} [options.budgetWarningThreshold=0.9] - Clamped to [0, 1].
 * @param {number} [options.turnStallTurns=5]
 * @param {boolean} [options.execute=false] - When false, notifications are
 *   dry-run only.
 * @param {number} [options.limit=200] - Limit passed to each list call.
 * @param {object} [options.env=process.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso] - Timestamp of this tick.
 * @param {Function} [options.fetchImpl=globalThis.fetch]
 * @returns {Promise<object>} Storage paths plus run id/path, the next state,
 *   detections, activated/recovered alerts, notification reports and summary.
 */
export async function runWatchdogTick({
  targetPath = ".",
  outputDir = "",
  noToolCallSeconds = 60,
  repeatedFileReadsThreshold = 3,
  budgetWarningThreshold = 0.9,
  turnStallTurns = 5,
  execute = false,
  limit = 200,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
  fetchImpl = globalThis.fetch,
} = {}) {
  const tickAt = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const idleSecondsLimit = normalizePositiveInteger(noToolCallSeconds, 60);
  const repeatedReadsLimit = normalizePositiveInteger(repeatedFileReadsThreshold, 3);
  const stallTurnsLimit = normalizePositiveInteger(turnStallTurns, 5);
  const budgetRatioLimit = Math.max(
    0,
    Math.min(1, normalizeNonNegativeNumber(budgetWarningThreshold, 0.9))
  );
  const listLimit = normalizePositiveInteger(limit, 200);
  const shouldExecute = normalizeBoolean(execute, false);

  // Location options shared by the storage resolver and the list calls.
  const location = { targetPath, outputDir, env, homeDir };

  const storage = await resolveWatchdogStorage({ ...location });
  const config = await loadWatchdogConfig({ targetPath, env });

  const [assignments, queue, budgets, previousState] = await Promise.all([
    listAssignments({ ...location, includeExpired: true, limit: listLimit, nowIso: tickAt }),
    listErrorQueue({ ...location, limit: listLimit }),
    listBudgetStates({ ...location, limit: listLimit, nowIso: tickAt }),
    readJsonFile(storage.watchdogStatePath, () => createInitialState(tickAt)).then((raw) =>
      normalizeState(raw, tickAt)
    ),
  ]);

  const indexByWorkItem = (rows) => new Map(rows.map((row) => [row.workItemId, row]));
  const queueByWorkItem = indexByWorkItem(queue.items);
  const budgetByWorkItem = indexByWorkItem(budgets.records);

  const activeAssignments = assignments.assignments.filter((assignment) =>
    ACTIVE_ASSIGNMENT_STATUSES.has(normalizeString(assignment.status).toUpperCase())
  );

  const detections = activeAssignments.flatMap((assignment) =>
    evaluateWatchdogSignals({
      assignment,
      queueItem: queueByWorkItem.get(assignment.workItemId) || null,
      budgetRecord: budgetByWorkItem.get(assignment.workItemId) || null,
      nowIso: tickAt,
      noToolCallSeconds: idleSecondsLimit,
      repeatedFileReadsThreshold: repeatedReadsLimit,
      budgetWarningThreshold: budgetRatioLimit,
      turnStallTurns: stallTurnsLimit,
    })
  );

  const transitions = buildAlertTransitions({ detections, previousState, nowIso: tickAt });
  const { activeAlerts, activated, recovered } = transitions;

  // Notify on state changes only: activations and recoveries.
  const notifications = await dispatchAlerts({
    alerts: [...activated, ...recovered],
    config,
    execute: shouldExecute,
    fetchImpl,
  });

  const nextRunCount = previousState.runCount + 1;
  const nextState = normalizeState(
    {
      ...previousState,
      generatedAt: tickAt,
      activeAlerts,
      runCount: nextRunCount,
      lastRunId: buildRunId(tickAt, nextRunCount),
      lastRunAt: tickAt,
    },
    tickAt
  );

  await fsp.mkdir(storage.watchdogRunsDir, { recursive: true });

  const runId = nextState.lastRunId;
  const runPath = path.join(storage.watchdogRunsDir, `${runId}.json`);

  const sentCount = notifications.filter((item) => item.sent).length;
  // Dry-run reports are neither sent nor failed.
  const failedCount = notifications.filter((item) => !item.sent && !item.dryRun).length;

  const summary = {
    assignmentCount: activeAssignments.length,
    detectionCount: detections.length,
    activeAlertCount: Object.keys(activeAlerts).length,
    activatedCount: activated.length,
    recoveredCount: recovered.length,
    notificationCount: notifications.length,
    sentNotificationCount: sentCount,
    failedNotificationCount: failedCount,
  };

  const runPayload = {
    schemaVersion: WATCHDOG_SCHEMA_VERSION,
    generatedAt: tickAt,
    runId,
    config: {
      noToolCallSeconds: idleSecondsLimit,
      repeatedFileReadsThreshold: repeatedReadsLimit,
      budgetWarningThreshold: budgetRatioLimit,
      turnStallTurns: stallTurnsLimit,
      execute: shouldExecute,
      channelCount: config.channels.length,
      events: config.events,
      frequency: config.frequency,
    },
    summary,
    detections,
    activatedAlerts: activated,
    recoveredAlerts: recovered,
    notifications,
  };

  const tickEvent = {
    timestamp: tickAt,
    eventType: "watchdog_tick",
    runId,
    detectionCount: detections.length,
    activatedCount: activated.length,
    recoveredCount: recovered.length,
    notificationCount: notifications.length,
    sentNotificationCount: sentCount,
    failedNotificationCount: failedCount,
  };

  await Promise.all([
    writeJsonFile(runPath, runPayload),
    writeJsonFile(storage.watchdogStatePath, nextState),
    appendEvent(storage.watchdogEventsPath, tickEvent),
  ]);

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    runId,
    runPath,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state: nextState,
    detections,
    activatedAlerts: activated,
    recoveredAlerts: recovered,
    notifications,
    summary: runPayload.summary,
  };
}
|
|
899
|
+
|
|
900
|
+
/**
 * Report the current watchdog status without running a tick.
 *
 * Reads the persisted state (falling back to a fresh initial state through
 * `readJsonFile`'s fallback callback) and summarizes the most recent run
 * artifacts found in the runs directory.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."]
 * @param {string} [options.outputDir=""]
 * @param {number} [options.limit=10] - Maximum number of run files summarized.
 * @param {object} [options.env=process.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso] - Reference timestamp used as the fallback
 *   when normalizing timestamps.
 * @returns {Promise<object>} Storage paths, config, state, active alerts,
 *   run count and `recentRuns`.
 * @throws Re-throws any error from reading the runs directory other than ENOENT.
 */
export async function getWatchdogStatus({
  targetPath = ".",
  outputDir = "",
  limit = 10,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const referenceNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const maxRuns = normalizePositiveInteger(limit, 10);

  const storage = await resolveWatchdogStorage({ targetPath, outputDir, env, homeDir });
  const config = await loadWatchdogConfig({ targetPath, env });

  const rawState = await readJsonFile(storage.watchdogStatePath, () =>
    createInitialState(referenceNow)
  );
  const state = normalizeState(rawState, referenceNow);

  let runEntries = [];
  try {
    runEntries = await fsp.readdir(storage.watchdogRunsDir, { withFileTypes: true });
  } catch (error) {
    // A missing runs directory just means no tick has written a run yet.
    const isMissingDir = error && typeof error === "object" && error.code === "ENOENT";
    if (!isMissingDir) {
      throw error;
    }
  }

  const jsonFileNames = runEntries
    .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
    .map((entry) => entry.name);
  // Descending name order, then keep only the first `maxRuns` names.
  jsonFileNames.sort((left, right) => right.localeCompare(left));
  const runFiles = jsonFileNames.slice(0, maxRuns);

  const recentRuns = [];
  for (const fileName of runFiles) {
    const runPath = path.join(storage.watchdogRunsDir, fileName);
    try {
      const text = await fsp.readFile(runPath, "utf-8");
      const parsed = JSON.parse(text);
      const counts = parsed.summary;
      recentRuns.push({
        runId: normalizeString(parsed.runId),
        generatedAt: normalizeIsoTimestamp(parsed.generatedAt, referenceNow),
        detectionCount: normalizeNonNegativeNumber(counts?.detectionCount, 0),
        activatedCount: normalizeNonNegativeNumber(counts?.activatedCount, 0),
        recoveredCount: normalizeNonNegativeNumber(counts?.recoveredCount, 0),
        notificationCount: normalizeNonNegativeNumber(counts?.notificationCount, 0),
        runPath,
      });
    } catch {
      // Ignore malformed run artifacts.
    }
  }

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    config,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state,
    activeAlerts: Object.values(state.activeAlerts),
    activeAlertCount: Object.keys(state.activeAlerts).length,
    runCount: state.runCount,
    recentRuns,
  };
}
|