sentinelayer-cli 0.6.2 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (280) hide show
  1. package/README.md +1009 -996
  2. package/bin/create-sentinelayer.js +5 -5
  3. package/bin/sentinelayer-cli.js +4 -4
  4. package/bin/sl.js +5 -5
  5. package/package.json +64 -63
  6. package/src/agents/ai-governance/index.js +12 -0
  7. package/src/agents/ai-governance/tools/base.js +171 -0
  8. package/src/agents/ai-governance/tools/eval-regression.js +47 -0
  9. package/src/agents/ai-governance/tools/hitl-audit.js +81 -0
  10. package/src/agents/ai-governance/tools/index.js +52 -0
  11. package/src/agents/ai-governance/tools/prompt-drift.js +42 -0
  12. package/src/agents/ai-governance/tools/provenance-check.js +69 -0
  13. package/src/agents/backend/index.js +12 -0
  14. package/src/agents/backend/tools/base.js +189 -0
  15. package/src/agents/backend/tools/circuit-breaker-check.js +123 -0
  16. package/src/agents/backend/tools/idempotency-audit.js +105 -0
  17. package/src/agents/backend/tools/index.js +87 -0
  18. package/src/agents/backend/tools/retry-audit.js +132 -0
  19. package/src/agents/backend/tools/timeout-audit.js +144 -0
  20. package/src/agents/code-quality/index.js +12 -0
  21. package/src/agents/code-quality/tools/base.js +159 -0
  22. package/src/agents/code-quality/tools/complexity-measure.js +197 -0
  23. package/src/agents/code-quality/tools/coupling-analysis.js +81 -0
  24. package/src/agents/code-quality/tools/cycle-detect.js +49 -0
  25. package/src/agents/code-quality/tools/dep-graph.js +196 -0
  26. package/src/agents/code-quality/tools/index.js +89 -0
  27. package/src/agents/data-layer/index.js +12 -0
  28. package/src/agents/data-layer/tools/base.js +181 -0
  29. package/src/agents/data-layer/tools/index-audit.js +165 -0
  30. package/src/agents/data-layer/tools/index.js +83 -0
  31. package/src/agents/data-layer/tools/migration-scan.js +135 -0
  32. package/src/agents/data-layer/tools/query-explain.js +120 -0
  33. package/src/agents/data-layer/tools/tenancy-scan.js +166 -0
  34. package/src/agents/documentation/index.js +12 -0
  35. package/src/agents/documentation/tools/api-diff.js +91 -0
  36. package/src/agents/documentation/tools/base.js +151 -0
  37. package/src/agents/documentation/tools/dead-link-check.js +58 -0
  38. package/src/agents/documentation/tools/docstring-coverage.js +78 -0
  39. package/src/agents/documentation/tools/index.js +52 -0
  40. package/src/agents/documentation/tools/readme-freshness.js +61 -0
  41. package/src/agents/envelope/fix-cycle.js +45 -0
  42. package/src/agents/envelope/index.js +31 -0
  43. package/src/agents/envelope/loop.js +150 -0
  44. package/src/agents/envelope/pulse.js +18 -0
  45. package/src/agents/envelope/stream.js +40 -0
  46. package/src/agents/infrastructure/index.js +12 -0
  47. package/src/agents/infrastructure/tools/base.js +171 -0
  48. package/src/agents/infrastructure/tools/checkov-run.js +32 -0
  49. package/src/agents/infrastructure/tools/drift-detect.js +59 -0
  50. package/src/agents/infrastructure/tools/iam-least-priv-check.js +78 -0
  51. package/src/agents/infrastructure/tools/index.js +52 -0
  52. package/src/agents/infrastructure/tools/tflint-run.js +31 -0
  53. package/src/agents/jules/config/definition.js +160 -160
  54. package/src/agents/jules/config/system-prompt.js +182 -182
  55. package/src/agents/jules/error-intake.js +51 -51
  56. package/src/agents/jules/fix-cycle.js +17 -17
  57. package/src/agents/jules/loop.js +460 -450
  58. package/src/agents/jules/pulse.js +10 -10
  59. package/src/agents/jules/stream.js +187 -186
  60. package/src/agents/jules/swarm/file-scanner.js +74 -74
  61. package/src/agents/jules/swarm/index.js +11 -11
  62. package/src/agents/jules/swarm/orchestrator.js +362 -362
  63. package/src/agents/jules/swarm/pattern-hunter.js +123 -123
  64. package/src/agents/jules/swarm/sub-agent.js +315 -309
  65. package/src/agents/jules/tools/aidenid-email.js +189 -189
  66. package/src/agents/jules/tools/auth-audit.js +1708 -1691
  67. package/src/agents/jules/tools/dispatch.js +340 -335
  68. package/src/agents/jules/tools/file-edit.js +2 -2
  69. package/src/agents/jules/tools/file-read.js +2 -2
  70. package/src/agents/jules/tools/frontend-analyze.js +570 -570
  71. package/src/agents/jules/tools/glob.js +2 -2
  72. package/src/agents/jules/tools/grep.js +2 -2
  73. package/src/agents/jules/tools/index.js +29 -29
  74. package/src/agents/jules/tools/path-guards.js +2 -2
  75. package/src/agents/jules/tools/runtime-audit.js +507 -507
  76. package/src/agents/jules/tools/shell.js +2 -2
  77. package/src/agents/jules/tools/url-policy.js +100 -100
  78. package/src/agents/mode.js +113 -0
  79. package/src/agents/observability/index.js +12 -0
  80. package/src/agents/observability/tools/alert-audit.js +39 -0
  81. package/src/agents/observability/tools/base.js +181 -0
  82. package/src/agents/observability/tools/dashboard-gap.js +42 -0
  83. package/src/agents/observability/tools/index.js +54 -0
  84. package/src/agents/observability/tools/log-schema-check.js +74 -0
  85. package/src/agents/observability/tools/span-coverage.js +74 -0
  86. package/src/agents/persona-visuals.js +102 -61
  87. package/src/agents/release/index.js +12 -0
  88. package/src/agents/release/tools/base.js +181 -0
  89. package/src/agents/release/tools/changelog-diff.js +86 -0
  90. package/src/agents/release/tools/feature-flag-audit.js +126 -0
  91. package/src/agents/release/tools/index.js +61 -0
  92. package/src/agents/release/tools/rollback-verify.js +129 -0
  93. package/src/agents/release/tools/semver-check.js +109 -0
  94. package/src/agents/reliability/index.js +12 -0
  95. package/src/agents/reliability/tools/backpressure-check.js +129 -0
  96. package/src/agents/reliability/tools/base.js +181 -0
  97. package/src/agents/reliability/tools/chaos-probe.js +109 -0
  98. package/src/agents/reliability/tools/graceful-degradation-check.js +114 -0
  99. package/src/agents/reliability/tools/health-check-audit.js +111 -0
  100. package/src/agents/reliability/tools/index.js +87 -0
  101. package/src/agents/run-persona.js +109 -0
  102. package/src/agents/security/index.js +12 -0
  103. package/src/agents/security/tools/authz-audit.js +134 -0
  104. package/src/agents/security/tools/base.js +190 -0
  105. package/src/agents/security/tools/crypto-review.js +175 -0
  106. package/src/agents/security/tools/index.js +97 -0
  107. package/src/agents/security/tools/sast-scan.js +175 -0
  108. package/src/agents/security/tools/secrets-scan.js +216 -0
  109. package/src/agents/shared-tools/dispatch-core.js +320 -315
  110. package/src/agents/shared-tools/file-edit.js +180 -180
  111. package/src/agents/shared-tools/file-read.js +100 -100
  112. package/src/agents/shared-tools/glob.js +168 -168
  113. package/src/agents/shared-tools/grep.js +228 -228
  114. package/src/agents/shared-tools/index.js +46 -46
  115. package/src/agents/shared-tools/path-guards.js +161 -161
  116. package/src/agents/shared-tools/shell.js +383 -383
  117. package/src/agents/supply-chain/index.js +12 -0
  118. package/src/agents/supply-chain/tools/attestation-check.js +42 -0
  119. package/src/agents/supply-chain/tools/base.js +151 -0
  120. package/src/agents/supply-chain/tools/index.js +52 -0
  121. package/src/agents/supply-chain/tools/lockfile-integrity.js +73 -0
  122. package/src/agents/supply-chain/tools/package-verify.js +56 -0
  123. package/src/agents/supply-chain/tools/sbom-diff.js +34 -0
  124. package/src/agents/testing/index.js +12 -0
  125. package/src/agents/testing/tools/base.js +202 -0
  126. package/src/agents/testing/tools/coverage-gap.js +144 -0
  127. package/src/agents/testing/tools/flake-detect.js +125 -0
  128. package/src/agents/testing/tools/index.js +85 -0
  129. package/src/agents/testing/tools/mutation-test.js +143 -0
  130. package/src/agents/testing/tools/snapshot-diff.js +103 -0
  131. package/src/ai/aidenid.js +1021 -1009
  132. package/src/ai/client.js +553 -553
  133. package/src/ai/domain-target-store.js +268 -268
  134. package/src/ai/identity-store.js +270 -270
  135. package/src/ai/proxy.js +137 -137
  136. package/src/ai/site-store.js +145 -145
  137. package/src/audit/agents/architecture.js +180 -180
  138. package/src/audit/agents/compliance.js +179 -179
  139. package/src/audit/agents/documentation.js +165 -165
  140. package/src/audit/agents/performance.js +145 -145
  141. package/src/audit/agents/security.js +215 -215
  142. package/src/audit/agents/testing.js +172 -172
  143. package/src/audit/orchestrator.js +557 -557
  144. package/src/audit/package.js +204 -204
  145. package/src/audit/registry.js +284 -284
  146. package/src/audit/replay.js +103 -103
  147. package/src/auth/gate.js +428 -371
  148. package/src/auth/http.js +681 -611
  149. package/src/auth/service.js +1106 -1106
  150. package/src/auth/session-store.js +813 -813
  151. package/src/cli.js +257 -252
  152. package/src/commands/ai/identity-lifecycle.js +1338 -1338
  153. package/src/commands/ai/provision-governance.js +1272 -1272
  154. package/src/commands/ai/shared.js +147 -147
  155. package/src/commands/ai.js +11 -11
  156. package/src/commands/apply.js +12 -12
  157. package/src/commands/audit.js +1171 -1166
  158. package/src/commands/auth.js +419 -419
  159. package/src/commands/chat.js +184 -191
  160. package/src/commands/config.js +184 -184
  161. package/src/commands/cost.js +311 -311
  162. package/src/commands/daemon/core.js +850 -850
  163. package/src/commands/daemon/extended.js +1048 -1048
  164. package/src/commands/daemon/shared.js +213 -213
  165. package/src/commands/daemon.js +11 -11
  166. package/src/commands/guide.js +174 -174
  167. package/src/commands/ingest.js +58 -58
  168. package/src/commands/init.js +55 -55
  169. package/src/commands/legacy-args.js +20 -10
  170. package/src/commands/mcp.js +461 -461
  171. package/src/commands/omargate.js +63 -29
  172. package/src/commands/persona.js +65 -20
  173. package/src/commands/plugin.js +260 -260
  174. package/src/commands/policy.js +132 -132
  175. package/src/commands/prompt.js +238 -238
  176. package/src/commands/review.js +704 -704
  177. package/src/commands/scan.js +865 -872
  178. package/src/commands/session.js +1238 -0
  179. package/src/commands/spec.js +771 -716
  180. package/src/commands/swarm.js +651 -651
  181. package/src/commands/telemetry.js +202 -202
  182. package/src/commands/watch.js +511 -511
  183. package/src/config/agent-dictionary.js +182 -182
  184. package/src/config/io.js +56 -56
  185. package/src/config/paths.js +18 -18
  186. package/src/config/schema.js +55 -55
  187. package/src/config/service.js +184 -184
  188. package/src/coord/events-log.js +141 -0
  189. package/src/coord/handshake.js +719 -0
  190. package/src/coord/index.js +35 -0
  191. package/src/coord/paths.js +84 -0
  192. package/src/coord/priority.js +62 -0
  193. package/src/coord/tarjan.js +157 -0
  194. package/src/cost/budget.js +235 -235
  195. package/src/cost/history.js +188 -188
  196. package/src/cost/tokenizer.js +160 -0
  197. package/src/cost/tracker.js +232 -171
  198. package/src/daemon/artifact-lineage.js +896 -534
  199. package/src/daemon/assignment-ledger.js +1083 -770
  200. package/src/daemon/ast-drift.js +496 -0
  201. package/src/daemon/ast-parser-layer.js +258 -258
  202. package/src/daemon/budget-governor.js +633 -633
  203. package/src/daemon/callgraph-overlay.js +646 -646
  204. package/src/daemon/error-worker.js +1209 -626
  205. package/src/daemon/fix-cycle.js +384 -377
  206. package/src/daemon/hybrid-mapper.js +929 -929
  207. package/src/daemon/ingest-refresh.js +79 -11
  208. package/src/daemon/jira-lifecycle.js +767 -632
  209. package/src/daemon/operator-control.js +657 -657
  210. package/src/daemon/pulse.js +327 -327
  211. package/src/daemon/reliability-lane.js +471 -471
  212. package/src/daemon/scope-engine.js +1068 -0
  213. package/src/daemon/watchdog.js +971 -971
  214. package/src/events/schema.js +190 -0
  215. package/src/guide/generator.js +316 -316
  216. package/src/ingest/engine.js +933 -918
  217. package/src/ingest/ownership.js +380 -0
  218. package/src/interactive/index.js +97 -97
  219. package/src/legacy-cli.js +3228 -2994
  220. package/src/mcp/registry.js +695 -695
  221. package/src/memory/blackboard.js +301 -301
  222. package/src/memory/retrieval.js +581 -581
  223. package/src/orchestrator/kai-chen.js +126 -0
  224. package/src/plugin/manifest.js +553 -553
  225. package/src/policy/packs.js +144 -144
  226. package/src/prompt/generator.js +136 -118
  227. package/src/review/ai-review.js +672 -679
  228. package/src/review/compliance-pack.js +389 -0
  229. package/src/review/investor-dd-config.js +54 -0
  230. package/src/review/investor-dd-file-loop.js +303 -0
  231. package/src/review/investor-dd-file-router.js +406 -0
  232. package/src/review/investor-dd-html-report.js +233 -0
  233. package/src/review/investor-dd-notification.js +120 -0
  234. package/src/review/investor-dd-orchestrator.js +405 -0
  235. package/src/review/investor-dd-persona-runner.js +275 -0
  236. package/src/review/live-validator.js +253 -0
  237. package/src/review/local-review.js +1351 -1305
  238. package/src/review/omargate-interactive.js +68 -68
  239. package/src/review/omargate-orchestrator.js +492 -300
  240. package/src/review/persona-prompts.js +484 -296
  241. package/src/review/reconciliation-rules.js +329 -0
  242. package/src/review/replay.js +235 -235
  243. package/src/review/report.js +664 -664
  244. package/src/review/reproducibility-chain.js +136 -0
  245. package/src/review/scan-modes.js +147 -42
  246. package/src/review/spec-binding.js +487 -487
  247. package/src/scaffold/generator.js +67 -67
  248. package/src/scaffold/templates.js +150 -150
  249. package/src/scan/generator.js +418 -418
  250. package/src/scan/gh-secrets.js +107 -107
  251. package/src/session/agent-registry.js +359 -0
  252. package/src/session/analytics.js +479 -0
  253. package/src/session/daemon.js +1396 -0
  254. package/src/session/file-locks.js +666 -0
  255. package/src/session/paths.js +37 -0
  256. package/src/session/recap.js +567 -0
  257. package/src/session/redact.js +82 -0
  258. package/src/session/runtime-bridge.js +762 -0
  259. package/src/session/scoring.js +406 -0
  260. package/src/session/setup-guides.js +304 -0
  261. package/src/session/store.js +704 -0
  262. package/src/session/stream.js +333 -0
  263. package/src/session/sync.js +753 -0
  264. package/src/session/tasks.js +1054 -0
  265. package/src/session/templates.js +188 -0
  266. package/src/spec/generator.js +619 -519
  267. package/src/spec/regenerate.js +237 -237
  268. package/src/spec/templates.js +91 -91
  269. package/src/swarm/dashboard.js +247 -247
  270. package/src/swarm/factory.js +363 -363
  271. package/src/swarm/pentest.js +934 -934
  272. package/src/swarm/registry.js +419 -419
  273. package/src/swarm/report.js +158 -158
  274. package/src/swarm/runtime.js +569 -576
  275. package/src/swarm/scenario-dsl.js +272 -272
  276. package/src/telemetry/ledger.js +302 -302
  277. package/src/telemetry/session-tracker.js +234 -234
  278. package/src/telemetry/sync.js +203 -203
  279. package/src/ui/command-hints.js +13 -13
  280. package/src/ui/markdown.js +220 -220
@@ -1,971 +1,971 @@
1
- import fsp from "node:fs/promises";
2
- import path from "node:path";
3
-
4
- import { parse as parseYaml } from "yaml";
5
-
6
- import { listAssignments, resolveAssignmentLedgerStorage } from "./assignment-ledger.js";
7
- import { listBudgetStates } from "./budget-governor.js";
8
- import { listErrorQueue, resolveErrorDaemonStorage } from "./error-worker.js";
9
-
10
- const WATCHDOG_SCHEMA_VERSION = "1.0.0";
11
- const STATE_SCHEMA_VERSION = "1.0.0";
12
-
13
- const ACTIVE_ASSIGNMENT_STATUSES = new Set(["CLAIMED", "IN_PROGRESS", "BLOCKED"]);
14
-
15
- export const WATCHDOG_EVENT_TYPES = Object.freeze([
16
- "agent_stuck",
17
- "budget_warning",
18
- "alert_recovered",
19
- "pr_merged",
20
- "audit_complete",
21
- "kill_switch_activated",
22
- ]);
23
-
24
- export const WATCHDOG_SIGNAL_CODES = Object.freeze([
25
- "NO_TOOL_CALL",
26
- "REPEATED_FILE_READ",
27
- "BUDGET_WARNING_NO_FINDINGS",
28
- "TURN_STALL",
29
- ]);
30
-
31
- const WATCHDOG_SIGNAL_SET = new Set(WATCHDOG_SIGNAL_CODES);
32
- const WATCHDOG_EVENT_SET = new Set(WATCHDOG_EVENT_TYPES);
33
-
34
- function normalizeString(value) {
35
- return String(value || "").trim();
36
- }
37
-
38
- function normalizeIsoTimestamp(value, fallbackIso = new Date().toISOString()) {
39
- const normalized = normalizeString(value);
40
- if (!normalized) {
41
- return fallbackIso;
42
- }
43
- const epoch = Date.parse(normalized);
44
- if (!Number.isFinite(epoch)) {
45
- return fallbackIso;
46
- }
47
- return new Date(epoch).toISOString();
48
- }
49
-
50
- function normalizeNumber(value, fallback = 0) {
51
- const parsed = Number(value);
52
- if (!Number.isFinite(parsed)) {
53
- return fallback;
54
- }
55
- return parsed;
56
- }
57
-
58
- function normalizePositiveInteger(value, fallbackValue) {
59
- const normalized = normalizeNumber(value, fallbackValue);
60
- if (!Number.isFinite(normalized) || normalized <= 0) {
61
- return fallbackValue;
62
- }
63
- return Math.max(1, Math.floor(normalized));
64
- }
65
-
66
- function normalizeNonNegativeNumber(value, fallbackValue = 0) {
67
- const normalized = normalizeNumber(value, fallbackValue);
68
- if (!Number.isFinite(normalized) || normalized < 0) {
69
- return fallbackValue;
70
- }
71
- return normalized;
72
- }
73
-
74
- function normalizeObject(value) {
75
- if (!value || typeof value !== "object" || Array.isArray(value)) {
76
- return {};
77
- }
78
- return { ...value };
79
- }
80
-
81
- function normalizeBoolean(value, fallbackValue = false) {
82
- if (typeof value === "boolean") {
83
- return value;
84
- }
85
- const normalized = normalizeString(value).toLowerCase();
86
- if (!normalized) {
87
- return fallbackValue;
88
- }
89
- if (normalized === "true" || normalized === "1" || normalized === "yes") {
90
- return true;
91
- }
92
- if (normalized === "false" || normalized === "0" || normalized === "no") {
93
- return false;
94
- }
95
- return fallbackValue;
96
- }
97
-
98
- function resolveEnvTemplate(value, env) {
99
- const normalized = normalizeString(value);
100
- if (!normalized) {
101
- return "";
102
- }
103
- return normalized.replace(/\$\{([A-Z0-9_]+)\}/g, (_, key) => normalizeString(env[key]));
104
- }
105
-
106
- function computeSecondsSince(previousIso, nowIso) {
107
- const previousEpoch = Date.parse(normalizeIsoTimestamp(previousIso, nowIso));
108
- const nowEpoch = Date.parse(normalizeIsoTimestamp(nowIso, new Date().toISOString()));
109
- if (!Number.isFinite(previousEpoch) || !Number.isFinite(nowEpoch)) {
110
- return null;
111
- }
112
- return Math.max(0, Math.floor((nowEpoch - previousEpoch) / 1000));
113
- }
114
-
115
- function pickLastToolCallAt(assignment = {}) {
116
- const snapshot = normalizeObject(assignment.budgetSnapshot);
117
- return (
118
- normalizeString(snapshot.lastToolCallAt) ||
119
- normalizeString(snapshot.lastActionAt) ||
120
- normalizeString(assignment.heartbeatAt) ||
121
- normalizeString(assignment.updatedAt) ||
122
- ""
123
- );
124
- }
125
-
126
- function extractRecentFileReads(snapshot = {}) {
127
- const candidates = [];
128
- const normalizedSnapshot = normalizeObject(snapshot);
129
- for (const key of ["recentFileReads", "fileReadHistory", "fileReads"]) {
130
- const value = normalizedSnapshot[key];
131
- if (Array.isArray(value)) {
132
- for (const item of value) {
133
- if (typeof item === "string") {
134
- const normalized = normalizeString(item);
135
- if (normalized) {
136
- candidates.push(normalized);
137
- }
138
- continue;
139
- }
140
- if (item && typeof item === "object") {
141
- const normalized =
142
- normalizeString(item.path) ||
143
- normalizeString(item.file) ||
144
- normalizeString(item.filePath);
145
- if (normalized) {
146
- candidates.push(normalized);
147
- }
148
- }
149
- }
150
- }
151
- }
152
- return candidates;
153
- }
154
-
155
- function computeRepeatedTailCount(values = []) {
156
- if (!Array.isArray(values) || values.length === 0) {
157
- return {
158
- repeatedValue: "",
159
- repeatCount: 0,
160
- };
161
- }
162
- const normalized = values.map((value) => normalizeString(value)).filter(Boolean);
163
- if (normalized.length === 0) {
164
- return {
165
- repeatedValue: "",
166
- repeatCount: 0,
167
- };
168
- }
169
- const tail = normalized[normalized.length - 1];
170
- let repeatCount = 0;
171
- for (let index = normalized.length - 1; index >= 0; index -= 1) {
172
- if (normalized[index] !== tail) {
173
- break;
174
- }
175
- repeatCount += 1;
176
- }
177
- return {
178
- repeatedValue: tail,
179
- repeatCount,
180
- };
181
- }
182
-
183
- function computeBudgetUsageRatio(record = {}) {
184
- const usage = normalizeObject(record.usage);
185
- const budget = normalizeObject(record.budget);
186
- const ratios = [];
187
- const pairs = [
188
- ["tokensUsed", "maxTokens"],
189
- ["costUsd", "maxCostUsd"],
190
- ["runtimeMs", "maxRuntimeMs"],
191
- ["toolCalls", "maxToolCalls"],
192
- ];
193
- for (const [usageKey, budgetKey] of pairs) {
194
- const used = normalizeNonNegativeNumber(usage[usageKey], 0);
195
- const limit = normalizeNonNegativeNumber(budget[budgetKey], 0);
196
- if (limit > 0) {
197
- ratios.push(used / limit);
198
- }
199
- }
200
- if (ratios.length === 0) {
201
- return 0;
202
- }
203
- return Math.max(...ratios);
204
- }
205
-
206
- function normalizeSeverity(value) {
207
- const normalized = normalizeString(value).toUpperCase();
208
- if (normalized === "P0" || normalized === "P1" || normalized === "P2" || normalized === "P3") {
209
- return normalized;
210
- }
211
- return "P3";
212
- }
213
-
214
- function createInitialState(nowIso) {
215
- return {
216
- schemaVersion: STATE_SCHEMA_VERSION,
217
- generatedAt: normalizeIsoTimestamp(nowIso, nowIso),
218
- activeAlerts: {},
219
- runCount: 0,
220
- lastRunId: null,
221
- lastRunAt: null,
222
- };
223
- }
224
-
225
- function normalizeState(state = {}, nowIso = new Date().toISOString()) {
226
- const rawAlerts = state.activeAlerts && typeof state.activeAlerts === "object" ? state.activeAlerts : {};
227
- const activeAlerts = {};
228
- for (const [alertId, alert] of Object.entries(rawAlerts)) {
229
- if (!normalizeString(alertId)) {
230
- continue;
231
- }
232
- activeAlerts[alertId] = {
233
- alertId,
234
- eventType: WATCHDOG_EVENT_SET.has(normalizeString(alert.eventType))
235
- ? normalizeString(alert.eventType)
236
- : "agent_stuck",
237
- signalCode: WATCHDOG_SIGNAL_SET.has(normalizeString(alert.signalCode))
238
- ? normalizeString(alert.signalCode)
239
- : "NO_TOOL_CALL",
240
- workItemId: normalizeString(alert.workItemId),
241
- agentIdentity: normalizeString(alert.agentIdentity),
242
- firstSeenAt: normalizeIsoTimestamp(alert.firstSeenAt, nowIso),
243
- lastSeenAt: normalizeIsoTimestamp(alert.lastSeenAt, nowIso),
244
- message: normalizeString(alert.message),
245
- severity: normalizeSeverity(alert.severity),
246
- };
247
- }
248
- return {
249
- schemaVersion: STATE_SCHEMA_VERSION,
250
- generatedAt: normalizeIsoTimestamp(state.generatedAt, nowIso),
251
- activeAlerts,
252
- runCount: Math.max(0, Math.floor(normalizeNumber(state.runCount, 0))),
253
- lastRunId: normalizeString(state.lastRunId) || null,
254
- lastRunAt: state.lastRunAt ? normalizeIsoTimestamp(state.lastRunAt, nowIso) : null,
255
- };
256
- }
257
-
258
- async function readJsonFile(filePath, defaultFactory) {
259
- try {
260
- const raw = await fsp.readFile(filePath, "utf-8");
261
- return JSON.parse(raw);
262
- } catch (error) {
263
- if (error && typeof error === "object" && error.code === "ENOENT") {
264
- return defaultFactory();
265
- }
266
- throw error;
267
- }
268
- }
269
-
270
- async function writeJsonFile(filePath, payload) {
271
- await fsp.mkdir(path.dirname(filePath), { recursive: true });
272
- await fsp.writeFile(filePath, `${JSON.stringify(payload, null, 2)}\n`, "utf-8");
273
- }
274
-
275
- async function appendEvent(filePath, payload) {
276
- await fsp.mkdir(path.dirname(filePath), { recursive: true });
277
- await fsp.appendFile(filePath, `${JSON.stringify(payload)}\n`, "utf-8");
278
- }
279
-
280
- function buildRunId(nowIso, count) {
281
- const token = normalizeIsoTimestamp(nowIso, new Date().toISOString()).replace(/[:.]/g, "-");
282
- return `watchdog-${token}-${String(count).padStart(4, "0")}`;
283
- }
284
-
285
- function normalizeChannel(channel = {}, env = process.env) {
286
- const type = normalizeString(channel.type).toLowerCase();
287
- if (type === "slack") {
288
- const webhookUrl = resolveEnvTemplate(
289
- channel.webhook_url || channel.webhookUrl || channel.url || "",
290
- env
291
- );
292
- return webhookUrl
293
- ? {
294
- type: "slack",
295
- webhookUrl,
296
- }
297
- : null;
298
- }
299
- if (type === "telegram") {
300
- const botToken = resolveEnvTemplate(channel.bot_token || channel.botToken || "", env);
301
- const chatId = resolveEnvTemplate(channel.chat_id || channel.chatId || "", env);
302
- return botToken && chatId
303
- ? {
304
- type: "telegram",
305
- botToken,
306
- chatId,
307
- }
308
- : null;
309
- }
310
- return null;
311
- }
312
-
313
- async function loadWatchdogConfig({ targetPath = ".", env = process.env } = {}) {
314
- const configPath = path.join(path.resolve(String(targetPath || ".")), ".sentinelayer.yml");
315
- const fallback = {
316
- channels: [],
317
- frequency: "smart",
318
- events: ["agent_stuck", "budget_warning", "alert_recovered"],
319
- };
320
- try {
321
- const parsed = parseYaml(await fsp.readFile(configPath, "utf-8")) || {};
322
- const alerts = parsed && typeof parsed === "object" ? normalizeObject(parsed.alerts) : {};
323
- const channels = Array.isArray(alerts.channels)
324
- ? alerts.channels.map((channel) => normalizeChannel(channel, env)).filter(Boolean)
325
- : [];
326
- const events = Array.isArray(alerts.events)
327
- ? alerts.events
328
- .map((eventType) => normalizeString(eventType))
329
- .filter((eventType) => WATCHDOG_EVENT_SET.has(eventType))
330
- : fallback.events;
331
- const frequency = normalizeString(alerts.frequency).toLowerCase() || fallback.frequency;
332
- return {
333
- configPath,
334
- exists: true,
335
- channels,
336
- frequency,
337
- events: events.length > 0 ? events : fallback.events,
338
- };
339
- } catch (error) {
340
- if (error && typeof error === "object" && error.code === "ENOENT") {
341
- return {
342
- configPath,
343
- exists: false,
344
- channels: [],
345
- frequency: fallback.frequency,
346
- events: fallback.events,
347
- };
348
- }
349
- throw error;
350
- }
351
- }
352
-
353
- function buildDetection({
354
- eventType,
355
- signalCode,
356
- workItemId,
357
- agentIdentity,
358
- severity,
359
- message,
360
- details = {},
361
- }) {
362
- return {
363
- alertId: `${workItemId}:${signalCode}`,
364
- eventType,
365
- signalCode,
366
- workItemId,
367
- agentIdentity,
368
- severity,
369
- message,
370
- details,
371
- };
372
- }
373
-
374
- function evaluateWatchdogSignals({
375
- assignment,
376
- queueItem,
377
- budgetRecord,
378
- nowIso,
379
- noToolCallSeconds,
380
- repeatedFileReadsThreshold,
381
- budgetWarningThreshold,
382
- turnStallTurns,
383
- }) {
384
- const detections = [];
385
- const workItemId = normalizeString(assignment.workItemId);
386
- const agentIdentity = normalizeString(assignment.assignedAgentIdentity) || "unassigned";
387
- const severity = normalizeSeverity(queueItem?.severity);
388
- const budgetSnapshot = normalizeObject(assignment.budgetSnapshot);
389
-
390
- const lastToolCallAt = pickLastToolCallAt(assignment);
391
- const idleSeconds = computeSecondsSince(lastToolCallAt, nowIso);
392
- if (idleSeconds !== null && idleSeconds >= noToolCallSeconds) {
393
- detections.push(
394
- buildDetection({
395
- eventType: "agent_stuck",
396
- signalCode: "NO_TOOL_CALL",
397
- workItemId,
398
- agentIdentity,
399
- severity,
400
- message: `No tool calls observed for ${idleSeconds}s (threshold ${noToolCallSeconds}s).`,
401
- details: {
402
- idleSeconds,
403
- thresholdSeconds: noToolCallSeconds,
404
- lastToolCallAt: normalizeIsoTimestamp(lastToolCallAt, nowIso),
405
- },
406
- })
407
- );
408
- }
409
-
410
- const recentFileReads = extractRecentFileReads(budgetSnapshot);
411
- const repetition = computeRepeatedTailCount(recentFileReads);
412
- if (repetition.repeatCount >= repeatedFileReadsThreshold) {
413
- detections.push(
414
- buildDetection({
415
- eventType: "agent_stuck",
416
- signalCode: "REPEATED_FILE_READ",
417
- workItemId,
418
- agentIdentity,
419
- severity,
420
- message: `Repeated file read detected (${repetition.repeatCount}x): ${repetition.repeatedValue}`,
421
- details: {
422
- filePath: repetition.repeatedValue,
423
- repeatCount: repetition.repeatCount,
424
- threshold: repeatedFileReadsThreshold,
425
- },
426
- })
427
- );
428
- }
429
-
430
- const turnCount = Math.floor(normalizeNonNegativeNumber(budgetSnapshot.turnCount, 0));
431
- const lastProgressTurn = Math.floor(
432
- normalizeNonNegativeNumber(
433
- budgetSnapshot.lastProgressTurn ?? budgetSnapshot.lastFindingTurn ?? turnCount,
434
- turnCount
435
- )
436
- );
437
- const stalledTurns = Math.max(0, turnCount - lastProgressTurn);
438
- if (turnCount > 0 && stalledTurns >= turnStallTurns) {
439
- detections.push(
440
- buildDetection({
441
- eventType: "agent_stuck",
442
- signalCode: "TURN_STALL",
443
- workItemId,
444
- agentIdentity,
445
- severity,
446
- message: `Turn progression stalled for ${stalledTurns} turns (threshold ${turnStallTurns}).`,
447
- details: {
448
- turnCount,
449
- lastProgressTurn,
450
- stalledTurns,
451
- threshold: turnStallTurns,
452
- },
453
- })
454
- );
455
- }
456
-
457
- const usageRatio = computeBudgetUsageRatio(budgetRecord || {});
458
- const findingsProduced = Math.floor(
459
- normalizeNonNegativeNumber(
460
- budgetSnapshot.findingsProduced ??
461
- queueItem?.metadata?.findingsProduced ??
462
- queueItem?.metadata?.findingsCount ??
463
- 0,
464
- 0
465
- )
466
- );
467
- if (usageRatio >= budgetWarningThreshold && findingsProduced <= 0) {
468
- detections.push(
469
- buildDetection({
470
- eventType: "budget_warning",
471
- signalCode: "BUDGET_WARNING_NO_FINDINGS",
472
- workItemId,
473
- agentIdentity,
474
- severity,
475
- message: `Budget usage ${(usageRatio * 100).toFixed(1)}% with no findings produced.`,
476
- details: {
477
- usageRatio: Number(usageRatio.toFixed(6)),
478
- threshold: budgetWarningThreshold,
479
- findingsProduced,
480
- lifecycleState: normalizeString(budgetRecord?.lifecycleState) || "WITHIN_BUDGET",
481
- },
482
- })
483
- );
484
- }
485
-
486
- return detections;
487
- }
488
-
489
- function toActiveAlertRecord(alert = {}, nowIso = new Date().toISOString()) {
490
- return {
491
- alertId: alert.alertId,
492
- eventType: alert.eventType,
493
- signalCode: alert.signalCode,
494
- workItemId: alert.workItemId,
495
- agentIdentity: alert.agentIdentity,
496
- firstSeenAt: normalizeIsoTimestamp(alert.firstSeenAt || nowIso, nowIso),
497
- lastSeenAt: normalizeIsoTimestamp(nowIso, nowIso),
498
- message: normalizeString(alert.message),
499
- severity: normalizeSeverity(alert.severity),
500
- };
501
- }
502
-
503
/**
 * Diff the current detections against the previously active alerts.
 *
 * @param {object} options
 * @param {object[]} [options.detections] - Detections produced by this tick.
 * @param {object} [options.previousState] - Prior state; only `activeAlerts` is read.
 * @param {string} [options.nowIso] - Timestamp stamped on new/updated alerts.
 * @returns {{activeAlerts: object, activated: object[], stillActive: object[], recovered: object[]}}
 *   `activeAlerts` is the new persisted map; `activated` are alerts not present before;
 *   `stillActive` were present before and detected again; `recovered` were present
 *   before but not detected now.
 */
function buildAlertTransitions({
  detections = [],
  previousState = {},
  nowIso = new Date().toISOString(),
}) {
  const previousAlerts = normalizeObject(previousState.activeAlerts);
  const activeAlerts = {};
  const activated = [];
  const stillActive = [];
  const detectedIds = new Set();

  for (const detection of detections) {
    const { alertId } = detection;
    detectedIds.add(alertId);
    const previous = previousAlerts[alertId] || null;
    // Keep the original first-seen time when the alert was already active.
    const firstSeenAt = previous?.firstSeenAt || nowIso;
    activeAlerts[alertId] = toActiveAlertRecord({ ...detection, firstSeenAt }, nowIso);
    if (previous) {
      stillActive.push({ ...detection, firstSeenAt: previous.firstSeenAt, lastSeenAt: nowIso });
    } else {
      activated.push({ ...detection, firstSeenAt: nowIso, lastSeenAt: nowIso });
    }
  }

  const recovered = Object.entries(previousAlerts)
    .filter(([alertId]) => !detectedIds.has(alertId))
    .map(([alertId, previous]) => ({
      alertId,
      eventType: "alert_recovered",
      signalCode: normalizeString(previous.signalCode),
      workItemId: normalizeString(previous.workItemId),
      agentIdentity: normalizeString(previous.agentIdentity),
      severity: normalizeSeverity(previous.severity),
      message: `Recovered: ${normalizeString(previous.message) || "watchdog signal cleared"}`,
      firstSeenAt: normalizeIsoTimestamp(previous.firstSeenAt, nowIso),
      lastSeenAt: normalizeIsoTimestamp(previous.lastSeenAt, nowIso),
      recoveredAt: normalizeIsoTimestamp(nowIso, nowIso),
    }));

  return { activeAlerts, activated, stillActive, recovered };
}
565
-
566
/**
 * Render the plain-text notification body for an alert.
 *
 * `agent_stuck` and `budget_warning` get dedicated headers (idle time and budget
 * percentage are read from `alert.details`); every other event type uses a
 * generic header. The alert message is always appended on a second line.
 *
 * @param {object} [alert]
 * @returns {string}
 */
function formatAlertMessage(alert = {}) {
  switch (normalizeString(alert.eventType)) {
    case "agent_stuck": {
      const idleSeconds = normalizeNumber(alert.details?.idleSeconds, 0);
      const budgetRatio = normalizeNumber(alert.details?.usageRatio, 0);
      const idlePart = idleSeconds > 0 ? ` | idle=${idleSeconds}s` : "";
      const budgetPct = budgetRatio > 0 ? ` | budget=${(budgetRatio * 100).toFixed(1)}%` : "";
      return `[SentinelLayer] Agent "${alert.agentIdentity}" stuck (${alert.signalCode}) on ${alert.workItemId}${idlePart}${budgetPct}\n${alert.message}`;
    }
    case "budget_warning": {
      const budgetRatio = normalizeNumber(alert.details?.usageRatio, 0);
      return `[SentinelLayer] Budget warning for ${alert.workItemId} (${alert.agentIdentity}) | usage=${(budgetRatio * 100).toFixed(1)}%\n${alert.message}`;
    }
    default:
      return `[SentinelLayer] ${alert.eventType} ${alert.workItemId || ""} ${alert.agentIdentity || ""}\n${alert.message}`;
  }
}
580
-
581
/**
 * POST a text message to a Slack incoming webhook.
 *
 * @param {{webhookUrl: string}} channel
 * @param {string} message - Sent as the `text` field of the JSON body.
 * @param {Function} fetchImpl - fetch-compatible function.
 * @throws {Error} When the response is not `ok`.
 */
async function sendSlackAlert(channel, message, fetchImpl) {
  const endpoint = channel.webhookUrl;
  const requestInit = {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({ text: message }),
  };
  const response = await fetchImpl(endpoint, requestInit);
  if (response.ok) {
    return;
  }
  throw new Error(`Slack webhook returned ${response.status}.`);
}
595
-
596
/**
 * Send a text message through the Telegram Bot API `sendMessage` method.
 *
 * @param {{botToken: string, chatId: string}} channel
 * @param {string} message
 * @param {Function} fetchImpl - fetch-compatible function.
 * @throws {Error} When the response is not `ok`.
 */
async function sendTelegramAlert(channel, message, fetchImpl) {
  const endpoint = `https://api.telegram.org/bot${channel.botToken}/sendMessage`;
  const payload = {
    chat_id: channel.chatId,
    text: message,
    disable_web_page_preview: true,
  };
  const response = await fetchImpl(endpoint, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify(payload),
  });
  if (response.ok) {
    return;
  }
  throw new Error(`Telegram sendMessage returned ${response.status}.`);
}
613
-
614
/**
 * Deliver one alert to one channel and report the outcome without throwing
 * for delivery failures.
 *
 * @param {object} options
 * @param {object} options.channel - Normalized channel (`type` is "slack" or "telegram").
 * @param {object} options.alert
 * @param {boolean} [options.execute] - When false, nothing is sent and the result is a dry run.
 * @param {Function} [options.fetchImpl] - fetch-compatible function.
 * @returns {Promise<object>} `{channelType, alertId, eventType, sent, dryRun, message, error}`.
 */
async function dispatchAlertToChannel({
  channel,
  alert,
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const message = formatAlertMessage(alert);
  // Every outcome shares the same envelope; only sent/dryRun/error vary.
  const outcome = (overrides) => ({
    channelType: channel.type,
    alertId: alert.alertId,
    eventType: alert.eventType,
    sent: false,
    dryRun: false,
    message,
    error: "",
    ...overrides,
  });

  if (!execute) {
    return outcome({ dryRun: true });
  }
  if (typeof fetchImpl !== "function") {
    return outcome({ error: "Fetch implementation is unavailable." });
  }

  try {
    switch (channel.type) {
      case "slack":
        await sendSlackAlert(channel, message, fetchImpl);
        break;
      case "telegram":
        await sendTelegramAlert(channel, message, fetchImpl);
        break;
      default:
        throw new Error(`Unsupported alert channel type '${channel.type}'.`);
    }
    return outcome({ sent: true });
  } catch (error) {
    return outcome({ error: normalizeString(error?.message || error) });
  }
}
672
-
673
/**
 * Fan alerts out to every configured channel.
 *
 * Alerts whose event type is not in `config.events` are dropped; an empty or
 * missing `config.events` allows every event type.
 *
 * @param {object} options
 * @param {object[]} [options.alerts]
 * @param {object} [options.config] - Reads `channels` and `events`.
 * @param {boolean} [options.execute]
 * @param {Function} [options.fetchImpl]
 * @returns {Promise<object[]>} One dispatch result per (alert, channel) pair, alert-major order.
 */
async function dispatchAlerts({
  alerts = [],
  config = {},
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const channels = Array.isArray(config.channels) ? config.channels : [];
  const allowedEvents = new Set(Array.isArray(config.events) ? config.events : []);
  const isAllowed = (alert) => allowedEvents.size === 0 || allowedEvents.has(alert.eventType);

  const tasks = alerts
    .filter(isAllowed)
    .flatMap((alert) =>
      channels.map((channel) => dispatchAlertToChannel({ channel, alert, execute, fetchImpl }))
    );
  return Promise.all(tasks);
}
699
-
700
/**
 * Resolve the error-daemon storage layout and add the watchdog file locations
 * (state file, event log, runs directory) under its `baseDir`.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath]
 * @param {string} [options.outputDir]
 * @param {object} [options.env]
 * @param {string} [options.homeDir]
 * @returns {Promise<object>} Daemon storage fields plus `watchdogStatePath`,
 *   `watchdogEventsPath` and `watchdogRunsDir`.
 */
export async function resolveWatchdogStorage({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
} = {}) {
  const daemonStorage = await resolveErrorDaemonStorage({ targetPath, outputDir, env, homeDir });
  const underBase = (name) => path.join(daemonStorage.baseDir, name);
  return {
    ...daemonStorage,
    watchdogStatePath: underBase("watchdog-state.json"),
    watchdogEventsPath: underBase("watchdog-events.ndjson"),
    watchdogRunsDir: underBase("watchdog-runs"),
  };
}
719
-
720
/**
 * Run one watchdog evaluation pass.
 *
 * Loads assignments, the error queue, budget states and the previous watchdog
 * state; evaluates stuck/budget signals for every active assignment; diffs the
 * detections against previously active alerts; notifies channels about newly
 * activated and recovered alerts; then writes the run artifact, the new state
 * and an event-log line.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath]
 * @param {string} [options.outputDir]
 * @param {number} [options.noToolCallSeconds] - Idle threshold (default 60).
 * @param {number} [options.repeatedFileReadsThreshold] - Default 3.
 * @param {number} [options.budgetWarningThreshold] - Clamped to [0, 1]; default 0.9.
 * @param {number} [options.turnStallTurns] - Default 5.
 * @param {boolean} [options.execute] - When false, notifications are dry runs.
 * @param {number} [options.limit] - Listing limit (default 200).
 * @param {object} [options.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso]
 * @param {Function} [options.fetchImpl]
 * @returns {Promise<object>} Storage paths, run id/path, next state, detections,
 *   activated/recovered alerts, notification results and the run summary.
 */
export async function runWatchdogTick({
  targetPath = ".",
  outputDir = "",
  noToolCallSeconds = 60,
  repeatedFileReadsThreshold = 3,
  budgetWarningThreshold = 0.9,
  turnStallTurns = 5,
  execute = false,
  limit = 200,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
  fetchImpl = globalThis.fetch,
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  // Key order here is reused verbatim in the persisted run config below.
  const thresholds = {
    noToolCallSeconds: normalizePositiveInteger(noToolCallSeconds, 60),
    repeatedFileReadsThreshold: normalizePositiveInteger(repeatedFileReadsThreshold, 3),
    budgetWarningThreshold: Math.max(
      0,
      Math.min(1, normalizeNonNegativeNumber(budgetWarningThreshold, 0.9))
    ),
    turnStallTurns: normalizePositiveInteger(turnStallTurns, 5),
  };
  const normalizedLimit = normalizePositiveInteger(limit, 200);
  const normalizedExecute = normalizeBoolean(execute, false);

  const location = { targetPath, outputDir, env, homeDir };
  const storage = await resolveWatchdogStorage(location);
  const config = await loadWatchdogConfig({ targetPath, env });

  const loadPreviousState = async () => {
    const rawState = await readJsonFile(storage.watchdogStatePath, () =>
      createInitialState(normalizedNow)
    );
    return normalizeState(rawState, normalizedNow);
  };

  const [assignments, queue, budgets, previousState] = await Promise.all([
    listAssignments({
      ...location,
      includeExpired: true,
      limit: normalizedLimit,
      nowIso: normalizedNow,
    }),
    listErrorQueue({ ...location, limit: normalizedLimit }),
    listBudgetStates({ ...location, limit: normalizedLimit, nowIso: normalizedNow }),
    loadPreviousState(),
  ]);

  const queueByWorkItem = new Map(queue.items.map((item) => [item.workItemId, item]));
  const budgetByWorkItem = new Map(budgets.records.map((record) => [record.workItemId, record]));
  const activeAssignments = assignments.assignments.filter((assignment) =>
    ACTIVE_ASSIGNMENT_STATUSES.has(normalizeString(assignment.status).toUpperCase())
  );

  const detections = activeAssignments.flatMap((assignment) =>
    evaluateWatchdogSignals({
      assignment,
      queueItem: queueByWorkItem.get(assignment.workItemId) || null,
      budgetRecord: budgetByWorkItem.get(assignment.workItemId) || null,
      nowIso: normalizedNow,
      ...thresholds,
    })
  );

  const transitions = buildAlertTransitions({
    detections,
    previousState,
    nowIso: normalizedNow,
  });
  // Only state changes are notified; alerts that stay active are not re-sent.
  const notifications = await dispatchAlerts({
    alerts: [...transitions.activated, ...transitions.recovered],
    config,
    execute: normalizedExecute,
    fetchImpl,
  });

  const nextRunCount = previousState.runCount + 1;
  const nextState = normalizeState(
    {
      ...previousState,
      generatedAt: normalizedNow,
      activeAlerts: transitions.activeAlerts,
      runCount: nextRunCount,
      lastRunId: buildRunId(normalizedNow, nextRunCount),
      lastRunAt: normalizedNow,
    },
    normalizedNow
  );

  await fsp.mkdir(storage.watchdogRunsDir, { recursive: true });
  const runId = nextState.lastRunId;
  const runPath = path.join(storage.watchdogRunsDir, `${runId}.json`);

  const sentNotificationCount = notifications.filter((item) => item.sent).length;
  const failedNotificationCount = notifications.filter(
    (item) => !item.sent && !item.dryRun
  ).length;

  const runPayload = {
    schemaVersion: WATCHDOG_SCHEMA_VERSION,
    generatedAt: normalizedNow,
    runId,
    config: {
      ...thresholds,
      execute: normalizedExecute,
      channelCount: config.channels.length,
      events: config.events,
      frequency: config.frequency,
    },
    summary: {
      assignmentCount: activeAssignments.length,
      detectionCount: detections.length,
      activeAlertCount: Object.keys(transitions.activeAlerts).length,
      activatedCount: transitions.activated.length,
      recoveredCount: transitions.recovered.length,
      notificationCount: notifications.length,
      sentNotificationCount,
      failedNotificationCount,
    },
    detections,
    activatedAlerts: transitions.activated,
    recoveredAlerts: transitions.recovered,
    notifications,
  };

  await Promise.all([
    writeJsonFile(runPath, runPayload),
    writeJsonFile(storage.watchdogStatePath, nextState),
    appendEvent(storage.watchdogEventsPath, {
      timestamp: normalizedNow,
      eventType: "watchdog_tick",
      runId,
      detectionCount: detections.length,
      activatedCount: transitions.activated.length,
      recoveredCount: transitions.recovered.length,
      notificationCount: notifications.length,
      sentNotificationCount,
      failedNotificationCount,
    }),
  ]);

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    runId,
    runPath,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state: nextState,
    detections,
    activatedAlerts: transitions.activated,
    recoveredAlerts: transitions.recovered,
    notifications,
    summary: runPayload.summary,
  };
}
899
-
900
/**
 * Report the persisted watchdog state together with summaries of the most
 * recent run artifacts.
 *
 * Run files are ordered by file name, descending, and at most `limit` are read.
 * A missing runs directory yields no runs; unreadable or malformed run files
 * are skipped.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath]
 * @param {string} [options.outputDir]
 * @param {number} [options.limit] - Maximum number of recent runs (default 10).
 * @param {object} [options.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso]
 * @returns {Promise<object>} Storage paths, config, state, active alerts and recent runs.
 */
export async function getWatchdogStatus({
  targetPath = ".",
  outputDir = "",
  limit = 10,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const normalizedLimit = normalizePositiveInteger(limit, 10);
  const storage = await resolveWatchdogStorage({ targetPath, outputDir, env, homeDir });
  const config = await loadWatchdogConfig({ targetPath, env });

  const rawState = await readJsonFile(storage.watchdogStatePath, () =>
    createInitialState(normalizedNow)
  );
  const state = normalizeState(rawState, normalizedNow);

  let runEntries = [];
  try {
    runEntries = await fsp.readdir(storage.watchdogRunsDir, { withFileTypes: true });
  } catch (error) {
    // No runs directory yet simply means no runs; anything else is a real failure.
    const isMissingDir = error && typeof error === "object" && error.code === "ENOENT";
    if (!isMissingDir) {
      throw error;
    }
  }

  const runFiles = runEntries
    .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
    .map((entry) => entry.name)
    .sort((left, right) => right.localeCompare(left))
    .slice(0, normalizedLimit);

  const recentRuns = [];
  for (const runFile of runFiles) {
    const runPath = path.join(storage.watchdogRunsDir, runFile);
    try {
      const { runId, generatedAt, summary } = JSON.parse(await fsp.readFile(runPath, "utf-8"));
      recentRuns.push({
        runId: normalizeString(runId),
        generatedAt: normalizeIsoTimestamp(generatedAt, normalizedNow),
        detectionCount: normalizeNonNegativeNumber(summary?.detectionCount, 0),
        activatedCount: normalizeNonNegativeNumber(summary?.activatedCount, 0),
        recoveredCount: normalizeNonNegativeNumber(summary?.recoveredCount, 0),
        notificationCount: normalizeNonNegativeNumber(summary?.notificationCount, 0),
        runPath,
      });
    } catch {
      // Ignore malformed run artifacts.
    }
  }

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    config,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state,
    activeAlerts: Object.values(state.activeAlerts),
    activeAlertCount: Object.keys(state.activeAlerts).length,
    runCount: state.runCount,
    recentRuns,
  };
}
1
+ import fsp from "node:fs/promises";
2
+ import path from "node:path";
3
+
4
+ import { parse as parseYaml } from "yaml";
5
+
6
+ import { listAssignments, resolveAssignmentLedgerStorage } from "./assignment-ledger.js";
7
+ import { listBudgetStates } from "./budget-governor.js";
8
+ import { listErrorQueue, resolveErrorDaemonStorage } from "./error-worker.js";
9
+
10
// Schema versions stamped on run artifacts and on the persisted state file.
const WATCHDOG_SCHEMA_VERSION = "1.0.0";
const STATE_SCHEMA_VERSION = "1.0.0";

// Assignment statuses (compared upper-cased) that the watchdog evaluates.
const ACTIVE_ASSIGNMENT_STATUSES = new Set(["CLAIMED", "IN_PROGRESS", "BLOCKED"]);

// Event types accepted in alert configuration and persisted alert records.
export const WATCHDOG_EVENT_TYPES = Object.freeze([
  "agent_stuck",
  "budget_warning",
  "alert_recovered",
  "pr_merged",
  "audit_complete",
  "kill_switch_activated",
]);
const WATCHDOG_EVENT_SET = new Set(WATCHDOG_EVENT_TYPES);

// Signal codes a detection can carry.
export const WATCHDOG_SIGNAL_CODES = Object.freeze([
  "NO_TOOL_CALL",
  "REPEATED_FILE_READ",
  "BUDGET_WARNING_NO_FINDINGS",
  "TURN_STALL",
]);
const WATCHDOG_SIGNAL_SET = new Set(WATCHDOG_SIGNAL_CODES);
33
+
34
/**
 * Coerce a value to a trimmed string.
 * Any falsy input (undefined, null, "", 0, false, NaN) yields "".
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeString(value) {
  const text = value ? String(value) : "";
  return text.trim();
}
37
+
38
/**
 * Canonicalize a timestamp to `Date#toISOString()` form.
 *
 * @param {*} value - Anything `Date.parse` understands once stringified.
 * @param {string} [fallbackIso] - Returned unchanged when `value` is empty or unparseable.
 * @returns {string}
 */
function normalizeIsoTimestamp(value, fallbackIso = new Date().toISOString()) {
  const text = normalizeString(value);
  const epochMs = text ? Date.parse(text) : Number.NaN;
  return Number.isFinite(epochMs) ? new Date(epochMs).toISOString() : fallbackIso;
}
49
+
50
/**
 * Parse a value as a finite number.
 *
 * `Number(null)` and `Number("")` both evaluate to 0, which would silently
 * bypass the fallback for missing input, so null, undefined and blank strings
 * are treated as "not provided".
 *
 * @param {*} value
 * @param {number} [fallback] - Returned when `value` is missing or not a finite number.
 * @returns {number}
 */
function normalizeNumber(value, fallback = 0) {
  if (value === null || value === undefined) {
    return fallback;
  }
  if (typeof value === "string" && value.trim() === "") {
    return fallback;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : fallback;
}
57
+
58
/**
 * Parse a value as an integer >= 1.
 * Fractions are floored, and positive values below 1 are raised to 1.
 *
 * @param {*} value
 * @param {number} fallbackValue - Returned when `value` is not a finite number > 0.
 * @returns {number}
 */
function normalizePositiveInteger(value, fallbackValue) {
  const candidate = normalizeNumber(value, fallbackValue);
  const isUsable = Number.isFinite(candidate) && candidate > 0;
  return isUsable ? Math.max(1, Math.floor(candidate)) : fallbackValue;
}
65
+
66
/**
 * Parse a value as a finite number >= 0.
 *
 * @param {*} value
 * @param {number} [fallbackValue] - Returned when `value` is not finite or is negative.
 * @returns {number}
 */
function normalizeNonNegativeNumber(value, fallbackValue = 0) {
  const candidate = normalizeNumber(value, fallbackValue);
  return Number.isFinite(candidate) && candidate >= 0 ? candidate : fallbackValue;
}
73
+
74
/**
 * Return a shallow copy of a non-array object, or `{}` for anything else.
 *
 * @param {*} value
 * @returns {object}
 */
function normalizeObject(value) {
  const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  return isRecord ? { ...value } : {};
}
80
+
81
/**
 * Interpret a loosely-typed flag as a boolean.
 *
 * Booleans pass through. Other values are compared by their trimmed,
 * lower-cased string form: "true"/"1"/"yes" are true and "false"/"0"/"no" are
 * false. Coercion uses `??` rather than `||` so that numeric 0 is recognised
 * as false, symmetric with numeric 1 being true.
 *
 * @param {*} value
 * @param {boolean} [fallbackValue] - Returned for null/undefined/blank/unrecognised input.
 * @returns {boolean}
 */
function normalizeBoolean(value, fallbackValue = false) {
  if (typeof value === "boolean") {
    return value;
  }
  const token = String(value ?? "").trim().toLowerCase();
  switch (token) {
    case "true":
    case "1":
    case "yes":
      return true;
    case "false":
    case "0":
    case "no":
      return false;
    default:
      return fallbackValue;
  }
}
97
+
98
/**
 * Expand `${NAME}` placeholders (upper-case letters, digits, underscore) using
 * values from `env`. Unset variables expand to "".
 *
 * A null/undefined/non-object `env` is treated as empty instead of raising a
 * TypeError, so a caller that passes `env: null` explicitly does not crash
 * config loading.
 *
 * @param {*} value - Template; trimmed before expansion.
 * @param {object} env - Variable lookup table (e.g. `process.env`).
 * @returns {string} Expanded string, or "" when `value` is empty.
 */
function resolveEnvTemplate(value, env) {
  const template = normalizeString(value);
  if (!template) {
    return "";
  }
  const lookup = env && typeof env === "object" ? env : {};
  return template.replace(/\$\{([A-Z0-9_]+)\}/g, (_match, key) => normalizeString(lookup[key]));
}
105
+
106
/**
 * Whole seconds elapsed between two timestamps, never negative.
 *
 * An empty or unparseable `previousIso` is replaced by `nowIso`, which gives 0.
 *
 * @param {*} previousIso
 * @param {*} nowIso
 * @returns {number|null} Elapsed seconds, or null when either side cannot be parsed.
 */
function computeSecondsSince(previousIso, nowIso) {
  const startEpoch = Date.parse(normalizeIsoTimestamp(previousIso, nowIso));
  const endEpoch = Date.parse(normalizeIsoTimestamp(nowIso, new Date().toISOString()));
  if (Number.isFinite(startEpoch) && Number.isFinite(endEpoch)) {
    const elapsedMs = endEpoch - startEpoch;
    return Math.max(0, Math.floor(elapsedMs / 1000));
  }
  return null;
}
114
+
115
/**
 * Pick the best available "last activity" timestamp for an assignment.
 *
 * Preference order: `budgetSnapshot.lastToolCallAt`, `budgetSnapshot.lastActionAt`,
 * `heartbeatAt`, `updatedAt`.
 *
 * @param {object} [assignment]
 * @returns {string} First non-empty candidate (trimmed), or "".
 */
function pickLastToolCallAt(assignment = {}) {
  const snapshot = normalizeObject(assignment.budgetSnapshot);
  const candidates = [
    snapshot.lastToolCallAt,
    snapshot.lastActionAt,
    assignment.heartbeatAt,
    assignment.updatedAt,
  ];
  for (const candidate of candidates) {
    const text = normalizeString(candidate);
    if (text) {
      return text;
    }
  }
  return "";
}
125
+
126
/**
 * Collect file paths from the snapshot's read-history arrays.
 *
 * Reads `recentFileReads`, `fileReadHistory` and `fileReads` (in that order,
 * concatenated). Entries may be strings or objects exposing `path`, `file` or
 * `filePath`; empty and unrecognised entries are dropped.
 *
 * @param {object} [snapshot]
 * @returns {string[]}
 */
function extractRecentFileReads(snapshot = {}) {
  const source = normalizeObject(snapshot);

  const toPath = (entry) => {
    if (typeof entry === "string") {
      return normalizeString(entry);
    }
    if (entry && typeof entry === "object") {
      return (
        normalizeString(entry.path) ||
        normalizeString(entry.file) ||
        normalizeString(entry.filePath)
      );
    }
    return "";
  };

  return ["recentFileReads", "fileReadHistory", "fileReads"]
    .map((key) => source[key])
    .filter((entries) => Array.isArray(entries))
    .flatMap((entries) => entries.map(toPath).filter(Boolean));
}
154
+
155
/**
 * Measure how many times the last value repeats consecutively at the end of a list.
 *
 * Values are trimmed and empty ones removed before counting.
 *
 * @param {Array} [values]
 * @returns {{repeatedValue: string, repeatCount: number}} `("", 0)` for empty or non-array input.
 */
function computeRepeatedTailCount(values = []) {
  const cleaned = Array.isArray(values)
    ? values.map((value) => normalizeString(value)).filter(Boolean)
    : [];
  if (cleaned.length === 0) {
    return { repeatedValue: "", repeatCount: 0 };
  }

  const tail = cleaned[cleaned.length - 1];
  // Walk backwards until the run of identical trailing values ends.
  let cursor = cleaned.length - 1;
  while (cursor >= 0 && cleaned[cursor] === tail) {
    cursor -= 1;
  }
  return {
    repeatedValue: tail,
    repeatCount: cleaned.length - 1 - cursor,
  };
}
182
+
183
/**
 * Highest used/limit ratio across the budget dimensions that have a positive limit.
 *
 * Dimensions: tokens, cost (USD), runtime (ms) and tool calls, read from
 * `record.usage` and `record.budget`.
 *
 * @param {object} [record]
 * @returns {number} The largest ratio, or 0 when no dimension has a limit.
 */
function computeBudgetUsageRatio(record = {}) {
  const usage = normalizeObject(record.usage);
  const budget = normalizeObject(record.budget);
  const dimensions = [
    ["tokensUsed", "maxTokens"],
    ["costUsd", "maxCostUsd"],
    ["runtimeMs", "maxRuntimeMs"],
    ["toolCalls", "maxToolCalls"],
  ];
  // Ratios are never negative, so folding from 0 equals max-of-list-or-0.
  return dimensions.reduce((peak, [usageKey, budgetKey]) => {
    const limit = normalizeNonNegativeNumber(budget[budgetKey], 0);
    if (limit <= 0) {
      return peak;
    }
    const used = normalizeNonNegativeNumber(usage[usageKey], 0);
    return Math.max(peak, used / limit);
  }, 0);
}
205
+
206
/**
 * Canonicalize a severity label to one of P0..P3 (case-insensitive).
 *
 * @param {*} value
 * @returns {string} The matching label, or "P3" for anything unrecognised.
 */
function normalizeSeverity(value) {
  const level = normalizeString(value).toUpperCase();
  return ["P0", "P1", "P2", "P3"].includes(level) ? level : "P3";
}
213
+
214
/**
 * Build the empty watchdog state used when no state file exists yet.
 *
 * @param {string} nowIso - Used for `generatedAt`.
 * @returns {object} State with no active alerts and a zero run count.
 */
function createInitialState(nowIso) {
  const generatedAt = normalizeIsoTimestamp(nowIso, nowIso);
  return {
    schemaVersion: STATE_SCHEMA_VERSION,
    generatedAt,
    activeAlerts: {},
    runCount: 0,
    lastRunId: null,
    lastRunAt: null,
  };
}
224
+
225
/**
 * Sanitize a watchdog state object loaded from disk.
 *
 * Hardened against malformed files: a non-object state (e.g. a file containing
 * `null`) is treated as empty, an `activeAlerts` value that is not a plain
 * object is ignored, and alert entries that are not objects are skipped
 * instead of raising a TypeError.
 *
 * Unknown event types default to "agent_stuck" and unknown signal codes to
 * "NO_TOOL_CALL".
 *
 * @param {object} [state]
 * @param {string} [nowIso] - Fallback for missing or invalid timestamps.
 * @returns {object} State in the current schema.
 */
function normalizeState(state = {}, nowIso = new Date().toISOString()) {
  const source = normalizeObject(state);
  const rawAlerts = normalizeObject(source.activeAlerts);
  const activeAlerts = {};

  for (const [alertId, alert] of Object.entries(rawAlerts)) {
    if (!normalizeString(alertId)) {
      continue;
    }
    if (!alert || typeof alert !== "object") {
      continue;
    }
    const eventType = normalizeString(alert.eventType);
    const signalCode = normalizeString(alert.signalCode);
    activeAlerts[alertId] = {
      alertId,
      eventType: WATCHDOG_EVENT_SET.has(eventType) ? eventType : "agent_stuck",
      signalCode: WATCHDOG_SIGNAL_SET.has(signalCode) ? signalCode : "NO_TOOL_CALL",
      workItemId: normalizeString(alert.workItemId),
      agentIdentity: normalizeString(alert.agentIdentity),
      firstSeenAt: normalizeIsoTimestamp(alert.firstSeenAt, nowIso),
      lastSeenAt: normalizeIsoTimestamp(alert.lastSeenAt, nowIso),
      message: normalizeString(alert.message),
      severity: normalizeSeverity(alert.severity),
    };
  }

  return {
    schemaVersion: STATE_SCHEMA_VERSION,
    generatedAt: normalizeIsoTimestamp(source.generatedAt, nowIso),
    activeAlerts,
    runCount: Math.max(0, Math.floor(normalizeNumber(source.runCount, 0))),
    lastRunId: normalizeString(source.lastRunId) || null,
    lastRunAt: source.lastRunAt ? normalizeIsoTimestamp(source.lastRunAt, nowIso) : null,
  };
}
257
+
258
/**
 * Read and parse a JSON file.
 *
 * @param {string} filePath
 * @param {Function} defaultFactory - Called to produce the result when the file does not exist.
 * @returns {Promise<*>} Parsed content, or the factory's value on ENOENT.
 * @throws Any other read error, and JSON syntax errors.
 */
async function readJsonFile(filePath, defaultFactory) {
  let raw;
  try {
    raw = await fsp.readFile(filePath, "utf-8");
  } catch (error) {
    const isMissing = error && typeof error === "object" && error.code === "ENOENT";
    if (isMissing) {
      return defaultFactory();
    }
    throw error;
  }
  return JSON.parse(raw);
}
269
+
270
/**
 * Write a payload as pretty-printed JSON (2-space indent, trailing newline),
 * creating the parent directory first.
 *
 * @param {string} filePath
 * @param {*} payload
 */
async function writeJsonFile(filePath, payload) {
  const directory = path.dirname(filePath);
  await fsp.mkdir(directory, { recursive: true });
  const serialized = `${JSON.stringify(payload, null, 2)}\n`;
  await fsp.writeFile(filePath, serialized, "utf-8");
}
274
+
275
/**
 * Append a payload as one JSON line to a newline-delimited log,
 * creating the parent directory first.
 *
 * @param {string} filePath
 * @param {*} payload
 */
async function appendEvent(filePath, payload) {
  const directory = path.dirname(filePath);
  await fsp.mkdir(directory, { recursive: true });
  const line = `${JSON.stringify(payload)}\n`;
  await fsp.appendFile(filePath, line, "utf-8");
}
279
+
280
/**
 * Build a run identifier from the timestamp and the run counter.
 *
 * Colons and dots in the ISO timestamp are replaced with dashes, and the
 * counter is left-padded with zeros to at least four characters.
 *
 * @param {string} nowIso
 * @param {number} count
 * @returns {string} `watchdog-<timestamp>-<count>`.
 */
function buildRunId(nowIso, count) {
  const stamp = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const token = stamp.replace(/[:.]/g, "-");
  const sequence = String(count).padStart(4, "0");
  return `watchdog-${token}-${sequence}`;
}
284
+
285
/**
 * Validate one alert-channel entry from configuration.
 *
 * Slack needs a webhook URL (`webhook_url`, `webhookUrl` or `url`); Telegram
 * needs both a bot token (`bot_token`/`botToken`) and a chat id
 * (`chat_id`/`chatId`). Values may contain `${ENV_VAR}` placeholders.
 *
 * Entries that are null or not objects (for example an empty YAML list item)
 * are rejected with null instead of raising a TypeError; the default parameter
 * alone only covers `undefined`.
 *
 * @param {object} [channel]
 * @param {object} [env] - Source for placeholder expansion.
 * @returns {object|null} Normalized channel, or null when unsupported or incomplete.
 */
function normalizeChannel(channel = {}, env = process.env) {
  const source = normalizeObject(channel);
  const type = normalizeString(source.type).toLowerCase();

  switch (type) {
    case "slack": {
      const webhookUrl = resolveEnvTemplate(
        source.webhook_url || source.webhookUrl || source.url || "",
        env
      );
      return webhookUrl ? { type: "slack", webhookUrl } : null;
    }
    case "telegram": {
      const botToken = resolveEnvTemplate(source.bot_token || source.botToken || "", env);
      const chatId = resolveEnvTemplate(source.chat_id || source.chatId || "", env);
      return botToken && chatId ? { type: "telegram", botToken, chatId } : null;
    }
    default:
      return null;
  }
}
312
+
313
/**
 * Load the `alerts` section of `<targetPath>/.sentinelayer.yml`.
 *
 * A missing file yields defaults with `exists: false`. Unknown event types are
 * dropped, and an empty or absent event list falls back to the default events.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath]
 * @param {object} [options.env] - Source for `${VAR}` expansion in channel settings.
 * @returns {Promise<{configPath: string, exists: boolean, channels: object[], frequency: string, events: string[]}>}
 * @throws Read errors other than ENOENT, and YAML parse errors.
 */
async function loadWatchdogConfig({ targetPath = ".", env = process.env } = {}) {
  const configPath = path.join(path.resolve(String(targetPath || ".")), ".sentinelayer.yml");
  const defaultFrequency = "smart";
  const defaultEvents = ["agent_stuck", "budget_warning", "alert_recovered"];

  let rawYaml;
  try {
    rawYaml = await fsp.readFile(configPath, "utf-8");
  } catch (error) {
    const isMissing = error && typeof error === "object" && error.code === "ENOENT";
    if (!isMissing) {
      throw error;
    }
    return {
      configPath,
      exists: false,
      channels: [],
      frequency: defaultFrequency,
      events: defaultEvents,
    };
  }

  const parsed = parseYaml(rawYaml) || {};
  const alerts = parsed && typeof parsed === "object" ? normalizeObject(parsed.alerts) : {};

  const channels = Array.isArray(alerts.channels)
    ? alerts.channels.map((channel) => normalizeChannel(channel, env)).filter(Boolean)
    : [];
  const requestedEvents = Array.isArray(alerts.events)
    ? alerts.events
        .map((eventType) => normalizeString(eventType))
        .filter((eventType) => WATCHDOG_EVENT_SET.has(eventType))
    : [];
  const frequency = normalizeString(alerts.frequency).toLowerCase() || defaultFrequency;

  return {
    configPath,
    exists: true,
    channels,
    frequency,
    events: requestedEvents.length > 0 ? requestedEvents : defaultEvents,
  };
}
352
+
353
/**
 * Assemble a detection record. The alert id is `<workItemId>:<signalCode>`.
 *
 * @param {object} fields
 * @param {string} fields.eventType
 * @param {string} fields.signalCode
 * @param {string} fields.workItemId
 * @param {string} fields.agentIdentity
 * @param {string} fields.severity
 * @param {string} fields.message
 * @param {object} [fields.details]
 * @returns {object}
 */
function buildDetection({
  eventType,
  signalCode,
  workItemId,
  agentIdentity,
  severity,
  message,
  details = {},
}) {
  const alertId = `${workItemId}:${signalCode}`;
  return { alertId, eventType, signalCode, workItemId, agentIdentity, severity, message, details };
}
373
+
374
/**
 * Evaluate every watchdog signal for one assignment.
 *
 * Signals checked, in order:
 *  - NO_TOOL_CALL: idle time since the last recorded activity reached the threshold.
 *  - REPEATED_FILE_READ: the same file ends the read history N or more times in a row.
 *  - TURN_STALL: turns since the last progress turn reached the threshold.
 *  - BUDGET_WARNING_NO_FINDINGS: budget usage reached the threshold with zero findings.
 *
 * @param {object} options
 * @param {object} options.assignment
 * @param {object|null} options.queueItem - Supplies severity and fallback findings metadata.
 * @param {object|null} options.budgetRecord
 * @param {string} options.nowIso
 * @param {number} options.noToolCallSeconds
 * @param {number} options.repeatedFileReadsThreshold
 * @param {number} options.budgetWarningThreshold
 * @param {number} options.turnStallTurns
 * @returns {object[]} Detections (possibly empty).
 */
function evaluateWatchdogSignals({
  assignment,
  queueItem,
  budgetRecord,
  nowIso,
  noToolCallSeconds,
  repeatedFileReadsThreshold,
  budgetWarningThreshold,
  turnStallTurns,
}) {
  const workItemId = normalizeString(assignment.workItemId);
  const agentIdentity = normalizeString(assignment.assignedAgentIdentity) || "unassigned";
  const severity = normalizeSeverity(queueItem?.severity);
  const budgetSnapshot = normalizeObject(assignment.budgetSnapshot);

  const detections = [];
  // All detections for this assignment share identity and severity.
  const report = (eventType, signalCode, message, details) => {
    detections.push(
      buildDetection({
        eventType,
        signalCode,
        workItemId,
        agentIdentity,
        severity,
        message,
        details,
      })
    );
  };

  const lastToolCallAt = pickLastToolCallAt(assignment);
  const idleSeconds = computeSecondsSince(lastToolCallAt, nowIso);
  if (idleSeconds !== null && idleSeconds >= noToolCallSeconds) {
    report(
      "agent_stuck",
      "NO_TOOL_CALL",
      `No tool calls observed for ${idleSeconds}s (threshold ${noToolCallSeconds}s).`,
      {
        idleSeconds,
        thresholdSeconds: noToolCallSeconds,
        lastToolCallAt: normalizeIsoTimestamp(lastToolCallAt, nowIso),
      }
    );
  }

  const { repeatedValue, repeatCount } = computeRepeatedTailCount(
    extractRecentFileReads(budgetSnapshot)
  );
  if (repeatCount >= repeatedFileReadsThreshold) {
    report(
      "agent_stuck",
      "REPEATED_FILE_READ",
      `Repeated file read detected (${repeatCount}x): ${repeatedValue}`,
      {
        filePath: repeatedValue,
        repeatCount,
        threshold: repeatedFileReadsThreshold,
      }
    );
  }

  const turnCount = Math.floor(normalizeNonNegativeNumber(budgetSnapshot.turnCount, 0));
  // With no recorded progress turn, treat the current turn as progress (no stall).
  const progressSource =
    budgetSnapshot.lastProgressTurn ?? budgetSnapshot.lastFindingTurn ?? turnCount;
  const lastProgressTurn = Math.floor(normalizeNonNegativeNumber(progressSource, turnCount));
  const stalledTurns = Math.max(0, turnCount - lastProgressTurn);
  if (turnCount > 0 && stalledTurns >= turnStallTurns) {
    report(
      "agent_stuck",
      "TURN_STALL",
      `Turn progression stalled for ${stalledTurns} turns (threshold ${turnStallTurns}).`,
      {
        turnCount,
        lastProgressTurn,
        stalledTurns,
        threshold: turnStallTurns,
      }
    );
  }

  const usageRatio = computeBudgetUsageRatio(budgetRecord || {});
  const findingsSource =
    budgetSnapshot.findingsProduced ??
    queueItem?.metadata?.findingsProduced ??
    queueItem?.metadata?.findingsCount ??
    0;
  const findingsProduced = Math.floor(normalizeNonNegativeNumber(findingsSource, 0));
  if (usageRatio >= budgetWarningThreshold && findingsProduced <= 0) {
    report(
      "budget_warning",
      "BUDGET_WARNING_NO_FINDINGS",
      `Budget usage ${(usageRatio * 100).toFixed(1)}% with no findings produced.`,
      {
        usageRatio: Number(usageRatio.toFixed(6)),
        threshold: budgetWarningThreshold,
        findingsProduced,
        lifecycleState: normalizeString(budgetRecord?.lifecycleState) || "WITHIN_BUDGET",
      }
    );
  }

  return detections;
}
488
+
489
/**
 * Project a detection/alert onto the shape persisted under `activeAlerts`.
 *
 * @param {object} [alert] - Alert carrying identifiers, message, severity and optional firstSeenAt.
 * @param {string} [nowIso] - Timestamp used for lastSeenAt and as the firstSeenAt fallback.
 * @returns {object} Record with normalized timestamps, message and severity.
 */
function toActiveAlertRecord(alert = {}, nowIso = new Date().toISOString()) {
  const { alertId, eventType, signalCode, workItemId, agentIdentity } = alert;
  const firstSeenAt = normalizeIsoTimestamp(alert.firstSeenAt || nowIso, nowIso);
  const lastSeenAt = normalizeIsoTimestamp(nowIso, nowIso);
  return {
    alertId,
    eventType,
    signalCode,
    workItemId,
    agentIdentity,
    firstSeenAt,
    lastSeenAt,
    message: normalizeString(alert.message),
    severity: normalizeSeverity(alert.severity),
  };
}
502
+
503
/**
 * Diff the current detections against the previously active alerts.
 *
 * @param {object} options
 * @param {object[]} [options.detections] - Detections produced by this tick.
 * @param {object} [options.previousState] - Prior state; only `activeAlerts` is read.
 * @param {string} [options.nowIso] - Timestamp stamped on new/updated alerts.
 * @returns {{activeAlerts: object, activated: object[], stillActive: object[], recovered: object[]}}
 *   `activeAlerts` is the new persisted map; `activated` are alerts not present before;
 *   `stillActive` were present before and detected again; `recovered` were present
 *   before but not detected now.
 */
function buildAlertTransitions({
  detections = [],
  previousState = {},
  nowIso = new Date().toISOString(),
}) {
  const previousAlerts = normalizeObject(previousState.activeAlerts);
  const activeAlerts = {};
  const activated = [];
  const stillActive = [];
  const detectedIds = new Set();

  for (const detection of detections) {
    const { alertId } = detection;
    detectedIds.add(alertId);
    const previous = previousAlerts[alertId] || null;
    // Keep the original first-seen time when the alert was already active.
    const firstSeenAt = previous?.firstSeenAt || nowIso;
    activeAlerts[alertId] = toActiveAlertRecord({ ...detection, firstSeenAt }, nowIso);
    if (previous) {
      stillActive.push({ ...detection, firstSeenAt: previous.firstSeenAt, lastSeenAt: nowIso });
    } else {
      activated.push({ ...detection, firstSeenAt: nowIso, lastSeenAt: nowIso });
    }
  }

  const recovered = Object.entries(previousAlerts)
    .filter(([alertId]) => !detectedIds.has(alertId))
    .map(([alertId, previous]) => ({
      alertId,
      eventType: "alert_recovered",
      signalCode: normalizeString(previous.signalCode),
      workItemId: normalizeString(previous.workItemId),
      agentIdentity: normalizeString(previous.agentIdentity),
      severity: normalizeSeverity(previous.severity),
      message: `Recovered: ${normalizeString(previous.message) || "watchdog signal cleared"}`,
      firstSeenAt: normalizeIsoTimestamp(previous.firstSeenAt, nowIso),
      lastSeenAt: normalizeIsoTimestamp(previous.lastSeenAt, nowIso),
      recoveredAt: normalizeIsoTimestamp(nowIso, nowIso),
    }));

  return { activeAlerts, activated, stillActive, recovered };
}
565
+
566
/**
 * Render the human-readable notification text for an alert.
 *
 * `agent_stuck` and `budget_warning` get dedicated layouts; every other event
 * type uses a generic "<eventType> <workItemId> <agentIdentity>" header. The
 * alert's own `message` always follows on a second line.
 *
 * @param {object} [alert] - Alert with eventType, agentIdentity, workItemId,
 *   signalCode, message and optional `details` (idleSeconds, usageRatio).
 * @returns {string} Two-line message prefixed with "[SentinelLayer]".
 */
function formatAlertMessage(alert = {}) {
  const kind = normalizeString(alert.eventType);

  switch (kind) {
    case "agent_stuck": {
      const idleSeconds = normalizeNumber(alert.details?.idleSeconds, 0);
      const usageRatio = normalizeNumber(alert.details?.usageRatio, 0);
      // Optional suffixes are only shown when the value is positive.
      const idlePart = idleSeconds > 0 ? ` | idle=${idleSeconds}s` : "";
      const budgetPart = usageRatio > 0 ? ` | budget=${(usageRatio * 100).toFixed(1)}%` : "";
      return `[SentinelLayer] Agent "${alert.agentIdentity}" stuck (${alert.signalCode}) on ${alert.workItemId}${idlePart}${budgetPart}\n${alert.message}`;
    }
    case "budget_warning": {
      const usageRatio = normalizeNumber(alert.details?.usageRatio, 0);
      const usagePct = (usageRatio * 100).toFixed(1);
      return `[SentinelLayer] Budget warning for ${alert.workItemId} (${alert.agentIdentity}) | usage=${usagePct}%\n${alert.message}`;
    }
    default:
      // The generic header prints the raw eventType, not the normalized one.
      return `[SentinelLayer] ${alert.eventType} ${alert.workItemId || ""} ${alert.agentIdentity || ""}\n${alert.message}`;
  }
}
580
+
581
/**
 * POST a plain-text alert to a Slack incoming webhook.
 *
 * @param {{webhookUrl: string|URL}} channel - Slack channel configuration.
 * @param {string} message - Text to send as the webhook's `text` field.
 * @param {Function} fetchImpl - fetch-compatible function used for the request.
 * @returns {Promise<void>} Resolves once Slack has answered with an OK status.
 * @throws {Error} When the channel has no webhook URL, or when the response
 *   is not OK (message includes the HTTP status).
 */
async function sendSlackAlert(channel, message, fetchImpl) {
  const webhookUrl = channel?.webhookUrl;
  const hasWebhookUrl =
    webhookUrl instanceof URL || (typeof webhookUrl === "string" && webhookUrl.trim() !== "");
  // Fail with an actionable message instead of letting fetch(undefined) raise
  // an opaque URL-parse TypeError for a misconfigured channel.
  if (!hasWebhookUrl) {
    throw new Error("Slack channel is missing a webhook URL.");
  }

  const response = await fetchImpl(webhookUrl, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      text: message,
    }),
  });
  if (!response.ok) {
    throw new Error(`Slack webhook returned ${response.status}.`);
  }
}
595
+
596
/**
 * Send a plain-text alert through the Telegram Bot API `sendMessage` method.
 *
 * @param {{botToken: string, chatId: string|number}} channel - Telegram
 *   channel configuration.
 * @param {string} message - Text of the message.
 * @param {Function} fetchImpl - fetch-compatible function used for the request.
 * @returns {Promise<void>} Resolves once Telegram has answered with an OK status.
 * @throws {Error} When the bot token or chat id is missing, or when the
 *   response is not OK (message includes the HTTP status, never the token).
 */
async function sendTelegramAlert(channel, message, fetchImpl) {
  const botToken = channel?.botToken;
  const chatId = channel?.chatId;

  // Without this guard a missing token would be interpolated as the literal
  // "undefined" and a real request would be sent to .../botundefined/sendMessage.
  if (typeof botToken !== "string" || botToken.trim() === "") {
    throw new Error("Telegram channel is missing a bot token.");
  }
  // JSON.stringify silently drops an undefined chat_id, so reject it up front.
  if (chatId === undefined || chatId === null || String(chatId).trim() === "") {
    throw new Error("Telegram channel is missing a chat id.");
  }

  const endpoint = `https://api.telegram.org/bot${botToken}/sendMessage`;
  const response = await fetchImpl(endpoint, {
    method: "POST",
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      chat_id: chatId,
      text: message,
      disable_web_page_preview: true,
    }),
  });
  if (!response.ok) {
    throw new Error(`Telegram sendMessage returned ${response.status}.`);
  }
}
613
+
614
/**
 * Deliver one alert to one channel and report the outcome without throwing
 * for delivery failures.
 *
 * @param {object} options
 * @param {object} options.channel - Channel config; `type` selects the sender
 *   ("slack" or "telegram").
 * @param {object} options.alert - Alert to deliver.
 * @param {boolean} [options.execute=false] - When false nothing is sent and
 *   the result is flagged as a dry run.
 * @param {Function} [options.fetchImpl=globalThis.fetch] - fetch-compatible
 *   function used by the senders.
 * @returns {Promise<{channelType: *, alertId: *, eventType: *, sent: boolean, dryRun: boolean, message: string, error: string}>}
 */
async function dispatchAlertToChannel({
  channel,
  alert,
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const message = formatAlertMessage(alert);

  // Every exit path reports the same shape; only these three fields vary.
  const outcome = (sent, dryRun, errorText) => ({
    channelType: channel.type,
    alertId: alert.alertId,
    eventType: alert.eventType,
    sent,
    dryRun,
    message,
    error: errorText,
  });

  if (!execute) {
    return outcome(false, true, "");
  }
  if (typeof fetchImpl !== "function") {
    return outcome(false, false, "Fetch implementation is unavailable.");
  }

  try {
    switch (channel.type) {
      case "slack":
        await sendSlackAlert(channel, message, fetchImpl);
        break;
      case "telegram":
        await sendTelegramAlert(channel, message, fetchImpl);
        break;
      default:
        throw new Error(`Unsupported alert channel type '${channel.type}'.`);
    }
    return outcome(true, false, "");
  } catch (error) {
    // Sender failures are converted to a result so one channel cannot abort the rest.
    return outcome(false, false, normalizeString(error?.message || error));
  }
}
672
+
673
/**
 * Fan out alerts to every configured channel.
 *
 * Alerts are filtered by `config.events` when that list is non-empty; an
 * empty or missing list lets every event type through. All deliveries are
 * started immediately and awaited together.
 *
 * @param {object} options
 * @param {Iterable<object>} [options.alerts] - Alerts to deliver.
 * @param {object} [options.config] - Watchdog config; `channels` and `events`
 *   are used only when they are arrays.
 * @param {boolean} [options.execute=false] - Forwarded to each delivery.
 * @param {Function} [options.fetchImpl=globalThis.fetch] - Forwarded to each delivery.
 * @returns {Promise<object[]>} One result per (alert, channel) pair, ordered
 *   by alert and then by channel.
 */
async function dispatchAlerts({
  alerts = [],
  config = {},
  execute = false,
  fetchImpl = globalThis.fetch,
}) {
  const channels = Array.isArray(config.channels) ? config.channels : [];
  const allowedEvents = new Set(Array.isArray(config.events) ? config.events : []);
  const isAllowed = (alert) => allowedEvents.size === 0 || allowedEvents.has(alert.eventType);

  const pending = [...alerts]
    .filter(isAllowed)
    .flatMap((alert) =>
      Array.from(channels, (channel) =>
        dispatchAlertToChannel({ channel, alert, execute, fetchImpl })
      )
    );

  return Promise.all(pending);
}
699
+
700
/**
 * Resolve the on-disk locations used by the watchdog.
 *
 * Builds on the error-daemon storage layout and adds the watchdog's own
 * state file, event log and run directory under the same `baseDir`.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."]
 * @param {string} [options.outputDir=""]
 * @param {object} [options.env]
 * @param {string} [options.homeDir]
 * @returns {Promise<object>} The daemon storage fields plus
 *   `watchdogStatePath`, `watchdogEventsPath` and `watchdogRunsDir`.
 */
export async function resolveWatchdogStorage({
  targetPath = ".",
  outputDir = "",
  env,
  homeDir,
} = {}) {
  const daemonStorage = await resolveErrorDaemonStorage({ targetPath, outputDir, env, homeDir });
  const inBaseDir = (name) => path.join(daemonStorage.baseDir, name);

  return {
    ...daemonStorage,
    watchdogStatePath: inBaseDir("watchdog-state.json"),
    watchdogEventsPath: inBaseDir("watchdog-events.ndjson"),
    watchdogRunsDir: inBaseDir("watchdog-runs"),
  };
}
719
+
720
/**
 * Run one watchdog evaluation pass.
 *
 * Steps, in order: normalize thresholds, resolve storage and config, load
 * assignments / error queue / budget states / previous watchdog state,
 * evaluate signals for every active assignment, diff the detections against
 * the previous active alerts, notify on activated and recovered alerts, then
 * persist the run artifact, the new state and a `watchdog_tick` event.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."]
 * @param {string} [options.outputDir=""]
 * @param {number} [options.noToolCallSeconds=60]
 * @param {number} [options.repeatedFileReadsThreshold=3]
 * @param {number} [options.budgetWarningThreshold=0.9] - Clamped to [0, 1].
 * @param {number} [options.turnStallTurns=5]
 * @param {boolean} [options.execute=false] - When false, notifications are
 *   dry runs; state and run artifacts are still written.
 * @param {number} [options.limit=200] - Passed as `limit` to the list helpers.
 * @param {object} [options.env=process.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso] - Timestamp used for the whole tick.
 * @param {Function} [options.fetchImpl=globalThis.fetch]
 * @returns {Promise<object>} Storage paths, config path info, run id/path,
 *   the new state, detections, activated/recovered alerts, notification
 *   results and the run summary.
 */
export async function runWatchdogTick({
  targetPath = ".",
  outputDir = "",
  noToolCallSeconds = 60,
  repeatedFileReadsThreshold = 3,
  budgetWarningThreshold = 0.9,
  turnStallTurns = 5,
  execute = false,
  limit = 200,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
  fetchImpl = globalThis.fetch,
} = {}) {
  // --- Normalize inputs ---------------------------------------------------
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  // Key order here is reused verbatim in the persisted run config below.
  const thresholds = {
    noToolCallSeconds: normalizePositiveInteger(noToolCallSeconds, 60),
    repeatedFileReadsThreshold: normalizePositiveInteger(repeatedFileReadsThreshold, 3),
    budgetWarningThreshold: Math.max(
      0,
      Math.min(1, normalizeNonNegativeNumber(budgetWarningThreshold, 0.9))
    ),
    turnStallTurns: normalizePositiveInteger(turnStallTurns, 5),
  };
  const normalizedLimit = normalizePositiveInteger(limit, 200);
  const normalizedExecute = normalizeBoolean(execute, false);

  // --- Resolve storage + config -------------------------------------------
  const storageScope = { targetPath, outputDir, env, homeDir };
  const storage = await resolveWatchdogStorage(storageScope);
  const config = await loadWatchdogConfig({ targetPath, env });

  // --- Load inputs concurrently -------------------------------------------
  const [assignments, queue, budgets, previousState] = await Promise.all([
    listAssignments({
      ...storageScope,
      includeExpired: true,
      limit: normalizedLimit,
      nowIso: normalizedNow,
    }),
    listErrorQueue({ ...storageScope, limit: normalizedLimit }),
    listBudgetStates({ ...storageScope, limit: normalizedLimit, nowIso: normalizedNow }),
    readJsonFile(storage.watchdogStatePath, () => createInitialState(normalizedNow)).then(
      (rawState) => normalizeState(rawState, normalizedNow)
    ),
  ]);

  // --- Evaluate signals for active assignments ----------------------------
  const queueByWorkItem = new Map(queue.items.map((entry) => [entry.workItemId, entry]));
  const budgetByWorkItem = new Map(budgets.records.map((entry) => [entry.workItemId, entry]));
  const activeAssignments = assignments.assignments.filter((assignment) =>
    ACTIVE_ASSIGNMENT_STATUSES.has(normalizeString(assignment.status).toUpperCase())
  );

  const detections = activeAssignments.flatMap((assignment) =>
    evaluateWatchdogSignals({
      assignment,
      queueItem: queueByWorkItem.get(assignment.workItemId) || null,
      budgetRecord: budgetByWorkItem.get(assignment.workItemId) || null,
      nowIso: normalizedNow,
      ...thresholds,
    })
  );

  // --- Diff against previous alerts and notify on state changes -----------
  const transitions = buildAlertTransitions({
    detections,
    previousState,
    nowIso: normalizedNow,
  });
  const { activated, recovered, activeAlerts } = transitions;
  // Alerts that merely stay active are not re-notified.
  const notifications = await dispatchAlerts({
    alerts: [...activated, ...recovered],
    config,
    execute: normalizedExecute,
    fetchImpl,
  });

  // --- Build next state ----------------------------------------------------
  const nextRunCount = previousState.runCount + 1;
  const nextState = normalizeState(
    {
      ...previousState,
      generatedAt: normalizedNow,
      activeAlerts,
      runCount: nextRunCount,
      lastRunId: buildRunId(normalizedNow, nextRunCount),
      lastRunAt: normalizedNow,
    },
    normalizedNow
  );

  // --- Persist run artifact, state and event -------------------------------
  await fsp.mkdir(storage.watchdogRunsDir, { recursive: true });
  const runId = nextState.lastRunId;
  const runPath = path.join(storage.watchdogRunsDir, `${runId}.json`);

  const sentNotificationCount = notifications.filter((item) => item.sent).length;
  // Dry-run results are not counted as failures.
  const failedNotificationCount = notifications.filter(
    (item) => !item.sent && !item.dryRun
  ).length;

  const summary = {
    assignmentCount: activeAssignments.length,
    detectionCount: detections.length,
    activeAlertCount: Object.keys(activeAlerts).length,
    activatedCount: activated.length,
    recoveredCount: recovered.length,
    notificationCount: notifications.length,
    sentNotificationCount,
    failedNotificationCount,
  };

  const runPayload = {
    schemaVersion: WATCHDOG_SCHEMA_VERSION,
    generatedAt: normalizedNow,
    runId,
    config: {
      ...thresholds,
      execute: normalizedExecute,
      channelCount: config.channels.length,
      events: config.events,
      frequency: config.frequency,
    },
    summary,
    detections,
    activatedAlerts: activated,
    recoveredAlerts: recovered,
    notifications,
  };

  await Promise.all([
    writeJsonFile(runPath, runPayload),
    writeJsonFile(storage.watchdogStatePath, nextState),
    appendEvent(storage.watchdogEventsPath, {
      timestamp: normalizedNow,
      eventType: "watchdog_tick",
      runId,
      detectionCount: summary.detectionCount,
      activatedCount: summary.activatedCount,
      recoveredCount: summary.recoveredCount,
      notificationCount: summary.notificationCount,
      sentNotificationCount,
      failedNotificationCount,
    }),
  ]);

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    runId,
    runPath,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state: nextState,
    detections,
    activatedAlerts: activated,
    recoveredAlerts: recovered,
    notifications,
    summary,
  };
}
899
+
900
/**
 * Report the watchdog's current state and its most recent run artifacts.
 *
 * Reads the persisted state (or an initial state when the file is absent,
 * per the fallback passed to `readJsonFile`), then lists the run directory
 * and summarizes up to `limit` run files, sorted by file name descending.
 * A missing run directory yields an empty list; unreadable or malformed run
 * files are skipped.
 *
 * @param {object} [options]
 * @param {string} [options.targetPath="."]
 * @param {string} [options.outputDir=""]
 * @param {number} [options.limit=10] - Maximum number of recent runs returned.
 * @param {object} [options.env=process.env]
 * @param {string} [options.homeDir]
 * @param {string} [options.nowIso] - Timestamp passed to the state and run-timestamp normalizers.
 * @returns {Promise<object>} Storage paths, config, state, active alerts and
 *   `recentRuns` summaries.
 * @throws Re-throws any directory read error other than ENOENT.
 */
export async function getWatchdogStatus({
  targetPath = ".",
  outputDir = "",
  limit = 10,
  env = process.env,
  homeDir,
  nowIso = new Date().toISOString(),
} = {}) {
  const normalizedNow = normalizeIsoTimestamp(nowIso, new Date().toISOString());
  const normalizedLimit = normalizePositiveInteger(limit, 10);

  const storage = await resolveWatchdogStorage({ targetPath, outputDir, env, homeDir });
  const config = await loadWatchdogConfig({ targetPath, env });
  const rawState = await readJsonFile(storage.watchdogStatePath, () =>
    createInitialState(normalizedNow)
  );
  const state = normalizeState(rawState, normalizedNow);

  let runEntries = [];
  try {
    runEntries = await fsp.readdir(storage.watchdogRunsDir, { withFileTypes: true });
  } catch (error) {
    // A run directory that does not exist yet simply means "no runs".
    const isMissingDir =
      error !== null && typeof error === "object" && error.code === "ENOENT";
    if (!isMissingDir) {
      throw error;
    }
  }

  const jsonFileNames = runEntries
    .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
    .map((entry) => entry.name);
  // Descending name order, then keep the first `limit` entries.
  jsonFileNames.sort((left, right) => right.localeCompare(left));
  const runFiles = jsonFileNames.slice(0, normalizedLimit);

  // Returns a summary for one run file, or null when it cannot be read/parsed.
  const summarizeRun = async (runFile) => {
    const runPath = path.join(storage.watchdogRunsDir, runFile);
    try {
      const parsed = JSON.parse(await fsp.readFile(runPath, "utf-8"));
      return {
        runId: normalizeString(parsed.runId),
        generatedAt: normalizeIsoTimestamp(parsed.generatedAt, normalizedNow),
        detectionCount: normalizeNonNegativeNumber(parsed.summary?.detectionCount, 0),
        activatedCount: normalizeNonNegativeNumber(parsed.summary?.activatedCount, 0),
        recoveredCount: normalizeNonNegativeNumber(parsed.summary?.recoveredCount, 0),
        notificationCount: normalizeNonNegativeNumber(parsed.summary?.notificationCount, 0),
        runPath,
      };
    } catch {
      // Ignore malformed run artifacts.
      return null;
    }
  };

  const recentRuns = [];
  for (const runFile of runFiles) {
    const runSummary = await summarizeRun(runFile);
    if (runSummary !== null) {
      recentRuns.push(runSummary);
    }
  }

  return {
    ...storage,
    configPath: config.configPath,
    configExists: config.exists,
    config,
    statePath: storage.watchdogStatePath,
    eventsPath: storage.watchdogEventsPath,
    state,
    activeAlerts: Object.values(state.activeAlerts),
    activeAlertCount: Object.keys(state.activeAlerts).length,
    runCount: state.runCount,
    recentRuns,
  };
}