aiden-runtime 4.1.5 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/README.md +250 -847
  2. package/dist/api/server.js +32 -5
  3. package/dist/cli/v4/aidenCLI.js +351 -53
  4. package/dist/cli/v4/callbacks.js +170 -0
  5. package/dist/cli/v4/chatSession.js +138 -3
  6. package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
  7. package/dist/cli/v4/commands/browserDepth.js +45 -0
  8. package/dist/cli/v4/commands/cron.js +264 -0
  9. package/dist/cli/v4/commands/daemon.js +541 -0
  10. package/dist/cli/v4/commands/daemonStatus.js +253 -0
  11. package/dist/cli/v4/commands/help.js +7 -0
  12. package/dist/cli/v4/commands/index.js +20 -1
  13. package/dist/cli/v4/commands/runs.js +203 -0
  14. package/dist/cli/v4/commands/sandbox.js +48 -0
  15. package/dist/cli/v4/commands/suggestions.js +68 -0
  16. package/dist/cli/v4/commands/tce.js +41 -0
  17. package/dist/cli/v4/commands/trigger.js +378 -0
  18. package/dist/cli/v4/commands/update.js +95 -3
  19. package/dist/cli/v4/daemonAgentBuilder.js +142 -0
  20. package/dist/cli/v4/defaultSoul.js +1 -1
  21. package/dist/cli/v4/display/capabilityCard.js +26 -0
  22. package/dist/cli/v4/display.js +18 -8
  23. package/dist/cli/v4/replyRenderer.js +31 -23
  24. package/dist/cli/v4/updateBootPrompt.js +170 -0
  25. package/dist/core/playwrightBridge.js +129 -0
  26. package/dist/core/v4/aidenAgent.js +308 -4
  27. package/dist/core/v4/browserState.js +436 -0
  28. package/dist/core/v4/checkpoint.js +79 -0
  29. package/dist/core/v4/daemon/bootstrap.js +604 -0
  30. package/dist/core/v4/daemon/cleanShutdown.js +154 -0
  31. package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
  32. package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
  33. package/dist/core/v4/daemon/cron/migration.js +199 -0
  34. package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
  35. package/dist/core/v4/daemon/daemonConfig.js +90 -0
  36. package/dist/core/v4/daemon/db/connection.js +106 -0
  37. package/dist/core/v4/daemon/db/migrations.js +296 -0
  38. package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
  39. package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
  40. package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
  41. package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
  42. package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
  43. package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
  44. package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
  45. package/dist/core/v4/daemon/dispatcher/index.js +53 -0
  46. package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
  47. package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
  48. package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
  49. package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
  50. package/dist/core/v4/daemon/drain.js +156 -0
  51. package/dist/core/v4/daemon/eventLoopLag.js +73 -0
  52. package/dist/core/v4/daemon/health.js +159 -0
  53. package/dist/core/v4/daemon/idempotencyStore.js +204 -0
  54. package/dist/core/v4/daemon/index.js +179 -0
  55. package/dist/core/v4/daemon/instanceTracker.js +99 -0
  56. package/dist/core/v4/daemon/resourceRegistry.js +150 -0
  57. package/dist/core/v4/daemon/restartCode.js +32 -0
  58. package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
  59. package/dist/core/v4/daemon/runStore.js +114 -0
  60. package/dist/core/v4/daemon/runtimeLock.js +167 -0
  61. package/dist/core/v4/daemon/signals.js +50 -0
  62. package/dist/core/v4/daemon/supervisor.js +272 -0
  63. package/dist/core/v4/daemon/triggerBus.js +279 -0
  64. package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
  65. package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
  66. package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
  67. package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
  68. package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
  69. package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
  70. package/dist/core/v4/daemon/triggers/email/index.js +332 -0
  71. package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
  72. package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
  73. package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
  74. package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
  75. package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
  76. package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
  77. package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
  78. package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
  79. package/dist/core/v4/daemon/triggers/webhook.js +376 -0
  80. package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
  81. package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
  82. package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
  83. package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
  84. package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
  85. package/dist/core/v4/daemon/types.js +15 -0
  86. package/dist/core/v4/dockerSession.js +461 -0
  87. package/dist/core/v4/dryRun.js +117 -0
  88. package/dist/core/v4/failureClassifier.js +779 -0
  89. package/dist/core/v4/recoveryReport.js +449 -0
  90. package/dist/core/v4/runtimeToggles.js +187 -0
  91. package/dist/core/v4/sandboxConfig.js +285 -0
  92. package/dist/core/v4/sandboxFs.js +316 -0
  93. package/dist/core/v4/suggestionCatalog.js +41 -0
  94. package/dist/core/v4/suggestionEngine.js +210 -0
  95. package/dist/core/v4/toolRegistry.js +18 -0
  96. package/dist/core/v4/turnState.js +587 -0
  97. package/dist/core/v4/update/checkUpdate.js +63 -3
  98. package/dist/core/v4/update/installMethodDetect.js +115 -0
  99. package/dist/core/v4/update/registryClient.js +121 -0
  100. package/dist/core/v4/update/skipState.js +75 -0
  101. package/dist/core/v4/verifier.js +448 -0
  102. package/dist/core/version.js +1 -1
  103. package/dist/tools/v4/browser/_observer.js +224 -0
  104. package/dist/tools/v4/browser/browserBlocker.js +396 -0
  105. package/dist/tools/v4/browser/browserClick.js +18 -1
  106. package/dist/tools/v4/browser/browserClose.js +18 -1
  107. package/dist/tools/v4/browser/browserExtract.js +5 -1
  108. package/dist/tools/v4/browser/browserFill.js +17 -1
  109. package/dist/tools/v4/browser/browserGetUrl.js +5 -1
  110. package/dist/tools/v4/browser/browserNavigate.js +16 -1
  111. package/dist/tools/v4/browser/browserScreenshot.js +5 -1
  112. package/dist/tools/v4/browser/browserScroll.js +18 -1
  113. package/dist/tools/v4/browser/browserType.js +17 -1
  114. package/dist/tools/v4/browser/captchaCheck.js +5 -1
  115. package/dist/tools/v4/executeCode.js +1 -0
  116. package/dist/tools/v4/files/fileCopy.js +56 -2
  117. package/dist/tools/v4/files/fileDelete.js +38 -1
  118. package/dist/tools/v4/files/fileList.js +12 -1
  119. package/dist/tools/v4/files/fileMove.js +59 -2
  120. package/dist/tools/v4/files/filePatch.js +43 -1
  121. package/dist/tools/v4/files/fileRead.js +12 -1
  122. package/dist/tools/v4/files/fileWrite.js +41 -1
  123. package/dist/tools/v4/index.js +71 -58
  124. package/dist/tools/v4/memory/memoryAdd.js +14 -0
  125. package/dist/tools/v4/memory/memoryRemove.js +14 -0
  126. package/dist/tools/v4/memory/memoryReplace.js +15 -0
  127. package/dist/tools/v4/memory/sessionSummary.js +12 -0
  128. package/dist/tools/v4/process/processKill.js +19 -0
  129. package/dist/tools/v4/process/processList.js +1 -0
  130. package/dist/tools/v4/process/processLogRead.js +1 -0
  131. package/dist/tools/v4/process/processSpawn.js +13 -0
  132. package/dist/tools/v4/process/processWait.js +1 -0
  133. package/dist/tools/v4/sessions/recallSession.js +1 -0
  134. package/dist/tools/v4/sessions/sessionList.js +1 -0
  135. package/dist/tools/v4/sessions/sessionSearch.js +1 -0
  136. package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
  137. package/dist/tools/v4/skills/skillManage.js +13 -0
  138. package/dist/tools/v4/skills/skillView.js +1 -0
  139. package/dist/tools/v4/skills/skillsList.js +1 -0
  140. package/dist/tools/v4/subagent/subagentFanout.js +1 -0
  141. package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
  142. package/dist/tools/v4/system/appClose.js +13 -0
  143. package/dist/tools/v4/system/appInput.js +13 -0
  144. package/dist/tools/v4/system/appLaunch.js +13 -0
  145. package/dist/tools/v4/system/clipboardRead.js +1 -0
  146. package/dist/tools/v4/system/clipboardWrite.js +14 -0
  147. package/dist/tools/v4/system/mediaKey.js +12 -0
  148. package/dist/tools/v4/system/mediaSessions.js +1 -0
  149. package/dist/tools/v4/system/mediaTransport.js +13 -0
  150. package/dist/tools/v4/system/naturalEvents.js +1 -0
  151. package/dist/tools/v4/system/nowPlaying.js +1 -0
  152. package/dist/tools/v4/system/osProcessList.js +1 -0
  153. package/dist/tools/v4/system/screenshot.js +1 -0
  154. package/dist/tools/v4/system/systemInfo.js +1 -0
  155. package/dist/tools/v4/system/volumeSet.js +17 -0
  156. package/dist/tools/v4/terminal/shellExec.js +81 -9
  157. package/dist/tools/v4/web/deepResearch.js +1 -0
  158. package/dist/tools/v4/web/openUrl.js +1 -0
  159. package/dist/tools/v4/web/webFetch.js +1 -0
  160. package/dist/tools/v4/web/webPage.js +1 -0
  161. package/dist/tools/v4/web/webSearch.js +1 -0
  162. package/dist/tools/v4/web/youtubeSearch.js +1 -0
  163. package/package.json +7 -1
@@ -0,0 +1,779 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * core/v4/failureClassifier.ts — v4.2 Phase 2: Tool-failure classifier.
10
+ *
11
+ * When Phase 1's verifier classifies a tool result as `!ok` (`failed`,
12
+ * `low_signal`, or `no_progress`), this layer enriches the failure
13
+ * with a structured WHY category. Categories drive Phase 3+'s recovery
14
+ * strategies (retry / surface / install / etc.) — Phase 2 only RECORDS
15
+ * the classification on the trace and TurnState diagnostics; no
16
+ * recovery action fires here.
17
+ *
18
+ * Ten categories, matching the v4.2 spec:
19
+ *
20
+ * timeout — connection/read deadline exceeded
21
+ * auth — 401/403, invalid API key, unauthorized
22
+ * hallucination — model invented a nonexistent entity (narrow
23
+ * Phase 2 scope: file-not-found + verbatim
24
+ * path match in args, confidence 0.6)
25
+ * network — connection refused, DNS, unreachable
26
+ * permission — local ACL ("Access denied"), refusing-to-act
27
+ * rate_limit — 429, throttled, "try again in N"
28
+ * invalid_input — missing required args, "No path provided",
29
+ * "is required", "must be non-empty"
30
+ * dependency_missing — binary not in PATH, "command not found",
31
+ * "not configured", process registry missing
32
+ * not_found — file not found, ENOENT (read tools), no
33
+ * such directory
34
+ * other — catch-all (renamed from "unknown" to match
35
+ * the v4.2 spec exactly)
36
+ *
37
+ * Priority-ordered pipeline (mirrors a layered failure-pattern approach
38
+ * used by a reference system, adapted for Aiden's tool-output domain):
39
+ *
40
+ * 1. Per-tool override (registered by toolName) — runs first; can
41
+ * short-circuit when a tool has a high-signal failure shape.
42
+ * 2. Outer envelope + verifier reason inspection — substring scan
43
+ * against priority-ordered pattern tables.
44
+ * 3. Hallucination heuristic (narrow): file-tool not_found AND args
45
+ * contain the path verbatim → escalate not_found to hallucination.
46
+ * 4. Fallback: `other` at confidence 0.3.
47
+ *
48
+ * Skips entirely when `verification.ok === true` — saves cycles on
49
+ * successful calls.
50
+ *
51
+ * Gated by the same TCE flag as Phase 1 verifier + TurnState (default
52
+ * ON as of v4.2 Phase 6; opt-out via `AIDEN_TCE=0`). When disabled,
53
+ * the classifier is never invoked from the agent loop.
54
+ */
55
+ Object.defineProperty(exports, "__esModule", { value: true });
56
+ exports.triggerDispatcherClassifier = exports.DAEMON_DISPATCHER_TOOL_NAME = exports.fileReadClassifierWithSandbox = exports.shellExecClassifierWithSandbox = exports.sandboxViolationClassifier = exports.browserNavigateClassifier = exports.browserInteractiveClassifier = exports.fileReadClassifier = exports.webFetchClassifier = exports.webSearchClassifier = exports.shellExecClassifier = exports.defaultClassifier = exports.FailureClassifier = void 0;
57
+ exports.buildDefaultClassifier = buildDefaultClassifier;
58
/**
 * Registry of per-tool failure classifiers plus a single fallback used
 * for every tool without an override. Symmetric with Phase 1's
 * VerifierRegistry.
 */
class FailureClassifier {
    /**
     * @param {Function} [fallback] classifier used when no override is
     *   registered for a tool; defaults to the exported `defaultClassifier`.
     */
    constructor(fallback = exports.defaultClassifier) {
        this.overrides = new Map();
        this.fallback = fallback;
    }
    /** Install (or replace) the classifier used for `toolName`. */
    register(toolName, fn) {
        this.overrides.set(toolName, fn);
    }
    /** Classifier registered for `toolName`, or the fallback when none (or a nullish one) is stored. */
    resolve(toolName) {
        const registered = this.overrides.get(toolName);
        return registered ?? this.fallback;
    }
    /** True when `toolName` has an entry in the override map. */
    hasOverride(toolName) {
        return this.overrides.has(toolName);
    }
    /**
     * Entry point used by the agent loop. Verifier-ok results are not
     * classified at all and yield `null`; everything else is handed to
     * the resolved classifier.
     */
    classify(verification, toolName, args, result) {
        if (!verification.ok) {
            const classifierFn = this.resolve(toolName);
            return classifierFn(verification, toolName, args, result);
        }
        return null;
    }
}
86
+ exports.FailureClassifier = FailureClassifier;
87
// ── Pattern tables (priority-ordered) ──────────────────────────────────────
// Every entry is a lowercase substring. Entry order inside a table matters:
// the first entry found in the haystack is the one reported as the match.

/** Timeout signals — the first table consulted by the default pipeline. */
const TIMEOUT_PATTERNS = [
    'timeout',
    'timed out',
    'etimedout',
    'deadline exceeded',
    'deadline_exceeded',
    // The three entries below each contain an earlier entry ('timed out' /
    // 'timeout'), so a first-match scan reports the shorter one instead.
    'read timed out',
    'connect timeout',
    'connection timeout',
];
/** Rate-limit signals — kept separate from auth/billing for now. */
const RATE_LIMIT_PATTERNS = [
    'rate limit',
    'rate_limit',
    'rate-limit',
    'too many requests',
    '429',
    'throttled',
    'throttling',
    'try again in',
    'please retry after',
    'requests per minute',
    'tokens per minute',
    'quota exceeded',
];
/** Auth signals — provider credential failures. */
const AUTH_PATTERNS = [
    '401',
    '403',
    'unauthorized',
    'unauthorised',
    'invalid api key',
    'invalid_api_key',
    'authentication failed',
    'authentication required',
    'invalid token',
    'token expired',
    'token revoked',
    'forbidden',
];
/** Network signals — failures before any HTTP exchange. */
const NETWORK_PATTERNS = [
    'econnrefused',
    'enetunreach',
    'enotfound',
    'eai_again',
    'dns lookup',
    'getaddrinfo',
    'connection refused',
    'network unreachable',
    'host not found',
    'no such host',
];
/** Permission signals — local ACL errors and refuse-to-act messages. */
const PERMISSION_PATTERNS = [
    'eacces',
    'eperm',
    'access denied',
    'permission denied',
    'refusing to',
    'protected path',
    'forbidden path',
    'restricted path',
];
/** Invalid-input signals — missing or malformed arguments. */
const INVALID_INPUT_PATTERNS = [
    'no path provided',
    'no query provided',
    'no command provided',
    'no url provided',
    'no id provided',
    'no topic provided',
    'is required',
    'are required',
    'must be a string',
    'must be non-empty',
    'invalid argument',
    'malformed',
    'both from and to required',
    'empty find string',
];
/** Dependency-missing signals — missing binary / unset env / unconfigured. */
const DEPENDENCY_MISSING_PATTERNS = [
    'command not found',
    'not in path',
    'not on path',
    'is not recognized as', // Windows shell wording
    'no such command',
    'not configured',
    // Both entries below contain 'not configured', which is listed first.
    'is not configured',
    'registry not configured',
    'paths not wired',
    'needs aiden',
];
/** Not-found signals (file/resource, distinct from dependency-missing). */
const NOT_FOUND_PATTERNS = [
    'enoent',
    'no such file',
    'no such directory',
    'file not found',
    'does not exist',
    'not found', // general — checked after dep-missing so it doesn't shadow
];
155
+ // ── Helpers ────────────────────────────────────────────────────────────────
156
/**
 * Collect every error-ish text fragment the classifier may inspect and
 * return them as one lowercased, space-joined string.
 *
 * Sources, in order: `verification.reason`, the outer `result.error`,
 * then the inner `result.result` — either the string itself (first 500
 * characters) or, for an object, its string-typed `error`, `stderr`
 * (first 500 characters) and `message` fields.
 *
 * @param {object} verification verifier verdict; only `reason` is read.
 * @param {object} result tool result envelope; `error` and `result` are read.
 * @returns {string} lowercased haystack ('' when nothing was collected).
 */
function buildHaystack(verification, result) {
    const fragments = [];
    const { reason } = verification;
    if (reason) {
        fragments.push(reason);
    }
    const { error: outerError, result: payload } = result;
    if (outerError) {
        fragments.push(outerError);
    }
    switch (typeof payload) {
        case 'string':
            fragments.push(payload.slice(0, 500));
            break;
        case 'object':
            // typeof null is 'object' — a null payload contributes nothing.
            if (payload !== null) {
                const { error, stderr, message } = payload;
                if (typeof error === 'string') {
                    fragments.push(error);
                }
                if (typeof stderr === 'string') {
                    fragments.push(stderr.slice(0, 500));
                }
                if (typeof message === 'string') {
                    fragments.push(message);
                }
            }
            break;
        default:
            break;
    }
    return fragments.join(' ').toLowerCase();
}
182
/**
 * True when `digits` occurs in `haystack` as a standalone number, i.e.
 * not immediately preceded or followed by another decimal digit.
 */
function containsStandaloneNumber(haystack, digits) {
    const isDigit = (ch) => ch !== undefined && ch >= '0' && ch <= '9';
    let at = haystack.indexOf(digits);
    while (at !== -1) {
        const before = haystack[at - 1];
        const after = haystack[at + digits.length];
        if (!isDigit(before) && !isDigit(after)) {
            return true;
        }
        at = haystack.indexOf(digits, at + 1);
    }
    return false;
}
/**
 * First pattern in `list` that's contained in `haystack`, else undefined.
 *
 * Patterns made only of digits (HTTP status codes such as '429') must
 * appear as standalone numbers: a plain substring test would also hit
 * unrelated text like "took 4290ms" or "report_14031.txt" and produce a
 * bogus rate-limit/auth classification. All other patterns keep plain
 * substring semantics.
 *
 * @param {string} haystack lowercased text to scan.
 * @param {string[]} list patterns in priority order.
 * @returns {string|undefined} the first matching pattern, if any.
 */
function matchAny(haystack, list) {
    for (const p of list) {
        const numericOnly = p.length > 0 && /^[0-9]+$/.test(p);
        const found = numericOnly
            ? containsStandaloneNumber(haystack, p)
            : haystack.includes(p);
        if (found)
            return p;
    }
    return undefined;
}
190
+ // ── Default classifier ─────────────────────────────────────────────────────
191
/**
 * Heuristic default classifier. Builds one haystack from the verifier
 * reason and the tool result, then walks the pattern tables in this
 * fixed priority order and returns on the first hit:
 *
 *   timeout → rate_limit → auth → network → permission →
 *   invalid_input → dependency_missing → not_found
 *
 * dependency_missing is consulted before not_found so that "command
 * not found" is not reported as a generic not_found. A not_found hit
 * is promoted to `hallucination` (confidence 0.6) when the tool is in
 * the file-read family and one of the path-like args appears verbatim
 * in the haystack. With no hit at all the result is `other` at
 * confidence 0.3 (and carries no `matchedPattern`).
 */
const defaultClassifier = (verification, toolName, args, result) => {
    const hay = buildHaystack(verification, result);
    // Ordered [pattern table, result builder] pairs; builders create a
    // fresh object per call so callers never share result instances.
    const stages = [
        [TIMEOUT_PATTERNS, (hit) => ({
                category: 'timeout',
                confidence: 0.9,
                reason: 'tool call exceeded its deadline',
                recoverable: true,
                recoveryHint: { action: 'retry_with_backoff' },
                matchedPattern: hit,
            })],
        [RATE_LIMIT_PATTERNS, (hit) => ({
                category: 'rate_limit',
                confidence: 0.9,
                reason: 'rate-limited by upstream',
                recoverable: true,
                recoveryHint: { action: 'retry_with_backoff' },
                matchedPattern: hit,
            })],
        [AUTH_PATTERNS, (hit) => ({
                category: 'auth',
                confidence: 0.9,
                reason: 'authentication failed or credential missing',
                recoverable: false,
                recoveryHint: { action: 'request_user_action', detail: 'check credentials' },
                matchedPattern: hit,
            })],
        [NETWORK_PATTERNS, (hit) => ({
                category: 'network',
                confidence: 0.85,
                reason: 'network unreachable or DNS failure',
                recoverable: true,
                recoveryHint: { action: 'retry_with_backoff' },
                matchedPattern: hit,
            })],
        [PERMISSION_PATTERNS, (hit) => ({
                category: 'permission',
                confidence: 0.9,
                reason: 'permission denied or refused by safety policy',
                recoverable: false,
                recoveryHint: { action: 'surface_to_user' },
                matchedPattern: hit,
            })],
        [INVALID_INPUT_PATTERNS, (hit) => ({
                category: 'invalid_input',
                confidence: 0.8,
                reason: 'tool call arguments missing or malformed',
                recoverable: true,
                recoveryHint: { action: 'retry', detail: 'fix the arguments and try again' },
                matchedPattern: hit,
            })],
        [DEPENDENCY_MISSING_PATTERNS, (hit) => ({
                category: 'dependency_missing',
                confidence: 0.85,
                reason: 'required binary or runtime resource is missing',
                recoverable: false,
                recoveryHint: { action: 'install_dependency' },
                matchedPattern: hit,
            })],
    ];
    for (const [patterns, describe] of stages) {
        const hit = matchAny(hay, patterns);
        if (hit) {
            return describe(hit);
        }
    }
    const missing = matchAny(hay, NOT_FOUND_PATTERNS);
    if (!missing) {
        // Nothing recognisable in the haystack — catch-all bucket.
        return {
            category: 'other',
            confidence: 0.3,
            reason: 'unclassified failure',
            recoverable: true,
            recoveryHint: { action: 'retry_with_backoff' },
        };
    }
    // Narrow hallucination promotion: file-read family only, and the
    // path the caller passed must show up in the failure text.
    const pathLooksInvented = isFileReadFamily(toolName) && argsContainPathVerbatim(args, hay);
    if (pathLooksInvented) {
        return {
            category: 'hallucination',
            confidence: 0.6,
            reason: 'tool called with a path that does not exist on disk',
            recoverable: true,
            recoveryHint: {
                action: 'retry',
                detail: 'the path the model used does not exist — re-check before retrying',
            },
            matchedPattern: missing,
        };
    }
    return {
        category: 'not_found',
        confidence: 0.85,
        reason: 'target resource was not found',
        recoverable: true,
        recoveryHint: { action: 'retry', detail: 'check the path/name and try again' },
        matchedPattern: missing,
    };
};
328
+ exports.defaultClassifier = defaultClassifier;
329
+ // ── Hallucination heuristic helpers ─────────────────────────────────────────
330
/**
 * Membership test for the tools whose not-found failures are eligible
 * for hallucination promotion: `file_read`, `file_list`, `file_patch`.
 */
function isFileReadFamily(toolName) {
    const family = ['file_read', 'file_list', 'file_patch'];
    return family.includes(toolName);
}
336
/**
 * Heuristic: is any path-like argument echoed in the failure text?
 *
 * Looks at the `path`, `file`, `from`, `to` and `target` properties of
 * `args`; a value counts only when it is a string of at least four
 * characters (shorter tokens would match too easily) and its
 * lowercased form occurs in `hay`.
 *
 * @param {unknown} args tool-call arguments; non-objects and null yield false.
 * @param {string} hay lowercased haystack to search.
 * @returns {boolean}
 */
function argsContainPathVerbatim(args, hay) {
    const isObjectLike = args !== null && typeof args === 'object';
    if (!isObjectLike) {
        return false;
    }
    const pathKeys = ['path', 'file', 'from', 'to', 'target'];
    return pathKeys.some((key) => {
        const value = args[key];
        if (typeof value !== 'string' || value.length < 4) {
            return false;
        }
        return hay.includes(value.toLowerCase());
    });
}
357
+ // ── Per-tool classifiers ───────────────────────────────────────────────────
358
/**
 * `shell_exec` — maps canonical UNIX exit codes found on
 * `result.result.exitCode` to a category:
 *   - 124 → timeout            (GNU coreutils `timeout` command)
 *   - 126 → permission         (found but cannot be invoked)
 *   - 127 → dependency_missing (command not found)
 *
 * Any other exit code — including 130 (SIGINT), which is NOT
 * special-cased here — or a missing/non-numeric exit code is handed to
 * the default classifier unchanged.
 *
 * @param {object} verification verifier verdict, forwarded to the default classifier.
 * @param {string} toolName tool name, forwarded to the default classifier.
 * @param {unknown} args tool-call arguments, forwarded to the default classifier.
 * @param {object} result tool result envelope; `result.result.exitCode` is inspected.
 * @returns {object} failure classification.
 */
const shellExecClassifier = (verification, toolName, args, result) => {
    const inner = result.result;
    const exitCode = (inner && typeof inner.exitCode === 'number') ? inner.exitCode : undefined;
    // Canonical UNIX exit codes — high confidence when the code is set.
    if (exitCode === 124) {
        return {
            category: 'timeout', confidence: 0.95,
            reason: 'shell command timed out (exit 124)',
            recoverable: true,
            recoveryHint: { action: 'retry_with_backoff', detail: 'consider raising timeoutMs' },
            matchedPattern: 'exit 124',
        };
    }
    if (exitCode === 126) {
        return {
            category: 'permission', confidence: 0.95,
            reason: 'shell command cannot be invoked (exit 126)',
            recoverable: false,
            recoveryHint: { action: 'surface_to_user' },
            matchedPattern: 'exit 126',
        };
    }
    if (exitCode === 127) {
        return {
            category: 'dependency_missing', confidence: 0.95,
            reason: 'shell command not found (exit 127)',
            recoverable: false,
            recoveryHint: { action: 'install_dependency' },
            matchedPattern: 'exit 127',
        };
    }
    // No recognised exit code: delegate to the generic pipeline with the
    // same arguments (no extra stderr weighting is applied here).
    return (0, exports.defaultClassifier)(verification, toolName, args, result);
};
402
+ exports.shellExecClassifier = shellExecClassifier;
403
/**
 * `web_search` — named hook for search-tool failures. Currently a pure
 * pass-through: all four arguments go to the default classifier and
 * its result is returned as-is.
 */
const webSearchClassifier = (verification, toolName, args, result) => (0, exports.defaultClassifier)(verification, toolName, args, result);
410
+ exports.webSearchClassifier = webSearchClassifier;
411
/**
 * `web_fetch` (+ aliases) — pass-through to the default classifier,
 * mirroring the `web_search` hook.
 */
const webFetchClassifier = (verification, toolName, args, result) => (0, exports.defaultClassifier)(verification, toolName, args, result);
415
+ exports.webFetchClassifier = webFetchClassifier;
416
/**
 * `file_read` — dedicated hook so callers can intercept file_read
 * classification specifically (e.g. plugin extensions). The
 * hallucination heuristic for read-family tools lives in the default
 * classifier, so this wrapper only forwards its arguments there and
 * returns whatever it produces.
 */
const fileReadClassifier = (verification, toolName, args, result) => (0, exports.defaultClassifier)(verification, toolName, args, result);
426
+ exports.fileReadClassifier = fileReadClassifier;
427
/**
 * Defensive accessor for the v4.3 `browserState` sidecar.
 *
 * @param {object} result tool result envelope.
 * @returns {object|null} `result.result.browserState` when the inner
 *   result is a truthy object and the field is non-nullish, else null.
 */
function readBrowserStateSidecar(result) {
    const payload = result.result;
    const hasObjectPayload = Boolean(payload) && typeof payload === 'object';
    if (!hasObjectPayload) {
        return null;
    }
    return payload.browserState ?? null;
}
434
/**
 * Classifier for the 3 interactive browser tools (browser_click,
 * browser_type, browser_fill). Reads the `browserState` sidecar from
 * the tool result and applies, in priority order:
 *
 *   1. blocker present                      → manual_blocker (conf 0.95)
 *   2. staleRefRetry attempted, not succeeded → stale_ref    (conf 0.9)
 *   3. maybe_noop, or needs_verifier with
 *      progress_score < 0.3                 → stale_ref      (conf 0.75)
 *   4. otherwise fall through to the default classifier
 *
 * `manual_blocker` beats `stale_ref` because no retry can fix a
 * login wall — the user has to act.
 *
 * @param {object} verification verifier verdict, forwarded on fall-through.
 * @param {string} toolName tool name, forwarded on fall-through.
 * @param {unknown} args tool-call arguments, forwarded on fall-through.
 * @param {object} result tool result envelope carrying the sidecar.
 * @returns {object} failure classification.
 */
const browserInteractiveClassifier = (verification, toolName, args, result) => {
    const bs = readBrowserStateSidecar(result);
    // Priority 1 — manual blocker.
    if (bs?.blocker) {
        return {
            category: 'manual_blocker',
            confidence: 0.95,
            reason: `${bs.blocker.kind}${bs.blocker.subtype ? ` (${bs.blocker.subtype})` : ''} at ${bs.blocker.url}`,
            recoverable: false,
            recoveryHint: { action: 'request_user_action', detail: bs.blocker.message },
            matchedPattern: `browserState.blocker.${bs.blocker.kind}`,
        };
    }
    // Priority 2 — an automatic stale-ref retry was attempted and failed.
    if (bs?.staleRefRetry?.attempted && !bs.staleRefRetry.succeeded) {
        return {
            category: 'stale_ref',
            confidence: 0.9,
            reason: `stale ref after auto-retry: ${bs.staleRefRetry.reason}`,
            recoverable: true,
            recoveryHint: {
                action: 'retry',
                detail: 'wait for page to settle, then re-select the element',
            },
            matchedPattern: 'browserState.staleRefRetry.failed',
        };
    }
    // Priority 3 — "no UI change despite success". Surfaced as stale_ref
    // because the recovery shape is the same: the page didn't respond,
    // so state should be re-read before trying again.
    if (bs && (bs.maybe_noop || (bs.needs_verifier && bs.progress_score < 0.3))) {
        // `maybe_noop` alone can land here without a numeric score;
        // calling toFixed on a non-number would throw inside the
        // classifier, so format defensively.
        const scoreText = typeof bs.progress_score === 'number'
            ? bs.progress_score.toFixed(2)
            : 'unknown';
        return {
            category: 'stale_ref',
            confidence: 0.75,
            reason: `tool returned success but page did not change (progress_score=${scoreText})`,
            recoverable: true,
            recoveryHint: {
                action: 'retry',
                detail: 'verify the page state then try a different approach',
            },
            matchedPattern: 'browserState.no_progress',
        };
    }
    // Fall through to default for generic patterns.
    return (0, exports.defaultClassifier)(verification, toolName, args, result);
};
493
+ exports.browserInteractiveClassifier = browserInteractiveClassifier;
494
/**
 * Classifier for `browser_navigate`. The only sidecar signal it looks
 * at is `blocker`: when present the failure is a `manual_blocker`
 * (conf 0.95, not recoverable, user action requested). Stale-ref
 * handling is deliberately absent — the stale-ref retry does not fire
 * on navigate. Everything else goes to the default classifier.
 */
const browserNavigateClassifier = (verification, toolName, args, result) => {
    const blocker = readBrowserStateSidecar(result)?.blocker;
    if (!blocker) {
        return (0, exports.defaultClassifier)(verification, toolName, args, result);
    }
    const subtypeSuffix = blocker.subtype ? ` (${blocker.subtype})` : '';
    return {
        category: 'manual_blocker',
        confidence: 0.95,
        reason: `${blocker.kind}${subtypeSuffix} at ${blocker.url}`,
        recoverable: false,
        recoveryHint: { action: 'request_user_action', detail: blocker.message },
        matchedPattern: `browserState.blocker.${blocker.kind}`,
    };
};
513
+ exports.browserNavigateClassifier = browserNavigateClassifier;
514
/**
 * Pull the `sandbox_violation` envelope out of a tool result and
 * normalise it. Returns null when the envelope is absent or malformed.
 *
 * The envelope is expected at `result.result.sandbox_violation`. It is
 * accepted only when both the inner result and the envelope are
 * non-array objects, `category` is exactly 'sandbox_violation' and
 * `code` is a string. In the returned copy the three path/policy
 * fields default to '' when not strings, and `retryable` is always
 * false regardless of what the envelope carried.
 *
 * @param {object} result tool result envelope.
 * @returns {object|null} normalised violation, or null.
 */
function readSandboxViolation(result) {
    const isPlainObject = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    const textOrEmpty = (value) => (typeof value === 'string' ? value : '');
    const payload = result.result;
    if (!isPlainObject(payload)) {
        return null;
    }
    const envelope = payload.sandbox_violation;
    if (!isPlainObject(envelope)) {
        return null;
    }
    const wellFormed = envelope.category === 'sandbox_violation' && typeof envelope.code === 'string';
    if (!wellFormed) {
        return null;
    }
    return {
        code: envelope.code,
        matched_policy: textOrEmpty(envelope.matched_policy),
        requested_path: textOrEmpty(envelope.requested_path),
        resolved_path: textOrEmpty(envelope.resolved_path),
        retryable: false,
        category: 'sandbox_violation',
    };
}
543
/**
 * Turn a sandbox-violation envelope into one user-facing sentence
 * describing how (or whether) the block can be lifted. The wording is
 * chosen by `env.code`:
 *
 *   fs.write_outside_allowlist → suggest `AIDEN_SANDBOX_ALLOW=<dir>`,
 *                                where <dir> is the text before the last
 *                                separator of the resolved (else
 *                                requested) path
 *   fs.sensitive_path          → denylisted; cannot be allowlisted
 *   fs.symlink_escape          → use the real path or extend the allowlist
 *   fs.path_traversal          → use an absolute path
 *   fs.read_denied             → states the refused policy/path
 *   anything else              → generic message, naming the policy if known
 *
 * @param {object} env violation envelope (`code`, `matched_policy`,
 *   `requested_path`, `resolved_path`).
 * @returns {string} suggestion text.
 */
function suggestOverride(env) {
    const { code } = env;
    if (code === 'fs.write_outside_allowlist') {
        // Prefer the real path over the agent-supplied string; the
        // containing directory is the likely allowlist candidate.
        const target = env.resolved_path || env.requested_path;
        const slashAt = target.lastIndexOf('/');
        // Backslash is consulted only when there is no forward slash.
        const cut = slashAt >= 0 ? slashAt : target.lastIndexOf('\\');
        // No separator, or a separator at index 0: keep the whole string.
        const dir = cut > 0 ? target.slice(0, cut) : target;
        if (dir) {
            return `Add to allowlist: AIDEN_SANDBOX_ALLOW=${dir}`;
        }
        return 'Add the target directory to AIDEN_SANDBOX_ALLOW';
    }
    if (code === 'fs.sensitive_path') {
        const subject = env.matched_policy || env.resolved_path;
        return `Sandbox refuses ${subject} for safety. ` +
            'This path is on the denylist and cannot be allowlisted. ' +
            'Use a different path, or set AIDEN_SANDBOX=0 to disable the sandbox entirely (not recommended).';
    }
    if (code === 'fs.symlink_escape') {
        return 'The path contains a symlink that resolves outside the sandbox. ' +
            'Use the real path directly, or extend AIDEN_SANDBOX_ALLOW to cover the symlink target.';
    }
    if (code === 'fs.path_traversal') {
        return 'Path contains `..` segments that escape the working directory. Use an absolute path.';
    }
    if (code === 'fs.read_denied') {
        return `Sandbox refuses read of ${env.matched_policy || env.resolved_path}.`;
    }
    if (env.matched_policy) {
        return `Sandbox blocked by policy: ${env.matched_policy}`;
    }
    return 'Sandbox blocked this operation.';
}
582
/**
 * Unified classifier for tools that pass through the v4.4 sandbox
 * preflight (write-side file tools, read-side file tools, shell_exec).
 *
 * Checks, in order:
 *   1. A Phase 2 `sandbox_violation` envelope on the result. This yields
 *      a non-recoverable `sandbox_violation` classification whose
 *      recovery hint carries the override suggestion for the user.
 *   2. For `shell_exec` only: a Phase 3 docker-start failure, detected
 *      by "Sandbox: failed to start container" in the result's stderr.
 *      This is also categorized as `sandbox_violation`, but marked
 *      recoverable with an `install_dependency` hint.
 *
 * When neither applies, the call is delegated to
 * `exports.defaultClassifier` with the same arguments.
 */
const sandboxViolationClassifier = (verification, toolName, args, result) => {
    const violation = readSandboxViolation(result);
    if (violation) {
        const {
            code,
            matched_policy: matchedPolicy,
            requested_path: requestedPath,
            resolved_path: resolvedPath,
        } = violation;
        const policySuffix = matchedPolicy ? ` matched ${matchedPolicy}` : '';
        return {
            category: 'sandbox_violation',
            confidence: 0.95,
            reason: `${code}${policySuffix}`,
            recoverable: false,
            recoveryHint: {
                action: 'request_user_action',
                detail: suggestOverride(violation),
            },
            matchedPattern: code,
            sandboxViolation: { code, matchedPolicy, requestedPath, resolvedPath },
        };
    }
    // Phase 3 docker-start failure path (shell_exec only).
    const dockerFailedToStart = () => {
        if (toolName !== 'shell_exec')
            return false;
        const payload = result.result;
        if (!payload || typeof payload !== 'object' || Array.isArray(payload))
            return false;
        const { stderr } = payload;
        return typeof stderr === 'string' && /Sandbox: failed to start container/.test(stderr);
    };
    if (dockerFailedToStart()) {
        return {
            category: 'sandbox_violation',
            confidence: 0.9,
            reason: 'docker container failed to start',
            recoverable: true,
            recoveryHint: {
                action: 'install_dependency',
                detail: 'Start Docker and retry, or set AIDEN_SANDBOX=0 to disable the sandbox.',
            },
            matchedPattern: 'docker_unavailable',
        };
    }
    // No sandbox signal: hand over to the generic classifier.
    return (0, exports.defaultClassifier)(verification, toolName, args, result);
};
exports.sandboxViolationClassifier = sandboxViolationClassifier;
644
/**
 * shell_exec classifier that consults the sandbox check before the
 * regular shell-exec logic.
 *
 * `exports.sandboxViolationClassifier` runs first. Its verdict is
 * returned only when the category is `sandbox_violation`; any other
 * verdict is discarded and `exports.shellExecClassifier` decides instead.
 * A sandbox refusal therefore takes precedence over every other
 * shell-exec failure mode.
 */
const shellExecClassifierWithSandbox = (verification, toolName, args, result) => {
    const sandboxVerdict = (0, exports.sandboxViolationClassifier)(verification, toolName, args, result);
    return sandboxVerdict.category === 'sandbox_violation'
        ? sandboxVerdict
        : (0, exports.shellExecClassifier)(verification, toolName, args, result);
};
exports.shellExecClassifierWithSandbox = shellExecClassifierWithSandbox;
657
/**
 * file_read classifier that reports sandbox denials as
 * `sandbox_violation` rather than whatever the plain file-read
 * classifier would produce.
 *
 * `exports.sandboxViolationClassifier` runs first. Its verdict is
 * returned only when the category is `sandbox_violation`; otherwise the
 * call is delegated to `exports.fileReadClassifier`.
 */
const fileReadClassifierWithSandbox = (verification, toolName, args, result) => {
    const sandboxVerdict = (0, exports.sandboxViolationClassifier)(verification, toolName, args, result);
    return sandboxVerdict.category === 'sandbox_violation'
        ? sandboxVerdict
        : (0, exports.fileReadClassifier)(verification, toolName, args, result);
};
exports.fileReadClassifierWithSandbox = fileReadClassifierWithSandbox;
670
+ // ── v4.5 Phase 5a — trigger-dispatcher classifier ──────────────────────────
671
/**
 * Synthetic "tool name" used by the daemon dispatcher when a turn fails
 * for a daemon-trigger-specific reason (template missing vars, fire-rate
 * cap exceeded, max retries exhausted). The dispatcher builds a
 * `ToolCallResult` envelope carrying this name plus a reason string in
 * `error`; the classifier registered under this name looks for one of
 * the substring tags below to choose a category.
 *
 * Tags are emitted by `core/v4/daemon/dispatcher/dispatcher.ts` and the
 * `triggerBus.markFailed` / `deadLetter` flows.
 */
exports.DAEMON_DISPATCHER_TOOL_NAME = 'daemon:dispatcher';

// Substring tags searched for in the dispatcher's failure text.
// Each tag doubles as the name of the failure category it selects.
const TRIGGER_MISCONFIGURED_TAG = 'trigger_misconfigured';
const TRIGGER_QUOTA_TAG = 'trigger_quota';
const TRIGGER_DEAD_LETTERED_TAG = 'trigger_dead_lettered';
686
/**
 * Classifier for the synthetic `daemon:dispatcher` tool.
 *
 * Builds one lowercase search string from `verification.reason`,
 * `result.error` and (only when it is a string) `result.result`, then
 * returns the classification for the first trigger tag it contains.
 * Tags are tested in a fixed order: misconfigured, quota, dead-lettered.
 * If no tag is present the call is delegated to
 * `exports.defaultClassifier`.
 *
 * Every tag-based classification is non-recoverable, has confidence
 * 0.95 and a `request_user_action` recovery hint:
 *   - trigger_misconfigured → fix the trigger spec / template variables
 *   - trigger_quota         → raise the cap or fix the producer
 *   - trigger_dead_lettered → inspect last_error, re-queue if appropriate
 */
const triggerDispatcherClassifier = (verification, toolName, args, result) => {
    const reasonText = verification.reason ?? '';
    const errorText = result.error ?? '';
    const resultText = typeof result.result === 'string' ? result.result : '';
    const haystack = (reasonText + ' ' + errorText + ' ' + resultText).toLowerCase();
    // Ordered rule table: the first tag found in the haystack wins.
    const rules = [
        {
            tag: TRIGGER_MISCONFIGURED_TAG,
            category: 'trigger_misconfigured',
            reason: 'trigger spec invalid or template variables missing',
            detail: 'inspect the trigger spec and ensure all template variables are populated by the payload',
        },
        {
            tag: TRIGGER_QUOTA_TAG,
            category: 'trigger_quota',
            reason: 'per-trigger fire-rate cap exceeded',
            detail: 'investigate the upstream producer or raise the trigger\'s fire-rate limit',
        },
        {
            tag: TRIGGER_DEAD_LETTERED_TAG,
            category: 'trigger_dead_lettered',
            reason: 'trigger event exhausted max retries and moved to dead letter',
            detail: 'review the last_error on the dead-lettered trigger event and re-queue if appropriate',
        },
    ];
    const matched = rules.find(({ tag }) => haystack.includes(tag));
    if (!matched) {
        // Defensive: no known tag in the failure text.
        return (0, exports.defaultClassifier)(verification, toolName, args, result);
    }
    return {
        category: matched.category,
        confidence: 0.95,
        reason: matched.reason,
        recoverable: false,
        recoveryHint: {
            action: 'request_user_action',
            detail: matched.detail,
        },
        matchedPattern: matched.tag,
    };
};
exports.triggerDispatcherClassifier = triggerDispatcherClassifier;
744
+ // ── Factory ────────────────────────────────────────────────────────────────
745
/**
 * Create a `FailureClassifier` registry and register the per-tool
 * classifiers on it, in the order listed below.
 *
 * @returns {FailureClassifier} The populated registry.
 */
function buildDefaultClassifier() {
    const registry = new FailureClassifier();
    const bindings = [
        // v4.4 Phase 5 — sandbox-aware wrapper runs the envelope check
        // first, then defers to the existing shell-exec classifier.
        ['shell_exec', exports.shellExecClassifierWithSandbox],
        ['web_search', exports.webSearchClassifier],
        // All three page-fetching tools share one classifier.
        ['web_fetch', exports.webFetchClassifier],
        ['fetch_page', exports.webFetchClassifier],
        ['web_page', exports.webFetchClassifier],
        ['file_read', exports.fileReadClassifierWithSandbox],
        // v4.4 Phase 5 — file tools with no bespoke classifier use the
        // unified sandbox classifier directly.
        ['file_list', exports.sandboxViolationClassifier],
        ['file_write', exports.sandboxViolationClassifier],
        ['file_patch', exports.sandboxViolationClassifier],
        ['file_copy', exports.sandboxViolationClassifier],
        ['file_move', exports.sandboxViolationClassifier],
        ['file_delete', exports.sandboxViolationClassifier],
        // v4.3 Phase 5 — browser-tool overrides that read the
        // BrowserState sidecars (staleRefRetry from Phase 2, blocker
        // from Phase 3).
        ['browser_click', exports.browserInteractiveClassifier],
        ['browser_type', exports.browserInteractiveClassifier],
        ['browser_fill', exports.browserInteractiveClassifier],
        ['browser_navigate', exports.browserNavigateClassifier],
        // v4.5 Phase 5a — synthetic daemon-dispatcher "tool" routes the
        // trigger-specific failure categories.
        [exports.DAEMON_DISPATCHER_TOOL_NAME, exports.triggerDispatcherClassifier],
    ];
    for (const [toolName, classifier] of bindings) {
        registry.register(toolName, classifier);
    }
    return registry;
}