aiden-runtime 4.1.5 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +265 -847
  2. package/dist/api/server.js +32 -5
  3. package/dist/cli/v4/aidenCLI.js +536 -152
  4. package/dist/cli/v4/callbacks.js +170 -0
  5. package/dist/cli/v4/chatSession.js +245 -3
  6. package/dist/cli/v4/commands/_runtimeToggleHelpers.js +94 -0
  7. package/dist/cli/v4/commands/browserDepth.js +45 -0
  8. package/dist/cli/v4/commands/cron.js +264 -0
  9. package/dist/cli/v4/commands/daemon.js +541 -0
  10. package/dist/cli/v4/commands/daemonStatus.js +253 -0
  11. package/dist/cli/v4/commands/fanout.js +42 -59
  12. package/dist/cli/v4/commands/help.js +13 -0
  13. package/dist/cli/v4/commands/index.js +35 -1
  14. package/dist/cli/v4/commands/mcp.js +80 -54
  15. package/dist/cli/v4/commands/plannerGuard.js +53 -0
  16. package/dist/cli/v4/commands/recovery.js +122 -0
  17. package/dist/cli/v4/commands/runs.js +223 -0
  18. package/dist/cli/v4/commands/sandbox.js +48 -0
  19. package/dist/cli/v4/commands/spawnPause.js +93 -0
  20. package/dist/cli/v4/commands/suggestions.js +68 -0
  21. package/dist/cli/v4/commands/tce.js +41 -0
  22. package/dist/cli/v4/commands/trigger.js +378 -0
  23. package/dist/cli/v4/commands/update.js +95 -3
  24. package/dist/cli/v4/daemonAgentBuilder.js +145 -0
  25. package/dist/cli/v4/defaultSoul.js +1 -1
  26. package/dist/cli/v4/display/capabilityCard.js +26 -0
  27. package/dist/cli/v4/display.js +18 -8
  28. package/dist/cli/v4/replyRenderer.js +31 -23
  29. package/dist/cli/v4/updateBootPrompt.js +170 -0
  30. package/dist/core/playwrightBridge.js +129 -0
  31. package/dist/core/v4/aidenAgent.js +527 -5
  32. package/dist/core/v4/browserState.js +436 -0
  33. package/dist/core/v4/checkpoint.js +79 -0
  34. package/dist/core/v4/daemon/bootstrap.js +651 -0
  35. package/dist/core/v4/daemon/cleanShutdown.js +154 -0
  36. package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
  37. package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
  38. package/dist/core/v4/daemon/cron/migration.js +199 -0
  39. package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
  40. package/dist/core/v4/daemon/daemonConfig.js +90 -0
  41. package/dist/core/v4/daemon/db/connection.js +106 -0
  42. package/dist/core/v4/daemon/db/migrations.js +362 -0
  43. package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
  44. package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
  45. package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
  46. package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
  47. package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
  48. package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
  49. package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
  50. package/dist/core/v4/daemon/dispatcher/index.js +53 -0
  51. package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
  52. package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
  53. package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
  54. package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
  55. package/dist/core/v4/daemon/drain.js +156 -0
  56. package/dist/core/v4/daemon/eventLoopLag.js +73 -0
  57. package/dist/core/v4/daemon/health.js +159 -0
  58. package/dist/core/v4/daemon/idempotencyStore.js +204 -0
  59. package/dist/core/v4/daemon/index.js +179 -0
  60. package/dist/core/v4/daemon/instanceTracker.js +99 -0
  61. package/dist/core/v4/daemon/resourceRegistry.js +150 -0
  62. package/dist/core/v4/daemon/restartCode.js +32 -0
  63. package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
  64. package/dist/core/v4/daemon/runStore.js +144 -0
  65. package/dist/core/v4/daemon/runtimeLock.js +167 -0
  66. package/dist/core/v4/daemon/signals.js +50 -0
  67. package/dist/core/v4/daemon/supervisor.js +272 -0
  68. package/dist/core/v4/daemon/triggerBus.js +279 -0
  69. package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
  70. package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
  71. package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
  72. package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
  73. package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
  74. package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
  75. package/dist/core/v4/daemon/triggers/email/index.js +332 -0
  76. package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
  77. package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
  78. package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
  79. package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
  80. package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
  81. package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
  82. package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
  83. package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
  84. package/dist/core/v4/daemon/triggers/webhook.js +376 -0
  85. package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
  86. package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
  87. package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
  88. package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
  89. package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
  90. package/dist/core/v4/daemon/types.js +15 -0
  91. package/dist/core/v4/dockerSession.js +461 -0
  92. package/dist/core/v4/dryRun.js +117 -0
  93. package/dist/core/v4/failureClassifier.js +779 -0
  94. package/dist/core/v4/providerFallback.js +35 -2
  95. package/dist/core/v4/recoveryReport.js +449 -0
  96. package/dist/core/v4/runtimeToggles.js +214 -0
  97. package/dist/core/v4/sandboxConfig.js +285 -0
  98. package/dist/core/v4/sandboxFs.js +316 -0
  99. package/dist/core/v4/selfimprovement/recoveryStore.js +307 -0
  100. package/dist/core/v4/selfimprovement/signatureBuilder.js +158 -0
  101. package/dist/core/v4/subagent/childBuilder.js +391 -0
  102. package/dist/core/v4/subagent/fanout.js +75 -51
  103. package/dist/core/v4/subagent/spawnPause.js +191 -0
  104. package/dist/core/v4/subagent/spawnSubAgent.js +310 -0
  105. package/dist/core/v4/suggestionCatalog.js +41 -0
  106. package/dist/core/v4/suggestionEngine.js +210 -0
  107. package/dist/core/v4/toolRegistry.js +37 -3
  108. package/dist/core/v4/turnState.js +587 -0
  109. package/dist/core/v4/update/checkUpdate.js +63 -3
  110. package/dist/core/v4/update/installMethodDetect.js +115 -0
  111. package/dist/core/v4/update/registryClient.js +121 -0
  112. package/dist/core/v4/update/skipState.js +75 -0
  113. package/dist/core/v4/verifier.js +448 -0
  114. package/dist/core/version.js +1 -1
  115. package/dist/moat/plannerGuard.js +29 -0
  116. package/dist/providers/v4/anthropicAdapter.js +31 -3
  117. package/dist/providers/v4/chatCompletionsAdapter.js +26 -3
  118. package/dist/providers/v4/codexResponsesAdapter.js +25 -2
  119. package/dist/providers/v4/ollamaPromptToolsAdapter.js +57 -2
  120. package/dist/tools/v4/browser/_observer.js +224 -0
  121. package/dist/tools/v4/browser/browserBlocker.js +396 -0
  122. package/dist/tools/v4/browser/browserClick.js +18 -1
  123. package/dist/tools/v4/browser/browserClose.js +18 -1
  124. package/dist/tools/v4/browser/browserExtract.js +5 -1
  125. package/dist/tools/v4/browser/browserFill.js +17 -1
  126. package/dist/tools/v4/browser/browserGetUrl.js +5 -1
  127. package/dist/tools/v4/browser/browserNavigate.js +16 -1
  128. package/dist/tools/v4/browser/browserScreenshot.js +5 -1
  129. package/dist/tools/v4/browser/browserScroll.js +18 -1
  130. package/dist/tools/v4/browser/browserType.js +17 -1
  131. package/dist/tools/v4/browser/captchaCheck.js +5 -1
  132. package/dist/tools/v4/executeCode.js +1 -0
  133. package/dist/tools/v4/files/fileCopy.js +56 -2
  134. package/dist/tools/v4/files/fileDelete.js +38 -1
  135. package/dist/tools/v4/files/fileList.js +12 -1
  136. package/dist/tools/v4/files/fileMove.js +59 -2
  137. package/dist/tools/v4/files/filePatch.js +43 -1
  138. package/dist/tools/v4/files/fileRead.js +12 -1
  139. package/dist/tools/v4/files/fileWrite.js +41 -1
  140. package/dist/tools/v4/index.js +88 -61
  141. package/dist/tools/v4/memory/memoryAdd.js +14 -0
  142. package/dist/tools/v4/memory/memoryRemove.js +14 -0
  143. package/dist/tools/v4/memory/memoryReplace.js +15 -0
  144. package/dist/tools/v4/memory/sessionSummary.js +12 -0
  145. package/dist/tools/v4/process/processKill.js +19 -0
  146. package/dist/tools/v4/process/processList.js +1 -0
  147. package/dist/tools/v4/process/processLogRead.js +1 -0
  148. package/dist/tools/v4/process/processSpawn.js +13 -0
  149. package/dist/tools/v4/process/processWait.js +1 -0
  150. package/dist/tools/v4/sessions/recallSession.js +1 -0
  151. package/dist/tools/v4/sessions/sessionList.js +1 -0
  152. package/dist/tools/v4/sessions/sessionSearch.js +1 -0
  153. package/dist/tools/v4/skills/lookupToolSchema.js +7 -0
  154. package/dist/tools/v4/skills/skillManage.js +13 -0
  155. package/dist/tools/v4/skills/skillView.js +1 -0
  156. package/dist/tools/v4/skills/skillsList.js +1 -0
  157. package/dist/tools/v4/subagent/spawnSubAgentTool.js +334 -0
  158. package/dist/tools/v4/subagent/subagentFanout.js +54 -1
  159. package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
  160. package/dist/tools/v4/system/appClose.js +13 -0
  161. package/dist/tools/v4/system/appInput.js +13 -0
  162. package/dist/tools/v4/system/appLaunch.js +13 -0
  163. package/dist/tools/v4/system/clipboardRead.js +1 -0
  164. package/dist/tools/v4/system/clipboardWrite.js +14 -0
  165. package/dist/tools/v4/system/mediaKey.js +12 -0
  166. package/dist/tools/v4/system/mediaSessions.js +1 -0
  167. package/dist/tools/v4/system/mediaTransport.js +13 -0
  168. package/dist/tools/v4/system/naturalEvents.js +1 -0
  169. package/dist/tools/v4/system/nowPlaying.js +1 -0
  170. package/dist/tools/v4/system/osProcessList.js +1 -0
  171. package/dist/tools/v4/system/screenshot.js +1 -0
  172. package/dist/tools/v4/system/systemInfo.js +1 -0
  173. package/dist/tools/v4/system/volumeSet.js +17 -0
  174. package/dist/tools/v4/terminal/shellExec.js +81 -9
  175. package/dist/tools/v4/web/deepResearch.js +1 -0
  176. package/dist/tools/v4/web/openUrl.js +1 -0
  177. package/dist/tools/v4/web/webFetch.js +1 -0
  178. package/dist/tools/v4/web/webPage.js +1 -0
  179. package/dist/tools/v4/web/webSearch.js +1 -0
  180. package/dist/tools/v4/web/youtubeSearch.js +1 -0
  181. package/package.json +13 -3
@@ -0,0 +1,448 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * core/v4/verifier.ts — v4.2 Phase 1: Per-tool result verifier.
10
+ *
11
+ * After each tool dispatch, the verifier inspects the result and
12
+ * classifies the outcome:
13
+ *
14
+ * ok — tool produced a usable, non-failed output
15
+ * failed — tool errored, returned `success: false`, or matched
16
+ * a known failure shape
17
+ * no_progress — tool succeeded but produced no useful signal (empty
18
+ * payload, identical hash to a recent call — Phase 3
19
+ * wires the hash repeat detector)
20
+ * low_signal — tool succeeded but with a short / vague response
21
+ * that's informative but probably won't help the
22
+ * model make progress
23
+ * unknown — verifier couldn't classify with confidence
24
+ *
25
+ * Scope (Phase 1):
26
+ * - Pure inspection of `(toolName, args, result)` — NO goal awareness
27
+ * (deferred to Phase 5 / task graph).
28
+ * - Synchronous; runs in the agent's tool-dispatch loop between
29
+ * `onToolCall('after', result)` and `turnState.recordToolCall(...)`.
30
+ * - Default fallback handles ~99% of Aiden tools that return the
31
+ * `{ success: boolean, error?: string, ...payload }` envelope.
32
+ * - Built-in per-tool verifiers for 5 high-signal tools where the
33
+ * default envelope inspection isn't sufficient: `shell_exec`,
34
+ * `web_search`, `file_write`, `file_read`, `web_fetch`.
35
+ * - Behind the same gate as TurnState (default ON; opt-out via
36
+ * `AIDEN_TCE=0`). When disabled, the agent skips verifier
37
+ * classification — the registry is still constructed (cheap) but
38
+ * `resolve()` is never called inside the gated branch.
39
+ *
40
+ * Out of scope (deferred phases):
41
+ * - Phase 2 — typed failure reason taxonomy (timeout / auth /
42
+ * hallucination / network — separate from per-tool verifier).
43
+ * - Phase 3 — RecoveryReport (uses verifier output + Phase 2 classifier).
44
+ * - Phase 4 — checkpoint/restore (uses Phase 3 state shape).
45
+ * - Phase 5 — task-graph sub-step verification (extends VerifierFn
46
+ * signature with optional `subGoal` argument; backward-compatible).
47
+ *
48
+ * The design intentionally mirrors a layered-decision pattern from the
49
+ * reference system's tool-guardrail module: a pure classifier function
50
+ * driving a controller's threshold counters, with per-tool overrides
51
+ * for the small set of tools where heuristic inspection is too coarse.
52
+ */
53
+ Object.defineProperty(exports, "__esModule", { value: true });
54
+ exports.browserInteractiveVerifier = exports.webFetchVerifier = exports.fileReadVerifier = exports.fileWriteVerifier = exports.webSearchVerifier = exports.shellExecVerifier = exports.defaultVerifier = exports.VerifierRegistry = void 0;
55
+ exports.buildDefaultRegistry = buildDefaultRegistry;
56
/**
 * Lookup table from tool name to verifier function, backed by a single
 * fallback verifier for every tool that has no dedicated entry.
 *
 * Construction only allocates an empty Map, and no verifier runs until
 * a caller invokes the function handed back by `resolve(...)`.
 */
class VerifierRegistry {
    /**
     * @param fallback Verifier returned by `resolve` when no override
     *                 exists for the requested tool. Defaults to the
     *                 module's exported `defaultVerifier`.
     */
    constructor(fallback = exports.defaultVerifier) {
        this.overrides = new Map();
        this.fallback = fallback;
    }
    /** Install (or replace) the verifier used for `toolName`. */
    register(toolName, fn) {
        this.overrides.set(toolName, fn);
    }
    /**
     * Return the verifier for `toolName`. A missing entry — or one
     * stored as null/undefined — yields the fallback.
     */
    resolve(toolName) {
        const registered = this.overrides.get(toolName);
        if (registered === undefined || registered === null) {
            return this.fallback;
        }
        return registered;
    }
    /** Test helper: true when `toolName` has an entry in the override map. */
    hasOverride(toolName) {
        return this.overrides.has(toolName);
    }
}
78
+ exports.VerifierRegistry = VerifierRegistry;
79
+ // ── Default fallback verifier ──────────────────────────────────────────────
80
const SHORT_RESPONSE_THRESHOLD = 50; // chars — trimmed raw strings shorter than this are low_signal
const RAW_STRING_SCAN_WINDOW = 500; // chars — the error-marker scan only inspects this much of the head
/**
 * Generic verifier used when a tool has no dedicated override.
 *
 * Classification, first match wins:
 *
 *   - `result.error` is a non-empty string      → failed     (conf 1.0)
 *   - payload is null / undefined               → unknown    (conf 0.5, ok: true)
 *   - payload is a string:
 *       · empty or < 50 chars after trimming    → low_signal (conf 0.4, ok: true)
 *       · head starts with "error", or contains
 *         `"error"` / `"failed"` (case-folded,
 *         first 500 chars)                      → failed     (conf 0.6)
 *       · otherwise                             → ok         (conf 0.7)
 *   - payload is a plain (non-array) object:
 *       · `success === false`                   → failed     (conf 1.0)
 *       · `success === true`                    → ok         (conf 1.0)
 *       · no boolean `success`                  → ok         (conf 0.7)
 *   - anything else (array, number, …)          → unknown    (conf 0.5, ok: true)
 *
 * Note that the short-string check runs before the error-marker scan,
 * so a short error message is reported as low_signal, not failed.
 *
 * @param _toolName unused
 * @param _args     unused
 * @param result    tool-call envelope; `error` and `result` are read
 * @returns `{ ok, confidence, code, reason? }`
 */
const defaultVerifier = (_toolName, _args, result) => {
    const outerError = result.error;
    if (typeof outerError === 'string' && outerError.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: outerError };
    }
    const payload = result.result;
    if (payload === null || payload === undefined) {
        return { ok: true, confidence: 0.5, code: 'unknown', reason: 'null result' };
    }
    if (typeof payload === 'string') {
        const text = payload.trim();
        if (text.length < SHORT_RESPONSE_THRESHOLD) {
            const reason = text.length === 0
                ? 'empty string result'
                : `short result (${text.length} chars)`;
            return { ok: true, confidence: 0.4, code: 'low_signal', reason };
        }
        const head = text.slice(0, RAW_STRING_SCAN_WINDOW).toLowerCase();
        const flagged = head.startsWith('error') ||
            ['"error"', '"failed"'].some((marker) => head.includes(marker));
        if (flagged) {
            return {
                ok: false,
                confidence: 0.6,
                code: 'failed',
                reason: 'error keywords detected in raw string head',
            };
        }
        return { ok: true, confidence: 0.7, code: 'ok' };
    }
    if (typeof payload === 'object' && !Array.isArray(payload)) {
        switch (payload.success) {
            case false: {
                const detail = payload.error;
                const reason = typeof detail === 'string' && detail.length > 0
                    ? detail
                    : 'tool returned success:false';
                return { ok: false, confidence: 1.0, code: 'failed', reason };
            }
            case true:
                return { ok: true, confidence: 1.0, code: 'ok' };
            default:
                // No boolean `success` flag — not enough signal for full confidence.
                return { ok: true, confidence: 0.7, code: 'ok' };
        }
    }
    // Arrays, numbers, booleans, … — nothing to classify on.
    return { ok: true, confidence: 0.5, code: 'unknown' };
};
169
+ exports.defaultVerifier = defaultVerifier;
170
+ // ── Built-in per-tool verifiers ────────────────────────────────────────────
171
/**
 * Verifier for `shell_exec` results, driven primarily by `exitCode`.
 *
 * Decision order:
 *   1. non-empty `result.error`                → failed (conf 1.0)
 *   2. payload is null or not an object        → unknown, ok: false (conf 0.5)
 *   3. payload `success === false`             → failed (conf 1.0); reason is
 *      payload `error` when it is a string, else 'success:false'
 *   4. `exitCode` is not a number              → ok (conf 0.7) if
 *      `success === true`, otherwise unknown with ok: false
 *   5. `exitCode !== 0`                        → failed (conf 1.0) plus a
 *      suggestion not to repeat the same command
 *   6. exit 0 with blank/missing `stdout`      → low_signal (conf 0.4)
 *   7. exit 0 with output                      → ok (conf 1.0)
 *
 * @param _n     unused tool name
 * @param _a     unused tool args
 * @param result tool-call envelope; `error` and `result` are read
 */
const shellExecVerifier = (_n, _a, result) => {
    const fail = (reason, extra = {}) => ({
        ok: false,
        confidence: 1.0,
        code: 'failed',
        reason,
        ...extra,
    });
    const unknown = (reason) => ({ ok: false, confidence: 0.5, code: 'unknown', reason });
    if (typeof result.error === 'string' && result.error.length > 0) {
        return fail(result.error);
    }
    const payload = result.result;
    if (payload === null || typeof payload !== 'object') {
        return unknown('non-object shell_exec result');
    }
    const { success, error, exitCode, stdout } = payload;
    // An explicit success:false is definitive even when exitCode is absent.
    if (success === false) {
        return fail(typeof error === 'string' ? error : 'success:false');
    }
    if (typeof exitCode !== 'number') {
        // Without an exit code, only an explicit success flag can vouch for the run.
        return success === true
            ? { ok: true, confidence: 0.7, code: 'ok' }
            : unknown('missing exitCode');
    }
    if (exitCode !== 0) {
        return fail(`non-zero exit (${exitCode})`, {
            suggestion: 'Inspect stderr and adjust the command — repeating the same invocation will not help.',
        });
    }
    const output = typeof stdout === 'string' ? stdout.trim() : '';
    if (output.length > 0) {
        return { ok: true, confidence: 1.0, code: 'ok' };
    }
    return {
        ok: true,
        confidence: 0.4,
        code: 'low_signal',
        reason: 'exit 0 with empty stdout',
    };
};
225
+ exports.shellExecVerifier = shellExecVerifier;
226
/**
 * Verifier for `web_search`, whose payload is normally a raw string.
 *
 *   - non-empty `result.error`                 → failed (conf 1.0)
 *   - payload is not a string                  → delegated to the exported
 *     `defaultVerifier` (covers wrapped `{ success, ... }` shapes)
 *   - blank string                             → low_signal (conf 0.4) with
 *     a suggestion to rephrase or use web_fetch
 *   - shorter than SHORT_RESPONSE_THRESHOLD    → low_signal (conf 0.4); a
 *     short answer is treated as weak signal, not as a failure
 *   - anything longer                          → ok (conf 0.9)
 *
 * @param _n     tool name (forwarded only when delegating)
 * @param _a     tool args (forwarded only when delegating)
 * @param result tool-call envelope; `error` and `result` are read
 */
const webSearchVerifier = (_n, _a, result) => {
    const { error, result: payload } = result;
    if (typeof error === 'string' && error.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: error };
    }
    if (typeof payload !== 'string') {
        return (0, exports.defaultVerifier)(_n, _a, result);
    }
    const size = payload.trim().length;
    if (size >= SHORT_RESPONSE_THRESHOLD) {
        return { ok: true, confidence: 0.9, code: 'ok' };
    }
    const lowSignal = { ok: true, confidence: 0.4, code: 'low_signal' };
    if (size === 0) {
        return {
            ...lowSignal,
            reason: 'empty web_search result',
            suggestion: 'Try a different query or use web_fetch with a known URL.',
        };
    }
    return { ...lowSignal, reason: `short web_search result (${size} chars)` };
};
260
+ exports.webSearchVerifier = webSearchVerifier;
261
/**
 * Verifier for `file_write`.
 *
 *   - non-empty `result.error`                 → failed (conf 1.0)
 *   - payload is null or not an object         → unknown, ok: false (conf 0.5)
 *   - payload `success === false`              → failed (conf 1.0); reason is
 *     payload `error` when it is a string, else 'success:false'
 *   - `bytesWritten` is exactly 0              → low_signal (conf 0.4)
 *   - everything else                          → ok (conf 1.0); a missing
 *     `bytesWritten` is not penalised
 *
 * @param _n     unused tool name
 * @param _a     unused tool args
 * @param result tool-call envelope; `error` and `result` are read
 */
const fileWriteVerifier = (_n, _a, result) => {
    const failed = (reason) => ({ ok: false, confidence: 1.0, code: 'failed', reason });
    if (typeof result.error === 'string' && result.error.length > 0) {
        return failed(result.error);
    }
    const payload = result.result;
    if (payload === null || typeof payload !== 'object') {
        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'non-object file_write result' };
    }
    if (payload.success === false) {
        return failed(typeof payload.error === 'string' ? payload.error : 'success:false');
    }
    // Strict equality with 0 already implies the value is a number.
    if (payload.bytesWritten === 0) {
        return {
            ok: true,
            confidence: 0.4,
            code: 'low_signal',
            reason: 'wrote 0 bytes',
        };
    }
    return { ok: true, confidence: 1.0, code: 'ok' };
};
292
+ exports.fileWriteVerifier = fileWriteVerifier;
293
/**
 * Verifier for `file_read`.
 *
 *   - non-empty `result.error`                 → failed (conf 1.0)
 *   - payload is null or not an object         → unknown, ok: false (conf 0.5)
 *   - payload `success === false`              → failed (conf 1.0); reason is
 *     payload `error` when it is a string, else 'success:false'
 *   - `content` missing, non-string, or ''     → low_signal (conf 0.4) — an
 *     empty read is treated as suspicious rather than as a failure
 *   - non-empty string `content`               → ok (conf 1.0)
 *
 * @param _n     unused tool name
 * @param _a     unused tool args
 * @param result tool-call envelope; `error` and `result` are read
 */
const fileReadVerifier = (_n, _a, result) => {
    const failed = (reason) => ({ ok: false, confidence: 1.0, code: 'failed', reason });
    if (typeof result.error === 'string' && result.error.length > 0) {
        return failed(result.error);
    }
    const payload = result.result;
    if (payload === null || typeof payload !== 'object') {
        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'non-object file_read result' };
    }
    if (payload.success === false) {
        return failed(typeof payload.error === 'string' ? payload.error : 'success:false');
    }
    const hasContent = typeof payload.content === 'string' && payload.content.length > 0;
    if (hasContent) {
        return { ok: true, confidence: 1.0, code: 'ok' };
    }
    return {
        ok: true,
        confidence: 0.4,
        code: 'low_signal',
        reason: 'empty file content',
    };
};
325
+ exports.fileReadVerifier = fileReadVerifier;
326
/**
 * Minimum trimmed body length (chars) for a fetch to count as substantive.
 */
const WEB_FETCH_MIN_BODY = 100;
/**
 * Verifier for `web_fetch` and the aliases registered against it.
 *
 *   - non-empty `result.error`                 → failed (conf 1.0)
 *   - raw string payload:
 *       · trimmed length < WEB_FETCH_MIN_BODY  → low_signal (conf 0.4) with a
 *         suggestion to try another URL / check auth
 *       · otherwise                            → ok (conf 0.9)
 *   - non-null object payload:
 *       · `success === false`                  → failed (conf 1.0)
 *       · body = first string among `content`, `body`, `text` (else '');
 *         trimmed length < WEB_FETCH_MIN_BODY  → low_signal (conf 0.4)
 *       · otherwise                            → ok (conf 1.0)
 *   - any other payload                        → delegated to the exported
 *     `defaultVerifier`
 *
 * @param _n     tool name (forwarded only when delegating)
 * @param _a     tool args (forwarded only when delegating)
 * @param result tool-call envelope; `error` and `result` are read
 */
const webFetchVerifier = (_n, _a, result) => {
    if (typeof result.error === 'string' && result.error.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: result.error };
    }
    const payload = result.result;
    if (typeof payload === 'string') {
        const size = payload.trim().length;
        if (size >= WEB_FETCH_MIN_BODY) {
            return { ok: true, confidence: 0.9, code: 'ok' };
        }
        return {
            ok: true,
            confidence: 0.4,
            code: 'low_signal',
            reason: `short body (${size} chars)`,
            suggestion: 'Try a different URL or check whether the page requires auth.',
        };
    }
    if (payload === null || typeof payload !== 'object') {
        return (0, exports.defaultVerifier)(_n, _a, result);
    }
    if (payload.success === false) {
        return {
            ok: false,
            confidence: 1.0,
            code: 'failed',
            reason: typeof payload.error === 'string' ? payload.error : 'success:false',
        };
    }
    // The first string-valued field wins, in this fixed preference order.
    const candidates = [payload.content, payload.body, payload.text];
    const text = candidates.find((candidate) => typeof candidate === 'string') ?? '';
    const size = text.trim().length;
    if (size >= WEB_FETCH_MIN_BODY) {
        return { ok: true, confidence: 1.0, code: 'ok' };
    }
    return {
        ok: true,
        confidence: 0.4,
        code: 'low_signal',
        reason: `short body (${size} chars)`,
    };
};
375
+ exports.webFetchVerifier = webFetchVerifier;
376
/**
 * Verifier for the interactive browser tools it is registered against
 * (`browser_click`, `browser_type`, `browser_fill`, `browser_navigate`).
 *
 * Runs the exported `defaultVerifier` first and returns its verdict
 * unchanged when that verdict is a failure. When the default verdict
 * passes, the payload's optional `browserState` sidecar is consulted:
 * if it sets `needs_verifier`, the result is demoted to `ok: false`
 * (conf 0.75) so that a "success" which did not visibly change the page
 * is not counted as progress. The original design notes say the
 * downstream classifier then routes such results to `stale_ref`
 * recovery.
 *
 *   - `maybe_noop` truthy  → code 'no_progress'
 *   - otherwise            → code 'low_signal', with the sidecar's
 *     `progress_score` rendered to two decimals in the reason
 *
 * A sidecar that sets `needs_verifier` but carries no numeric
 * `progress_score` is rendered as "unknown" in the reason, so a partial
 * sidecar still demotes the result instead of throwing.
 *
 * When the sidecar is absent (the original notes mention opting out via
 * AIDEN_BROWSER_DEPTH=0) the default verdict is returned as-is.
 *
 * @param toolName forwarded to the default verifier
 * @param args     forwarded to the default verifier
 * @param result   tool-call envelope; `result.result.browserState` is read
 * @returns `{ ok, confidence, code, reason? }`
 */
const browserInteractiveVerifier = (toolName, args, result) => {
    const base = (0, exports.defaultVerifier)(toolName, args, result);
    // Failures keep their existing classification; only passes can be demoted.
    if (!base.ok)
        return base;
    const inner = result.result;
    if (!inner || typeof inner !== 'object')
        return base;
    const bs = inner.browserState;
    if (!bs || !bs.needs_verifier)
        return base;
    if (bs.maybe_noop) {
        return {
            ok: false,
            confidence: 0.75,
            code: 'no_progress',
            reason: 'tool returned success but page state did not change',
        };
    }
    // Guard the formatting: calling toFixed on a missing or non-numeric
    // score would throw a TypeError out of the verifier.
    const score = typeof bs.progress_score === 'number'
        ? bs.progress_score.toFixed(2)
        : 'unknown';
    return {
        ok: false,
        confidence: 0.75,
        code: 'low_signal',
        reason: `low progress (${score}) — UI may not have responded`,
    };
};
419
+ exports.browserInteractiveVerifier = browserInteractiveVerifier;
420
+ // ── Factory ────────────────────────────────────────────────────────────────
421
/**
 * Create a `VerifierRegistry` (with its default fallback) and register
 * the module's built-in per-tool verifiers on it:
 *
 *   - `shell_exec`, `web_search`, `file_write`, `file_read`, `web_fetch`
 *     each get their dedicated verifier;
 *   - `fetch_page` and `web_page` reuse the `web_fetch` verifier;
 *   - `browser_click`, `browser_type`, `browser_fill` and
 *     `browser_navigate` share the browser-interactive verifier.
 *
 * Callers may add further overrides through `register(...)` on the
 * returned instance.
 *
 * @returns the populated registry
 */
function buildDefaultRegistry() {
    const registry = new VerifierRegistry();
    const builtins = [
        ['shell_exec', exports.shellExecVerifier],
        ['web_search', exports.webSearchVerifier],
        ['file_write', exports.fileWriteVerifier],
        ['file_read', exports.fileReadVerifier],
        ['web_fetch', exports.webFetchVerifier],
        // Aliases that produce the same result shape as web_fetch.
        ['fetch_page', exports.webFetchVerifier],
        ['web_page', exports.webFetchVerifier],
        // Browser tools whose results may carry the browserState sidecar.
        ['browser_click', exports.browserInteractiveVerifier],
        ['browser_type', exports.browserInteractiveVerifier],
        ['browser_fill', exports.browserInteractiveVerifier],
        ['browser_navigate', exports.browserInteractiveVerifier],
    ];
    for (const [toolName, verifier] of builtins) {
        registry.register(toolName, verifier);
    }
    return registry;
}
@@ -2,4 +2,4 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.VERSION = void 0;
4
4
  // AUTO-GENERATED by scripts/inject-version.js — do not edit by hand
5
- exports.VERSION = '4.1.5';
5
+ exports.VERSION = '4.6.0';
@@ -87,10 +87,39 @@ const RULES = [
87
87
  toolsets: ['execute'],
88
88
  },
89
89
  // Process registry
90
+ //
91
+ // Note: the bare word "spawn" appears in this rule's keyword list
92
+ // (legacy — predates v4.6 sub-agents). The dedicated 'subagent'
93
+ // rule below ALSO matches "spawn" via a tighter delegation
94
+ // vocabulary, so a message like "spawn a background server" hits
95
+ // BOTH rules and adds both toolsets — UNION semantics make this
96
+ // additive, not conflicting.
90
97
  {
91
98
  keywords: /\b(process|background|long.?running|server|spawn|kill|daemon)\b/i,
92
99
  toolsets: ['process'],
93
100
  },
101
+ // v4.6 Phase 1 — sub-agent delegation surface (spawn_sub_agent +
102
+ // subagent_fanout). Both tools live in toolset `'subagent'`, which
103
+ // no pre-v4.6 rule mapped to — so PlannerGuard's per-turn narrowing
104
+ // silently stripped them from the model's catalog whenever any
105
+ // other rule fired. The model could see them via lookup_tool_schema
106
+ // but failed to actually invoke them because the provider tool list
107
+ // (post-narrow) didn't include them — see Dispatch 2H diagnostic.
108
+ //
109
+ // Regex notes:
110
+ // - `spawn_sub_agent` and `subagent_fanout` literals are listed
111
+ // explicitly because `\bspawn\b` does NOT match within
112
+ // `spawn_sub_agent` (underscore is a word char in JS regex,
113
+ // so there's no word boundary between `n` and `_`). Users who
114
+ // name the tool directly hit the literal arm.
115
+ // - The free-form vocabulary arm (`spawn`, `delegate`, etc.)
116
+ // catches natural-language delegation intent. UNION semantics
117
+ // with other rules let "spawn a child to read files" surface
118
+ // both 'subagent' AND 'files'.
119
+ {
120
+ keywords: /\b(spawn_sub_agent|subagent_fanout|spawn|subagent|sub.?agent|delegate|fanout|fan.?out|child.?agent|parallel|isolated)\b/i,
121
+ toolsets: ['subagent'],
122
+ },
94
123
  // Media playback control (v4.1.4-media)
95
124
  //
96
125
  // Without this, intents like "list media sessions" matched the
@@ -92,14 +92,14 @@ class AnthropicAdapter {
92
92
  // ── Public: non-streaming ────────────────────────────────────────────────
93
93
  async call(input) {
94
94
  const body = this.buildBody(input, /* streaming */ false);
95
- const reply = await this.dispatch(body, /* streaming */ false);
95
+ const reply = await this.dispatch(body, /* streaming */ false, input.signal);
96
96
  const json = (await reply.json());
97
97
  return decodeResponse(json);
98
98
  }
99
99
  // ── Public: streaming ────────────────────────────────────────────────────
100
100
  async *callStream(input) {
101
101
  const body = this.buildBody(input, /* streaming */ true);
102
- const reply = await this.dispatch(body, /* streaming */ true);
102
+ const reply = await this.dispatch(body, /* streaming */ true, input.signal);
103
103
  if (!reply.body) {
104
104
  // Server promised SSE but gave us nothing — fall through to a synthetic
105
105
  // empty done event so the agent loop terminates rather than hangs.
@@ -163,7 +163,7 @@ class AnthropicAdapter {
163
163
  // beta flags, or per-deployment routing tags without forking the adapter.
164
164
  return { ...headers, ...this.extraHeaders };
165
165
  }
166
- async dispatch(body, streaming) {
166
+ async dispatch(body, streaming, externalSignal) {
167
167
  // Resolved once per process via the userAgent module's cache, so paying
168
168
  // for the version detection here is cheap on every retry/turn.
169
169
  const userAgent = await (0, userAgent_1.getClaudeCliUserAgent)();
@@ -174,6 +174,22 @@ class AnthropicAdapter {
174
174
  for (let attempt = 0; attempt < totalTries; attempt++) {
175
175
  const controller = new AbortController();
176
176
  const timer = setTimeout(() => controller.abort(), this.timeoutMs);
177
+ // v4.6 prep — forward an external AbortSignal into this attempt's
178
+ // internal controller so a parent agent that aborts mid-flight
179
+ // cancels the in-flight fetch. External aborts surface as a raw
180
+ // AbortError (NOT ProviderTimeoutError) so AidenAgent can route
181
+ // them as `finishReason: 'interrupted'` instead of treating them
182
+ // as a retryable timeout.
183
+ let externalAbortHandler = null;
184
+ if (externalSignal) {
185
+ if (externalSignal.aborted) {
186
+ controller.abort();
187
+ }
188
+ else {
189
+ externalAbortHandler = () => controller.abort();
190
+ externalSignal.addEventListener('abort', externalAbortHandler, { once: true });
191
+ }
192
+ }
177
193
  let response;
178
194
  try {
179
195
  response = await fetch(this.endpoint, {
@@ -185,7 +201,16 @@ class AnthropicAdapter {
185
201
  }
186
202
  catch (err) {
187
203
  clearTimeout(timer);
204
+ if (externalAbortHandler && externalSignal) {
205
+ externalSignal.removeEventListener('abort', externalAbortHandler);
206
+ }
188
207
  if (err?.name === 'AbortError') {
208
+ // v4.6 prep — external abort takes priority over internal
209
+ // timeout. Surface the raw AbortError immediately (no retry)
210
+ // so AidenAgent's catch routes it as 'interrupted'.
211
+ if (externalSignal?.aborted) {
212
+ throw err;
213
+ }
189
214
  // Treat timeout as retryable; only surface ProviderTimeoutError if
190
215
  // we've burned the last attempt.
191
216
  lastErr = new errors_1.ProviderTimeoutError(this.providerName, this.timeoutMs);
@@ -200,6 +225,9 @@ class AnthropicAdapter {
200
225
  throw lastErr;
201
226
  }
202
227
  clearTimeout(timer);
228
+ if (externalAbortHandler && externalSignal) {
229
+ externalSignal.removeEventListener('abort', externalAbortHandler);
230
+ }
203
231
  if (response.ok)
204
232
  return response;
205
233
  // Phase 25.1.5d diagnostic: gated dump of request + response so we
@@ -73,7 +73,7 @@ class ChatCompletionsAdapter {
73
73
  // ── Non-streaming ────────────────────────────────────────────────────
74
74
  async call(input) {
75
75
  const body = this.buildBody(input, /* streaming */ false);
76
- const reply = await this.dispatch(body, /* streaming */ false);
76
+ const reply = await this.dispatch(body, /* streaming */ false, input.signal);
77
77
  const text = await reply.text();
78
78
  let parsed;
79
79
  try {
@@ -91,7 +91,7 @@ class ChatCompletionsAdapter {
91
91
  // ── Streaming ────────────────────────────────────────────────────────
92
92
  async *callStream(input) {
93
93
  const body = this.buildBody(input, /* streaming */ true);
94
- const reply = await this.dispatch(body, /* streaming */ true);
94
+ const reply = await this.dispatch(body, /* streaming */ true, input.signal);
95
95
  if (!reply.body) {
96
96
  yield {
97
97
  type: 'done',
@@ -150,7 +150,7 @@ class ChatCompletionsAdapter {
150
150
  headers['Accept'] = 'text/event-stream';
151
151
  return { ...headers, ...this.extraHeaders };
152
152
  }
153
- async dispatch(body, streaming) {
153
+ async dispatch(body, streaming, externalSignal) {
154
154
  const headers = this.buildHeaders(streaming);
155
155
  const serialised = JSON.stringify(body);
156
156
  const totalTries = this.maxRetries + 1;
@@ -158,6 +158,19 @@ class ChatCompletionsAdapter {
158
158
  for (let attempt = 0; attempt < totalTries; attempt++) {
159
159
  const controller = new AbortController();
160
160
  const timer = setTimeout(() => controller.abort(), this.timeoutMs);
161
+ // v4.6 prep — forward external abort into the internal controller.
162
+ // External aborts surface as raw AbortError so AidenAgent routes
163
+ // them as 'interrupted' rather than retrying as ProviderTimeoutError.
164
+ let externalAbortHandler = null;
165
+ if (externalSignal) {
166
+ if (externalSignal.aborted) {
167
+ controller.abort();
168
+ }
169
+ else {
170
+ externalAbortHandler = () => controller.abort();
171
+ externalSignal.addEventListener('abort', externalAbortHandler, { once: true });
172
+ }
173
+ }
161
174
  let response;
162
175
  try {
163
176
  response = await fetch(this.endpoint, {
@@ -169,7 +182,14 @@ class ChatCompletionsAdapter {
169
182
  }
170
183
  catch (err) {
171
184
  clearTimeout(timer);
185
+ if (externalAbortHandler && externalSignal) {
186
+ externalSignal.removeEventListener('abort', externalAbortHandler);
187
+ }
172
188
  if (err?.name === 'AbortError') {
189
+ // v4.6 prep — external abort takes priority over internal timeout.
190
+ if (externalSignal?.aborted) {
191
+ throw err;
192
+ }
173
193
  lastErr = new errors_1.ProviderTimeoutError(this.providerName, this.timeoutMs);
174
194
  }
175
195
  else {
@@ -182,6 +202,9 @@ class ChatCompletionsAdapter {
182
202
  throw lastErr;
183
203
  }
184
204
  clearTimeout(timer);
205
+ if (externalAbortHandler && externalSignal) {
206
+ externalSignal.removeEventListener('abort', externalAbortHandler);
207
+ }
185
208
  if (response.ok)
186
209
  return response;
187
210
  const status = response.status;