aiden-runtime 4.1.5 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/README.md +250 -847
  2. package/dist/api/server.js +32 -5
  3. package/dist/cli/v4/aidenCLI.js +351 -53
  4. package/dist/cli/v4/callbacks.js +170 -0
  5. package/dist/cli/v4/chatSession.js +138 -3
  6. package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
  7. package/dist/cli/v4/commands/browserDepth.js +45 -0
  8. package/dist/cli/v4/commands/cron.js +264 -0
  9. package/dist/cli/v4/commands/daemon.js +541 -0
  10. package/dist/cli/v4/commands/daemonStatus.js +253 -0
  11. package/dist/cli/v4/commands/help.js +7 -0
  12. package/dist/cli/v4/commands/index.js +20 -1
  13. package/dist/cli/v4/commands/runs.js +203 -0
  14. package/dist/cli/v4/commands/sandbox.js +48 -0
  15. package/dist/cli/v4/commands/suggestions.js +68 -0
  16. package/dist/cli/v4/commands/tce.js +41 -0
  17. package/dist/cli/v4/commands/trigger.js +378 -0
  18. package/dist/cli/v4/commands/update.js +95 -3
  19. package/dist/cli/v4/daemonAgentBuilder.js +142 -0
  20. package/dist/cli/v4/defaultSoul.js +1 -1
  21. package/dist/cli/v4/display/capabilityCard.js +26 -0
  22. package/dist/cli/v4/display.js +18 -8
  23. package/dist/cli/v4/replyRenderer.js +31 -23
  24. package/dist/cli/v4/updateBootPrompt.js +170 -0
  25. package/dist/core/playwrightBridge.js +129 -0
  26. package/dist/core/v4/aidenAgent.js +308 -4
  27. package/dist/core/v4/browserState.js +436 -0
  28. package/dist/core/v4/checkpoint.js +79 -0
  29. package/dist/core/v4/daemon/bootstrap.js +604 -0
  30. package/dist/core/v4/daemon/cleanShutdown.js +154 -0
  31. package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
  32. package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
  33. package/dist/core/v4/daemon/cron/migration.js +199 -0
  34. package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
  35. package/dist/core/v4/daemon/daemonConfig.js +90 -0
  36. package/dist/core/v4/daemon/db/connection.js +106 -0
  37. package/dist/core/v4/daemon/db/migrations.js +296 -0
  38. package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
  39. package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
  40. package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
  41. package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
  42. package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
  43. package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
  44. package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
  45. package/dist/core/v4/daemon/dispatcher/index.js +53 -0
  46. package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
  47. package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
  48. package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
  49. package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
  50. package/dist/core/v4/daemon/drain.js +156 -0
  51. package/dist/core/v4/daemon/eventLoopLag.js +73 -0
  52. package/dist/core/v4/daemon/health.js +159 -0
  53. package/dist/core/v4/daemon/idempotencyStore.js +204 -0
  54. package/dist/core/v4/daemon/index.js +179 -0
  55. package/dist/core/v4/daemon/instanceTracker.js +99 -0
  56. package/dist/core/v4/daemon/resourceRegistry.js +150 -0
  57. package/dist/core/v4/daemon/restartCode.js +32 -0
  58. package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
  59. package/dist/core/v4/daemon/runStore.js +114 -0
  60. package/dist/core/v4/daemon/runtimeLock.js +167 -0
  61. package/dist/core/v4/daemon/signals.js +50 -0
  62. package/dist/core/v4/daemon/supervisor.js +272 -0
  63. package/dist/core/v4/daemon/triggerBus.js +279 -0
  64. package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
  65. package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
  66. package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
  67. package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
  68. package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
  69. package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
  70. package/dist/core/v4/daemon/triggers/email/index.js +332 -0
  71. package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
  72. package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
  73. package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
  74. package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
  75. package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
  76. package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
  77. package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
  78. package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
  79. package/dist/core/v4/daemon/triggers/webhook.js +376 -0
  80. package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
  81. package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
  82. package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
  83. package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
  84. package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
  85. package/dist/core/v4/daemon/types.js +15 -0
  86. package/dist/core/v4/dockerSession.js +461 -0
  87. package/dist/core/v4/dryRun.js +117 -0
  88. package/dist/core/v4/failureClassifier.js +779 -0
  89. package/dist/core/v4/recoveryReport.js +449 -0
  90. package/dist/core/v4/runtimeToggles.js +187 -0
  91. package/dist/core/v4/sandboxConfig.js +285 -0
  92. package/dist/core/v4/sandboxFs.js +316 -0
  93. package/dist/core/v4/suggestionCatalog.js +41 -0
  94. package/dist/core/v4/suggestionEngine.js +210 -0
  95. package/dist/core/v4/toolRegistry.js +18 -0
  96. package/dist/core/v4/turnState.js +587 -0
  97. package/dist/core/v4/update/checkUpdate.js +63 -3
  98. package/dist/core/v4/update/installMethodDetect.js +115 -0
  99. package/dist/core/v4/update/registryClient.js +121 -0
  100. package/dist/core/v4/update/skipState.js +75 -0
  101. package/dist/core/v4/verifier.js +448 -0
  102. package/dist/core/version.js +1 -1
  103. package/dist/tools/v4/browser/_observer.js +224 -0
  104. package/dist/tools/v4/browser/browserBlocker.js +396 -0
  105. package/dist/tools/v4/browser/browserClick.js +18 -1
  106. package/dist/tools/v4/browser/browserClose.js +18 -1
  107. package/dist/tools/v4/browser/browserExtract.js +5 -1
  108. package/dist/tools/v4/browser/browserFill.js +17 -1
  109. package/dist/tools/v4/browser/browserGetUrl.js +5 -1
  110. package/dist/tools/v4/browser/browserNavigate.js +16 -1
  111. package/dist/tools/v4/browser/browserScreenshot.js +5 -1
  112. package/dist/tools/v4/browser/browserScroll.js +18 -1
  113. package/dist/tools/v4/browser/browserType.js +17 -1
  114. package/dist/tools/v4/browser/captchaCheck.js +5 -1
  115. package/dist/tools/v4/executeCode.js +1 -0
  116. package/dist/tools/v4/files/fileCopy.js +56 -2
  117. package/dist/tools/v4/files/fileDelete.js +38 -1
  118. package/dist/tools/v4/files/fileList.js +12 -1
  119. package/dist/tools/v4/files/fileMove.js +59 -2
  120. package/dist/tools/v4/files/filePatch.js +43 -1
  121. package/dist/tools/v4/files/fileRead.js +12 -1
  122. package/dist/tools/v4/files/fileWrite.js +41 -1
  123. package/dist/tools/v4/index.js +71 -58
  124. package/dist/tools/v4/memory/memoryAdd.js +14 -0
  125. package/dist/tools/v4/memory/memoryRemove.js +14 -0
  126. package/dist/tools/v4/memory/memoryReplace.js +15 -0
  127. package/dist/tools/v4/memory/sessionSummary.js +12 -0
  128. package/dist/tools/v4/process/processKill.js +19 -0
  129. package/dist/tools/v4/process/processList.js +1 -0
  130. package/dist/tools/v4/process/processLogRead.js +1 -0
  131. package/dist/tools/v4/process/processSpawn.js +13 -0
  132. package/dist/tools/v4/process/processWait.js +1 -0
  133. package/dist/tools/v4/sessions/recallSession.js +1 -0
  134. package/dist/tools/v4/sessions/sessionList.js +1 -0
  135. package/dist/tools/v4/sessions/sessionSearch.js +1 -0
  136. package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
  137. package/dist/tools/v4/skills/skillManage.js +13 -0
  138. package/dist/tools/v4/skills/skillView.js +1 -0
  139. package/dist/tools/v4/skills/skillsList.js +1 -0
  140. package/dist/tools/v4/subagent/subagentFanout.js +1 -0
  141. package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
  142. package/dist/tools/v4/system/appClose.js +13 -0
  143. package/dist/tools/v4/system/appInput.js +13 -0
  144. package/dist/tools/v4/system/appLaunch.js +13 -0
  145. package/dist/tools/v4/system/clipboardRead.js +1 -0
  146. package/dist/tools/v4/system/clipboardWrite.js +14 -0
  147. package/dist/tools/v4/system/mediaKey.js +12 -0
  148. package/dist/tools/v4/system/mediaSessions.js +1 -0
  149. package/dist/tools/v4/system/mediaTransport.js +13 -0
  150. package/dist/tools/v4/system/naturalEvents.js +1 -0
  151. package/dist/tools/v4/system/nowPlaying.js +1 -0
  152. package/dist/tools/v4/system/osProcessList.js +1 -0
  153. package/dist/tools/v4/system/screenshot.js +1 -0
  154. package/dist/tools/v4/system/systemInfo.js +1 -0
  155. package/dist/tools/v4/system/volumeSet.js +17 -0
  156. package/dist/tools/v4/terminal/shellExec.js +81 -9
  157. package/dist/tools/v4/web/deepResearch.js +1 -0
  158. package/dist/tools/v4/web/openUrl.js +1 -0
  159. package/dist/tools/v4/web/webFetch.js +1 -0
  160. package/dist/tools/v4/web/webPage.js +1 -0
  161. package/dist/tools/v4/web/webSearch.js +1 -0
  162. package/dist/tools/v4/web/youtubeSearch.js +1 -0
  163. package/package.json +7 -1
@@ -0,0 +1,448 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * core/v4/verifier.ts — v4.2 Phase 1: Per-tool result verifier.
10
+ *
11
+ * After each tool dispatch, the verifier inspects the result and
12
+ * classifies the outcome:
13
+ *
14
+ * ok — tool produced a usable, non-failed output
15
+ * failed — tool errored, returned `success: false`, or matched
16
+ * a known failure shape
17
+ * no_progress — tool succeeded but produced no useful signal (empty
18
+ * payload, identical hash to a recent call — Phase 3
19
+ * wires the hash repeat detector)
20
+ * low_signal — tool succeeded but with a short / vague response
21
+ * that's informative but probably won't help the
22
+ * model make progress
23
+ * unknown — verifier couldn't classify with confidence
24
+ *
25
+ * Scope (Phase 1):
26
+ * - Pure inspection of `(toolName, args, result)` — NO goal awareness
27
+ * (deferred to Phase 5 / task graph).
28
+ * - Synchronous; runs in the agent's tool-dispatch loop between
29
+ * `onToolCall('after', result)` and `turnState.recordToolCall(...)`.
30
+ * - Default fallback handles ~99% of Aiden tools that return the
31
+ * `{ success: boolean, error?: string, ...payload }` envelope.
32
+ * - Built-in per-tool verifiers for 5 high-signal tools where the
33
+ * default envelope inspection isn't sufficient: `shell_exec`,
34
+ * `web_search`, `file_write`, `file_read`, `web_fetch`.
35
+ * - Behind the same gate as TurnState (default ON; opt-out via
36
+ * `AIDEN_TCE=0`). When disabled, the agent skips verifier
37
+ * classification — the registry is still constructed (cheap) but
38
+ * `resolve()` is never called inside the gated branch.
39
+ *
40
+ * Out of scope (deferred phases):
41
+ * - Phase 2 — typed failure reason taxonomy (timeout / auth /
42
+ * hallucination / network — separate from per-tool verifier).
43
+ * - Phase 3 — RecoveryReport (uses verifier output + Phase 2 classifier).
44
+ * - Phase 4 — checkpoint/restore (uses Phase 3 state shape).
45
+ * - Phase 5 — task-graph sub-step verification (extends VerifierFn
46
+ * signature with optional `subGoal` argument; backward-compatible).
47
+ *
48
+ * The design intentionally mirrors a layered-decision pattern from the
49
+ * reference system's tool-guardrail module: a pure classifier function
50
+ * driving a controller's threshold counters, with per-tool overrides
51
+ * for the small set of tools where heuristic inspection is too coarse.
52
+ */
53
+ Object.defineProperty(exports, "__esModule", { value: true });
54
+ exports.browserInteractiveVerifier = exports.webFetchVerifier = exports.fileReadVerifier = exports.fileWriteVerifier = exports.webSearchVerifier = exports.shellExecVerifier = exports.defaultVerifier = exports.VerifierRegistry = void 0;
55
+ exports.buildDefaultRegistry = buildDefaultRegistry;
56
/**
 * Maps tool names to per-tool verifier functions and supplies a single
 * fallback verifier for every tool that has no override. Constructing
 * a registry only allocates an empty Map; no verifier runs until a
 * caller invokes the function handed back by `resolve(...)`.
 */
class VerifierRegistry {
    /**
     * @param {Function} [fallback] verifier returned by `resolve` when no
     *   override matches; defaults to `exports.defaultVerifier`.
     */
    constructor(fallback = exports.defaultVerifier) {
        this.overrides = new Map();
        this.fallback = fallback;
    }
    /**
     * Registers `fn` as the verifier for `toolName`, replacing any
     * verifier previously registered under the same name.
     */
    register(toolName, fn) {
        this.overrides.set(toolName, fn);
    }
    /**
     * Returns the verifier registered for `toolName`. When nothing is
     * registered (or the registered value is null / undefined) the
     * fallback verifier is returned instead.
     */
    resolve(toolName) {
        const override = this.overrides.get(toolName);
        if (override === undefined || override === null) {
            return this.fallback;
        }
        return override;
    }
    /** Test helper — true when `toolName` has an entry in the override map. */
    hasOverride(toolName) {
        return this.overrides.has(toolName);
    }
}
78
+ exports.VerifierRegistry = VerifierRegistry;
79
+ // ── Default fallback verifier ──────────────────────────────────────────────
80
const SHORT_RESPONSE_THRESHOLD = 50; // chars — trimmed raw strings shorter than this are flagged low_signal
const RAW_STRING_SCAN_WINDOW = 500; // chars — the error-keyword scan only inspects this leading slice
/**
 * Generic verifier used for every tool without a dedicated override.
 * The checks are mutually exclusive by payload type:
 *
 *   - `result.error` is a non-empty string          → failed     (conf 1.0)
 *   - `result.result` is null / undefined            → unknown    (conf 0.5, ok: true)
 *   - raw string, empty or < 50 chars after trimming → low_signal (conf 0.4, ok: true)
 *   - raw string whose first 500 chars (lower-cased) start with
 *     `error` or contain `"error"` / `"failed"`      → failed     (conf 0.6)
 *   - any other raw string                           → ok         (conf 0.7)
 *   - non-array object with `success: false`         → failed     (conf 1.0)
 *   - non-array object with `success: true`          → ok         (conf 1.0)
 *   - non-array object without a boolean `success`   → ok         (conf 0.7)
 *   - anything else (array, number, boolean, …)      → unknown    (conf 0.5, ok: true)
 *
 * The short-string check runs before the keyword scan, so a short
 * error-looking string is reported as low_signal rather than failed.
 *
 * @param {string} _toolName unused
 * @param {object} _args unused
 * @param {{ error?: string, result?: unknown }} result tool-call envelope
 * @returns {{ ok: boolean, confidence: number, code: string, reason?: string }}
 */
const defaultVerifier = (_toolName, _args, result) => {
    // Outer envelope error wins over anything in the payload.
    const outerError = result.error;
    if (typeof outerError === 'string' && outerError.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: outerError };
    }
    const payload = result.result;
    if (payload === null || payload === undefined) {
        return { ok: true, confidence: 0.5, code: 'unknown', reason: 'null result' };
    }
    // Raw string payloads: length heuristics first, then the keyword scan.
    if (typeof payload === 'string') {
        const text = payload.trim();
        if (text.length < SHORT_RESPONSE_THRESHOLD) {
            const reason = text.length === 0
                ? 'empty string result'
                : `short result (${text.length} chars)`;
            return { ok: true, confidence: 0.4, code: 'low_signal', reason };
        }
        const head = text.slice(0, RAW_STRING_SCAN_WINDOW).toLowerCase();
        const flagged = head.startsWith('error')
            || ['"error"', '"failed"'].some((keyword) => head.includes(keyword));
        if (flagged) {
            return {
                ok: false,
                confidence: 0.6,
                code: 'failed',
                reason: 'error keywords detected in raw string head',
            };
        }
        return { ok: true, confidence: 0.7, code: 'ok' };
    }
    // Typed `{ success, error?, ...payload }` envelope.
    if (typeof payload === 'object' && !Array.isArray(payload)) {
        switch (payload.success) {
            case false: {
                const detail = payload.error;
                const reason = typeof detail === 'string' && detail.length > 0
                    ? detail
                    : 'tool returned success:false';
                return { ok: false, confidence: 1.0, code: 'failed', reason };
            }
            case true:
                return { ok: true, confidence: 1.0, code: 'ok' };
            default:
                // No boolean `success` — not enough signal to be more precise.
                return { ok: true, confidence: 0.7, code: 'ok' };
        }
    }
    // Arrays, numbers, booleans, … — no clear signal either way.
    return { ok: true, confidence: 0.5, code: 'unknown' };
};
169
+ exports.defaultVerifier = defaultVerifier;
170
+ // ── Built-in per-tool verifiers ────────────────────────────────────────────
171
/**
 * Verifier for `shell_exec`, driven by the payload's `exitCode`.
 *
 *   - outer `result.error` set, or payload `success: false` → failed (conf 1.0)
 *   - payload is not an object                              → unknown (conf 0.5, ok: false)
 *   - no numeric `exitCode`: `success: true` → ok (conf 0.7); otherwise
 *     unknown (conf 0.5, ok: false)
 *   - non-zero `exitCode`                                   → failed (conf 1.0) with a suggestion
 *   - exit 0 with empty / missing / whitespace-only stdout  → low_signal (conf 0.4)
 *   - exit 0 with output                                    → ok (conf 1.0)
 *
 * @param {string} _n unused
 * @param {object} _a unused
 * @param {{ error?: string, result?: unknown }} result tool-call envelope
 */
const shellExecVerifier = (_n, _a, result) => {
    const fail = (reason) => ({ ok: false, confidence: 1.0, code: 'failed', reason });
    const unsure = (reason) => ({ ok: false, confidence: 0.5, code: 'unknown', reason });
    if (typeof result.error === 'string' && result.error.length > 0) {
        return fail(result.error);
    }
    const payload = result.result;
    if (payload === null || typeof payload !== 'object') {
        return unsure('non-object shell_exec result');
    }
    // An explicit `success: false` is definitive even when exitCode is absent.
    if (payload.success === false) {
        return fail(typeof payload.error === 'string' ? payload.error : 'success:false');
    }
    if (typeof payload.exitCode !== 'number') {
        // Without an exit code, only an explicit `success: true` counts as ok.
        if (payload.success === true) {
            return { ok: true, confidence: 0.7, code: 'ok' };
        }
        return unsure('missing exitCode');
    }
    if (payload.exitCode !== 0) {
        return {
            ...fail(`non-zero exit (${payload.exitCode})`),
            suggestion: 'Inspect stderr and adjust the command — repeating the same invocation will not help.',
        };
    }
    const producedOutput = typeof payload.stdout === 'string' && payload.stdout.trim().length > 0;
    if (!producedOutput) {
        return {
            ok: true,
            confidence: 0.4,
            code: 'low_signal',
            reason: 'exit 0 with empty stdout',
        };
    }
    return { ok: true, confidence: 1.0, code: 'ok' };
};
225
+ exports.shellExecVerifier = shellExecVerifier;
226
/**
 * Verifier for `web_search`, whose payload is normally a raw string
 * (a synthesised answer).
 *
 *   - outer `result.error` set            → failed (conf 1.0)
 *   - payload is not a string             → delegated to `exports.defaultVerifier`
 *     (covers adapters that wrap the string in a typed envelope)
 *   - empty after trimming                → low_signal (conf 0.4) with a suggestion
 *   - shorter than SHORT_RESPONSE_THRESHOLD → low_signal (conf 0.4); short
 *     answers are not failures ("no results found" can be the answer)
 *   - head matches the generic error-keyword scan → failed (conf 0.6)
 *   - otherwise                           → ok (conf 0.9)
 *
 * The error-keyword scan is the same one the default verifier applies to
 * raw strings: the first RAW_STRING_SCAN_WINDOW characters, lower-cased,
 * either start with `error` or contain `"error"` / `"failed"`. Previously
 * this verifier skipped the scan, so a long error string returned by a
 * failing search was classified `ok` at confidence 0.9.
 *
 * @param {string} _n tool name, forwarded to the default verifier
 * @param {object} _a tool args, forwarded to the default verifier
 * @param {{ error?: string, result?: unknown }} result tool-call envelope
 */
const webSearchVerifier = (_n, _a, result) => {
    if (typeof result.error === 'string' && result.error.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: result.error };
    }
    const inner = result.result;
    if (typeof inner !== 'string') {
        // Some adapters might wrap the string in `{ success, result }`.
        return (0, exports.defaultVerifier)(_n, _a, result);
    }
    const trimmed = inner.trim();
    if (trimmed.length === 0) {
        return {
            ok: true,
            confidence: 0.4,
            code: 'low_signal',
            reason: 'empty web_search result',
            suggestion: 'Try a different query or use web_fetch with a known URL.',
        };
    }
    if (trimmed.length < SHORT_RESPONSE_THRESHOLD) {
        return {
            ok: true,
            confidence: 0.4,
            code: 'low_signal',
            reason: `short web_search result (${trimmed.length} chars)`,
        };
    }
    // Generic error-keyword scan, mirroring the default verifier's
    // raw-string handling (same window, same keywords, same confidence).
    const head = trimmed.slice(0, RAW_STRING_SCAN_WINDOW).toLowerCase();
    if (head.startsWith('error') ||
        head.includes('"error"') ||
        head.includes('"failed"')) {
        return {
            ok: false,
            confidence: 0.6,
            code: 'failed',
            reason: 'error keywords detected in raw string head',
        };
    }
    return { ok: true, confidence: 0.9, code: 'ok' };
};
260
+ exports.webSearchVerifier = webSearchVerifier;
261
/**
 * Verifier for `file_write`.
 *
 *   - outer `result.error` set, or payload `success: false` → failed (conf 1.0)
 *   - payload is not an object                              → unknown (conf 0.5, ok: false)
 *   - payload reports `bytesWritten` of exactly 0           → low_signal (conf 0.4)
 *   - anything else                                         → ok (conf 1.0)
 *
 * A missing `bytesWritten` is not penalised; only an explicit zero is.
 *
 * @param {string} _n unused
 * @param {object} _a unused
 * @param {{ error?: string, result?: unknown }} result tool-call envelope
 */
const fileWriteVerifier = (_n, _a, result) => {
    const outerError = result.error;
    if (typeof outerError === 'string' && outerError.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: outerError };
    }
    const payload = result.result;
    const isObject = payload !== null && typeof payload === 'object';
    if (!isObject) {
        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'non-object file_write result' };
    }
    if (payload.success === false) {
        const reason = typeof payload.error === 'string' ? payload.error : 'success:false';
        return { ok: false, confidence: 1.0, code: 'failed', reason };
    }
    // Strict equality with 0 already implies the value is a number.
    if (payload.bytesWritten === 0) {
        return { ok: true, confidence: 0.4, code: 'low_signal', reason: 'wrote 0 bytes' };
    }
    return { ok: true, confidence: 1.0, code: 'ok' };
};
292
+ exports.fileWriteVerifier = fileWriteVerifier;
293
/**
 * Verifier for `file_read`.
 *
 *   - outer `result.error` set, or payload `success: false` → failed (conf 1.0)
 *   - payload is not an object                              → unknown (conf 0.5, ok: false)
 *   - `content` is missing, not a string, or zero-length    → low_signal (conf 0.4)
 *   - non-empty string `content`                            → ok (conf 1.0)
 *
 * The content is not trimmed: a whitespace-only file counts as non-empty.
 *
 * @param {string} _n unused
 * @param {object} _a unused
 * @param {{ error?: string, result?: unknown }} result tool-call envelope
 */
const fileReadVerifier = (_n, _a, result) => {
    const outerError = result.error;
    if (typeof outerError === 'string' && outerError.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: outerError };
    }
    const payload = result.result;
    const isObject = payload !== null && typeof payload === 'object';
    if (!isObject) {
        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'non-object file_read result' };
    }
    if (payload.success === false) {
        const reason = typeof payload.error === 'string' ? payload.error : 'success:false';
        return { ok: false, confidence: 1.0, code: 'failed', reason };
    }
    const hasContent = typeof payload.content === 'string' && payload.content.length > 0;
    return hasContent
        ? { ok: true, confidence: 1.0, code: 'ok' }
        : { ok: true, confidence: 0.4, code: 'low_signal', reason: 'empty file content' };
};
325
+ exports.fileReadVerifier = fileReadVerifier;
326
/**
 * Verifier for `web_fetch`-shaped results: checks that the fetched body
 * is substantive.
 *
 *   - outer `result.error` set                       → failed (conf 1.0)
 *   - raw string payload: trimmed length below WEB_FETCH_MIN_BODY
 *     → low_signal (conf 0.4) with a suggestion; otherwise ok (conf 0.9)
 *   - object payload with `success: false`           → failed (conf 1.0)
 *   - object payload: the body is the first of `content`, `body`, `text`
 *     that is a string (empty string when none is); trimmed length below
 *     WEB_FETCH_MIN_BODY → low_signal (conf 0.4); otherwise ok (conf 1.0)
 *   - any other payload (null, undefined, number, …) → delegated to
 *     `exports.defaultVerifier`
 *
 * @param {string} _n tool name, forwarded to the default verifier
 * @param {object} _a tool args, forwarded to the default verifier
 * @param {{ error?: string, result?: unknown }} result tool-call envelope
 */
const WEB_FETCH_MIN_BODY = 100;
const webFetchVerifier = (_n, _a, result) => {
    const outerError = result.error;
    if (typeof outerError === 'string' && outerError.length > 0) {
        return { ok: false, confidence: 1.0, code: 'failed', reason: outerError };
    }
    const payload = result.result;
    // Shape 1: raw string body.
    if (typeof payload === 'string') {
        const size = payload.trim().length;
        if (size >= WEB_FETCH_MIN_BODY) {
            return { ok: true, confidence: 0.9, code: 'ok' };
        }
        return {
            ok: true,
            confidence: 0.4,
            code: 'low_signal',
            reason: `short body (${size} chars)`,
            suggestion: 'Try a different URL or check whether the page requires auth.',
        };
    }
    // Neither string nor object — let the default verifier decide.
    if (payload === null || typeof payload !== 'object') {
        return (0, exports.defaultVerifier)(_n, _a, result);
    }
    // Shape 2: typed `{ success, content | body | text }` envelope.
    if (payload.success === false) {
        const reason = typeof payload.error === 'string' ? payload.error : 'success:false';
        return { ok: false, confidence: 1.0, code: 'failed', reason };
    }
    const firstString = [payload.content, payload.body, payload.text]
        .find((candidate) => typeof candidate === 'string');
    const size = (firstString ?? '').trim().length;
    if (size >= WEB_FETCH_MIN_BODY) {
        return { ok: true, confidence: 1.0, code: 'ok' };
    }
    return {
        ok: true,
        confidence: 0.4,
        code: 'low_signal',
        reason: `short body (${size} chars)`,
    };
};
375
+ exports.webFetchVerifier = webFetchVerifier;
376
/**
 * Verifier for interactive browser tools. Runs `exports.defaultVerifier`
 * first and only adds one extra check on top of a passing result: when
 * the payload carries a `browserState` sidecar whose `needs_verifier`
 * flag is truthy, the passing verdict is demoted to `ok: false`
 * (confidence 0.75) so that a success-but-nothing-changed call is not
 * treated as progress.
 *
 *   - default verifier failed                      → that result, unchanged
 *   - payload not an object, no `browserState`, or
 *     `needs_verifier` falsy                       → default result, unchanged
 *   - `maybe_noop` truthy                          → no_progress
 *   - otherwise                                    → low_signal, with the
 *     sidecar's `progress_score` (two decimals) in the reason
 *
 * The sidecar is produced elsewhere; per the original author's notes it
 * is absent when browser depth is disabled (AIDEN_BROWSER_DEPTH=0) and a
 * downstream classifier consumes the demoted result — neither is visible
 * from this function.
 *
 * Robustness: `progress_score` is only formatted with `toFixed` when it
 * is a number. A sidecar that sets `needs_verifier` without a numeric
 * score previously threw a TypeError here; it now reports the score as
 * `unknown` instead.
 *
 * @param {string} toolName forwarded to the default verifier
 * @param {object} args forwarded to the default verifier
 * @param {{ error?: string, result?: unknown }} result tool-call envelope
 */
const browserInteractiveVerifier = (toolName, args, result) => {
    const base = (0, exports.defaultVerifier)(toolName, args, result);
    // Conservative ordering — failed calls keep their existing classification.
    if (!base.ok) {
        return base;
    }
    const inner = result.result;
    if (!inner || typeof inner !== 'object') {
        return base;
    }
    const bs = inner.browserState;
    if (!bs || !bs.needs_verifier) {
        return base;
    }
    if (bs.maybe_noop) {
        return {
            ok: false,
            confidence: 0.75,
            code: 'no_progress',
            reason: 'tool returned success but page state did not change',
        };
    }
    // Guard the formatting: calling toFixed on a missing / non-numeric
    // score would throw instead of producing a verdict.
    const score = typeof bs.progress_score === 'number'
        ? bs.progress_score.toFixed(2)
        : 'unknown';
    return {
        ok: false,
        confidence: 0.75,
        code: 'low_signal',
        reason: `low progress (${score}) — UI may not have responded`,
    };
};
419
+ exports.browserInteractiveVerifier = browserInteractiveVerifier;
420
+ // ── Factory ────────────────────────────────────────────────────────────────
421
/**
 * Builds a `VerifierRegistry` (with its default fallback) and registers
 * the built-in overrides: six verifier functions wired to eleven tool
 * names.
 *
 *   - `shell_exec`, `web_search`, `file_write`, `file_read`, `web_fetch`
 *     each get their dedicated verifier;
 *   - `fetch_page` and `web_page` are aliases handled by the web_fetch
 *     verifier;
 *   - `browser_click`, `browser_type`, `browser_fill` and
 *     `browser_navigate` share the browser interactive verifier.
 *
 * Callers may register further overrides on the returned instance.
 *
 * @returns {VerifierRegistry} a freshly constructed, pre-wired registry
 */
function buildDefaultRegistry() {
    const registry = new VerifierRegistry();
    // Registration order is kept identical to the historical sequence.
    const wiring = [
        ['shell_exec', exports.shellExecVerifier],
        ['web_search', exports.webSearchVerifier],
        ['file_write', exports.fileWriteVerifier],
        ['file_read', exports.fileReadVerifier],
        ['web_fetch', exports.webFetchVerifier],
        // Aliases — same verifier handles related shapes.
        ['fetch_page', exports.webFetchVerifier],
        ['web_page', exports.webFetchVerifier],
        // Browser tools — demote success-but-no-progress results.
        ['browser_click', exports.browserInteractiveVerifier],
        ['browser_type', exports.browserInteractiveVerifier],
        ['browser_fill', exports.browserInteractiveVerifier],
        ['browser_navigate', exports.browserInteractiveVerifier],
    ];
    for (const [toolName, verifier] of wiring) {
        registry.register(toolName, verifier);
    }
    return registry;
}
@@ -2,4 +2,4 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.VERSION = void 0;
4
4
  // AUTO-GENERATED by scripts/inject-version.js — do not edit by hand
5
- exports.VERSION = '4.1.5';
5
+ exports.VERSION = '4.5.0';
@@ -0,0 +1,224 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * tools/v4/browser/_observer.ts — v4.3 Phase 1 + 2: shared BrowserState
10
+ * observer + stale-ref retry HOC for browser ToolHandlers.
11
+ *
12
+ * One BrowserState lives per server process (lifecycle matches the
13
+ * persistent playwrightBridge context). Every browser tool wraps its
14
+ * ToolHandler in `withBrowserState(...)` so the observer's pre/post
15
+ * snapshot capture happens automatically.
16
+ *
17
+ * Phase 1 — observer captures pre/post snapshots and embeds them as
18
+ * a `browserState` sidecar on the tool result when
19
+ * browser depth is enabled (default ON; opt-out via
20
+ * AIDEN_BROWSER_DEPTH=0). No-op when disabled.
21
+ *
22
+ * Phase 2 — stale-ref recovery. When an interactive browser tool
23
+ * (browser_click / browser_type / browser_fill) returns a
24
+ * resolution-class failure (`element not found`, `not visible`,
25
+ * `not attached`, `timeout`, `target closed`), the HOC resnapshots
26
+ * and retries the inner execute ONCE with the same args. The retry
27
+ * logic is reactive only — no preflight tax on success paths. The
28
+ * retry attempt + outcome lands on `ActionResult.staleRefRetry`
29
+ * for Phase 5's classifier to consume.
30
+ *
31
+ * The one-retry hard cap is the consult-derived non-negotiable: a
32
+ * second retry doesn't help (the cause isn't transient) and starts
33
+ * looking like agent thrashing. If the retry fails, the original
34
+ * failure result is preserved — same error message, but with the
35
+ * `staleRefRetry: { attempted: true, succeeded: false, ... }`
36
+ * sidecar so the classifier can recognise the pattern.
37
+ */
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.STALE_REF_PATTERNS = exports.STALE_REF_RETRYABLE = exports.browserState = void 0;
40
+ exports.detectStaleRefError = detectStaleRefError;
41
+ exports.withBrowserState = withBrowserState;
42
+ const browserState_1 = require("../../../core/v4/browserState");
43
+ const browserBlocker_1 = require("./browserBlocker");
44
+ const playwrightBridge_1 = require("../../../core/playwrightBridge");
45
+ /**
46
+ * Shared observer — one instance per server process. The HOC closes
47
+ * over this reference so all 9 browser tools share the same snapshot
48
+ * counter and gating decision.
49
+ *
50
+ * Tests can construct their own BrowserState with a stubbed bridge
51
+ * loader and call `withBrowserState(handler, customState)` directly.
52
+ */
53
+ exports.browserState = (0, browserState_1.createBrowserState)();
54
+ // ── Phase 2 — stale-ref retry primitives ─────────────────────────────
55
+ /**
56
+ * Interactive browser tools that operate on a selector. Stale-ref
57
+ * retry only fires for these — other tools either don't take a
58
+ * selector (browser_navigate, browser_close, browser_get_url) or
59
+ * are read-only (browser_extract, browser_screenshot, browser_scroll).
60
+ */
61
+ exports.STALE_REF_RETRYABLE = new Set([
62
+ 'browser_click',
63
+ 'browser_type',
64
+ 'browser_fill',
65
+ ]);
66
+ /**
67
+ * Error-message patterns that indicate a resolution-class failure
68
+ * (DOM lookup failed BEFORE any side-effect-producing action fired).
69
+ * Phase 2 retries only on these — never on action-failure messages
70
+ * (network errors, permission denials, etc.).
71
+ *
72
+ * The patterns are case-insensitive substrings; one match is enough.
73
+ * False positives are tolerable — retry-once costs ~200ms and produces
74
+ * the same result on the second attempt. False negatives miss the
75
+ * common transient-race case, so bias toward sensitivity.
76
+ */
77
+ exports.STALE_REF_PATTERNS = [
78
+ /element not found/i,
79
+ /not visible/i,
80
+ /not attached/i,
81
+ /detached from the DOM/i,
82
+ /target closed/i,
83
+ /timeout \d+ms exceeded/i,
84
+ ];
85
+ /**
86
+ * Check if a tool result represents a resolution-class failure.
87
+ * Returns the matched pattern (as a short string) when stale, null
88
+ * otherwise. Pure helper, exported for tests.
89
+ */
90
+ function detectStaleRefError(result) {
91
+ if (result === null || result === undefined || typeof result !== 'object')
92
+ return null;
93
+ const r = result;
94
+ if (r.success !== false)
95
+ return null;
96
+ if (typeof r.error !== 'string' || r.error.length === 0)
97
+ return null;
98
+ for (const pattern of exports.STALE_REF_PATTERNS) {
99
+ if (pattern.test(r.error)) {
100
+ return pattern.source;
101
+ }
102
+ }
103
+ return null;
104
+ }
105
+ /**
106
+ * Test whether a tool result represents success. Used by the HOC to
107
+ * decide whether the retry "succeeded" and should become canonical.
108
+ */
109
+ function isSuccessResult(result) {
110
+ if (result === null || result === undefined || typeof result !== 'object')
111
+ return false;
112
+ return result.success === true;
113
+ }
114
/** Production page-text source for blocker detection: the Playwright bridge snapshot. */
const defaultPageTextFetcher = () => (0, playwrightBridge_1.pwSnapshot)();
/**
 * Wraps a browser ToolHandler so each `execute` call is observed by a
 * BrowserState instance.
 *
 * When `state.isEnabled()` is false the wrapper is a pure pass-through.
 * Otherwise each call:
 *   1. captures a pre-action snapshot and runs the inner handler;
 *   2. runs manual-blocker detection on the page text (Phase 3) and
 *      records the outcome on the active tab (Phase 4);
 *   3. retries the inner handler at most ONCE when an interactive tool
 *      reported a resolution-class failure and no blocker is present
 *      (Phase 2);
 *   4. captures a post-action snapshot and, when the observer yields
 *      metadata and the result is a plain (non-array) object, returns a
 *      copy of the result with a `browserState` sidecar attached.
 *
 * @param handler - the ToolHandler to wrap; its other properties are
 *   copied onto the returned handler unchanged.
 * @param state - observer to use; defaults to the module-level
 *   `exports.browserState`.
 * @param pageTextFetcher - see the inline parameter note below.
 * @returns a new handler object with a wrapped `execute`.
 */
function withBrowserState(handler, state = exports.browserState,
/**
 * Optional page-text fetcher. Production code uses pwSnapshot;
 * tests inject a stub returning canned text for the blocker
 * detection tier. The fetcher is called ONCE per action when
 * browser depth is enabled — disabled path skips entirely.
 */
pageTextFetcher = defaultPageTextFetcher) {
    return {
        ...handler,
        async execute(args, ctx) {
            if (!state.isEnabled()) {
                return handler.execute(args, ctx);
            }
            const pre = await state.captureState();
            let result = await handler.execute(args, ctx);
            // v4.3 Phase 3 — manual-blocker detection. Runs on every
            // browser-tool result when enabled. Uses the configured
            // page-text fetcher (pwSnapshot in production). Detection
            // never breaks the inner tool — the fetcher call and the
            // detector are wrapped in the try/catch below; failures
            // produce no blocker and no blocker field on the sidecar.
            //
            // The detected blocker is BOTH embedded on the result sidecar
            // (Phase 5 + chat layer consumers) AND used to suppress
            // Phase 2's stale-ref retry below. Pause-and-surface contract
            // (Q-CDP5) — never auto-action a blocker.
            let blocker;
            try {
                const snap = await pageTextFetcher();
                if (snap.ok && snap.text) {
                    const url = result?.url ?? '';
                    const detected = (0, browserBlocker_1.detectBlocker)({ text: snap.text, url });
                    if (detected)
                        blocker = detected;
                }
            }
            catch { /* detection never breaks the inner tool */ }
            // v4.3 Phase 4 — propagate blocker (or its absence, as null)
            // to the active tab's metadata in BrowserState, so cross-tab
            // queries can ask "is there a pending blocker on any tab"
            // without re-running detection.
            try {
                state.updateActiveTabBlocker(blocker
                    ? {
                        kind: blocker.kind,
                        subtype: blocker.subtype,
                        url: blocker.url,
                        confidence: blocker.confidence,
                    }
                    : null);
            }
            catch { /* defensive — tab updates never break the inner tool */ }
            // v4.3 Phase 2 — stale-ref retry. Reactive: fires only after a
            // resolution-class failure on an interactive tool. One retry
            // hard cap. Considered safe upstream because resolution-class
            // errors fire BEFORE any DOM event is dispatched, so the retry
            // can't double-act.
            //
            // v4.3 Phase 3 suppression: skip the retry when a manual
            // blocker is present (`!blocker` gate). A blocker means the
            // page is asking for human action — retrying the same tool
            // call against a sign-in wall or 2FA prompt won't help and
            // looks like agent thrashing.
            //
            // `schema?.name`: a handler without a schema is simply not
            // retry-eligible rather than a TypeError source.
            let staleRefRetry;
            if (pre && !blocker &&
                exports.STALE_REF_RETRYABLE.has(handler.schema?.name)) {
                const staleReason = detectStaleRefError(result);
                if (staleReason !== null) {
                    // Resnapshot — the "between" state, used only for the
                    // diagnostic state_delta. The retry fires unconditionally
                    // (per Q-P2-3 single-signal rule): even when the DOM
                    // hasn't changed, a transient race (element attached one
                    // tick after the original timeout) is the common case
                    // the retry catches.
                    const between = await state.captureState();
                    const state_delta = state.computeStateDelta(pre, between);
                    let retryResult;
                    let retryOk = false;
                    try {
                        retryResult = await handler.execute(args, ctx);
                        retryOk = isSuccessResult(retryResult);
                    }
                    catch {
                        // A retry that throws is a failed retry. The original
                        // failure result must survive (it carries the error
                        // context the model needs), so the exception is not
                        // propagated; it is reflected as `succeeded: false`.
                    }
                    staleRefRetry = {
                        attempted: true,
                        succeeded: retryOk,
                        reason: staleReason,
                        state_delta,
                    };
                    // If retry succeeded, the retry result becomes canonical.
                    // If retry failed, keep the original failure — its error
                    // context is what the model needs to see.
                    if (retryOk)
                        result = retryResult;
                }
            }
            const post = await state.captureState();
            const observerMeta = state.buildActionResult({ pre, post });
            // The sidecar is attached only to plain-object results and only
            // when the observer produced metadata; anything else is
            // returned untouched.
            if (observerMeta &&
                result !== null && result !== undefined &&
                typeof result === 'object' && !Array.isArray(result)) {
                const sidecar = {
                    ...observerMeta,
                    ...(staleRefRetry && { staleRefRetry }),
                    ...(blocker && { blocker }),
                };
                return { ...result, browserState: sidecar };
            }
            return result;
        },
    };
}