aiden-runtime 4.1.5 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -847
- package/dist/api/server.js +32 -5
- package/dist/cli/v4/aidenCLI.js +536 -152
- package/dist/cli/v4/callbacks.js +170 -0
- package/dist/cli/v4/chatSession.js +245 -3
- package/dist/cli/v4/commands/_runtimeToggleHelpers.js +94 -0
- package/dist/cli/v4/commands/browserDepth.js +45 -0
- package/dist/cli/v4/commands/cron.js +264 -0
- package/dist/cli/v4/commands/daemon.js +541 -0
- package/dist/cli/v4/commands/daemonStatus.js +253 -0
- package/dist/cli/v4/commands/fanout.js +42 -59
- package/dist/cli/v4/commands/help.js +13 -0
- package/dist/cli/v4/commands/index.js +35 -1
- package/dist/cli/v4/commands/mcp.js +80 -54
- package/dist/cli/v4/commands/plannerGuard.js +53 -0
- package/dist/cli/v4/commands/recovery.js +122 -0
- package/dist/cli/v4/commands/runs.js +223 -0
- package/dist/cli/v4/commands/sandbox.js +48 -0
- package/dist/cli/v4/commands/spawnPause.js +93 -0
- package/dist/cli/v4/commands/suggestions.js +68 -0
- package/dist/cli/v4/commands/tce.js +41 -0
- package/dist/cli/v4/commands/trigger.js +378 -0
- package/dist/cli/v4/commands/update.js +95 -3
- package/dist/cli/v4/daemonAgentBuilder.js +145 -0
- package/dist/cli/v4/defaultSoul.js +1 -1
- package/dist/cli/v4/display/capabilityCard.js +26 -0
- package/dist/cli/v4/display.js +18 -8
- package/dist/cli/v4/replyRenderer.js +31 -23
- package/dist/cli/v4/updateBootPrompt.js +170 -0
- package/dist/core/playwrightBridge.js +129 -0
- package/dist/core/v4/aidenAgent.js +527 -5
- package/dist/core/v4/browserState.js +436 -0
- package/dist/core/v4/checkpoint.js +79 -0
- package/dist/core/v4/daemon/bootstrap.js +651 -0
- package/dist/core/v4/daemon/cleanShutdown.js +154 -0
- package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
- package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
- package/dist/core/v4/daemon/cron/migration.js +199 -0
- package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
- package/dist/core/v4/daemon/daemonConfig.js +90 -0
- package/dist/core/v4/daemon/db/connection.js +106 -0
- package/dist/core/v4/daemon/db/migrations.js +362 -0
- package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
- package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
- package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
- package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
- package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
- package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
- package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
- package/dist/core/v4/daemon/dispatcher/index.js +53 -0
- package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
- package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
- package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
- package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
- package/dist/core/v4/daemon/drain.js +156 -0
- package/dist/core/v4/daemon/eventLoopLag.js +73 -0
- package/dist/core/v4/daemon/health.js +159 -0
- package/dist/core/v4/daemon/idempotencyStore.js +204 -0
- package/dist/core/v4/daemon/index.js +179 -0
- package/dist/core/v4/daemon/instanceTracker.js +99 -0
- package/dist/core/v4/daemon/resourceRegistry.js +150 -0
- package/dist/core/v4/daemon/restartCode.js +32 -0
- package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
- package/dist/core/v4/daemon/runStore.js +144 -0
- package/dist/core/v4/daemon/runtimeLock.js +167 -0
- package/dist/core/v4/daemon/signals.js +50 -0
- package/dist/core/v4/daemon/supervisor.js +272 -0
- package/dist/core/v4/daemon/triggerBus.js +279 -0
- package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
- package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
- package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
- package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
- package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
- package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
- package/dist/core/v4/daemon/triggers/email/index.js +332 -0
- package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
- package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
- package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
- package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
- package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
- package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
- package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
- package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
- package/dist/core/v4/daemon/triggers/webhook.js +376 -0
- package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
- package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
- package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
- package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
- package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
- package/dist/core/v4/daemon/types.js +15 -0
- package/dist/core/v4/dockerSession.js +461 -0
- package/dist/core/v4/dryRun.js +117 -0
- package/dist/core/v4/failureClassifier.js +779 -0
- package/dist/core/v4/providerFallback.js +35 -2
- package/dist/core/v4/recoveryReport.js +449 -0
- package/dist/core/v4/runtimeToggles.js +214 -0
- package/dist/core/v4/sandboxConfig.js +285 -0
- package/dist/core/v4/sandboxFs.js +316 -0
- package/dist/core/v4/selfimprovement/recoveryStore.js +307 -0
- package/dist/core/v4/selfimprovement/signatureBuilder.js +158 -0
- package/dist/core/v4/subagent/childBuilder.js +391 -0
- package/dist/core/v4/subagent/fanout.js +75 -51
- package/dist/core/v4/subagent/spawnPause.js +191 -0
- package/dist/core/v4/subagent/spawnSubAgent.js +310 -0
- package/dist/core/v4/suggestionCatalog.js +41 -0
- package/dist/core/v4/suggestionEngine.js +210 -0
- package/dist/core/v4/toolRegistry.js +37 -3
- package/dist/core/v4/turnState.js +587 -0
- package/dist/core/v4/update/checkUpdate.js +63 -3
- package/dist/core/v4/update/installMethodDetect.js +115 -0
- package/dist/core/v4/update/registryClient.js +121 -0
- package/dist/core/v4/update/skipState.js +75 -0
- package/dist/core/v4/verifier.js +448 -0
- package/dist/core/version.js +1 -1
- package/dist/moat/plannerGuard.js +29 -0
- package/dist/providers/v4/anthropicAdapter.js +31 -3
- package/dist/providers/v4/chatCompletionsAdapter.js +26 -3
- package/dist/providers/v4/codexResponsesAdapter.js +25 -2
- package/dist/providers/v4/ollamaPromptToolsAdapter.js +57 -2
- package/dist/tools/v4/browser/_observer.js +224 -0
- package/dist/tools/v4/browser/browserBlocker.js +396 -0
- package/dist/tools/v4/browser/browserClick.js +18 -1
- package/dist/tools/v4/browser/browserClose.js +18 -1
- package/dist/tools/v4/browser/browserExtract.js +5 -1
- package/dist/tools/v4/browser/browserFill.js +17 -1
- package/dist/tools/v4/browser/browserGetUrl.js +5 -1
- package/dist/tools/v4/browser/browserNavigate.js +16 -1
- package/dist/tools/v4/browser/browserScreenshot.js +5 -1
- package/dist/tools/v4/browser/browserScroll.js +18 -1
- package/dist/tools/v4/browser/browserType.js +17 -1
- package/dist/tools/v4/browser/captchaCheck.js +5 -1
- package/dist/tools/v4/executeCode.js +1 -0
- package/dist/tools/v4/files/fileCopy.js +56 -2
- package/dist/tools/v4/files/fileDelete.js +38 -1
- package/dist/tools/v4/files/fileList.js +12 -1
- package/dist/tools/v4/files/fileMove.js +59 -2
- package/dist/tools/v4/files/filePatch.js +43 -1
- package/dist/tools/v4/files/fileRead.js +12 -1
- package/dist/tools/v4/files/fileWrite.js +41 -1
- package/dist/tools/v4/index.js +88 -61
- package/dist/tools/v4/memory/memoryAdd.js +14 -0
- package/dist/tools/v4/memory/memoryRemove.js +14 -0
- package/dist/tools/v4/memory/memoryReplace.js +15 -0
- package/dist/tools/v4/memory/sessionSummary.js +12 -0
- package/dist/tools/v4/process/processKill.js +19 -0
- package/dist/tools/v4/process/processList.js +1 -0
- package/dist/tools/v4/process/processLogRead.js +1 -0
- package/dist/tools/v4/process/processSpawn.js +13 -0
- package/dist/tools/v4/process/processWait.js +1 -0
- package/dist/tools/v4/sessions/recallSession.js +1 -0
- package/dist/tools/v4/sessions/sessionList.js +1 -0
- package/dist/tools/v4/sessions/sessionSearch.js +1 -0
- package/dist/tools/v4/skills/lookupToolSchema.js +7 -0
- package/dist/tools/v4/skills/skillManage.js +13 -0
- package/dist/tools/v4/skills/skillView.js +1 -0
- package/dist/tools/v4/skills/skillsList.js +1 -0
- package/dist/tools/v4/subagent/spawnSubAgentTool.js +334 -0
- package/dist/tools/v4/subagent/subagentFanout.js +54 -1
- package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
- package/dist/tools/v4/system/appClose.js +13 -0
- package/dist/tools/v4/system/appInput.js +13 -0
- package/dist/tools/v4/system/appLaunch.js +13 -0
- package/dist/tools/v4/system/clipboardRead.js +1 -0
- package/dist/tools/v4/system/clipboardWrite.js +14 -0
- package/dist/tools/v4/system/mediaKey.js +12 -0
- package/dist/tools/v4/system/mediaSessions.js +1 -0
- package/dist/tools/v4/system/mediaTransport.js +13 -0
- package/dist/tools/v4/system/naturalEvents.js +1 -0
- package/dist/tools/v4/system/nowPlaying.js +1 -0
- package/dist/tools/v4/system/osProcessList.js +1 -0
- package/dist/tools/v4/system/screenshot.js +1 -0
- package/dist/tools/v4/system/systemInfo.js +1 -0
- package/dist/tools/v4/system/volumeSet.js +17 -0
- package/dist/tools/v4/terminal/shellExec.js +81 -9
- package/dist/tools/v4/web/deepResearch.js +1 -0
- package/dist/tools/v4/web/openUrl.js +1 -0
- package/dist/tools/v4/web/webFetch.js +1 -0
- package/dist/tools/v4/web/webPage.js +1 -0
- package/dist/tools/v4/web/webSearch.js +1 -0
- package/dist/tools/v4/web/youtubeSearch.js +1 -0
- package/package.json +13 -3
|
@@ -0,0 +1,779 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/failureClassifier.ts — v4.2 Phase 2: Tool-failure classifier.
|
|
10
|
+
*
|
|
11
|
+
* When Phase 1's verifier classifies a tool result as `!ok` (`failed`,
|
|
12
|
+
* `low_signal`, or `no_progress`), this layer enriches the failure
|
|
13
|
+
* with a structured WHY category. Categories drive Phase 3+'s recovery
|
|
14
|
+
* strategies (retry / surface / install / etc.) — Phase 2 only RECORDS
|
|
15
|
+
* the classification on the trace and TurnState diagnostics; no
|
|
16
|
+
* recovery action fires here.
|
|
17
|
+
*
|
|
18
|
+
* Ten base categories, matching the v4.2 spec (the browser classifiers
* further down additionally emit `manual_blocker` and `stale_ref`):
|
|
19
|
+
*
|
|
20
|
+
* timeout — connection/read deadline exceeded
|
|
21
|
+
* auth — 401/403, invalid API key, unauthorized
|
|
22
|
+
* hallucination — model invented a nonexistent entity (narrow
|
|
23
|
+
* Phase 2 scope: file-not-found + verbatim
|
|
24
|
+
* path match in args, confidence 0.6)
|
|
25
|
+
* network — connection refused, DNS, unreachable
|
|
26
|
+
* permission — local ACL ("Access denied"), refusing-to-act
|
|
27
|
+
* rate_limit — 429, throttled, "try again in N"
|
|
28
|
+
* invalid_input — missing required args, "No path provided",
|
|
29
|
+
* "is required", "must be non-empty"
|
|
30
|
+
* dependency_missing — binary not in PATH, "command not found",
|
|
31
|
+
* "not configured", process registry missing
|
|
32
|
+
* not_found — file not found, ENOENT (read tools), no
|
|
33
|
+
* such directory
|
|
34
|
+
* other — catch-all (renamed from "unknown" to match
|
|
35
|
+
* the v4.2 spec exactly)
|
|
36
|
+
*
|
|
37
|
+
* Priority-ordered pipeline (mirrors a layered failure-pattern approach
|
|
38
|
+
* used by a reference system, adapted for Aiden's tool-output domain):
|
|
39
|
+
*
|
|
40
|
+
* 1. Per-tool override (registered by toolName) — runs first; can
|
|
41
|
+
* short-circuit when a tool has a high-signal failure shape.
|
|
42
|
+
* 2. Outer envelope + verifier reason inspection — substring scan
|
|
43
|
+
* against priority-ordered pattern tables.
|
|
44
|
+
* 3. Hallucination heuristic (narrow): file-tool not_found AND args
|
|
45
|
+
* contain the path verbatim → escalate not_found to hallucination.
|
|
46
|
+
* 4. Fallback: `other` at confidence 0.3.
|
|
47
|
+
*
|
|
48
|
+
* Skips entirely when `verification.ok === true` — saves cycles on
|
|
49
|
+
* successful calls.
|
|
50
|
+
*
|
|
51
|
+
* Gated by the same TCE flag as Phase 1 verifier + TurnState (default
|
|
52
|
+
* ON as of v4.2 Phase 6; opt-out via `AIDEN_TCE=0`). When disabled,
|
|
53
|
+
* the classifier is never invoked from the agent loop.
|
|
54
|
+
*/
|
|
55
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
56
|
+
exports.triggerDispatcherClassifier = exports.DAEMON_DISPATCHER_TOOL_NAME = exports.fileReadClassifierWithSandbox = exports.shellExecClassifierWithSandbox = exports.sandboxViolationClassifier = exports.browserNavigateClassifier = exports.browserInteractiveClassifier = exports.fileReadClassifier = exports.webFetchClassifier = exports.webSearchClassifier = exports.shellExecClassifier = exports.defaultClassifier = exports.FailureClassifier = void 0;
|
|
57
|
+
exports.buildDefaultClassifier = buildDefaultClassifier;
|
|
58
|
+
/**
 * Registry mapping a tool name to its failure-classifier function, with a
 * shared fallback for tools that have no override. Symmetric with
 * Phase 1's VerifierRegistry.
 */
class FailureClassifier {
    /**
     * @param {Function} [fallback] Classifier used when a tool has no
     *   registered override. Defaults to the module's exported
     *   `defaultClassifier`, looked up when the constructor runs.
     */
    constructor(fallback = exports.defaultClassifier) {
        this.overrides = new Map();
        this.fallback = fallback;
    }

    /**
     * Install (or replace) the classifier for one tool.
     * @param {string} toolName
     * @param {Function} fn
     */
    register(toolName, fn) {
        this.overrides.set(toolName, fn);
    }

    /**
     * Look up the classifier for a tool.
     * @param {string} toolName
     * @returns {Function} The registered override, or the fallback when the
     *   stored value is missing (or nullish).
     */
    resolve(toolName) {
        const override = this.overrides.get(toolName);
        if (override === undefined || override === null) {
            return this.fallback;
        }
        return override;
    }

    /**
     * @param {string} toolName
     * @returns {boolean} True when an override entry exists for the tool.
     */
    hasOverride(toolName) {
        return this.overrides.has(toolName);
    }

    /**
     * Entry point used by the agent loop.
     * @returns {object|null} `null` when `verification.ok` is truthy (no
     *   classifier is resolved or invoked); otherwise whatever the resolved
     *   classifier returns.
     */
    classify(verification, toolName, args, result) {
        if (!verification.ok) {
            const classifier = this.resolve(toolName);
            return classifier(verification, toolName, args, result);
        }
        return null;
    }
}
|
|
86
|
+
exports.FailureClassifier = FailureClassifier;
|
|
87
|
+
// ── Pattern tables (priority-ordered) ──────────────────────────────────────
|
|
88
|
+
// All tables hold lowercase substrings. Order inside a table matters: the
// first entry found in the haystack is the one reported as `matchedPattern`.

/** Timeout signals. Entries after 'timeout' / 'timed out' that contain those words are shadowed by them. */
const TIMEOUT_PATTERNS = [
    'timeout',
    'timed out',
    'etimedout',
    'deadline exceeded',
    'deadline_exceeded',
    'read timed out',
    'connect timeout',
    'connection timeout',
];

/** Rate-limit / throttling signals from an upstream service. */
const RATE_LIMIT_PATTERNS = [
    'rate limit',
    'rate_limit',
    'rate-limit',
    'too many requests',
    '429',
    'throttled',
    'throttling',
    'try again in',
    'please retry after',
    'requests per minute',
    'tokens per minute',
    'quota exceeded',
];

/** Credential / authorization failures. */
const AUTH_PATTERNS = [
    '401',
    '403',
    'unauthorized',
    'unauthorised',
    'invalid api key',
    'invalid_api_key',
    'authentication failed',
    'authentication required',
    'invalid token',
    'token expired',
    'token revoked',
    'forbidden',
];

/** Network failures that happen before any HTTP response exists. */
const NETWORK_PATTERNS = [
    'econnrefused',
    'enetunreach',
    'enotfound',
    'eai_again',
    'dns lookup',
    'getaddrinfo',
    'connection refused',
    'network unreachable',
    'host not found',
    'no such host',
];

/**
 * Local permission errors and refuse-to-act messages.
 * NOTE(review): in defaultClassifier the auth table is scanned before this
 * one and contains 'forbidden', so a haystack with 'forbidden path' is
 * reported as auth, never as permission.
 */
const PERMISSION_PATTERNS = [
    'eacces',
    'eperm',
    'access denied',
    'permission denied',
    'refusing to',
    'protected path',
    'forbidden path',
    'restricted path',
];

/** Missing or malformed tool-call arguments. */
const INVALID_INPUT_PATTERNS = [
    'no path provided',
    'no query provided',
    'no command provided',
    'no url provided',
    'no id provided',
    'no topic provided',
    'is required',
    'are required',
    'must be a string',
    'must be non-empty',
    'invalid argument',
    'malformed',
    'both from and to required',
    'empty find string',
];

/** Missing binary, unset configuration, or unwired runtime resource. */
const DEPENDENCY_MISSING_PATTERNS = [
    'command not found',
    'not in path',
    'not on path',
    'is not recognized as', // Windows shell wording
    'no such command',
    'not configured',
    'is not configured',
    'registry not configured',
    'paths not wired',
    'needs aiden',
];

/** Missing file/resource signals (distinct from a missing dependency). */
const NOT_FOUND_PATTERNS = [
    'enoent',
    'no such file',
    'no such directory',
    'file not found',
    'does not exist',
    // Generic wording goes last; defaultClassifier scans the
    // dependency-missing table first so 'command not found' is not
    // swallowed by this entry.
    'not found',
];
|
|
155
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
156
|
+
/**
 * Collect every error-bearing text fragment the classifier can inspect and
 * flatten it into one lowercase, space-joined string.
 *
 * Sources, in order: `verification.reason`, `result.error`, then
 * `result.result` — either the string itself (first 500 chars) or, for an
 * object, its string-valued `error`, `stderr` (first 500 chars) and
 * `message` fields.
 *
 * @param {object} verification
 * @param {object} result
 * @returns {string} Lowercased haystack; empty when nothing was collected.
 */
function buildHaystack(verification, result) {
    const MAX_CHARS = 500;
    const fragments = [];
    const { reason } = verification;
    const { error: outerError, result: payload } = result;

    if (reason) {
        fragments.push(reason);
    }
    if (outerError) {
        fragments.push(outerError);
    }

    if (typeof payload === 'string') {
        fragments.push(payload.slice(0, MAX_CHARS));
    }
    else if (typeof payload === 'object' && payload !== null) {
        // Only string-valued fields contribute; stderr is the one that is capped.
        const fields = [
            ['error', false],
            ['stderr', true],
            ['message', false],
        ];
        for (const [key, capped] of fields) {
            const text = payload[key];
            if (typeof text !== 'string') {
                continue;
            }
            fragments.push(capped ? text.slice(0, MAX_CHARS) : text);
        }
    }

    return fragments.join(' ').toLowerCase();
}
|
|
182
|
+
/** True when `ch` is a single ASCII digit; false for `undefined` (string edge). */
function isAsciiDigit(ch) {
    return ch >= '0' && ch <= '9';
}

/**
 * True when `digits` occurs in `haystack` without a digit directly before
 * or after it, i.e. as a standalone number rather than part of a longer one.
 */
function containsStandaloneNumber(haystack, digits) {
    let from = 0;
    for (;;) {
        const at = haystack.indexOf(digits, from);
        if (at === -1) {
            return false;
        }
        const before = haystack[at - 1];
        const after = haystack[at + digits.length];
        if (!isAsciiDigit(before) && !isAsciiDigit(after)) {
            return true;
        }
        from = at + 1;
    }
}

/**
 * Return the first pattern in `list` that occurs in `haystack`, else
 * `undefined`.
 *
 * Text patterns are plain substring matches. Patterns made only of digits
 * (status codes such as '401', '403', '429') must stand alone: a hit embedded
 * in a longer digit run — a byte count, port, timestamp or file name like
 * "build-14290.log" — is ignored, so unrelated numbers are not mistaken for
 * an HTTP status.
 *
 * @param {string} haystack Lowercased text to search.
 * @param {string[]} list Patterns in priority order.
 * @returns {string|undefined}
 */
function matchAny(haystack, list) {
    const allDigits = /^\d+$/;
    for (const pattern of list) {
        const found = allDigits.test(pattern)
            ? containsStandaloneNumber(haystack, pattern)
            : haystack.includes(pattern);
        if (found) {
            return pattern;
        }
    }
    return undefined;
}
|
|
190
|
+
// ── Default classifier ─────────────────────────────────────────────────────
|
|
191
|
+
/**
 * Heuristic default classifier. Builds one lowercase haystack from the
 * verification and result, then walks the pattern tables in priority order
 * and returns the verdict of the first table that matches:
 *
 *   1. timeout
 *   2. rate_limit
 *   3. auth
 *   4. network
 *   5. permission
 *   6. invalid_input
 *   7. dependency_missing (ahead of not_found so "command not found" lands here)
 *   8. not_found — promoted to
 *   9. hallucination when the tool is in the file-read family and one of the
 *      path-like args appears verbatim in the haystack
 *  10. other (fallback, no `matchedPattern` field)
 *
 * @param {object} verification
 * @param {string} toolName
 * @param {object} args
 * @param {object} result
 * @returns {object} Classification record: category, confidence, reason,
 *   recoverable, recoveryHint and (except for the fallback) matchedPattern.
 */
const defaultClassifier = (verification, toolName, args, result) => {
    const hay = buildHaystack(verification, result);

    // Ordered stages. Each `build` creates a fresh record per call so callers
    // never share hint objects.
    const stages = [
        {
            patterns: TIMEOUT_PATTERNS,
            build: (matchedPattern) => ({
                category: 'timeout',
                confidence: 0.9,
                reason: 'tool call exceeded its deadline',
                recoverable: true,
                recoveryHint: { action: 'retry_with_backoff' },
                matchedPattern,
            }),
        },
        {
            patterns: RATE_LIMIT_PATTERNS,
            build: (matchedPattern) => ({
                category: 'rate_limit',
                confidence: 0.9,
                reason: 'rate-limited by upstream',
                recoverable: true,
                recoveryHint: { action: 'retry_with_backoff' },
                matchedPattern,
            }),
        },
        {
            patterns: AUTH_PATTERNS,
            build: (matchedPattern) => ({
                category: 'auth',
                confidence: 0.9,
                reason: 'authentication failed or credential missing',
                recoverable: false,
                recoveryHint: { action: 'request_user_action', detail: 'check credentials' },
                matchedPattern,
            }),
        },
        {
            patterns: NETWORK_PATTERNS,
            build: (matchedPattern) => ({
                category: 'network',
                confidence: 0.85,
                reason: 'network unreachable or DNS failure',
                recoverable: true,
                recoveryHint: { action: 'retry_with_backoff' },
                matchedPattern,
            }),
        },
        {
            patterns: PERMISSION_PATTERNS,
            build: (matchedPattern) => ({
                category: 'permission',
                confidence: 0.9,
                reason: 'permission denied or refused by safety policy',
                recoverable: false,
                recoveryHint: { action: 'surface_to_user' },
                matchedPattern,
            }),
        },
        {
            patterns: INVALID_INPUT_PATTERNS,
            build: (matchedPattern) => ({
                category: 'invalid_input',
                confidence: 0.8,
                reason: 'tool call arguments missing or malformed',
                recoverable: true,
                recoveryHint: { action: 'retry', detail: 'fix the arguments and try again' },
                matchedPattern,
            }),
        },
        {
            patterns: DEPENDENCY_MISSING_PATTERNS,
            build: (matchedPattern) => ({
                category: 'dependency_missing',
                confidence: 0.85,
                reason: 'required binary or runtime resource is missing',
                recoverable: false,
                recoveryHint: { action: 'install_dependency' },
                matchedPattern,
            }),
        },
        {
            patterns: NOT_FOUND_PATTERNS,
            build: (matchedPattern) => {
                // Narrow hallucination promotion: file-read family only, and
                // the path from args must show up in the error text.
                const inventedPath = isFileReadFamily(toolName) && argsContainPathVerbatim(args, hay);
                if (inventedPath) {
                    return {
                        category: 'hallucination',
                        confidence: 0.6,
                        reason: 'tool called with a path that does not exist on disk',
                        recoverable: true,
                        recoveryHint: {
                            action: 'retry',
                            detail: 'the path the model used does not exist — re-check before retrying',
                        },
                        matchedPattern,
                    };
                }
                return {
                    category: 'not_found',
                    confidence: 0.85,
                    reason: 'target resource was not found',
                    recoverable: true,
                    recoveryHint: { action: 'retry', detail: 'check the path/name and try again' },
                    matchedPattern,
                };
            },
        },
    ];

    for (const { patterns, build } of stages) {
        const matchedPattern = matchAny(hay, patterns);
        if (matchedPattern) {
            return build(matchedPattern);
        }
    }

    // Nothing matched: low-confidence catch-all.
    return {
        category: 'other',
        confidence: 0.3,
        reason: 'unclassified failure',
        recoverable: true,
        recoveryHint: { action: 'retry_with_backoff' },
    };
};
|
|
328
|
+
exports.defaultClassifier = defaultClassifier;
|
|
329
|
+
// ── Hallucination heuristic helpers ─────────────────────────────────────────
|
|
330
|
+
/**
 * True for the tools treated as the "file-read family" by the hallucination
 * heuristic: `file_read`, `file_list` and `file_patch`.
 *
 * @param {string} toolName
 * @returns {boolean}
 */
function isFileReadFamily(toolName) {
    switch (toolName) {
        case 'file_read':
        case 'file_list':
        case 'file_patch':
            return true;
        default:
            return false;
    }
}
|
|
336
|
+
/**
 * Heuristic: does any path-like argument (`path`, `file`, `from`, `to`,
 * `target`) appear, lowercased, inside the haystack? Only string values of at
 * least 4 characters are considered, so short common tokens cannot match.
 *
 * @param {unknown} args Tool-call arguments; non-objects never match.
 * @param {string} hay Lowercased haystack.
 * @returns {boolean}
 */
function argsContainPathVerbatim(args, hay) {
    if (typeof args !== 'object' || args === null) {
        return false;
    }
    const pathKeys = ['path', 'file', 'from', 'to', 'target'];
    return pathKeys.some((key) => {
        const value = args[key];
        const usable = typeof value === 'string' && value.length >= 4;
        return usable && hay.includes(value.toLowerCase());
    });
}
|
|
357
|
+
// ── Per-tool classifiers ───────────────────────────────────────────────────
|
|
358
|
+
/**
 * `shell_exec` — classify by the numeric `exitCode` on `result.result`
 * before falling back to text patterns:
 *
 *   - 124 → timeout            (GNU coreutils `timeout` convention)
 *   - 126 → permission         (found but cannot be invoked)
 *   - 127 → dependency_missing (command not found)
 *
 * Any other exit code — including 130 (SIGINT), which has no dedicated
 * branch — or a missing/non-numeric `exitCode` is handed unchanged to the
 * exported default classifier.
 *
 * @param {object} verification
 * @param {string} toolName
 * @param {object} args
 * @param {object} result
 * @returns {object} Classification record.
 */
const shellExecClassifier = (verification, toolName, args, result) => {
    const inner = result.result;
    // Only a real number counts as an exit code.
    const exitCode = typeof inner?.exitCode === 'number' ? inner.exitCode : undefined;

    switch (exitCode) {
        case 124:
            return {
                category: 'timeout', confidence: 0.95,
                reason: 'shell command timed out (exit 124)',
                recoverable: true,
                recoveryHint: { action: 'retry_with_backoff', detail: 'consider raising timeoutMs' },
                matchedPattern: 'exit 124',
            };
        case 126:
            return {
                category: 'permission', confidence: 0.95,
                reason: 'shell command cannot be invoked (exit 126)',
                recoverable: false,
                recoveryHint: { action: 'surface_to_user' },
                matchedPattern: 'exit 126',
            };
        case 127:
            return {
                category: 'dependency_missing', confidence: 0.95,
                reason: 'shell command not found (exit 127)',
                recoverable: false,
                recoveryHint: { action: 'install_dependency' },
                matchedPattern: 'exit 127',
            };
        default:
            // No canonical code: use the shared pattern pipeline.
            return (0, exports.defaultClassifier)(verification, toolName, args, result);
    }
};
|
|
402
|
+
exports.shellExecClassifier = shellExecClassifier;
|
|
403
|
+
/**
 * `web_search` — currently a straight pass-through to the exported default
 * classifier (no search-specific rules are applied here). Exported under its
 * own name so web_search classification has a dedicated hook.
 */
const webSearchClassifier = (verification, toolName, args, result) => {
    const classify = exports.defaultClassifier;
    return classify(verification, toolName, args, result);
};
|
|
410
|
+
exports.webSearchClassifier = webSearchClassifier;
|
|
411
|
+
/** `web_fetch` (+ aliases) — pass-through to the exported default classifier, same as web_search. */
const webFetchClassifier = (verification, toolName, args, result) => {
    const classify = exports.defaultClassifier;
    return classify(verification, toolName, args, result);
};
|
|
415
|
+
exports.webFetchClassifier = webFetchClassifier;
|
|
416
|
+
/**
 * `file_read` — pass-through to the exported default classifier, which
 * already contains the not_found → hallucination promotion for the
 * file-read family. Kept as a separate export so file_read classification
 * can be intercepted on its own (e.g. by plugin extensions).
 */
const fileReadClassifier = (verification, toolName, args, result) => {
    const classify = exports.defaultClassifier;
    return classify(verification, toolName, args, result);
};
|
|
426
|
+
exports.fileReadClassifier = fileReadClassifier;
|
|
427
|
+
/**
 * Pull the `browserState` sidecar off a tool result.
 *
 * @param {object} result Tool result envelope.
 * @returns {*} `result.result.browserState`, or `null` when `result.result`
 *   is not a non-null object or the sidecar is absent/nullish.
 */
function readBrowserStateSidecar(result) {
    const payload = result.result;
    if (payload === null || typeof payload !== 'object') {
        return null;
    }
    const { browserState } = payload;
    return browserState === undefined || browserState === null ? null : browserState;
}
|
|
434
|
+
/**
 * Classifier for the interactive browser tools (browser_click,
 * browser_type, browser_fill). Reads the `browserState` sidecar from the
 * result and applies, in order:
 *
 *   1. `blocker` present                          → manual_blocker (0.95)
 *   2. `staleRefRetry` attempted and not succeeded → stale_ref (0.9)
 *   3. `maybe_noop`, or `needs_verifier` with
 *      `progress_score < 0.3`                      → stale_ref (0.75)
 *   4. otherwise                                   → exported default classifier
 *
 * `manual_blocker` outranks `stale_ref` because no retry can get past a
 * blocker such as a login wall — the user has to act.
 *
 * @param {object} verification
 * @param {string} toolName
 * @param {object} args
 * @param {object} result
 * @returns {object} Classification record.
 */
const browserInteractiveClassifier = (verification, toolName, args, result) => {
    const bs = readBrowserStateSidecar(result);

    // Priority 1 — manual blocker.
    if (bs?.blocker) {
        return {
            category: 'manual_blocker',
            confidence: 0.95,
            reason: `${bs.blocker.kind}${bs.blocker.subtype ? ` (${bs.blocker.subtype})` : ''} at ${bs.blocker.url}`,
            recoverable: false,
            recoveryHint: { action: 'request_user_action', detail: bs.blocker.message },
            matchedPattern: `browserState.blocker.${bs.blocker.kind}`,
        };
    }

    // Priority 2 — an automatic stale-ref retry already ran and failed.
    if (bs?.staleRefRetry?.attempted && !bs.staleRefRetry.succeeded) {
        return {
            category: 'stale_ref',
            confidence: 0.9,
            reason: `stale ref after auto-retry: ${bs.staleRefRetry.reason}`,
            recoverable: true,
            recoveryHint: {
                action: 'retry',
                detail: 'wait for page to settle, then re-select the element',
            },
            matchedPattern: 'browserState.staleRefRetry.failed',
        };
    }

    // Priority 3 — the page did not respond. Reported as stale_ref because
    // the recovery is the same: re-read the page state before trying again.
    if (bs && (bs.maybe_noop || (bs.needs_verifier && bs.progress_score < 0.3))) {
        // `maybe_noop` can be set on a sidecar with no numeric score; a
        // classifier must not throw while describing a failure.
        const score = typeof bs.progress_score === 'number'
            ? bs.progress_score.toFixed(2)
            : 'n/a';
        return {
            category: 'stale_ref',
            confidence: 0.75,
            reason: `tool returned success but page did not change (progress_score=${score})`,
            recoverable: true,
            recoveryHint: {
                action: 'retry',
                detail: 'verify the page state then try a different approach',
            },
            matchedPattern: 'browserState.no_progress',
        };
    }

    // Priority 4 — generic patterns.
    return (0, exports.defaultClassifier)(verification, toolName, args, result);
};
|
|
493
|
+
exports.browserInteractiveClassifier = browserInteractiveClassifier;
|
|
494
|
+
/**
 * Failure classifier for `browser_navigate`.
 *
 * Inspects only the `blocker` field of the browser-state sidecar read from
 * the tool result. Per the original notes, the stale-ref retry does not
 * fire on navigate (it is excluded from STALE_REF_RETRYABLE), so no
 * stale_ref handling is attempted here.
 *
 * Returns a non-recoverable `manual_blocker` classification when a blocker
 * is present; otherwise returns whatever `defaultClassifier` produces for
 * the same arguments.
 */
const browserNavigateClassifier = (verification, toolName, args, result) => {
    const blocker = readBrowserStateSidecar(result)?.blocker;
    if (!blocker) {
        // No sidecar or no blocker: generic patterns apply.
        const fallback = exports.defaultClassifier;
        return fallback(verification, toolName, args, result);
    }
    const subtypeSuffix = blocker.subtype ? ` (${blocker.subtype})` : '';
    return {
        category: 'manual_blocker',
        confidence: 0.95,
        reason: `${blocker.kind}${subtypeSuffix} at ${blocker.url}`,
        recoverable: false,
        recoveryHint: { action: 'request_user_action', detail: blocker.message },
        matchedPattern: `browserState.blocker.${blocker.kind}`,
    };
};
|
|
513
|
+
exports.browserNavigateClassifier = browserNavigateClassifier;
|
|
514
|
+
/**
 * Extract a normalized `sandbox_violation` envelope from a tool result.
 *
 * The envelope is looked up at `result.result.sandbox_violation`. Per the
 * original notes, the file tools attach it there alongside `success: false`;
 * the shell-exec docker-start failure uses a different shape and is not
 * handled by this function.
 *
 * @param result Tool result whose `result` property may carry the envelope.
 * @returns `null` when `result.result` or the envelope is not a non-array
 *   object, when `category` is not `'sandbox_violation'`, or when `code` is
 *   not a string. Otherwise an object with `code`, the three path/policy
 *   fields coerced to `''` when not strings, `retryable: false`, and
 *   `category: 'sandbox_violation'`.
 */
function readSandboxViolation(result) {
    const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    const textOrEmpty = (value) => (typeof value === 'string' ? value : '');
    const payload = result.result;
    if (!isRecord(payload)) {
        return null;
    }
    const envelope = payload.sandbox_violation;
    if (!isRecord(envelope)) {
        return null;
    }
    const wellFormed = envelope.category === 'sandbox_violation' && typeof envelope.code === 'string';
    if (!wellFormed) {
        return null;
    }
    return {
        code: envelope.code,
        matched_policy: textOrEmpty(envelope.matched_policy),
        requested_path: textOrEmpty(envelope.requested_path),
        resolved_path: textOrEmpty(envelope.resolved_path),
        // Always reported as non-retryable, regardless of the incoming value.
        retryable: false,
        category: 'sandbox_violation',
    };
}
|
|
543
|
+
/**
 * Build a concrete, user-actionable suggestion from a sandbox violation
 * envelope. The advice is specific to `env.code`:
 *
 *   - `fs.write_outside_allowlist` → suggest adding the containing
 *     directory to `AIDEN_SANDBOX_ALLOW`
 *   - `fs.sensitive_path`          → denylist hit; cannot be allowlisted
 *   - `fs.symlink_escape`          → use the real path or allowlist the target
 *   - `fs.path_traversal`          → use an absolute path
 *   - `fs.read_denied`             → state what was refused
 *   - anything else                → generic message, naming the policy if known
 *
 * @param env Violation envelope with `code`, `matched_policy`,
 *   `requested_path` and `resolved_path` (missing/empty fields are tolerated).
 * @returns A human-readable suggestion string.
 */
function suggestOverride(env) {
    // Most specific non-empty description of what was refused. The final
    // fallback keeps messages grammatical when the envelope has no path or
    // policy at all (instead of "Sandbox refuses  for safety.").
    const refused = env.matched_policy || env.resolved_path || env.requested_path || 'this path';
    switch (env.code) {
        case 'fs.write_outside_allowlist': {
            // Prefer the resolved (real) path over the agent-supplied string and
            // suggest the directory that contains it.
            const target = env.resolved_path || env.requested_path || '';
            const lastSlash = target.lastIndexOf('/');
            const lastBackslash = target.lastIndexOf('\\');
            // Drive-letter and UNC paths may mix `/` and `\`; the containing
            // directory ends at whichever separator comes last. Other paths
            // keep the original rule (`/` wins, `\` only when no `/` exists)
            // so a POSIX file name containing a backslash is not split.
            const looksWindows = /^[A-Za-z]:[\\/]|^\\\\/.test(target);
            let lastSep;
            if (looksWindows) {
                lastSep = Math.max(lastSlash, lastBackslash);
            }
            else {
                lastSep = lastSlash >= 0 ? lastSlash : lastBackslash;
            }
            const dir = lastSep > 0 ? target.slice(0, lastSep) : target;
            return dir
                ? `Add to allowlist: AIDEN_SANDBOX_ALLOW=${dir}`
                : 'Add the target directory to AIDEN_SANDBOX_ALLOW';
        }
        case 'fs.sensitive_path':
            return `Sandbox refuses ${refused} for safety. ` +
                'This path is on the denylist and cannot be allowlisted. ' +
                'Use a different path, or set AIDEN_SANDBOX=0 to disable the sandbox entirely (not recommended).';
        case 'fs.symlink_escape':
            return 'The path contains a symlink that resolves outside the sandbox. ' +
                'Use the real path directly, or extend AIDEN_SANDBOX_ALLOW to cover the symlink target.';
        case 'fs.path_traversal':
            return 'Path contains `..` segments that escape the working directory. Use an absolute path.';
        case 'fs.read_denied':
            return `Sandbox refuses read of ${refused}.`;
        default:
            return env.matched_policy
                ? `Sandbox blocked by policy: ${env.matched_policy}`
                : 'Sandbox blocked this operation.';
    }
}
|
|
582
|
+
/**
 * Shared classifier for tools that pass through the sandbox preflight
 * (the file tools and `shell_exec`).
 *
 * Checks, in order:
 *   1. A `sandbox_violation` envelope on the result (via
 *      `readSandboxViolation`) → non-recoverable `sandbox_violation` whose
 *      hint detail comes from `suggestOverride` and which carries the
 *      envelope fields under `sandboxViolation`.
 *   2. For `shell_exec` only: a string `result.result.stderr` containing
 *      "Sandbox: failed to start container" → recoverable
 *      `sandbox_violation` with an `install_dependency` hint.
 *   3. Otherwise → whatever `defaultClassifier` returns.
 */
const sandboxViolationClassifier = (verification, toolName, args, result) => {
    const violation = readSandboxViolation(result);
    if (violation) {
        const policySuffix = violation.matched_policy ? ` matched ${violation.matched_policy}` : '';
        return {
            category: 'sandbox_violation',
            confidence: 0.95,
            reason: `${violation.code}${policySuffix}`,
            recoverable: false,
            recoveryHint: {
                action: 'request_user_action',
                detail: suggestOverride(violation),
            },
            matchedPattern: violation.code,
            sandboxViolation: {
                code: violation.code,
                matchedPolicy: violation.matched_policy,
                requestedPath: violation.requested_path,
                resolvedPath: violation.resolved_path,
            },
        };
    }
    // Docker-start failure is only looked for on shell_exec results.
    const sawDockerStartFailure = () => {
        const payload = result.result;
        if (!payload || typeof payload !== 'object' || Array.isArray(payload)) {
            return false;
        }
        const errText = payload.stderr;
        return typeof errText === 'string' && /Sandbox: failed to start container/.test(errText);
    };
    if (toolName === 'shell_exec' && sawDockerStartFailure()) {
        return {
            category: 'sandbox_violation',
            confidence: 0.9,
            reason: 'docker container failed to start',
            recoverable: true,
            recoveryHint: {
                action: 'install_dependency',
                detail: 'Start Docker and retry, or set AIDEN_SANDBOX=0 to disable the sandbox.',
            },
            matchedPattern: 'docker_unavailable',
        };
    }
    const fallback = exports.defaultClassifier;
    return fallback(verification, toolName, args, result);
};
|
|
643
|
+
exports.sandboxViolationClassifier = sandboxViolationClassifier;
|
|
644
|
+
/**
 * `shell_exec` classifier with the sandbox check layered in front.
 *
 * Runs `sandboxViolationClassifier` first and returns its verdict when the
 * category is `sandbox_violation`; any other verdict is discarded and the
 * call is handed to `shellExecClassifier` with the same arguments. A
 * sandbox refusal therefore takes precedence over every other shell-exec
 * failure mode.
 */
const shellExecClassifierWithSandbox = (verification, toolName, args, result) => {
    const callArgs = [verification, toolName, args, result];
    const sandboxVerdict = (0, exports.sandboxViolationClassifier)(...callArgs);
    return sandboxVerdict.category === 'sandbox_violation'
        ? sandboxVerdict
        : (0, exports.shellExecClassifier)(...callArgs);
};
|
|
656
|
+
exports.shellExecClassifierWithSandbox = shellExecClassifierWithSandbox;
|
|
657
|
+
/**
 * `file_read` classifier with the sandbox check layered in front.
 *
 * Runs `sandboxViolationClassifier` first and returns its verdict when the
 * category is `sandbox_violation`, so a read refused by the sandbox is
 * reported as such rather than through the generic file-read patterns.
 * Any other verdict is discarded and the call is handed to
 * `fileReadClassifier` with the same arguments.
 */
const fileReadClassifierWithSandbox = (verification, toolName, args, result) => {
    const callArgs = [verification, toolName, args, result];
    const sandboxVerdict = (0, exports.sandboxViolationClassifier)(...callArgs);
    return sandboxVerdict.category === 'sandbox_violation'
        ? sandboxVerdict
        : (0, exports.fileReadClassifier)(...callArgs);
};
|
|
669
|
+
exports.fileReadClassifierWithSandbox = fileReadClassifierWithSandbox;
|
|
670
|
+
// ── v4.5 Phase 5a — trigger-dispatcher classifier ──────────────────────────
|
|
671
|
+
/**
|
|
672
|
+
* Synthetic "tool name" the daemon dispatcher uses when a turn
|
|
673
|
+
* failed for a daemon-trigger-specific reason (template missing
|
|
674
|
+
* vars, fire-rate cap exceeded, max retries exhausted). The
|
|
675
|
+
* dispatcher constructs a `ToolCallResult` envelope with this
|
|
676
|
+
* name + a reason string in `error`; the classifier inspects the
|
|
677
|
+
* substring tag to pick the right category.
|
|
678
|
+
*
|
|
679
|
+
* Tags are emitted by `core/v4/daemon/dispatcher/dispatcher.ts`
|
|
680
|
+
* and `triggerBus.markFailed` / `deadLetter` flows.
|
|
681
|
+
*/
|
|
682
|
+
exports.DAEMON_DISPATCHER_TOOL_NAME = 'daemon:dispatcher';
|
|
683
|
+
// Substring tags searched for in the dispatcher's failure text. Each tag is
// also used verbatim as the resulting category and matchedPattern.
const TRIGGER_MISCONFIGURED_TAG = 'trigger_misconfigured';
const TRIGGER_QUOTA_TAG = 'trigger_quota';
const TRIGGER_DEAD_LETTERED_TAG = 'trigger_dead_lettered';
/**
 * Classifier for the synthetic `daemon:dispatcher` tool.
 *
 * Builds one lower-cased search string from `verification.reason`,
 * `result.error`, and `result.result` (the latter only when it is a
 * string), then returns the classification for the first tag found,
 * checked in this order: misconfigured, quota, dead-lettered. Every match
 * is non-recoverable, has confidence 0.95, and carries a
 * `request_user_action` hint:
 *
 *   - trigger_misconfigured → fix the trigger spec / template variables
 *   - trigger_quota         → fix the producer or raise the fire-rate cap
 *   - trigger_dead_lettered → review last_error and re-queue if appropriate
 *
 * When no tag is present the call is forwarded to `defaultClassifier`.
 */
const triggerDispatcherClassifier = (verification, toolName, args, result) => {
    const reasonPart = verification.reason ?? '';
    const errorPart = result.error ?? '';
    const resultPart = typeof result.result === 'string' ? result.result : '';
    // `+` concatenation is kept deliberately so non-string reason/error
    // values are coerced exactly as before.
    const haystack = (reasonPart + ' ' + errorPart + ' ' + resultPart).toLowerCase();
    // Ordered: the first rule whose tag appears in the haystack wins.
    const rules = [
        {
            tag: TRIGGER_MISCONFIGURED_TAG,
            reason: 'trigger spec invalid or template variables missing',
            detail: 'inspect the trigger spec and ensure all template variables are populated by the payload',
        },
        {
            tag: TRIGGER_QUOTA_TAG,
            reason: 'per-trigger fire-rate cap exceeded',
            detail: 'investigate the upstream producer or raise the trigger\'s fire-rate limit',
        },
        {
            tag: TRIGGER_DEAD_LETTERED_TAG,
            reason: 'trigger event exhausted max retries and moved to dead letter',
            detail: 'review the last_error on the dead-lettered trigger event and re-queue if appropriate',
        },
    ];
    const matched = rules.find((rule) => haystack.includes(rule.tag));
    if (!matched) {
        const fallback = exports.defaultClassifier;
        return fallback(verification, toolName, args, result);
    }
    return {
        category: matched.tag,
        confidence: 0.95,
        reason: matched.reason,
        recoverable: false,
        recoveryHint: {
            action: 'request_user_action',
            detail: matched.detail,
        },
        matchedPattern: matched.tag,
    };
};
|
|
743
|
+
exports.triggerDispatcherClassifier = triggerDispatcherClassifier;
|
|
744
|
+
// ── Factory ────────────────────────────────────────────────────────────────
|
|
745
|
+
/**
 * Create a `FailureClassifier` registry with the stock per-tool
 * classifiers registered.
 *
 * Registrations are applied in the order listed below. Tools not listed
 * here get no override from this factory.
 *
 * @returns The populated `FailureClassifier` instance.
 */
function buildDefaultClassifier() {
    const registry = new FailureClassifier();
    const registrations = [
        // Sandbox envelope check runs first for shell_exec and file_read,
        // then defers to the tool's existing classifier.
        ['shell_exec', exports.shellExecClassifierWithSandbox],
        ['web_search', exports.webSearchClassifier],
        // The three page-fetching tool names share one classifier.
        ['web_fetch', exports.webFetchClassifier],
        ['fetch_page', exports.webFetchClassifier],
        ['web_page', exports.webFetchClassifier],
        ['file_read', exports.fileReadClassifierWithSandbox],
        // Remaining file tools use the unified sandbox classifier directly.
        ['file_list', exports.sandboxViolationClassifier],
        ['file_write', exports.sandboxViolationClassifier],
        ['file_patch', exports.sandboxViolationClassifier],
        ['file_copy', exports.sandboxViolationClassifier],
        ['file_move', exports.sandboxViolationClassifier],
        ['file_delete', exports.sandboxViolationClassifier],
        // Browser tools read the BrowserState sidecar and fall through to
        // the default classifier when it is absent.
        ['browser_click', exports.browserInteractiveClassifier],
        ['browser_type', exports.browserInteractiveClassifier],
        ['browser_fill', exports.browserInteractiveClassifier],
        ['browser_navigate', exports.browserNavigateClassifier],
        // Synthetic daemon-dispatcher "tool" for trigger-specific failures.
        [exports.DAEMON_DISPATCHER_TOOL_NAME, exports.triggerDispatcherClassifier],
    ];
    for (const [tool, classifier] of registrations) {
        registry.register(tool, classifier);
    }
    return registry;
}
|