aiden-runtime 4.1.5 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -847
- package/dist/api/server.js +32 -5
- package/dist/cli/v4/aidenCLI.js +536 -152
- package/dist/cli/v4/callbacks.js +170 -0
- package/dist/cli/v4/chatSession.js +245 -3
- package/dist/cli/v4/commands/_runtimeToggleHelpers.js +94 -0
- package/dist/cli/v4/commands/browserDepth.js +45 -0
- package/dist/cli/v4/commands/cron.js +264 -0
- package/dist/cli/v4/commands/daemon.js +541 -0
- package/dist/cli/v4/commands/daemonStatus.js +253 -0
- package/dist/cli/v4/commands/fanout.js +42 -59
- package/dist/cli/v4/commands/help.js +13 -0
- package/dist/cli/v4/commands/index.js +35 -1
- package/dist/cli/v4/commands/mcp.js +80 -54
- package/dist/cli/v4/commands/plannerGuard.js +53 -0
- package/dist/cli/v4/commands/recovery.js +122 -0
- package/dist/cli/v4/commands/runs.js +223 -0
- package/dist/cli/v4/commands/sandbox.js +48 -0
- package/dist/cli/v4/commands/spawnPause.js +93 -0
- package/dist/cli/v4/commands/suggestions.js +68 -0
- package/dist/cli/v4/commands/tce.js +41 -0
- package/dist/cli/v4/commands/trigger.js +378 -0
- package/dist/cli/v4/commands/update.js +95 -3
- package/dist/cli/v4/daemonAgentBuilder.js +145 -0
- package/dist/cli/v4/defaultSoul.js +1 -1
- package/dist/cli/v4/display/capabilityCard.js +26 -0
- package/dist/cli/v4/display.js +18 -8
- package/dist/cli/v4/replyRenderer.js +31 -23
- package/dist/cli/v4/updateBootPrompt.js +170 -0
- package/dist/core/playwrightBridge.js +129 -0
- package/dist/core/v4/aidenAgent.js +527 -5
- package/dist/core/v4/browserState.js +436 -0
- package/dist/core/v4/checkpoint.js +79 -0
- package/dist/core/v4/daemon/bootstrap.js +651 -0
- package/dist/core/v4/daemon/cleanShutdown.js +154 -0
- package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
- package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
- package/dist/core/v4/daemon/cron/migration.js +199 -0
- package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
- package/dist/core/v4/daemon/daemonConfig.js +90 -0
- package/dist/core/v4/daemon/db/connection.js +106 -0
- package/dist/core/v4/daemon/db/migrations.js +362 -0
- package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
- package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
- package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
- package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
- package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
- package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
- package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
- package/dist/core/v4/daemon/dispatcher/index.js +53 -0
- package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
- package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
- package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
- package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
- package/dist/core/v4/daemon/drain.js +156 -0
- package/dist/core/v4/daemon/eventLoopLag.js +73 -0
- package/dist/core/v4/daemon/health.js +159 -0
- package/dist/core/v4/daemon/idempotencyStore.js +204 -0
- package/dist/core/v4/daemon/index.js +179 -0
- package/dist/core/v4/daemon/instanceTracker.js +99 -0
- package/dist/core/v4/daemon/resourceRegistry.js +150 -0
- package/dist/core/v4/daemon/restartCode.js +32 -0
- package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
- package/dist/core/v4/daemon/runStore.js +144 -0
- package/dist/core/v4/daemon/runtimeLock.js +167 -0
- package/dist/core/v4/daemon/signals.js +50 -0
- package/dist/core/v4/daemon/supervisor.js +272 -0
- package/dist/core/v4/daemon/triggerBus.js +279 -0
- package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
- package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
- package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
- package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
- package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
- package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
- package/dist/core/v4/daemon/triggers/email/index.js +332 -0
- package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
- package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
- package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
- package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
- package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
- package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
- package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
- package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
- package/dist/core/v4/daemon/triggers/webhook.js +376 -0
- package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
- package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
- package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
- package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
- package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
- package/dist/core/v4/daemon/types.js +15 -0
- package/dist/core/v4/dockerSession.js +461 -0
- package/dist/core/v4/dryRun.js +117 -0
- package/dist/core/v4/failureClassifier.js +779 -0
- package/dist/core/v4/providerFallback.js +35 -2
- package/dist/core/v4/recoveryReport.js +449 -0
- package/dist/core/v4/runtimeToggles.js +214 -0
- package/dist/core/v4/sandboxConfig.js +285 -0
- package/dist/core/v4/sandboxFs.js +316 -0
- package/dist/core/v4/selfimprovement/recoveryStore.js +307 -0
- package/dist/core/v4/selfimprovement/signatureBuilder.js +158 -0
- package/dist/core/v4/subagent/childBuilder.js +391 -0
- package/dist/core/v4/subagent/fanout.js +75 -51
- package/dist/core/v4/subagent/spawnPause.js +191 -0
- package/dist/core/v4/subagent/spawnSubAgent.js +310 -0
- package/dist/core/v4/suggestionCatalog.js +41 -0
- package/dist/core/v4/suggestionEngine.js +210 -0
- package/dist/core/v4/toolRegistry.js +37 -3
- package/dist/core/v4/turnState.js +587 -0
- package/dist/core/v4/update/checkUpdate.js +63 -3
- package/dist/core/v4/update/installMethodDetect.js +115 -0
- package/dist/core/v4/update/registryClient.js +121 -0
- package/dist/core/v4/update/skipState.js +75 -0
- package/dist/core/v4/verifier.js +448 -0
- package/dist/core/version.js +1 -1
- package/dist/moat/plannerGuard.js +29 -0
- package/dist/providers/v4/anthropicAdapter.js +31 -3
- package/dist/providers/v4/chatCompletionsAdapter.js +26 -3
- package/dist/providers/v4/codexResponsesAdapter.js +25 -2
- package/dist/providers/v4/ollamaPromptToolsAdapter.js +57 -2
- package/dist/tools/v4/browser/_observer.js +224 -0
- package/dist/tools/v4/browser/browserBlocker.js +396 -0
- package/dist/tools/v4/browser/browserClick.js +18 -1
- package/dist/tools/v4/browser/browserClose.js +18 -1
- package/dist/tools/v4/browser/browserExtract.js +5 -1
- package/dist/tools/v4/browser/browserFill.js +17 -1
- package/dist/tools/v4/browser/browserGetUrl.js +5 -1
- package/dist/tools/v4/browser/browserNavigate.js +16 -1
- package/dist/tools/v4/browser/browserScreenshot.js +5 -1
- package/dist/tools/v4/browser/browserScroll.js +18 -1
- package/dist/tools/v4/browser/browserType.js +17 -1
- package/dist/tools/v4/browser/captchaCheck.js +5 -1
- package/dist/tools/v4/executeCode.js +1 -0
- package/dist/tools/v4/files/fileCopy.js +56 -2
- package/dist/tools/v4/files/fileDelete.js +38 -1
- package/dist/tools/v4/files/fileList.js +12 -1
- package/dist/tools/v4/files/fileMove.js +59 -2
- package/dist/tools/v4/files/filePatch.js +43 -1
- package/dist/tools/v4/files/fileRead.js +12 -1
- package/dist/tools/v4/files/fileWrite.js +41 -1
- package/dist/tools/v4/index.js +88 -61
- package/dist/tools/v4/memory/memoryAdd.js +14 -0
- package/dist/tools/v4/memory/memoryRemove.js +14 -0
- package/dist/tools/v4/memory/memoryReplace.js +15 -0
- package/dist/tools/v4/memory/sessionSummary.js +12 -0
- package/dist/tools/v4/process/processKill.js +19 -0
- package/dist/tools/v4/process/processList.js +1 -0
- package/dist/tools/v4/process/processLogRead.js +1 -0
- package/dist/tools/v4/process/processSpawn.js +13 -0
- package/dist/tools/v4/process/processWait.js +1 -0
- package/dist/tools/v4/sessions/recallSession.js +1 -0
- package/dist/tools/v4/sessions/sessionList.js +1 -0
- package/dist/tools/v4/sessions/sessionSearch.js +1 -0
- package/dist/tools/v4/skills/lookupToolSchema.js +7 -0
- package/dist/tools/v4/skills/skillManage.js +13 -0
- package/dist/tools/v4/skills/skillView.js +1 -0
- package/dist/tools/v4/skills/skillsList.js +1 -0
- package/dist/tools/v4/subagent/spawnSubAgentTool.js +334 -0
- package/dist/tools/v4/subagent/subagentFanout.js +54 -1
- package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
- package/dist/tools/v4/system/appClose.js +13 -0
- package/dist/tools/v4/system/appInput.js +13 -0
- package/dist/tools/v4/system/appLaunch.js +13 -0
- package/dist/tools/v4/system/clipboardRead.js +1 -0
- package/dist/tools/v4/system/clipboardWrite.js +14 -0
- package/dist/tools/v4/system/mediaKey.js +12 -0
- package/dist/tools/v4/system/mediaSessions.js +1 -0
- package/dist/tools/v4/system/mediaTransport.js +13 -0
- package/dist/tools/v4/system/naturalEvents.js +1 -0
- package/dist/tools/v4/system/nowPlaying.js +1 -0
- package/dist/tools/v4/system/osProcessList.js +1 -0
- package/dist/tools/v4/system/screenshot.js +1 -0
- package/dist/tools/v4/system/systemInfo.js +1 -0
- package/dist/tools/v4/system/volumeSet.js +17 -0
- package/dist/tools/v4/terminal/shellExec.js +81 -9
- package/dist/tools/v4/web/deepResearch.js +1 -0
- package/dist/tools/v4/web/openUrl.js +1 -0
- package/dist/tools/v4/web/webFetch.js +1 -0
- package/dist/tools/v4/web/webPage.js +1 -0
- package/dist/tools/v4/web/webSearch.js +1 -0
- package/dist/tools/v4/web/youtubeSearch.js +1 -0
- package/package.json +13 -3
|
@@ -427,16 +427,49 @@ class FallbackAdapter {
|
|
|
427
427
|
* provider key, but isolation prevents one slow subagent from
|
|
428
428
|
* starving siblings via parent-side cooldown state.
|
|
429
429
|
*/
|
|
430
|
-
clone() {
|
|
430
|
+
clone(opts) {
|
|
431
|
+
// v4.6 Phase 2P — optional `providerId` filter restricts the clone
|
|
432
|
+
// to slots matching the named provider. Used by `spawn_sub_agent`'s
|
|
433
|
+
// per-spawn provider override: fanout (and future callers) pass a
|
|
434
|
+
// specific provider name so the child's FallbackAdapter rotates
|
|
435
|
+
// only within that provider's slots, preserving the diversity
|
|
436
|
+
// invariant fanout depends on. When omitted, full-slot clone is
|
|
437
|
+
// the Phase 1 behaviour (unchanged).
|
|
438
|
+
//
|
|
439
|
+
// Caller is responsible for validating `providerId` against
|
|
440
|
+
// `getProviderIds()` before calling — an unknown providerId
|
|
441
|
+
// here yields a degenerate clone (zero slots), which the
|
|
442
|
+
// FallbackAdapter's own dispatch path treats as "no providers"
|
|
443
|
+
// and would error on first call. Defending here would mask the
|
|
444
|
+
// upstream validation gap; we prefer fail-loud at the validation
|
|
445
|
+
// layer instead.
|
|
446
|
+
const slots = opts?.providerId
|
|
447
|
+
? this.slots.filter((s) => s.providerId === opts.providerId)
|
|
448
|
+
: this.slots;
|
|
431
449
|
return new FallbackAdapter({
|
|
432
450
|
apiMode: this.apiMode,
|
|
433
|
-
slots
|
|
451
|
+
slots,
|
|
434
452
|
cooldownMs: this.cooldownMs,
|
|
435
453
|
now: this.clock,
|
|
436
454
|
onRateLimit: this.onRateLimit,
|
|
437
455
|
onFallback: this.onFallback,
|
|
438
456
|
});
|
|
439
457
|
}
|
|
458
|
+
/**
|
|
459
|
+
* v4.6 Phase 2P — return the set of provider IDs the adapter knows
|
|
460
|
+
* about (deduplicated, key-present slots only). Used by
|
|
461
|
+
* `spawn_sub_agent`'s per-spawn provider override validation: the
|
|
462
|
+
* spec's `provider` field must name one of these. Sorted for
|
|
463
|
+
* stable diagnostic output in error messages.
|
|
464
|
+
*/
|
|
465
|
+
getProviderIds() {
|
|
466
|
+
const seen = new Set();
|
|
467
|
+
for (const slot of this.slots) {
|
|
468
|
+
if (slot.keyPresent)
|
|
469
|
+
seen.add(slot.providerId);
|
|
470
|
+
}
|
|
471
|
+
return [...seen].sort();
|
|
472
|
+
}
|
|
440
473
|
/**
|
|
441
474
|
* Diagnostic snapshot for `/providers`. Per-slot cooldown is reported
|
|
442
475
|
* in seconds remaining (0 when the slot is fresh) so the slash command
|
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/recoveryReport.ts — v4.2 Phase 3: Evidence Output +
|
|
10
|
+
* RecoveryReport.
|
|
11
|
+
*
|
|
12
|
+
* Pure synthesis. Consumes a TurnStateDiagnosticSnapshot (populated by
|
|
13
|
+
* Phase 1's verifier + Phase 2's classifier records) and produces a
|
|
14
|
+
* structured RecoveryReport that captures what the agent tried, what
|
|
15
|
+
* failed, why, what was recovered, and what's next.
|
|
16
|
+
*
|
|
17
|
+
* Surfaced ONLY when the TurnState recovery controller reaches the
|
|
18
|
+
* `surfaced` stage — quiet by design on hint/cooldown turns where the
|
|
19
|
+
* model self-corrects without user intervention. The report enriches
|
|
20
|
+
* the existing v4.1.6 tool_loop capability card by attaching summary
|
|
21
|
+
* lines (whatHappened) and a category breakdown (failuresByCategory).
|
|
22
|
+
*
|
|
23
|
+
* Reference notes: a comparable reference system's failure surface is
|
|
24
|
+
* text-only metadata (flat dict + appended guidance strings). Aiden's
|
|
25
|
+
* structured report is genuinely new — no patterns to port, but the
|
|
26
|
+
* single-source-of-truth synthesis approach mirrors the reference's
|
|
27
|
+
* `to_metadata()` style.
|
|
28
|
+
*
|
|
29
|
+
* Phase 3 stays consume-only: no changes to TurnState, verifier, or
|
|
30
|
+
* failureClassifier. Imports flow downstream (recoveryReport depends
|
|
31
|
+
* on TurnState's snapshot type and failureClassifier's category enum).
|
|
32
|
+
*
|
|
33
|
+
* Pure module — no I/O, no async, no side effects. Easy to unit test
|
|
34
|
+
* with synthetic snapshots.
|
|
35
|
+
*/
|
|
36
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
37
|
+
exports.extractGoal = extractGoal;
|
|
38
|
+
exports.guidanceFor = guidanceFor;
|
|
39
|
+
exports.buildRecoveryReport = buildRecoveryReport;
|
|
40
|
+
exports.enrichCardWithReport = enrichCardWithReport;
|
|
41
|
+
// ── Goal extraction ────────────────────────────────────────────────────────
|
|
42
|
+
const MAX_GOAL_CHARS = 140;
/**
 * Derive the turn's goal from the first message whose role is `user`.
 *
 * Content shapes handled (see `stringifyContent`):
 *   - string content — used as-is
 *   - array content — the `text` of every block that carries a string
 *     `text` field, joined with single spaces; blocks without one
 *     (e.g. image / tool_use blocks) contribute nothing
 *   - anything else — treated as empty
 *
 * The text is trimmed and, when longer than MAX_GOAL_CHARS, cut down and
 * suffixed with `...` so the result never exceeds MAX_GOAL_CHARS UTF-16
 * units. The cut never lands in the middle of a surrogate pair.
 *
 * @param {Array<{role?: string, content?: unknown}>} messages Conversation
 *   history. A non-array value is treated as "no user message".
 * @returns {string} The bounded goal text, or `''` when there is none.
 */
function extractGoal(messages) {
    if (!Array.isArray(messages)) {
        return '';
    }
    // `?.` tolerates null / undefined holes in the history.
    const firstUser = messages.find((m) => m?.role === 'user');
    if (!firstUser) {
        return '';
    }
    const trimmed = stringifyContent(firstUser.content).trim();
    if (trimmed.length <= MAX_GOAL_CHARS) {
        return trimmed;
    }
    // Reserve three units for the ellipsis.
    let cut = MAX_GOAL_CHARS - 3;
    // If the last kept unit is a high surrogate its partner would be
    // sliced off, leaving an unpaired surrogate in the output — back up
    // one unit so the whole code point is dropped instead.
    const lastKept = trimmed.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
        cut -= 1;
    }
    return `${trimmed.slice(0, cut)}...`;
}
/**
 * Flatten a message `content` value into plain text.
 *
 * @param {unknown} content A string, an array of content blocks, or
 *   anything else.
 * @returns {string} The string itself; for arrays, the space-joined `text`
 *   of every object block that has a string `text` field; otherwise `''`.
 */
function stringifyContent(content) {
    if (typeof content === 'string') {
        return content;
    }
    if (!Array.isArray(content)) {
        return '';
    }
    const parts = [];
    for (const block of content) {
        // Any object block with a string `text` counts, whatever its
        // `type`; a separate `type === 'text'` check would be redundant.
        if (block && typeof block === 'object' && typeof block.text === 'string') {
            parts.push(block.text);
        }
    }
    return parts.join(' ');
}
|
|
82
|
+
// ── Guidance map ───────────────────────────────────────────────────────────
|
|
83
|
+
// One user-facing guidance sentence per failure category. `other` doubles
// as the fallback for categories that have no entry of their own.
const GUIDANCE_BY_CATEGORY = {
    permission: 'Adjust permissions or surface this to the user — the tool refused, so retrying without changes will not help.',
    auth: 'Provide credentials before retrying — the tool needs auth that has not been supplied.',
    timeout: 'Network or tool deadline exceeded. Retry with a longer budget or check connectivity.',
    dependency_missing: 'A required binary or service is not available. Install it or use a different approach.',
    rate_limit: 'Upstream rate-limited the call. Wait a moment and retry, or rotate to a different credential.',
    network: 'Network unreachable or DNS failure. Check the connection and retry once it is stable.',
    invalid_input: 'The tool arguments were rejected. Re-read the tool schema and fix the arguments before retrying.',
    hallucination: 'The model used a path or name that does not exist. Re-read the surrounding state before retrying.',
    not_found: 'The target resource was not found. Verify the path or name and try again with a corrected value.',
    stale_ref: 'The page changed between snapshot and action. The observer already attempted resnapshot+retry once — re-read the visible state and try a different selector or approach.',
    manual_blocker: 'The site requires a human action (login, 2FA, captcha, or verification). Surface this to the user and wait — do not retry automatically.',
    sandbox_violation: 'Aiden\'s execution sandbox refused this operation. Surface the matched policy to the user and either widen the allowlist via AIDEN_SANDBOX_ALLOW=path1:path2 or disable the sandbox with AIDEN_SANDBOX=0 (not recommended). Denylist matches cannot be overridden — they signal sensitive paths the user explicitly wants protected.',
    trigger_misconfigured: 'The trigger spec is invalid or its prompt template references variables the payload does not supply. Inspect the trigger via `aiden trigger list` and fix the spec — retrying without changes will produce the same failure.',
    trigger_quota: 'The trigger\'s per-source fire-rate cap was hit. Investigate the upstream producer (file watcher loop, runaway webhook caller, mis-scheduled cron) or raise the fire_rate_limit on the trigger spec.',
    trigger_dead_lettered: 'The trigger event exceeded its retry budget and moved to the dead-letter queue. Review the last_error on the trigger event row and either fix the root cause + re-queue, or accept the event as lost.',
    other: 'The tool failed for an unclassified reason. Inspect the trace for details before retrying.',
};
/**
 * Look up the guidance sentence for a failure category.
 *
 * Public for tests + plugin extensions.
 *
 * @param {string} category Failure category name.
 * @returns {string} The category's guidance, or the `other` guidance when
 *   the category has no entry of its own.
 */
function guidanceFor(category) {
    // Own-property check: a plain bracket lookup would also resolve names
    // inherited from Object.prototype ('toString', 'constructor', ...) and
    // hand back a function instead of a guidance string.
    if (Object.prototype.hasOwnProperty.call(GUIDANCE_BY_CATEGORY, category)) {
        return GUIDANCE_BY_CATEGORY[category];
    }
    return GUIDANCE_BY_CATEGORY.other;
}
|
|
105
|
+
/**
 * Synthesize a RecoveryReport from a turn's diagnostic snapshot.
 *
 * Nothing here performs I/O, awaits, or reads the clock: every value in
 * the report is copied or counted from `input`.
 *
 * @param {object} input
 * @param {object} input.snapshot Diagnostic snapshot; this function reads
 *   its `toolCalls`, `verifications`, `classifications`, `recoveryEvents`
 *   and `successfulTools` collections.
 * @param {*} input.goal Passed through to the report.
 * @param {*} input.exitReason Passed through to the report.
 * @param {*} input.durationMs Passed through to the report.
 * @param {object} [input.browserState] Optional; forwarded to
 *   `buildBrowserContext`.
 * @param {object} [input.triggerContext] Optional; attached verbatim.
 * @returns {object} The report. `browserContext`, `sandboxContext` and
 *   `triggerContext` keys are present only when their value is truthy.
 */
function buildRecoveryReport(input) {
    const { snapshot, goal, exitReason, durationMs } = input;

    // Attempt counters. `total` counts recorded tool calls, while
    // succeeded / failed count verification records — so total can be
    // larger than their sum when a call has no verification attached.
    const total = snapshot.toolCalls.length;
    let succeeded = 0;
    let failed = 0;
    for (const record of snapshot.verifications) {
        if (record.verification.ok) {
            succeeded += 1;
        }
        else {
            failed += 1;
        }
    }

    // One walk over the classifications feeds two views:
    //   - failureBreakdown: occurrences per category
    //   - latestPerTool:    last classification seen for each tool name
    //     (later entries overwrite earlier ones)
    const failureBreakdown = {};
    const latestPerTool = new Map();
    for (const { name, classification } of snapshot.classifications) {
        const { category, reason, confidence } = classification;
        failureBreakdown[category] = (failureBreakdown[category] ?? 0) + 1;
        latestPerTool.set(name, { name, category, reason, confidence });
    }
    const failedTools = Array.from(latestPerTool.values());

    // Recovery events are forwarded in their recorded order, narrowed to
    // the three fields the report exposes.
    const recoveryStages = snapshot.recoveryEvents.map(({ stage, toolName, count }) => ({
        stage,
        toolName,
        count,
    }));

    // Guidance text for the dominant failure category.
    const guidance = synthesizeGuidance(failureBreakdown);

    // Optional sidecars. The two builders return null when they have
    // nothing to report; triggerContext is whatever the caller supplied.
    const browserContext = buildBrowserContext(input.browserState, snapshot);
    const sandboxContext = buildSandboxContext(snapshot);
    const triggerContext = input.triggerContext;

    const report = {
        goal,
        exitReason,
        durationMs,
        attempts: { total, succeeded, failed },
        failureBreakdown,
        failedTools,
        successfulTools: Array.from(snapshot.successfulTools),
        recoveryStages,
        guidance,
    };
    if (browserContext) {
        report.browserContext = browserContext;
    }
    if (sandboxContext) {
        report.sandboxContext = sandboxContext;
    }
    if (triggerContext) {
        report.triggerContext = triggerContext;
    }
    return report;
}
|
|
184
|
+
/**
 * Summarize this turn's `sandbox_violation` classifications.
 *
 * Each violation's code is its `matchedPattern`, falling back to
 * `sandboxViolation.code`, falling back to `''`. A code that starts with
 * `fs.` counts as a filesystem violation; every other violation counts
 * as a shell violation. The "last*" fields describe the most recent
 * violation in the list.
 *
 * @param {object} snapshot Diagnostic snapshot; only `classifications`
 *   is read.
 * @returns {object|null} `null` when no sandbox violation was recorded,
 *   otherwise `{ violationCount, fsViolations, shellViolations, lastCode,
 *   lastMatched }` plus `lastRequested` / `suggestedEnv` when the latest
 *   violation carries a requested path / recovery-hint detail.
 */
function buildSandboxContext(snapshot) {
    const codeOf = (classification) => classification.matchedPattern
        ?? classification.sandboxViolation?.code
        ?? '';

    const hits = [];
    for (const entry of snapshot.classifications) {
        if (entry.classification.category === 'sandbox_violation') {
            hits.push(entry.classification);
        }
    }
    if (hits.length === 0) {
        return null;
    }

    const latest = hits[hits.length - 1];
    const fsViolations = hits.reduce((count, classification) => (codeOf(classification).startsWith('fs.') ? count + 1 : count), 0);

    const context = {
        violationCount: hits.length,
        fsViolations,
        // Everything that is not an fs.* code is attributed to the shell.
        shellViolations: hits.length - fsViolations,
        lastCode: codeOf(latest),
        lastMatched: latest.sandboxViolation?.matchedPolicy ?? '',
    };
    const requestedPath = latest.sandboxViolation?.requestedPath;
    if (requestedPath) {
        context.lastRequested = requestedPath;
    }
    const hintDetail = latest.recoveryHint?.detail;
    if (hintDetail) {
        context.suggestedEnv = hintDetail;
    }
    return context;
}
|
|
226
|
+
/**
 * Summarize browser state for the recovery report.
 *
 * @param {object|null|undefined} browserState Optional object exposing
 *   `getTabs()` and `getActiveTab()`.
 * @param {object} snapshot Diagnostic snapshot; only `classifications`
 *   is read, to count entries whose category is `stale_ref`.
 * @returns {object|null} `null` when there is no browser state or it
 *   reports zero tabs. Otherwise `{ otherTabCount, staleRefRetries }`
 *   plus `activeTabUrl`, `activeTabTitle` and `activeBlocker` when the
 *   active tab provides a truthy `url`, `title` or `last_blocker`.
 */
function buildBrowserContext(browserState, snapshot) {
    if (!browserState) {
        return null;
    }
    const tabs = browserState.getTabs();
    if (tabs.length === 0) {
        return null;
    }

    const active = browserState.getActiveTab();

    // With an active tab, "other" means every tab not flagged is_active;
    // without one, every open tab counts as "other".
    let otherTabCount = tabs.length;
    if (active) {
        otherTabCount = 0;
        for (const tab of tabs) {
            if (!tab.is_active) {
                otherTabCount += 1;
            }
        }
    }

    let staleRefRetries = 0;
    for (const entry of snapshot.classifications) {
        if (entry.classification.category === 'stale_ref') {
            staleRefRetries += 1;
        }
    }

    const context = { otherTabCount, staleRefRetries };
    if (active) {
        const { url, title, last_blocker: blocker } = active;
        if (url) {
            context.activeTabUrl = url;
        }
        if (title) {
            context.activeTabTitle = title;
        }
        if (blocker) {
            context.activeBlocker = blocker.kind;
        }
    }
    return context;
}
|
|
263
|
+
/**
 * Pick the most-frequent failure category and return its guidance
 * string. Ties are broken by the PRIORITY list below (earlier wins):
 *   timeout > rate_limit > network > invalid_input > not_found >
 *   stale_ref > hallucination > dependency_missing > manual_blocker >
 *   sandbox_violation > trigger_misconfigured > trigger_quota >
 *   trigger_dead_lettered > permission > auth > other.
 * Categories missing from the list rank after all listed ones.
 *
 * No failures recorded → returns the generic `other` guidance. The same
 * fallback applies when the winning category has no guidance entry, so
 * the result is always a string.
 *
 * @param {Object<string, number>} breakdown Failure count per category.
 * @returns {string} Guidance sentence for the dominant category.
 */
function synthesizeGuidance(breakdown) {
    const entries = Object.entries(breakdown);
    if (entries.length === 0) {
        return GUIDANCE_BY_CATEGORY.other;
    }
    const PRIORITY = [
        'timeout', 'rate_limit', 'network', 'invalid_input',
        'not_found',
        'stale_ref', // v4.3 Phase 5 — auto-recoverable via wait+retry
        'hallucination', 'dependency_missing',
        'manual_blocker', // v4.3 Phase 5 — requires human action; semi-blocking
        'sandbox_violation', // v4.4 Phase 5 — env-var override is the specific user action
        'trigger_misconfigured', // v4.5 Phase 5a — trigger spec fix required
        'trigger_quota', // v4.5 Phase 5a — anti-thrash, producer/cap fix
        'trigger_dead_lettered', // v4.5 Phase 5a — terminal; re-queue or accept loss
        'permission', 'auth', 'other',
    ];
    // Unlisted categories sort behind every listed one.
    const rank = (category) => {
        const index = PRIORITY.indexOf(category);
        return index === -1 ? PRIORITY.length : index;
    };
    // `entries` is a fresh array from Object.entries, so sorting in place
    // does not touch the caller's breakdown object.
    entries.sort((a, b) => {
        if (b[1] !== a[1]) {
            return b[1] - a[1]; // desc by count
        }
        return rank(a[0]) - rank(b[0]); // tie → priority rank asc
    });
    // Route through guidanceFor so an unmapped dominant category yields
    // the `other` text instead of `undefined`.
    return guidanceFor(entries[0][0]);
}
|
|
298
|
+
// ── Card enrichment ────────────────────────────────────────────────────────
|
|
299
|
+
/**
 * Overlay a RecoveryReport's summary onto a base capability card.
 * Returns a new card object — the base is never mutated. When `report`
 * is undefined (or null) the base itself is returned unchanged.
 *
 * Fields on the returned card:
 *   - title / canStill / cannotReliably: copied from the base.
 *   - fix: replaced with the report's guidance text.
 *   - whatHappened: summary line produced by `buildWhatHappenedLine`.
 *   - failuresByCategory: produced by `buildFailuresPills` from the
 *     report's failure breakdown.
 *   - browserContext / sandboxContext / triggerContext: one-line
 *     summaries, each present only when the report carries the matching
 *     context AND its formatter returns a non-empty string.
 *
 * @param {object} base Base card data.
 * @param {object} [report] RecoveryReport to overlay.
 * @returns {object} The enriched card, or `base` when there is no report.
 */
function enrichCardWithReport(base, report) {
    // Honour the documented contract: no report means nothing to overlay.
    // Without this guard the property reads below throw a TypeError.
    if (report == null) {
        return base;
    }
    const whatHappened = buildWhatHappenedLine(report);
    const failuresByCategory = buildFailuresPills(report.failureBreakdown);
    // v4.3 Phase 5 — browser-context inline row, only when the report
    // carries browserContext.
    const browserContext = report.browserContext
        ? buildBrowserContextLine(report.browserContext)
        : undefined;
    // v4.4 Phase 5 — sandbox-context inline row, only when the report
    // carries sandboxContext.
    const sandboxContext = report.sandboxContext
        ? buildSandboxContextLine(report.sandboxContext)
        : undefined;
    // v4.5 Phase 5a — trigger-context inline row, only when the report
    // carries triggerContext.
    const triggerContext = report.triggerContext
        ? buildTriggerContextLine(report.triggerContext)
        : undefined;
    return {
        title: base.title,
        canStill: base.canStill,
        cannotReliably: base.cannotReliably,
        fix: report.guidance,
        whatHappened,
        failuresByCategory,
        // Empty-string lines are falsy and therefore omitted as well.
        ...(browserContext ? { browserContext } : {}),
        ...(sandboxContext ? { sandboxContext } : {}),
        ...(triggerContext ? { triggerContext } : {}),
    };
}
|
|
351
|
+
/**
 * Render a trigger context as a one-line `Trigger: …` summary.
 *
 * Segments, joined with ` · `:
 *   - `<source>/<triggerId>`            (always)
 *   - `attempt <attempt>/<maxAttempts>` (always)
 *   - `templated`                       (when `promptTemplateUsed` is truthy)
 *   - `reason=<fireReason>`             (when `fireReason` is truthy and
 *                                        not `trigger_fired`)
 *
 * Because the first two segments are unconditional, the result is never
 * an empty string.
 *
 * @param {object} ctx Trigger context.
 * @returns {string} The formatted line.
 */
function buildTriggerContextLine(ctx) {
    const { source, triggerId, attempt, maxAttempts, promptTemplateUsed, fireReason } = ctx;
    const segments = [
        `${source}/${triggerId}`,
        `attempt ${attempt}/${maxAttempts}`,
    ];
    if (promptTemplateUsed) {
        segments.push('templated');
    }
    const hasNotableReason = Boolean(fireReason) && fireReason !== 'trigger_fired';
    if (hasNotableReason) {
        segments.push(`reason=${fireReason}`);
    }
    return `Trigger: ${segments.join(' · ')}`;
}
|
|
369
|
+
/**
 * v4.3 Phase 5 — one-line browser summary for the recovery card.
 *
 * Segments, joined with ' · ', in this order:
 *   - `active=<host>` — hostname of `activeTabUrl`; the raw value is used
 *     when it does not parse as a URL or parses to an empty hostname
 *   - `<activeBlocker> blocker`
 *   - `<n> other tab(s)` — when `otherTabCount` is above zero
 *   - `<n> stale-ref retry/retries` — when `staleRefRetries` is above zero
 *
 * @param {object} ctx - browserContext taken from the recovery report.
 * @returns {string} `Browser: …` line, or '' when no segment applies.
 */
function buildBrowserContextLine(ctx) {
    const { activeTabUrl, activeBlocker, otherTabCount, staleRefRetries } = ctx;
    const segments = [];
    if (activeTabUrl) {
        // Start from the raw value and only narrow it to the hostname
        // when URL parsing succeeds and yields a non-empty host.
        let label = activeTabUrl;
        try {
            label = new URL(activeTabUrl).hostname || activeTabUrl;
        }
        catch {
            // Not a parseable URL — keep the raw value.
        }
        segments.push(`active=${label}`);
    }
    if (activeBlocker) {
        segments.push(`${activeBlocker} blocker`);
    }
    if (otherTabCount > 0) {
        const noun = otherTabCount === 1 ? 'tab' : 'tabs';
        segments.push(`${otherTabCount} other ${noun}`);
    }
    if (staleRefRetries > 0) {
        const noun = staleRefRetries === 1 ? 'retry' : 'retries';
        segments.push(`${staleRefRetries} stale-ref ${noun}`);
    }
    if (segments.length === 0) {
        return '';
    }
    return `Browser: ${segments.join(' · ')}`;
}
|
|
394
|
+
/**
 * v4.4 Phase 5 — one-line sandbox summary for the recovery card.
 * Same shape as `buildBrowserContextLine`.
 *
 * Segments, joined with ' · ', in this order:
 *   - `<violationCount> blocked` — always emitted
 *   - `<fs> fs · <shell> shell` when both counts are above zero, or
 *     `<shell> shell` when only shell violations occurred (an fs-only
 *     turn gets no breakdown segment)
 *   - `last: <lastCode>`
 *   - `try: <suggestedEnv>`
 *
 * @param {object} ctx - sandboxContext taken from the recovery report.
 * @returns {string} `Sandbox: …` line; never empty, because the blocked
 *   count segment is unconditional.
 */
function buildSandboxContextLine(ctx) {
    const segments = [`${ctx.violationCount} blocked`];
    if (ctx.shellViolations > 0) {
        const shellPart = `${ctx.shellViolations} shell`;
        // The fs count is only shown alongside a shell count.
        segments.push(ctx.fsViolations > 0
            ? `${ctx.fsViolations} fs · ${shellPart}`
            : shellPart);
    }
    if (ctx.lastCode) {
        segments.push(`last: ${ctx.lastCode}`);
    }
    if (ctx.suggestedEnv) {
        segments.push(`try: ${ctx.suggestedEnv}`);
    }
    return `Sandbox: ${segments.join(' · ')}`;
}
|
|
415
|
+
/**
 * Build the "what happened" summary line: tool-call totals followed by
 * the elapsed time in seconds (one decimal place).
 *
 * @param {object} report - reads `attempts.total`, `attempts.succeeded`,
 *   `attempts.failed` and `durationMs`.
 * @returns {string} e.g. `Tried 3 tool calls · 2 succeeded · 1 failed · 1.5s`.
 */
function buildWhatHappenedLine(report) {
    const { total, succeeded, failed } = report.attempts;
    const seconds = (report.durationMs / 1000).toFixed(1);
    const segments = [
        `Tried ${total} tool ${plural(total, 'call')}`,
        `${succeeded} succeeded`,
        `${failed} failed`,
        `${seconds}s`,
    ];
    return segments.join(' · ');
}
|
|
421
|
+
/**
 * Naive English pluraliser: returns `word` unchanged when `n` is exactly
 * 1, otherwise `word` with an 's' appended.
 *
 * @param {number} n - count that decides the form.
 * @param {string} word - singular noun.
 * @returns {string} singular or plural form.
 */
function plural(n, word) {
    if (n === 1) {
        return word;
    }
    return `${word}s`;
}
|
|
424
|
+
/**
 * Turn a `{ category: count }` breakdown into an ordered list of
 * `{ category, count }` pills.
 *
 * Ordering: count descending, ties broken by position in the priority
 * list below (earlier first). Categories missing from the list share the
 * lowest priority.
 *
 * @param {object} breakdown - failure counts keyed by category name.
 * @returns {Array<{category: string, count: number}>} sorted pills.
 */
function buildFailuresPills(breakdown) {
    // Same ordering rule as guidance synthesis: count desc, priority asc.
    const PRIORITY = [
        'timeout',
        'rate_limit',
        'network',
        'invalid_input',
        'not_found',
        'stale_ref', // v4.3 Phase 5 — auto-recoverable via wait+retry
        'hallucination',
        'dependency_missing',
        'manual_blocker', // v4.3 Phase 5 — requires human action; semi-blocking
        'sandbox_violation', // v4.4 Phase 5 — env-var override is the specific user action
        'trigger_misconfigured', // v4.5 Phase 5a — trigger spec fix required
        'trigger_quota', // v4.5 Phase 5a — anti-thrash, producer/cap fix
        'trigger_dead_lettered', // v4.5 Phase 5a — terminal; re-queue or accept loss
        'permission',
        'auth',
        'other',
    ];
    const positionByCategory = new Map(PRIORITY.map((name, index) => [name, index]));
    // Unknown categories rank after every listed one.
    const rank = (category) => (positionByCategory.has(category)
        ? positionByCategory.get(category)
        : PRIORITY.length);
    const byCountThenPriority = ([leftCategory, leftCount], [rightCategory, rightCount]) => {
        if (rightCount !== leftCount) {
            return rightCount - leftCount;
        }
        return rank(leftCategory) - rank(rightCategory);
    };
    return Object.entries(breakdown)
        .sort(byCountThenPriority)
        .map(([category, count]) => ({ category, count }));
}
|