npm - aiden-runtime - Versions diffs - 4.1.5 → 4.5.0 - Mend

aiden-runtime 4.1.5 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (163) hide show

package/README.md +250 -847
package/dist/api/server.js +32 -5
package/dist/cli/v4/aidenCLI.js +351 -53
package/dist/cli/v4/callbacks.js +170 -0
package/dist/cli/v4/chatSession.js +138 -3
package/dist/cli/v4/commands/_runtimeToggleHelpers.js +92 -0
package/dist/cli/v4/commands/browserDepth.js +45 -0
package/dist/cli/v4/commands/cron.js +264 -0
package/dist/cli/v4/commands/daemon.js +541 -0
package/dist/cli/v4/commands/daemonStatus.js +253 -0
package/dist/cli/v4/commands/help.js +7 -0
package/dist/cli/v4/commands/index.js +20 -1
package/dist/cli/v4/commands/runs.js +203 -0
package/dist/cli/v4/commands/sandbox.js +48 -0
package/dist/cli/v4/commands/suggestions.js +68 -0
package/dist/cli/v4/commands/tce.js +41 -0
package/dist/cli/v4/commands/trigger.js +378 -0
package/dist/cli/v4/commands/update.js +95 -3
package/dist/cli/v4/daemonAgentBuilder.js +142 -0
package/dist/cli/v4/defaultSoul.js +1 -1
package/dist/cli/v4/display/capabilityCard.js +26 -0
package/dist/cli/v4/display.js +18 -8
package/dist/cli/v4/replyRenderer.js +31 -23
package/dist/cli/v4/updateBootPrompt.js +170 -0
package/dist/core/playwrightBridge.js +129 -0
package/dist/core/v4/aidenAgent.js +308 -4
package/dist/core/v4/browserState.js +436 -0
package/dist/core/v4/checkpoint.js +79 -0
package/dist/core/v4/daemon/bootstrap.js +604 -0
package/dist/core/v4/daemon/cleanShutdown.js +154 -0
package/dist/core/v4/daemon/cron/cronBridge.js +126 -0
package/dist/core/v4/daemon/cron/cronEmitter.js +173 -0
package/dist/core/v4/daemon/cron/migration.js +199 -0
package/dist/core/v4/daemon/cron/misfirePolicy.js +115 -0
package/dist/core/v4/daemon/daemonConfig.js +90 -0
package/dist/core/v4/daemon/db/connection.js +106 -0
package/dist/core/v4/daemon/db/migrations.js +296 -0
package/dist/core/v4/daemon/db/schema/v1.spec.js +18 -0
package/dist/core/v4/daemon/dispatcher/agentRunner.js +98 -0
package/dist/core/v4/daemon/dispatcher/budgetGate.js +127 -0
package/dist/core/v4/daemon/dispatcher/daemonApproval.js +113 -0
package/dist/core/v4/daemon/dispatcher/dailyBudgetTracker.js +120 -0
package/dist/core/v4/daemon/dispatcher/dispatcher.js +389 -0
package/dist/core/v4/daemon/dispatcher/fireRateLimiter.js +113 -0
package/dist/core/v4/daemon/dispatcher/index.js +53 -0
package/dist/core/v4/daemon/dispatcher/promptTemplate.js +95 -0
package/dist/core/v4/daemon/dispatcher/realAgentRunner.js +356 -0
package/dist/core/v4/daemon/dispatcher/resolveModel.js +93 -0
package/dist/core/v4/daemon/dispatcher/sessionId.js +93 -0
package/dist/core/v4/daemon/drain.js +156 -0
package/dist/core/v4/daemon/eventLoopLag.js +73 -0
package/dist/core/v4/daemon/health.js +159 -0
package/dist/core/v4/daemon/idempotencyStore.js +204 -0
package/dist/core/v4/daemon/index.js +179 -0
package/dist/core/v4/daemon/instanceTracker.js +99 -0
package/dist/core/v4/daemon/resourceRegistry.js +150 -0
package/dist/core/v4/daemon/restartCode.js +32 -0
package/dist/core/v4/daemon/restartFailureCounter.js +77 -0
package/dist/core/v4/daemon/runStore.js +114 -0
package/dist/core/v4/daemon/runtimeLock.js +167 -0
package/dist/core/v4/daemon/signals.js +50 -0
package/dist/core/v4/daemon/supervisor.js +272 -0
package/dist/core/v4/daemon/triggerBus.js +279 -0
package/dist/core/v4/daemon/triggers/email/allowlist.js +70 -0
package/dist/core/v4/daemon/triggers/email/automatedSender.js +78 -0
package/dist/core/v4/daemon/triggers/email/bodyExtractor.js +0 -0
package/dist/core/v4/daemon/triggers/email/emailSeenStore.js +99 -0
package/dist/core/v4/daemon/triggers/email/emailSpec.js +107 -0
package/dist/core/v4/daemon/triggers/email/imapConnection.js +211 -0
package/dist/core/v4/daemon/triggers/email/index.js +332 -0
package/dist/core/v4/daemon/triggers/email/seenUids.js +60 -0
package/dist/core/v4/daemon/triggers/fileObservationsStore.js +93 -0
package/dist/core/v4/daemon/triggers/fileWatcher.js +253 -0
package/dist/core/v4/daemon/triggers/fileWatcherSpec.js +88 -0
package/dist/core/v4/daemon/triggers/fsIdentity.js +42 -0
package/dist/core/v4/daemon/triggers/globMatcher.js +100 -0
package/dist/core/v4/daemon/triggers/reconcile.js +206 -0
package/dist/core/v4/daemon/triggers/settleStat.js +81 -0
package/dist/core/v4/daemon/triggers/webhook.js +376 -0
package/dist/core/v4/daemon/triggers/webhookDeliveriesStore.js +109 -0
package/dist/core/v4/daemon/triggers/webhookIdempotency.js +72 -0
package/dist/core/v4/daemon/triggers/webhookRateLimit.js +56 -0
package/dist/core/v4/daemon/triggers/webhookSpec.js +76 -0
package/dist/core/v4/daemon/triggers/webhookVerifier.js +128 -0
package/dist/core/v4/daemon/types.js +15 -0
package/dist/core/v4/dockerSession.js +461 -0
package/dist/core/v4/dryRun.js +117 -0
package/dist/core/v4/failureClassifier.js +779 -0
package/dist/core/v4/recoveryReport.js +449 -0
package/dist/core/v4/runtimeToggles.js +187 -0
package/dist/core/v4/sandboxConfig.js +285 -0
package/dist/core/v4/sandboxFs.js +316 -0
package/dist/core/v4/suggestionCatalog.js +41 -0
package/dist/core/v4/suggestionEngine.js +210 -0
package/dist/core/v4/toolRegistry.js +18 -0
package/dist/core/v4/turnState.js +587 -0
package/dist/core/v4/update/checkUpdate.js +63 -3
package/dist/core/v4/update/installMethodDetect.js +115 -0
package/dist/core/v4/update/registryClient.js +121 -0
package/dist/core/v4/update/skipState.js +75 -0
package/dist/core/v4/verifier.js +448 -0
package/dist/core/version.js +1 -1
package/dist/tools/v4/browser/_observer.js +224 -0
package/dist/tools/v4/browser/browserBlocker.js +396 -0
package/dist/tools/v4/browser/browserClick.js +18 -1
package/dist/tools/v4/browser/browserClose.js +18 -1
package/dist/tools/v4/browser/browserExtract.js +5 -1
package/dist/tools/v4/browser/browserFill.js +17 -1
package/dist/tools/v4/browser/browserGetUrl.js +5 -1
package/dist/tools/v4/browser/browserNavigate.js +16 -1
package/dist/tools/v4/browser/browserScreenshot.js +5 -1
package/dist/tools/v4/browser/browserScroll.js +18 -1
package/dist/tools/v4/browser/browserType.js +17 -1
package/dist/tools/v4/browser/captchaCheck.js +5 -1
package/dist/tools/v4/executeCode.js +1 -0
package/dist/tools/v4/files/fileCopy.js +56 -2
package/dist/tools/v4/files/fileDelete.js +38 -1
package/dist/tools/v4/files/fileList.js +12 -1
package/dist/tools/v4/files/fileMove.js +59 -2
package/dist/tools/v4/files/filePatch.js +43 -1
package/dist/tools/v4/files/fileRead.js +12 -1
package/dist/tools/v4/files/fileWrite.js +41 -1
package/dist/tools/v4/index.js +71 -58
package/dist/tools/v4/memory/memoryAdd.js +14 -0
package/dist/tools/v4/memory/memoryRemove.js +14 -0
package/dist/tools/v4/memory/memoryReplace.js +15 -0
package/dist/tools/v4/memory/sessionSummary.js +12 -0
package/dist/tools/v4/process/processKill.js +19 -0
package/dist/tools/v4/process/processList.js +1 -0
package/dist/tools/v4/process/processLogRead.js +1 -0
package/dist/tools/v4/process/processSpawn.js +13 -0
package/dist/tools/v4/process/processWait.js +1 -0
package/dist/tools/v4/sessions/recallSession.js +1 -0
package/dist/tools/v4/sessions/sessionList.js +1 -0
package/dist/tools/v4/sessions/sessionSearch.js +1 -0
package/dist/tools/v4/skills/lookupToolSchema.js +2 -0
package/dist/tools/v4/skills/skillManage.js +13 -0
package/dist/tools/v4/skills/skillView.js +1 -0
package/dist/tools/v4/skills/skillsList.js +1 -0
package/dist/tools/v4/subagent/subagentFanout.js +1 -0
package/dist/tools/v4/system/aidenSelfUpdate.js +16 -0
package/dist/tools/v4/system/appClose.js +13 -0
package/dist/tools/v4/system/appInput.js +13 -0
package/dist/tools/v4/system/appLaunch.js +13 -0
package/dist/tools/v4/system/clipboardRead.js +1 -0
package/dist/tools/v4/system/clipboardWrite.js +14 -0
package/dist/tools/v4/system/mediaKey.js +12 -0
package/dist/tools/v4/system/mediaSessions.js +1 -0
package/dist/tools/v4/system/mediaTransport.js +13 -0
package/dist/tools/v4/system/naturalEvents.js +1 -0
package/dist/tools/v4/system/nowPlaying.js +1 -0
package/dist/tools/v4/system/osProcessList.js +1 -0
package/dist/tools/v4/system/screenshot.js +1 -0
package/dist/tools/v4/system/systemInfo.js +1 -0
package/dist/tools/v4/system/volumeSet.js +17 -0
package/dist/tools/v4/terminal/shellExec.js +81 -9
package/dist/tools/v4/web/deepResearch.js +1 -0
package/dist/tools/v4/web/openUrl.js +1 -0
package/dist/tools/v4/web/webFetch.js +1 -0
package/dist/tools/v4/web/webPage.js +1 -0
package/dist/tools/v4/web/webSearch.js +1 -0
package/dist/tools/v4/web/youtubeSearch.js +1 -0
package/package.json +7 -1

package/dist/core/v4/verifier.js ADDED Viewed

@@ -0,0 +1,448 @@
+"use strict";
+/**
+ * Copyright (c) 2026 Shiva Deore (Taracod).
+ * Licensed under AGPL-3.0. See LICENSE for details.
+ *
+ * Aiden — local-first agent.
+ */
+/**
+ * core/v4/verifier.ts — v4.2 Phase 1: Per-tool result verifier.
+ *
+ * After each tool dispatch, the verifier inspects the result and
+ * classifies the outcome:
+ *
+ *   ok            — tool produced a usable, non-failed output
+ *   failed        — tool errored, returned `success: false`, or matched
+ *                   a known failure shape
+ *   no_progress   — tool succeeded but produced no useful signal (empty
+ *                   payload, identical hash to a recent call — Phase 3
+ *                   wires the hash repeat detector)
+ *   low_signal    — tool succeeded but with a short / vague response
+ *                   that's informative but probably won't help the
+ *                   model make progress
+ *   unknown       — verifier couldn't classify with confidence
+ *
+ * Scope (Phase 1):
+ * - Pure inspection of `(toolName, args, result)` — NO goal awareness
+ *   (deferred to Phase 5 / task graph).
+ * - Synchronous; runs in the agent's tool-dispatch loop between
+ *   `onToolCall('after', result)` and `turnState.recordToolCall(...)`.
+ * - Default fallback handles ~99% of Aiden tools that return the
+ *   `{ success: boolean, error?: string, ...payload }` envelope.
+ * - Built-in per-tool verifiers for 5 high-signal tools where the
+ *   default envelope inspection isn't sufficient: `shell_exec`,
+ *   `web_search`, `file_write`, `file_read`, `web_fetch`.
+ * - Behind the same gate as TurnState (default ON; opt-out via
+ *   `AIDEN_TCE=0`). When disabled, the agent skips verifier
+ *   classification — the registry is still constructed (cheap) but
+ *   `resolve()` is never called inside the gated branch.
+ *
+ * Out of scope (deferred phases):
+ * - Phase 2 — typed failure reason taxonomy (timeout / auth /
+ *   hallucination / network — separate from per-tool verifier).
+ * - Phase 3 — RecoveryReport (uses verifier output + Phase 2 classifier).
+ * - Phase 4 — checkpoint/restore (uses Phase 3 state shape).
+ * - Phase 5 — task-graph sub-step verification (extends VerifierFn
+ *   signature with optional `subGoal` argument; backward-compatible).
+ *
+ * The design intentionally mirrors a layered-decision pattern from the
+ * reference system's tool-guardrail module: a pure classifier function
+ * driving a controller's threshold counters, with per-tool overrides
+ * for the small set of tools where heuristic inspection is too coarse.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.browserInteractiveVerifier = exports.webFetchVerifier = exports.fileReadVerifier = exports.fileWriteVerifier = exports.webSearchVerifier = exports.shellExecVerifier = exports.defaultVerifier = exports.VerifierRegistry = void 0;
+exports.buildDefaultRegistry = buildDefaultRegistry;
+/**
+ * Per-tool override registry with a default-fallback resolver. Cheap
+ * to construct; safe to keep instantiated even when TCE is disabled
+ * because nothing runs unless `resolve(...)` is called by the agent
+ * loop (which itself is gated).
+ *
+ * Methods:
+ *   - `constructor(fallback)` — stores the verifier returned when no
+ *     override matches. The default (`exports.defaultVerifier`) is read
+ *     when the constructor is called, not when the class is defined.
+ *   - `register(toolName, fn)` — maps `toolName` to `fn`; registering
+ *     the same name again replaces the previous entry.
+ *   - `resolve(toolName)` — returns the registered override, or the
+ *     fallback when the lookup yields `undefined` or `null`.
+ *   - `hasOverride(toolName)` — true when an entry exists for the name.
+ */
+class VerifierRegistry {
+    constructor(fallback = exports.defaultVerifier) {
+        this.overrides = new Map();
+        this.fallback = fallback;
+    }
+    register(toolName, fn) {
+        this.overrides.set(toolName, fn);
+    }
+    resolve(toolName) {
+        return this.overrides.get(toolName) ?? this.fallback;
+    }
+    /** Direct lookup for tests — returns true when a per-tool override is registered. */
+    hasOverride(toolName) {
+        return this.overrides.has(toolName);
+    }
+}
+exports.VerifierRegistry = VerifierRegistry;
+// ── Default fallback verifier ──────────────────────────────────────────────
+const SHORT_RESPONSE_THRESHOLD = 50; // chars — below this, raw strings are flagged low_signal
+const RAW_STRING_SCAN_WINDOW = 500; // chars — generic error keyword scan only looks at the head
+/**
+ * Heuristic default verifier. Ignores `_toolName` and `_args` and
+ * classifies purely from `result`. Checks run in this order:
+ *
+ *   1. Outer envelope error  → `result.error` is a non-empty string → failed (conf 1.0)
+ *   2. Inner `success: false` → failed (conf 1.0); the reason is the inner
+ *      `error` string when non-empty, else 'tool returned success:false'
+ *   3. Inner `success: true`  → ok (conf 1.0)
+ *   4. Any other non-array object (no boolean `success`) → ok (conf 0.7)
+ *   5. Raw string, empty after trim → low_signal (conf 0.4, ok: true)
+ *   6. Raw string < 50 chars after trim → low_signal (conf 0.4, ok: true)
+ *   7. Raw string whose first 500 chars (lower-cased) start with `error`
+ *      or contain `"error"` / `"failed"` → failed (conf 0.6)
+ *   8. Any other raw string → ok (conf 0.7)
+ *   9. null / undefined → unknown (conf 0.5, ok: true, reason 'null result')
+ *  10. Everything else (arrays, numbers, booleans, …) → unknown (conf 0.5, ok: true)
+ *
+ * Returns `{ ok, confidence, code, reason? }`. The 0.7 results reflect
+ * that the verifier doesn't have enough signal to be more precise
+ * without a per-tool override.
+ */
+const defaultVerifier = (_toolName, _args, result) => {
+    // 1. Outer envelope error — executor threw or wrapped a known failure.
+    if (typeof result.error === 'string' && result.error.length > 0) {
+        return {
+            ok: false,
+            confidence: 1.0,
+            code: 'failed',
+            reason: result.error,
+        };
+    }
+    const inner = result.result;
+    // 2 + 3. Typed `{ success: boolean }` envelope — the common Aiden shape.
+    if (inner !== null && typeof inner === 'object' && !Array.isArray(inner)) {
+        const obj = inner;
+        if (obj.success === false) {
+            const reason = typeof obj.error === 'string' && obj.error.length > 0
+                ? obj.error
+                : 'tool returned success:false';
+            return {
+                ok: false,
+                confidence: 1.0,
+                code: 'failed',
+                reason,
+            };
+        }
+        if (obj.success === true) {
+            return { ok: true, confidence: 1.0, code: 'ok' };
+        }
+        // No boolean `success` field — return the confidence-0.7 default.
+        return { ok: true, confidence: 0.7, code: 'ok' };
+    }
+    // 4 + 5. Raw string payload (the webSearch / deepResearch / openUrl shape).
+    if (typeof inner === 'string') {
+        const trimmed = inner.trim();
+        if (trimmed.length === 0) {
+            return {
+                ok: true,
+                confidence: 0.4,
+                code: 'low_signal',
+                reason: 'empty string result',
+            };
+        }
+        if (trimmed.length < SHORT_RESPONSE_THRESHOLD) {
+            return {
+                ok: true,
+                confidence: 0.4,
+                code: 'low_signal',
+                reason: `short result (${trimmed.length} chars)`,
+            };
+        }
+        const head = trimmed.slice(0, RAW_STRING_SCAN_WINDOW).toLowerCase();
+        if (head.startsWith('error') ||
+            head.includes('"error"') ||
+            head.includes('"failed"')) {
+            return {
+                ok: false,
+                confidence: 0.6,
+                code: 'failed',
+                reason: 'error keywords detected in raw string head',
+            };
+        }
+        return { ok: true, confidence: 0.7, code: 'ok' };
+    }
+    // null / undefined / array / number — no clear signal.
+    if (inner === null || inner === undefined) {
+        return {
+            ok: true,
+            confidence: 0.5,
+            code: 'unknown',
+            reason: 'null result',
+        };
+    }
+    return { ok: true, confidence: 0.5, code: 'unknown' };
+};
+exports.defaultVerifier = defaultVerifier;
+// ── Built-in per-tool verifiers ────────────────────────────────────────────
+/**
+ * `shell_exec` — classifies primarily from the numeric `exitCode`.
+ * Decision order:
+ *
+ *   1. Non-empty `result.error` string → failed (conf 1.0)
+ *   2. Inner result null or not an object → unknown (conf 0.5, ok: false)
+ *   3. Inner `success: false` → failed (conf 1.0), even without exitCode
+ *   4. No numeric `exitCode` → ok (conf 0.7) when `success: true`,
+ *      otherwise unknown (conf 0.5, ok: false, 'missing exitCode')
+ *   5. Non-zero `exitCode` → failed (conf 1.0) with a retry suggestion
+ *   6. Exit 0 with empty stdout after trim (a non-string stdout counts
+ *      as empty) → low_signal (conf 0.4, ok: true)
+ *   7. Exit 0 with output → ok (conf 1.0)
+ *
+ * A successful exit with empty stdout is suspicious (probe with no
+ * output) — it is surfaced as `low_signal` rather than
+ * ok-with-high-confidence so the loop controller can weight it.
+ */
+const shellExecVerifier = (_n, _a, result) => {
+    if (typeof result.error === 'string' && result.error.length > 0) {
+        return { ok: false, confidence: 1.0, code: 'failed', reason: result.error };
+    }
+    const inner = result.result;
+    if (inner === null || typeof inner !== 'object') {
+        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'non-object shell_exec result' };
+    }
+    // Typed-failure envelope short-circuit — a wrapper returning
+    // `{success: false}` without exitCode is still definitively failed.
+    if (inner.success === false) {
+        return {
+            ok: false,
+            confidence: 1.0,
+            code: 'failed',
+            reason: typeof inner.error === 'string' ? inner.error : 'success:false',
+        };
+    }
+    const exitCode = typeof inner.exitCode === 'number' ? inner.exitCode : undefined;
+    if (exitCode === undefined) {
+        // Some wrappers omit exitCode on a successful run when the
+        // underlying command was trivial (e.g. a noop). Trust the typed
+        // success flag if present; otherwise we genuinely don't know.
+        if (inner.success === true) {
+            return { ok: true, confidence: 0.7, code: 'ok' };
+        }
+        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'missing exitCode' };
+    }
+    if (exitCode !== 0) {
+        return {
+            ok: false,
+            confidence: 1.0,
+            code: 'failed',
+            reason: `non-zero exit (${exitCode})`,
+            suggestion: 'Inspect stderr and adjust the command — repeating the same invocation will not help.',
+        };
+    }
+    const stdout = typeof inner.stdout === 'string' ? inner.stdout.trim() : '';
+    if (stdout.length === 0) {
+        return {
+            ok: true,
+            confidence: 0.4,
+            code: 'low_signal',
+            reason: 'exit 0 with empty stdout',
+        };
+    }
+    return { ok: true, confidence: 1.0, code: 'ok' };
+};
+exports.shellExecVerifier = shellExecVerifier;
+/**
+ * `web_search` — returns a raw string (synthesised answer). Short
+ * responses are low-signal, not failures (often "no results found"
+ * IS the answer).
+ *
+ * Decision order:
+ *   1. Non-empty `result.error` string → failed (conf 1.0)
+ *   2. Non-string inner result → delegated to `defaultVerifier`
+ *   3. Empty after trim → low_signal (conf 0.4) with a suggestion
+ *   4. Shorter than SHORT_RESPONSE_THRESHOLD after trim → low_signal (conf 0.4)
+ *   5. Any other string → ok (conf 0.9)
+ *
+ * Note: unlike `defaultVerifier`, string results are NOT scanned for
+ * error keywords here — a long string is always classified ok.
+ */
+const webSearchVerifier = (_n, _a, result) => {
+    if (typeof result.error === 'string' && result.error.length > 0) {
+        return { ok: false, confidence: 1.0, code: 'failed', reason: result.error };
+    }
+    const inner = result.result;
+    if (typeof inner !== 'string') {
+        // Some adapters might wrap the string in `{ success, result }`.
+        return (0, exports.defaultVerifier)(_n, _a, result);
+    }
+    const trimmed = inner.trim();
+    if (trimmed.length === 0) {
+        return {
+            ok: true,
+            confidence: 0.4,
+            code: 'low_signal',
+            reason: 'empty web_search result',
+            suggestion: 'Try a different query or use web_fetch with a known URL.',
+        };
+    }
+    if (trimmed.length < SHORT_RESPONSE_THRESHOLD) {
+        return {
+            ok: true,
+            confidence: 0.4,
+            code: 'low_signal',
+            reason: `short web_search result (${trimmed.length} chars)`,
+        };
+    }
+    return { ok: true, confidence: 0.9, code: 'ok' };
+};
+exports.webSearchVerifier = webSearchVerifier;
+/**
+ * `file_write` — verify the write actually happened.
+ *
+ * Decision order:
+ *   1. Non-empty `result.error` string → failed (conf 1.0)
+ *   2. Inner result null or not an object → unknown (conf 0.5, ok: false)
+ *   3. Inner `success: false` → failed (conf 1.0)
+ *   4. Numeric `bytesWritten` equal to 0 → low_signal (conf 0.4,
+ *      ok: true) — catches the "wrote 0 bytes" pathology without
+ *      treating it as a hard failure
+ *   5. Otherwise → ok (conf 1.0)
+ *
+ * `bytesWritten` is only consulted when present as a number; an
+ * object without `success: false` is otherwise trusted.
+ */
+const fileWriteVerifier = (_n, _a, result) => {
+    if (typeof result.error === 'string' && result.error.length > 0) {
+        return { ok: false, confidence: 1.0, code: 'failed', reason: result.error };
+    }
+    const inner = result.result;
+    if (inner === null || typeof inner !== 'object') {
+        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'non-object file_write result' };
+    }
+    if (inner.success === false) {
+        return {
+            ok: false,
+            confidence: 1.0,
+            code: 'failed',
+            reason: typeof inner.error === 'string' ? inner.error : 'success:false',
+        };
+    }
+    if (typeof inner.bytesWritten === 'number' && inner.bytesWritten === 0) {
+        return {
+            ok: true,
+            confidence: 0.4,
+            code: 'low_signal',
+            reason: 'wrote 0 bytes',
+        };
+    }
+    return { ok: true, confidence: 1.0, code: 'ok' };
+};
+exports.fileWriteVerifier = fileWriteVerifier;
+/**
+ * `file_read` — verify content non-empty (a deliberately-empty file
+ * is rare; usually means a path mismatch or stale read).
+ *
+ * Decision order:
+ *   1. Non-empty `result.error` string → failed (conf 1.0)
+ *   2. Inner result null or not an object → unknown (conf 0.5, ok: false)
+ *   3. Inner `success: false` → failed (conf 1.0)
+ *   4. `content` missing, not a string, or zero-length → low_signal
+ *      (conf 0.4, ok: true). The content is not trimmed, so a
+ *      whitespace-only file counts as non-empty.
+ *   5. Otherwise → ok (conf 1.0)
+ */
+const fileReadVerifier = (_n, _a, result) => {
+    if (typeof result.error === 'string' && result.error.length > 0) {
+        return { ok: false, confidence: 1.0, code: 'failed', reason: result.error };
+    }
+    const inner = result.result;
+    if (inner === null || typeof inner !== 'object') {
+        return { ok: false, confidence: 0.5, code: 'unknown', reason: 'non-object file_read result' };
+    }
+    if (inner.success === false) {
+        return {
+            ok: false,
+            confidence: 1.0,
+            code: 'failed',
+            reason: typeof inner.error === 'string' ? inner.error : 'success:false',
+        };
+    }
+    const content = typeof inner.content === 'string' ? inner.content : '';
+    if (content.length === 0) {
+        return {
+            ok: true,
+            confidence: 0.4,
+            code: 'low_signal',
+            reason: 'empty file content',
+        };
+    }
+    return { ok: true, confidence: 1.0, code: 'ok' };
+};
+exports.fileReadVerifier = fileReadVerifier;
+/**
+ * `web_fetch` (and aliases) — verify the body is substantive. A
+ * < 100 char fetch body is almost certainly a redirect / blank
+ * page / soft-block; surface as low_signal.
+ *
+ * Decision order:
+ *   1. Non-empty `result.error` string → failed (conf 1.0)
+ *   2. Raw string result: trimmed length below WEB_FETCH_MIN_BODY →
+ *      low_signal (conf 0.4) with a suggestion; otherwise ok (conf 0.9)
+ *   3. Object result: `success: false` → failed (conf 1.0). Otherwise
+ *      the body is the first string found among `content`, `body`,
+ *      `text` (in that order; empty string if none). A trimmed length
+ *      below WEB_FETCH_MIN_BODY → low_signal (conf 0.4); otherwise
+ *      ok (conf 1.0)
+ *   4. Anything else (null, undefined, number, …) → `defaultVerifier`
+ */
+const WEB_FETCH_MIN_BODY = 100;
+const webFetchVerifier = (_n, _a, result) => {
+    if (typeof result.error === 'string' && result.error.length > 0) {
+        return { ok: false, confidence: 1.0, code: 'failed', reason: result.error };
+    }
+    const inner = result.result;
+    // Two shapes: typed `{ success, content/body }` or raw string.
+    if (typeof inner === 'string') {
+        if (inner.trim().length < WEB_FETCH_MIN_BODY) {
+            return {
+                ok: true,
+                confidence: 0.4,
+                code: 'low_signal',
+                reason: `short body (${inner.trim().length} chars)`,
+                suggestion: 'Try a different URL or check whether the page requires auth.',
+            };
+        }
+        return { ok: true, confidence: 0.9, code: 'ok' };
+    }
+    if (inner !== null && typeof inner === 'object') {
+        const obj = inner;
+        if (obj.success === false) {
+            return {
+                ok: false,
+                confidence: 1.0,
+                code: 'failed',
+                reason: typeof obj.error === 'string' ? obj.error : 'success:false',
+            };
+        }
+        const body = typeof obj.content === 'string' ? obj.content :
+            typeof obj.body === 'string' ? obj.body :
+                typeof obj.text === 'string' ? obj.text : '';
+        if (body.trim().length < WEB_FETCH_MIN_BODY) {
+            return {
+                ok: true,
+                confidence: 0.4,
+                code: 'low_signal',
+                reason: `short body (${body.trim().length} chars)`,
+            };
+        }
+        return { ok: true, confidence: 1.0, code: 'ok' };
+    }
+    return (0, exports.defaultVerifier)(_n, _a, result);
+};
+exports.webFetchVerifier = webFetchVerifier;
+/**
+ * v4.3 Phase 5 — verifier for the 3 interactive browser tools
+ * (`browser_click`, `browser_type`, `browser_fill`) and
+ * `browser_navigate`. Extends defaultVerifier with one extra check:
+ * when the tool returns `success: true` BUT Phase 1's observer flagged
+ * `needs_verifier === true` (page state didn't meaningfully change),
+ * demote `ok` to false so the classifier runs and routes to
+ * `stale_ref` (page unresponsive) for the right recovery action.
+ *
+ * Without this demotion, the `needs_verifier` field would be a
+ * dormant hint with no behavioral effect. The whole point of Phase 1
+ * capturing it was to gate this verifier check.
+ *
+ * Conservative ordering — only runs the demotion AFTER the default
+ * verifier passed. Failed calls still classify via the existing
+ * path; success-but-noop is the specific case Phase 5 handles.
+ */
+const browserInteractiveVerifier = (toolName, args, result) => {
+    const base = (0, exports.defaultVerifier)(toolName, args, result);
+    if (!base.ok)
+        return base;
+    // Read the v4.3 sidecar. Absent when browser depth is opt'd out
+    // (AIDEN_BROWSER_DEPTH=0) — in
+    // that case the verifier falls back to the default-passing result.
+    const inner = result.result;
+    if (!inner || typeof inner !== 'object')
+        return base;
+    const bs = inner.browserState;
+    if (!bs)
+        return base;
+    if (!bs.needs_verifier)
+        return base;
+    // Demote — the tool returned success but the page didn't change
+    // meaningfully. Classifier will route to stale_ref.
+    return {
+        ok: false,
+        confidence: 0.75,
+        code: bs.maybe_noop ? 'no_progress' : 'low_signal',
+        reason: bs.maybe_noop
+            ? 'tool returned success but page state did not change'
+            : `low progress (${bs.progress_score.toFixed(2)}) — UI may not have responded`,
+    };
+};
+exports.browserInteractiveVerifier = browserInteractiveVerifier;
+// ── Factory ────────────────────────────────────────────────────────────────
+/**
+ * Builds a registry pre-wired with the built-in per-tool verifiers:
+ * the five Phase 1 verifiers (`shell_exec`, `web_search`,
+ * `file_write`, `file_read`, `web_fetch`), two `web_fetch` aliases
+ * (`fetch_page`, `web_page`), and the v4.3 browser interactive
+ * verifier under four browser tool names. Every other tool name
+ * resolves to the registry's fallback (`defaultVerifier`).
+ *
+ * The agent constructs one of these in `runConversation` when TCE is
+ * enabled. Plugin authors can register their own via the returned
+ * registry instance — Phase 1 doesn't expose a public registration
+ * API, but the foundation is here.
+ *
+ * Returns a new `VerifierRegistry` on every call.
+ */
+function buildDefaultRegistry() {
+    const reg = new VerifierRegistry();
+    reg.register('shell_exec', exports.shellExecVerifier);
+    reg.register('web_search', exports.webSearchVerifier);
+    reg.register('file_write', exports.fileWriteVerifier);
+    reg.register('file_read', exports.fileReadVerifier);
+    reg.register('web_fetch', exports.webFetchVerifier);
+    // Aliases — same verifier handles related shapes.
+    reg.register('fetch_page', exports.webFetchVerifier);
+    reg.register('web_page', exports.webFetchVerifier);
+    // v4.3 Phase 5 — browser interactive verifier reads the Phase 1
+    // sidecar (`needs_verifier` / `maybe_noop`) and demotes
+    // success-but-no-progress cases so the classifier routes them to
+    // `stale_ref` recovery. Falls back to defaultVerifier when sidecar
+    // absent (opt-out via AIDEN_BROWSER_DEPTH=0).
+    reg.register('browser_click', exports.browserInteractiveVerifier);
+    reg.register('browser_type', exports.browserInteractiveVerifier);
+    reg.register('browser_fill', exports.browserInteractiveVerifier);
+    reg.register('browser_navigate', exports.browserInteractiveVerifier);
+    return reg;
+}

package/dist/core/version.js CHANGED Viewed

@@ -2,4 +2,4 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.VERSION = void 0;
 // AUTO-GENERATED by scripts/inject-version.js — do not edit by hand
-exports.VERSION = '4.1.5';
+exports.VERSION = '4.5.0';

package/dist/tools/v4/browser/_observer.js ADDED Viewed

@@ -0,0 +1,224 @@
+"use strict";
+/**
+ * Copyright (c) 2026 Shiva Deore (Taracod).
+ * Licensed under AGPL-3.0. See LICENSE for details.
+ *
+ * Aiden — local-first agent.
+ */
+/**
+ * tools/v4/browser/_observer.ts — v4.3 Phase 1 + 2: shared BrowserState
+ * observer + stale-ref retry HOC for browser ToolHandlers.
+ *
+ * One BrowserState lives per server process (lifecycle matches the
+ * persistent playwrightBridge context). Every browser tool wraps its
+ * ToolHandler in `withBrowserState(...)` so the observer's pre/post
+ * snapshot capture happens automatically.
+ *
+ * Phase 1 — observer captures pre/post snapshots and embeds them as
+ * a `browserState` sidecar on the tool result when
+ * browser depth is enabled (default ON; opt-out via
+ * AIDEN_BROWSER_DEPTH=0). No-op when disabled.
+ *
+ * Phase 2 — stale-ref recovery. When an interactive browser tool
+ * (browser_click / browser_type / browser_fill) returns a
+ * resolution-class failure (`element not found`, `not visible`,
+ * `not attached`, `timeout`, `target closed`), the HOC resnapshots
+ * and retries the inner execute ONCE with the same args. The retry
+ * logic is reactive only — no preflight tax on success paths. The
+ * retry attempt + outcome lands on `ActionResult.staleRefRetry`
+ * for Phase 5's classifier to consume.
+ *
+ * The one-retry hard cap is the consult-derived non-negotiable: a
+ * second retry doesn't help (the cause isn't transient) and starts
+ * looking like agent thrashing. If the retry fails, the original
+ * failure result is preserved — same error message, but with the
+ * `staleRefRetry: { attempted: true, succeeded: false, ... }`
+ * sidecar so the classifier can recognise the pattern.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.STALE_REF_PATTERNS = exports.STALE_REF_RETRYABLE = exports.browserState = void 0;
+exports.detectStaleRefError = detectStaleRefError;
+exports.withBrowserState = withBrowserState;
+const browserState_1 = require("../../../core/v4/browserState");
+const browserBlocker_1 = require("./browserBlocker");
+const playwrightBridge_1 = require("../../../core/playwrightBridge");
+/**
+ * Shared observer — one instance per server process. The HOC closes
+ * over this reference so all 9 browser tools share the same snapshot
+ * counter and gating decision.
+ *
+ * It is the default value of the `state` parameter of
+ * `withBrowserState`, so any handler wrapped without an explicit
+ * state argument uses this instance.
+ *
+ * Tests can construct their own BrowserState with a stubbed bridge
+ * loader and call `withBrowserState(handler, customState)` directly.
+ */
+exports.browserState = (0, browserState_1.createBrowserState)();
+// ── Phase 2 — stale-ref retry primitives ─────────────────────────────
+/**
+ * Interactive browser tools that operate on a selector. Stale-ref
+ * retry only fires for these — other tools either don't take a
+ * selector (browser_navigate, browser_close, browser_get_url) or
+ * are read-only (browser_extract, browser_screenshot, browser_scroll).
+ *
+ * `withBrowserState` checks membership with `handler.schema.name`,
+ * so the entries must match the tool schema names exactly.
+ */
+exports.STALE_REF_RETRYABLE = new Set([
+    'browser_click',
+    'browser_type',
+    'browser_fill',
+]);
+/**
+ * Error-message patterns that indicate a resolution-class failure
+ * (DOM lookup failed BEFORE any side-effect-producing action fired).
+ * Phase 2 retries only on these — never on action-failure messages
+ * (network errors, permission denials, etc.).
+ *
+ * The patterns are case-insensitive, unanchored regular expressions;
+ * one match anywhere in the error message is enough.
+ * False positives are tolerable — retry-once costs ~200ms and produces
+ * the same result on the second attempt. False negatives miss the
+ * common transient-race case, so bias toward sensitivity.
+ *
+ * `detectStaleRefError` tests them in array order and reports the
+ * `source` of the first one that matches, so order decides which
+ * reason is reported when several apply. None of them carries the
+ * `g` or `y` flag, so `.test()` keeps no `lastIndex` state between
+ * calls.
+ */
+exports.STALE_REF_PATTERNS = [
+    /element not found/i,
+    /not visible/i,
+    /not attached/i,
+    /detached from the DOM/i,
+    /target closed/i,
+    /timeout \d+ms exceeded/i,
+];
+/**
+ * Check if a tool result represents a resolution-class failure.
+ * Returns the matched pattern (as a short string) when stale, null
+ * otherwise. Pure helper, exported for tests.
+ */
+function detectStaleRefError(result) {
+    if (result === null || result === undefined || typeof result !== 'object')
+        return null;
+    const r = result;
+    if (r.success !== false)
+        return null;
+    if (typeof r.error !== 'string' || r.error.length === 0)
+        return null;
+    for (const pattern of exports.STALE_REF_PATTERNS) {
+        if (pattern.test(r.error)) {
+            return pattern.source;
+        }
+    }
+    return null;
+}
+/**
+ * Test whether a tool result represents success. Used by the HOC to
+ * decide whether the retry "succeeded" and should become canonical.
+ */
+function isSuccessResult(result) {
+    if (result === null || result === undefined || typeof result !== 'object')
+        return false;
+    return result.success === true;
+}
+const defaultPageTextFetcher = () => (0, playwrightBridge_1.pwSnapshot)(); // production page-text source: delegates to pwSnapshot from the playwright bridge
+function withBrowserState(handler, state = exports.browserState, // HOC: returns a copy of `handler` whose execute() adds state capture, blocker detection and one stale-ref retry
+/**
+ * Optional page-text fetcher. Production code uses pwSnapshot;
+ * tests inject a stub returning canned text for the blocker
+ * detection tier. The fetcher is called ONCE per action when
+ * browser depth is enabled — disabled path skips entirely.
+ *
+ * Its awaited value is read as `{ ok, text }`: blocker detection
+ * only runs when both are truthy.
+ */
+pageTextFetcher = defaultPageTextFetcher) {
+    return { // every field of `handler` is copied; only execute() is replaced
+        ...handler,
+        async execute(args, ctx) {
+            if (!state.isEnabled()) {
+                return handler.execute(args, ctx); // disabled: plain pass-through, no snapshots, no sidecar
+            }
+            const pre = await state.captureState(); // "before" snapshot; a falsy value also disables the retry below
+            let result = await handler.execute(args, ctx);
+            // v4.3 Phase 3 — manual-blocker detection. Runs on every
+            // browser-tool result when enabled. Uses the configured
+            // page-text fetcher (pwSnapshot in production). Detection
+            // never breaks the inner tool — the fetcher call sits inside
+            // the try/catch below; a throw or a non-ok/empty snapshot
+            // produces no blocker and no `blocker` sidecar field.
+            //
+            // The detected blocker is BOTH embedded on the result sidecar
+            // (Phase 5 + chat layer consumers) AND used to suppress
+            // Phase 2's stale-ref retry below. Pause-and-surface contract
+            // (Q-CDP5) — never auto-action a blocker.
+            let blocker;
+            try {
+                const snap = await pageTextFetcher();
+                if (snap.ok && snap.text) {
+                    const url = result?.url ?? ''; // '' when the tool result carries no url
+                    const detected = (0, browserBlocker_1.detectBlocker)({ text: snap.text, url });
+                    if (detected)
+                        blocker = detected;
+                }
+            }
+            catch { /* detection never breaks the inner tool */ }
+            // v4.3 Phase 4 — propagate blocker (or its absence) to the
+            // active tab's metadata in BrowserState. Cross-tab queries can
+            // then ask "is there a pending blocker on any tab" without
+            // re-running detection. No-op when state is disabled or when
+            // the tabs map has no active entry (the reconciliation in
+            // captureState above sets activeTabId).
+            try {
+                state.updateActiveTabBlocker(blocker
+                    ? {
+                        kind: blocker.kind,
+                        subtype: blocker.subtype,
+                        url: blocker.url,
+                        confidence: blocker.confidence,
+                    }
+                    : null); // null clears the blocker when none was detected this action
+            }
+            catch { /* defensive — tab updates never break the inner tool */ }
+            // v4.3 Phase 2 — stale-ref retry. Reactive: fires only after a
+            // resolution-class failure on an interactive tool. One retry
+            // hard cap. Safe because the resolution-class errors fire
+            // BEFORE any DOM event is dispatched, so retry can't double-act.
+            //
+            // v4.3 Phase 3 suppression: skip the retry when a manual
+            // blocker is present (`!blocker` gate). A blocker means the
+            // page is asking for human action — retrying the same tool
+            // call against a sign-in wall or 2FA prompt won't help and
+            // looks like agent thrashing.
+            let staleRefRetry;
+            if (pre && !blocker &&
+                exports.STALE_REF_RETRYABLE.has(handler.schema.name)) {
+                const staleReason = detectStaleRefError(result);
+                if (staleReason !== null) {
+                    // Resnapshot — the "between" state. We use it for the
+                    // diagnostic state_delta. The retry fires unconditionally
+                    // (per Q-P2-3 single-signal rule): even when DOM hash
+                    // hasn't changed, a transient race condition (element
+                    // attached one tick after the original timeout) is the
+                    // common case the retry catches.
+                    const between = await state.captureState();
+                    const state_delta = state.computeStateDelta(pre, between);
+                    const retryResult = await handler.execute(args, ctx); // NOTE(review): not inside a try/catch — if the retry throws, the error propagates and the original failure result is never returned; confirm intended
+                    const retryOk = isSuccessResult(retryResult);
+                    staleRefRetry = {
+                        attempted: true,
+                        succeeded: retryOk,
+                        reason: staleReason,
+                        state_delta,
+                    };
+                    // If retry succeeded, the retry result becomes canonical.
+                    // If retry failed, keep the original failure — its error
+                    // context is what the model needs to see, and a same-error
+                    // retry would just look like duplicated chrome.
+                    if (retryOk)
+                        result = retryResult;
+                }
+            }
+            const post = await state.captureState(); // "after" snapshot, taken after any retry
+            const observerMeta = state.buildActionResult({ pre, post });
+            if (observerMeta &&
+                result !== null && result !== undefined &&
+                typeof result === 'object' && !Array.isArray(result)) { // only plain (non-array) objects can carry the sidecar
+                const sidecar = {
+                    ...observerMeta,
+                    ...(staleRefRetry && { staleRefRetry }),
+                    ...(blocker && { blocker }),
+                };
+                return { ...result, browserState: sidecar }; // shallow copy; the inner tool's result object is not mutated
+            }
+            return result; // no observer metadata, or a non-object/array result: returned untouched
+        },
+    };
+}