npm - aiden-runtime - Versions diffs - 4.6.0 → 4.7.0 - Mend

aiden-runtime 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +488 -265
package/dist/cli/v4/aidenCLI.js +26 -1
package/dist/cli/v4/chatSession.js +13 -0
package/dist/cli/v4/commands/help.js +2 -0
package/dist/cli/v4/commands/index.js +6 -1
package/dist/cli/v4/commands/walkthrough.js +140 -0
package/dist/cli/v4/daemonAgentBuilder.js +12 -4
package/dist/cli/v4/onboarding/disclaimer.js +162 -0
package/dist/cli/v4/onboarding/loading.js +208 -0
package/dist/cli/v4/onboarding/providerPicker.js +126 -0
package/dist/cli/v4/onboarding/successScreen.js +68 -0
package/dist/cli/v4/repl/firstRunHint.js +107 -0
package/dist/cli/v4/setupWizard.js +201 -31
package/dist/core/v4/aidenAgent.js +19 -14
package/dist/core/v4/providers/modelFetch.js +179 -0
package/dist/core/v4/providers/probe.js +275 -0
package/dist/core/v4/sandboxFs.js +1 -1
package/dist/core/v4/subagent/childBuilder.js +12 -4
package/dist/core/v4/ui/banner.js +133 -0
package/dist/core/v4/ui/theme.js +164 -0
package/dist/core/version.js +1 -1
package/dist/moat/honestyEnforcement.js +143 -241
package/dist/tools/v4/ui/_uiSmokeTool.js +60 -0
package/package.json +10 -4

package/dist/moat/honestyEnforcement.js CHANGED

@@ -6,111 +6,92 @@
  * Aiden — local-first agent.
  */
 /**
- * moat/honestyEnforcement.ts — Aiden v4.0.0
+ * moat/honestyEnforcement.ts — Aiden v4.7.0 (Phase 2.3 — outcome-based verifier)
  *
- * Post-loop trace inspector. Runs after AidenAgent returns its final
- * response. Compares the response's stated actions to the actual tool
- * calls in the trace. If the model claims it did something but the trace
- * says no tool fired (or fired and failed verification), Honesty refuses
- * the claim and rewrites the response.
+ * The regex-based natural-language claim scanner (deleted in Phase 2.2)
+ * has been replaced with a deterministic outcome recorder that consumes
+ * `toolCallTrace` structurally. Two failure modes are recorded:
  *
- * The failure modes this catches:
- *   - "I saved your file to ~/notes/today.md"   → no file_write call
- *   - "I sent the email"                         → no email tool call
- *   - "I remembered that"                        → no memory_add OR memory_add returned verified=false
- *   - "I searched the web"                       → no web_search call
- *   - "I ran X"                                  → no shell_exec call
+ *   1. mutation_errored  — a tool tagged `mutates: true` (in the
+ *      registry, stamped onto trace entries at dispatch time via
+ *      `handlerMutates`) returned an `error` envelope. Path is
+ *      extracted from `result.path` when present.
  *
- * Three modes:
- *   off      — passes everything; no inspection.
- *   detect   — runs checks and populates findings, but does NOT modify the
- *              response. Useful for telemetry / canary measurement.
- *   enforce  — DEFAULT. Rewrites failed claims into honest text that lists
- *              the actual trace summary.
+ *   2. memory_unverified — a memory_* tool's result carries
+ *      `verified === false` (per Phase 9 MemoryGuard). This was
+ *      the v3 C20/C21 lying surface and remains the only memory-
+ *      specific check the verifier performs.
  *
- * Detection:
- *   1. Pattern-based (default, $0 cost) — past-tense action verbs matched
- *      against tool registry. This file owns the table.
- *   2. LLM-classified — auxiliary LLM call. Wired via the optional
- *      `llmAdapter`; defaulted off in Phase 12. Phase 13 turns it on.
+ * Modes:
+ *   off      — bypass entirely. No events recorded.
+ *   detect   — Record events; never user-visible. `findings` populated;
+ *              no `footer`.
+ *   enforce  — DEFAULT. Record events + append a short footer to the
+ *              assistant reply summarising the unverified outcomes.
+ *              The footer is APPEND-ONLY — the assistant's text is
+ *              never rewritten. (This is the key behaviour change vs
+ *              v4.6.x — append-only, never an in-place edit.)
  *
- * Critical invariant for memory:
- *   Every memory_add / memory_replace / memory_remove tool result carries
- *   a `verified` flag (per Phase 9 MemoryGuard). If the model claims
- *   "I remembered X" but `verified=false`, Honesty MUST flag this — even
- *   though a memory tool DID fire. This was the v3 C20/C21 lying surface.
- *
- * Status: PHASE 12.
+ * What the verifier intentionally does NOT do (delta vs the deleted
+ * scanner):
+ *   - It does not look at the assistant's natural-language text at all.
+ *     There's no regex matching of English verbs to tool names.
+ *   - It does not emit `no_tool_call` findings. The previous "model
+ *     claimed X but no tool fired" failure mode is gone — that was
+ *     the false-refusal class. We only record OUTCOMES that ran.
+ *   - It does not mutate `loopResult.messages`. The caller appends
+ *     the footer to its own `finalContent` string variable.
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.__test__ = exports.HonestyEnforcement = void 0;
-/** Allows optional adverbs/auxiliaries between "I" and the verb:
- *  "I have", "I also", "I just", "I successfully", "I have just", etc. */
-const I_PREFIX = String.raw `\bI\s+(?:have\s+|just\s+|also\s+|already\s+|successfully\s+|then\s+|now\s+){0,3}`;
-const PATTERNS = [
-    // ── File operations (past tense only) ─────────────────────────
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:saved|wrote|created|modified|patched|updated)\\b[^.]*\\b(?:file|to|at|in)\\b`, 'i'),
-        tools: ['file_write', 'file_patch', 'skill_manage'],
-        label: 'file_write',
-    },
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:deleted|removed)\\s+(?:the\\s+)?(?:file|directory|folder)\\b`, 'i'),
-        tools: ['file_delete'],
-        label: 'file_delete',
-    },
-    // ── Web ────────────────────────────────────────────────────────
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:searched|looked\\s+up|found(?:\\s+online)?|googled)\\b`, 'i'),
-        tools: ['web_search', 'deep_research'],
-        label: 'web_search',
-    },
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:fetched|downloaded|retrieved)\\b`, 'i'),
-        tools: ['web_fetch', 'fetch_url'],
-        label: 'web_fetch',
-    },
-    // ── Shell / execution ──────────────────────────────────────────
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:ran|executed|called)\\b`, 'i'),
-        tools: ['shell_exec', 'execute_code', 'run_python', 'run_node'],
-        label: 'shell_exec',
-    },
-    // ── Browser ────────────────────────────────────────────────────
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:navigated|clicked|typed|scrolled)\\b`, 'i'),
-        tools: [
-            'browser_navigate',
-            'open_browser',
-            'browser_click',
-            'browser_type',
-            'browser_scroll',
-        ],
-        label: 'browser_action',
-    },
-    // ── Memory (verified=true required) ────────────────────────────
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:remembered|memori[sz]ed|noted\\s+that|saved\\s+that\\s+to\\s+memory)\\b`, 'i'),
-        tools: ['memory_add', 'memory_upsert'],
-        label: 'memory_add',
-        kind: 'memory',
-    },
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:forgot(?:ten)?|removed)\\b[^.]*\\bmemory\\b`, 'i'),
-        tools: ['memory_remove', 'memory_forget'],
-        label: 'memory_remove',
-        kind: 'memory',
-    },
-    // ── Model switch ───────────────────────────────────────────────
-    {
-        pattern: new RegExp(`${I_PREFIX}(?:switched\\s+to|changed\\s+(?:to|model\\s+to)|am\\s+now\\s+using)\\s+\\S+`, 'i'),
-        tools: ['model_switch'],
-        label: 'model_switch',
-    },
-];
-/** Negation patterns. If matched at the start of a sentence containing
- *  the claim, the claim is NOT flagged. */
-const NEGATION_RE = /\b(?:couldn'?t|cannot|can'?t|wasn'?t\s+able|unable\s+to|failed\s+to|did\s+not|didn'?t|won'?t|will\s+not)\b/i;
+exports.HonestyEnforcement = void 0;
+/**
+ * Memory tools whose results carry the `verified` flag set by
+ * MemoryGuard. The list is closed — adding a new memory_* tool
+ * means extending this set.
+ */
+const MEMORY_TOOLS = new Set([
+    'memory_add',
+    'memory_replace',
+    'memory_remove',
+]);
+/**
+ * Read `result.path` when present (file_* tools' result envelopes
+ * carry it). Returns undefined otherwise. Used only for cosmetic
+ * footer detail — never affects pass/fail outcome.
+ */
+function extractPath(result) {
+    if (result && typeof result === 'object' && 'path' in result) {
+        const p = result.path;
+        if (typeof p === 'string')
+            return p;
+    }
+    return undefined;
+}
+/**
+ * Translate a `HonestyEvent` to the legacy `HonestyFinding` shape so
+ * existing downstream consumers (chatSession, telemetry) keep working.
+ * The fine-grained kind is preserved via `reason`.
+ */
+function toFinding(event) {
+    switch (event.kind) {
+        case 'mutation_errored':
+            return {
+                claim: event.tool,
+                expectedTool: event.tool,
+                found: false,
+                confidence: 1,
+                reason: 'tool_errored',
+            };
+        case 'memory_unverified':
+            return {
+                claim: event.tool,
+                expectedTool: event.tool,
+                found: false,
+                confidence: 1,
+                reason: 'memory_verified_false',
+            };
+    }
+}
 class HonestyEnforcement {
     constructor(mode = 'enforce', llmAdapter, logger) {
         this.llmAdapter = llmAdapter;
@@ -124,20 +105,63 @@ class HonestyEnforcement {
         return this.mode;
     }
     /**
-     * Inspect a finished response against the actual tool-call trace.
-     * Returns a structured result. Caller (AidenAgent) decides whether to
-     * use `correctedResponse` or `originalResponse` based on `passed`.
+     * v4.7.0 Phase 2.3 — record deterministic unverified outcomes from
+     * the per-turn tool trace. Pure function; no I/O, no side effects.
      */
-    async check(response, _messages, toolCallTrace) {
-        if (this.mode === 'off') {
-            return {
-                passed: true,
-                findings: [],
-                confidence: 1,
-                originalResponse: response,
-            };
+    recordOutcomes(trace) {
+        const events = [];
+        for (const t of trace) {
+            if (t.error && t.handlerMutates === true) {
+                events.push({
+                    kind: 'mutation_errored',
+                    tool: t.name,
+                    reason: t.error,
+                    path: extractPath(t.result),
+                });
+                continue;
+            }
+            if (MEMORY_TOOLS.has(t.name) && t.verified === false) {
+                events.push({
+                    kind: 'memory_unverified',
+                    tool: t.name,
+                    reason: 'verification failed',
+                });
+            }
         }
-        if (!response || !response.trim()) {
+        return events;
+    }
+    /**
+     * v4.7.0 Phase 2.3 — render the append-only footer used in enforce
+     * mode. Caller concatenates with a blank line; we own the lines
+     * inside. Format: one summary line + one row per event.
+     */
+    buildFooter(events) {
+        const lines = [];
+        lines.push(`⚠️ Verifier: ${events.length} tool outcome(s) not verified this turn.`);
+        for (const e of events) {
+            if (e.kind === 'mutation_errored') {
+                const where = e.path ? ` (path: ${e.path})` : '';
+                lines.push(`- ${e.tool}${where}: errored — ${e.reason}`);
+            }
+            else {
+                lines.push(`- ${e.tool}: not verified`);
+            }
+        }
+        return lines.join('\n');
+    }
+    /**
+     * v4.7.0 Phase 2.3 — entry point. Records outcome events from the
+     * trace, converts to legacy `HonestyFinding[]` for downstream
+     * consumers, and renders an append-only footer in enforce mode.
+     *
+     * NEVER rewrites `response`. The returned `footer` is what the
+     * caller appends; the original text is preserved verbatim.
+     *
+     * Off mode short-circuits without touching the trace — minimal cost
+     * for users who opt out.
+     */
+    async check(response, _messages, trace) {
+        if (this.mode === 'off') {
             return {
                 passed: true,
                 findings: [],
@@ -145,145 +169,23 @@ class HonestyEnforcement {
                 originalResponse: response,
             };
         }
-        const findings = this.detectClaimsPattern(response, toolCallTrace);
-        const failed = findings.filter((f) => !f.found);
-        const passed = failed.length === 0;
-        const confidence = findings.length === 0
-            ? 1
-            : findings.reduce((s, f) => s + f.confidence, 0) /
-                findings.length;
-        if (this.mode === 'detect') {
-            this.logger?.('info', `[HonestyEnforcement] detect mode: ${findings.length} findings (${failed.length} failed)`);
-            return {
-                passed,
-                findings,
-                confidence,
-                originalResponse: response,
-            };
+        const events = this.recordOutcomes(trace);
+        const findings = events.map(toFinding);
+        const passed = findings.length === 0;
+        let footer;
+        if (this.mode === 'enforce' && !passed) {
+            footer = this.buildFooter(events);
         }
-        // enforce mode
-        let correctedResponse;
         if (!passed) {
-            correctedResponse = this.buildCorrection(response, failed, toolCallTrace);
-            this.logger?.('warn', `[HonestyEnforcement] enforce: rewrote response (${failed.length} failed claims)`);
+            this.logger?.('info', `honesty: ${events.length} unverified outcome(s) this turn`);
         }
         return {
             passed,
             findings,
-            confidence,
+            confidence: 1,
             originalResponse: response,
-            correctedResponse,
+            footer,
         };
     }
-    // ─────────────────────────────────────────────────────────────────────
-    // pattern detection
-    // ─────────────────────────────────────────────────────────────────────
-    detectClaimsPattern(response, trace) {
-        const findings = [];
-        const sentences = splitSentences(response);
-        for (const sentence of sentences) {
-            // Skip negated sentences entirely.
-            if (NEGATION_RE.test(sentence))
-                continue;
-            for (const pat of PATTERNS) {
-                if (!pat.pattern.test(sentence))
-                    continue;
-                const matched = sentence.match(pat.pattern);
-                const claimText = matched?.[0] ?? sentence.trim();
-                const found = this.traceSatisfies(pat, trace);
-                let reason;
-                if (!found) {
-                    if (pat.kind === 'memory' && memoryFiredButUnverified(pat, trace)) {
-                        reason = 'memory_verified_false';
-                    }
-                    else if (toolFiredButErrored(pat, trace)) {
-                        reason = 'tool_errored';
-                    }
-                    else {
-                        reason = 'no_tool_call';
-                    }
-                }
-                findings.push({
-                    claim: claimText.trim(),
-                    expectedTool: pat.tools.length === 1 ? pat.tools[0] : pat.tools,
-                    found,
-                    confidence: 0.8,
-                    reason,
-                });
-            }
-        }
-        return findings;
-    }
-    traceSatisfies(pat, trace) {
-        const matching = trace.filter((t) => pat.tools.includes(t.name) && !t.error);
-        if (matching.length === 0)
-            return false;
-        if (pat.kind === 'memory') {
-            // verified must be explicitly true
-            return matching.some((m) => m.verified === true);
-        }
-        return true;
-    }
-    // ─────────────────────────────────────────────────────────────────────
-    // correction builder
-    // ─────────────────────────────────────────────────────────────────────
-    buildCorrection(_original, failed, trace) {
-        const lines = [];
-        lines.push("I shouldn't claim actions I didn't take. Honest summary of what I actually did:");
-        lines.push('');
-        if (trace.length === 0) {
-            lines.push('- No tools were called this turn.');
-        }
-        else {
-            for (const entry of trace) {
-                const status = entry.error ? `errored (${entry.error})` : 'succeeded';
-                const verified = entry.verified === false
-                    ? ' (NOT VERIFIED)'
-                    : entry.verified === true
-                        ? ' (verified)'
-                        : '';
-                lines.push(`- ${entry.name}: ${status}${verified}`);
-            }
-        }
-        lines.push('');
-        lines.push('Refused claims:');
-        for (const f of failed) {
-            const tool = Array.isArray(f.expectedTool)
-                ? f.expectedTool.join('/')
-                : f.expectedTool;
-            const why = f.reason === 'memory_verified_false'
-                ? `(memory write returned verified=false — fact was not stored)`
-                : f.reason === 'tool_errored'
-                    ? `(tool errored)`
-                    : `(no ${tool} call in trace)`;
-            lines.push(`- "${f.claim}" ${why}`);
-        }
-        return lines.join('\n');
-    }
 }
 exports.HonestyEnforcement = HonestyEnforcement;
-// ─────────────────────────────────────────────────────────────────────
-// helpers (exported for tests)
-// ─────────────────────────────────────────────────────────────────────
-function splitSentences(text) {
-    // Split on sentence terminators while keeping reasonable bounds.
-    // Don't try to be clever about abbreviations — false positives are
-    // benign (we just inspect more granular slices).
-    return text
-        .split(/(?<=[.!?])\s+|\n+/)
-        .map((s) => s.trim())
-        .filter((s) => s.length > 0);
-}
-function memoryFiredButUnverified(pat, trace) {
-    if (pat.kind !== 'memory')
-        return false;
-    return trace.some((t) => pat.tools.includes(t.name) && !t.error && t.verified === false);
-}
-function toolFiredButErrored(pat, trace) {
-    return trace.some((t) => pat.tools.includes(t.name) && !!t.error);
-}
-exports.__test__ = {
-    splitSentences,
-    PATTERNS,
-    NEGATION_RE,
-};

package/dist/tools/v4/ui/_uiSmokeTool.js ADDED

@@ -0,0 +1,60 @@
+"use strict";
+/**
+ * Copyright (c) 2026 Shiva Deore (Taracod).
+ * Licensed under AGPL-3.0. See LICENSE for details.
+ *
+ * Aiden — local-first agent.
+ */
+/**
+ * tools/v4/ui/_uiSmokeTool.ts — v4.7 Slice 1 smoke harness.
+ *
+ * Internal tool used ONLY to verify the uiOnly dispatch seam from
+ * Slice 1 (ToolHandler.uiOnly + resolveUiOnly + onUiEvent +
+ * Display.renderUiEvent). NOT for end-user LLM workflows.
+ *
+ * Registered behind `AIDEN_UI_SMOKE=1` env flag in
+ * `tools/v4/index.ts::registerAllTools`. Will be deleted once
+ * Slice 2 lands the real ui_task_update / ui_task_done tools.
+ *
+ * When invoked, the agent's dispatch loop:
+ *   - resolves uiOnly=true via the resolveUiOnly closure
+ *   - fires runOptions.onUiEvent('_ui_smoke', args)
+ *   - SKIPS execute() entirely
+ *   - SKIPS turnToolMessages push + toolCallCount increment + verifier
+ *
+ * The handler's `execute` MUST never be called by the dispatch path
+ * when uiOnly is honoured. Throws if reached — that throw is a
+ * regression alarm.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.uiSmokeTool = void 0;
+exports.uiSmokeTool = {
+    schema: {
+        name: '_ui_smoke',
+        description: 'Internal smoke-test tool for the v4.7 uiOnly dispatch path. ' +
+            'Renders a single debug line through the UI event seam. ' +
+            'Does NOT round-trip back to the model. Only available when ' +
+            'AIDEN_UI_SMOKE=1.',
+        inputSchema: {
+            type: 'object',
+            properties: {
+                message: {
+                    type: 'string',
+                    description: 'Free-text payload echoed in the rendered debug line.',
+                },
+            },
+            required: ['message'],
+        },
+    },
+    category: 'read',
+    mutates: false,
+    uiOnly: true,
+    execute() {
+        // Defensive — if `resolveUiOnly` is wired correctly, the
+        // dispatch loop short-circuits BEFORE reaching this body. A
+        // call here means the resolver returned false/undefined and
+        // the seam regressed. Throwing surfaces the regression at
+        // smoke time instead of silently behaving like a regular tool.
+        throw new Error('_ui_smoke.execute() should never be called — uiOnly dispatch path regressed');
+    },
+};

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "aiden-runtime",
-  "version": "4.6.0",
+  "version": "4.7.0",
   "publishConfig": {
     "access": "public"
   },
@@ -254,7 +254,7 @@
     "epub2": "^3.0.2",
     "execa": "^8.0.1",
     "express": "^4.18.2",
-    "form-data": "^4.0.0",
+    "form-data": "^4.0.4",
     "imap-simple": "^5.1.0",
     "js-tiktoken": "^1.0.21",
     "js-yaml": "^4.1.1",
@@ -284,7 +284,7 @@
     "uuid": "^9.0.0",
     "whatsapp-web.js": "^1.26.0",
     "wrap-ansi": "^9.0.2",
-    "ws": "^8.20.0"
+    "ws": "^8.20.1"
   },
   "optionalDependencies": {
     "decibri": "*",
@@ -296,7 +296,13 @@
     "semver": "^7.5.2",
     "postcss": "^8.5.10",
     "hono": "^4.12.16",
-    "minimatch": "^9.0.9"
+    "minimatch": "^9.0.9",
+    "qs": ">=6.14.1",
+    "tough-cookie": ">=4.1.3",
+    "protobufjs": ">=7.5.8",
+    "request": {
+      "form-data": "^2.5.5"
+    }
   },
   "devDependencies": {
     "@types/better-sqlite3": "^7.6.13",