npm - aiden-runtime - Versions diffs - 4.1.1 → 4.1.3 - Mend

aiden-runtime 4.1.1 → 4.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

package/README.md +78 -26
package/dist/cli/v4/aidenCLI.js +169 -9
package/dist/cli/v4/callbacks.js +20 -2
package/dist/cli/v4/chatSession.js +644 -16
package/dist/cli/v4/commands/auth.js +6 -3
package/dist/cli/v4/commands/doctor.js +23 -27
package/dist/cli/v4/commands/help.js +4 -0
package/dist/cli/v4/commands/index.js +10 -1
package/dist/cli/v4/commands/model.js +30 -1
package/dist/cli/v4/commands/reloadSoul.js +37 -0
package/dist/cli/v4/commands/update.js +102 -0
package/dist/cli/v4/defaultSoul.js +68 -2
package/dist/cli/v4/display/capabilityCard.js +135 -0
package/dist/cli/v4/display/sessionEndCard.js +127 -0
package/dist/cli/v4/display/toolTrail.js +172 -0
package/dist/cli/v4/display.js +492 -142
package/dist/cli/v4/doctor.js +472 -58
package/dist/cli/v4/doctorLiveness.js +65 -10
package/dist/cli/v4/promotionPrompt.js +332 -0
package/dist/cli/v4/providerBootSelector.js +144 -0
package/dist/cli/v4/replyRenderer.js +311 -20
package/dist/cli/v4/sessionSummaryGate.js +66 -0
package/dist/cli/v4/skinEngine.js +14 -3
package/dist/cli/v4/toolPreview.js +153 -0
package/dist/core/tools/nowPlaying.js +7 -15
package/dist/core/v4/aidenAgent.js +91 -29
package/dist/core/v4/capabilities.js +89 -0
package/dist/core/v4/contextCompressor.js +25 -8
package/dist/core/v4/distillationIndex.js +167 -0
package/dist/core/v4/distillationStore.js +98 -0
package/dist/core/v4/logger/logger.js +40 -9
package/dist/core/v4/promotionCandidates.js +234 -0
package/dist/core/v4/promptBuilder.js +145 -1
package/dist/core/v4/sessionDistiller.js +452 -0
package/dist/core/v4/skillMining/skillMiner.js +43 -6
package/dist/core/v4/skillOutcomeTracker.js +323 -0
package/dist/core/v4/subsystemHealth.js +143 -0
package/dist/core/v4/toolRegistry.js +16 -1
package/dist/core/v4/update/executeInstall.js +233 -0
package/dist/core/version.js +1 -1
package/dist/moat/memoryGuard.js +111 -0
package/dist/moat/plannerGuard.js +19 -0
package/dist/moat/skillTeacher.js +14 -5
package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
package/dist/providers/v4/errors.js +112 -4
package/dist/providers/v4/modelDefaults.js +65 -0
package/dist/providers/v4/registry.js +9 -2
package/dist/providers/v4/runtimeResolver.js +6 -0
package/dist/tools/v4/index.js +80 -1
package/dist/tools/v4/memory/memoryRemove.js +57 -2
package/dist/tools/v4/memory/sessionSummary.js +151 -0
package/dist/tools/v4/sessions/recallSession.js +177 -0
package/dist/tools/v4/sessions/sessionSearch.js +5 -1
package/dist/tools/v4/system/_psHelpers.js +123 -0
package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
package/dist/tools/v4/system/appClose.js +79 -0
package/dist/tools/v4/system/appInput.js +154 -0
package/dist/tools/v4/system/appLaunch.js +218 -0
package/dist/tools/v4/system/clipboardRead.js +54 -0
package/dist/tools/v4/system/clipboardWrite.js +84 -0
package/dist/tools/v4/system/mediaKey.js +109 -0
package/dist/tools/v4/system/mediaSessions.js +163 -0
package/dist/tools/v4/system/mediaTransport.js +211 -0
package/dist/tools/v4/system/osProcessList.js +99 -0
package/dist/tools/v4/system/screenshot.js +106 -0
package/dist/tools/v4/system/volumeSet.js +157 -0
package/package.json +4 -1
package/skills/system_control.md +185 -69

package/dist/core/v4/skillOutcomeTracker.js ADDED Viewed

@@ -0,0 +1,323 @@
+"use strict";
+/**
+ * Copyright (c) 2026 Shiva Deore (Taracod).
+ * Licensed under AGPL-3.0. See LICENSE for details.
+ *
+ * Aiden — local-first agent.
+ */
+/**
+ * core/v4/skillOutcomeTracker.ts — Phase v4.1.2-slice4.
+ *
+ * Track whether skills actually succeed when loaded. The mining-time
+ * confidence score (skillMining/skillMiner.ts:computeConfidence) is
+ * set once and never updated — skills that consistently produce bad
+ * tool-call traces stay confident; skills that consistently work well
+ * never accumulate evidence of that.
+ *
+ * Mechanism:
+ *   - When `skill_view` fires (the model just received a skill body),
+ *     open an attribution WINDOW for that skill: the next N tool calls
+ *     are attributed as that skill's downstream outcomes.
+ *   - Tool successes / failures are attributed to the skill (counter-bump).
+ *   - Another `skill_view` supersedes the window (last-write-wins).
+ *   - Window closes after N tool calls or when superseded.
+ *
+ * What this is NOT:
+ *   - Not a quality judge. We don't ask an LLM "did that skill help?".
+ *     Tool success is a proxy — a noisy one — but it's deterministic
+ *     and free. Per slice4 Phase 3 decision tree: Option A.
+ *   - Not a promotion engine. Surfaced via `aiden doctor`; the existing
+ *     SkillTeacher.flaggedSkillNames() flagging path stays dead (it
+ *     would change SkillLoader behavior — separate decision).
+ *
+ * Persistence:
+ *   `<skillsDir>/.skill-outcomes.json` — sidecar, atomic write
+ *   (tmp + rename), best-effort failure handling via slice3
+ *   SubsystemHealthTracker. Lazy hydrate on first `skill_view` call so
+ *   sessions that never load a skill pay zero disk I/O.
+ *
+ * Status: PHASE v4.1.2-slice4.
+ */
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.SkillOutcomeTracker = exports.ATTRIBUTION_WINDOW = void 0;
+exports.isFailure = isFailure;
+const node_fs_1 = require("node:fs");
+const node_path_1 = __importDefault(require("node:path"));
+/**
+ * Attribution window size — number of non-skill_view tool calls
+ * following a `skill_view` whose outcomes are attributed to that
+ * skill. Hard-coded per slice4 Phase 3 Q1: don't add config knobs
+ * we won't tune. If empirical signal shows 5 is wrong, change it
+ * here.
+ */
+exports.ATTRIBUTION_WINDOW = 5;
+/** Cap for `lastError.message`, in UTF-16 code units including the trailing '...' — keep snapshots small. */
+const ERROR_MESSAGE_CAP = 200;
+class SkillOutcomeTracker { // attributes downstream tool outcomes to the most recently viewed skill
+    /**
+     * @param persistPath     Absolute path to the sidecar JSON file.
+     * @param healthTracker   Optional slice3 tracker; receives persist outcomes and hydrate failures.
+     */
+    constructor(persistPath, healthTracker) {
+        this.persistPath = persistPath;
+        this.healthTracker = healthTracker;
+        /** Currently-loaded skill (last skill_view, while its window is open). */
+        this.currentSkill = null;
+        /** Tool calls remaining in the current attribution window. */
+        this.remaining = 0;
+        /** In-memory outcomes, keyed by skill name. Hydrated lazily. */
+        this.outcomes = new Map();
+        /** True once we've attempted hydration from disk. */
+        this.hydrated = false;
+        /** Pending persist requested while one is in flight. */
+        this.persistQueued = false; // `this.persisting` (the in-flight promise) is only assigned later, in queuePersist()
+    }
+    /**
+     * Unified hook compatible with `AidenAgentOptions.onToolCall`.
+     * The agent fires it as `(call, 'before')` then `(call, 'after', result)`.
+     */
+    onTool(call, phase, result) {
+        if (phase === 'before')
+            this.onToolBefore(call);
+        else // any phase value other than 'before' is handled as 'after'
+            this.onToolAfter(call, result);
+    }
+    /** Called before each tool. Opens / supersedes the attribution window; no-op unless the call is `skill_view` with a non-empty `arguments.name`. */
+    onToolBefore(call) {
+        if (call.name !== 'skill_view')
+            return;
+        const name = extractSkillName(call.arguments);
+        if (!name)
+            return;
+        // Hydrate synchronously so the bump below merges with any prior
+        // persisted state. The file is small (one row per ever-loaded
+        // skill), so the one-time sync read is cheap and avoids the
+        // ordering hazard of awaiting in an inherently sync hook.
+        this.ensureHydratedSync();
+        this.currentSkill = name;
+        this.remaining = exports.ATTRIBUTION_WINDOW;
+        this.bump(name, (o) => {
+            o.loaded += 1;
+            o.lastUsed = new Date().toISOString();
+        });
+        void this.queuePersist(); // fire-and-forget: persist() catches its own errors and reports them to healthTracker
+    }
+    /**
+     * Called after each tool. Attributes success/failure to the currently
+     * open window. `skill_view` itself does NOT attribute back to itself
+     * (the window's purpose is to grade DOWNSTREAM tools).
+     */
+    onToolAfter(call, result) {
+        if (call.name === 'skill_view')
+            return;
+        if (!this.currentSkill || this.remaining <= 0) // no open window, nothing to attribute
+            return;
+        const skill = this.currentSkill;
+        const failed = isFailure(result);
+        this.bump(skill, (o) => {
+            if (failed) {
+                o.toolFailures += 1;
+                const msg = extractErrorMessage(result);
+                if (msg) { // lastError is left untouched when no message string can be extracted
+                    o.lastError = {
+                        message: truncate(msg, ERROR_MESSAGE_CAP),
+                        at: new Date().toISOString(),
+                    };
+                }
+            }
+            else {
+                o.toolSuccesses += 1;
+            }
+        });
+        this.remaining -= 1;
+        if (this.remaining === 0) // window exhausted: close it until the next skill_view
+            this.currentSkill = null;
+        void this.queuePersist();
+    }
+    /**
+     * Snapshot for `aiden doctor`. Sorted by `loaded` descending so the most-used skills surface first.
+     * NOTE(review): returns the live in-memory records (a new array, not copied entries) and does not hydrate from disk.
+     */
+    snapshot() {
+        const arr = Array.from(this.outcomes.values());
+        arr.sort((a, b) => b.loaded - a.loaded);
+        return arr;
+    }
+    /** Number of skills currently held in memory (hydrated from disk or observed by this instance). */
+    size() {
+        return this.outcomes.size;
+    }
+    // ── private ───────────────────────────────────────────────────────
+    bump(skillName, mutator) { // get-or-create the record for skillName, then let mutator update it in place
+        const cur = this.outcomes.get(skillName) ?? {
+            skillName,
+            loaded: 0,
+            toolSuccesses: 0,
+            toolFailures: 0,
+        };
+        mutator(cur);
+        this.outcomes.set(skillName, cur);
+    }
+    /**
+     * Synchronous disk-hydration. Called once per instance lifetime on
+     * the first `skill_view` observation. The sidecar is small (one row
+     * per ever-loaded skill) so a sync read is cheap and removes the
+     * race between async hydration and immediately-following bumps.
+     *
+     * Failures (parse, EACCES) get recorded into the health tracker —
+     * doctor surfaces them. ENOENT (no file yet) is the common case on
+     * first run and stays silent.
+     */
+    ensureHydratedSync() {
+        if (this.hydrated)
+            return;
+        this.hydrated = true; // set before reading, so a failed read is not retried on later calls
+        try {
+            if (!(0, node_fs_1.existsSync)(this.persistPath))
+                return;
+            const raw = (0, node_fs_1.readFileSync)(this.persistPath, 'utf-8');
+            const parsed = JSON.parse(raw);
+            if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { // anything but a plain object is ignored silently
+                for (const [name, val] of Object.entries(parsed)) {
+                    if (val && typeof val === 'object' && !Array.isArray(val)) {
+                        const v = val;
+                        this.outcomes.set(name, {
+                            skillName: v.skillName ?? name,
+                            loaded: Number(v.loaded ?? 0), // NOTE(review): a non-numeric persisted value becomes NaN here (same for the two counters below)
+                            toolSuccesses: Number(v.toolSuccesses ?? 0),
+                            toolFailures: Number(v.toolFailures ?? 0),
+                            ...(v.lastUsed ? { lastUsed: v.lastUsed } : {}),
+                            ...(v.lastError ? { lastError: v.lastError } : {}),
+                        });
+                    }
+                }
+            }
+        }
+        catch (err) {
+            this.healthTracker?.recordFailure(err);
+        }
+    }
+    /**
+     * Test/shutdown seam. Awaits any in-flight or queued persist so the
+     * caller knows the sidecar is on disk. The agent runtime doesn't
+     * need to call this (writes are durable enough via the coalescing
+     * queue); tests use it to deterministically wait for I/O.
+     */
+    async flush() {
+        while (this.persisting) { // loop: the queued follow-up in queuePersist() may install a new promise
+            await this.persisting;
+        }
+    }
+    /**
+     * Coalescing persist. If a write is in flight, queue exactly one
+     * follow-up; further requests collapse into that single follow-up.
+     * Keeps disk I/O cheap when many tool calls happen in a burst.
+     */
+    queuePersist() {
+        if (this.persisting) {
+            this.persistQueued = true;
+            return this.persisting; // callers get the in-flight write's promise, not the follow-up's
+        }
+        this.persisting = this.persist()
+            .finally(() => {
+            const wasQueued = this.persistQueued;
+            this.persistQueued = false;
+            this.persisting = undefined;
+            if (wasQueued) {
+                // Fire-and-forget the queued follow-up.
+                void this.queuePersist();
+            }
+        });
+        return this.persisting;
+    }
+    async persist() { // writes `<persistPath>.tmp` then renames it over persistPath; errors are caught and sent to healthTracker
+        try {
+            await node_fs_1.promises.mkdir(node_path_1.default.dirname(this.persistPath), { recursive: true });
+            const payload = {};
+            for (const [k, v] of this.outcomes)
+                payload[k] = v; // NOTE(review): a skill named `__proto__` would hit the prototype setter instead of creating a key
+            const tmp = `${this.persistPath}.tmp`;
+            await node_fs_1.promises.writeFile(tmp, JSON.stringify(payload, null, 2) + '\n', 'utf-8');
+            await node_fs_1.promises.rename(tmp, this.persistPath);
+            this.healthTracker?.recordSuccess();
+        }
+        catch (err) {
+            this.healthTracker?.recordFailure(err);
+            // Best-effort: clean up tmp file if it exists. Ignore errors.
+            try {
+                await node_fs_1.promises.unlink(`${this.persistPath}.tmp`);
+            }
+            catch { /* ignore */ }
+        }
+    }
+}
+exports.SkillOutcomeTracker = SkillOutcomeTracker;
+// ── private helpers ───────────────────────────────────────────────────
+function extractSkillName(args) { // returns the trimmed `args.name`, or '' when args is not an object or name is not a string
+    if (!args || typeof args !== 'object')
+        return '';
+    const v = args.name;
+    return typeof v === 'string' ? v.trim() : '';
+}
+/**
+ * Failure classification rules (per slice4 Phase 3 explicit decision):
+ *   - `success === false`        → failure   (on the outer object or on the inner `result.result`)
+ *   - `error` truthy             → failure   (on the outer object or on the inner `result.result`)
+ *   - everything else            → success   (including a missing / falsy `result`)
+ *
+ * "Tool succeeded but result was wrong" is NOT classifiable without an
+ * LLM judge and is intentionally out of scope.
+ */
+function isFailure(result) {
+    if (!result)
+        return false;
+    // The ToolCallResult shape from providers/v4/types is { id, name, result }.
+    // Tool implementations conventionally return `{ success: boolean, error?, ... }`
+    // inside the `result` payload — both are surveyed.
+    const top = result;
+    if (top.error)
+        return true;
+    if (top.success === false)
+        return true;
+    const inner = result.result;
+    if (inner && typeof inner === 'object') { // a non-object inner payload (string, number, ...) is never treated as a failure
+        const i = inner;
+        if (i.error)
+            return true;
+        if (i.success === false)
+            return true;
+    }
+    return false;
+}
+function extractErrorMessage(result) { // first string found among: error, error.message, result.error, result.error.message; '' otherwise
+    if (!result)
+        return '';
+    const top = result;
+    if (typeof top.error === 'string')
+        return top.error;
+    if (top.error && typeof top.error === 'object') {
+        const m = top.error.message;
+        if (typeof m === 'string')
+            return m;
+    }
+    const inner = result.result; // outer object had no usable message: look at the wrapped tool payload
+    if (inner && typeof inner === 'object') {
+        const i = inner;
+        if (typeof i.error === 'string')
+            return i.error;
+        if (i.error && typeof i.error === 'object') {
+            const m = i.error.message;
+            if (typeof m === 'string')
+                return m;
+        }
+    }
+    return '';
+}
+function truncate(s, max) { // caps s at max code units, ending in '...' when cut; max < 3 is not handled (the only call in this file passes 200)
+    if (s.length <= max)
+        return s;
+    return s.slice(0, max - 3) + '...';
+}

package/dist/core/v4/subsystemHealth.js ADDED Viewed

@@ -0,0 +1,143 @@
+"use strict";
+/**
+ * Copyright (c) 2026 Shiva Deore (Taracod).
+ * Licensed under AGPL-3.0. See LICENSE for details.
+ *
+ * Aiden — local-first agent.
+ */
+/**
+ * core/v4/subsystemHealth.ts — Phase v4.1.2-slice3.
+ *
+ * Lightweight in-process telemetry for the silent-failure layers.
+ * Four subsystems (ContextCompressor, SkillTeacher, SkillMiner,
+ * Logger) historically caught errors and continued without
+ * surfacing them — masking real bugs that were diagnosable only
+ * after manual instrumentation. This module is the surface.
+ *
+ * Design (decision tree from slice3 Phase 3):
+ *   Option C — subsystem-owned state object, optionally registered
+ *   with a shared registry. The registry is constructor-injected
+ *   (no singleton — singletons leak state between parallel tests),
+ *   and every record op is O(1) and side-effect-free (no I/O, no
+ *   log writes, no recursion through the Logger we are tracking).
+ *
+ * Surface:
+ *   - `SubsystemHealth`        — read-only snapshot shape doctor renders
+ *   - `SubsystemHealthTracker` — per-subsystem owned counter
+ *   - `SubsystemHealthRegistry`— optional aggregator AidenAgent owns
+ *
+ * Subsystems may operate without a tracker (back-compat); when a
+ * tracker is wired they call `recordSuccess()` / `recordFailure(err)`
+ * at the appropriate points. The registry is read by `aiden doctor`
+ * via the AidenAgent public field.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.SubsystemHealthTracker = void 0;
+exports.createSubsystemHealthRegistry = createSubsystemHealthRegistry;
+/**
+ * Per-subsystem health counter. One instance per subsystem; cheap
+ * to construct (no I/O, no allocations beyond the counter object).
+ *
+ * Subsystems hold a private tracker (or undefined for back-compat)
+ * and call `recordSuccess()` / `recordFailure(err)` from their
+ * critical paths. The constructor only stores the id and zeroes the counters; it does
+ * not register the tracker anywhere (registry hookup is not done in this class). Doctor reads the snapshot lazily.
+ */
+class SubsystemHealthTracker {
+    /**
+     * @param subsystem  Stable id rendered by doctor. Prefer kebab-case
+     *                   ('compressor', 'skill-teacher', 'logger:file-sink').
+     */
+    constructor(subsystem) {
+        this.subsystem = subsystem;
+        this._totalCalls = 0;
+        this._totalErrors = 0;
+        this._consecutive = 0; // `_lastError` stays unset until the first recordFailure()
+    }
+    /** O(1): bump call counter, reset consecutive-failure streak. */
+    recordSuccess() {
+        this._totalCalls += 1;
+        this._consecutive = 0; // `_lastError` is intentionally kept; only the streak resets
+    }
+    /**
+     * O(1): bump call + error counters, update lastError with a
+     * length-capped message. Never logs (would recurse through the
+     * Logger we are tracking) and never writes to disk.
+     */
+    recordFailure(err) {
+        this._totalCalls += 1;
+        this._totalErrors += 1;
+        this._consecutive += 1;
+        const raw = err instanceof Error ? err.message
+            : typeof err === 'string' ? err
+                : safeStringify(err);
+        this._lastError = {
+            message: truncate(raw, 200),
+            at: new Date(),
+        };
+    }
+    /** Render the current state. Doctor invokes this on demand. `lastError` is present only once a failure has been recorded; its `consecutive` is 0 after a later success. */
+    snapshot() {
+        const snap = {
+            subsystem: this.subsystem,
+            totalCalls: this._totalCalls,
+            totalErrors: this._totalErrors,
+        };
+        if (this._lastError) {
+            snap.lastError = {
+                message: this._lastError.message,
+                at: this._lastError.at, // same Date instance as stored internally, not a copy
+                consecutive: this._consecutive,
+            };
+        }
+        return snap;
+    }
+}
+exports.SubsystemHealthTracker = SubsystemHealthTracker;
+/** Build a fresh registry. No I/O; cheap. Returns an object with `register`, `snapshot` and `reset`. */
+function createSubsystemHealthRegistry() {
+    const readers = new Map(); // subsystem id -> reader callback, private to this registry instance
+    return {
+        register(subsystem, reader) {
+            readers.set(subsystem, reader); // registering the same id again replaces the earlier reader
+        },
+        snapshot() {
+            const out = [];
+            for (const reader of readers.values()) {
+                try {
+                    const v = reader();
+                    if (Array.isArray(v)) // a reader may return one snapshot or an array of them (flattened one level)
+                        out.push(...v);
+                    else
+                        out.push(v);
+                }
+                catch {
+                    // Reader threw — skip it. Telemetry must never break doctor.
+                }
+            }
+            return out;
+        },
+        reset() {
+            readers.clear(); // forgets every registered reader; the readers' own state is not touched
+        },
+    };
+}
+// ── private helpers ───────────────────────────────────────────────────
+function truncate(s, max) { // caps s at max code units, ending in '...' when cut; max < 3 is not handled (the only call in this file passes 200)
+    if (s.length <= max)
+        return s;
+    return s.slice(0, max - 3) + '...';
+}
+function safeStringify(v) { // always returns a string: JSON text when available, otherwise String(v)
+    // `JSON.stringify(undefined)` returns the value `undefined`, not the
+    // string "undefined" — guard so the downstream length-cap doesn't
+    // crash. Symbols, functions, and circular objects also need a
+    // String() fallback.
+    try {
+        const out = JSON.stringify(v);
+        return typeof out === 'string' ? out : String(v);
+    }
+    catch {
+        return String(v);
+    }
+}

package/dist/core/v4/toolRegistry.js CHANGED Viewed

@@ -154,7 +154,22 @@ class ToolRegistry {
             }
             try {
                 const result = await handler.execute(args, context);
-                return { id: call.id, name: call.name, result };
+                // v4.1.3-repl-polish: lift `degraded` + `degradedReason` from the
+                // handler's inner result to the outer ToolCallResult so the CLI
+                // trail row can render the partial-yellow state. Tools opt in by
+                // setting these on the object they return; without this lift the
+                // flags would sit on `out.result.degraded` where callbacks.ts
+                // can't see them. Strict typeof checks avoid promoting truthy-
+                // but-wrong-shape junk (numbers, strings, nested objects).
+                const inner = result;
+                const out = { id: call.id, name: call.name, result };
+                if (typeof inner?.degraded === 'boolean' && inner.degraded) {
+                    out.degraded = true;
+                    if (typeof inner.degradedReason === 'string') {
+                        out.degradedReason = inner.degradedReason;
+                    }
+                }
+                return out;
             }
             catch (err) {
                 const message = err instanceof Error ? err.message : String(err);