npm - aiden-runtime - Versions diffs - 4.1.1 → 4.1.2 - Mend

aiden-runtime 4.1.1 → 4.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/README.md +78 -26
package/dist/cli/v4/aidenCLI.js +159 -9
package/dist/cli/v4/callbacks.js +5 -2
package/dist/cli/v4/chatSession.js +525 -15
package/dist/cli/v4/commands/auth.js +6 -3
package/dist/cli/v4/commands/help.js +4 -0
package/dist/cli/v4/commands/index.js +10 -1
package/dist/cli/v4/commands/reloadSoul.js +37 -0
package/dist/cli/v4/commands/update.js +102 -0
package/dist/cli/v4/defaultSoul.js +68 -2
package/dist/cli/v4/display.js +28 -10
package/dist/cli/v4/doctor.js +112 -0
package/dist/cli/v4/doctorLiveness.js +65 -10
package/dist/cli/v4/promotionPrompt.js +202 -0
package/dist/cli/v4/providerBootSelector.js +144 -0
package/dist/cli/v4/sessionSummaryGate.js +66 -0
package/dist/cli/v4/toolPreview.js +139 -0
package/dist/core/v4/aidenAgent.js +91 -29
package/dist/core/v4/capabilities.js +89 -0
package/dist/core/v4/contextCompressor.js +25 -8
package/dist/core/v4/distillationIndex.js +167 -0
package/dist/core/v4/distillationStore.js +98 -0
package/dist/core/v4/logger/logger.js +40 -9
package/dist/core/v4/promotionCandidates.js +234 -0
package/dist/core/v4/promptBuilder.js +145 -1
package/dist/core/v4/sessionDistiller.js +405 -0
package/dist/core/v4/skillMining/skillMiner.js +43 -6
package/dist/core/v4/skillOutcomeTracker.js +323 -0
package/dist/core/v4/subsystemHealth.js +143 -0
package/dist/core/v4/update/executeInstall.js +233 -0
package/dist/core/version.js +1 -1
package/dist/moat/memoryGuard.js +111 -0
package/dist/moat/skillTeacher.js +14 -5
package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
package/dist/providers/v4/errors.js +20 -4
package/dist/providers/v4/modelDefaults.js +65 -0
package/dist/providers/v4/registry.js +9 -2
package/dist/providers/v4/runtimeResolver.js +6 -0
package/dist/tools/v4/index.js +57 -1
package/dist/tools/v4/memory/memoryRemove.js +57 -2
package/dist/tools/v4/memory/sessionSummary.js +151 -0
package/dist/tools/v4/sessions/recallSession.js +163 -0
package/dist/tools/v4/sessions/sessionSearch.js +5 -1
package/dist/tools/v4/system/_psHelpers.js +55 -0
package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
package/dist/tools/v4/system/appClose.js +79 -0
package/dist/tools/v4/system/appLaunch.js +92 -0
package/dist/tools/v4/system/clipboardRead.js +54 -0
package/dist/tools/v4/system/clipboardWrite.js +84 -0
package/dist/tools/v4/system/mediaKey.js +78 -0
package/dist/tools/v4/system/osProcessList.js +99 -0
package/dist/tools/v4/system/screenshot.js +106 -0
package/dist/tools/v4/system/volumeSet.js +157 -0
package/package.json +4 -1
package/skills/system_control.md +135 -69

package/dist/cli/v4/commands/index.js CHANGED Viewed

@@ -12,7 +12,7 @@
  * and registers each on the global CommandRegistry at boot.
  */
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.allCommands = exports.history = exports.show = exports.status = exports.voice = exports.channel = exports.setup = exports.cron = exports.doctor = exports.license = exports.auth = exports.plugins = exports.streaming = exports.debugPrompt = exports.identity = exports.providers = exports.quit = exports.clear = exports.verbose = exports.reasoning = exports.reloadMcp = exports.skills = exports.skin = exports.yolo = exports.usage = exports.compress = exports.title = exports.save = exports.personality = exports.model = exports.tools = exports.help = void 0;
+exports.allCommands = exports.update = exports.reloadSoul = exports.history = exports.show = exports.status = exports.voice = exports.channel = exports.setup = exports.cron = exports.doctor = exports.license = exports.auth = exports.plugins = exports.streaming = exports.debugPrompt = exports.identity = exports.providers = exports.quit = exports.clear = exports.verbose = exports.reasoning = exports.reloadMcp = exports.skills = exports.skin = exports.yolo = exports.usage = exports.compress = exports.title = exports.save = exports.personality = exports.model = exports.tools = exports.help = void 0;
 const help_1 = require("./help");
 Object.defineProperty(exports, "help", { enumerable: true, get: function () { return help_1.help; } });
 const tools_1 = require("./tools");
@@ -75,6 +75,10 @@ const show_1 = require("./show");
 Object.defineProperty(exports, "show", { enumerable: true, get: function () { return show_1.show; } });
 const history_1 = require("./history");
 Object.defineProperty(exports, "history", { enumerable: true, get: function () { return history_1.history; } });
+const reloadSoul_1 = require("./reloadSoul");
+Object.defineProperty(exports, "reloadSoul", { enumerable: true, get: function () { return reloadSoul_1.reloadSoul; } });
+const update_1 = require("./update");
+Object.defineProperty(exports, "update", { enumerable: true, get: function () { return update_1.update; } });
 /** All built-in system commands, in canonical order. */
 exports.allCommands = [
     help_1.help,
@@ -103,9 +107,14 @@ exports.allCommands = [
     status_1.status,
     show_1.show,
     history_1.history,
+    reloadSoul_1.reloadSoul,
     reloadMcp_1.reloadMcp,
     reasoning_1.reasoning,
     verbose_1.verbose,
+    // Phase v4.1.2-update: /update + /update install — fresh registry
+    // probe + shared executeInstall executor (also wired into
+    // aiden_self_update tool for natural-language requests).
+    update_1.update,
     clear_1.clear,
     quit_1.quit,
 ];

package/dist/cli/v4/commands/reloadSoul.js ADDED Viewed

@@ -0,0 +1,37 @@
+"use strict";
+/**
+ * Copyright (c) 2026 Shiva Deore (Taracod).
+ * Licensed under AGPL-3.0. See LICENSE for details.
+ *
+ * Aiden — local-first agent.
+ */
+/**
+ * cli/v4/commands/reloadSoul.ts — Phase v4.1.2 alive-core.
+ *
+ * `/reload-soul` — explicit fallback for the SOUL.md file-watcher path.
+ * Some filesystems (network mounts, certain WSL configs) don't support
+ * `fs.watch` reliably; this command lets users force a system-prompt
+ * rebuild after editing SOUL.md without restarting `aiden`.
+ *
+ * Mechanism: marks the agent's 'soul' dirty bit; the next turn calls
+ * `refreshSystemPromptIfDirty()` which invalidates the cached prompt.
+ * `PromptBuilder.build()` then re-reads SOUL.md from disk.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.reloadSoul = void 0;
+exports.reloadSoul = {
+    name: 'reload-soul',
+    description: 'Re-read SOUL.md from disk on the next turn (manual cache invalidation).',
+    category: 'system',
+    icon: '🔁',
+    aliases: ['soul-reload'],
+    handler: async (ctx) => {
+        if (!ctx.agent) {
+            ctx.display.warn('Reload-soul cannot run before the agent boots.');
+            return {};
+        }
+        ctx.agent.markMemoryDirty('soul');
+        ctx.display.success('SOUL.md flagged for reload — the next turn will pick up your edits.');
+        return {};
+    },
+};

package/dist/cli/v4/commands/update.js ADDED Viewed

@@ -0,0 +1,102 @@
+"use strict";
+/**
+ * Copyright (c) 2026 Shiva Deore (Taracod).
+ * Licensed under AGPL-3.0. See LICENSE for details.
+ *
+ * Aiden — local-first agent.
+ */
+/**
+ * cli/v4/commands/update.ts — Phase v4.1.2-update.
+ *
+ *   /update          — bypass the boot-time 6h cache, probe npm registry
+ *                      fresh, print current vs latest with hint.
+ *   /update install  — spawn `npm install -g aiden-runtime@latest`
+ *                      via the shared executeInstall executor; print
+ *                      restart hint on success or platform-specific
+ *                      remediation on permission failure.
+ *
+ * No auto-restart on success — the user keeps control by typing
+ * /quit and re-launching aiden. Honest UX: never claim the current
+ * process is upgraded after a successful install. Auto-restart of
+ * a node REPL via re-exec is also fragile across Windows/macOS/
+ * Linux, so the explicit /quit path is both honest and reliable.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.update = void 0;
+const version_1 = require("../../../core/version");
+const checkUpdate_1 = require("../../../core/v4/update/checkUpdate");
+const executeInstall_1 = require("../../../core/v4/update/executeInstall");
+async function printStatus(ctx) {
+    if (!ctx.paths) {
+        ctx.display.warn('/update needs Aiden user-data paths — try in a real session.');
+        return;
+    }
+    ctx.display.dim('Checking for updates…');
+    // cacheTtlMs: 0 — user explicitly asked, so bypass the 6h boot cache.
+    const status = await (0, checkUpdate_1.checkForUpdate)({
+        paths: ctx.paths,
+        installedVersion: version_1.VERSION,
+        cacheTtlMs: 0,
+    });
+    ctx.display.write(`  installed: v${status.installed}\n`);
+    if (status.latest === null) {
+        ctx.display.write('  latest:    unknown (registry unreachable)\n');
+        ctx.display.dim('Could not reach the npm registry. Check your network and try again.');
+        return;
+    }
+    ctx.display.write(`  latest:    v${status.latest}\n`);
+    if (status.updateAvailable) {
+        ctx.display.write(`\n  update available: v${status.installed} → v${status.latest}\n` +
+            `  run \`/update install\` to install, or \`npm install -g aiden-runtime@latest\` manually.\n`);
+    }
+    else {
+        ctx.display.dim("You're on the latest version.");
+    }
+}
+async function runInstall(ctx) {
+    if (!ctx.paths) {
+        ctx.display.warn('/update install needs Aiden user-data paths — try in a real session.');
+        return;
+    }
+    // Status probe first so we don't run a no-op install. Also bypasses
+    // cache — same rationale as the bare /update path.
+    ctx.display.dim('Checking for updates…');
+    const status = await (0, checkUpdate_1.checkForUpdate)({
+        paths: ctx.paths,
+        installedVersion: version_1.VERSION,
+        cacheTtlMs: 0,
+    });
+    if (status.latest === null) {
+        ctx.display.warn("Couldn't check for updates (registry unreachable). " +
+            'Try `/update` first, or run `npm install -g aiden-runtime@latest` manually.');
+        return;
+    }
+    if (!status.updateAvailable) {
+        ctx.display.dim(`You're already on the latest version (v${status.installed}).`);
+        return;
+    }
+    ctx.display.write(`Installing aiden-runtime v${status.latest} (current: v${status.installed})…\n`);
+    const result = await (0, executeInstall_1.executeInstall)();
+    if (result.success) {
+        const v = result.installedVersion ?? status.latest;
+        ctx.display.write(`\n  ✓ aiden-runtime v${v} installed.\n`);
+        ctx.display.dim('Restart Aiden to apply: type /quit then re-run `aiden`.');
+        return;
+    }
+    ctx.display.warn(result.error ?? 'Install failed (no error message).');
+}
+exports.update = {
+    name: 'update',
+    description: 'Check for / install the latest aiden-runtime. Use "install" subcommand to apply.',
+    category: 'system',
+    icon: '⬆',
+    handler: async (ctx) => {
+        const sub = (ctx.args[0] ?? '').toLowerCase();
+        if (sub === 'install') {
+            await runInstall(ctx);
+        }
+        else {
+            await printStatus(ctx);
+        }
+    },
+};

package/dist/cli/v4/defaultSoul.js CHANGED Viewed

@@ -30,7 +30,7 @@ exports.PREVIOUS_BUNDLED_SOULS = exports.DEFAULT_SOUL_MD = exports.BUNDLED_SOUL_
 // <act_dont_ask>. ensureSoulMdSeeded compares this against the user's
 // on-disk SOUL.md to decide whether to silent-replace (matches a prior
 // bundled default) or preserve+notify (user-edited).
-exports.BUNDLED_SOUL_VERSION = '16h';
+exports.BUNDLED_SOUL_VERSION = 'v4.1.2';
 exports.DEFAULT_SOUL_MD = `You are Aiden — a local-first AI agent built by Taracod.
 Identity:
@@ -88,7 +88,6 @@ asking the user what to do next.
 </keep_going>
 Limits:
-- You're a CLI agent in v4.0.0. No voice, no scheduled jobs, no messaging gateway yet — those are v4.1.
 - You can't bypass approval prompts for dangerous commands.
 - You don't lie to look smart. If you don't know, you say so.
 `;
@@ -184,6 +183,73 @@ the tool calls within a single turn instead of returning halfway and
 asking the user what to do next.
 </keep_going>
+Limits:
+- You're a CLI agent in v4.0.0. No voice, no scheduled jobs, no messaging gateway yet — those are v4.1.
+- You can't bypass approval prompts for dangerous commands.
+- You don't lie to look smart. If you don't know, you say so.
+`,
+    // 16h default — media-search anti-pattern + skill_view reference,
+    // but still carried the stale 'v4.0.0 / planned for v4.1' limits line
+    // claiming voice, cron, and messaging were not yet shipped. Phase
+    // v4.1.2-followup strips that line because cron / Telegram / 9 channels
+    // ALL shipped in v4.1.0. Users on the v4.1.0 / v4.1.1 install have this
+    // verbatim text on disk; silent-upgrade picks them up here.
+    `You are Aiden — a local-first AI agent built by Taracod.
+Identity:
+- You run on the user's machine, native Windows/Linux/macOS (not WSL2).
+- You have 72 bundled skills + access to install more via skills.sh.
+- You remember past sessions via persistent storage.
+- You have 40 tools spanning files, browser, terminal, web, memory.
+Voice:
+- Direct. No fluff. Match the user's energy.
+- Honest above all — if you didn't do something, say so. If you're not sure, say so.
+- You never claim to "have run" a tool unless the trace shows it.
+Behavior:
+- Default to action over discussion. The user wants results.
+- When asked who you are, identify as Aiden. Not "a large language model."
+- When asked what you can do, mention specific skills/tools, not generic capabilities.
+- If user mentions trading/NSE/markets, you have specialized skills for that.
+<act_dont_ask>
+When a request has an obvious default interpretation, act on it
+immediately instead of asking for clarification. Examples:
+- "play me a popular song" / "play X on youtube" → load skill_view(media-search)
+  and follow it. Substitute fuzzy phrases ("popular song") with a specific
+  chart-topper BEFORE searching, then open_url a /watch?v= URL once.
+  NEVER search verbatim "popular song" — that returns articles, not music.
+- "what files are in my Downloads?" → file_list on Downloads. Don't ask
+  "which user?" — it's the current user.
+- "is port 443 open?" → check this machine. Don't ask "open where?"
+Only ask for clarification when the ambiguity genuinely changes which
+tool you would call.
+</act_dont_ask>
+<prerequisite_checks>
+Before acting, check whether prerequisite discovery, lookup, or
+context-gathering steps are needed. If a step depends on output from a
+prior step, resolve that dependency first. Don't skip prerequisite
+steps just because the final action seems obvious.
+</prerequisite_checks>
+<missing_context>
+If required context is missing, do NOT guess or hallucinate. Use the
+appropriate lookup tool when missing information is retrievable
+(file_read, file_list, web_search, fetch_url, session_search,
+system_info). Ask a clarifying question ONLY when no tool can resolve
+the ambiguity.
+</missing_context>
+<keep_going>
+Work autonomously until the task is fully resolved. Don't stop with a
+plan — execute it. Multi-step tasks (open browser → search → click
+result; or list files → read each → summarise) are expected; chain
+the tool calls within a single turn instead of returning halfway and
+asking the user what to do next.
+</keep_going>
 Limits:
 - You're a CLI agent in v4.0.0. No voice, no scheduled jobs, no messaging gateway yet — those are v4.1.
 - You can't bypass approval prompts for dangerous commands.

package/dist/cli/v4/display.js CHANGED Viewed

@@ -42,6 +42,7 @@ const box_1 = require("./box");
 const replyRenderer_1 = require("./replyRenderer");
 // Optional "Sources" footer when AIDEN_CITATIONS=1 (default off).
 const citationFooter_1 = require("./citationFooter");
+const toolPreview_1 = require("./toolPreview");
 /**
  * Phase 26.2.7 — category emoji icons for the tool-row prefix when
  * `AIDEN_UI_ICONS=1` is set in the environment. Default OFF (the
@@ -393,13 +394,18 @@ class Display {
         const pill = (on, label, value) => `${dot(on)} ${lab(label)} ${val(value)}`;
         const providerOk = args.providerOk !== false;
         const modelValue = providerOk ? args.model : 'not configured';
-        return ('  ' +
-            [
-                pill(args.coreOnline, 'core', args.coreOnline ? 'online' : 'starting'),
-                pill(true, 'mode', args.mode),
-                pill(providerOk, 'model', modelValue),
-                pill(args.memoryActive, 'memory', args.memoryActive ? 'active' : 'off'),
-            ].join('    '));
+        const pills = [
+            pill(args.coreOnline, 'core', args.coreOnline ? 'online' : 'starting'),
+            pill(true, 'mode', args.mode),
+            pill(providerOk, 'model', modelValue),
+            pill(args.memoryActive, 'memory', args.memoryActive ? 'active' : 'off'),
+        ];
+        if (args.version) {
+            // Version pill: dot + value, no label (the `v` prefix is the label).
+            // Always-on dot — informational, not a health indicator.
+            pills.push(`${dot(true)} ${val(`v${args.version}`)}`);
+        }
+        return '  ' + pills.join('    ');
     }
     /**
      * Two-column block (Environment + Capabilities). Side-by-side when
@@ -794,11 +800,24 @@ class Display {
         };
     }
     /**
-     * Pretty-print a tool call before it executes. Args are JSON-stringified
-     * with a 200-char hard cap so megabyte arguments don't flood the screen.
+     * Pretty-print a tool call before it executes. Phase v4.1.2 first
+     * consults the `TOOL_PRIMARY_ARG` map in `toolPreview.ts` to render
+     * just the meaningful argument (e.g. `terminal: npm test`); falls
+     * back to the legacy full-JSON stringification (200-char hard cap)
+     * for tools that aren't in the map.
      */
     toolPreview(name, args) {
         const sk = this.skin;
+        const arrow = sk.getActive().glyphs?.arrow ?? '>';
+        // Phase v4.1.2: per-tool primary-arg preview.
+        const preview = (0, toolPreview_1.buildToolPreview)(name, args);
+        if (preview !== null) {
+            if (preview === '') {
+                return `${sk.applyColors(arrow, 'tool')} ${sk.applyColors(name, 'tool')}`;
+            }
+            return `${sk.applyColors(arrow, 'tool')} ${sk.applyColors(name, 'tool')} ${sk.applyColors(preview, 'muted')}`;
+        }
+        // Unknown tool — original behaviour (full JSON, 200-char cap).
         let serialized;
         try {
             serialized = JSON.stringify(args);
@@ -808,7 +827,6 @@ class Display {
         }
         if (serialized.length > 200)
             serialized = `${serialized.slice(0, 197)}...`;
-        const arrow = sk.getActive().glyphs?.arrow ?? '>';
         return `${sk.applyColors(arrow, 'tool')} ${sk.applyColors(name, 'tool')} ${sk.applyColors(serialized, 'muted')}`;
     }
     /**

package/dist/cli/v4/doctor.js CHANGED Viewed

@@ -55,6 +55,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.renderSubsystemHealthSection = renderSubsystemHealthSection;
+exports.renderSkillOutcomesSection = renderSkillOutcomesSection;
 exports.resolveBinaryPath = resolveBinaryPath;
 exports._resetBinaryResolutionCacheForTests = _resetBinaryResolutionCacheForTests;
 exports.buildProbeInvocation = buildProbeInvocation;
@@ -83,6 +85,104 @@ const license_1 = require("../../core/v4/license");
 const checkUpdate_1 = require("../../core/v4/update/checkUpdate");
 const box_1 = require("./box");
 const audioBackend_1 = require("../../core/voice/audioBackend");
+/**
+ * Phase v4.1.2-slice3: render the Subsystem health section. Decision
+ * tree (per slice3 Phase 3 Q4):
+ *   - registry undefined, or snapshot() empty → render nothing (no live state to report)
+ *   - all subsystems healthy → one-line green summary
+ *   - any degradation → expand block with last-error per failed sub
+ *
+ * The Honesty layer is intentionally listed as "(not instrumented yet)"
+ * when the expanded block fires, because the audit determined the
+ * pure-pattern path has no I/O failure surface today.
+ */
+function renderSubsystemHealthSection(registry) {
+    if (!registry)
+        return '';
+    const snaps = registry.snapshot();
+    if (snaps.length === 0)
+        return '';
+    const degraded = snaps.filter((s) => s.totalErrors > 0);
+    if (degraded.length === 0) {
+        return `\nSubsystem health: all green (${snaps.length} subsystems instrumented)\n`;
+    }
+    // Expanded form. Per-subsystem rows (streak shown only when > 1):
+    //   [ok] name     N calls, 0 errors
+    //   [x] name      N calls, E errors (K consecutive)  (last <duration> ago: "message")
+    //   [-]  honesty  (not instrumented yet)
+    const lines = ['\nSubsystem health'];
+    for (const s of snaps) {
+        const mark = s.totalErrors > 0 ? 'x' : 'ok';
+        const stats = `${s.totalCalls} call${s.totalCalls === 1 ? '' : 's'}, ${s.totalErrors} error${s.totalErrors === 1 ? '' : 's'}`;
+        if (s.lastError) {
+            const ago = humanAge(Date.now() - s.lastError.at.getTime());
+            const streak = s.lastError.consecutive > 1
+                ? ` (${s.lastError.consecutive} consecutive)`
+                : '';
+            lines.push(`  [${mark}] ${s.subsystem.padEnd(16)} ${stats}${streak}  (last ${ago} ago: "${s.lastError.message}")`);
+        }
+        else {
+            lines.push(`  [${mark}] ${s.subsystem.padEnd(16)} ${stats}`);
+        }
+    }
+    // Slice3 audit decision: HonestyEnforcement was deliberately not
+    // instrumented (pure-pattern path has no failure surface). Surface
+    // that explicitly so users know the gap is known, not forgotten.
+    lines.push(`  [-]  honesty          (not instrumented yet)`);
+    lines.push('');
+    return lines.join('\n');
+}
+function humanAge(ms) {
+    if (ms < 1000)
+        return `${ms}ms`;
+    if (ms < 60000)
+        return `${(ms / 1000).toFixed(0)}s`;
+    if (ms < 3600000)
+        return `${Math.floor(ms / 60000)}m`;
+    if (ms < 86400000)
+        return `${Math.floor(ms / 3600000)}h`;
+    return `${Math.floor(ms / 86400000)}d`;
+}
+/**
+ * Phase v4.1.2-slice4: render the Skill outcomes section. Per Q3
+ * decision: silent on empty state (no tracker, or no skills tracked
+ * yet) — doctor output for healthy systems stays short.
+ *
+ * Output (when not empty): top N skills sorted by load count, with
+ * total observations and success percentage. Last-error message
+ * shown for the one most-recently failing skill (cap one row of
+ * detail so the block stays compact).
+ */
+function renderSkillOutcomesSection(tracker, topN = 5) {
+    if (!tracker)
+        return '';
+    const snaps = tracker.snapshot();
+    if (snaps.length === 0)
+        return '';
+    const lines = ['\nSkill outcomes (top ' + Math.min(topN, snaps.length) + ' by load count)'];
+    for (const s of snaps.slice(0, topN)) {
+        const attributed = s.toolSuccesses + s.toolFailures;
+        const rate = attributed === 0
+            ? '—'
+            : `${Math.round((s.toolSuccesses / attributed) * 100)}% success`;
+        const stats = `loaded ${s.loaded}, ${s.toolSuccesses} ok, ${s.toolFailures} err  (${rate})`;
+        const last = s.lastUsed
+            ? `  last ${humanAge(Date.now() - new Date(s.lastUsed).getTime())} ago`
+            : '';
+        lines.push(`  ${s.skillName.padEnd(32)} ${stats}${last}`);
+    }
+    // Spotlight the most-recent failure across all tracked skills so a
+    // single broken skill is visible without scanning every row.
+    const recentFailures = snaps
+        .filter((s) => s.lastError)
+        .sort((a, b) => new Date(b.lastError.at).getTime() - new Date(a.lastError.at).getTime());
+    if (recentFailures.length > 0) {
+        const f = recentFailures[0];
+        lines.push(`  ↳ last failure: ${f.skillName} — "${f.lastError.message}"`);
+    }
+    lines.push('');
+    return lines.join('\n');
+}
 const DEFAULT_TIMEOUT_MS = 3000;
 /** Wrap a promise with a timeout. The timed-out path resolves to the fallback result. */
 async function withTimeout(p, ms, fallback) {
@@ -818,6 +918,18 @@ async function runDoctorCli(opts) {
         process.stdout.write(renderProviderLivenessSection(results, summary));
         livenessFailed = summary.red > 0;
     }
+    // Phase v4.1.2-slice3: subsystem-health surface. Renders only when
+    // a registry was passed (in-REPL doctor); standalone CLI doctor has
+    // no live agent so the section is omitted.
+    const subsystemBlock = renderSubsystemHealthSection(opts?.subsystemHealthRegistry);
+    if (subsystemBlock)
+        process.stdout.write(subsystemBlock);
+    // Phase v4.1.2-slice4: skill-outcome surface. Same gating — only
+    // renders when a tracker was passed and has at least one observed
+    // skill. Standalone CLI invocations skip it.
+    const outcomesBlock = renderSkillOutcomesSection(opts?.skillOutcomeTracker);
+    if (outcomesBlock)
+        process.stdout.write(outcomesBlock);
     // Liveness reds count toward the overall exit code so CI / scripts
     // can `aiden doctor --providers && deploy`.
     process.exitCode = (report.passed && !livenessFailed) ? 0 : 1;

package/dist/cli/v4/doctorLiveness.js CHANGED Viewed

@@ -18,11 +18,12 @@
  *   - `--providers` is opt-in. When the user types it we extend the
  *     report with one liveness row per probe, then render a summary
  *     line at the bottom.
- *   - Tool-catalog validation is deliberately OUT of scope. Liveness
- *     probes ship `tools: []` (see comment in checkProviderLiveness)
- *     so one bad tool schema doesn't false-red every provider that
- *     validates strictly. The eval-harness / registration-time schema
- *     validator (v4.1.1 main) is the right home for that concern.
+ *   - Tool-catalog validation is deliberately OUT of scope. The
+ *     probe ships ONE hardcoded no-op tool (`probe_noop`) so the
+ *     Codex backend accepts the request (it rejects empty `tools`),
+ *     while user-registered tool schemas stay un-validated here. The
+ *     eval-harness / registration-time schema validator (v4.1.1
+ *     main) is the right home for that concern.
  *
  * Trust artifact:
  *   - On failure we surface `err.message` VERBATIM (truncated to 200
@@ -31,6 +32,7 @@
  *     prints the actual OpenAI reason, not a generic "provider failed."
  */
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.pickProbeModel = pickProbeModel;
 exports.enumerateConfiguredProviders = enumerateConfiguredProviders;
 exports.checkProviderLiveness = checkProviderLiveness;
 exports.runProviderLiveness = runProviderLiveness;
@@ -55,6 +57,28 @@ function truncate(s, max = ERROR_TRUNCATE_CHARS) {
         return s;
     return `${s.slice(0, max - 1)}…`;
 }
+/**
+ * Phase v4.1.2-slice5: pick a probe-safe model id from the registry.
+ *
+ * Some providers list model slugs that only work for enterprise / CLI
+ * accounts. ChatGPT Plus is the canonical case: the registry's
+ * `modelIds[0]` is `gpt-5.1-codex-max`, which is rejected by the
+ * subscription-account Codex backend with
+ * `"The 'gpt-5.1-codex-max' model is not supported when using Codex
+ * with a ChatGPT account."` — even though real REPL chat on the same
+ * account works because the user has selected a non-Codex slug
+ * (`gpt-5.5`).
+ *
+ * Heuristic: skip any slug containing `-codex` (covers `-codex-max`,
+ * `-codex-mini`, plain `-codex` suffix variants). Falls back to
+ * `modelIds[0]` if every slug is Codex-flavoured. No provider id
+ * special-casing — the heuristic is shape-based so future-similar
+ * providers benefit too.
+ */
+function pickProbeModel(entry) {
+    const safe = entry.modelIds.find((m) => !m.includes('-codex'));
+    return safe ?? entry.modelIds[0] ?? '';
+}
 /**
  * Wrap a promise with a hard timeout. Resolves to the inner result on
  * success, throws a clearly-labelled `Error` on timeout. Cleans up the
@@ -91,7 +115,7 @@ async function enumerateConfiguredProviders(opts) {
     const out = [];
     for (const entry of Object.values(registry_1.PROVIDER_REGISTRY)) {
         // Every provider needs at least one model to probe against.
-        const model = entry.modelIds[0];
+        const model = pickProbeModel(entry);
         if (!model) {
             out.push({
                 entry,
@@ -199,11 +223,42 @@ async function checkProviderLiveness(provider, model, adapter, opts) {
     const start = Date.now();
     // Liveness probes "is this provider reachable + authenticated?".
     // Tool-catalog validation is a separate concern (eval harness,
-    // v4.1.1 main). Sending tools: [] ensures one bad tool schema
-    // doesn't false-red every provider that validates strictly.
+    // v4.1.1 main).
+    //
+    // Phase v4.1.2-slice5: the probe used to send `messages: [user]`
+    // only, with `tools: []`. That body 400s against the Codex backend
+    // for two reasons:
+    //   1. No system message → empty `instructions` field in the wire
+    //      body. Codex rejects requests without `instructions` (same
+    //      root cause as the eval-runner fix in 6535d531).
+    //   2. Empty tools array → the codex adapter omits `tools`,
+    //      `tool_choice`, `parallel_tool_calls` from the wire body
+    //      entirely. The Codex backend treats this as malformed.
+    //
+    // Fix: add a minimal one-line system message (collapses into
+    // `instructions`) and one hand-crafted no-op tool. Its schema is a
+    // minimal empty object, meant to pass strict validators. NOTE: the
+    // literal below does not actually set `additionalProperties: false`.
+    // The "one bad tool schema false-reds everyone" concern from the
+    // pre-slice5 comment applied to USER tools; this tool is internal.
     const input = {
-        messages: [{ role: 'user', content: 'ping' }],
-        tools: [],
+        messages: [
+            {
+                role: 'system',
+                content: 'You are an availability probe. Respond with a single word.',
+            },
+            { role: 'user', content: 'ping' },
+        ],
+        tools: [
+            {
+                name: 'probe_noop',
+                description: 'Probe placeholder. Do not call — the probe ignores any tool calls.',
+                inputSchema: {
+                    type: 'object',
+                    properties: {},
+                },
+            },
+        ],
         maxTokens: PROBE_MAX_TOKENS,
     };
     try {