npm - aiden-runtime - Versions diffs - 4.1.1 → 4.1.3 - Mend

aiden-runtime 4.1.1 → 4.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

package/README.md +78 -26
package/dist/cli/v4/aidenCLI.js +169 -9
package/dist/cli/v4/callbacks.js +20 -2
package/dist/cli/v4/chatSession.js +644 -16
package/dist/cli/v4/commands/auth.js +6 -3
package/dist/cli/v4/commands/doctor.js +23 -27
package/dist/cli/v4/commands/help.js +4 -0
package/dist/cli/v4/commands/index.js +10 -1
package/dist/cli/v4/commands/model.js +30 -1
package/dist/cli/v4/commands/reloadSoul.js +37 -0
package/dist/cli/v4/commands/update.js +102 -0
package/dist/cli/v4/defaultSoul.js +68 -2
package/dist/cli/v4/display/capabilityCard.js +135 -0
package/dist/cli/v4/display/sessionEndCard.js +127 -0
package/dist/cli/v4/display/toolTrail.js +172 -0
package/dist/cli/v4/display.js +492 -142
package/dist/cli/v4/doctor.js +472 -58
package/dist/cli/v4/doctorLiveness.js +65 -10
package/dist/cli/v4/promotionPrompt.js +332 -0
package/dist/cli/v4/providerBootSelector.js +144 -0
package/dist/cli/v4/replyRenderer.js +311 -20
package/dist/cli/v4/sessionSummaryGate.js +66 -0
package/dist/cli/v4/skinEngine.js +14 -3
package/dist/cli/v4/toolPreview.js +153 -0
package/dist/core/tools/nowPlaying.js +7 -15
package/dist/core/v4/aidenAgent.js +91 -29
package/dist/core/v4/capabilities.js +89 -0
package/dist/core/v4/contextCompressor.js +25 -8
package/dist/core/v4/distillationIndex.js +167 -0
package/dist/core/v4/distillationStore.js +98 -0
package/dist/core/v4/logger/logger.js +40 -9
package/dist/core/v4/promotionCandidates.js +234 -0
package/dist/core/v4/promptBuilder.js +145 -1
package/dist/core/v4/sessionDistiller.js +452 -0
package/dist/core/v4/skillMining/skillMiner.js +43 -6
package/dist/core/v4/skillOutcomeTracker.js +323 -0
package/dist/core/v4/subsystemHealth.js +143 -0
package/dist/core/v4/toolRegistry.js +16 -1
package/dist/core/v4/update/executeInstall.js +233 -0
package/dist/core/version.js +1 -1
package/dist/moat/memoryGuard.js +111 -0
package/dist/moat/plannerGuard.js +19 -0
package/dist/moat/skillTeacher.js +14 -5
package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
package/dist/providers/v4/errors.js +112 -4
package/dist/providers/v4/modelDefaults.js +65 -0
package/dist/providers/v4/registry.js +9 -2
package/dist/providers/v4/runtimeResolver.js +6 -0
package/dist/tools/v4/index.js +80 -1
package/dist/tools/v4/memory/memoryRemove.js +57 -2
package/dist/tools/v4/memory/sessionSummary.js +151 -0
package/dist/tools/v4/sessions/recallSession.js +177 -0
package/dist/tools/v4/sessions/sessionSearch.js +5 -1
package/dist/tools/v4/system/_psHelpers.js +123 -0
package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
package/dist/tools/v4/system/appClose.js +79 -0
package/dist/tools/v4/system/appInput.js +154 -0
package/dist/tools/v4/system/appLaunch.js +218 -0
package/dist/tools/v4/system/clipboardRead.js +54 -0
package/dist/tools/v4/system/clipboardWrite.js +84 -0
package/dist/tools/v4/system/mediaKey.js +109 -0
package/dist/tools/v4/system/mediaSessions.js +163 -0
package/dist/tools/v4/system/mediaTransport.js +211 -0
package/dist/tools/v4/system/osProcessList.js +99 -0
package/dist/tools/v4/system/screenshot.js +106 -0
package/dist/tools/v4/system/volumeSet.js +157 -0
package/package.json +4 -1
package/skills/system_control.md +185 -69

package/README.md CHANGED Viewed

@@ -13,7 +13,7 @@
 Autonomous AI Engine
-72 skills · 42 tools · 19 providers · 9 channels · AGPL-3.0
+74 skills · 53 tools · 19 providers · 9 channels · AGPL-3.0
 Windows · Linux · WSL · macOS (API Mode)
@@ -97,8 +97,8 @@ Local-first · Self-healing routing · Browser & terminal control · Persistent
 <p align="center">
   <img src="https://img.shields.io/badge/providers-19-f97316?style=for-the-badge" alt="19 providers" />
-  <img src="https://img.shields.io/badge/skills-68-43853d?style=for-the-badge" alt="68 skills" />
-  <img src="https://img.shields.io/badge/tools-42-blueviolet?style=for-the-badge" alt="42 tools" />
+  <img src="https://img.shields.io/badge/skills-74-43853d?style=for-the-badge" alt="74 skills" />
+  <img src="https://img.shields.io/badge/tools-53-blueviolet?style=for-the-badge" alt="53 tools" />
   <img src="https://img.shields.io/badge/channels-9-5865f2?style=for-the-badge" alt="9 channels" />
   <img src="https://img.shields.io/badge/offline-Ollama-22c55e?style=for-the-badge" alt="offline" />
   <img src="https://img.shields.io/badge/OAuth-Claude%20Pro%20%2B%20ChatGPT%20Plus-9333ea?style=for-the-badge" alt="OAuth subscriptions" />
@@ -115,7 +115,7 @@ Local-first · Self-healing routing · Browser & terminal control · Persistent
 ---
-> **v4.1.0 — Multi-channel autonomous AI engine**
+> **v4.1 — Multi-channel autonomous AI engine**
 > Telegram + MCP server + subagent fanout + voice CLI + skill mining. Hardened cron, structured markdown, cross-platform CI. See [changelog](#changelog) below.
 ---
@@ -139,14 +139,14 @@ Most AI agents answer questions. Aiden runs work end-to-end on your machine.
 - **Automates any browser** — 10 Playwright-driven tools (navigate, click, type, fill, scroll, extract, screenshot, get-url, close, captcha-check)
 - **Self-healing provider routing** — 6-slot fallback chain (`together → groq × 4`) advances slots in under a second on rate-limit
 - **OAuth subscription routing** — sign in with Claude Pro or ChatGPT Plus; queries route to your subscription quota, not pay-as-you-go
-- **Persistent memory** — `MEMORY.md`, `USER.md`, `SOUL.md`, plus semantic recall and a `LESSONS.md` failure log that grows every session
+- **Persistent memory** — `USER.md`, `SOUL.md`, `MEMORY.md` (durable facts + recent-session distillations), plus semantic recall over past sessions via the `recall_session` tool. Memory promotes itself: each session ends with a structured distillation, and durable facts graduate to a protected section that survives compression.
 - **Lives where you do** — identity files re-read every turn; edit `USER.md` mid-conversation and the change lands within one reply
 - **One command to start** — `npx aiden-runtime` installs, configures, and runs everything
 - **Honest failures** — every tool error names the tool, provider, retry count, fallback chain, error, and next step. No silent swallowing.
 - **Plugin extension** — drop a plugin into `<aiden-home>/plugins/` and call `ctx.commandRegistry.register()` to add slash commands without touching core
 - **Open source** — AGPL-3.0 core, Apache-2.0 skills. Read every line, modify anything, contribute back.
-Aiden is a local-first AI operating system. It runs entirely on your machine — no cloud account required, no telemetry, no data leaving your hardware unless you configure a cloud provider. It installs as a global npm package (`aiden-runtime`, ~16 MB) on Windows, Linux, WSL, and macOS — Node.js 18+ is the only prerequisite. Features: 68 bundled skills, 42 built-in tools across 11 categories, multi-layer memory architecture, self-healing provider routing across 19 providers, the ability to control your screen, browse the web, run code, send emails and messages, manage files, and hold a full conversation — offline via Ollama.
+Aiden is a local-first AI operating system. It runs entirely on your machine — no cloud account required, no telemetry, no data leaving your hardware unless you configure a cloud provider. It installs as a global npm package (`aiden-runtime`, ~16 MB) on Windows, Linux, WSL, and macOS — Node.js 18+ is the only prerequisite. Features: 74 bundled skills, 53 built-in tools across 11 categories, multi-layer memory architecture, self-healing provider routing across 19 providers, the ability to control your screen, browse the web, run code, send emails and messages, manage files, and hold a full conversation — offline via Ollama.
 ---
@@ -175,10 +175,10 @@ All platforms use the same npm-based install path. Node.js 18+ is the only prere
 | Platform | Install | Skills available |
 |---|---|---|
-| **Windows 10/11** | ✅ `npm install -g aiden-runtime` | All 68 (including Windows-only skills) |
-| **Linux** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
-| **WSL 2** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
-| **macOS** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
+| **Windows 10/11** | ✅ `npm install -g aiden-runtime` | All 74 (including Windows-only skills) |
+| **Linux** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
+| **WSL 2** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
+| **macOS** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
 Windows-only skills (clipboard history, Defender, OneNote, Outlook COM, registry, Task Scheduler, etc.) are tagged `platform: windows` and silently skipped on other platforms at load time.
@@ -311,7 +311,7 @@ Set `AIDEN_HEADLESS=true` to suppress the Electron GUI when running the packaged
 ---
-## Known limitations (v4.0.0)
+## Known limitations
 We're shipping honest. Things that work, things that don't:
@@ -339,6 +339,12 @@ We're shipping honest. Things that work, things that don't:
 **Landed in v4.1:**
 - Telegram channel adapter (DM polling + per-chat memory) — see [docs/channels/telegram.md](docs/channels/telegram.md)
+- DeepSeek V4 Pro provider with reasoning-token streaming and per-model defaults
+- `/update` slash command + `aiden_self_update` tool — registry probe, in-process installer, platform-specific permission-denied remediations
+- Structured session distillation — each session ends with a JSON summary that promotes durable facts into a protected `MEMORY.md` section
+- `recall_session` tool — semantic search over past distilled sessions
+- Eval harness — 18 honesty scenarios (10 easy + 8 hard) with `npm run eval -- --suite honesty`
+- Subsystem health registry surfacing component status in `/doctor`
 **Beta features:**
@@ -375,7 +381,7 @@ play me a popular hindi song
 what files did I download today
 ```
-Type `/` to browse all 28 commands with instant search. Skills register their own dynamic slash commands at load time.
+Type `/` to browse all 33 commands with instant search. Skills register their own dynamic slash commands at load time.
 ---
@@ -445,10 +451,10 @@ Multi-layer memory visualised — every conversation, task, and learned pattern
 | Category | What Aiden does |
 |---|---|
-| **Inference & providers** | 19 providers including Anthropic, OpenAI, Groq (4-slot fallback), Together, Gemini, NVIDIA NIM, OpenRouter, DeepSeek, Mistral, Z.ai, Kimi, MiniMax, Hugging Face, custom OpenAI-compatible endpoints, and **Ollama** for fully offline. OAuth subscription routing for Claude Pro and ChatGPT Plus. |
-| **42 built-in tools** | Web search & fetch, deep research, YouTube search, Playwright browser automation (10 tools), file ops (read, list, write, patch, delete, move, copy), process control (spawn, kill, list, log-read, wait), shell exec, code execution, system info, MCP bridge, memory add/replace/remove, session list/search, skill view/list/manage. |
-| **68 bundled skills** | Composable workflows each with a `SKILL.md` prompt, optional helper scripts, and tool requirements. Includes: GitHub PR/issue workflows, NSE / Upstox / Zerodha trading, Censys / Shodan / VirusTotal lookups, Windows Defender / Task Scheduler, Docker management, YouTube content tools, ASCII art, and more. |
-| **6-layer memory** | `MEMORY.md` (declarative facts), conversation/session/workspace memory, semantic search (BM25 + embeddings), learning memory (`LESSONS.md`), structured user profile. Dirty-bit invalidation rebuilds the prompt when files change mid-session. |
+| **Inference & providers** | 19 providers including Anthropic, OpenAI, Groq (4-slot fallback), Together, Gemini, NVIDIA NIM, OpenRouter, **DeepSeek V4 Pro** (reasoning-token streaming), Mistral, Z.ai, Kimi, MiniMax, Hugging Face, custom OpenAI-compatible endpoints, and **Ollama** for fully offline. OAuth subscription routing for Claude Pro and ChatGPT Plus. |
+| **53 built-in tools** | Web search & fetch, deep research, YouTube search, Playwright browser automation (10 tools), file ops (read, list, write, patch, delete, move, copy), process control (spawn, kill, list, log-read, wait), shell exec, code execution, system info, screenshot, clipboard, app launch/close, media keys, MCP bridge, memory add/replace/remove, session list/search/summary/recall, skill view/list/manage, and `aiden_self_update`. |
+| **74 bundled skills** | Composable workflows each with a `SKILL.md` prompt, optional helper scripts, and tool requirements. Includes: GitHub PR/issue workflows, NSE / Upstox / Zerodha trading, Censys / Shodan / VirusTotal lookups, Windows Defender / Task Scheduler, Docker management, YouTube content tools, ASCII art, and more. |
+| **Self-promoting memory** | `USER.md` + `SOUL.md` identity, plus a `MEMORY.md` split between durable facts (compression-protected) and recent-session distillations. Each session ends with a structured JSON summary that graduates durable facts into the protected section. Semantic recall over past sessions via the `recall_session` tool. Dirty-bit invalidation rebuilds the prompt when files change mid-session. |
 | **Voice** | Edge TTS / Windows SAPI text-to-speech, speech-to-text helpers. |
 | **Channel adapters** | Discord, Slack, Telegram, WhatsApp, Email (IMAP+SMTP), Webhook, Twilio SMS, iMessage (macOS), Signal — any channel triggers the same agent loop. |
 | **Computer use** | Screenshot capture, screen-state vision loop, browser automation. Mouse/keyboard automation partial. |
@@ -484,16 +490,16 @@ User input (any channel)
        │           │
        │           ▼
        │     ┌──────────────────┐
-       │     │  Tool dispatcher │──▶ 42 built-in tools
+       │     │  Tool dispatcher │──▶ 53 built-in tools
        │     └──────────────────┘    + skill-driven dynamic tools
        │
        ▼
   ┌─────────────────────────────────────┐
   │  Memory                             │
-  │  MEMORY.md · USER.md · SOUL.md      │
-  │  conversation · session · workspace │
-  │  semantic (BM25 + embeddings)       │
-  │  learning (LESSONS.md)              │
+  │  USER.md · SOUL.md · MEMORY.md      │
+  │  (durable facts · recent sessions)  │
+  │  semantic recall (recall_session)   │
+  │  end-of-session distillation        │
   └─────────────────────────────────────┘
        │
        ▼
@@ -577,7 +583,7 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
 | `npm start` | Start the API server (port 4200) |
 | `npm run build` | Rebuild after source changes |
-### In-chat slash commands (28 total)
+### In-chat slash commands (33 total)
 **Session**
 | Command | Description |
@@ -586,6 +592,9 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
 | `/compress` | Compress the conversation to free context |
 | `/save` | Save the current session |
 | `/title` | Set a title for the session |
+| `/history` | Browse past sessions |
+| `/show` | Show session metadata |
+| `/status` | Show current session status |
 **Configuration**
 | Command | Description |
@@ -611,11 +620,16 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
 | `/license` | Show / set Pro license |
 | `/plugins` | List, grant, suspend plugins |
 | `/reload-mcp` | Reconnect MCP servers |
+| `/reload-soul` | Reload SOUL.md / USER.md mid-session |
 | `/tools` | List registered tools |
 | `/skills` | List, view, install skills |
 | `/usage` | Token usage + cost summary |
 | `/yolo` | No-approval mode (use carefully) |
 | `/cron` | Schedule recurring tasks |
+| `/update` | Check for / install the latest `aiden-runtime` (`install` subcommand applies) |
+| `/setup` | Re-run the setup wizard from the REPL |
+| `/channel` | List / manage channel adapters (Discord, Slack, Telegram, …) |
+| `/voice` | Toggle voice output (Edge TTS / Windows SAPI) |
 | `/quit` | Exit the REPL |
 **Authentication**
@@ -644,8 +658,8 @@ Both the terminal CLI and the browser dashboard (`localhost:4200/ui`) expose the
 | Chat | ✅ inline prompt | ✅ chat panel |
 | Streaming responses | ✅ token-by-token | ✅ live SSE |
 | Markdown rendering | ✅ | ✅ |
-| Slash commands | ✅ all 28 | ✅ same commands |
-| `/` command dropdown | ✅ instant, 28 commands | ✅ |
+| Slash commands | ✅ all 33 | ✅ same commands |
+| `/` command dropdown | ✅ instant, 33 commands | ✅ |
 | Provider panel | `/providers` | ✅ Providers tab |
 | Memory panel | `/identity` + tool calls | ✅ Memory tab |
 | Skills panel | `/skills` | ✅ Skills tab |
@@ -668,7 +682,7 @@ Both the terminal CLI and the browser dashboard (`localhost:4200/ui`) expose the
 - **better-sqlite3** + **sql.js** — local persistence.
 - **croner** — cron scheduler.
 - **discord.js**, **@slack/web-api**, **whatsapp-web.js**, **twilio**, **nodemailer**, **imap-simple** — channel adapters.
-- **Vitest 4** — test runner; ~1,500 unit + integration tests.
+- **Vitest 4** — test runner; ~1,983 unit + integration tests.
 - **esbuild** — bundler for the npm package; **electron-builder** — optional desktop wrapper.
 - **Cloudflare Workers** — landing page + license server + install-script proxy.
@@ -694,6 +708,31 @@ aiden                  # CLI
 - Follow [Conventional Commits](https://www.conventionalcommits.org/).
 - Run `npm run typecheck` and `npm test` before opening a PR.
+### Evals — measuring behavior at scale
+Aiden ships an opt-in eval harness that runs scenario-based behavior checks
+against a real provider. Distinct from `npm test` (unit / integration) — evals
+are scenario-driven, make live LLM calls, and are *measurement* rather than
+release gates.
+```bash
+npm run eval                                  # default suite (honesty), default provider
+npm run eval:honesty                          # explicit suite
+npm run eval -- --scenario honesty/no-fabricated-file-contents
+npm run eval -- --provider groq --model llama-3.3-70b-versatile
+npm run eval -- --strict                      # exit 1 on any failure (for CI)
+```
+Results land in `evals/results/<timestamp>.json` (gitignored — local history).
+Eval failures are signal, not gates: the runner exits 0 unless `--strict`.
+Default provider: `chatgpt-plus / gpt-5.5`. Falls back to the test-provider
+chain (Groq / Together via env-var keys) when ChatGPT Plus isn't authed.
+Available suites: `honesty` (18 scenarios — 10 easy + 8 hard — covering
+fabricated content, fake "I found" claims, claimed actions without tool calls,
+unverified completions, write/read mismatches, and post-cutoff version claims).
 ---
 ## Community
@@ -759,7 +798,20 @@ Aiden is built and maintained by one person. If it saves you time, consider spon
 ## Changelog
-See [CHANGELOG.md](CHANGELOG.md) for the full history. **v4.0.0 highlights:**
+See [CHANGELOG.md](CHANGELOG.md) for the full history.
+**v4.1.2 highlights:**
+- 🧠 **Self-promoting memory** — sessions end with a structured JSON distillation; durable facts graduate into a compression-protected `MEMORY.md` section that survives `/compress`. Recent-session distillations are kept separately for `recall_session` semantic search.
+- 🔍 **`recall_session` tool** — query past sessions in natural language; returns ranked distillations with date + summary + relevant facts.
+- 🛰 **DeepSeek V4 Pro provider** — reasoning-token streaming, per-model defaults (`MODEL_DEFAULTS`), probe filtering for codex-only slugs.
+- ⬆ **`/update` slash command + `aiden_self_update` tool** — npm-registry probe with 6h boot cache, in-process `executeInstall` shared between both surfaces, platform-specific permission-denied remediations (Windows admin / sudo / user-local prefix). No silent self-escalation, no false claims of in-place upgrade.
+- 🧪 **Eval harness** — 18 honesty scenarios (10 easy + 8 hard); `npm run eval -- --suite honesty`, results land in `evals/results/<timestamp>.json`.
+- 🩺 **Subsystem health registry** — `/doctor` surfaces component status with a uniform OK / WARN / FAIL contract.
+- 🔒 **Memory-guard hardening** — section-aware `## Durable facts` protection, word-boundary regex anchors, entry-delimited storage, case-insensitive dedup with separator tolerance.
+- ✅ **~1,983 tests passing** — regression guards for every smoke-test bug found in earlier slices.
+**v4.0.0 highlights:**
 - 🧠 **Clean-room core rewrite** — every adapter, every prompt slot, every loop. 7 dual-attribution files rewritten under full Aiden copyright.
 - 🔌 **19 providers** including OAuth subscription routing for Claude Pro and ChatGPT Plus (subscription quota, not pay-as-you-go).

package/dist/cli/v4/aidenCLI.js CHANGED Viewed

@@ -108,6 +108,10 @@ const plannerGuard_1 = require("../../moat/plannerGuard");
 const honestyEnforcement_1 = require("../../moat/honestyEnforcement");
 const skillTeacher_1 = require("../../moat/skillTeacher");
 const skillMiner_1 = require("../../core/v4/skillMining/skillMiner");
+const subsystemHealth_1 = require("../../core/v4/subsystemHealth");
+const skillOutcomeTracker_1 = require("../../core/v4/skillOutcomeTracker");
+const providerBootSelector_1 = require("./providerBootSelector");
+const doctorLiveness_1 = require("./doctorLiveness");
 const uiBuild_2 = require("./uiBuild");
 const memoryGuard_1 = require("../../moat/memoryGuard");
 const ssrfProtection_1 = require("../../moat/ssrfProtection");
@@ -162,7 +166,11 @@ function coerceMode(raw, valid, fallback, label, warn) {
     warn(`Invalid ${label} '${raw}' — falling back to '${fallback}' (valid: ${valid.join(', ')})`);
     return fallback;
 }
-const VERSION = '4.0.0';
+// Post-v4.1.1 cleanup: read VERSION from the auto-generated source-of-
+// truth (scripts/inject-version.js writes it from package.json on every
+// prebuild hook). Previous hardcoded '4.0.0' string had been stale since
+// v4.0.1 and made `aiden --version` lie.
+const version_1 = require("../../core/version");
 // Phase 16c.2: env-source tracking lives in `cli/v4/envSources.ts` so
 // `commands/providers.ts` can import getEnvSource without circular deps.
 const envSources_1 = require("./envSources");
@@ -204,7 +212,7 @@ async function main(argv, opts = {}) {
     program
         .name('aiden')
         .description('Aiden — local-first AI agent')
-        .version(VERSION, '-v, --version')
+        .version(version_1.VERSION, '-v, --version')
         .option('--tui', 'Launch full-screen TUI renderer', false)
         .option('-c, --continue', 'Resume the most recent session')
         .option('-r, --resume <title>', 'Resume a session by id-prefix or partial title')
@@ -533,10 +541,48 @@ async function buildAgentRuntime(cliOpts, opts) {
         (0, envSources_1.loadAidenEnvFile)(paths.envFile);
         await config.load();
     }
-    const providerId = cliOpts.provider ??
-        config.getValue('model.provider', 'groq');
-    const modelId = cliOpts.model ??
-        config.getValue('model.modelId', 'llama-3.3-70b-versatile');
+    // Phase v4.1.2-bug1: boot model selection now consults the priority-
+    // list auto-picker (cli/v4/providerBootSelector.ts) instead of
+    // hardcoded `groq + llama-3.3-70b-versatile`. Users with chatgpt-plus
+    // OAuth (the post-v4.1.1 onboarding default) used to boot into Groq
+    // and hit a 400 on the first tool-bearing request — llama-3.3-70b's
+    // tool emission is rejected by Groq's first-party validator.
+    //
+    // Precedence (handled inside resolveBootProvider):
+    //   1. Both --provider + --model flags  → use them
+    //   2. One flag only                     → use it, resolve other
+    //   3. Persisted model-selection.json   → use it
+    //   4. Partial config                    → use it, resolve other
+    //   5. Auto-pick from priority list      → first authed provider
+    //   6. Nothing authed                    → hardcoded groq fallback
+    let providerId;
+    let modelId;
+    let bootSource;
+    try {
+        const selection = await (0, providerBootSelector_1.resolveBootProvider)({
+            cliProviderId: cliOpts.provider,
+            cliModelId: cliOpts.model,
+            cfgProviderId: config.getValue('model.provider'),
+            cfgModelId: config.getValue('model.modelId'),
+        }, () => (0, doctorLiveness_1.enumerateConfiguredProviders)({ paths, env: process.env }));
+        if (selection) {
+            providerId = selection.providerId;
+            modelId = selection.modelId;
+            bootSource = selection.source;
+        }
+        else {
+            // Case 6: nothing authed — preserve the prior hardcoded default
+            // so the legacy first-run path (manual API-key entry into .env)
+            // still works.
+            providerId = 'groq';
+            modelId = 'llama-3.3-70b-versatile';
+            bootSource = 'hardcoded-fallback';
+        }
+    }
+    catch (err) {
+        process.stderr.write(`aiden: ${err.message}\n`);
+        process.exit(1);
+    }
     // Resolve session continuation.
     const store = new sessionStore_1.SessionStore(paths.sessionsDb);
     const sessionManager = new sessionManager_1.SessionManager(store);
@@ -578,6 +624,15 @@ async function buildAgentRuntime(cliOpts, opts) {
             display.printError(`Could not resolve provider '${providerId}' / model '${modelId}': ${err.message}`, 'Run `aiden model` to pick a valid provider, or `aiden doctor`.');
             process.exit(1);
         }
+        // Phase v4.1.2-bug1: surface the auto-pick in the boot log when
+        // neither CLI flags nor persisted config specified the choice.
+        // Silent on explicit selections so power users don't see noise.
+        if (bootSource === 'auto-priority') {
+            display.dim(`[boot] ${providerId} · ${modelId}  (auto · first authed provider)`);
+        }
+        else if (bootSource === 'hardcoded-fallback') {
+            display.dim(`[boot] ${providerId} · ${modelId}  (no authed providers detected — using legacy default)`);
+        }
     }
     // Phase 16b.1: wrap chat_completions providers in a FallbackAdapter so
     // 429s on Groq slot 1 transparently retry Groq slot 2/3 and Together.
@@ -818,7 +873,35 @@ async function buildAgentRuntime(cliOpts, opts) {
             });
         },
     };
-    const skillTeacher = new skillTeacher_1.SkillTeacher(skillLoader, skillManageProxy, skillTeacherTier, undefined, (name) => toolRegistry.get(name));
+    // Phase v4.1.2-slice3: subsystem-health registry. AidenAgent owns
+    // the one instance (constructor-injected, not a singleton — so
+    // parallel tests don't cross-contaminate). Per-subsystem trackers
+    // hang off the registry and are passed into each subsystem's
+    // constructor so they can record success/failure as it happens.
+    // `aiden doctor` reads `agent.subsystemHealthRegistry.snapshot()`.
+    const subsystemHealthRegistry = (0, subsystemHealth_1.createSubsystemHealthRegistry)();
+    const skillTeacherHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-teacher');
+    const skillMinerHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-miner');
+    // Phase v4.1.2-slice4: outcome tracker — observes tool-call lifecycle,
+    // attributes downstream successes/failures to skills loaded via
+    // skill_view. Persisted to <skillsDir>/.skill-outcomes.json (atomic
+    // write, lazy hydrate). Persist failures surface to doctor via a
+    // shared slice3 SubsystemHealthTracker.
+    const skillOutcomeHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-outcome-tracker');
+    const skillOutcomeTracker = new skillOutcomeTracker_1.SkillOutcomeTracker(node_path_1.default.join(paths.skillsDir, '.skill-outcomes.json'), skillOutcomeHealth);
+    subsystemHealthRegistry.register('skill-teacher', () => skillTeacherHealth.snapshot());
+    subsystemHealthRegistry.register('skill-miner', () => skillMinerHealth.snapshot());
+    subsystemHealthRegistry.register('skill-outcome-tracker', () => skillOutcomeHealth.snapshot());
+    // Phase v4.1.2-memory-D fold-in (memory-C Q3 open): recall-session
+    // health tracker. The tool itself (tools/v4/sessions/recallSession.ts)
+    // stays pure of registry knowledge for testability; the registry
+    // caller wires a tracker the tool can record into via ctx. Until
+    // the tool plumbs ctx → tracker (separate follow-up), the slot stays
+    // registered with a snapshot reader so doctor's expand-on-degradation
+    // path sees the subsystem exists even at zero observations.
+    const recallSessionHealth = new subsystemHealth_1.SubsystemHealthTracker('recall-session');
+    subsystemHealthRegistry.register('recall-session', () => recallSessionHealth.snapshot());
+    const skillTeacher = new skillTeacher_1.SkillTeacher(skillLoader, skillManageProxy, skillTeacherTier, undefined, (name) => toolRegistry.get(name), skillTeacherHealth);
     // ── Tool executor with full Phase 9 + 10 context ─────────────────────
     const toolExecutor = toolRegistry.buildExecutor({
         cwd: process.cwd(),
@@ -881,10 +964,26 @@ async function buildAgentRuntime(cliOpts, opts) {
     catch {
         skillsList = [];
     }
+    // Phase v4.1.2 alive-core: enumerate which toolset tags are loaded
+    // so PromptBuilder can inject tool-conditional guidance. Pure
+    // string-set; no ToolRegistry reference threaded through the builder.
+    const toolsetsLoaded = new Set();
+    for (const name of toolRegistry.list()) {
+        const ts = toolRegistry.get(name)?.toolset;
+        if (ts)
+            toolsetsLoaded.add(ts);
+    }
     const promptBuilderOptions = {
         paths,
         memorySnapshot,
         skillsList,
+        toolsetsLoaded,
+        // Phase v4.1.2-followup self-awareness: feed the runtime slot.
+        // toolCount comes from the same registry we just walked to build
+        // toolsetsLoaded; providerId joins modelId so both halves of the
+        // active route are in the prompt.
+        toolCount: toolRegistry.list().length,
+        providerId,
         personalityOverlay: activeOverlay,
         modelId,
     };
@@ -895,7 +994,14 @@ async function buildAgentRuntime(cliOpts, opts) {
     // mutate skill state from inside JSON-RPC handling).
     const skillMiner = (0, uiBuild_2.isMcpServeMode)()
         ? undefined
-        : new skillMiner_1.SkillMiner({ auxiliaryClient });
+        : new skillMiner_1.SkillMiner({ auxiliaryClient, healthTracker: skillMinerHealth });
+    // Phase v4.1.2-slice3: the structured CoreLogger isn't yet plumbed
+    // through buildAgentRuntime — it's created via factory at boot but
+    // not passed in here. We leave its sink-health surface available
+    // via `CoreLogger.getSinkHealth()` for any caller that holds the
+    // instance, and the registry stays empty for the logger slot until
+    // the structured-logger wiring catches up. The registry mechanism
+    // itself is exercised end-to-end by skill-teacher and skill-miner.
     // ── Build agent with all moat layers attached ────────────────────────
     const agent = new aidenAgent_1.AidenAgent({
         provider: adapter,
@@ -907,6 +1013,8 @@ async function buildAgentRuntime(cliOpts, opts) {
         honestyEnforcement,
         skillTeacher,
         skillMiner,
+        subsystemHealthRegistry,
+        skillOutcomeTracker,
         onSkillCandidate: (candidate) => {
             try {
                 callbacks.onSkillCandidate?.(candidate);
@@ -916,7 +1024,18 @@ async function buildAgentRuntime(cliOpts, opts) {
         // Phase 23.5: tool event rows. CliCallbacks.onToolCall
         // emits a single line per call — `· tool <name> <args> [running]`
         // mutates to `[ok 220ms]` / `[fail 1.4s]` / `[blocked]` on resolve.
-        onToolCall: callbacks.onToolCall,
+        //
+        // Phase v4.1.2-slice4: compose (do NOT replace) so the
+        // SkillOutcomeTracker observes the same lifecycle the CLI display
+        // is rendering. Tracker hooks run first so attribution lands even
+        // if the display callback throws.
+        onToolCall: (call, phase, result) => {
+            try {
+                skillOutcomeTracker.onTool(call, phase, result);
+            }
+            catch { /* telemetry must not break the turn */ }
+            callbacks.onToolCall?.(call, phase, result);
+        },
         onCompression: callbacks.onCompression,
         onBudgetWarning: callbacks.onBudgetWarning,
         onPlannerGuardDecision: callbacks.onPlannerGuardDecision,
@@ -966,6 +1085,32 @@ async function buildAgentRuntime(cliOpts, opts) {
     memoryManager.onMutation((file) => {
         agent.markMemoryDirty(file === 'user' ? 'user' : 'memory');
     });
+    // Phase v4.1.2 alive-core: SOUL.md file watcher. Best-effort —
+    // some filesystems (network mounts, certain WSL configs) don't
+    // support fs.watch reliably. We try to attach; if it fails, the
+    // /reload-soul slash command stays as the manual fallback.
+    try {
+        const soulWatcher = (0, node_fs_1.watch)(paths.soulMd, { persistent: false }, (eventType) => {
+            if (eventType === 'change' || eventType === 'rename') {
+                agent.markMemoryDirty('soul');
+            }
+        });
+        soulWatcher.on('error', () => {
+            // Some FS backends emit errors mid-stream; degrade to manual
+            // fallback. The slash command still works.
+        });
+        // Phase 23.4b: leak-free shutdown — closed by the 'exit' handler
+        // below. No unref() needed: `persistent: false` already stops the
+        // watcher from keeping the process alive after the REPL ends.
+        process.on('exit', () => { try {
+            soulWatcher.close();
+        }
+        catch { /* noop */ } });
+    }
+    catch (err) {
+        display.warn(`SOUL.md watcher could not attach (${err.message}). ` +
+            'Use `/reload-soul` to apply edits mid-session.');
+    }
     // ── Phase v4.1-subagent.1 — subagent_fanout wiring is below
     // (after `bootLogger` is declared and the gateway processor is set
     // up). Stub registered at boot is replaced there with the real
@@ -1304,6 +1449,12 @@ async function buildAgentRuntime(cliOpts, opts) {
         mcpClient,
         providerId,
         modelId,
+        // v4.1.3-prebump: forward the precedence-case label so the boot
+        // card can render a "where this choice came from" annotation.
+        // The case-3 (persisted-config) branch was confusing users who
+        // expected auto-pick to kick in — surfacing the source closes the
+        // information asymmetry.
+        bootSource,
         resumeSessionId,
         fallbackAdapter,
         personalityManager,
@@ -1330,6 +1481,10 @@ async function runInteractiveChat(cliOpts, opts) {
         config: runtime.config,
         initialProviderId: runtime.providerId,
         initialModelId: runtime.modelId,
+        // v4.1.3-prebump: pass through the precedence-case label so the
+        // boot card can render a dim source annotation under the version
+        // pill ("persisted from prior session" / "auto-picked" / …).
+        initialBootSource: runtime.bootSource,
         resumeSessionId: runtime.resumeSessionId,
         yoloMode: !!cliOpts.yolo,
         fallbackAdapter: runtime.fallbackAdapter,
@@ -1342,6 +1497,11 @@ async function runInteractiveChat(cliOpts, opts) {
         // Phase v4.1-1.1 — live ChannelManager so /channel commands can
         // list, add, remove, and inspect adapters without an external server.
         channelManager: runtime.channelManager,
+        // Phase v4.1.2 session-summary-followup: ChatSession.maybeAutoSummarize
+        // needs these to write MEMORY.md directly (bypassing the agent loop)
+        // when /quit fires the auto-summary path.
+        memoryManager: runtime.memoryManager,
+        memoryGuard: runtime.memoryGuard,
     };
     if (cliOpts.tui) {
         await (0, aidenTUI_1.runTuiMode)({

package/dist/cli/v4/callbacks.js CHANGED Viewed

@@ -102,6 +102,21 @@ class CliCallbacks {
             }
             if (err) {
                 handle.fail(ms);
+                // v4.1.3-essentials: when the tool's failure payload includes a
+                // structured capability card (auth missing, platform unsupported),
+                // render the card immediately after the fail row. The card sits
+                // on its own multi-line block — the fail row is still useful as
+                // the action timeline anchor; the card adds the state assessment
+                // the user actually needs. No card → plain failure surface.
+                if (result?.capabilityCard) {
+                    this.display.capabilityCard(result.capabilityCard);
+                }
+                return;
+            }
+            // v4.1.3-repl-polish: degraded outcome — tool completed but with a
+            // partial / best-effort result. Show it in yellow in the trail rather than silently reporting ok.
+            if (result?.degraded) {
+                handle.degraded(ms, result.degradedReason);
                 return;
             }
             handle.ok(ms);
@@ -238,8 +253,11 @@ Reply with ONE word: safe, caution, or dangerous.`;
          * by the chat loop right after `memory_add` returns `verified=true` —
          * this hook is the diagnostic counterpart for verbose mode.
          */
-        this.onMemoryRefresh = (which) => {
-            this.display.dim(`[memory] refreshed system prompt (${which})`);
+        this.onMemoryRefresh = (files) => {
+            // Phase v4.1.2: argument switched from single-string-or-'both' to
+            // the full sorted set of dirty files (SOUL.md joined the rotation).
+            const label = files.length > 0 ? files.join(', ') : 'none';
+            this.display.dim(`[memory] refreshed system prompt (${label})`);
         };
         this.display = opts.display;
         this.auxiliaryClient = opts.auxiliaryClient;