aiden-runtime 4.1.1 → 4.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +78 -26
  2. package/dist/cli/v4/aidenCLI.js +169 -9
  3. package/dist/cli/v4/callbacks.js +20 -2
  4. package/dist/cli/v4/chatSession.js +644 -16
  5. package/dist/cli/v4/commands/auth.js +6 -3
  6. package/dist/cli/v4/commands/doctor.js +23 -27
  7. package/dist/cli/v4/commands/help.js +4 -0
  8. package/dist/cli/v4/commands/index.js +10 -1
  9. package/dist/cli/v4/commands/model.js +30 -1
  10. package/dist/cli/v4/commands/reloadSoul.js +37 -0
  11. package/dist/cli/v4/commands/update.js +102 -0
  12. package/dist/cli/v4/defaultSoul.js +68 -2
  13. package/dist/cli/v4/display/capabilityCard.js +135 -0
  14. package/dist/cli/v4/display/sessionEndCard.js +127 -0
  15. package/dist/cli/v4/display/toolTrail.js +172 -0
  16. package/dist/cli/v4/display.js +492 -142
  17. package/dist/cli/v4/doctor.js +472 -58
  18. package/dist/cli/v4/doctorLiveness.js +65 -10
  19. package/dist/cli/v4/promotionPrompt.js +332 -0
  20. package/dist/cli/v4/providerBootSelector.js +144 -0
  21. package/dist/cli/v4/replyRenderer.js +311 -20
  22. package/dist/cli/v4/sessionSummaryGate.js +66 -0
  23. package/dist/cli/v4/skinEngine.js +14 -3
  24. package/dist/cli/v4/toolPreview.js +153 -0
  25. package/dist/core/tools/nowPlaying.js +7 -15
  26. package/dist/core/v4/aidenAgent.js +91 -29
  27. package/dist/core/v4/capabilities.js +89 -0
  28. package/dist/core/v4/contextCompressor.js +25 -8
  29. package/dist/core/v4/distillationIndex.js +167 -0
  30. package/dist/core/v4/distillationStore.js +98 -0
  31. package/dist/core/v4/logger/logger.js +40 -9
  32. package/dist/core/v4/promotionCandidates.js +234 -0
  33. package/dist/core/v4/promptBuilder.js +145 -1
  34. package/dist/core/v4/sessionDistiller.js +452 -0
  35. package/dist/core/v4/skillMining/skillMiner.js +43 -6
  36. package/dist/core/v4/skillOutcomeTracker.js +323 -0
  37. package/dist/core/v4/subsystemHealth.js +143 -0
  38. package/dist/core/v4/toolRegistry.js +16 -1
  39. package/dist/core/v4/update/executeInstall.js +233 -0
  40. package/dist/core/version.js +1 -1
  41. package/dist/moat/memoryGuard.js +111 -0
  42. package/dist/moat/plannerGuard.js +19 -0
  43. package/dist/moat/skillTeacher.js +14 -5
  44. package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
  45. package/dist/providers/v4/errors.js +112 -4
  46. package/dist/providers/v4/modelDefaults.js +65 -0
  47. package/dist/providers/v4/registry.js +9 -2
  48. package/dist/providers/v4/runtimeResolver.js +6 -0
  49. package/dist/tools/v4/index.js +80 -1
  50. package/dist/tools/v4/memory/memoryRemove.js +57 -2
  51. package/dist/tools/v4/memory/sessionSummary.js +151 -0
  52. package/dist/tools/v4/sessions/recallSession.js +177 -0
  53. package/dist/tools/v4/sessions/sessionSearch.js +5 -1
  54. package/dist/tools/v4/system/_psHelpers.js +123 -0
  55. package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
  56. package/dist/tools/v4/system/appClose.js +79 -0
  57. package/dist/tools/v4/system/appInput.js +154 -0
  58. package/dist/tools/v4/system/appLaunch.js +218 -0
  59. package/dist/tools/v4/system/clipboardRead.js +54 -0
  60. package/dist/tools/v4/system/clipboardWrite.js +84 -0
  61. package/dist/tools/v4/system/mediaKey.js +109 -0
  62. package/dist/tools/v4/system/mediaSessions.js +163 -0
  63. package/dist/tools/v4/system/mediaTransport.js +211 -0
  64. package/dist/tools/v4/system/osProcessList.js +99 -0
  65. package/dist/tools/v4/system/screenshot.js +106 -0
  66. package/dist/tools/v4/system/volumeSet.js +157 -0
  67. package/package.json +4 -1
  68. package/skills/system_control.md +185 -69
package/README.md CHANGED
@@ -13,7 +13,7 @@
13
13
 
14
14
  Autonomous AI Engine
15
15
 
16
- 72 skills · 42 tools · 19 providers · 9 channels · AGPL-3.0
16
+ 74 skills · 53 tools · 19 providers · 9 channels · AGPL-3.0
17
17
 
18
18
  Windows · Linux · WSL · macOS (API Mode)
19
19
 
@@ -97,8 +97,8 @@ Local-first · Self-healing routing · Browser & terminal control · Persistent
97
97
 
98
98
  <p align="center">
99
99
  <img src="https://img.shields.io/badge/providers-19-f97316?style=for-the-badge" alt="19 providers" />
100
- <img src="https://img.shields.io/badge/skills-68-43853d?style=for-the-badge" alt="68 skills" />
101
- <img src="https://img.shields.io/badge/tools-42-blueviolet?style=for-the-badge" alt="42 tools" />
100
+ <img src="https://img.shields.io/badge/skills-74-43853d?style=for-the-badge" alt="74 skills" />
101
+ <img src="https://img.shields.io/badge/tools-53-blueviolet?style=for-the-badge" alt="53 tools" />
102
102
  <img src="https://img.shields.io/badge/channels-9-5865f2?style=for-the-badge" alt="9 channels" />
103
103
  <img src="https://img.shields.io/badge/offline-Ollama-22c55e?style=for-the-badge" alt="offline" />
104
104
  <img src="https://img.shields.io/badge/OAuth-Claude%20Pro%20%2B%20ChatGPT%20Plus-9333ea?style=for-the-badge" alt="OAuth subscriptions" />
@@ -115,7 +115,7 @@ Local-first · Self-healing routing · Browser & terminal control · Persistent
115
115
 
116
116
  ---
117
117
 
118
- > **v4.1.0 — Multi-channel autonomous AI engine**
118
+ > **v4.1 — Multi-channel autonomous AI engine**
119
119
  > Telegram + MCP server + subagent fanout + voice CLI + skill mining. Hardened cron, structured markdown, cross-platform CI. See [changelog](#changelog) below.
120
120
 
121
121
  ---
@@ -139,14 +139,14 @@ Most AI agents answer questions. Aiden runs work end-to-end on your machine.
139
139
  - **Automates any browser** — 10 Playwright-driven tools (navigate, click, type, fill, scroll, extract, screenshot, get-url, close, captcha-check)
140
140
  - **Self-healing provider routing** — 6-slot fallback chain (`together → groq × 4`) advances slots in under a second on rate-limit
141
141
  - **OAuth subscription routing** — sign in with Claude Pro or ChatGPT Plus; queries route to your subscription quota, not pay-as-you-go
142
- - **Persistent memory** — `MEMORY.md`, `USER.md`, `SOUL.md`, plus semantic recall and a `LESSONS.md` failure log that grows every session
142
+ - **Persistent memory** — `USER.md`, `SOUL.md`, `MEMORY.md` (durable facts + recent-session distillations), plus semantic recall over past sessions via the `recall_session` tool. Memory promotes itself: each session ends with a structured distillation, and durable facts graduate to a protected section that survives compression.
143
143
  - **Lives where you do** — identity files re-read every turn; edit `USER.md` mid-conversation and the change lands within one reply
144
144
  - **One command to start** — `npx aiden-runtime` installs, configures, and runs everything
145
145
  - **Honest failures** — every tool error names the tool, provider, retry count, fallback chain, error, and next step. No silent swallowing.
146
146
  - **Plugin extension** — drop a plugin into `<aiden-home>/plugins/` and call `ctx.commandRegistry.register()` to add slash commands without touching core
147
147
  - **Open source** — AGPL-3.0 core, Apache-2.0 skills. Read every line, modify anything, contribute back.
148
148
 
149
- Aiden is a local-first AI operating system. It runs entirely on your machine — no cloud account required, no telemetry, no data leaving your hardware unless you configure a cloud provider. It installs as a global npm package (`aiden-runtime`, ~16 MB) on Windows, Linux, WSL, and macOS — Node.js 18+ is the only prerequisite. Features: 68 bundled skills, 42 built-in tools across 11 categories, multi-layer memory architecture, self-healing provider routing across 19 providers, the ability to control your screen, browse the web, run code, send emails and messages, manage files, and hold a full conversation — offline via Ollama.
149
+ Aiden is a local-first AI operating system. It runs entirely on your machine — no cloud account required, no telemetry, no data leaving your hardware unless you configure a cloud provider. It installs as a global npm package (`aiden-runtime`, ~16 MB) on Windows, Linux, WSL, and macOS — Node.js 18+ is the only prerequisite. Features: 74 bundled skills, 53 built-in tools across 11 categories, multi-layer memory architecture, self-healing provider routing across 19 providers, the ability to control your screen, browse the web, run code, send emails and messages, manage files, and hold a full conversation — offline via Ollama.
150
150
 
151
151
  ---
152
152
 
@@ -175,10 +175,10 @@ All platforms use the same npm-based install path. Node.js 18+ is the only prere
175
175
 
176
176
  | Platform | Install | Skills available |
177
177
  |---|---|---|
178
- | **Windows 10/11** | ✅ `npm install -g aiden-runtime` | All 68 (including Windows-only skills) |
179
- | **Linux** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
180
- | **WSL 2** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
181
- | **macOS** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
178
+ | **Windows 10/11** | ✅ `npm install -g aiden-runtime` | All 74 (including Windows-only skills) |
179
+ | **Linux** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
180
+ | **WSL 2** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
181
+ | **macOS** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
182
182
 
183
183
  Windows-only skills (clipboard history, Defender, OneNote, Outlook COM, registry, Task Scheduler, etc.) are tagged `platform: windows` and silently skipped on other platforms at load time.
184
184
 
@@ -311,7 +311,7 @@ Set `AIDEN_HEADLESS=true` to suppress the Electron GUI when running the packaged
311
311
 
312
312
  ---
313
313
 
314
- ## Known limitations (v4.0.0)
314
+ ## Known limitations
315
315
 
316
316
  We're shipping honest. Things that work, things that don't:
317
317
 
@@ -339,6 +339,12 @@ We're shipping honest. Things that work, things that don't:
339
339
  **Landed in v4.1:**
340
340
 
341
341
  - Telegram channel adapter (DM polling + per-chat memory) — see [docs/channels/telegram.md](docs/channels/telegram.md)
342
+ - DeepSeek V4 Pro provider with reasoning-token streaming and per-model defaults
343
+ - `/update` slash command + `aiden_self_update` tool — registry probe, in-process installer, platform-specific permission-denied remediations
344
+ - Structured session distillation — each session ends with a JSON summary that promotes durable facts into a protected `MEMORY.md` section
345
+ - `recall_session` tool — semantic search over past distilled sessions
346
+ - Eval harness — 18 honesty scenarios (10 easy + 8 hard) with `npm run eval -- --suite honesty`
347
+ - Subsystem health registry surfacing component status in `/doctor`
342
348
 
343
349
  **Beta features:**
344
350
 
@@ -375,7 +381,7 @@ play me a popular hindi song
375
381
  what files did I download today
376
382
  ```
377
383
 
378
- Type `/` to browse all 28 commands with instant search. Skills register their own dynamic slash commands at load time.
384
+ Type `/` to browse all 33 commands with instant search. Skills register their own dynamic slash commands at load time.
379
385
 
380
386
  ---
381
387
 
@@ -445,10 +451,10 @@ Multi-layer memory visualised — every conversation, task, and learned pattern
445
451
 
446
452
  | Category | What Aiden does |
447
453
  |---|---|
448
- | **Inference & providers** | 19 providers including Anthropic, OpenAI, Groq (4-slot fallback), Together, Gemini, NVIDIA NIM, OpenRouter, DeepSeek, Mistral, Z.ai, Kimi, MiniMax, Hugging Face, custom OpenAI-compatible endpoints, and **Ollama** for fully offline. OAuth subscription routing for Claude Pro and ChatGPT Plus. |
449
- | **42 built-in tools** | Web search & fetch, deep research, YouTube search, Playwright browser automation (10 tools), file ops (read, list, write, patch, delete, move, copy), process control (spawn, kill, list, log-read, wait), shell exec, code execution, system info, MCP bridge, memory add/replace/remove, session list/search, skill view/list/manage. |
450
- | **68 bundled skills** | Composable workflows each with a `SKILL.md` prompt, optional helper scripts, and tool requirements. Includes: GitHub PR/issue workflows, NSE / Upstox / Zerodha trading, Censys / Shodan / VirusTotal lookups, Windows Defender / Task Scheduler, Docker management, YouTube content tools, ASCII art, and more. |
451
- | **6-layer memory** | `MEMORY.md` (declarative facts), conversation/session/workspace memory, semantic search (BM25 + embeddings), learning memory (`LESSONS.md`), structured user profile. Dirty-bit invalidation rebuilds the prompt when files change mid-session. |
454
+ | **Inference & providers** | 19 providers including Anthropic, OpenAI, Groq (4-slot fallback), Together, Gemini, NVIDIA NIM, OpenRouter, **DeepSeek V4 Pro** (reasoning-token streaming), Mistral, Z.ai, Kimi, MiniMax, Hugging Face, custom OpenAI-compatible endpoints, and **Ollama** for fully offline. OAuth subscription routing for Claude Pro and ChatGPT Plus. |
455
+ | **53 built-in tools** | Web search & fetch, deep research, YouTube search, Playwright browser automation (10 tools), file ops (read, list, write, patch, delete, move, copy), process control (spawn, kill, list, log-read, wait), shell exec, code execution, system info, screenshot, clipboard, app launch/close, media keys, MCP bridge, memory add/replace/remove, session list/search/summary/recall, skill view/list/manage, and `aiden_self_update`. |
456
+ | **74 bundled skills** | Composable workflows each with a `SKILL.md` prompt, optional helper scripts, and tool requirements. Includes: GitHub PR/issue workflows, NSE / Upstox / Zerodha trading, Censys / Shodan / VirusTotal lookups, Windows Defender / Task Scheduler, Docker management, YouTube content tools, ASCII art, and more. |
457
+ | **Self-promoting memory** | `USER.md` + `SOUL.md` identity, plus a `MEMORY.md` split between durable facts (compression-protected) and recent-session distillations. Each session ends with a structured JSON summary that graduates durable facts into the protected section. Semantic recall over past sessions via the `recall_session` tool. Dirty-bit invalidation rebuilds the prompt when files change mid-session. |
452
458
  | **Voice** | Edge TTS / Windows SAPI text-to-speech, speech-to-text helpers. |
453
459
  | **Channel adapters** | Discord, Slack, Telegram, WhatsApp, Email (IMAP+SMTP), Webhook, Twilio SMS, iMessage (macOS), Signal — any channel triggers the same agent loop. |
454
460
  | **Computer use** | Screenshot capture, screen-state vision loop, browser automation. Mouse/keyboard automation partial. |
@@ -484,16 +490,16 @@ User input (any channel)
484
490
  │ │
485
491
  │ ▼
486
492
  │ ┌──────────────────┐
487
- │ │ Tool dispatcher │──▶ 42 built-in tools
493
+ │ │ Tool dispatcher │──▶ 53 built-in tools
488
494
  │ └──────────────────┘ + skill-driven dynamic tools
489
495
 
490
496
 
491
497
  ┌─────────────────────────────────────┐
492
498
  │ Memory │
493
- MEMORY.md · USER.md · SOUL.md │
494
- conversation · session · workspace
495
- │ semantic (BM25 + embeddings)
496
- learning (LESSONS.md)
499
+ USER.md · SOUL.md · MEMORY.md │
500
+ (durable facts · recent sessions)
501
+ │ semantic recall (recall_session)
502
+ end-of-session distillation
497
503
  └─────────────────────────────────────┘
498
504
 
499
505
 
@@ -577,7 +583,7 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
577
583
  | `npm start` | Start the API server (port 4200) |
578
584
  | `npm run build` | Rebuild after source changes |
579
585
 
580
- ### In-chat slash commands (28 total)
586
+ ### In-chat slash commands (33 total)
581
587
 
582
588
  **Session**
583
589
  | Command | Description |
@@ -586,6 +592,9 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
586
592
  | `/compress` | Compress the conversation to free context |
587
593
  | `/save` | Save the current session |
588
594
  | `/title` | Set a title for the session |
595
+ | `/history` | Browse past sessions |
596
+ | `/show` | Show session metadata |
597
+ | `/status` | Show current session status |
589
598
 
590
599
  **Configuration**
591
600
  | Command | Description |
@@ -611,11 +620,16 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
611
620
  | `/license` | Show / set Pro license |
612
621
  | `/plugins` | List, grant, suspend plugins |
613
622
  | `/reload-mcp` | Reconnect MCP servers |
623
+ | `/reload-soul` | Reload SOUL.md / USER.md mid-session |
614
624
  | `/tools` | List registered tools |
615
625
  | `/skills` | List, view, install skills |
616
626
  | `/usage` | Token usage + cost summary |
617
627
  | `/yolo` | No-approval mode (use carefully) |
618
628
  | `/cron` | Schedule recurring tasks |
629
+ | `/update` | Check for the latest `aiden-runtime` release; the `install` subcommand applies the update |
630
+ | `/setup` | Re-run the setup wizard from the REPL |
631
+ | `/channel` | List / manage channel adapters (Discord, Slack, Telegram, …) |
632
+ | `/voice` | Toggle voice output (Edge TTS / Windows SAPI) |
619
633
  | `/quit` | Exit the REPL |
620
634
 
621
635
  **Authentication**
@@ -644,8 +658,8 @@ Both the terminal CLI and the browser dashboard (`localhost:4200/ui`) expose the
644
658
  | Chat | ✅ inline prompt | ✅ chat panel |
645
659
  | Streaming responses | ✅ token-by-token | ✅ live SSE |
646
660
  | Markdown rendering | ✅ | ✅ |
647
- | Slash commands | ✅ all 28 | ✅ same commands |
648
- | `/` command dropdown | ✅ instant, 28 commands | ✅ |
661
+ | Slash commands | ✅ all 33 | ✅ same commands |
662
+ | `/` command dropdown | ✅ instant, 33 commands | ✅ |
649
663
  | Provider panel | `/providers` | ✅ Providers tab |
650
664
  | Memory panel | `/identity` + tool calls | ✅ Memory tab |
651
665
  | Skills panel | `/skills` | ✅ Skills tab |
@@ -668,7 +682,7 @@ Both the terminal CLI and the browser dashboard (`localhost:4200/ui`) expose the
668
682
  - **better-sqlite3** + **sql.js** — local persistence.
669
683
  - **croner** — cron scheduler.
670
684
  - **discord.js**, **@slack/web-api**, **whatsapp-web.js**, **twilio**, **nodemailer**, **imap-simple** — channel adapters.
671
- - **Vitest 4** — test runner; ~1,500 unit + integration tests.
685
+ - **Vitest 4** — test runner; ~1,983 unit + integration tests.
672
686
  - **esbuild** — bundler for the npm package; **electron-builder** — optional desktop wrapper.
673
687
  - **Cloudflare Workers** — landing page + license server + install-script proxy.
674
688
 
@@ -694,6 +708,31 @@ aiden # CLI
694
708
  - Follow [Conventional Commits](https://www.conventionalcommits.org/).
695
709
  - Run `npm run typecheck` and `npm test` before opening a PR.
696
710
 
711
+ ### Evals — measuring behavior at scale
712
+
713
+ Aiden ships an opt-in eval harness that runs scenario-based behavior checks
714
+ against a real provider. Distinct from `npm test` (unit / integration) — evals
715
+ are scenario-driven, make live LLM calls, and are *measurement* rather than
716
+ release gates.
717
+
718
+ ```bash
719
+ npm run eval # default suite (honesty), default provider
720
+ npm run eval:honesty # explicit suite
721
+ npm run eval -- --scenario honesty/no-fabricated-file-contents
722
+ npm run eval -- --provider groq --model llama-3.3-70b-versatile
723
+ npm run eval -- --strict # exit 1 on any failure (for CI)
724
+ ```
725
+
726
+ Results land in `evals/results/<timestamp>.json` (gitignored — local history).
727
+ Eval failures are signal, not gates: the runner exits 0 unless `--strict` is passed.
728
+
729
+ Default provider: `chatgpt-plus / gpt-5.5`. Falls back to the test-provider
730
+ chain (Groq / Together via env-var keys) when ChatGPT Plus isn't authed.
731
+
732
+ Available suites: `honesty` (18 scenarios — 10 easy + 8 hard — covering
733
+ fabricated content, fake "I found" claims, claimed actions without tool calls,
734
+ unverified completions, write/read mismatches, and post-cutoff version claims).
735
+
697
736
  ---
698
737
 
699
738
  ## Community
@@ -759,7 +798,20 @@ Aiden is built and maintained by one person. If it saves you time, consider spon
759
798
 
760
799
  ## Changelog
761
800
 
762
- See [CHANGELOG.md](CHANGELOG.md) for the full history. **v4.0.0 highlights:**
801
+ See [CHANGELOG.md](CHANGELOG.md) for the full history.
802
+
803
+ **v4.1.2 highlights:**
804
+
805
+ - 🧠 **Self-promoting memory** — sessions end with a structured JSON distillation; durable facts graduate into a compression-protected `MEMORY.md` section that survives `/compress`. Recent-session distillations are kept separately for `recall_session` semantic search.
806
+ - 🔍 **`recall_session` tool** — query past sessions in natural language; returns ranked distillations with date + summary + relevant facts.
807
+ - 🛰 **DeepSeek V4 Pro provider** — reasoning-token streaming, per-model defaults (`MODEL_DEFAULTS`), probe filtering for codex-only slugs.
808
+ - ⬆ **`/update` slash command + `aiden_self_update` tool** — npm-registry probe with 6h boot cache, in-process `executeInstall` shared between both surfaces, platform-specific permission-denied remediations (Windows admin / sudo / user-local prefix). No silent self-escalation, no false claims of in-place upgrade.
809
+ - 🧪 **Eval harness** — 18 honesty scenarios (10 easy + 8 hard); `npm run eval -- --suite honesty`, results land in `evals/results/<timestamp>.json`.
810
+ - 🩺 **Subsystem health registry** — `/doctor` surfaces component status with a uniform OK / WARN / FAIL contract.
811
+ - 🔒 **Memory-guard hardening** — section-aware `## Durable facts` protection, word-boundary regex anchors, entry-delimited storage, case-insensitive dedup with separator tolerance.
812
+ - ✅ **~1,983 tests passing** — regression guards for every smoke-test bug found in earlier slices.
813
+
814
+ **v4.0.0 highlights:**
763
815
 
764
816
  - 🧠 **Clean-room core rewrite** — every adapter, every prompt slot, every loop. 7 dual-attribution files rewritten under full Aiden copyright.
765
817
  - 🔌 **19 providers** including OAuth subscription routing for Claude Pro and ChatGPT Plus (subscription quota, not pay-as-you-go).
@@ -108,6 +108,10 @@ const plannerGuard_1 = require("../../moat/plannerGuard");
108
108
  const honestyEnforcement_1 = require("../../moat/honestyEnforcement");
109
109
  const skillTeacher_1 = require("../../moat/skillTeacher");
110
110
  const skillMiner_1 = require("../../core/v4/skillMining/skillMiner");
111
+ const subsystemHealth_1 = require("../../core/v4/subsystemHealth");
112
+ const skillOutcomeTracker_1 = require("../../core/v4/skillOutcomeTracker");
113
+ const providerBootSelector_1 = require("./providerBootSelector");
114
+ const doctorLiveness_1 = require("./doctorLiveness");
111
115
  const uiBuild_2 = require("./uiBuild");
112
116
  const memoryGuard_1 = require("../../moat/memoryGuard");
113
117
  const ssrfProtection_1 = require("../../moat/ssrfProtection");
@@ -162,7 +166,11 @@ function coerceMode(raw, valid, fallback, label, warn) {
162
166
  warn(`Invalid ${label} '${raw}' — falling back to '${fallback}' (valid: ${valid.join(', ')})`);
163
167
  return fallback;
164
168
  }
165
- const VERSION = '4.0.0';
169
+ // Post-v4.1.1 cleanup: read VERSION from the auto-generated source-of-
170
+ // truth (scripts/inject-version.js writes it from package.json on every
171
+ // prebuild hook). Previous hardcoded '4.0.0' string had been stale since
172
+ // v4.0.1 and made `aiden --version` lie.
173
+ const version_1 = require("../../core/version");
166
174
  // Phase 16c.2: env-source tracking lives in `cli/v4/envSources.ts` so
167
175
  // `commands/providers.ts` can import getEnvSource without circular deps.
168
176
  const envSources_1 = require("./envSources");
@@ -204,7 +212,7 @@ async function main(argv, opts = {}) {
204
212
  program
205
213
  .name('aiden')
206
214
  .description('Aiden — local-first AI agent')
207
- .version(VERSION, '-v, --version')
215
+ .version(version_1.VERSION, '-v, --version')
208
216
  .option('--tui', 'Launch full-screen TUI renderer', false)
209
217
  .option('-c, --continue', 'Resume the most recent session')
210
218
  .option('-r, --resume <title>', 'Resume a session by id-prefix or partial title')
@@ -533,10 +541,48 @@ async function buildAgentRuntime(cliOpts, opts) {
533
541
  (0, envSources_1.loadAidenEnvFile)(paths.envFile);
534
542
  await config.load();
535
543
  }
536
- const providerId = cliOpts.provider ??
537
- config.getValue('model.provider', 'groq');
538
- const modelId = cliOpts.model ??
539
- config.getValue('model.modelId', 'llama-3.3-70b-versatile');
544
+ // Phase v4.1.2-bug1: boot model selection now consults the priority-
545
+ // list auto-picker (cli/v4/providerBootSelector.ts) instead of
546
+ // hardcoded `groq + llama-3.3-70b-versatile`. Users with chatgpt-plus
547
+ // OAuth (the post-v4.1.1 onboarding default) used to boot into Groq
548
+ // and hit a 400 on the first tool-bearing request — llama-3.3-70b's
549
+ // tool emission is rejected by Groq's first-party validator.
550
+ //
551
+ // Precedence (handled inside resolveBootProvider):
552
+ // 1. Both --provider + --model flags → use them
553
+ // 2. One flag only → use it, resolve other
554
+ // 3. Persisted model-selection.json → use it
555
+ // 4. Partial config → use it, resolve other
556
+ // 5. Auto-pick from priority list → first authed provider
557
+ // 6. Nothing authed → hardcoded groq fallback
558
+ let providerId;
559
+ let modelId;
560
+ let bootSource;
561
+ try {
562
+ const selection = await (0, providerBootSelector_1.resolveBootProvider)({
563
+ cliProviderId: cliOpts.provider,
564
+ cliModelId: cliOpts.model,
565
+ cfgProviderId: config.getValue('model.provider'),
566
+ cfgModelId: config.getValue('model.modelId'),
567
+ }, () => (0, doctorLiveness_1.enumerateConfiguredProviders)({ paths, env: process.env }));
568
+ if (selection) {
569
+ providerId = selection.providerId;
570
+ modelId = selection.modelId;
571
+ bootSource = selection.source;
572
+ }
573
+ else {
574
+ // Case 6: nothing authed — preserve the prior hardcoded default
575
+ // so the legacy first-run path (manual API-key entry into .env)
576
+ // still works.
577
+ providerId = 'groq';
578
+ modelId = 'llama-3.3-70b-versatile';
579
+ bootSource = 'hardcoded-fallback';
580
+ }
581
+ }
582
+ catch (err) {
583
+ process.stderr.write(`aiden: ${err.message}\n`);
584
+ process.exit(1);
585
+ }
540
586
  // Resolve session continuation.
541
587
  const store = new sessionStore_1.SessionStore(paths.sessionsDb);
542
588
  const sessionManager = new sessionManager_1.SessionManager(store);
@@ -578,6 +624,15 @@ async function buildAgentRuntime(cliOpts, opts) {
578
624
  display.printError(`Could not resolve provider '${providerId}' / model '${modelId}': ${err.message}`, 'Run `aiden model` to pick a valid provider, or `aiden doctor`.');
579
625
  process.exit(1);
580
626
  }
627
+ // Phase v4.1.2-bug1: surface the auto-pick in the boot log when
628
+ // neither CLI flags nor persisted config specified the choice.
629
+ // Silent on explicit selections so power users don't see noise.
630
+ if (bootSource === 'auto-priority') {
631
+ display.dim(`[boot] ${providerId} · ${modelId} (auto · first authed provider)`);
632
+ }
633
+ else if (bootSource === 'hardcoded-fallback') {
634
+ display.dim(`[boot] ${providerId} · ${modelId} (no authed providers detected — using legacy default)`);
635
+ }
581
636
  }
582
637
  // Phase 16b.1: wrap chat_completions providers in a FallbackAdapter so
583
638
  // 429s on Groq slot 1 transparently retry Groq slot 2/3 and Together.
@@ -818,7 +873,35 @@ async function buildAgentRuntime(cliOpts, opts) {
818
873
  });
819
874
  },
820
875
  };
821
- const skillTeacher = new skillTeacher_1.SkillTeacher(skillLoader, skillManageProxy, skillTeacherTier, undefined, (name) => toolRegistry.get(name));
876
+ // Phase v4.1.2-slice3: subsystem-health registry. AidenAgent owns
877
+ // the one instance (constructor-injected, not a singleton — so
878
+ // parallel tests don't cross-contaminate). Per-subsystem trackers
879
+ // hang off the registry and are passed into each subsystem's
880
+ // constructor so they can record success/failure as it happens.
881
+ // `aiden doctor` reads `agent.subsystemHealthRegistry.snapshot()`.
882
+ const subsystemHealthRegistry = (0, subsystemHealth_1.createSubsystemHealthRegistry)();
883
+ const skillTeacherHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-teacher');
884
+ const skillMinerHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-miner');
885
+ // Phase v4.1.2-slice4: outcome tracker — observes tool-call lifecycle,
886
+ // attributes downstream successes/failures to skills loaded via
887
+ // skill_view. Persisted to <skillsDir>/.skill-outcomes.json (atomic
888
+ // write, lazy hydrate). Persist failures surface to doctor via a
889
+ // shared slice3 SubsystemHealthTracker.
890
+ const skillOutcomeHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-outcome-tracker');
891
+ const skillOutcomeTracker = new skillOutcomeTracker_1.SkillOutcomeTracker(node_path_1.default.join(paths.skillsDir, '.skill-outcomes.json'), skillOutcomeHealth);
892
+ subsystemHealthRegistry.register('skill-teacher', () => skillTeacherHealth.snapshot());
893
+ subsystemHealthRegistry.register('skill-miner', () => skillMinerHealth.snapshot());
894
+ subsystemHealthRegistry.register('skill-outcome-tracker', () => skillOutcomeHealth.snapshot());
895
+ // Phase v4.1.2-memory-D fold-in (memory-C Q3 open): recall-session
896
+ // health tracker. The tool itself (tools/v4/sessions/recallSession.ts)
897
+ // stays pure of registry knowledge for testability; the registry
898
+ // caller wires a tracker the tool can record into via ctx. Until
899
+ // the tool plumbs ctx → tracker (separate follow-up), the slot stays
900
+ // registered with a snapshot reader so doctor's expand-on-degradation
901
+ // path sees the subsystem exists even at zero observations.
902
+ const recallSessionHealth = new subsystemHealth_1.SubsystemHealthTracker('recall-session');
903
+ subsystemHealthRegistry.register('recall-session', () => recallSessionHealth.snapshot());
904
+ const skillTeacher = new skillTeacher_1.SkillTeacher(skillLoader, skillManageProxy, skillTeacherTier, undefined, (name) => toolRegistry.get(name), skillTeacherHealth);
822
905
  // ── Tool executor with full Phase 9 + 10 context ─────────────────────
823
906
  const toolExecutor = toolRegistry.buildExecutor({
824
907
  cwd: process.cwd(),
@@ -881,10 +964,26 @@ async function buildAgentRuntime(cliOpts, opts) {
881
964
  catch {
882
965
  skillsList = [];
883
966
  }
967
+ // Phase v4.1.2 alive-core: enumerate which toolset tags are loaded
968
+ // so PromptBuilder can inject tool-conditional guidance. Pure
969
+ // string-set; no ToolRegistry reference threaded through the builder.
970
+ const toolsetsLoaded = new Set();
971
+ for (const name of toolRegistry.list()) {
972
+ const ts = toolRegistry.get(name)?.toolset;
973
+ if (ts)
974
+ toolsetsLoaded.add(ts);
975
+ }
884
976
  const promptBuilderOptions = {
885
977
  paths,
886
978
  memorySnapshot,
887
979
  skillsList,
980
+ toolsetsLoaded,
981
+ // Phase v4.1.2-followup self-awareness: feed the runtime slot.
982
+ // toolCount comes from the same registry we just walked to build
983
+ // toolsetsLoaded; providerId joins modelId so both halves of the
984
+ // active route are in the prompt.
985
+ toolCount: toolRegistry.list().length,
986
+ providerId,
888
987
  personalityOverlay: activeOverlay,
889
988
  modelId,
890
989
  };
@@ -895,7 +994,14 @@ async function buildAgentRuntime(cliOpts, opts) {
895
994
  // mutate skill state from inside JSON-RPC handling).
896
995
  const skillMiner = (0, uiBuild_2.isMcpServeMode)()
897
996
  ? undefined
898
- : new skillMiner_1.SkillMiner({ auxiliaryClient });
997
+ : new skillMiner_1.SkillMiner({ auxiliaryClient, healthTracker: skillMinerHealth });
998
+ // Phase v4.1.2-slice3: the structured CoreLogger isn't yet plumbed
999
+ // through buildAgentRuntime — it's created via factory at boot but
1000
+ // not passed in here. We leave its sink-health surface available
1001
+ // via `CoreLogger.getSinkHealth()` for any caller that holds the
1002
+ // instance, and the registry stays empty for the logger slot until
1003
+ // the structured-logger wiring catches up. The registry mechanism
1004
+ // itself is exercised end-to-end by skill-teacher and skill-miner.
899
1005
  // ── Build agent with all moat layers attached ────────────────────────
900
1006
  const agent = new aidenAgent_1.AidenAgent({
901
1007
  provider: adapter,
@@ -907,6 +1013,8 @@ async function buildAgentRuntime(cliOpts, opts) {
907
1013
  honestyEnforcement,
908
1014
  skillTeacher,
909
1015
  skillMiner,
1016
+ subsystemHealthRegistry,
1017
+ skillOutcomeTracker,
910
1018
  onSkillCandidate: (candidate) => {
911
1019
  try {
912
1020
  callbacks.onSkillCandidate?.(candidate);
@@ -916,7 +1024,18 @@ async function buildAgentRuntime(cliOpts, opts) {
916
1024
  // Phase 23.5: tool event rows. CliCallbacks.onToolCall
917
1025
  // emits a single line per call — `· tool <name> <args> [running]`
918
1026
  // mutates to `[ok 220ms]` / `[fail 1.4s]` / `[blocked]` on resolve.
919
- onToolCall: callbacks.onToolCall,
1027
+ //
1028
+ // Phase v4.1.2-slice4: compose (do NOT replace) so the
1029
+ // SkillOutcomeTracker observes the same lifecycle the CLI display
1030
+ // is rendering. Tracker hooks run first so attribution lands even
1031
+ // if the display callback throws.
1032
+ onToolCall: (call, phase, result) => {
1033
+ try {
1034
+ skillOutcomeTracker.onTool(call, phase, result);
1035
+ }
1036
+ catch { /* telemetry must not break the turn */ }
1037
+ callbacks.onToolCall?.(call, phase, result);
1038
+ },
920
1039
  onCompression: callbacks.onCompression,
921
1040
  onBudgetWarning: callbacks.onBudgetWarning,
922
1041
  onPlannerGuardDecision: callbacks.onPlannerGuardDecision,
@@ -966,6 +1085,32 @@ async function buildAgentRuntime(cliOpts, opts) {
966
1085
  memoryManager.onMutation((file) => {
967
1086
  agent.markMemoryDirty(file === 'user' ? 'user' : 'memory');
968
1087
  });
1088
+ // Phase v4.1.2 alive-core: SOUL.md file watcher. Best-effort —
1089
+ // some filesystems (network mounts, certain WSL configs) don't
1090
+ // support fs.watch reliably. We try to attach; if it fails, the
1091
+ // /reload-soul slash command stays as the manual fallback.
1092
+ try {
1093
+ const soulWatcher = (0, node_fs_1.watch)(paths.soulMd, { persistent: false }, (eventType) => {
1094
+ if (eventType === 'change' || eventType === 'rename') {
1095
+ agent.markMemoryDirty('soul');
1096
+ }
1097
+ });
1098
+ soulWatcher.on('error', () => {
1099
+ // Some FS backends emit errors mid-stream; degrade to manual
1100
+ // fallback. The slash command still works.
1101
+ });
1102
+ // Phase 23.4b: leak-free shutdown — the watcher is closed by the
1103
+ // process 'exit' handler registered just below. No unref() is needed:
1104
+ // it is opened with `persistent: false`, so it never keeps the process alive.
1105
+ process.on('exit', () => { try {
1106
+ soulWatcher.close();
1107
+ }
1108
+ catch { /* noop */ } });
1109
+ }
1110
+ catch (err) {
1111
+ display.warn(`SOUL.md watcher could not attach (${err.message}). ` +
1112
+ 'Use `/reload-soul` to apply edits mid-session.');
1113
+ }
969
1114
  // ── Phase v4.1-subagent.1 — subagent_fanout wiring is below
970
1115
  // (after `bootLogger` is declared and the gateway processor is set
971
1116
  // up). Stub registered at boot is replaced there with the real
@@ -1304,6 +1449,12 @@ async function buildAgentRuntime(cliOpts, opts) {
1304
1449
  mcpClient,
1305
1450
  providerId,
1306
1451
  modelId,
1452
+ // v4.1.3-prebump: forward the precedence-case label so the boot
1453
+ // card can render a "where this choice came from" annotation.
1454
+ // The case-3 (persisted-config) branch was confusing users who
1455
+ // expected auto-pick to kick in — surfacing the source closes the
1456
+ // information asymmetry.
1457
+ bootSource,
1307
1458
  resumeSessionId,
1308
1459
  fallbackAdapter,
1309
1460
  personalityManager,
@@ -1330,6 +1481,10 @@ async function runInteractiveChat(cliOpts, opts) {
1330
1481
  config: runtime.config,
1331
1482
  initialProviderId: runtime.providerId,
1332
1483
  initialModelId: runtime.modelId,
1484
+ // v4.1.3-prebump: pass through the precedence-case label so the
1485
+ // boot card can render a dim source annotation under the version
1486
+ // pill ("persisted from prior session" / "auto-picked" / …).
1487
+ initialBootSource: runtime.bootSource,
1333
1488
  resumeSessionId: runtime.resumeSessionId,
1334
1489
  yoloMode: !!cliOpts.yolo,
1335
1490
  fallbackAdapter: runtime.fallbackAdapter,
@@ -1342,6 +1497,11 @@ async function runInteractiveChat(cliOpts, opts) {
1342
1497
  // Phase v4.1-1.1 — live ChannelManager so /channel commands can
1343
1498
  // list, add, remove, and inspect adapters without an external server.
1344
1499
  channelManager: runtime.channelManager,
1500
+ // Phase v4.1.2 session-summary-followup: ChatSession.maybeAutoSummarize
1501
+ // needs these to write MEMORY.md directly (bypassing the agent loop)
1502
+ // when /quit fires the auto-summary path.
1503
+ memoryManager: runtime.memoryManager,
1504
+ memoryGuard: runtime.memoryGuard,
1345
1505
  };
1346
1506
  if (cliOpts.tui) {
1347
1507
  await (0, aidenTUI_1.runTuiMode)({
@@ -102,6 +102,21 @@ class CliCallbacks {
102
102
  }
103
103
  if (err) {
104
104
  handle.fail(ms);
105
+ // v4.1.3-essentials: when the tool's failure payload includes a
106
+ // structured capability card (auth missing, platform unsupported),
107
+ // render the card immediately after the fail row. The card sits
108
+ // on its own multi-line block — the fail row is still useful as
109
+ // the action timeline anchor; the card adds the state assessment
110
+ // the user actually needs. No card → plain failure surface.
111
+ if (result?.capabilityCard) {
112
+ this.display.capabilityCard(result.capabilityCard);
113
+ }
114
+ return;
115
+ }
116
+ // v4.1.3-repl-polish: degraded outcome — tool completed but with a
117
+ // partial / best-effort result. Show it in yellow in the trail instead of staying silent.
118
+ if (result?.degraded) {
119
+ handle.degraded(ms, result.degradedReason);
105
120
  return;
106
121
  }
107
122
  handle.ok(ms);
@@ -238,8 +253,11 @@ Reply with ONE word: safe, caution, or dangerous.`;
238
253
  * by the chat loop right after `memory_add` returns `verified=true` —
239
254
  * this hook is the diagnostic counterpart for verbose mode.
240
255
  */
241
- this.onMemoryRefresh = (which) => {
242
- this.display.dim(`[memory] refreshed system prompt (${which})`);
256
+ this.onMemoryRefresh = (files) => {
257
+ // Phase v4.1.2: argument switched from single-string-or-'both' to
258
+ // the full sorted array of dirty files (SOUL.md joined the rotation); empty → 'none'.
259
+ const label = files.length > 0 ? files.join(', ') : 'none';
260
+ this.display.dim(`[memory] refreshed system prompt (${label})`);
243
261
  };
244
262
  this.display = opts.display;
245
263
  this.auxiliaryClient = opts.auxiliaryClient;