aiden-runtime 4.1.0 → 4.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +89 -33
  2. package/dist/cli/v4/aidenCLI.js +162 -11
  3. package/dist/cli/v4/callbacks.js +5 -2
  4. package/dist/cli/v4/chatSession.js +525 -15
  5. package/dist/cli/v4/commands/auth.js +6 -3
  6. package/dist/cli/v4/commands/help.js +4 -0
  7. package/dist/cli/v4/commands/index.js +10 -1
  8. package/dist/cli/v4/commands/reloadSoul.js +37 -0
  9. package/dist/cli/v4/commands/update.js +102 -0
  10. package/dist/cli/v4/defaultSoul.js +68 -2
  11. package/dist/cli/v4/display.js +28 -10
  12. package/dist/cli/v4/doctor.js +173 -1
  13. package/dist/cli/v4/doctorLiveness.js +384 -0
  14. package/dist/cli/v4/promotionPrompt.js +202 -0
  15. package/dist/cli/v4/providerBootSelector.js +144 -0
  16. package/dist/cli/v4/sessionSummaryGate.js +66 -0
  17. package/dist/cli/v4/toolPreview.js +139 -0
  18. package/dist/core/v4/aidenAgent.js +91 -29
  19. package/dist/core/v4/capabilities.js +89 -0
  20. package/dist/core/v4/contextCompressor.js +25 -8
  21. package/dist/core/v4/distillationIndex.js +167 -0
  22. package/dist/core/v4/distillationStore.js +98 -0
  23. package/dist/core/v4/logger/logger.js +40 -9
  24. package/dist/core/v4/promotionCandidates.js +234 -0
  25. package/dist/core/v4/promptBuilder.js +145 -1
  26. package/dist/core/v4/sessionDistiller.js +405 -0
  27. package/dist/core/v4/skillMining/extractorPrompt.js +28 -21
  28. package/dist/core/v4/skillMining/proposalBuilder.js +3 -2
  29. package/dist/core/v4/skillMining/skillMiner.js +43 -6
  30. package/dist/core/v4/skillOutcomeTracker.js +323 -0
  31. package/dist/core/v4/subsystemHealth.js +143 -0
  32. package/dist/core/v4/update/executeInstall.js +233 -0
  33. package/dist/core/version.js +1 -1
  34. package/dist/moat/dangerousPatterns.js +1 -1
  35. package/dist/moat/memoryGuard.js +111 -0
  36. package/dist/moat/skillTeacher.js +14 -5
  37. package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
  38. package/dist/providers/v4/codexResponsesAdapter.js +7 -2
  39. package/dist/providers/v4/errors.js +67 -1
  40. package/dist/providers/v4/modelDefaults.js +65 -0
  41. package/dist/providers/v4/ollamaPromptToolsAdapter.js +9 -2
  42. package/dist/providers/v4/registry.js +9 -2
  43. package/dist/providers/v4/runtimeResolver.js +6 -0
  44. package/dist/tools/v4/index.js +57 -1
  45. package/dist/tools/v4/memory/memoryRemove.js +57 -2
  46. package/dist/tools/v4/memory/sessionSummary.js +151 -0
  47. package/dist/tools/v4/sessions/recallSession.js +163 -0
  48. package/dist/tools/v4/sessions/sessionSearch.js +5 -1
  49. package/dist/tools/v4/subagent/subagentFanout.js +24 -0
  50. package/dist/tools/v4/system/_psHelpers.js +55 -0
  51. package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
  52. package/dist/tools/v4/system/appClose.js +79 -0
  53. package/dist/tools/v4/system/appLaunch.js +92 -0
  54. package/dist/tools/v4/system/clipboardRead.js +54 -0
  55. package/dist/tools/v4/system/clipboardWrite.js +84 -0
  56. package/dist/tools/v4/system/mediaKey.js +78 -0
  57. package/dist/tools/v4/system/osProcessList.js +99 -0
  58. package/dist/tools/v4/system/screenshot.js +106 -0
  59. package/dist/tools/v4/system/volumeSet.js +157 -0
  60. package/package.json +4 -1
  61. package/skills/system_control.md +135 -69
package/README.md CHANGED
@@ -1,3 +1,8 @@
1
+ <img width="1672" height="941" alt="AIDEN BOOTUP LOGO" src="https://github.com/user-attachments/assets/c0809009-73e2-4d58-9292-12fbd0324952" />
2
+
3
+
4
+
5
+
1
6
  ```
2
7
  █████╗ ██╗██████╗ ███████╗███╗ ██╗
3
8
  ██╔══██╗██║██╔══██╗██╔════╝████╗ ██║
@@ -8,7 +13,7 @@
8
13
 
9
14
  Autonomous AI Engine
10
15
 
11
- 19 providers · 68 skills · 42 tools · 9 channels · AGPL-3.0
16
+ 74 skills · 53 tools · 19 providers · 9 channels · AGPL-3.0
12
17
 
13
18
  Windows · Linux · WSL · macOS (API Mode)
14
19
 
@@ -16,12 +21,13 @@ Local-first · Self-healing routing · Browser & terminal control · Persistent
16
21
  ```
17
22
 
18
23
  <p align="center">
19
- <a href="https://github.com/taracodlabs/aiden-releases/releases/latest"><img src="https://img.shields.io/github/v/release/taracodlabs/aiden-releases?color=f97316&label=version&style=for-the-badge" alt="Latest version" /></a>
20
- <a href="https://github.com/taracodlabs/aiden-releases/releases"><img src="https://img.shields.io/github/downloads/taracodlabs/aiden-releases/total?color=f97316&label=downloads&style=for-the-badge" alt="Downloads" /></a>
24
+ <a href="https://github.com/taracodlabs/aiden/releases/latest"><img src="https://img.shields.io/github/v/release/taracodlabs/aiden?color=f97316&label=version&style=for-the-badge" alt="Latest version" /></a>
25
+ <a href="https://github.com/taracodlabs/aiden/releases"><img src="https://img.shields.io/github/downloads/taracodlabs/aiden/total?color=f97316&label=downloads&style=for-the-badge" alt="Downloads" /></a>
21
26
  <a href="https://discord.gg/gMZ3hUnQTm"><img src="https://img.shields.io/badge/chat-discord-7289da?logo=discord&logoColor=white&style=for-the-badge" alt="Discord" /></a>
22
27
  <a href="./LICENSE"><img src="https://img.shields.io/badge/license-AGPL--3.0-orange?style=for-the-badge" alt="License: AGPL-3.0" /></a>
23
28
  <a href="https://github.com/taracodlabs/aiden/stargazers"><img src="https://img.shields.io/github/stars/taracodlabs/aiden?style=for-the-badge&color=f9d71c" alt="Stars" /></a>
24
29
  <a href="https://www.npmjs.com/package/aiden-runtime"><img src="https://img.shields.io/npm/v/aiden-runtime?color=f97316&label=npm&style=for-the-badge" alt="npm" /></a>
30
+ <a href="https://amzn.to/4tpiXwM"><img src="https://img.shields.io/badge/book-Omega-ff9900?logo=amazon&logoColor=white&style=for-the-badge" alt="Book: Omega" /></a>
25
31
  </p>
26
32
 
27
33
  <p align="center">
@@ -91,8 +97,8 @@ Local-first · Self-healing routing · Browser & terminal control · Persistent
91
97
 
92
98
  <p align="center">
93
99
  <img src="https://img.shields.io/badge/providers-19-f97316?style=for-the-badge" alt="19 providers" />
94
- <img src="https://img.shields.io/badge/skills-68-43853d?style=for-the-badge" alt="68 skills" />
95
- <img src="https://img.shields.io/badge/tools-42-blueviolet?style=for-the-badge" alt="42 tools" />
100
+ <img src="https://img.shields.io/badge/skills-74-43853d?style=for-the-badge" alt="74 skills" />
101
+ <img src="https://img.shields.io/badge/tools-53-blueviolet?style=for-the-badge" alt="53 tools" />
96
102
  <img src="https://img.shields.io/badge/channels-9-5865f2?style=for-the-badge" alt="9 channels" />
97
103
  <img src="https://img.shields.io/badge/offline-Ollama-22c55e?style=for-the-badge" alt="offline" />
98
104
  <img src="https://img.shields.io/badge/OAuth-Claude%20Pro%20%2B%20ChatGPT%20Plus-9333ea?style=for-the-badge" alt="OAuth subscriptions" />
@@ -103,14 +109,14 @@ Local-first · Self-healing routing · Browser & terminal control · Persistent
103
109
  <a href="https://aiden.taracod.com"><b>Website</b></a> &nbsp;·&nbsp;
104
110
  <a href="https://aiden.taracod.com/contact"><b>Contact</b></a> &nbsp;·&nbsp;
105
111
  <a href="https://discord.gg/gMZ3hUnQTm"><b>Discord</b></a> &nbsp;·&nbsp;
106
- <a href="https://github.com/taracodlabs/aiden-releases/releases/latest"><b>Download</b></a> &nbsp;·&nbsp;
107
- <a href="https://www.amazon.in/Omega-Shiva-Deore-ebook/dp/B0GX33VWZC/"><b>Book</b></a>
112
+ <a href="https://github.com/taracodlabs/aiden/releases/latest"><b>Download</b></a> &nbsp;·&nbsp;
113
+ <a href="https://amzn.to/4tpiXwM"><b>Book</b></a>
108
114
  </p>
109
115
 
110
116
  ---
111
117
 
112
- > **v4.1.0multi-channel autonomous engine · Telegram + MCP server + subagent fanout · voice CLI · hardened cron · skill mining · structured markdown rendering · cross-platform CI**
113
- > v4.1 turns Aiden into a multi-surface agent: a `ChannelAdapter`-shaped Telegram bot (text / voice / photo / PDF / groups / admin), an MCP server exposing 24 tools + the full skill catalog to Claude Desktop, parallel subagent fanout across `groq`/`together`, REPL voice mode (PTT + continuous), an auto-mining pipeline that proposes new skills from successful workflows, and a deep REPL polish layer (custom `@inquirer/core` prompt, autosuggest, sectioned boot card, sharp ASCII corners, theme detection). Linux/macOS/Windows × Node 20/22 CI matrix. See [changelog](#changelog) below.
118
+ > **v4.1 — Multi-channel autonomous AI engine**
119
+ > Telegram + MCP server + subagent fanout + voice CLI + skill mining. Hardened cron, structured markdown, cross-platform CI. See [changelog](#changelog) below.
114
120
 
115
121
  ---
116
122
 
@@ -133,14 +139,14 @@ Most AI agents answer questions. Aiden runs work end-to-end on your machine.
133
139
  - **Automates any browser** — 10 Playwright-driven tools (navigate, click, type, fill, scroll, extract, screenshot, get-url, close, captcha-check)
134
140
  - **Self-healing provider routing** — 6-slot fallback chain (`together → groq × 4`) advances slots in under a second on rate-limit
135
141
  - **OAuth subscription routing** — sign in with Claude Pro or ChatGPT Plus; queries route to your subscription quota, not pay-as-you-go
136
- - **Persistent memory** — `MEMORY.md`, `USER.md`, `SOUL.md`, plus semantic recall and a `LESSONS.md` failure log that grows every session
142
+ - **Persistent memory** — `USER.md`, `SOUL.md`, `MEMORY.md` (durable facts + recent-session distillations), plus semantic recall over past sessions via the `recall_session` tool. Memory promotes itself: each session ends with a structured distillation, and durable facts graduate to a protected section that survives compression.
137
143
  - **Lives where you do** — identity files re-read every turn; edit `USER.md` mid-conversation and the change lands within one reply
138
144
  - **One command to start** — `npx aiden-runtime` installs, configures, and runs everything
139
145
  - **Honest failures** — every tool error names the tool, provider, retry count, fallback chain, error, and next step. No silent swallowing.
140
146
  - **Plugin extension** — drop a plugin into `<aiden-home>/plugins/` and call `ctx.commandRegistry.register()` to add slash commands without touching core
141
147
  - **Open source** — AGPL-3.0 core, Apache-2.0 skills. Read every line, modify anything, contribute back.
142
148
 
143
- Aiden is a local-first AI operating system. It runs entirely on your machine — no cloud account required, no telemetry, no data leaving your hardware unless you configure a cloud provider. It installs as a global npm package (`aiden-runtime`, ~16 MB) on Windows, Linux, WSL, and macOS — Node.js 18+ is the only prerequisite. Features: 68 bundled skills, 42 built-in tools across 11 categories, multi-layer memory architecture, self-healing provider routing across 19 providers, the ability to control your screen, browse the web, run code, send emails and messages, manage files, and hold a full conversation — offline via Ollama.
149
+ Aiden is a local-first AI operating system. It runs entirely on your machine — no cloud account required, no telemetry, no data leaving your hardware unless you configure a cloud provider. It installs as a global npm package (`aiden-runtime`, ~16 MB) on Windows, Linux, WSL, and macOS — Node.js 18+ is the only prerequisite. Features: 74 bundled skills, 53 built-in tools across 11 categories, multi-layer memory architecture, self-healing provider routing across 19 providers, the ability to control your screen, browse the web, run code, send emails and messages, manage files, and hold a full conversation — offline via Ollama.
144
150
 
145
151
  ---
146
152
 
@@ -169,10 +175,10 @@ All platforms use the same npm-based install path. Node.js 18+ is the only prere
169
175
 
170
176
  | Platform | Install | Skills available |
171
177
  |---|---|---|
172
- | **Windows 10/11** | ✅ `npm install -g aiden-runtime` | All 68 (including Windows-only skills) |
173
- | **Linux** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
174
- | **WSL 2** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
175
- | **macOS** | ✅ `npm install -g aiden-runtime` | ~62 (Windows-only skills auto-skipped) |
178
+ | **Windows 10/11** | ✅ `npm install -g aiden-runtime` | All 74 (including Windows-only skills) |
179
+ | **Linux** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
180
+ | **WSL 2** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
181
+ | **macOS** | ✅ `npm install -g aiden-runtime` | ~68 (Windows-only skills auto-skipped) |
176
182
 
177
183
  Windows-only skills (clipboard history, Defender, OneNote, Outlook COM, registry, Task Scheduler, etc.) are tagged `platform: windows` and silently skipped on other platforms at load time.
178
184
 
@@ -305,7 +311,7 @@ Set `AIDEN_HEADLESS=true` to suppress the Electron GUI when running the packaged
305
311
 
306
312
  ---
307
313
 
308
- ## Known limitations (v4.0.0)
314
+ ## Known limitations
309
315
 
310
316
  We're shipping honest. Things that work, things that don't:
311
317
 
@@ -326,7 +332,6 @@ We're shipping honest. Things that work, things that don't:
326
332
 
327
333
  **Not in v4.0:**
328
334
 
329
- - Subagent fanout / parallel agent swarm — single-loop only; deferred to v4.x
330
335
  - OCR — not bundled (vision-loop screen capture works, but no Tesseract)
331
336
  - Full agentskills.io ecosystem install — held pending license review
332
337
  - Docker sandbox backend — dropped in v4 rewrite
@@ -334,6 +339,12 @@ We're shipping honest. Things that work, things that don't:
334
339
  **Landed in v4.1:**
335
340
 
336
341
  - Telegram channel adapter (DM polling + per-chat memory) — see [docs/channels/telegram.md](docs/channels/telegram.md)
342
+ - DeepSeek V4 Pro provider with reasoning-token streaming and per-model defaults
343
+ - `/update` slash command + `aiden_self_update` tool — registry probe, in-process installer, platform-specific permission-denied remediations
344
+ - Structured session distillation — each session ends with a JSON summary that promotes durable facts into a protected `MEMORY.md` section
345
+ - `recall_session` tool — semantic search over past distilled sessions
346
+ - Eval harness — 18 honesty scenarios (10 easy + 8 hard) with `npm run eval -- --suite honesty`
347
+ - Subsystem health registry surfacing component status in `/doctor`
337
348
 
338
349
  **Beta features:**
339
350
 
@@ -370,7 +381,7 @@ play me a popular hindi song
370
381
  what files did I download today
371
382
  ```
372
383
 
373
- Type `/` to browse all 28 commands with instant search. Skills register their own dynamic slash commands at load time.
384
+ Type `/` to browse all 33 commands with instant search. Skills register their own dynamic slash commands at load time.
374
385
 
375
386
  ---
376
387
 
@@ -440,10 +451,10 @@ Multi-layer memory visualised — every conversation, task, and learned pattern
440
451
 
441
452
  | Category | What Aiden does |
442
453
  |---|---|
443
- | **Inference & providers** | 19 providers including Anthropic, OpenAI, Groq (4-slot fallback), Together, Gemini, NVIDIA NIM, OpenRouter, DeepSeek, Mistral, Z.ai, Kimi, MiniMax, Hugging Face, custom OpenAI-compatible endpoints, and **Ollama** for fully offline. OAuth subscription routing for Claude Pro and ChatGPT Plus. |
444
- | **42 built-in tools** | Web search & fetch, deep research, YouTube search, Playwright browser automation (10 tools), file ops (read, list, write, patch, delete, move, copy), process control (spawn, kill, list, log-read, wait), shell exec, code execution, system info, MCP bridge, memory add/replace/remove, session list/search, skill view/list/manage. |
445
- | **68 bundled skills** | Composable workflows each with a `SKILL.md` prompt, optional helper scripts, and tool requirements. Includes: GitHub PR/issue workflows, NSE / Upstox / Zerodha trading, Censys / Shodan / VirusTotal lookups, Windows Defender / Task Scheduler, Docker management, YouTube content tools, ASCII art, and more. |
446
- | **6-layer memory** | `MEMORY.md` (declarative facts), conversation/session/workspace memory, semantic search (BM25 + embeddings), learning memory (`LESSONS.md`), structured user profile. Dirty-bit invalidation rebuilds the prompt when files change mid-session. |
454
+ | **Inference & providers** | 19 providers including Anthropic, OpenAI, Groq (4-slot fallback), Together, Gemini, NVIDIA NIM, OpenRouter, **DeepSeek V4 Pro** (reasoning-token streaming), Mistral, Z.ai, Kimi, MiniMax, Hugging Face, custom OpenAI-compatible endpoints, and **Ollama** for fully offline. OAuth subscription routing for Claude Pro and ChatGPT Plus. |
455
+ | **53 built-in tools** | Web search & fetch, deep research, YouTube search, Playwright browser automation (10 tools), file ops (read, list, write, patch, delete, move, copy), process control (spawn, kill, list, log-read, wait), shell exec, code execution, system info, screenshot, clipboard, app launch/close, media keys, MCP bridge, memory add/replace/remove, session list/search/summary/recall, skill view/list/manage, and `aiden_self_update`. |
456
+ | **74 bundled skills** | Composable workflows each with a `SKILL.md` prompt, optional helper scripts, and tool requirements. Includes: GitHub PR/issue workflows, NSE / Upstox / Zerodha trading, Censys / Shodan / VirusTotal lookups, Windows Defender / Task Scheduler, Docker management, YouTube content tools, ASCII art, and more. |
457
+ | **Self-promoting memory** | `USER.md` + `SOUL.md` identity, plus a `MEMORY.md` split between durable facts (compression-protected) and recent-session distillations. Each session ends with a structured JSON summary that graduates durable facts into the protected section. Semantic recall over past sessions via the `recall_session` tool. Dirty-bit invalidation rebuilds the prompt when files change mid-session. |
447
458
  | **Voice** | Edge TTS / Windows SAPI text-to-speech, speech-to-text helpers. |
448
459
  | **Channel adapters** | Discord, Slack, Telegram, WhatsApp, Email (IMAP+SMTP), Webhook, Twilio SMS, iMessage (macOS), Signal — any channel triggers the same agent loop. |
449
460
  | **Computer use** | Screenshot capture, screen-state vision loop, browser automation. Mouse/keyboard automation partial. |
@@ -479,16 +490,16 @@ User input (any channel)
479
490
  │ │
480
491
  │ ▼
481
492
  │ ┌──────────────────┐
482
- │ │ Tool dispatcher │──▶ 42 built-in tools
493
+ │ │ Tool dispatcher │──▶ 53 built-in tools
483
494
  │ └──────────────────┘ + skill-driven dynamic tools
484
495
 
485
496
 
486
497
  ┌─────────────────────────────────────┐
487
498
  │ Memory │
488
- MEMORY.md · USER.md · SOUL.md │
489
- conversation · session · workspace
490
- │ semantic (BM25 + embeddings)
491
- learning (LESSONS.md)
499
+ USER.md · SOUL.md · MEMORY.md │
500
+ (durable facts · recent sessions)
501
+ │ semantic recall (recall_session)
502
+ end-of-session distillation
492
503
  └─────────────────────────────────────┘
493
504
 
494
505
 
@@ -572,7 +583,7 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
572
583
  | `npm start` | Start the API server (port 4200) |
573
584
  | `npm run build` | Rebuild after source changes |
574
585
 
575
- ### In-chat slash commands (28 total)
586
+ ### In-chat slash commands (33 total)
576
587
 
577
588
  **Session**
578
589
  | Command | Description |
@@ -581,6 +592,9 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
581
592
  | `/compress` | Compress the conversation to free context |
582
593
  | `/save` | Save the current session |
583
594
  | `/title` | Set a title for the session |
595
+ | `/history` | Browse past sessions |
596
+ | `/show` | Show session metadata |
597
+ | `/status` | Show current session status |
584
598
 
585
599
  **Configuration**
586
600
  | Command | Description |
@@ -606,11 +620,16 @@ Optional: set `AIDEN_API_KEY=your-secret` in `.env` to require Bearer-token auth
606
620
  | `/license` | Show / set Pro license |
607
621
  | `/plugins` | List, grant, suspend plugins |
608
622
  | `/reload-mcp` | Reconnect MCP servers |
623
+ | `/reload-soul` | Reload SOUL.md / USER.md mid-session |
609
624
  | `/tools` | List registered tools |
610
625
  | `/skills` | List, view, install skills |
611
626
  | `/usage` | Token usage + cost summary |
612
627
  | `/yolo` | No-approval mode (use carefully) |
613
628
  | `/cron` | Schedule recurring tasks |
629
+ | `/update` | Check for the latest `aiden-runtime` release; use the `install` subcommand to apply the update |
630
+ | `/setup` | Re-run the setup wizard from the REPL |
631
+ | `/channel` | List / manage channel adapters (Discord, Slack, Telegram, …) |
632
+ | `/voice` | Toggle voice output (Edge TTS / Windows SAPI) |
614
633
  | `/quit` | Exit the REPL |
615
634
 
616
635
  **Authentication**
@@ -639,8 +658,8 @@ Both the terminal CLI and the browser dashboard (`localhost:4200/ui`) expose the
639
658
  | Chat | ✅ inline prompt | ✅ chat panel |
640
659
  | Streaming responses | ✅ token-by-token | ✅ live SSE |
641
660
  | Markdown rendering | ✅ | ✅ |
642
- | Slash commands | ✅ all 28 | ✅ same commands |
643
- | `/` command dropdown | ✅ instant, 28 commands | ✅ |
661
+ | Slash commands | ✅ all 33 | ✅ same commands |
662
+ | `/` command dropdown | ✅ instant, 33 commands | ✅ |
644
663
  | Provider panel | `/providers` | ✅ Providers tab |
645
664
  | Memory panel | `/identity` + tool calls | ✅ Memory tab |
646
665
  | Skills panel | `/skills` | ✅ Skills tab |
@@ -663,7 +682,7 @@ Both the terminal CLI and the browser dashboard (`localhost:4200/ui`) expose the
663
682
  - **better-sqlite3** + **sql.js** — local persistence.
664
683
  - **croner** — cron scheduler.
665
684
  - **discord.js**, **@slack/web-api**, **whatsapp-web.js**, **twilio**, **nodemailer**, **imap-simple** — channel adapters.
666
- - **Vitest 4** — test runner; ~1,500 unit + integration tests.
685
+ - **Vitest 4** — test runner; ~1,983 unit + integration tests.
667
686
  - **esbuild** — bundler for the npm package; **electron-builder** — optional desktop wrapper.
668
687
  - **Cloudflare Workers** — landing page + license server + install-script proxy.
669
688
 
@@ -689,6 +708,31 @@ aiden # CLI
689
708
  - Follow [Conventional Commits](https://www.conventionalcommits.org/).
690
709
  - Run `npm run typecheck` and `npm test` before opening a PR.
691
710
 
711
+ ### Evals — measuring behavior at scale
712
+
713
+ Aiden ships an opt-in eval harness that runs scenario-based behavior checks
714
+ against a real provider. Distinct from `npm test` (unit / integration) — evals
715
+ are scenario-driven, make live LLM calls, and are *measurement* rather than
716
+ release gates.
717
+
718
+ ```bash
719
+ npm run eval # default suite (honesty), default provider
720
+ npm run eval:honesty # explicit suite
721
+ npm run eval -- --scenario honesty/no-fabricated-file-contents
722
+ npm run eval -- --provider groq --model llama-3.3-70b-versatile
723
+ npm run eval -- --strict # exit 1 on any failure (for CI)
724
+ ```
725
+
726
+ Results land in `evals/results/<timestamp>.json` (gitignored — local history).
727
+ Eval failures are signal, not gates: the runner exits 0 unless `--strict` is passed.
728
+
729
+ Default provider: `chatgpt-plus / gpt-5.5`. Falls back to the test-provider
730
+ chain (Groq / Together via env-var keys) when ChatGPT Plus isn't authed.
731
+
732
+ Available suites: `honesty` (18 scenarios — 10 easy + 8 hard — covering
733
+ fabricated content, fake "I found" claims, claimed actions without tool calls,
734
+ unverified completions, write/read mismatches, and post-cutoff version claims).
735
+
692
736
  ---
693
737
 
694
738
  ## Community
@@ -727,7 +771,6 @@ aiden # CLI
727
771
 
728
772
  - **npm package renamed** — `aiden-os` → `aiden-runtime`. Run `npm uninstall -g aiden-os && npm install -g aiden-runtime`.
729
773
  - **Slash commands consolidated** — v3's `/switch`, `/budget`, `/memory`, `/profile`, `/permissions`, `/sandbox`, `/retry`, `/failed`, `/publish` are gone. Use `/model`, `/usage`, `/identity`, `/yolo` for equivalent functionality. See `/help` for the v4 list.
730
- - **Subagent fanout removed** — v4 is single-loop only; subagent support deferred to v4.x.
731
774
  - **Docker sandbox dropped** — `AIDEN_SANDBOX_MODE` no longer applies. Tools run on the host. The `tirithScanner` secret/PII guard, `ssrfProtection`, and tiered approval engine remain as the safety layer.
732
775
  - **Skill registry install changed** — auto-fetch from external repos held pending license review. Skills install via `/skills install <local-path-or-url>` only at v4.0.
733
776
  - **Config compatible** — most environment variables (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GROQ_API_KEY`, etc.) are recognised as-is. Copy your existing `.env` and Aiden picks them up.
@@ -755,7 +798,20 @@ Aiden is built and maintained by one person. If it saves you time, consider spon
755
798
 
756
799
  ## Changelog
757
800
 
758
- See [CHANGELOG.md](CHANGELOG.md) for the full history. **v4.0.0 highlights:**
801
+ See [CHANGELOG.md](CHANGELOG.md) for the full history.
802
+
803
+ **v4.1.2 highlights:**
804
+
805
+ - 🧠 **Self-promoting memory** — sessions end with a structured JSON distillation; durable facts graduate into a compression-protected `MEMORY.md` section that survives `/compress`. Recent-session distillations are kept separately for `recall_session` semantic search.
806
+ - 🔍 **`recall_session` tool** — query past sessions in natural language; returns ranked distillations with date + summary + relevant facts.
807
+ - 🛰 **DeepSeek V4 Pro provider** — reasoning-token streaming, per-model defaults (`MODEL_DEFAULTS`), probe filtering for codex-only slugs.
808
+ - ⬆ **`/update` slash command + `aiden_self_update` tool** — npm-registry probe with 6h boot cache, in-process `executeInstall` shared between both surfaces, platform-specific permission-denied remediations (Windows admin / sudo / user-local prefix). No silent self-escalation, no false claims of in-place upgrade.
809
+ - 🧪 **Eval harness** — 18 honesty scenarios (10 easy + 8 hard); `npm run eval -- --suite honesty`, results land in `evals/results/<timestamp>.json`.
810
+ - 🩺 **Subsystem health registry** — `/doctor` surfaces component status with a uniform OK / WARN / FAIL contract.
811
+ - 🔒 **Memory-guard hardening** — section-aware `## Durable facts` protection, word-boundary regex anchors, entry-delimited storage, case-insensitive dedup with separator tolerance.
812
+ - ✅ **~1,983 tests passing** — regression guards for every smoke-test bug found in earlier slices.
813
+
814
+ **v4.0.0 highlights:**
759
815
 
760
816
  - 🧠 **Clean-room core rewrite** — every adapter, every prompt slot, every loop. 7 dual-attribution files rewritten under full Aiden copyright.
761
817
  - 🔌 **19 providers** including OAuth subscription routing for Claude Pro and ChatGPT Plus (subscription quota, not pay-as-you-go).
@@ -108,6 +108,10 @@ const plannerGuard_1 = require("../../moat/plannerGuard");
108
108
  const honestyEnforcement_1 = require("../../moat/honestyEnforcement");
109
109
  const skillTeacher_1 = require("../../moat/skillTeacher");
110
110
  const skillMiner_1 = require("../../core/v4/skillMining/skillMiner");
111
+ const subsystemHealth_1 = require("../../core/v4/subsystemHealth");
112
+ const skillOutcomeTracker_1 = require("../../core/v4/skillOutcomeTracker");
113
+ const providerBootSelector_1 = require("./providerBootSelector");
114
+ const doctorLiveness_1 = require("./doctorLiveness");
111
115
  const uiBuild_2 = require("./uiBuild");
112
116
  const memoryGuard_1 = require("../../moat/memoryGuard");
113
117
  const ssrfProtection_1 = require("../../moat/ssrfProtection");
@@ -162,7 +166,11 @@ function coerceMode(raw, valid, fallback, label, warn) {
162
166
  warn(`Invalid ${label} '${raw}' — falling back to '${fallback}' (valid: ${valid.join(', ')})`);
163
167
  return fallback;
164
168
  }
165
- const VERSION = '4.0.0';
169
+ // Post-v4.1.1 cleanup: read VERSION from the auto-generated source-of-
170
+ // truth (scripts/inject-version.js writes it from package.json on every
171
+ // prebuild hook). The previous hardcoded '4.0.0' string had been stale since
172
+ // v4.0.1 and made `aiden --version` lie.
173
+ const version_1 = require("../../core/version");
166
174
  // Phase 16c.2: env-source tracking lives in `cli/v4/envSources.ts` so
167
175
  // `commands/providers.ts` can import getEnvSource without circular deps.
168
176
  const envSources_1 = require("./envSources");
@@ -204,7 +212,7 @@ async function main(argv, opts = {}) {
204
212
  program
205
213
  .name('aiden')
206
214
  .description('Aiden — local-first AI agent')
207
- .version(VERSION, '-v, --version')
215
+ .version(version_1.VERSION, '-v, --version')
208
216
  .option('--tui', 'Launch full-screen TUI renderer', false)
209
217
  .option('-c, --continue', 'Resume the most recent session')
210
218
  .option('-r, --resume <title>', 'Resume a session by id-prefix or partial title')
@@ -282,12 +290,13 @@ async function main(argv, opts = {}) {
282
290
  program
283
291
  .command('doctor')
284
292
  .description('Run diagnostic checks')
285
- .action(async () => {
293
+ .option('--providers', 'Also ping each configured / authed provider and report live status (deep check). Slower; useful before shipping or when a provider regression is suspected.')
294
+ .action(async (cmdOpts) => {
286
295
  if (opts.runDoctorHook) {
287
296
  await opts.runDoctorHook();
288
297
  return;
289
298
  }
290
- await (0, doctor_1.runDoctorCli)();
299
+ await (0, doctor_1.runDoctorCli)({ liveness: cmdOpts.providers === true });
291
300
  });
292
301
  program
293
302
  .command('sessions <action> [arg]')
@@ -532,10 +541,48 @@ async function buildAgentRuntime(cliOpts, opts) {
532
541
  (0, envSources_1.loadAidenEnvFile)(paths.envFile);
533
542
  await config.load();
534
543
  }
535
- const providerId = cliOpts.provider ??
536
- config.getValue('model.provider', 'groq');
537
- const modelId = cliOpts.model ??
538
- config.getValue('model.modelId', 'llama-3.3-70b-versatile');
544
+ // Phase v4.1.2-bug1: boot model selection now consults the priority-
545
+ // list auto-picker (cli/v4/providerBootSelector.ts) instead of
546
+ // hardcoded `groq + llama-3.3-70b-versatile`. Users with chatgpt-plus
547
+ // OAuth (the post-v4.1.1 onboarding default) used to boot into Groq
548
+ // and hit a 400 on the first tool-bearing request — llama-3.3-70b's
549
+ // tool emission is rejected by Groq's first-party validator.
550
+ //
551
+ // Precedence (handled inside resolveBootProvider):
552
+ // 1. Both --provider + --model flags → use them
553
+ // 2. One flag only → use it, resolve other
554
+ // 3. Persisted model-selection.json → use it
555
+ // 4. Partial config → use it, resolve other
556
+ // 5. Auto-pick from priority list → first authed provider
557
+ // 6. Nothing authed → hardcoded groq fallback
558
+ let providerId;
559
+ let modelId;
560
+ let bootSource;
561
+ try {
562
+ const selection = await (0, providerBootSelector_1.resolveBootProvider)({
563
+ cliProviderId: cliOpts.provider,
564
+ cliModelId: cliOpts.model,
565
+ cfgProviderId: config.getValue('model.provider'),
566
+ cfgModelId: config.getValue('model.modelId'),
567
+ }, () => (0, doctorLiveness_1.enumerateConfiguredProviders)({ paths, env: process.env }));
568
+ if (selection) {
569
+ providerId = selection.providerId;
570
+ modelId = selection.modelId;
571
+ bootSource = selection.source;
572
+ }
573
+ else {
574
+ // Case 6: nothing authed — preserve the prior hardcoded default
575
+ // so the legacy first-run path (manual API-key entry into .env)
576
+ // still works.
577
+ providerId = 'groq';
578
+ modelId = 'llama-3.3-70b-versatile';
579
+ bootSource = 'hardcoded-fallback';
580
+ }
581
+ }
582
+ catch (err) {
583
+ process.stderr.write(`aiden: ${err.message}\n`);
584
+ process.exit(1);
585
+ }
539
586
  // Resolve session continuation.
540
587
  const store = new sessionStore_1.SessionStore(paths.sessionsDb);
541
588
  const sessionManager = new sessionManager_1.SessionManager(store);
@@ -577,6 +624,15 @@ async function buildAgentRuntime(cliOpts, opts) {
577
624
  display.printError(`Could not resolve provider '${providerId}' / model '${modelId}': ${err.message}`, 'Run `aiden model` to pick a valid provider, or `aiden doctor`.');
578
625
  process.exit(1);
579
626
  }
627
+ // Phase v4.1.2-bug1: surface the auto-pick in the boot log when
628
+ // neither CLI flags nor persisted config specified the choice.
629
+ // Silent on explicit selections so power users don't see noise.
630
+ if (bootSource === 'auto-priority') {
631
+ display.dim(`[boot] ${providerId} · ${modelId} (auto · first authed provider)`);
632
+ }
633
+ else if (bootSource === 'hardcoded-fallback') {
634
+ display.dim(`[boot] ${providerId} · ${modelId} (no authed providers detected — using legacy default)`);
635
+ }
580
636
  }
581
637
  // Phase 16b.1: wrap chat_completions providers in a FallbackAdapter so
582
638
  // 429s on Groq slot 1 transparently retry Groq slot 2/3 and Together.
@@ -817,7 +873,35 @@ async function buildAgentRuntime(cliOpts, opts) {
817
873
  });
818
874
  },
819
875
  };
820
- const skillTeacher = new skillTeacher_1.SkillTeacher(skillLoader, skillManageProxy, skillTeacherTier, undefined, (name) => toolRegistry.get(name));
876
+ // Phase v4.1.2-slice3: subsystem-health registry. AidenAgent owns
877
+ // the one instance (constructor-injected, not a singleton — so
878
+ // parallel tests don't cross-contaminate). Per-subsystem trackers
879
+ // hang off the registry and are passed into each subsystem's
880
+ // constructor so they can record success/failure as it happens.
881
+ // `aiden doctor` reads `agent.subsystemHealthRegistry.snapshot()`.
882
+ const subsystemHealthRegistry = (0, subsystemHealth_1.createSubsystemHealthRegistry)();
883
+ const skillTeacherHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-teacher');
884
+ const skillMinerHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-miner');
885
+ // Phase v4.1.2-slice4: outcome tracker — observes tool-call lifecycle,
886
+ // attributes downstream successes/failures to skills loaded via
887
+ // skill_view. Persisted to <skillsDir>/.skill-outcomes.json (atomic
888
+ // write, lazy hydrate). Persist failures surface to doctor via a
889
+ // shared slice3 SubsystemHealthTracker.
890
+ const skillOutcomeHealth = new subsystemHealth_1.SubsystemHealthTracker('skill-outcome-tracker');
891
+ const skillOutcomeTracker = new skillOutcomeTracker_1.SkillOutcomeTracker(node_path_1.default.join(paths.skillsDir, '.skill-outcomes.json'), skillOutcomeHealth);
892
+ subsystemHealthRegistry.register('skill-teacher', () => skillTeacherHealth.snapshot());
893
+ subsystemHealthRegistry.register('skill-miner', () => skillMinerHealth.snapshot());
894
+ subsystemHealthRegistry.register('skill-outcome-tracker', () => skillOutcomeHealth.snapshot());
895
+ // Phase v4.1.2-memory-D fold-in (memory-C Q3 open): recall-session
896
+ // health tracker. The tool itself (tools/v4/sessions/recallSession.ts)
897
+ // stays pure of registry knowledge for testability; the registry
898
+ // caller wires a tracker the tool can record into via ctx. Until
899
+ // the tool plumbs ctx → tracker (separate follow-up), the slot stays
900
+ // registered with a snapshot reader so doctor's expand-on-degradation
901
+ // path sees the subsystem exists even at zero observations.
902
+ const recallSessionHealth = new subsystemHealth_1.SubsystemHealthTracker('recall-session');
903
+ subsystemHealthRegistry.register('recall-session', () => recallSessionHealth.snapshot());
904
+ const skillTeacher = new skillTeacher_1.SkillTeacher(skillLoader, skillManageProxy, skillTeacherTier, undefined, (name) => toolRegistry.get(name), skillTeacherHealth);
821
905
  // ── Tool executor with full Phase 9 + 10 context ─────────────────────
822
906
  const toolExecutor = toolRegistry.buildExecutor({
823
907
  cwd: process.cwd(),
@@ -880,10 +964,26 @@ async function buildAgentRuntime(cliOpts, opts) {
880
964
  catch {
881
965
  skillsList = [];
882
966
  }
967
+ // Phase v4.1.2 alive-core: enumerate which toolset tags are loaded
968
+ // so PromptBuilder can inject tool-conditional guidance. Pure
969
+ // string-set; no ToolRegistry reference threaded through the builder.
970
+ const toolsetsLoaded = new Set();
971
+ for (const name of toolRegistry.list()) {
972
+ const ts = toolRegistry.get(name)?.toolset;
973
+ if (ts)
974
+ toolsetsLoaded.add(ts);
975
+ }
883
976
  const promptBuilderOptions = {
884
977
  paths,
885
978
  memorySnapshot,
886
979
  skillsList,
980
+ toolsetsLoaded,
981
+ // Phase v4.1.2-followup self-awareness: feed the runtime slot.
982
+ // toolCount comes from the same registry we just walked to build
983
+ // toolsetsLoaded; providerId joins modelId so both halves of the
984
+ // active route are in the prompt.
985
+ toolCount: toolRegistry.list().length,
986
+ providerId,
887
987
  personalityOverlay: activeOverlay,
888
988
  modelId,
889
989
  };
@@ -894,7 +994,14 @@ async function buildAgentRuntime(cliOpts, opts) {
894
994
  // mutate skill state from inside JSON-RPC handling).
895
995
  const skillMiner = (0, uiBuild_2.isMcpServeMode)()
896
996
  ? undefined
897
- : new skillMiner_1.SkillMiner({ auxiliaryClient });
997
+ : new skillMiner_1.SkillMiner({ auxiliaryClient, healthTracker: skillMinerHealth });
998
+ // Phase v4.1.2-slice3: the structured CoreLogger isn't yet plumbed
999
+ // through buildAgentRuntime — it's created via factory at boot but
1000
+ // not passed in here. We leave its sink-health surface available
1001
+ // via `CoreLogger.getSinkHealth()` for any caller that holds the
1002
+ // instance, and the registry stays empty for the logger slot until
1003
+ // the structured-logger wiring catches up. The registry mechanism
1004
+ // itself is exercised end-to-end by skill-teacher and skill-miner.
898
1005
  // ── Build agent with all moat layers attached ────────────────────────
899
1006
  const agent = new aidenAgent_1.AidenAgent({
900
1007
  provider: adapter,
@@ -906,6 +1013,8 @@ async function buildAgentRuntime(cliOpts, opts) {
906
1013
  honestyEnforcement,
907
1014
  skillTeacher,
908
1015
  skillMiner,
1016
+ subsystemHealthRegistry,
1017
+ skillOutcomeTracker,
909
1018
  onSkillCandidate: (candidate) => {
910
1019
  try {
911
1020
  callbacks.onSkillCandidate?.(candidate);
@@ -915,7 +1024,18 @@ async function buildAgentRuntime(cliOpts, opts) {
915
1024
  // Phase 23.5: tool event rows. CliCallbacks.onToolCall
916
1025
  // emits a single line per call — `· tool <name> <args> [running]`
917
1026
  // mutates to `[ok 220ms]` / `[fail 1.4s]` / `[blocked]` on resolve.
918
- onToolCall: callbacks.onToolCall,
1027
+ //
1028
+ // Phase v4.1.2-slice4: compose (do NOT replace) so the
1029
+ // SkillOutcomeTracker observes the same lifecycle the CLI display
1030
+ // is rendering. Tracker hooks run first so attribution lands even
1031
+ // if the display callback throws.
1032
+ onToolCall: (call, phase, result) => {
1033
+ try {
1034
+ skillOutcomeTracker.onTool(call, phase, result);
1035
+ }
1036
+ catch { /* telemetry must not break the turn */ }
1037
+ callbacks.onToolCall?.(call, phase, result);
1038
+ },
919
1039
  onCompression: callbacks.onCompression,
920
1040
  onBudgetWarning: callbacks.onBudgetWarning,
921
1041
  onPlannerGuardDecision: callbacks.onPlannerGuardDecision,
@@ -965,6 +1085,32 @@ async function buildAgentRuntime(cliOpts, opts) {
965
1085
  memoryManager.onMutation((file) => {
966
1086
  agent.markMemoryDirty(file === 'user' ? 'user' : 'memory');
967
1087
  });
1088
+ // Phase v4.1.2 alive-core: SOUL.md file watcher. Best-effort —
1089
+ // some filesystems (network mounts, certain WSL configs) don't
1090
+ // support fs.watch reliably. We try to attach; if it fails, the
1091
+ // /reload-soul slash command stays as the manual fallback.
1092
+ try {
1093
+ const soulWatcher = (0, node_fs_1.watch)(paths.soulMd, { persistent: false }, (eventType) => {
1094
+ if (eventType === 'change' || eventType === 'rename') {
1095
+ agent.markMemoryDirty('soul');
1096
+ }
1097
+ });
1098
+ soulWatcher.on('error', () => {
1099
+ // Some FS backends emit errors mid-stream; degrade to manual
1100
+ // fallback. The slash command still works.
1101
+ });
1102
+ // Phase 23.4b: leak-free shutdown — closed by the existing
1103
+ // process-exit cleanup path. We don't unref since we *want* the
1104
+ // watcher to keep the process alive only as long as the REPL does.
1105
+ process.on('exit', () => { try {
1106
+ soulWatcher.close();
1107
+ }
1108
+ catch { /* noop */ } });
1109
+ }
1110
+ catch (err) {
1111
+ display.warn(`SOUL.md watcher could not attach (${err.message}). ` +
1112
+ 'Use `/reload-soul` to apply edits mid-session.');
1113
+ }
968
1114
  // ── Phase v4.1-subagent.1 — subagent_fanout wiring is below
969
1115
  // (after `bootLogger` is declared and the gateway processor is set
970
1116
  // up). Stub registered at boot is replaced there with the real
@@ -1341,6 +1487,11 @@ async function runInteractiveChat(cliOpts, opts) {
1341
1487
  // Phase v4.1-1.1 — live ChannelManager so /channel commands can
1342
1488
  // list, add, remove, and inspect adapters without an external server.
1343
1489
  channelManager: runtime.channelManager,
1490
+ // Phase v4.1.2 session-summary-followup: ChatSession.maybeAutoSummarize
1491
+ // needs these to write MEMORY.md directly (bypassing the agent loop)
1492
+ // when /quit fires the auto-summary path.
1493
+ memoryManager: runtime.memoryManager,
1494
+ memoryGuard: runtime.memoryGuard,
1344
1495
  };
1345
1496
  if (cliOpts.tui) {
1346
1497
  await (0, aidenTUI_1.runTuiMode)({
@@ -238,8 +238,11 @@ Reply with ONE word: safe, caution, or dangerous.`;
238
238
  * by the chat loop right after `memory_add` returns `verified=true` —
239
239
  * this hook is the diagnostic counterpart for verbose mode.
240
240
  */
241
- this.onMemoryRefresh = (which) => {
242
- this.display.dim(`[memory] refreshed system prompt (${which})`);
241
+ this.onMemoryRefresh = (files) => {
242
+ // Phase v4.1.2: argument switched from single-string-or-'both' to
243
+ // the full sorted set of dirty files (SOUL.md joined the rotation).
244
+ const label = files.length > 0 ? files.join(', ') : 'none';
245
+ this.display.dim(`[memory] refreshed system prompt (${label})`);
243
246
  };
244
247
  this.display = opts.display;
245
248
  this.auxiliaryClient = opts.auxiliaryClient;