aiden-runtime 4.1.1 → 4.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +78 -26
  2. package/dist/cli/v4/aidenCLI.js +159 -9
  3. package/dist/cli/v4/callbacks.js +5 -2
  4. package/dist/cli/v4/chatSession.js +525 -15
  5. package/dist/cli/v4/commands/auth.js +6 -3
  6. package/dist/cli/v4/commands/help.js +4 -0
  7. package/dist/cli/v4/commands/index.js +10 -1
  8. package/dist/cli/v4/commands/reloadSoul.js +37 -0
  9. package/dist/cli/v4/commands/update.js +102 -0
  10. package/dist/cli/v4/defaultSoul.js +68 -2
  11. package/dist/cli/v4/display.js +28 -10
  12. package/dist/cli/v4/doctor.js +112 -0
  13. package/dist/cli/v4/doctorLiveness.js +65 -10
  14. package/dist/cli/v4/promotionPrompt.js +202 -0
  15. package/dist/cli/v4/providerBootSelector.js +144 -0
  16. package/dist/cli/v4/sessionSummaryGate.js +66 -0
  17. package/dist/cli/v4/toolPreview.js +139 -0
  18. package/dist/core/v4/aidenAgent.js +91 -29
  19. package/dist/core/v4/capabilities.js +89 -0
  20. package/dist/core/v4/contextCompressor.js +25 -8
  21. package/dist/core/v4/distillationIndex.js +167 -0
  22. package/dist/core/v4/distillationStore.js +98 -0
  23. package/dist/core/v4/logger/logger.js +40 -9
  24. package/dist/core/v4/promotionCandidates.js +234 -0
  25. package/dist/core/v4/promptBuilder.js +145 -1
  26. package/dist/core/v4/sessionDistiller.js +405 -0
  27. package/dist/core/v4/skillMining/skillMiner.js +43 -6
  28. package/dist/core/v4/skillOutcomeTracker.js +323 -0
  29. package/dist/core/v4/subsystemHealth.js +143 -0
  30. package/dist/core/v4/update/executeInstall.js +233 -0
  31. package/dist/core/version.js +1 -1
  32. package/dist/moat/memoryGuard.js +111 -0
  33. package/dist/moat/skillTeacher.js +14 -5
  34. package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
  35. package/dist/providers/v4/errors.js +20 -4
  36. package/dist/providers/v4/modelDefaults.js +65 -0
  37. package/dist/providers/v4/registry.js +9 -2
  38. package/dist/providers/v4/runtimeResolver.js +6 -0
  39. package/dist/tools/v4/index.js +57 -1
  40. package/dist/tools/v4/memory/memoryRemove.js +57 -2
  41. package/dist/tools/v4/memory/sessionSummary.js +151 -0
  42. package/dist/tools/v4/sessions/recallSession.js +163 -0
  43. package/dist/tools/v4/sessions/sessionSearch.js +5 -1
  44. package/dist/tools/v4/system/_psHelpers.js +55 -0
  45. package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
  46. package/dist/tools/v4/system/appClose.js +79 -0
  47. package/dist/tools/v4/system/appLaunch.js +92 -0
  48. package/dist/tools/v4/system/clipboardRead.js +54 -0
  49. package/dist/tools/v4/system/clipboardWrite.js +84 -0
  50. package/dist/tools/v4/system/mediaKey.js +78 -0
  51. package/dist/tools/v4/system/osProcessList.js +99 -0
  52. package/dist/tools/v4/system/screenshot.js +106 -0
  53. package/dist/tools/v4/system/volumeSet.js +157 -0
  54. package/package.json +4 -1
  55. package/skills/system_control.md +135 -69
@@ -12,7 +12,7 @@
12
12
  * and registers each on the global CommandRegistry at boot.
13
13
  */
14
14
  Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.allCommands = exports.history = exports.show = exports.status = exports.voice = exports.channel = exports.setup = exports.cron = exports.doctor = exports.license = exports.auth = exports.plugins = exports.streaming = exports.debugPrompt = exports.identity = exports.providers = exports.quit = exports.clear = exports.verbose = exports.reasoning = exports.reloadMcp = exports.skills = exports.skin = exports.yolo = exports.usage = exports.compress = exports.title = exports.save = exports.personality = exports.model = exports.tools = exports.help = void 0;
15
+ exports.allCommands = exports.update = exports.reloadSoul = exports.history = exports.show = exports.status = exports.voice = exports.channel = exports.setup = exports.cron = exports.doctor = exports.license = exports.auth = exports.plugins = exports.streaming = exports.debugPrompt = exports.identity = exports.providers = exports.quit = exports.clear = exports.verbose = exports.reasoning = exports.reloadMcp = exports.skills = exports.skin = exports.yolo = exports.usage = exports.compress = exports.title = exports.save = exports.personality = exports.model = exports.tools = exports.help = void 0;
16
16
  const help_1 = require("./help");
17
17
  Object.defineProperty(exports, "help", { enumerable: true, get: function () { return help_1.help; } });
18
18
  const tools_1 = require("./tools");
@@ -75,6 +75,10 @@ const show_1 = require("./show");
75
75
  Object.defineProperty(exports, "show", { enumerable: true, get: function () { return show_1.show; } });
76
76
  const history_1 = require("./history");
77
77
  Object.defineProperty(exports, "history", { enumerable: true, get: function () { return history_1.history; } });
78
+ const reloadSoul_1 = require("./reloadSoul");
79
+ Object.defineProperty(exports, "reloadSoul", { enumerable: true, get: function () { return reloadSoul_1.reloadSoul; } });
80
+ const update_1 = require("./update");
81
+ Object.defineProperty(exports, "update", { enumerable: true, get: function () { return update_1.update; } });
78
82
  /** All built-in system commands, in canonical order. */
79
83
  exports.allCommands = [
80
84
  help_1.help,
@@ -103,9 +107,14 @@ exports.allCommands = [
103
107
  status_1.status,
104
108
  show_1.show,
105
109
  history_1.history,
110
+ reloadSoul_1.reloadSoul,
106
111
  reloadMcp_1.reloadMcp,
107
112
  reasoning_1.reasoning,
108
113
  verbose_1.verbose,
114
+ // Phase v4.1.2-update: /update + /update install — fresh registry
115
+ // probe + shared executeInstall executor (also wired into
116
+ // aiden_self_update tool for natural-language requests).
117
+ update_1.update,
109
118
  clear_1.clear,
110
119
  quit_1.quit,
111
120
  ];
@@ -0,0 +1,37 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * cli/v4/commands/reloadSoul.ts — Phase v4.1.2 alive-core.
10
+ *
11
+ * `/reload-soul` — explicit fallback for the SOUL.md file-watcher path.
12
+ * Some filesystems (network mounts, certain WSL configs) don't support
13
+ * `fs.watch` reliably; this command lets users force a system-prompt
14
+ * rebuild after editing SOUL.md without restarting `aiden`.
15
+ *
16
+ * Mechanism: marks the agent's 'soul' dirty bit; the next turn calls
17
+ * `refreshSystemPromptIfDirty()` which invalidates the cached prompt.
18
+ * `PromptBuilder.build()` then re-reads SOUL.md from disk.
19
+ */
20
+ Object.defineProperty(exports, "__esModule", { value: true });
21
+ exports.reloadSoul = void 0;
22
+ exports.reloadSoul = {
23
+ name: 'reload-soul',
24
+ description: 'Re-read SOUL.md from disk on the next turn (manual cache invalidation).',
25
+ category: 'system',
26
+ icon: '🔁',
27
+ aliases: ['soul-reload'],
28
+ handler: async (ctx) => {
29
+ if (!ctx.agent) {
30
+ ctx.display.warn('Reload-soul cannot run before the agent boots.');
31
+ return {};
32
+ }
33
+ ctx.agent.markMemoryDirty('soul');
34
+ ctx.display.success('SOUL.md flagged for reload — the next turn will pick up your edits.');
35
+ return {};
36
+ },
37
+ };
@@ -0,0 +1,102 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * cli/v4/commands/update.ts — Phase v4.1.2-update.
10
+ *
11
+ * /update — bypass the boot-time 6h cache, probe npm registry
12
+ * fresh, print current vs latest with hint.
13
+ * /update install — spawn `npm install -g aiden-runtime@latest`
14
+ * via the shared executeInstall executor; print
15
+ * restart hint on success or platform-specific
16
+ * remediation on permission failure.
17
+ *
18
+ * No auto-restart on success — the user keeps control by typing
19
+ * /quit and re-launching aiden. Honest UX: never claim the current
20
+ * process is upgraded after a successful install. Auto-restart of
21
+ * a node REPL via re-exec is also fragile across Windows/macOS/
22
+ * Linux, so the explicit /quit path is both honest and reliable.
23
+ */
24
+ Object.defineProperty(exports, "__esModule", { value: true });
25
+ exports.update = void 0;
26
+ const version_1 = require("../../../core/version");
27
+ const checkUpdate_1 = require("../../../core/v4/update/checkUpdate");
28
+ const executeInstall_1 = require("../../../core/v4/update/executeInstall");
29
/**
 * Bare `/update`: probe for the latest published version with the cache
 * disabled and print it next to the installed version.
 *
 * Output goes through `ctx.display`; nothing is returned. When the probe
 * reports `latest === null` the registry is treated as unreachable.
 *
 * @param ctx Command context; needs `ctx.paths` and `ctx.display`.
 * @returns {Promise<void>}
 */
async function printStatus(ctx) {
    const ui = ctx.display;
    const { paths } = ctx;
    if (!paths) {
        ui.warn('/update needs Aiden user-data paths — try in a real session.');
        return;
    }
    ui.dim('Checking for updates…');
    // cacheTtlMs: 0 — the user asked explicitly, so skip the 6h boot cache.
    const probeOptions = {
        paths,
        installedVersion: version_1.VERSION,
        cacheTtlMs: 0,
    };
    const { installed, latest, updateAvailable } = await (0, checkUpdate_1.checkForUpdate)(probeOptions);
    ui.write(` installed: v${installed}\n`);
    if (latest === null) {
        ui.write(' latest: unknown (registry unreachable)\n');
        ui.dim('Could not reach the npm registry. Check your network and try again.');
        return;
    }
    ui.write(` latest: v${latest}\n`);
    if (!updateAvailable) {
        ui.dim("You're on the latest version.");
        return;
    }
    ui.write([
        `\n update available: v${installed} → v${latest}\n`,
        ` run \`/update install\` to install, or \`npm install -g aiden-runtime@latest\` manually.\n`,
    ].join(''));
}
56
/**
 * `/update install`: probe for a newer version (cache disabled) and, when
 * one exists, run the shared install executor.
 *
 * Flow:
 *   1. No `ctx.paths` → warn and stop.
 *   2. Probe reports `latest === null` → warn that the check failed, stop.
 *   3. No update available → say so, stop (avoids a no-op install).
 *   4. Run the executor; on success print the installed version plus a
 *      restart hint, otherwise surface the executor's error text.
 *
 * The running process is never restarted here; the user is told to /quit
 * and relaunch.
 *
 * @param ctx Command context; needs `ctx.paths` and `ctx.display`.
 * @returns {Promise<void>}
 */
async function runInstall(ctx) {
    if (!ctx.paths) {
        ctx.display.warn('/update install needs Aiden user-data paths — try in a real session.');
        return;
    }
    // Status probe first so we don't run a no-op install. Bypasses the
    // cache for the same reason as the bare /update path.
    ctx.display.dim('Checking for updates…');
    const status = await (0, checkUpdate_1.checkForUpdate)({
        paths: ctx.paths,
        installedVersion: version_1.VERSION,
        cacheTtlMs: 0,
    });
    if (status.latest === null) {
        ctx.display.warn("Couldn't check for updates (registry unreachable). " +
            'Try `/update` first, or run `npm install -g aiden-runtime@latest` manually.');
        return;
    }
    if (!status.updateAvailable) {
        ctx.display.dim(`You're already on the latest version (v${status.installed}).`);
        return;
    }
    ctx.display.write(`Installing aiden-runtime v${status.latest} (current: v${status.installed})…\n`);
    const result = await (0, executeInstall_1.executeInstall)();
    if (result.success) {
        const v = result.installedVersion ?? status.latest;
        ctx.display.write(`\n ✓ aiden-runtime v${v} installed.\n`);
        ctx.display.dim('Restart Aiden to apply: type /quit then re-run `aiden`.');
        return;
    }
    // An empty or whitespace-only error string is as uninformative as a
    // missing one, so it gets the same fallback text instead of a blank
    // warning line.
    const reason = result.error;
    const isBlank = reason == null || (typeof reason === 'string' && reason.trim() === '');
    ctx.display.warn(isBlank ? 'Install failed (no error message).' : reason);
}
88
+ exports.update = {
89
+ name: 'update',
90
+ description: 'Check for / install the latest aiden-runtime. Use "install" subcommand to apply.',
91
+ category: 'system',
92
+ icon: '⬆',
93
+ handler: async (ctx) => {
94
+ const sub = (ctx.args[0] ?? '').toLowerCase();
95
+ if (sub === 'install') {
96
+ await runInstall(ctx);
97
+ }
98
+ else {
99
+ await printStatus(ctx);
100
+ }
101
+ },
102
+ };
@@ -30,7 +30,7 @@ exports.PREVIOUS_BUNDLED_SOULS = exports.DEFAULT_SOUL_MD = exports.BUNDLED_SOUL_
30
30
  // <act_dont_ask>. ensureSoulMdSeeded compares this against the user's
31
31
  // on-disk SOUL.md to decide whether to silent-replace (matches a prior
32
32
  // bundled default) or preserve+notify (user-edited).
33
- exports.BUNDLED_SOUL_VERSION = '16h';
33
+ exports.BUNDLED_SOUL_VERSION = 'v4.1.2';
34
34
  exports.DEFAULT_SOUL_MD = `You are Aiden — a local-first AI agent built by Taracod.
35
35
 
36
36
  Identity:
@@ -88,7 +88,6 @@ asking the user what to do next.
88
88
  </keep_going>
89
89
 
90
90
  Limits:
91
- - You're a CLI agent in v4.0.0. No voice, no scheduled jobs, no messaging gateway yet — those are v4.1.
92
91
  - You can't bypass approval prompts for dangerous commands.
93
92
  - You don't lie to look smart. If you don't know, you say so.
94
93
  `;
@@ -184,6 +183,73 @@ the tool calls within a single turn instead of returning halfway and
184
183
  asking the user what to do next.
185
184
  </keep_going>
186
185
 
186
+ Limits:
187
+ - You're a CLI agent in v4.0.0. No voice, no scheduled jobs, no messaging gateway yet — those are v4.1.
188
+ - You can't bypass approval prompts for dangerous commands.
189
+ - You don't lie to look smart. If you don't know, you say so.
190
+ `,
191
+ // 16h default — media-search anti-pattern + skill_view reference,
192
+ // but still carried the stale 'v4.0.0 / planned for v4.1' limits line
193
+ // claiming voice, cron, and messaging were not yet shipped. Phase
194
+ // v4.1.2-followup strips that line because cron / Telegram / 9 channels
195
+ // ALL shipped in v4.1.0. Users on the v4.1.0 / v4.1.1 install have this
196
+ // verbatim text on disk; silent-upgrade picks them up here.
197
+ `You are Aiden — a local-first AI agent built by Taracod.
198
+
199
+ Identity:
200
+ - You run on the user's machine, native Windows/Linux/macOS (not WSL2).
201
+ - You have 72 bundled skills + access to install more via skills.sh.
202
+ - You remember past sessions via persistent storage.
203
+ - You have 40 tools spanning files, browser, terminal, web, memory.
204
+
205
+ Voice:
206
+ - Direct. No fluff. Match the user's energy.
207
+ - Honest above all — if you didn't do something, say so. If you're not sure, say so.
208
+ - You never claim to "have run" a tool unless the trace shows it.
209
+
210
+ Behavior:
211
+ - Default to action over discussion. The user wants results.
212
+ - When asked who you are, identify as Aiden. Not "a large language model."
213
+ - When asked what you can do, mention specific skills/tools, not generic capabilities.
214
+ - If user mentions trading/NSE/markets, you have specialized skills for that.
215
+
216
+ <act_dont_ask>
217
+ When a request has an obvious default interpretation, act on it
218
+ immediately instead of asking for clarification. Examples:
219
+ - "play me a popular song" / "play X on youtube" → load skill_view(media-search)
220
+ and follow it. Substitute fuzzy phrases ("popular song") with a specific
221
+ chart-topper BEFORE searching, then open_url a /watch?v= URL once.
222
+ NEVER search verbatim "popular song" — that returns articles, not music.
223
+ - "what files are in my Downloads?" → file_list on Downloads. Don't ask
224
+ "which user?" — it's the current user.
225
+ - "is port 443 open?" → check this machine. Don't ask "open where?"
226
+ Only ask for clarification when the ambiguity genuinely changes which
227
+ tool you would call.
228
+ </act_dont_ask>
229
+
230
+ <prerequisite_checks>
231
+ Before acting, check whether prerequisite discovery, lookup, or
232
+ context-gathering steps are needed. If a step depends on output from a
233
+ prior step, resolve that dependency first. Don't skip prerequisite
234
+ steps just because the final action seems obvious.
235
+ </prerequisite_checks>
236
+
237
+ <missing_context>
238
+ If required context is missing, do NOT guess or hallucinate. Use the
239
+ appropriate lookup tool when missing information is retrievable
240
+ (file_read, file_list, web_search, fetch_url, session_search,
241
+ system_info). Ask a clarifying question ONLY when no tool can resolve
242
+ the ambiguity.
243
+ </missing_context>
244
+
245
+ <keep_going>
246
+ Work autonomously until the task is fully resolved. Don't stop with a
247
+ plan — execute it. Multi-step tasks (open browser → search → click
248
+ result; or list files → read each → summarise) are expected; chain
249
+ the tool calls within a single turn instead of returning halfway and
250
+ asking the user what to do next.
251
+ </keep_going>
252
+
187
253
  Limits:
188
254
  - You're a CLI agent in v4.0.0. No voice, no scheduled jobs, no messaging gateway yet — those are v4.1.
189
255
  - You can't bypass approval prompts for dangerous commands.
@@ -42,6 +42,7 @@ const box_1 = require("./box");
42
42
  const replyRenderer_1 = require("./replyRenderer");
43
43
  // Optional "Sources" footer when AIDEN_CITATIONS=1 (default off).
44
44
  const citationFooter_1 = require("./citationFooter");
45
+ const toolPreview_1 = require("./toolPreview");
45
46
  /**
46
47
  * Phase 26.2.7 — category emoji icons for the tool-row prefix when
47
48
  * `AIDEN_UI_ICONS=1` is set in the environment. Default OFF (the
@@ -393,13 +394,18 @@ class Display {
393
394
  const pill = (on, label, value) => `${dot(on)} ${lab(label)} ${val(value)}`;
394
395
  const providerOk = args.providerOk !== false;
395
396
  const modelValue = providerOk ? args.model : 'not configured';
396
- return (' ' +
397
- [
398
- pill(args.coreOnline, 'core', args.coreOnline ? 'online' : 'starting'),
399
- pill(true, 'mode', args.mode),
400
- pill(providerOk, 'model', modelValue),
401
- pill(args.memoryActive, 'memory', args.memoryActive ? 'active' : 'off'),
402
- ].join(' '));
397
+ const pills = [
398
+ pill(args.coreOnline, 'core', args.coreOnline ? 'online' : 'starting'),
399
+ pill(true, 'mode', args.mode),
400
+ pill(providerOk, 'model', modelValue),
401
+ pill(args.memoryActive, 'memory', args.memoryActive ? 'active' : 'off'),
402
+ ];
403
+ if (args.version) {
404
+ // Version pill: dot + value, no label (the `v` prefix is the label).
405
+ // Always-on dot — informational, not a health indicator.
406
+ pills.push(`${dot(true)} ${val(`v${args.version}`)}`);
407
+ }
408
+ return ' ' + pills.join(' ');
403
409
  }
404
410
  /**
405
411
  * Two-column block (Environment + Capabilities). Side-by-side when
@@ -794,11 +800,24 @@ class Display {
794
800
  };
795
801
  }
796
802
  /**
797
- * Pretty-print a tool call before it executes. Args are JSON-stringified
798
- * with a 200-char hard cap so megabyte arguments don't flood the screen.
803
+ * Pretty-print a tool call before it executes. Phase v4.1.2 first
804
+ * consults the `TOOL_PRIMARY_ARG` map in `toolPreview.ts` to render
805
+ * just the meaningful argument (e.g. `terminal: npm test`); falls
806
+ * back to the legacy full-JSON stringification (200-char hard cap)
807
+ * for tools that aren't in the map.
799
808
  */
800
809
  toolPreview(name, args) {
801
810
  const sk = this.skin;
811
+ const arrow = sk.getActive().glyphs?.arrow ?? '>';
812
+ // Phase v4.1.2: per-tool primary-arg preview.
813
+ const preview = (0, toolPreview_1.buildToolPreview)(name, args);
814
+ if (preview !== null) {
815
+ if (preview === '') {
816
+ return `${sk.applyColors(arrow, 'tool')} ${sk.applyColors(name, 'tool')}`;
817
+ }
818
+ return `${sk.applyColors(arrow, 'tool')} ${sk.applyColors(name, 'tool')} ${sk.applyColors(preview, 'muted')}`;
819
+ }
820
+ // Unknown tool — original behaviour (full JSON, 200-char cap).
802
821
  let serialized;
803
822
  try {
804
823
  serialized = JSON.stringify(args);
@@ -808,7 +827,6 @@ class Display {
808
827
  }
809
828
  if (serialized.length > 200)
810
829
  serialized = `${serialized.slice(0, 197)}...`;
811
- const arrow = sk.getActive().glyphs?.arrow ?? '>';
812
830
  return `${sk.applyColors(arrow, 'tool')} ${sk.applyColors(name, 'tool')} ${sk.applyColors(serialized, 'muted')}`;
813
831
  }
814
832
  /**
@@ -55,6 +55,8 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
55
55
  return (mod && mod.__esModule) ? mod : { "default": mod };
56
56
  };
57
57
  Object.defineProperty(exports, "__esModule", { value: true });
58
+ exports.renderSubsystemHealthSection = renderSubsystemHealthSection;
59
+ exports.renderSkillOutcomesSection = renderSkillOutcomesSection;
58
60
  exports.resolveBinaryPath = resolveBinaryPath;
59
61
  exports._resetBinaryResolutionCacheForTests = _resetBinaryResolutionCacheForTests;
60
62
  exports.buildProbeInvocation = buildProbeInvocation;
@@ -83,6 +85,104 @@ const license_1 = require("../../core/v4/license");
83
85
  const checkUpdate_1 = require("../../core/v4/update/checkUpdate");
84
86
  const box_1 = require("./box");
85
87
  const audioBackend_1 = require("../../core/voice/audioBackend");
88
/**
 * Phase v4.1.2-slice3: render the "Subsystem health" section.
 *
 * Decision tree:
 *   - registry missing, or its snapshot is empty → '' (nothing to report)
 *   - no subsystem has `totalErrors > 0`         → one-line "all green" summary
 *   - otherwise                                  → expanded block, one row per
 *     subsystem, with last-error detail on rows that carry a `lastError`
 *
 * Expanded rows use ASCII marks:
 *   [ok] name  N calls, 0 errors
 *   [x] name   N calls, E errors (K consecutive) (last <age> ago: "message")
 *   [-] honesty (not instrumented yet)
 *
 * The honesty row is appended whenever the expanded block fires: the
 * honesty layer was deliberately left un-instrumented (its pure-pattern
 * path has no I/O failure surface), and listing it makes the gap visible
 * as known rather than forgotten.
 *
 * @param registry Optional object exposing `snapshot()`, which returns an
 *   array of `{ subsystem, totalCalls, totalErrors, lastError? }`.
 *   `lastError.at` may be a Date, an epoch-millisecond number, or a date
 *   string; it is normalised through `new Date(...)`.
 * @returns {string} The rendered section, or '' when there is nothing to show.
 */
function renderSubsystemHealthSection(registry) {
    if (!registry)
        return '';
    const snaps = registry.snapshot();
    if (snaps.length === 0)
        return '';
    const anyDegraded = snaps.some((s) => s.totalErrors > 0);
    if (!anyDegraded) {
        return `\nSubsystem health: all green (${snaps.length} subsystems instrumented)\n`;
    }
    const now = Date.now();
    const lines = ['\nSubsystem health'];
    for (const s of snaps) {
        const mark = s.totalErrors > 0 ? 'x' : 'ok';
        const stats = `${s.totalCalls} call${s.totalCalls === 1 ? '' : 's'}, ${s.totalErrors} error${s.totalErrors === 1 ? '' : 's'}`;
        if (s.lastError) {
            // Normalise the timestamp instead of calling `.getTime()` on it
            // directly, so a serialised (string/number) timestamp does not
            // throw mid-report.
            const ago = humanAge(now - new Date(s.lastError.at).getTime());
            const streak = s.lastError.consecutive > 1
                ? ` (${s.lastError.consecutive} consecutive)`
                : '';
            lines.push(` [${mark}] ${s.subsystem.padEnd(16)} ${stats}${streak} (last ${ago} ago: "${s.lastError.message}")`);
        }
        else {
            lines.push(` [${mark}] ${s.subsystem.padEnd(16)} ${stats}`);
        }
    }
    lines.push(` [-] honesty (not instrumented yet)`);
    // Trailing empty entry so the joined block ends with a newline.
    lines.push('');
    return lines.join('\n');
}
135
/**
 * Format an elapsed duration as a single coarse unit: ms, s, m, h or d.
 *
 * Every unit is floored, so a value never rounds up past its own bucket
 * (59 999 ms is "59s", not "60s"). Negative durations, such as a
 * timestamp slightly in the future, are clamped to zero. Non-finite
 * input, such as the result of subtracting an unparseable date, renders
 * as "?".
 *
 * @param {number} ms Elapsed time in milliseconds.
 * @returns {string} e.g. "250ms", "12s", "4m", "3h", "2d", or "?".
 */
function humanAge(ms) {
    if (!Number.isFinite(ms))
        return '?';
    const elapsed = Math.max(0, Math.floor(ms));
    if (elapsed < 1000)
        return `${elapsed}ms`;
    if (elapsed < 60000)
        return `${Math.floor(elapsed / 1000)}s`;
    if (elapsed < 3600000)
        return `${Math.floor(elapsed / 60000)}m`;
    if (elapsed < 86400000)
        return `${Math.floor(elapsed / 3600000)}h`;
    return `${Math.floor(elapsed / 86400000)}d`;
}
146
/**
 * Phase v4.1.2-slice4: render the "Skill outcomes" section.
 *
 * Silent on empty state (no tracker, no tracked skills, or a row limit of
 * zero) so output for healthy systems stays short.
 *
 * Otherwise lists up to `topN` skills ordered by load count (highest
 * first), each with its load count, attributed tool successes/failures
 * and success percentage ("—" when nothing has been attributed yet),
 * plus the age of `lastUsed` when present. A single spotlight line then
 * names the most recent failure across ALL tracked skills, so one broken
 * skill is visible without scanning every row.
 *
 * @param tracker Optional object exposing `snapshot()`, which returns an
 *   array of `{ skillName, loaded, toolSuccesses, toolFailures, lastUsed?,
 *   lastError? }`.
 * @param {number} [topN=5] Maximum rows to print. Fractions are floored,
 *   negatives are treated as 0, NaN falls back to 5.
 * @returns {string} The rendered section, or '' when there is nothing to show.
 */
function renderSkillOutcomesSection(tracker, topN = 5) {
    if (!tracker)
        return '';
    const snaps = tracker.snapshot();
    if (snaps.length === 0)
        return '';
    const requested = Number.isNaN(Number(topN)) ? 5 : Number(topN);
    const limit = Math.min(snaps.length, Math.max(0, Math.floor(requested)));
    if (limit === 0)
        return '';
    // Sort a copy so the header's "by load count" claim holds whatever
    // order the tracker returns. The sort is stable, so an already-ordered
    // snapshot keeps its order.
    const byLoad = [...snaps].sort((a, b) => (b.loaded ?? 0) - (a.loaded ?? 0));
    const now = Date.now();
    const lines = [`\nSkill outcomes (top ${limit} by load count)`];
    for (const s of byLoad.slice(0, limit)) {
        const attributed = s.toolSuccesses + s.toolFailures;
        const rate = attributed === 0
            ? '—'
            : `${Math.round((s.toolSuccesses / attributed) * 100)}% success`;
        const stats = `loaded ${s.loaded}, ${s.toolSuccesses} ok, ${s.toolFailures} err (${rate})`;
        const last = s.lastUsed
            ? ` last ${humanAge(now - new Date(s.lastUsed).getTime())} ago`
            : '';
        lines.push(` ${s.skillName.padEnd(32)} ${stats}${last}`);
    }
    // Single pass for the newest failure. Ties keep the earlier snapshot
    // entry, and an unparseable timestamp never displaces a valid one.
    let newest = null;
    let newestAt = Number.NaN;
    for (const s of snaps) {
        if (!s.lastError)
            continue;
        const at = new Date(s.lastError.at).getTime();
        if (newest === null || at > newestAt || (Number.isNaN(newestAt) && !Number.isNaN(at))) {
            newest = s;
            newestAt = at;
        }
    }
    if (newest !== null) {
        lines.push(` ↳ last failure: ${newest.skillName} — "${newest.lastError.message}"`);
    }
    // Trailing empty entry so the joined block ends with a newline.
    lines.push('');
    return lines.join('\n');
}
86
186
  const DEFAULT_TIMEOUT_MS = 3000;
87
187
  /** Wrap a promise with a timeout. The timed-out path resolves to the fallback result. */
88
188
  async function withTimeout(p, ms, fallback) {
@@ -818,6 +918,18 @@ async function runDoctorCli(opts) {
818
918
  process.stdout.write(renderProviderLivenessSection(results, summary));
819
919
  livenessFailed = summary.red > 0;
820
920
  }
921
+ // Phase v4.1.2-slice3: subsystem-health surface. Renders only when
922
+ // a registry was passed (in-REPL doctor); standalone CLI doctor has
923
+ // no live agent so the section is omitted.
924
+ const subsystemBlock = renderSubsystemHealthSection(opts?.subsystemHealthRegistry);
925
+ if (subsystemBlock)
926
+ process.stdout.write(subsystemBlock);
927
+ // Phase v4.1.2-slice4: skill-outcome surface. Same gating — only
928
+ // renders when a tracker was passed and has at least one observed
929
+ // skill. Standalone CLI invocations skip it.
930
+ const outcomesBlock = renderSkillOutcomesSection(opts?.skillOutcomeTracker);
931
+ if (outcomesBlock)
932
+ process.stdout.write(outcomesBlock);
821
933
  // Liveness reds count toward the overall exit code so CI / scripts
822
934
  // can `aiden doctor --providers && deploy`.
823
935
  process.exitCode = (report.passed && !livenessFailed) ? 0 : 1;
@@ -18,11 +18,12 @@
18
18
  * - `--providers` is opt-in. When the user types it we extend the
19
19
  * report with one liveness row per probe, then render a summary
20
20
  * line at the bottom.
21
- * - Tool-catalog validation is deliberately OUT of scope. Liveness
22
- * probes ship `tools: []` (see comment in checkProviderLiveness)
23
- * so one bad tool schema doesn't false-red every provider that
24
- * validates strictly. The eval-harness / registration-time schema
25
- * validator (v4.1.1 main) is the right home for that concern.
21
+ * - Tool-catalog validation is deliberately OUT of scope. The
22
+ * probe ships ONE hardcoded no-op tool (`probe_noop`) so the
23
+ * Codex backend accepts the request (it rejects empty `tools`),
24
+ * while user-registered tool schemas stay un-validated here. The
25
+ * eval-harness / registration-time schema validator (v4.1.1
26
+ * main) is the right home for that concern.
26
27
  *
27
28
  * Trust artifact:
28
29
  * - On failure we surface `err.message` VERBATIM (truncated to 200
@@ -31,6 +32,7 @@
31
32
  * prints the actual OpenAI reason, not a generic "provider failed."
32
33
  */
33
34
  Object.defineProperty(exports, "__esModule", { value: true });
35
+ exports.pickProbeModel = pickProbeModel;
34
36
  exports.enumerateConfiguredProviders = enumerateConfiguredProviders;
35
37
  exports.checkProviderLiveness = checkProviderLiveness;
36
38
  exports.runProviderLiveness = runProviderLiveness;
@@ -55,6 +57,28 @@ function truncate(s, max = ERROR_TRUNCATE_CHARS) {
55
57
  return s;
56
58
  return `${s.slice(0, max - 1)}…`;
57
59
  }
60
/**
 * Phase v4.1.2-slice5: pick a probe-safe model id from a registry entry.
 *
 * Some providers list model slugs that only work for enterprise / CLI
 * accounts. ChatGPT Plus is the canonical case: the registry's first
 * slug is `gpt-5.1-codex-max`, which the subscription-account Codex
 * backend rejects ("... model is not supported when using Codex with a
 * ChatGPT account"), even though regular chat on the same account works
 * with a non-Codex slug.
 *
 * Heuristic: prefer the first slug that does not contain `-codex`
 * (covers `-codex-max`, `-codex-mini` and plain `-codex` suffixes). If
 * every slug is Codex-flavoured, fall back to the first usable slug. The
 * rule is shape-based, with no provider-id special-casing.
 *
 * Entries with a missing or non-array `modelIds`, and slugs that are
 * empty or not strings, are skipped rather than throwing or being
 * returned as a model id.
 *
 * @param entry Registry entry; `entry.modelIds` is expected to be an array of slugs.
 * @returns {string} The chosen slug, or '' when the entry has no usable slug.
 */
function pickProbeModel(entry) {
    const ids = Array.isArray(entry?.modelIds) ? entry.modelIds : [];
    const usable = ids.filter((id) => typeof id === 'string' && id !== '');
    const safe = usable.find((id) => !id.includes('-codex'));
    return safe ?? usable[0] ?? '';
}
58
82
  /**
59
83
  * Wrap a promise with a hard timeout. Resolves to the inner result on
60
84
  * success, throws a clearly-labelled `Error` on timeout. Cleans up the
@@ -91,7 +115,7 @@ async function enumerateConfiguredProviders(opts) {
91
115
  const out = [];
92
116
  for (const entry of Object.values(registry_1.PROVIDER_REGISTRY)) {
93
117
  // Every provider needs at least one model to probe against.
94
- const model = entry.modelIds[0];
118
+ const model = pickProbeModel(entry);
95
119
  if (!model) {
96
120
  out.push({
97
121
  entry,
@@ -199,11 +223,42 @@ async function checkProviderLiveness(provider, model, adapter, opts) {
199
223
  const start = Date.now();
200
224
  // Liveness probes "is this provider reachable + authenticated?".
201
225
  // Tool-catalog validation is a separate concern (eval harness,
202
- // v4.1.1 main). Sending tools: [] ensures one bad tool schema
203
- // doesn't false-red every provider that validates strictly.
226
+ // v4.1.1 main).
227
+ //
228
+ // Phase v4.1.2-slice5: the probe used to send `messages: [user]`
229
+ // only, with `tools: []`. That body 400s against the Codex backend
230
+ // for two reasons:
231
+ // 1. No system message → empty `instructions` field in the wire
232
+ // body. Codex rejects requests without `instructions` (same
233
+ // root cause as the eval-runner fix in 6535d531).
234
+ // 2. Empty tools array → the codex adapter omits `tools`,
235
+ // `tool_choice`, `parallel_tool_calls` from the wire body
236
+ // entirely. The Codex backend treats this as malformed.
237
+ //
238
+ // Fix: add a minimal one-line system message (collapses into
239
+ // `instructions`) and one hand-crafted no-op tool. The probe tool
240
+ // is hardcoded with a conservative JSON Schema
241
+ // (`additionalProperties: false`) so strict validators accept it.
242
+ // The "one bad tool schema false-reds everyone" concern from the
243
+ // pre-slice5 comment applied to USER tools; this tool is internal.
204
244
  const input = {
205
- messages: [{ role: 'user', content: 'ping' }],
206
- tools: [],
245
+ messages: [
246
+ {
247
+ role: 'system',
248
+ content: 'You are an availability probe. Respond with a single word.',
249
+ },
250
+ { role: 'user', content: 'ping' },
251
+ ],
252
+ tools: [
253
+ {
254
+ name: 'probe_noop',
255
+ description: 'Probe placeholder. Do not call — the probe ignores any tool calls.',
256
+ inputSchema: {
257
+ type: 'object',
258
+ properties: {},
259
+ },
260
+ },
261
+ ],
207
262
  maxTokens: PROBE_MAX_TOKENS,
208
263
  };
209
264
  try {