aiden-runtime 4.1.1 → 4.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/README.md +78 -26
  2. package/dist/cli/v4/aidenCLI.js +169 -9
  3. package/dist/cli/v4/callbacks.js +20 -2
  4. package/dist/cli/v4/chatSession.js +644 -16
  5. package/dist/cli/v4/commands/auth.js +6 -3
  6. package/dist/cli/v4/commands/doctor.js +23 -27
  7. package/dist/cli/v4/commands/help.js +4 -0
  8. package/dist/cli/v4/commands/index.js +10 -1
  9. package/dist/cli/v4/commands/model.js +30 -1
  10. package/dist/cli/v4/commands/reloadSoul.js +37 -0
  11. package/dist/cli/v4/commands/update.js +102 -0
  12. package/dist/cli/v4/defaultSoul.js +68 -2
  13. package/dist/cli/v4/display/capabilityCard.js +135 -0
  14. package/dist/cli/v4/display/sessionEndCard.js +127 -0
  15. package/dist/cli/v4/display/toolTrail.js +172 -0
  16. package/dist/cli/v4/display.js +492 -142
  17. package/dist/cli/v4/doctor.js +472 -58
  18. package/dist/cli/v4/doctorLiveness.js +65 -10
  19. package/dist/cli/v4/promotionPrompt.js +332 -0
  20. package/dist/cli/v4/providerBootSelector.js +144 -0
  21. package/dist/cli/v4/replyRenderer.js +311 -20
  22. package/dist/cli/v4/sessionSummaryGate.js +66 -0
  23. package/dist/cli/v4/skinEngine.js +14 -3
  24. package/dist/cli/v4/toolPreview.js +153 -0
  25. package/dist/core/tools/nowPlaying.js +7 -15
  26. package/dist/core/v4/aidenAgent.js +91 -29
  27. package/dist/core/v4/capabilities.js +89 -0
  28. package/dist/core/v4/contextCompressor.js +25 -8
  29. package/dist/core/v4/distillationIndex.js +167 -0
  30. package/dist/core/v4/distillationStore.js +98 -0
  31. package/dist/core/v4/logger/logger.js +40 -9
  32. package/dist/core/v4/promotionCandidates.js +234 -0
  33. package/dist/core/v4/promptBuilder.js +145 -1
  34. package/dist/core/v4/sessionDistiller.js +452 -0
  35. package/dist/core/v4/skillMining/skillMiner.js +43 -6
  36. package/dist/core/v4/skillOutcomeTracker.js +323 -0
  37. package/dist/core/v4/subsystemHealth.js +143 -0
  38. package/dist/core/v4/toolRegistry.js +16 -1
  39. package/dist/core/v4/update/executeInstall.js +233 -0
  40. package/dist/core/version.js +1 -1
  41. package/dist/moat/memoryGuard.js +111 -0
  42. package/dist/moat/plannerGuard.js +19 -0
  43. package/dist/moat/skillTeacher.js +14 -5
  44. package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
  45. package/dist/providers/v4/errors.js +112 -4
  46. package/dist/providers/v4/modelDefaults.js +65 -0
  47. package/dist/providers/v4/registry.js +9 -2
  48. package/dist/providers/v4/runtimeResolver.js +6 -0
  49. package/dist/tools/v4/index.js +80 -1
  50. package/dist/tools/v4/memory/memoryRemove.js +57 -2
  51. package/dist/tools/v4/memory/sessionSummary.js +151 -0
  52. package/dist/tools/v4/sessions/recallSession.js +177 -0
  53. package/dist/tools/v4/sessions/sessionSearch.js +5 -1
  54. package/dist/tools/v4/system/_psHelpers.js +123 -0
  55. package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
  56. package/dist/tools/v4/system/appClose.js +79 -0
  57. package/dist/tools/v4/system/appInput.js +154 -0
  58. package/dist/tools/v4/system/appLaunch.js +218 -0
  59. package/dist/tools/v4/system/clipboardRead.js +54 -0
  60. package/dist/tools/v4/system/clipboardWrite.js +84 -0
  61. package/dist/tools/v4/system/mediaKey.js +109 -0
  62. package/dist/tools/v4/system/mediaSessions.js +163 -0
  63. package/dist/tools/v4/system/mediaTransport.js +211 -0
  64. package/dist/tools/v4/system/osProcessList.js +99 -0
  65. package/dist/tools/v4/system/screenshot.js +106 -0
  66. package/dist/tools/v4/system/volumeSet.js +157 -0
  67. package/package.json +4 -1
  68. package/skills/system_control.md +185 -69
@@ -0,0 +1,323 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * core/v4/skillOutcomeTracker.ts — Phase v4.1.2-slice4.
10
+ *
11
+ * Track whether skills actually succeed when loaded. The mining-time
12
+ * confidence score (skillMining/skillMiner.ts:computeConfidence) is
13
+ * set once and never updated — skills that consistently produce bad
14
+ * tool-call traces stay confident; skills that consistently work well
15
+ * never accumulate evidence of that.
16
+ *
17
+ * Mechanism:
18
+ * - When `skill_view` fires (the model just received a skill body),
19
+ * open an attribution WINDOW for that skill: the next N tool calls
20
+ * are attributed as that skill's downstream outcomes.
21
+ * - Tool successes / failures attributed to the skill (counter-bump).
22
+ * - Another `skill_view` supersedes the window (last-write-wins).
23
+ * - Window closes after N tool calls or when superseded.
24
+ *
25
+ * What this is NOT:
26
+ * - Not a quality judge. We don't ask an LLM "did that skill help?".
27
+ * Tool success is a proxy — a noisy one — but it's deterministic
28
+ * and free. Per slice4 Phase 3 decision tree: Option A.
29
+ * - Not a promotion engine. Surfaced via `aiden doctor`; the existing
30
+ * SkillTeacher.flaggedSkillNames() flagging path stays dead (it
31
+ * would change SkillLoader behavior — separate decision).
32
+ *
33
+ * Persistence:
34
+ * `<skillsDir>/.skill-outcomes.json` — sidecar, atomic write
35
+ * (tmp + rename), best-effort failure handling via slice3
36
+ * SubsystemHealthTracker. Lazy hydrate on first `skill_view` call so
37
+ * sessions that never load a skill pay zero disk I/O.
38
+ *
39
+ * Status: PHASE v4.1.2-slice4.
40
+ */
41
+ var __importDefault = (this && this.__importDefault) || function (mod) {
42
+ return (mod && mod.__esModule) ? mod : { "default": mod };
43
+ };
44
+ Object.defineProperty(exports, "__esModule", { value: true });
45
+ exports.SkillOutcomeTracker = exports.ATTRIBUTION_WINDOW = void 0;
46
+ exports.isFailure = isFailure;
47
+ const node_fs_1 = require("node:fs");
48
+ const node_path_1 = __importDefault(require("node:path"));
49
+ /**
50
+ * Attribution window size — number of non-skill_view tool calls
51
+ * following a `skill_view` whose outcomes are attributed to that
52
+ * skill. Hard-coded per slice4 Phase 3 Q1: don't add config knobs
53
+ * we won't tune. If empirical signal shows 5 is wrong, change it
54
+ * here.
55
+ */
56
+ exports.ATTRIBUTION_WINDOW = 5;
57
+ /** Cap for `lastError.message` — keep snapshots small. */
58
+ const ERROR_MESSAGE_CAP = 200;
59
/**
 * Per-skill outcome counters driven by the agent's tool-call hook.
 *
 * A `skill_view` call opens an attribution window of
 * `ATTRIBUTION_WINDOW` tool calls; each following non-`skill_view`
 * result is counted as a success or failure for that skill. State is
 * kept in memory and mirrored to a JSON sidecar at `persistPath`.
 */
class SkillOutcomeTracker {
    /**
     * @param persistPath   Absolute path to the sidecar JSON file.
     * @param healthTracker Optional tracker. When present, `recordFailure(err)`
     *                      is called on hydrate/persist errors and
     *                      `recordSuccess()` after each successful persist.
     */
    constructor(persistPath, healthTracker) {
        this.persistPath = persistPath;
        this.healthTracker = healthTracker;
        /** Currently-loaded skill (last skill_view, while its window is open). */
        this.currentSkill = null;
        /** Tool calls remaining in the current attribution window. */
        this.remaining = 0;
        /** In-memory outcomes, keyed by skill name. Hydrated lazily. */
        this.outcomes = new Map();
        /** True once we've attempted hydration from disk. */
        this.hydrated = false;
        /** Pending persist requested while one is in flight. */
        this.persistQueued = false;
    }
    /**
     * Unified hook: dispatches to `onToolBefore` when `phase` is
     * `'before'`, otherwise to `onToolAfter` with the tool result.
     */
    onTool(call, phase, result) {
        if (phase === 'before')
            this.onToolBefore(call);
        else
            this.onToolAfter(call, result);
    }
    /**
     * Called before each tool. Only `skill_view` calls carrying a
     * non-empty skill name have any effect: they open (or supersede)
     * the attribution window and bump the skill's `loaded` counter.
     */
    onToolBefore(call) {
        if (call.name !== 'skill_view')
            return;
        const name = extractSkillName(call.arguments);
        if (!name)
            return;
        // Hydrate synchronously so the bump below merges with any prior
        // persisted state instead of overwriting it on the next persist.
        this.ensureHydratedSync();
        this.currentSkill = name;
        this.remaining = exports.ATTRIBUTION_WINDOW;
        this.bump(name, (o) => {
            o.loaded += 1;
            o.lastUsed = new Date().toISOString();
        });
        void this.queuePersist();
    }
    /**
     * Called after each tool. Attributes success/failure to the skill
     * whose window is open. `skill_view` results are ignored here — the
     * window grades DOWNSTREAM tools, not the load itself.
     */
    onToolAfter(call, result) {
        if (call.name === 'skill_view')
            return;
        if (!this.currentSkill || this.remaining <= 0)
            return;
        const skill = this.currentSkill;
        const failed = isFailure(result);
        this.bump(skill, (o) => {
            if (failed) {
                o.toolFailures += 1;
                const msg = extractErrorMessage(result);
                // Keep the previous lastError when this failure carries no text.
                if (msg) {
                    o.lastError = {
                        message: truncate(msg, ERROR_MESSAGE_CAP),
                        at: new Date().toISOString(),
                    };
                }
            }
            else {
                o.toolSuccesses += 1;
            }
        });
        this.remaining -= 1;
        // Window exhausted: later tool calls are attributed to nobody.
        if (this.remaining === 0)
            this.currentSkill = null;
        void this.queuePersist();
    }
    /**
     * Outcome rows currently held in memory, sorted by `loaded`
     * descending so the most-used skills come first.
     */
    snapshot() {
        const arr = Array.from(this.outcomes.values());
        arr.sort((a, b) => b.loaded - a.loaded);
        return arr;
    }
    /** Number of skills with an in-memory outcome row. */
    size() {
        return this.outcomes.size;
    }
    // ── private ───────────────────────────────────────────────────────
    /** Fetch-or-create the row for `skillName`, apply `mutator`, store it. */
    bump(skillName, mutator) {
        const cur = this.outcomes.get(skillName) ?? {
            skillName,
            loaded: 0,
            toolSuccesses: 0,
            toolFailures: 0,
        };
        mutator(cur);
        this.outcomes.set(skillName, cur);
    }
    /**
     * Synchronous, one-shot load of the sidecar into `outcomes`.
     *
     * A missing file is silent. Read/parse errors are reported to the
     * health tracker and otherwise ignored. Counter fields are clamped
     * to non-negative integers so a hand-edited or corrupted sidecar
     * cannot seed `NaN`, which would survive every later `+= 1` and
     * break the `snapshot()` comparator.
     */
    ensureHydratedSync() {
        if (this.hydrated)
            return;
        // Flip first: a failed hydrate is not retried on later calls.
        this.hydrated = true;
        const toCount = (raw) => {
            const n = Number(raw ?? 0);
            return Number.isFinite(n) && n > 0 ? Math.floor(n) : 0;
        };
        try {
            if (!(0, node_fs_1.existsSync)(this.persistPath))
                return;
            const raw = (0, node_fs_1.readFileSync)(this.persistPath, 'utf-8');
            const parsed = JSON.parse(raw);
            if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
                for (const [name, val] of Object.entries(parsed)) {
                    if (val && typeof val === 'object' && !Array.isArray(val)) {
                        const v = val;
                        this.outcomes.set(name, {
                            skillName: v.skillName ?? name,
                            loaded: toCount(v.loaded),
                            toolSuccesses: toCount(v.toolSuccesses),
                            toolFailures: toCount(v.toolFailures),
                            ...(v.lastUsed ? { lastUsed: v.lastUsed } : {}),
                            ...(v.lastError ? { lastError: v.lastError } : {}),
                        });
                    }
                }
            }
        }
        catch (err) {
            this.healthTracker?.recordFailure(err);
        }
    }
    /**
     * Test/shutdown seam: resolves once no persist is in flight or
     * queued. Loops because a finishing persist may immediately start
     * the queued follow-up.
     */
    async flush() {
        while (this.persisting) {
            await this.persisting;
        }
    }
    /**
     * Coalescing persist. If a write is in flight, mark exactly one
     * follow-up as queued and return the in-flight promise; otherwise
     * start a write and, when it settles, run the queued follow-up.
     */
    queuePersist() {
        if (this.persisting) {
            this.persistQueued = true;
            return this.persisting;
        }
        this.persisting = this.persist()
            .finally(() => {
            const wasQueued = this.persistQueued;
            this.persistQueued = false;
            this.persisting = undefined;
            if (wasQueued) {
                // Fire-and-forget the queued follow-up.
                void this.queuePersist();
            }
        });
        return this.persisting;
    }
    /**
     * Render the in-memory outcomes as the sidecar's JSON text.
     *
     * `Object.fromEntries` defines own data properties, so a skill
     * literally named `__proto__` is written out. Assigning
     * `payload['__proto__'] = v` on a plain object would instead hit
     * the prototype setter and silently drop the row.
     */
    serialize() {
        const payload = Object.fromEntries(this.outcomes);
        return JSON.stringify(payload, null, 2) + '\n';
    }
    /**
     * Write the sidecar via tmp file + rename. Never rejects on I/O
     * errors: they are reported to the health tracker and the tmp file
     * is removed on a best-effort basis.
     */
    async persist() {
        const tmp = `${this.persistPath}.tmp`;
        try {
            await node_fs_1.promises.mkdir(node_path_1.default.dirname(this.persistPath), { recursive: true });
            await node_fs_1.promises.writeFile(tmp, this.serialize(), 'utf-8');
            await node_fs_1.promises.rename(tmp, this.persistPath);
            this.healthTracker?.recordSuccess();
        }
        catch (err) {
            this.healthTracker?.recordFailure(err);
            // Best-effort: clean up tmp file if it exists. Ignore errors.
            try {
                await node_fs_1.promises.unlink(tmp);
            }
            catch { /* ignore */ }
        }
    }
}
257
+ exports.SkillOutcomeTracker = SkillOutcomeTracker;
258
+ // ── private helpers ───────────────────────────────────────────────────
259
/**
 * Pull the skill name out of a `skill_view` argument bag.
 *
 * @param args Tool-call arguments; anything that is not a non-null
 *             object yields ''.
 * @returns The trimmed `name` property when it is a string, else ''.
 */
function extractSkillName(args) {
    const isObjectLike = Boolean(args) && typeof args === 'object';
    if (!isObjectLike)
        return '';
    const candidate = args.name;
    if (typeof candidate !== 'string')
        return '';
    return candidate.trim();
}
265
/**
 * Decide whether a tool result counts as a failure.
 *
 * Two layers are inspected with the same rule — the result object
 * itself, then its `result` payload when that payload is an object:
 *   - truthy `error`       → failure
 *   - `success === false`  → failure
 * Anything else, including a missing result, counts as success.
 *
 * "Tool succeeded but the answer was wrong" is deliberately not
 * detected; that would need a judge, not a flag check.
 *
 * @param result Tool-call result (may be null/undefined).
 * @returns {boolean} true when either layer signals failure.
 */
function isFailure(result) {
    const signalsFailure = (layer) => Boolean(layer.error) || layer.success === false;
    if (!result)
        return false;
    if (signalsFailure(result))
        return true;
    const payload = result.result;
    const hasObjectPayload = Boolean(payload) && typeof payload === 'object';
    return hasObjectPayload && signalsFailure(payload);
}
295
/**
 * Find a human-readable error message in a tool result.
 *
 * Looks at `result.error` first, then at `result.result.error` when
 * the payload is an object. At each layer a string `error` is
 * returned as-is, and an object `error` contributes its string
 * `message`.
 *
 * @param result Tool-call result (may be null/undefined).
 * @returns {string} The first message found, or '' when there is none.
 */
function extractErrorMessage(result) {
    // Returns undefined (not '') when the layer has no usable message,
    // so an empty-string `error` still short-circuits the search.
    const readMessage = (layer) => {
        const err = layer.error;
        if (typeof err === 'string')
            return err;
        if (err && typeof err === 'object') {
            const text = err.message;
            if (typeof text === 'string')
                return text;
        }
        return undefined;
    };
    if (!result)
        return '';
    const fromTop = readMessage(result);
    if (fromTop !== undefined)
        return fromTop;
    const payload = result.result;
    if (payload && typeof payload === 'object') {
        const fromPayload = readMessage(payload);
        if (fromPayload !== undefined)
            return fromPayload;
    }
    return '';
}
319
/**
 * Cap `s` at `max` characters, marking a cut with a trailing '...'.
 *
 * @param {string} s   Text to shorten.
 * @param {number} max Maximum length of the returned string.
 * @returns {string} `s` unchanged when it already fits; otherwise a
 *                   prefix of `s`, followed by '...' when `max` has
 *                   room for it.
 */
function truncate(s, max) {
    if (s.length <= max)
        return s;
    // Below 3 there is no room for the ellipsis, and `max - 3` would be
    // negative: `slice(0, -n)` trims from the END and returns a string
    // LONGER than `max`. Hard-cut instead.
    if (max < 3)
        return s.slice(0, Math.max(0, max));
    return s.slice(0, max - 3) + '...';
}
@@ -0,0 +1,143 @@
1
+ "use strict";
2
+ /**
3
+ * Copyright (c) 2026 Shiva Deore (Taracod).
4
+ * Licensed under AGPL-3.0. See LICENSE for details.
5
+ *
6
+ * Aiden — local-first agent.
7
+ */
8
+ /**
9
+ * core/v4/subsystemHealth.ts — Phase v4.1.2-slice3.
10
+ *
11
+ * Lightweight in-process telemetry for the silent-failure layers.
12
+ * Four subsystems (ContextCompressor, SkillTeacher, SkillMiner,
13
+ * Logger) historically caught errors and continued without
14
+ * surfacing them — masking real bugs that were diagnosable only
15
+ * after manual instrumentation. This module is the surface.
16
+ *
17
+ * Design (decision tree from slice3 Phase 3):
18
+ * Option C — subsystem-owned state object, optionally registered
19
+ * with a shared registry. The registry is constructor-injected
20
+ * (no singleton — singletons leak state between parallel tests),
21
+ * and every record op is O(1) and side-effect-free (no I/O, no
22
+ * log writes, no recursion through the Logger we are tracking).
23
+ *
24
+ * Surface:
25
+ * - `SubsystemHealth` — read-only snapshot shape doctor renders
26
+ * - `SubsystemHealthTracker` — per-subsystem owned counter
27
+ * - `SubsystemHealthRegistry`— optional aggregator AidenAgent owns
28
+ *
29
+ * Subsystems may operate without a tracker (back-compat); when a
30
+ * tracker is wired they call `recordSuccess()` / `recordFailure(err)`
31
+ * at the appropriate points. The registry is read by `aiden doctor`
32
+ * via the AidenAgent public field.
33
+ */
34
+ Object.defineProperty(exports, "__esModule", { value: true });
35
+ exports.SubsystemHealthTracker = void 0;
36
+ exports.createSubsystemHealthRegistry = createSubsystemHealthRegistry;
37
/**
 * Health counter owned by a single subsystem.
 *
 * Holds three integers (total calls, total errors, current
 * consecutive-failure streak) plus the most recent error. The owning
 * subsystem reports via `recordSuccess()` / `recordFailure(err)`;
 * readers pull the state on demand through `snapshot()`.
 */
class SubsystemHealthTracker {
    /**
     * @param subsystem Stable id included in every snapshot, e.g.
     *                  'compressor', 'skill-teacher', 'logger:file-sink'.
     */
    constructor(subsystem) {
        this.subsystem = subsystem;
        this._totalCalls = 0;
        this._totalErrors = 0;
        this._consecutive = 0;
    }
    /** Count one successful call and end any running failure streak. */
    recordSuccess() {
        this._consecutive = 0;
        this._totalCalls++;
    }
    /**
     * Count one failed call, extend the failure streak, and remember
     * the error as `{ message, at }` with the message capped at 200
     * characters. Does no logging and no disk I/O.
     *
     * @param err The thrown value; an Error's `message`, a string as-is,
     *            or anything else via `safeStringify`.
     */
    recordFailure(err) {
        this._totalCalls++;
        this._totalErrors++;
        this._consecutive++;
        let text;
        if (err instanceof Error) {
            text = err.message;
        }
        else if (typeof err === 'string') {
            text = err;
        }
        else {
            text = safeStringify(err);
        }
        const message = truncate(text, 200);
        this._lastError = { message, at: new Date() };
    }
    /**
     * Build a fresh snapshot object from the current counters.
     * `lastError` is present only once a failure has been recorded; it
     * carries the current streak length as `consecutive`.
     */
    snapshot() {
        const last = this._lastError;
        const base = {
            subsystem: this.subsystem,
            totalCalls: this._totalCalls,
            totalErrors: this._totalErrors,
        };
        if (!last)
            return base;
        return {
            ...base,
            lastError: {
                message: last.message,
                at: last.at,
                consecutive: this._consecutive,
            },
        };
    }
}
96
+ exports.SubsystemHealthTracker = SubsystemHealthTracker;
97
/**
 * Create an empty health registry backed by a private Map.
 *
 * @returns An object with:
 *   - `register(subsystem, reader)` — store `reader` under `subsystem`,
 *     replacing any reader already registered for that id;
 *   - `snapshot()` — call every reader in registration order and
 *     collect the results, flattening array results one level;
 *   - `reset()` — drop all registered readers.
 */
function createSubsystemHealthRegistry() {
    const readersById = new Map();
    const register = (subsystem, reader) => {
        readersById.set(subsystem, reader);
    };
    const snapshot = () => {
        const collected = [];
        readersById.forEach((read) => {
            try {
                const value = read();
                if (Array.isArray(value)) {
                    collected.push(...value);
                }
                else {
                    collected.push(value);
                }
            }
            catch {
                // A throwing reader is skipped so one bad subsystem
                // cannot take the whole snapshot down.
            }
        });
        return collected;
    };
    const reset = () => {
        readersById.clear();
    };
    return { register, snapshot, reset };
}
125
+ // ── private helpers ───────────────────────────────────────────────────
126
/**
 * Cap `s` at `max` characters, marking a cut with a trailing '...'.
 *
 * @param {string} s   Text to shorten.
 * @param {number} max Maximum length of the returned string.
 * @returns {string} `s` unchanged when it already fits; otherwise a
 *                   prefix of `s`, followed by '...' when `max` has
 *                   room for it.
 */
function truncate(s, max) {
    if (s.length <= max)
        return s;
    // Below 3 there is no room for the ellipsis, and `max - 3` would be
    // negative: `slice(0, -n)` trims from the END and returns a string
    // LONGER than `max`. Hard-cut instead.
    if (max < 3)
        return s.slice(0, Math.max(0, max));
    return s.slice(0, max - 3) + '...';
}
131
/**
 * Turn an arbitrary thrown value into a string without ever throwing.
 *
 * Tries, in order: `JSON.stringify`, `String()`, then
 * `Object.prototype.toString`, and finally a fixed placeholder.
 *
 * @param v Any value.
 * @returns {string} A best-effort textual rendering of `v`.
 */
function safeStringify(v) {
    // `JSON.stringify` returns the VALUE undefined (not a string) for
    // undefined, functions and symbols, and throws on cycles and
    // BigInt — so accept its output only when it really is a string.
    try {
        const json = JSON.stringify(v);
        if (typeof json === 'string')
            return json;
    }
    catch { /* fall through to String() */ }
    // `String()` itself throws for objects that cannot be converted to
    // a primitive (e.g. a circular null-prototype object), so it needs
    // its own guard rather than living inside the catch above.
    try {
        return String(v);
    }
    catch { /* fall through to the tag-based rendering */ }
    try {
        return Object.prototype.toString.call(v);
    }
    catch {
        return '[unstringifiable]';
    }
}
@@ -154,7 +154,22 @@ class ToolRegistry {
154
154
  }
155
155
  try {
156
156
  const result = await handler.execute(args, context);
157
- return { id: call.id, name: call.name, result };
157
+ // v4.1.3-repl-polish: lift `degraded` + `degradedReason` from the
158
+ // handler's inner result to the outer ToolCallResult so the CLI
159
+ // trail row can render the partial-yellow state. Tools opt in by
160
+ // setting these on the object they return; without this lift the
161
+ // flags would sit on `out.result.degraded` where callbacks.ts
162
+ // can't see them. Strict typeof checks avoid promoting truthy-
163
+ // but-wrong-shape junk (numbers, strings, nested objects).
164
+ const inner = result;
165
+ const out = { id: call.id, name: call.name, result };
166
+ if (typeof inner?.degraded === 'boolean' && inner.degraded) {
167
+ out.degraded = true;
168
+ if (typeof inner.degradedReason === 'string') {
169
+ out.degradedReason = inner.degradedReason;
170
+ }
171
+ }
172
+ return out;
158
173
  }
159
174
  catch (err) {
160
175
  const message = err instanceof Error ? err.message : String(err);