aiden-runtime 4.1.0 → 4.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -33
- package/dist/cli/v4/aidenCLI.js +162 -11
- package/dist/cli/v4/callbacks.js +5 -2
- package/dist/cli/v4/chatSession.js +525 -15
- package/dist/cli/v4/commands/auth.js +6 -3
- package/dist/cli/v4/commands/help.js +4 -0
- package/dist/cli/v4/commands/index.js +10 -1
- package/dist/cli/v4/commands/reloadSoul.js +37 -0
- package/dist/cli/v4/commands/update.js +102 -0
- package/dist/cli/v4/defaultSoul.js +68 -2
- package/dist/cli/v4/display.js +28 -10
- package/dist/cli/v4/doctor.js +173 -1
- package/dist/cli/v4/doctorLiveness.js +384 -0
- package/dist/cli/v4/promotionPrompt.js +202 -0
- package/dist/cli/v4/providerBootSelector.js +144 -0
- package/dist/cli/v4/sessionSummaryGate.js +66 -0
- package/dist/cli/v4/toolPreview.js +139 -0
- package/dist/core/v4/aidenAgent.js +91 -29
- package/dist/core/v4/capabilities.js +89 -0
- package/dist/core/v4/contextCompressor.js +25 -8
- package/dist/core/v4/distillationIndex.js +167 -0
- package/dist/core/v4/distillationStore.js +98 -0
- package/dist/core/v4/logger/logger.js +40 -9
- package/dist/core/v4/promotionCandidates.js +234 -0
- package/dist/core/v4/promptBuilder.js +145 -1
- package/dist/core/v4/sessionDistiller.js +405 -0
- package/dist/core/v4/skillMining/extractorPrompt.js +28 -21
- package/dist/core/v4/skillMining/proposalBuilder.js +3 -2
- package/dist/core/v4/skillMining/skillMiner.js +43 -6
- package/dist/core/v4/skillOutcomeTracker.js +323 -0
- package/dist/core/v4/subsystemHealth.js +143 -0
- package/dist/core/v4/update/executeInstall.js +233 -0
- package/dist/core/version.js +1 -1
- package/dist/moat/dangerousPatterns.js +1 -1
- package/dist/moat/memoryGuard.js +111 -0
- package/dist/moat/skillTeacher.js +14 -5
- package/dist/providers/v4/chatCompletionsAdapter.js +9 -0
- package/dist/providers/v4/codexResponsesAdapter.js +7 -2
- package/dist/providers/v4/errors.js +67 -1
- package/dist/providers/v4/modelDefaults.js +65 -0
- package/dist/providers/v4/ollamaPromptToolsAdapter.js +9 -2
- package/dist/providers/v4/registry.js +9 -2
- package/dist/providers/v4/runtimeResolver.js +6 -0
- package/dist/tools/v4/index.js +57 -1
- package/dist/tools/v4/memory/memoryRemove.js +57 -2
- package/dist/tools/v4/memory/sessionSummary.js +151 -0
- package/dist/tools/v4/sessions/recallSession.js +163 -0
- package/dist/tools/v4/sessions/sessionSearch.js +5 -1
- package/dist/tools/v4/subagent/subagentFanout.js +24 -0
- package/dist/tools/v4/system/_psHelpers.js +55 -0
- package/dist/tools/v4/system/aidenSelfUpdate.js +162 -0
- package/dist/tools/v4/system/appClose.js +79 -0
- package/dist/tools/v4/system/appLaunch.js +92 -0
- package/dist/tools/v4/system/clipboardRead.js +54 -0
- package/dist/tools/v4/system/clipboardWrite.js +84 -0
- package/dist/tools/v4/system/mediaKey.js +78 -0
- package/dist/tools/v4/system/osProcessList.js +99 -0
- package/dist/tools/v4/system/screenshot.js +106 -0
- package/dist/tools/v4/system/volumeSet.js +157 -0
- package/package.json +4 -1
- package/skills/system_control.md +135 -69
|
@@ -38,11 +38,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
39
|
exports.PromptBuilder = void 0;
|
|
40
40
|
exports.shouldInjectLlama33ToolHint = shouldInjectLlama33ToolHint;
|
|
41
|
+
exports.shouldInjectExecutionDiscipline = shouldInjectExecutionDiscipline;
|
|
41
42
|
const node_fs_1 = require("node:fs");
|
|
42
43
|
const node_os_1 = __importDefault(require("node:os"));
|
|
43
44
|
// When SOUL.md is missing or whitespace-only the bundled default takes
|
|
44
45
|
// over so a fresh install still has a working identity.
|
|
45
46
|
const defaultSoul_1 = require("../../cli/v4/defaultSoul");
|
|
47
|
+
// Phase v4.1.2-followup: runtime-injected version + capabilities slot.
|
|
48
|
+
const capabilities_1 = require("./capabilities");
|
|
46
49
|
// ── Section header / sentinel string contract ─────────────────────────
|
|
47
50
|
//
|
|
48
51
|
// Every literal here is part of the API contract pinned by tests. Header
|
|
@@ -59,6 +62,74 @@ const NOTE_USER_LIVE = '[System note: Treat as live identity, not past conversat
|
|
|
59
62
|
const NOTE_MEMORY_LIVE = '[System note: Treat as live working memory, not past conversation.]';
|
|
60
63
|
const SKILLS_LOAD_NOTE = 'You MUST load it first via the `skill_view` tool before invoking ' +
|
|
61
64
|
'the underlying capability. Skills carry the procedure the tools alone don\'t.';
|
|
65
|
+
/**
 * Phase v4.1.2 alive-core: one-line embodiment directive.
 *
 * Prepended to the identity slot only when the identity text came from a
 * user-authored SOUL.md. It tells the model to *be* the persona instead of
 * paraphrasing SOUL.md back at the user (the "stiff generic reply" failure
 * mode).
 *
 * Deliberately NOT applied when the slot falls back to DEFAULT_SOUL_MD:
 * that text is generic by design, and the directive would coach the model
 * into performing a flat persona.
 */
const EMBODIMENT_DIRECTIVE = [
    'Embody this identity and tone.',
    'Speak as Aiden, not about Aiden.',
    'Avoid generic, stiff replies.',
].join(' ');
|
|
78
|
+
/**
 * Phase v4.1.2 alive-core: tool-conditional guidance blocks.
 *
 * Each block is injected only when its toolset tag is present in
 * `opts.toolsetsLoaded`, so the persona adapts to the capabilities that
 * are actually available instead of using a fixed slot order.
 *
 * Toolset tag → block:
 *   - 'memory'          → MEMORY_GUIDANCE
 *   - 'session-search'  → SESSION_SEARCH_GUIDANCE
 *   - 'skills'          → SKILLS_GUIDANCE
 *
 * The tags must match `ToolHandler.toolset` on the registered tools
 * (tools/v4/memory/*.ts ships `toolset: 'memory'`,
 * tools/v4/sessions/sessionSearch.ts ships `toolset: 'session-search'`,
 * skill tools ship `toolset: 'skills'`).
 */

/** Render a guidance block: markdown H2 heading, blank line, then the body lines. */
const guidanceSection = (heading, ...bodyLines) => [`## ${heading}`, '', ...bodyLines].join('\n');

const MEMORY_GUIDANCE = guidanceSection(
    'Persistent memory',
    'You have persistent memory across sessions. Save durable facts using `memory_add`:',
    'user preferences, environment details, stable conventions. Memory is injected into',
    'every turn; keep it compact and focused on facts that will still matter later.',
    'Prioritize what reduces future user steering.',
);

const SESSION_SEARCH_GUIDANCE = guidanceSection(
    'Session recall',
    'When the user references something from a past conversation or you suspect',
    'relevant cross-session context exists, use `session_search` to recall it before',
    'asking them to repeat themselves.',
);

const SKILLS_GUIDANCE = guidanceSection(
    'Skill upkeep',
    'After completing a complex task (5+ tool calls), fixing a tricky error, or',
    'discovering a non-trivial workflow, save it as a skill so you can reuse it next',
    'time. When using an existing skill and finding it outdated, patch it immediately',
    "— don't wait to be asked.",
);
|
|
118
|
+
/**
 * Phase v4.1.2 alive-core: execution-discipline prose.
 *
 * Spells out the "act, don't narrate" contract to counter the failure mode
 * where the model says "I'll run the tests", makes no tool call, and ends
 * its turn. Injected whenever `shouldInjectExecutionDiscipline(modelId)`
 * returns true (currently always).
 */
const EXECUTION_DISCIPLINE_PROSE = `## Tool use enforcement

When you say you will perform an action ("I'll run the tests", "let me check the
file"), you MUST immediately make the corresponding tool call in the same response.
Never end your turn with a promise of future action — execute it now. Every
response should either contain tool calls that make progress, or deliver a final
result. Responses that only describe intentions without acting are not acceptable.`;
|
|
62
133
|
/**
|
|
63
134
|
* Llama-3.3-specific tool-call format guard. Adapter-side recovery picks
|
|
64
135
|
* up failures, but we'd rather avoid the 400 round-trip.
|
|
@@ -73,6 +144,16 @@ function shouldInjectLlama33ToolHint(modelId) {
|
|
|
73
144
|
return false;
|
|
74
145
|
return /llama-?3\.3/i.test(modelId);
|
|
75
146
|
}
|
|
147
|
+
/**
 * Phase v4.1.2: gate for the execution-discipline prompt slot.
 *
 * Always on for now: the "act, don't narrate" directive is considered
 * useful for every tool-using model routed through here. The model id is
 * accepted (and currently ignored) so a carve-out can be added later if a
 * specific model turns out to react badly — over-applying a helpful prompt
 * is preferred to guessing which models need it.
 *
 * @param {string | undefined} _modelId  Model identifier; unused today.
 * @returns {boolean} Always `true`.
 */
function shouldInjectExecutionDiscipline(_modelId) {
    // No per-model exclusions exist yet.
    const enabledForAllModels = true;
    return enabledForAllModels;
}
|
|
76
157
|
// ── Internal helpers ──────────────────────────────────────────────────
|
|
77
158
|
function detectPlatform() {
|
|
78
159
|
const p = node_os_1.default.platform();
|
|
@@ -171,9 +252,16 @@ class PromptBuilder {
|
|
|
171
252
|
if (!opts.skipFilesystem) {
|
|
172
253
|
identity = await readNonEmpty(opts.paths.soulMd);
|
|
173
254
|
}
|
|
255
|
+
// Phase v4.1.2: track whether the identity came from a real
|
|
256
|
+
// user-authored SOUL.md so the embodiment directive only fires
|
|
257
|
+
// when there's a meaningful persona to embody.
|
|
258
|
+
const identityFromDisk = identity !== null;
|
|
174
259
|
if (!identity)
|
|
175
260
|
identity = defaultSoul_1.DEFAULT_SOUL_MD;
|
|
176
|
-
|
|
261
|
+
const identityContent = identityFromDisk
|
|
262
|
+
? `${EMBODIMENT_DIRECTIVE}\n\n${identity.trim()}`
|
|
263
|
+
: identity.trim();
|
|
264
|
+
slots.push({ name: 'identity', content: identityContent, optional: false });
|
|
177
265
|
// ── 2. Personality overlay ────────────────────────────────────────
|
|
178
266
|
const overlay = opts.personalityOverlay?.trim();
|
|
179
267
|
if (overlay) {
|
|
@@ -197,6 +285,51 @@ class PromptBuilder {
|
|
|
197
285
|
optional: true,
|
|
198
286
|
});
|
|
199
287
|
}
|
|
288
|
+
// ── 4.25. Runtime manifest (self-awareness) ───────────────────────
|
|
289
|
+
// High-signal facts about what Aiden actually has loaded right now:
|
|
290
|
+
// version, tool count, skill count, channel/surface list, current
|
|
291
|
+
// provider/model. Always present so "what version are you" /
|
|
292
|
+
// "what tools do you have" answers come from facts in context,
|
|
293
|
+
// not from whatever stale text used to live in SOUL.md.
|
|
294
|
+
const runtimeManifest = (0, capabilities_1.buildRuntimeManifest)({
|
|
295
|
+
toolCount: opts.toolCount ?? 0,
|
|
296
|
+
skillCount: opts.skillsList?.length ?? 0,
|
|
297
|
+
providerId: opts.providerId,
|
|
298
|
+
modelId: opts.modelId,
|
|
299
|
+
});
|
|
300
|
+
slots.push({
|
|
301
|
+
name: 'runtime',
|
|
302
|
+
content: (0, capabilities_1.renderRuntimeSlot)(runtimeManifest),
|
|
303
|
+
optional: false,
|
|
304
|
+
});
|
|
305
|
+
// ── 4.5. Tool-conditional guidance ────────────────────────────────
|
|
306
|
+
// Each block fires only when its corresponding toolset is loaded.
|
|
307
|
+
// Order is deterministic so the prefix cache stays stable across
|
|
308
|
+
// turns with the same toolset set.
|
|
309
|
+
const toolsets = opts.toolsetsLoaded;
|
|
310
|
+
if (toolsets && toolsets.size > 0) {
|
|
311
|
+
if (toolsets.has('memory')) {
|
|
312
|
+
slots.push({
|
|
313
|
+
name: 'guidance.memory',
|
|
314
|
+
content: MEMORY_GUIDANCE,
|
|
315
|
+
optional: true,
|
|
316
|
+
});
|
|
317
|
+
}
|
|
318
|
+
if (toolsets.has('session-search')) {
|
|
319
|
+
slots.push({
|
|
320
|
+
name: 'guidance.sessionSearch',
|
|
321
|
+
content: SESSION_SEARCH_GUIDANCE,
|
|
322
|
+
optional: true,
|
|
323
|
+
});
|
|
324
|
+
}
|
|
325
|
+
if (toolsets.has('skills')) {
|
|
326
|
+
slots.push({
|
|
327
|
+
name: 'guidance.skills',
|
|
328
|
+
content: SKILLS_GUIDANCE,
|
|
329
|
+
optional: true,
|
|
330
|
+
});
|
|
331
|
+
}
|
|
332
|
+
}
|
|
200
333
|
// ── 5. Skills ─────────────────────────────────────────────────────
|
|
201
334
|
if (opts.skillsList && opts.skillsList.length > 0) {
|
|
202
335
|
slots.push({
|
|
@@ -213,6 +346,17 @@ class PromptBuilder {
|
|
|
213
346
|
optional: true,
|
|
214
347
|
});
|
|
215
348
|
}
|
|
349
|
+
// ── 6.5. Execution discipline ─────────────────────────────────────
|
|
350
|
+
// Phase v4.1.2: closes the "promise without acting" failure mode.
|
|
351
|
+
// Model-conditional via shouldInjectExecutionDiscipline so we can
|
|
352
|
+
// narrow later if a specific model proves counter-productive.
|
|
353
|
+
if (shouldInjectExecutionDiscipline(opts.modelId)) {
|
|
354
|
+
slots.push({
|
|
355
|
+
name: 'executionDiscipline',
|
|
356
|
+
content: EXECUTION_DISCIPLINE_PROSE,
|
|
357
|
+
optional: true,
|
|
358
|
+
});
|
|
359
|
+
}
|
|
216
360
|
// ── 7. Iteration budget ───────────────────────────────────────────
|
|
217
361
|
if (opts.initialBudget) {
|
|
218
362
|
const { used, max } = opts.initialBudget;
|
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* core/v4/sessionDistiller.ts — Phase v4.1.2-memory-AB.
|
|
10
|
+
*
|
|
11
|
+
* Replaces the lossy 5-bullet auxiliary summary with a structured
|
|
12
|
+
* SessionDistillation:
|
|
13
|
+
*
|
|
14
|
+
* - bullets[] (5 bullets, back-compat with MEMORY.md `## Recent sessions`)
|
|
15
|
+
* - decisions[] (higher-fidelity than bullets)
|
|
16
|
+
* - open_items[] (unfinished work, useful for next session)
|
|
17
|
+
* - keywords[] (for future retrieval ranking — Phase C)
|
|
18
|
+
* - files_touched[] (DETERMINISTIC — derived from tool-call result payloads)
|
|
19
|
+
* - tools_used[] (DETERMINISTIC — counted from tool-call trace names)
|
|
20
|
+
* - schema_version (always 1; reserved for future migrations)
|
|
21
|
+
* - exit_path (which exit caused the distillation: quit/sigint/etc.)
|
|
22
|
+
* - partial (set true when LLM JSON parse falls back to bullets-only)
|
|
23
|
+
*
|
|
24
|
+
* Source-of-truth split:
|
|
25
|
+
* - Programmatic fields (files_touched, tools_used) → trace inspection.
|
|
26
|
+
* - Semantic fields (bullets, decisions, open_items, keywords) → single
|
|
27
|
+
* auxiliary-LLM call with strict-then-lenient JSON parsing.
|
|
28
|
+
*
|
|
29
|
+
* Phase A's CLI ChatSession owns the per-session HonestyTraceEntry[]
|
|
30
|
+
* accumulator and passes it here. The auxiliary call sees the full
|
|
31
|
+
* message history (not the trace — the trace is purely for programmatic
|
|
32
|
+
* field derivation).
|
|
33
|
+
*/
|
|
34
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
35
|
+
exports.TOOL_RESULT_TRUNCATION = exports.SESSION_DISTILLATION_SCHEMA_VERSION = void 0;
|
|
36
|
+
exports.deriveProgrammaticFields = deriveProgrammaticFields;
|
|
37
|
+
exports.parseLLMDistillation = parseLLMDistillation;
|
|
38
|
+
exports.filterMessagesForDistillation = filterMessagesForDistillation;
|
|
39
|
+
exports.distillSession = distillSession;
|
|
40
|
+
// ── Public surface ───────────────────────────────────────────────────────
|
|
41
|
+
exports.SESSION_DISTILLATION_SCHEMA_VERSION = 1;
|
|
42
|
+
// ── Programmatic field derivation ─────────────────────────────────────────
|
|
43
|
+
/**
 * Names of tools whose result payload is expected to carry a `path` field
 * identifying the file they modified. Drives `files_touched`.
 *
 * This is an explicit allow-list rather than "any tool with a path in its
 * result": read-only tools (`file_read`, `file_list`) must not count as
 * having "touched" a file — only mutating operations do.
 */
const FILE_TOUCH_TOOLS = new Set([
    // Direct file mutations.
    'file_write', 'file_patch', 'file_create', 'file_delete',
    // Memory tools; memory_add writes MEMORY.md / USER.md.
    'memory_add', 'memory_remove', 'memory_replace',
    // Writes MEMORY.md.
    'session_summary',
]);
|
|
61
|
+
/**
 * Derive the deterministic distillation fields from the accumulated tool
 * trace. Pure — performs no I/O.
 *
 * @param trace  Iterable of trace entries; each entry is read for `name`,
 *               `error` and `result`.
 * @returns `{ files_touched, tools_used }` where
 *   - `tools_used` is `{ name, count }[]`, ordered by count (descending)
 *     and then by name (ascending, via `localeCompare`);
 *   - `files_touched` is the sorted list of unique paths reported by
 *     successful calls to tools listed in FILE_TOUCH_TOOLS.
 */
function deriveProgrammaticFields(trace) {
    const tally = new Map();
    const touched = new Set();
    for (const entry of trace) {
        // Every call counts toward tools_used, including failed ones.
        tally.set(entry.name, (tally.get(entry.name) ?? 0) + 1);
        // Failed calls and non-mutating tools earn no file credit.
        if (entry.error || !FILE_TOUCH_TOOLS.has(entry.name)) {
            continue;
        }
        // The result may be { success, path, ... } or wrap the path one
        // level down; extractPath accepts both shapes.
        const path = extractPath(entry.result);
        if (path) {
            touched.add(path);
        }
    }
    const tools_used = [...tally].map(([name, count]) => ({ name, count }));
    tools_used.sort((left, right) => right.count - left.count || left.name.localeCompare(right.name));
    const files_touched = [...touched].sort();
    return { files_touched, tools_used };
}
|
|
90
|
+
/**
 * Pull a file path out of a tool result payload.
 *
 * Looks at `result.path` first (most write tools) and then at
 * `result.result.path` (some adapters wrap the output one level down).
 *
 * @param result  Tool result payload of unknown shape.
 * @returns {string | null} The first non-empty string path found, else null.
 */
function extractPath(result) {
    if (!result || typeof result !== 'object') {
        return null;
    }
    const pathOf = (holder) => {
        const value = holder.path;
        return typeof value === 'string' && value.length > 0 ? value : null;
    };
    const direct = pathOf(result);
    if (direct !== null) {
        return direct;
    }
    const wrapped = result.result;
    return wrapped && typeof wrapped === 'object' ? pathOf(wrapped) : null;
}
|
|
106
|
+
// ── LLM extraction ────────────────────────────────────────────────────────
|
|
107
|
+
/**
 * Strict-then-lenient parser for the auxiliary LLM's distillation reply.
 *
 * Attempts, in order:
 *   1. Parse the whole (trimmed) reply as the expected JSON object.
 *   2. Parse the span from the first '{' to the last '}' — covers replies
 *      such as "Here is the JSON:\n{...}".
 *   3. Salvage bullets only (bare bullet list or JSON string-array
 *      fragment); the other semantic fields are left empty.
 *
 * Steps 1 and 2 yield `partial: false`; step 3 and a blank reply yield
 * `partial: true`. Pure — no I/O; the caller decides what `partial` means.
 *
 * @param {string} raw  Raw reply text from the auxiliary model.
 * @returns `{ bullets, decisions, open_items, keywords, partial }`.
 */
function parseLLMDistillation(raw) {
    const text = raw.trim();
    const degraded = (bullets) => ({
        bullets,
        decisions: [],
        open_items: [],
        keywords: [],
        partial: true,
    });
    if (text.length === 0) {
        return degraded([]);
    }
    const whole = tryStrictParse(text);
    if (whole) {
        return { ...whole, partial: false };
    }
    const open = text.indexOf('{');
    const close = text.lastIndexOf('}');
    const embedded = open >= 0 && close > open
        ? tryStrictParse(text.slice(open, close + 1))
        : null;
    if (embedded) {
        return { ...embedded, partial: false };
    }
    return degraded(recoverBullets(text));
}
|
|
149
|
+
/**
 * Parse `s` as the distillation JSON object and normalise its four
 * semantic fields to trimmed, non-empty string arrays.
 *
 * Accepts `open_items` under either `open_items` or `openItems`.
 *
 * Returns null (so the caller can fall back to lenient recovery) when:
 *   - `s` is not valid JSON;
 *   - the parsed value is not a plain (non-array) object;
 *   - all four fields are empty after normalisation.
 *
 * `keywords` counts as content here. Rejecting a well-formed reply that
 * only carries keywords would hand the raw text to the bullets-only
 * fallback, which could then pick up the keywords array as "bullets".
 *
 * @param {string} s  Candidate JSON text.
 * @returns `{ bullets, decisions, open_items, keywords }` or null.
 */
function tryStrictParse(s) {
    let parsed;
    try {
        parsed = JSON.parse(s);
    }
    catch {
        return null;
    }
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return null;
    }
    const bullets = toStringArray(parsed.bullets);
    const decisions = toStringArray(parsed.decisions);
    const open_items = toStringArray(parsed.open_items ?? parsed.openItems);
    const keywords = toStringArray(parsed.keywords);
    const hasContent = [bullets, decisions, open_items, keywords].some((field) => field.length > 0);
    if (!hasContent) {
        return null; // nothing useful — let the lenient path try
    }
    return { bullets, decisions, open_items, keywords };
}
|
|
168
|
+
/**
 * Coerce an unknown value into a clean list of strings.
 *
 * Non-arrays become `[]`. Within an array, non-string items are dropped,
 * the remaining strings are trimmed, and strings that are empty after
 * trimming are dropped.
 *
 * @param v  Any value.
 * @returns {string[]} Trimmed, non-empty strings in their original order.
 */
function toStringArray(v) {
    const cleaned = [];
    if (!Array.isArray(v)) {
        return cleaned;
    }
    for (const item of v) {
        if (typeof item !== 'string') {
            continue;
        }
        const text = item.trim();
        if (text.length > 0) {
            cleaned.push(text);
        }
    }
    return cleaned;
}
|
|
176
|
+
/**
 * Last-resort bullet recovery from a reply that failed JSON parsing.
 *
 * Strategy 1: collect lines that begin with a bullet marker (`-`, `*`,
 * `•`, or `N.`) followed by whitespace, with the marker stripped.
 * Strategy 2 (only when strategy 1 finds nothing): take the first JSON
 * array-of-strings fragment in the text and parse it.
 *
 * @param {string} raw  Reply text.
 * @returns {string[]} At most five recovered bullets; `[]` when nothing
 *                     could be salvaged.
 */
function recoverBullets(raw) {
    const bulletMarker = /^\s*(?:[-*•]|\d+\.)\s+/;
    // Strategy 1: bullet-prefixed lines.
    const fromLines = [];
    for (const line of raw.split(/\r?\n/)) {
        if (!bulletMarker.test(line)) {
            continue;
        }
        const text = line.replace(bulletMarker, '').trim();
        if (text.length > 0) {
            fromLines.push(text);
        }
    }
    if (fromLines.length > 0) {
        return fromLines.slice(0, 5);
    }
    // Strategy 2: a JSON array of strings, with or without the object wrapper.
    const fragment = /\[\s*"[\s\S]*?"\s*\]/.exec(raw);
    if (fragment === null) {
        return [];
    }
    try {
        return toStringArray(JSON.parse(fragment[0])).slice(0, 5);
    }
    catch {
        return [];
    }
}
|
|
195
|
+
const DEFAULT_TIMEOUT_MS = 4000;
|
|
196
|
+
/**
|
|
197
|
+
* Phase v4.1.2-bug-Y: max chars of tool-result content surfaced to the
|
|
198
|
+
* auxiliary LLM. Covers typical error messages + JSON-payload heads
|
|
199
|
+
* without bloating the prompt with full tool-output dumps. User and
|
|
200
|
+
* assistant TEXT are never truncated — user intent must survive in
|
|
201
|
+
* full. Widen this only after eval shows truncation eating signal.
|
|
202
|
+
*/
|
|
203
|
+
exports.TOOL_RESULT_TRUNCATION = 200;
|
|
204
|
+
/**
 * Pure: filter and format the conversation history into the transcript
 * the auxiliary LLM sees. Phase v4.1.2-bug-Y root-cause fix.
 *
 * Background: the previous distiller dumped the history verbatim,
 * including the large `role: 'system'` block PromptBuilder constructs
 * (SOUL.md identity, MEMORY.md, USER.md, runtime slot, capabilities
 * boilerplate, tool-catalog descriptions, personality overlay,
 * execution-discipline notes). Weak summarizer models latched onto that
 * block as the session topic and returned bullets like "I'm Aiden, a
 * local-first AI agent built by Taracod" regardless of the conversation.
 *
 * This filter drops ALL `role: 'system'` messages and emits the rest as
 * role-tagged lines:
 *
 *   [USER] full user message (trimmed)
 *   [ASSISTANT] assistant text (only if non-empty)
 *   [TOOL:name] {args}
 *   [TOOL:name] → result payload, via truncateForTranscript
 *
 * Tool results are labelled with the tool name, resolved through a
 * toolCallId → name map built from the preceding assistant turns
 * ('unknown' if the id was never seen). Empty user/assistant messages are
 * dropped. Multi-line content within a message is preserved. Messages
 * with any other role are ignored.
 *
 * Robustness: user and tool content is normalised before trimming, so a
 * message with nullish content no longer throws and aborts the whole
 * distillation — the same tolerance the assistant branch already had.
 *
 * @param messages  Iterable of chat messages (`role`, `content`, optional
 *                  `toolCalls`, `toolCallId`).
 * @returns {string} Newline-joined transcript.
 */
function filterMessagesForDistillation(messages) {
    // Normalise message content to a string. Strings pass through, nullish
    // becomes ''. Any other value is JSON-encoded.
    // NOTE(review): non-string content is assumed to be structured parts
    // worth surfacing as JSON — confirm against the message type.
    const asText = (content) => {
        if (typeof content === 'string') {
            return content;
        }
        if (content === null || content === undefined) {
            return '';
        }
        try {
            return JSON.stringify(content) ?? '';
        }
        catch {
            return String(content);
        }
    };
    /** Per-toolCallId → toolName, populated as we walk assistant turns. */
    const callNames = new Map();
    const lines = [];
    for (const m of messages) {
        if (m.role === 'system') {
            continue; // entire boilerplate source — dropped
        }
        if (m.role === 'user') {
            const text = asText(m.content).trim();
            if (text.length > 0) {
                lines.push(`[USER] ${text}`);
            }
            continue;
        }
        if (m.role === 'assistant') {
            // Tool-only turns have no text; don't emit an empty placeholder.
            const text = asText(m.content).trim();
            if (text.length > 0) {
                lines.push(`[ASSISTANT] ${text}`);
            }
            // Remember id → name so the matching tool result can be
            // labelled, and emit the call lines in their original order.
            for (const tc of m.toolCalls ?? []) {
                callNames.set(tc.id, tc.name);
                lines.push(`[TOOL:${tc.name}] ${compactArgs(tc.arguments)}`);
            }
            continue;
        }
        if (m.role === 'tool') {
            const name = callNames.get(m.toolCallId) ?? 'unknown';
            lines.push(`[TOOL:${name}] → ${truncateForTranscript(asText(m.content))}`);
        }
    }
    return lines.join('\n');
}
|
|
273
|
+
/**
 * Render tool-call arguments as a single transcript-sized string.
 *
 * Missing or key-less arguments render as `{}`. Otherwise the arguments
 * are JSON-encoded and the encoded text as a whole is passed through
 * truncateForTranscript, keeping the transcript focused on intent rather
 * than full payloads. If encoding fails, the placeholder
 * `{<unstringifiable>}` is returned instead of throwing.
 *
 * @param args  Tool-call arguments object (may be nullish).
 * @returns {string} Compact one-line representation.
 */
function compactArgs(args) {
    const nothingToShow = !args || Object.keys(args).length === 0;
    if (nothingToShow) {
        return '{}';
    }
    let rendered;
    try {
        rendered = truncateForTranscript(JSON.stringify(args));
    }
    catch {
        rendered = '{<unstringifiable>}';
    }
    return rendered;
}
|
|
288
|
+
/**
 * Trim `s` and cap it at `TOOL_RESULT_TRUNCATION` UTF-16 code units,
 * ending with a `…` (U+2026) marker when text was cut so the truncation
 * is visible to anyone reading the transcript.
 *
 * Two safeguards:
 *   - The cut never lands in the middle of a surrogate pair. Slicing
 *     between the two halves would leave a lone high surrogate in the
 *     prompt (ill-formed Unicode), so the dangling half is dropped and the
 *     result is one unit shorter than the cap.
 *   - Nullish input is treated as an empty string and other non-string
 *     input is stringified, instead of throwing on `.trim()`.
 *
 * @param s  Text to cap (normally a string).
 * @returns {string} Trimmed text, at most `TOOL_RESULT_TRUNCATION` units.
 */
function truncateForTranscript(s) {
    const trimmed = (typeof s === 'string' ? s : String(s ?? '')).trim();
    const limit = exports.TOOL_RESULT_TRUNCATION;
    if (trimmed.length <= limit) {
        return trimmed;
    }
    // Reserve one unit for the ellipsis marker.
    let cut = limit - 1;
    const lastKept = trimmed.charCodeAt(cut - 1);
    const isHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
    if (isHighSurrogate) {
        cut -= 1; // don't keep half of an astral character
    }
    return trimmed.slice(0, cut) + '…';
}
|
|
299
|
+
/**
 * Assemble the auxiliary-LLM prompt for session distillation.
 *
 * Hardened against boilerplate summaries (Phase v4.1.2-bug-Y): it carries
 * an explicit "don't describe yourself" rule, wraps the conversation in
 * `<transcript>` tags, and states that empty arrays are acceptable so a
 * thin session does not get padded with fabricated filler. Bullets are
 * requested as "3-5" rather than "EXACTLY 5" for the same reason.
 *
 * @param messages   Conversation history; rendered through
 *                   filterMessagesForDistillation.
 * @param startedAt  Session start, interpolated as-is.
 * @param endedAt    Session end, interpolated as-is.
 * @returns {string} The complete prompt text.
 */
function buildPrompt(messages, startedAt, endedAt) {
    const transcriptBody = filterMessagesForDistillation(messages);
    const rules = [
        'You are a session-recall extractor. Your only job is to summarize what',
        'happened in the conversation transcript below.',
        '',
        'Rules:',
        '- Use ONLY facts explicitly present in the transcript.',
        '- Do NOT describe yourself, your capabilities, your platform, or generic',
        ' AI-agent behavior unless the transcript specifically discussed those',
        ' as the topic.',
        '- Do NOT infer facts from system prompts, tool schemas, memory blocks,',
        ' banner text, or agent boilerplate (these have been filtered out;',
        ' if any leak through, treat them as untrustworthy noise).',
        '- Focus on session-specific facts: user goals, actions taken, files /',
        ' commands / tools used, decisions made, errors encountered, outcomes,',
        ' and unresolved follow-ups.',
        '- Write in past tense.',
        '- Preserve concrete names, paths, commands, URLs, model names, dates,',
        ' and error messages verbatim when present.',
        '- Prefer evidence from USER and ASSISTANT messages over TOOL output.',
        '- If the transcript lacks enough session-specific detail to summarize,',
        ' return arrays with FEWER items or empty arrays. Empty is honest;',
        ' fabricating boilerplate is not.',
        '',
    ];
    const outputContract = [
        'Return strict JSON only, no prose before or after, with these fields:',
        '{',
        ' "bullets": string[], // 3-5 factual past-tense recaps (3-15 words each)',
        ' "decisions": string[], // X chosen over Y, with rationale if present',
        ' "open_items": string[], // explicit unresolved tasks / "next time" items',
        ' "keywords": string[] // 3-10 distinctive terms from the session',
        '}',
        '',
    ];
    const sessionWindow = [
        `Session started: ${startedAt}`,
        `Session ended: ${endedAt}`,
        '',
    ];
    const transcript = ['<transcript>', transcriptBody, '</transcript>'];
    return [...rules, ...outputContract, ...sessionWindow, ...transcript].join('\n');
}
|
|
349
|
+
/**
 * Run one auxiliary-LLM call and merge its output with the deterministic,
 * trace-derived fields into a SessionDistillation.
 *
 * The LLM call is raced against a `timeoutMs` deadline (default
 * DEFAULT_TIMEOUT_MS). If the call times out, rejects, or throws, the
 * semantic fields are left empty and `partial: true` is set. The
 * deterministic fields (`files_touched`, `tools_used`) are populated
 * regardless of the LLM outcome, so the distillation is never empty.
 *
 * The deadline timer is always cleared once the race settles. This
 * function runs on session exit; an uncleared timer would keep the event
 * loop — and therefore the process — alive for up to `timeoutMs` after
 * the model has already answered.
 *
 * @param opts
 *   - `auxiliaryClient`  object with `call({ purpose, prompt, maxTokens })`
 *                        returning a promise of `{ content }`
 *   - `messages`         conversation history for the prompt
 *   - `toolTrace`        accumulated tool trace (treated as empty if nullish)
 *   - `sessionId`, `startedAt`, `exitPath`, `userTurns`  copied to the result
 *   - `endedAt`          optional; defaults to the current time (ISO 8601)
 *   - `timeoutMs`        optional; defaults to DEFAULT_TIMEOUT_MS
 * @returns The distillation record; `partial` is present (true) only when
 *          the semantic fields are degraded.
 */
async function distillSession(opts) {
    const endedAt = opts.endedAt ?? new Date().toISOString();
    const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const programmatic = deriveProgrammaticFields(opts.toolTrace ?? []);
    const prompt = buildPrompt(opts.messages, opts.startedAt, endedAt);
    // Hard deadline for the auxiliary call. Losing the race is recorded as
    // a partial distillation, not an error.
    let deadlineTimer;
    const deadline = new Promise((resolve) => {
        deadlineTimer = setTimeout(() => resolve({
            ok: false,
            error: new Error(`auxiliary call timed out after ${timeoutMs}ms`),
            timedOut: true,
        }), timeoutMs);
    });
    // The async wrapper turns a synchronous throw from `call` into a
    // rejection, so it degrades to `partial` like any other LLM failure.
    const attempt = (async () => opts.auxiliaryClient.call({ purpose: 'session_summary', prompt, maxTokens: 800 }))()
        .then((r) => ({ ok: true, content: r.content ?? '' }))
        .catch((e) => ({ ok: false, error: e }));
    let llmRaw;
    try {
        llmRaw = await Promise.race([attempt, deadline]);
    }
    finally {
        clearTimeout(deadlineTimer);
    }
    const semantic = llmRaw.ok
        ? parseLLMDistillation(llmRaw.content)
        : { bullets: [], decisions: [], open_items: [], keywords: [], partial: true };
    const dist = {
        schema_version: exports.SESSION_DISTILLATION_SCHEMA_VERSION,
        session_id: opts.sessionId,
        started_at: opts.startedAt,
        ended_at: endedAt,
        exit_path: opts.exitPath,
        user_turns: opts.userTurns,
        bullets: semantic.bullets,
        decisions: semantic.decisions,
        open_items: semantic.open_items,
        keywords: semantic.keywords,
        files_touched: programmatic.files_touched,
        tools_used: programmatic.tools_used,
    };
    if (semantic.partial) {
        dist.partial = true;
    }
    return dist;
}
|
|
@@ -16,10 +16,10 @@
|
|
|
16
16
|
* metadata.aiden.* must round-trip unchanged. We re-parse the
|
|
17
17
|
* refined output and discard it if any required field drifts.
|
|
18
18
|
*
|
|
19
|
-
* 2. Never write attribution tokens
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
19
|
+
* 2. Never write attribution tokens or "portions adapted from..." /
|
|
20
|
+
* "original copyright" strings. The permanent attribution
|
|
21
|
+
* sweep validates this; if a refined output contains any
|
|
22
|
+
* forbidden token, we fall back to the skeleton.
|
|
23
23
|
*
|
|
24
24
|
* If the auxiliary client is unavailable, the call times out, or
|
|
25
25
|
* the refined output fails validation, the function returns the
|
|
@@ -29,23 +29,30 @@
|
|
|
29
29
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
30
30
|
exports.refine = refine;
|
|
31
31
|
const skillSpec_1 = require("../skillSpec");
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
32
|
+
// Attribution phrases that must never appear in refined skill output.
const BANNED_TOKENS = [
    'portions adapted from',
    'original copyright',
    'derived from',
    'based on the',
    'adapted from',
];
// Case-insensitive whole-phrase matcher built from BANNED_TOKENS.
// Each token is regex-escaped so punctuation in a future entry cannot
// alter the pattern, and every whitespace run inside a token becomes
// `\s+` so a phrase that is line-wrapped or double-spaced in the
// markdown is still caught.
const FORBIDDEN_TOKENS_RE = new RegExp(
    `\\b(${BANNED_TOKENS.map((token) => token
        .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\s+/g, '\\s+')).join('|')})\\b`,
    'i',
);
|
|
40
|
+
// Prompt text for the skill-markdown refiner: it asks the model to polish
// wording only, keep the frontmatter and the numbered step order intact,
// output the complete file, and add no attribution or mock values.
// `.trim()` drops the newline padding around the template text.
// NOTE(review): the consumer is outside this view — presumably refine()
// sends this as the system prompt to the auxiliary client; confirm.
const REFINER_SYSTEM_PROMPT = `
|
|
41
|
+
You polish auto-generated skill markdown for a local-first AI agent.
|
|
42
|
+
|
|
43
|
+
Your job is to improve the WORDING ONLY of an already-valid SKILL.md
|
|
44
|
+
file. The frontmatter (everything between the leading "---" markers)
|
|
45
|
+
must round-trip BYTE-FOR-BYTE unchanged. The "# <name>" heading must
|
|
46
|
+
stay first in the body. The numbered "## Steps" list must remain
|
|
47
|
+
numbered and in the same order; you may rephrase step descriptions
|
|
48
|
+
but must NOT add or remove steps.
|
|
49
|
+
|
|
50
|
+
Hard rules:
|
|
51
|
+
- Output the COMPLETE SKILL.md, not a diff.
|
|
52
|
+
- Do not add boilerplate, citations, or attribution to any other
|
|
53
|
+
agent or codebase. The skill is 100% the user's own.
|
|
54
|
+
- Do not introduce mock/fake values into commands.
|
|
55
|
+
- Keep total length under 6000 characters.
|
|
49
56
|
`.trim();
|
|
50
57
|
/**
|
|
51
58
|
* Refine `skeleton` via the auxiliary client. Always returns a
|
|
@@ -18,8 +18,9 @@
|
|
|
18
18
|
* - emits required fields `name`, `description`, `version`
|
|
19
19
|
* - emits `metadata.aiden` with the mining provenance fields
|
|
20
20
|
* - body is numbered tool-call steps in markdown
|
|
21
|
-
* - never writes
|
|
22
|
-
*
|
|
21
|
+
* - never writes attribution strings — the banned-token regex
|
|
22
|
+
* strips them at extraction time and the permanent attribution
|
|
23
|
+
* sweep validates the result
|
|
23
24
|
*/
|
|
24
25
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
26
|
exports.deriveName = deriveName;
|