selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* frontmatter.ts
|
|
3
|
+
*
|
|
4
|
+
* Line-based YAML frontmatter parser for SKILL.md files.
|
|
5
|
+
* Extracts name, description, and version without a YAML library.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Types
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
/** Fields extracted from a SKILL.md file by `parseFrontmatter`. */
export interface SkillFrontmatter {
  /** Value of the top-level `name:` key; empty string when absent. */
  name: string;

  /**
   * Value of the top-level `description:` key. When the file has no
   * frontmatter block, the parser puts the whole file content here instead.
   */
  description: string;

  /** Top-level `version:` or nested `metadata.version`; empty string when absent. */
  version: string;

  /**
   * Everything after the closing `---` with leading newlines removed, or the
   * whole file content when there is no frontmatter block.
   */
  body: string;
}
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Parser
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
/**
 * Parse YAML frontmatter from a SKILL.md file.
 *
 * This is a line-based extractor, not a YAML parser: it only understands the
 * keys it looks for and returns their raw text (quotes are not stripped).
 *
 * Handles these description formats:
 * - Single-line: `description: When the user wants to...`
 * - Block scalar: `description: >` / `description: |`, optionally followed by
 *   a chomping and/or indentation indicator (`>-`, `|+`, `>2`, ...). The
 *   indented continuation lines are joined with single spaces; blank lines
 *   inside the scalar are skipped as long as more indented text follows.
 *
 * Handles two version locations:
 * - Top-level: `version: 1.0.0`
 * - Nested: `metadata:\n  version: 1.0.0`
 *
 * @param content Full text of the SKILL.md file.
 * @returns The extracted fields. When no complete frontmatter block is found
 *   (missing opening or closing `---`), `name` and `version` are empty and
 *   both `description` and `body` are the full content.
 */
export function parseFrontmatter(content: string): SkillFrontmatter {
  const lines = content.split("\n");
  const withoutFrontmatter = (): SkillFrontmatter => ({
    name: "",
    description: content,
    version: "",
    body: content,
  });

  // Check for opening delimiter
  if (lines[0]?.trim() !== "---") {
    return withoutFrontmatter();
  }

  // Find closing delimiter
  let endIdx = -1;
  for (let i = 1; i < lines.length; i++) {
    if (lines[i].trim() === "---") {
      endIdx = i;
      break;
    }
  }
  if (endIdx < 0) {
    return withoutFrontmatter();
  }

  // `>` or `|`, optionally followed by a chomping (+/-) and/or indentation
  // (1-9) indicator in either order, e.g. `>-`, `|+`, `>2`, `|2-`.
  const blockScalarHeader = /^[>|](?:[+-][1-9]?|[1-9][+-]?)?$/;

  const yamlLines = lines.slice(1, endIdx);
  const body = lines
    .slice(endIdx + 1)
    .join("\n")
    .replace(/^\n+/, "");

  let name = "";
  let description = "";
  let version = "";
  let inMetadata = false;

  for (let i = 0; i < yamlLines.length; i++) {
    // Only trailing whitespace (including a CR from CRLF files) is removed, so
    // leading indentation still distinguishes top-level keys from nested ones.
    const trimmed = yamlLines[i].trimEnd();

    // Top-level `name:`
    if (trimmed.startsWith("name:")) {
      name = trimmed.slice("name:".length).trim();
      inMetadata = false;
      continue;
    }

    // Top-level `version:`
    if (trimmed.startsWith("version:")) {
      version = trimmed.slice("version:".length).trim();
      inMetadata = false;
      continue;
    }

    // `metadata:` block start
    if (trimmed.startsWith("metadata:")) {
      inMetadata = true;
      continue;
    }

    // Nested `version:` inside metadata
    if (inMetadata && /^\s+version:/.test(trimmed)) {
      version = trimmed.replace(/^\s+version:\s*/, "");
      continue;
    }

    // Top-level `description:` — single-line or block scalar
    if (trimmed.startsWith("description:")) {
      inMetadata = false;
      const afterKey = trimmed.slice("description:".length).trim();

      if (blockScalarHeader.test(afterKey)) {
        // Block scalar: collect indented continuation lines
        const descParts: string[] = [];
        let j = i + 1;
        while (j < yamlLines.length) {
          const next = yamlLines[j];

          if (next.trim() === "") {
            // A blank line belongs to the scalar only when indented text
            // follows it; otherwise the scalar ends here.
            let k = j + 1;
            while (k < yamlLines.length && yamlLines[k].trim() === "") k++;
            if (k >= yamlLines.length || !/^[ \t]/.test(yamlLines[k])) break;
            j = k;
            continue;
          }

          // Continuation line must be indented (starts with whitespace)
          if (!/^\s/.test(next)) break;

          // trim() rather than a leading-only strip so a CR from CRLF line
          // endings does not end up inside the description.
          descParts.push(next.trim());
          j++;
        }
        description = descParts.join(" ").trim();
        i = j - 1; // advance past consumed lines
      } else {
        // Single-line value
        description = afterKey;
      }
      continue;
    }

    // Any other top-level key resets inMetadata
    if (/^\S/.test(trimmed) && trimmed.includes(":")) {
      inMetadata = false;
    }
  }

  return { name, description, version, body };
}
|
|
134
|
+
|
|
135
|
+
// ---------------------------------------------------------------------------
|
|
136
|
+
// Frontmatter description replacement
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
/**
 * Replace the `description:` field in YAML frontmatter, preserving all other
 * content.
 *
 * The new value is written as a folded scalar (`description: >`, wrapped at
 * roughly 80 columns with a 2-space indent) when it is longer than 120
 * characters, contains characters that are special in YAML, contains a line
 * break, or starts with a YAML indicator character. Otherwise it is written
 * on a single line. The same rule applies whether an existing description is
 * replaced or a missing one is appended at the end of the frontmatter.
 *
 * An existing block-scalar description (`>`, `|`, including chomping or
 * indentation indicators such as `>-`) is removed together with all of its
 * continuation lines.
 *
 * @param content Full text of the SKILL.md file.
 * @param newDescription Replacement description text.
 * @returns The updated file text, or the original content unchanged if no
 *   complete frontmatter block (opening and closing `---`) is found.
 */
export function replaceFrontmatterDescription(content: string, newDescription: string): string {
  const lines = content.split("\n");

  if (lines[0]?.trim() !== "---") return content;

  let endIdx = -1;
  for (let i = 1; i < lines.length; i++) {
    if (lines[i].trim() === "---") {
      endIdx = i;
      break;
    }
  }
  if (endIdx < 0) return content;

  // `>` or `|`, optionally followed by chomping (+/-) and/or indentation (1-9)
  // indicators in either order.
  const blockScalarHeader = /^[>|](?:[+-][1-9]?|[1-9][+-]?)?$/;
  const isBlank = (text: string): boolean => text.trim() === "";
  const isIndented = (text: string): boolean => /^[ \t]/.test(text);

  /** Render the new description as one or more frontmatter lines. */
  const renderDescription = (): string[] => {
    const needsFolded =
      newDescription.length > 120 ||
      /[:#"'[\]{}|>\r\n]/.test(newDescription) ||
      /^[-?*&!%@`]/.test(newDescription);
    if (!needsFolded) return [`description: ${newDescription}`];

    const rendered = ["description: >"];
    // Wrap at ~78 chars with 2-space indent
    let line = "  ";
    for (const word of newDescription.split(/\s+/)) {
      if (line.length + word.length + 1 > 80 && line.trim().length > 0) {
        rendered.push(line);
        line = `  ${word}`;
      } else {
        line = line.trim().length === 0 ? `  ${word}` : `${line} ${word}`;
      }
    }
    if (line.trim().length > 0) rendered.push(line);
    return rendered;
  };

  // Find and replace the description within frontmatter lines
  const yamlLines = lines.slice(1, endIdx);
  const newYamlLines: string[] = [];
  let replaced = false;
  let i = 0;

  while (i < yamlLines.length) {
    const trimmed = yamlLines[i].trimEnd();

    if (!trimmed.startsWith("description:")) {
      newYamlLines.push(yamlLines[i]);
      i++;
      continue;
    }

    replaced = true;
    const afterKey = trimmed.slice("description:".length).trim();
    i++;

    // Skip continuation lines of block scalars so no orphaned text remains
    if (blockScalarHeader.test(afterKey)) {
      while (i < yamlLines.length) {
        if (isBlank(yamlLines[i])) {
          // Blank lines are part of the scalar only when indented text follows.
          let k = i + 1;
          while (k < yamlLines.length && isBlank(yamlLines[k])) k++;
          if (k >= yamlLines.length || !isIndented(yamlLines[k])) break;
          i = k;
          continue;
        }
        if (!isIndented(yamlLines[i])) break;
        i++;
      }
    }

    newYamlLines.push(...renderDescription());
  }

  // If description wasn't found in frontmatter, add it
  if (!replaced) {
    newYamlLines.push(...renderDescription());
  }

  const before = lines[0]; // "---"
  const after = lines.slice(endIdx); // "---" + body
  return [before, ...newYamlLines, ...after].join("\n");
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { CLAUDE_CODE_HOOK_KEYS } from "../constants.js";
|
|
2
|
+
|
|
3
|
+
/** One command object nested under a hook entry's `hooks` array. */
export interface ClaudeCodeHookCommand {
  /** Command string; may be absent. */
  command?: string;
}
|
|
6
|
+
|
|
7
|
+
/**
 * One entry in the array stored under a hook key. An entry may carry a
 * command directly, a nested list of command objects, or both.
 */
export interface ClaudeCodeHookEntry {
  /** Command attached directly to the entry; may be absent. */
  command?: string;
  /** Nested command objects; may be absent. */
  hooks?: ClaudeCodeHookCommand[];
}
|
|
11
|
+
|
|
12
|
+
/**
 * Loose type guard: accepts any non-null object (arrays included). It does
 * not verify the shape of `command` or `hooks`.
 */
function isHookEntry(value: unknown): value is ClaudeCodeHookEntry {
  if (value === null) return false;
  return typeof value === "object";
}
|
|
15
|
+
|
|
16
|
+
/**
 * Report whether a hook entry mentions selftune, either in its own `command`
 * or in the `command` of any object in its nested `hooks` array.
 *
 * Entries typically come from loosely validated settings data, so nested
 * values that are `null`, primitives, or have a non-string `command` are
 * treated as "no match" instead of throwing.
 *
 * @param entry Hook entry to inspect.
 * @returns `true` when some command string contains the substring "selftune".
 */
export function entryReferencesSelftune(entry: ClaudeCodeHookEntry): boolean {
  const mentionsSelftune = (candidate: unknown): boolean => {
    if (typeof candidate !== "object" || candidate === null) return false;
    const command = (candidate as { command?: unknown }).command;
    return typeof command === "string" && command.includes("selftune");
  };

  if (typeof entry !== "object" || entry === null) return false;
  if (mentionsSelftune(entry)) return true;

  return Array.isArray(entry.hooks) && entry.hooks.some((hook) => mentionsSelftune(hook));
}
|
|
29
|
+
|
|
30
|
+
/**
 * Report whether the value stored under `key` is a non-empty array in which
 * at least one object entry references selftune.
 *
 * @param hooks Map of hook key to its (unvalidated) entries.
 * @param key Hook key to look up.
 */
export function hookKeyHasSelftuneEntry(hooks: Record<string, unknown>, key: string): boolean {
  const candidates = hooks[key];
  if (!Array.isArray(candidates)) return false;

  for (const candidate of candidates) {
    if (isHookEntry(candidate) && entryReferencesSelftune(candidate)) {
      return true;
    }
  }
  return false;
}
|
|
38
|
+
|
|
39
|
+
/**
 * List every key in `CLAUDE_CODE_HOOK_KEYS` for which `hooks` has no entry
 * referencing selftune, in the order the keys are declared.
 */
export function missingClaudeCodeHookKeys(hooks: Record<string, unknown>): string[] {
  const isMissing = (hookKey: string): boolean => !hookKeyHasSelftuneEntry(hooks, hookKey);
  return CLAUDE_CODE_HOOK_KEYS.filter(isMissing);
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Serialize a value to JSON that is safe to inline in an HTML <script> tag.
 *
 * Guarding against `</script>` alone is not enough: log payloads may hold
 * other HTML-like sequences that disturb parsing of raw-text script blocks.
 * Every `<`, `>`, and `&`, as well as the line/paragraph separators U+2028
 * and U+2029, is therefore rewritten as a `\uXXXX` escape.
 *
 * @param data Value to serialize; values `JSON.stringify` maps to `undefined`
 *   are emitted as `null`.
 * @returns The escaped JSON text.
 */
export function escapeJsonForHtmlScript(data: unknown): string {
  const serialized = JSON.stringify(data) ?? "null";
  return serialized.replace(
    /[<>&\u2028\u2029]/g,
    // Each matched character becomes its 4-digit lowercase-hex \u escape.
    (unsafe) => `\\u${unsafe.charCodeAt(0).toString(16).padStart(4, "0")}`,
  );
}
|
|
@@ -11,6 +11,27 @@ import { tmpdir } from "node:os";
|
|
|
11
11
|
import { join } from "node:path";
|
|
12
12
|
|
|
13
13
|
import { AGENT_CANDIDATES } from "../constants.js";
|
|
14
|
+
import { createLogger } from "./logging.js";
|
|
15
|
+
|
|
16
|
+
const logger = createLogger("llm-call");
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Model alias resolution
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
/**
 * The claude CLI --model flag only accepts "sonnet" and "opus" as aliases.
 * "haiku" is NOT a valid --model alias (only valid in --agents subagent config).
 * Map short names to full model IDs so callers can use friendly names.
 */
const CLAUDE_MODEL_ALIASES: Record<string, string> = {
  haiku: "claude-haiku-4-5-20251001",
};

/**
 * Resolve a model alias to its full ID for the claude CLI --model flag.
 *
 * Only the table's own keys count as aliases. A plain property lookup would
 * also find names inherited from `Object.prototype` (for example
 * "constructor" or "toString") and return a function instead of a string.
 *
 * @param flag Model name or alias supplied by the caller.
 * @returns The mapped model ID, or `flag` unchanged when it is not an alias.
 */
function resolveModelFlag(flag: string): string {
  const alias = Object.prototype.hasOwnProperty.call(CLAUDE_MODEL_ALIASES, flag)
    ? CLAUDE_MODEL_ALIASES[flag]
    : undefined;
  return alias ?? flag;
}
|
|
14
35
|
|
|
15
36
|
// ---------------------------------------------------------------------------
|
|
16
37
|
// Agent detection
|
|
@@ -68,6 +89,35 @@ export function stripMarkdownFences(raw: string): string {
|
|
|
68
89
|
return text;
|
|
69
90
|
}
|
|
70
91
|
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
// Retry configuration
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
/** Retry count used when the caller supplies no `maxRetries`. */
const DEFAULT_MAX_RETRIES = 2;

/** Backoff in milliseconds before the first retry when the caller supplies no `initialBackoffMs`. */
const DEFAULT_INITIAL_BACKOFF_MS = 2_000;

/** Tuning knobs for retrying an agent call; every field falls back to a default. */
export interface RetryOptions {
  /** Upper bound on retries (default: 2). Use 0 to turn retrying off. */
  maxRetries?: number;
  /** Wait in ms before the first retry (default: 2000); doubled for each later retry. */
  initialBackoffMs?: number;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Decide whether a failure is worth retrying.
 *
 * Only `Error` instances whose message contains "exited with code"
 * (case-insensitive) qualify — i.e. the agent subprocess ended with a
 * non-zero exit code (crash, OOM, timeout kill). Anything else is treated as
 * permanent.
 */
function isTransientError(err: unknown): boolean {
  return err instanceof Error && /exited with code/i.test(err.message);
}
|
|
115
|
+
|
|
116
|
+
/** Return a promise that resolves after roughly `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
120
|
+
|
|
71
121
|
// ---------------------------------------------------------------------------
|
|
72
122
|
// Call LLM via agent subprocess
|
|
73
123
|
// ---------------------------------------------------------------------------
|
|
@@ -77,6 +127,8 @@ export async function callViaAgent(
|
|
|
77
127
|
systemPrompt: string,
|
|
78
128
|
userPrompt: string,
|
|
79
129
|
agent: string,
|
|
130
|
+
modelFlag?: string,
|
|
131
|
+
retryOpts?: RetryOptions,
|
|
80
132
|
): Promise<string> {
|
|
81
133
|
// Write prompt to temp file to avoid shell quoting issues
|
|
82
134
|
const promptFile = join(tmpdir(), `selftune-llm-${Date.now()}.txt`);
|
|
@@ -88,6 +140,10 @@ export async function callViaAgent(
|
|
|
88
140
|
|
|
89
141
|
if (agent === "claude") {
|
|
90
142
|
cmd = ["claude", "-p", promptContent];
|
|
143
|
+
if (modelFlag) {
|
|
144
|
+
const resolved = resolveModelFlag(modelFlag);
|
|
145
|
+
cmd.push("--model", resolved);
|
|
146
|
+
}
|
|
91
147
|
} else if (agent === "codex") {
|
|
92
148
|
cmd = ["codex", "exec", "--skip-git-repo-check", promptContent];
|
|
93
149
|
} else if (agent === "opencode") {
|
|
@@ -96,26 +152,53 @@ export async function callViaAgent(
|
|
|
96
152
|
throw new Error(`Unknown agent: ${agent}`);
|
|
97
153
|
}
|
|
98
154
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
155
|
+
// Retry loop with exponential backoff for transient failures
|
|
156
|
+
const maxRetries = retryOpts?.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
157
|
+
const initialBackoffMs = retryOpts?.initialBackoffMs ?? DEFAULT_INITIAL_BACKOFF_MS;
|
|
158
|
+
let lastError: Error | undefined;
|
|
159
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
160
|
+
if (attempt > 0) {
|
|
161
|
+
const backoffMs = initialBackoffMs * 2 ** (attempt - 1);
|
|
162
|
+
logger.warn(
|
|
163
|
+
`Retry ${attempt}/${maxRetries} for agent '${agent}' after ${backoffMs}ms backoff`,
|
|
164
|
+
);
|
|
165
|
+
await sleep(backoffMs);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
try {
|
|
169
|
+
const proc = Bun.spawn(cmd, {
|
|
170
|
+
stdout: "pipe",
|
|
171
|
+
stderr: "pipe",
|
|
172
|
+
env: { ...process.env, CLAUDECODE: "" },
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
// Longer timeout for heavier models (sonnet/opus take longer than haiku)
|
|
176
|
+
const isLightModel = modelFlag === "haiku" || modelFlag?.includes("haiku");
|
|
177
|
+
const timeoutMs = isLightModel ? 120_000 : 300_000;
|
|
178
|
+
const timeout = setTimeout(() => proc.kill(), timeoutMs);
|
|
179
|
+
const exitCode = await proc.exited;
|
|
180
|
+
clearTimeout(timeout);
|
|
181
|
+
|
|
182
|
+
if (exitCode !== 0) {
|
|
183
|
+
const stderr = await new Response(proc.stderr).text();
|
|
184
|
+
throw new Error(
|
|
185
|
+
`Agent '${agent}' exited with code ${exitCode}.\nstderr: ${stderr.slice(0, 500)}`,
|
|
186
|
+
);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
const raw = await new Response(proc.stdout).text();
|
|
190
|
+
return raw;
|
|
191
|
+
} catch (err) {
|
|
192
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
193
|
+
if (!isTransientError(lastError) || attempt === maxRetries) {
|
|
194
|
+
throw lastError;
|
|
195
|
+
}
|
|
196
|
+
logger.warn(`Transient failure on attempt ${attempt + 1}: ${lastError.message}`);
|
|
197
|
+
}
|
|
115
198
|
}
|
|
116
199
|
|
|
117
|
-
|
|
118
|
-
|
|
200
|
+
// Unreachable, but satisfies TypeScript
|
|
201
|
+
throw lastError ?? new Error("callViaAgent: unexpected retry loop exit");
|
|
119
202
|
} finally {
|
|
120
203
|
try {
|
|
121
204
|
const { unlinkSync } = await import("node:fs");
|
|
@@ -135,9 +218,10 @@ export async function callLlm(
|
|
|
135
218
|
systemPrompt: string,
|
|
136
219
|
userPrompt: string,
|
|
137
220
|
agent: string,
|
|
221
|
+
modelFlag?: string,
|
|
138
222
|
): Promise<string> {
|
|
139
223
|
if (!agent) {
|
|
140
224
|
throw new Error("Agent must be specified for callLlm");
|
|
141
225
|
}
|
|
142
|
-
return callViaAgent(systemPrompt, userPrompt, agent);
|
|
226
|
+
return callViaAgent(systemPrompt, userPrompt, agent, modelFlag);
|
|
143
227
|
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { SKIP_PREFIXES } from "../constants.js";
|
|
2
|
+
import type { QueryLogRecord, SkillUsageRecord } from "../types.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Literal prefixes marking text that did not come from the user. A candidate
 * query starting with any of these (case-sensitive) is rejected.
 */
const NON_USER_QUERY_PREFIXES = [
  // Opening tags of injected wrapper blocks
  "<system_instruction>",
  "<system-instruction>",
  "<system-reminder>",
  "<available-deferred-tools>",
  "<fast_mode_info>",
  "<local-command-caveat>",
  "<local-command-stdout>",
  "<local-command-stderr>",
  "<command-name>",
  "<task-notification>",
  // No closing ">" so that tags carrying attributes also match
  "<teammate-message",

  // Interruption markers
  "[Request interrupted by user for tool use]",
  "[Request interrupted by user]",

  // Other fixed non-user text
  "Base directory for this skill:",
  "This session is being continued from a previous conversation that ran out of context.",
  "USER'S CURRENT MESSAGE (summarize THIS):",
  "CONTEXT:",
  "Completing task",
  "Tool loaded.",
  "Continue from where you left off.",
  "You are an evaluation assistant.",
  "You are a skill description optimizer for an AI agent routing system.",
  "The following skills are available",
] as const;
|
|
29
|
+
|
|
30
|
+
/**
 * Anchored regexes for wrapper/hook pipeline artifacts that are never real
 * user prompts. They complement the literal prefix list and cover structured
 * hook callback lines.
 */
const NON_USER_QUERY_PATTERNS = [
  // Hook callback output, e.g. "SessionStart:startup hook success: ...".
  // "Stop" is left out of this alternation because it is too common as an
  // ordinary English word.
  /^(SessionStart|UserPromptSubmit|PreToolUse|PostToolUse):/,

  // Stop hook callbacks have a structured shape: "Stop:" immediately followed
  // by one of a few known callback words.
  /^Stop:(session |cleanup |hook |Callback )/,

  // Injected git context blocks
  /^gitStatus:\s/,
] as const;
|
|
43
|
+
|
|
44
|
+
/** Tag names of wrapper blocks that may be prepended to a real user query. */
const LEADING_WRAPPED_QUERY_TAGS = [
  "system_instruction",
  "system-instruction",
  "system-reminder",
  "available-deferred-tools",
  "fast_mode_info",
  "task-notification",
  "teammate-message",
  "local-command-caveat",
  "local-command-stdout",
  "local-command-stderr",
  "command-name",
] as const;

/**
 * One anchored, case-insensitive regex per wrapper tag, matching a complete
 * leading `<tag ...>...</tag>` block plus trailing whitespace. Compiled once
 * at module load rather than on every call and every loop pass.
 */
const LEADING_WRAPPED_QUERY_TAG_PATTERNS: readonly RegExp[] = LEADING_WRAPPED_QUERY_TAGS.map(
  (tag) => new RegExp(`^<${tag}\\b[^>]*>[\\s\\S]*?<\\/${tag}>\\s*`, "i"),
);

/**
 * Remove every complete wrapper block from the start of `query`.
 *
 * Blocks are stripped one at a time until the text no longer begins with a
 * known wrapper; wrappers that appear later in the text are left alone.
 *
 * @param query Raw query text.
 * @returns The trimmed remainder, which is the empty string when the input
 *   consisted only of wrapper blocks.
 */
function stripLeadingWrappedQueryText(query: string): string {
  let current = query.trim();

  for (;;) {
    // The patterns are not global/sticky, so test() keeps no state between calls.
    const leading = LEADING_WRAPPED_QUERY_TAG_PATTERNS.find((pattern) => pattern.test(current));
    if (!leading) return current;
    current = current.replace(leading, "").trim();
  }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Reduce a logged query to the text the user actually typed.
 *
 * Leading wrapper blocks are stripped first. The result is rejected when it
 * is empty, is a placeholder (`-` or `(query not found)`), starts with a
 * known non-user prefix, or matches a known hook/wrapper pattern.
 *
 * A query made up solely of wrapper blocks is rejected outright. Falling back
 * to the raw text would let wrappers with attributes or unusual casing slip
 * past the exact, case-sensitive prefix list and be reported as user input.
 *
 * @param query Raw query value; non-string input is tolerated at runtime.
 * @returns The cleaned query text, or `null` when nothing actionable remains.
 */
export function extractActionableQueryText(query: string): string | null {
  if (typeof query !== "string") return null;

  const isPlaceholder = (text: string): boolean =>
    !text || text === "-" || text === "(query not found)";

  const trimmed = query.trim();
  if (isPlaceholder(trimmed)) return null;

  // An empty result here means the input was nothing but wrapper blocks.
  const candidate = stripLeadingWrappedQueryText(trimmed);
  if (isPlaceholder(candidate)) return null;

  const isBlocked =
    SKIP_PREFIXES.some((prefix) => candidate.startsWith(prefix)) ||
    NON_USER_QUERY_PREFIXES.some((prefix) => candidate.startsWith(prefix)) ||
    NON_USER_QUERY_PATTERNS.some((pattern) => pattern.test(candidate));

  return isBlocked ? null : candidate;
}
|
|
94
|
+
|
|
95
|
+
/** True when `extractActionableQueryText` finds usable user text in `query`. */
export function isActionableQueryText(query: string): boolean {
  const extracted = extractActionableQueryText(query);
  return extracted !== null;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Keep only the query records that carry actionable user text.
 *
 * Null/undefined entries and records whose query is rejected are dropped.
 * A kept record is returned as-is when its query is already clean; otherwise
 * a shallow copy with the cleaned query is returned. Input order is kept and
 * the input array is not mutated.
 */
export function filterActionableQueryRecords(queryRecords: QueryLogRecord[]): QueryLogRecord[] {
  return queryRecords.flatMap((entry) => {
    if (entry == null) return [];

    const cleaned = extractActionableQueryText(entry.query);
    if (!cleaned) return [];

    return [cleaned === entry.query ? entry : { ...entry, query: cleaned }];
  });
}
|
|
113
|
+
|
|
114
|
+
/**
 * Report whether a skill-usage record has a non-blank `skill_name` and a
 * query that yields actionable user text.
 */
export function isActionableSkillUsageRecord(record: SkillUsageRecord | null | undefined): boolean {
  if (record == null) return false;

  const { skill_name: skillName, query: rawQuery } = record;
  const hasSkillName = typeof skillName === "string" && skillName.trim() !== "";
  if (!hasSkillName || typeof rawQuery !== "string") return false;

  const text = rawQuery.trim();
  if (text === "" || text === "(query not found)") return false;

  return extractActionableQueryText(text) !== null;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Keep only the skill-usage records whose query carries actionable user text.
 *
 * Records with a rejected query (including null/undefined entries) are
 * dropped. A kept record is returned as-is when its query is already clean;
 * otherwise a shallow copy with the cleaned query is returned. `skill_name`
 * is not inspected here.
 */
export function filterActionableSkillUsageRecords(
  skillRecords: SkillUsageRecord[],
): SkillUsageRecord[] {
  return skillRecords.reduce<SkillUsageRecord[]>((kept, entry) => {
    const cleaned = extractActionableQueryText(entry?.query);
    if (cleaned) {
      kept.push(cleaned === entry.query ? entry : { ...entry, query: cleaned });
    }
    return kept;
  }, []);
}
|