selftune 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +146 -0
- package/.claude/agents/evolution-reviewer.md +167 -0
- package/.claude/agents/integration-guide.md +200 -0
- package/.claude/agents/pattern-analyst.md +147 -0
- package/CHANGELOG.md +37 -0
- package/README.md +96 -256
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +103 -0
- package/cli/selftune/constants.ts +75 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-server.ts +582 -0
- package/cli/selftune/dashboard.ts +25 -3
- package/cli/selftune/eval/baseline.ts +247 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +68 -2
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evolve-body.ts +492 -0
- package/cli/selftune/evolution/evolve.ts +466 -103
- package/cli/selftune/evolution/extract-patterns.ts +32 -1
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +19 -2
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/grade-session.ts +138 -18
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/index.ts +88 -0
- package/cli/selftune/ingestors/claude-replay.ts +351 -0
- package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
- package/cli/selftune/init.ts +150 -3
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +25 -2
- package/cli/selftune/status.ts +17 -13
- package/cli/selftune/types.ts +377 -5
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/llm-call.ts +29 -3
- package/cli/selftune/utils/transcript.ts +35 -0
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/dashboard/index.html +569 -8
- package/package.json +8 -4
- package/skill/SKILL.md +124 -8
- package/skill/Workflows/AutoActivation.md +144 -0
- package/skill/Workflows/Badge.md +118 -0
- package/skill/Workflows/Baseline.md +121 -0
- package/skill/Workflows/Composability.md +100 -0
- package/skill/Workflows/Contribute.md +91 -0
- package/skill/Workflows/Cron.md +155 -0
- package/skill/Workflows/Dashboard.md +203 -0
- package/skill/Workflows/Doctor.md +37 -1
- package/skill/Workflows/Evals.md +69 -1
- package/skill/Workflows/EvolutionMemory.md +152 -0
- package/skill/Workflows/Evolve.md +111 -6
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/ImportSkillsBench.md +111 -0
- package/skill/Workflows/Ingest.md +117 -3
- package/skill/Workflows/Initialize.md +57 -3
- package/skill/Workflows/Replay.md +70 -0
- package/skill/Workflows/Rollback.md +20 -1
- package/skill/Workflows/UnitTest.md +138 -0
- package/skill/Workflows/Watch.md +22 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
|
@@ -12,6 +12,24 @@ import { join } from "node:path";
|
|
|
12
12
|
|
|
13
13
|
import { AGENT_CANDIDATES } from "../constants.js";
|
|
14
14
|
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Model alias resolution
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
/**
 * The claude CLI --model flag only accepts "sonnet" and "opus" as aliases.
 * "haiku" is NOT a valid --model alias (only valid in --agents subagent config).
 * Map short names to full model IDs so callers can use friendly names.
 */
const CLAUDE_MODEL_ALIASES: Record<string, string> = {
  haiku: "claude-haiku-4-5-20251001",
};

/**
 * Resolve a model alias to its full ID for the claude CLI --model flag.
 *
 * Only keys defined directly on CLAUDE_MODEL_ALIASES are treated as aliases;
 * every other value is passed through unchanged.
 *
 * @param flag - Alias (e.g. "haiku") or an already-complete model identifier.
 * @returns The mapped model ID when `flag` is a known alias, otherwise `flag` itself.
 */
function resolveModelFlag(flag: string): string {
  // Own-property check: a bare `CLAUDE_MODEL_ALIASES[flag]` would also find
  // members inherited from Object.prototype ("constructor", "toString",
  // "__proto__", ...) and return a function/object instead of a string.
  if (!Object.prototype.hasOwnProperty.call(CLAUDE_MODEL_ALIASES, flag)) {
    return flag;
  }
  return CLAUDE_MODEL_ALIASES[flag] ?? flag;
}
|
|
32
|
+
|
|
15
33
|
// ---------------------------------------------------------------------------
|
|
16
34
|
// Agent detection
|
|
17
35
|
// ---------------------------------------------------------------------------
|
|
@@ -77,6 +95,7 @@ export async function callViaAgent(
|
|
|
77
95
|
systemPrompt: string,
|
|
78
96
|
userPrompt: string,
|
|
79
97
|
agent: string,
|
|
98
|
+
modelFlag?: string,
|
|
80
99
|
): Promise<string> {
|
|
81
100
|
// Write prompt to temp file to avoid shell quoting issues
|
|
82
101
|
const promptFile = join(tmpdir(), `selftune-llm-${Date.now()}.txt`);
|
|
@@ -88,6 +107,10 @@ export async function callViaAgent(
|
|
|
88
107
|
|
|
89
108
|
if (agent === "claude") {
|
|
90
109
|
cmd = ["claude", "-p", promptContent];
|
|
110
|
+
if (modelFlag) {
|
|
111
|
+
const resolved = resolveModelFlag(modelFlag);
|
|
112
|
+
cmd.push("--model", resolved);
|
|
113
|
+
}
|
|
91
114
|
} else if (agent === "codex") {
|
|
92
115
|
cmd = ["codex", "exec", "--skip-git-repo-check", promptContent];
|
|
93
116
|
} else if (agent === "opencode") {
|
|
@@ -102,8 +125,10 @@ export async function callViaAgent(
|
|
|
102
125
|
env: { ...process.env, CLAUDECODE: "" },
|
|
103
126
|
});
|
|
104
127
|
|
|
105
|
-
//
|
|
106
|
-
const
|
|
128
|
+
// Longer timeout for heavier models (sonnet/opus take longer than haiku)
|
|
129
|
+
const isLightModel = modelFlag === "haiku" || modelFlag?.includes("haiku");
|
|
130
|
+
const timeoutMs = isLightModel ? 120_000 : 300_000;
|
|
131
|
+
const timeout = setTimeout(() => proc.kill(), timeoutMs);
|
|
107
132
|
const exitCode = await proc.exited;
|
|
108
133
|
clearTimeout(timeout);
|
|
109
134
|
|
|
@@ -135,9 +160,10 @@ export async function callLlm(
|
|
|
135
160
|
systemPrompt: string,
|
|
136
161
|
userPrompt: string,
|
|
137
162
|
agent: string,
|
|
163
|
+
modelFlag?: string,
|
|
138
164
|
): Promise<string> {
|
|
139
165
|
if (!agent) {
|
|
140
166
|
throw new Error("Agent must be specified for callLlm");
|
|
141
167
|
}
|
|
142
|
-
return callViaAgent(systemPrompt, userPrompt, agent);
|
|
168
|
+
return callViaAgent(systemPrompt, userPrompt, agent, modelFlag);
|
|
143
169
|
}
|
|
@@ -228,6 +228,41 @@ export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
|
|
|
228
228
|
return `${full.slice(0, head)}\n\n... [truncated] ...\n\n${full.slice(-tail)}`;
|
|
229
229
|
}
|
|
230
230
|
|
|
231
|
+
/**
 * Extract token usage from a transcript JSONL by summing usage fields.
 *
 * Each non-empty line is parsed as JSON. When the parsed entry is an object
 * with a top-level `usage` object, its numeric `input_tokens` and
 * `output_tokens` are added to the running totals. Lines that are not valid
 * JSON, that parse to `null` or a primitive, or that carry no usable `usage`
 * object are skipped.
 *
 * @param transcriptPath - Path to the transcript JSONL file.
 * @returns Summed token counts; `{ input: 0, output: 0 }` when the file does not exist.
 */
export function extractTokenUsage(transcriptPath: string): { input: number; output: number } {
  if (!existsSync(transcriptPath)) return { input: 0, output: 0 };

  let input = 0;
  let output = 0;

  for (const raw of readFileSync(transcriptPath, "utf-8").split("\n")) {
    const line = raw.trim();
    if (!line) continue;

    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      continue; // not valid JSON — ignore this line
    }

    // JSON.parse succeeds on "null", numbers, strings, etc. Reading `.usage`
    // from null would throw, so narrow to a real object first.
    if (typeof entry !== "object" || entry === null) continue;

    const usage = (entry as Record<string, unknown>).usage;
    if (typeof usage !== "object" || usage === null) continue;

    const { input_tokens: inTokens, output_tokens: outTokens } = usage as Record<string, unknown>;
    if (typeof inTokens === "number") input += inTokens;
    if (typeof outTokens === "number") output += outTokens;
  }

  return { input, output };
}
|
|
265
|
+
|
|
231
266
|
function emptyMetrics(): TranscriptMetrics {
|
|
232
267
|
return {
|
|
233
268
|
tool_calls: {},
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared trigger-check utilities.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from validate-proposal.ts so other modules (e.g. body validation,
|
|
5
|
+
* routing validation) can reuse the same prompt-building and response-parsing
|
|
6
|
+
* logic without depending on the evolution layer.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Prompt building
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
/**
 * Build the single-query trigger check prompt for the LLM.
 *
 * @param description - Skill description the model should judge against.
 * @param query - User query to test.
 * @returns Prompt text asking for a bare YES or NO answer.
 */
export function buildTriggerCheckPrompt(description: string, query: string): string {
  const instructions =
    "Given this skill description, would the following user query trigger this skill?" +
    "\n" +
    "Respond YES or NO only.";
  const descriptionSection = "Skill description:" + "\n" + description;
  const querySection = "User query:" + "\n" + query;

  // Sections are separated by one blank line.
  return `${instructions}\n\n${descriptionSection}\n\n${querySection}`;
}
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Response parsing
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
/**
 * Parse YES/NO from an LLM response.
 *
 * The answer must be the first word of the response. Leading whitespace and
 * punctuation/markup (quotes, `**`, list dashes) are ignored, matching is
 * case-insensitive, and the word must be exactly YES — a longer word that
 * merely starts with those letters (e.g. "YESTERDAY") does not count.
 *
 * @param response - Raw model output.
 * @returns `true` only for a leading YES; `false` for NO or anything
 *   unrecognised (conservative default).
 */
export function parseTriggerResponse(response: string): boolean {
  // Skip any non-alphanumeric prefix, then require a whole-word YES or NO.
  const match = /^[^A-Za-z0-9]*(YES|NO)(?![A-Za-z0-9])/i.exec(response);
  return match?.[1]?.toUpperCase() === "YES";
}
|
|
38
|
+
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Batch prompt building
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
/**
 * Build a batch trigger check prompt for multiple queries at once.
 *
 * Queries are listed as `N. "query"`, one per line, numbered from 1. Each
 * query is rendered with JSON string escaping so that embedded newlines or
 * double quotes cannot split one query across several lines or be mistaken
 * for an additional numbered item. Queries without such characters render
 * exactly as plain `"query"`.
 *
 * @param description - Skill description the model should judge against.
 * @param queries - User queries to test, in the order answers are expected.
 * @returns Prompt text asking for one numbered YES/NO answer per line.
 */
export function buildBatchTriggerCheckPrompt(description: string, queries: string[]): string {
  const numbered = queries.map((q, i) => `${i + 1}. ${JSON.stringify(q)}`).join("\n");
  return [
    "Given this skill description, would each query trigger this skill?",
    "Respond with the query number followed by YES or NO, one per line.",
    "",
    "Skill description:",
    description,
    "",
    "Queries:",
    numbered,
  ].join("\n");
}
|
|
57
|
+
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// Batch response parsing
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
/**
 * Parse a batch YES/NO response. Returns a boolean array aligned to the
 * original query order. Defaults to false for unparseable or missing lines.
 *
 * A line is recognised when it consists of optional leading punctuation or
 * markup, a 1-based query number, at least one separator character, and then
 * the answer word — e.g. "1. YES", "1: yes", "1) YES", "1 YES", "**1.** YES",
 * "1. **YES**" or "- 1. YES". Only a whole-word YES (case-insensitive) sets
 * the slot to true; a slot once set to true is never reset by a later line.
 * Numbers outside 1..queryCount are ignored.
 *
 * @param response - Raw model output, one answer per line.
 * @param queryCount - Number of queries that were asked. Negative,
 *   non-finite or NaN values are treated as 0; fractions are rounded down.
 * @returns Array of length `queryCount` with `true` where the model said YES.
 */
export function parseBatchTriggerResponse(response: string, queryCount: number): boolean[] {
  // `new Array(n)` throws RangeError for negative, fractional or NaN lengths.
  const size = Number.isFinite(queryCount) ? Math.max(0, Math.floor(queryCount)) : 0;
  const results: boolean[] = new Array<boolean>(size).fill(false);

  // [markup] <number> <separators/markup> <first word of the answer>
  const numberedAnswer = /^\W*(\d+)\W+(\w+)/;

  for (const line of response.split("\n")) {
    const match = numberedAnswer.exec(line);
    if (!match) continue;

    const [, position = "", word = ""] = match;
    const idx = parseInt(position, 10) - 1; // 1-based to 0-based
    if (idx < 0 || idx >= size) continue;

    if (word.toUpperCase() === "YES") {
      results[idx] = true;
    }
    // NO or anything else stays false (the default)
  }

  return results;
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tui.ts
|
|
3
|
+
*
|
|
4
|
+
* Zero-dependency TUI primitives for the selftune evolve pipeline.
|
|
5
|
+
* Uses raw ANSI escape codes for spinners, timers, and step progression.
|
|
6
|
+
* All output goes to stderr to keep stdout clean for JSON results.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// SPINNER_FRAMES: glyphs the spinner cycles through (indexed modulo its length).
// TICK_MS: interval, in milliseconds, between spinner redraws.
const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
|
10
|
+
const TICK_MS = 80;
|
|
11
|
+
|
|
12
|
+
/**
 * Handle for reporting step-by-step progress of an evolve run.
 */
export interface EvolveTUI {
  /**
   * Begin a new step and show a spinner for it. If a previous step is still
   * spinning, it is first completed with a checkmark.
   */
  step(label: string): void;

  /** Mark the current step as succeeded (checkmark), displaying `label`. */
  done(label: string): void;

  /** Mark the current step as failed (cross mark), displaying `label`. */
  fail(label: string): void;

  /** Stop all timers and print `summary` as the closing line. */
  finish(summary: string): void;

  /** Stop timers without printing anything; intended for error paths. */
  destroy(): void;
}
|
|
24
|
+
|
|
25
|
+
/** Build an EvolveTUI whose every method does nothing and prints nothing. */
function createNoopTUI(): EvolveTUI {
  const silent: EvolveTUI = {
    step: (): void => {},
    done: (): void => {},
    fail: (): void => {},
    finish: (): void => {},
    destroy: (): void => {},
  };
  return silent;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Create the progress reporter for an evolve run.
 *
 * When stderr is not a TTY and SELFTUNE_TUI_FORCE is not set, a no-op
 * reporter is returned and nothing is written. Otherwise a header naming the
 * skill and model is written to stderr immediately, and the returned handle
 * draws a one-line spinner (redrawn every TICK_MS) plus completed-step lines.
 * Setting NO_COLOR swaps the Unicode glyphs for ASCII ones.
 *
 * @param opts - Skill name and model label shown in the header.
 * @returns The reporter handle; after `finish()` or `destroy()` every method is a no-op.
 */
export function createEvolveTUI(opts: { skillName: string; model: string }): EvolveTUI {
  const asciiGlyphs = Boolean(process.env.NO_COLOR);
  const interactive = Boolean(process.stderr.isTTY);

  // Not a terminal and not forced: stay silent to avoid ANSI noise in pipes/tests.
  if (!(interactive || process.env.SELFTUNE_TUI_FORCE)) {
    return createNoopTUI();
  }

  const emit = (text: string): void => {
    process.stderr.write(text);
  };

  // Mutable reporter state.
  let frameIndex = 0;
  let startedAt = Date.now();
  let activeLabel = "";
  let spinnerVisible = false;
  let ticker: ReturnType<typeof setInterval> | null = null;
  let closed = false;

  const okGlyph = asciiGlyphs ? "+" : "\u2713";
  const errGlyph = asciiGlyphs ? "x" : "\u2717";

  emit(`\n selftune evolve \u2500\u2500 ${opts.skillName} \u2500\u2500 ${opts.model}\n\n`);

  /** Render one status row: glyph, label padded out to column 48, elapsed seconds. */
  const formatRow = (glyph: string, label: string, elapsedMs: number): string => {
    const gap = " ".repeat(Math.max(1, 48 - label.length));
    const seconds = `${(elapsedMs / 1000).toFixed(1)}s`;
    return ` ${glyph} ${label}${gap}${seconds}\n`;
  };

  /** Move the cursor up one line and erase it, but only if a spinner row is on screen. */
  const eraseSpinnerRow = (): void => {
    if (!spinnerVisible) return;
    emit("\x1b[A\x1b[2K");
  };

  /** Draw the spinner row for the active step with its current elapsed time. */
  const drawSpinnerRow = (): void => {
    const glyph = asciiGlyphs ? ">" : SPINNER_FRAMES[frameIndex % SPINNER_FRAMES.length];
    emit(formatRow(glyph, activeLabel, Date.now() - startedAt));
    spinnerVisible = true;
  };

  /** Cancel the redraw timer if one is running. */
  const haltTicker = (): void => {
    if (ticker === null) return;
    clearInterval(ticker);
    ticker = null;
  };

  /** Reset per-step state, draw the first frame, and start the redraw timer. */
  const beginSpinner = (label: string): void => {
    activeLabel = label;
    startedAt = Date.now();
    frameIndex = 0;
    drawSpinnerRow();
    ticker = setInterval(() => {
      frameIndex += 1;
      eraseSpinnerRow();
      drawSpinnerRow();
    }, TICK_MS);
  };

  /** Replace the spinner row with a final row for the step that was running. */
  const settleActiveStep = (glyph: string, label: string): void => {
    // Capture elapsed time before any teardown work.
    const elapsedMs = Date.now() - startedAt;
    haltTicker();
    eraseSpinnerRow();
    spinnerVisible = false;
    emit(formatRow(glyph, label, elapsedMs));
  };

  /** Shared body of done()/fail(): settle the running step, or print an instant one. */
  const conclude = (glyph: string, label: string): void => {
    if (closed) return;
    if (spinnerVisible) {
      settleActiveStep(glyph, label);
    } else {
      // No spinner running — report as an instantaneous step.
      emit(formatRow(glyph, label, 0));
    }
    activeLabel = "";
  };

  return {
    step: (label: string): void => {
      if (closed) return;
      // A still-running previous step is implicitly successful.
      if (spinnerVisible) {
        settleActiveStep(okGlyph, activeLabel);
      }
      beginSpinner(label);
    },

    done: (label: string): void => conclude(okGlyph, label),

    fail: (label: string): void => conclude(errGlyph, label),

    finish: (summary: string): void => {
      if (closed) return;
      if (spinnerVisible) {
        settleActiveStep(okGlyph, activeLabel);
      }
      haltTicker();
      emit(`\n ${summary}\n`);
      closed = true;
    },

    destroy: (): void => {
      if (closed) return;
      haltTicker();
      if (spinnerVisible) {
        eraseSpinnerRow();
        spinnerVisible = false;
      }
      closed = true;
    },
  };
}
|