selftune 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.claude/agents/diagnosis-analyst.md +146 -0
  2. package/.claude/agents/evolution-reviewer.md +167 -0
  3. package/.claude/agents/integration-guide.md +200 -0
  4. package/.claude/agents/pattern-analyst.md +147 -0
  5. package/CHANGELOG.md +37 -0
  6. package/README.md +96 -256
  7. package/assets/BeforeAfter.gif +0 -0
  8. package/assets/FeedbackLoop.gif +0 -0
  9. package/assets/logo.svg +9 -0
  10. package/assets/skill-health-badge.svg +20 -0
  11. package/cli/selftune/activation-rules.ts +171 -0
  12. package/cli/selftune/badge/badge-data.ts +108 -0
  13. package/cli/selftune/badge/badge-svg.ts +212 -0
  14. package/cli/selftune/badge/badge.ts +103 -0
  15. package/cli/selftune/constants.ts +75 -1
  16. package/cli/selftune/contribute/bundle.ts +314 -0
  17. package/cli/selftune/contribute/contribute.ts +214 -0
  18. package/cli/selftune/contribute/sanitize.ts +162 -0
  19. package/cli/selftune/cron/setup.ts +266 -0
  20. package/cli/selftune/dashboard-server.ts +582 -0
  21. package/cli/selftune/dashboard.ts +25 -3
  22. package/cli/selftune/eval/baseline.ts +247 -0
  23. package/cli/selftune/eval/composability.ts +117 -0
  24. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  25. package/cli/selftune/eval/hooks-to-evals.ts +68 -2
  26. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  28. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  29. package/cli/selftune/eval/unit-test.ts +196 -0
  30. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  31. package/cli/selftune/evolution/evolve-body.ts +492 -0
  32. package/cli/selftune/evolution/evolve.ts +466 -103
  33. package/cli/selftune/evolution/extract-patterns.ts +32 -1
  34. package/cli/selftune/evolution/pareto.ts +314 -0
  35. package/cli/selftune/evolution/propose-body.ts +171 -0
  36. package/cli/selftune/evolution/propose-description.ts +100 -2
  37. package/cli/selftune/evolution/propose-routing.ts +166 -0
  38. package/cli/selftune/evolution/refine-body.ts +141 -0
  39. package/cli/selftune/evolution/rollback.ts +19 -2
  40. package/cli/selftune/evolution/validate-body.ts +254 -0
  41. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  42. package/cli/selftune/evolution/validate-routing.ts +177 -0
  43. package/cli/selftune/grading/grade-session.ts +138 -18
  44. package/cli/selftune/grading/pre-gates.ts +104 -0
  45. package/cli/selftune/hooks/auto-activate.ts +185 -0
  46. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  47. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  48. package/cli/selftune/index.ts +88 -0
  49. package/cli/selftune/ingestors/claude-replay.ts +351 -0
  50. package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
  51. package/cli/selftune/init.ts +150 -3
  52. package/cli/selftune/memory/writer.ts +447 -0
  53. package/cli/selftune/monitoring/watch.ts +25 -2
  54. package/cli/selftune/status.ts +17 -13
  55. package/cli/selftune/types.ts +377 -5
  56. package/cli/selftune/utils/frontmatter.ts +217 -0
  57. package/cli/selftune/utils/llm-call.ts +29 -3
  58. package/cli/selftune/utils/transcript.ts +35 -0
  59. package/cli/selftune/utils/trigger-check.ts +89 -0
  60. package/cli/selftune/utils/tui.ts +156 -0
  61. package/dashboard/index.html +569 -8
  62. package/package.json +8 -4
  63. package/skill/SKILL.md +124 -8
  64. package/skill/Workflows/AutoActivation.md +144 -0
  65. package/skill/Workflows/Badge.md +118 -0
  66. package/skill/Workflows/Baseline.md +121 -0
  67. package/skill/Workflows/Composability.md +100 -0
  68. package/skill/Workflows/Contribute.md +91 -0
  69. package/skill/Workflows/Cron.md +155 -0
  70. package/skill/Workflows/Dashboard.md +203 -0
  71. package/skill/Workflows/Doctor.md +37 -1
  72. package/skill/Workflows/Evals.md +69 -1
  73. package/skill/Workflows/EvolutionMemory.md +152 -0
  74. package/skill/Workflows/Evolve.md +111 -6
  75. package/skill/Workflows/EvolveBody.md +159 -0
  76. package/skill/Workflows/ImportSkillsBench.md +111 -0
  77. package/skill/Workflows/Ingest.md +117 -3
  78. package/skill/Workflows/Initialize.md +57 -3
  79. package/skill/Workflows/Replay.md +70 -0
  80. package/skill/Workflows/Rollback.md +20 -1
  81. package/skill/Workflows/UnitTest.md +138 -0
  82. package/skill/Workflows/Watch.md +22 -0
  83. package/skill/settings_snippet.json +23 -0
  84. package/templates/activation-rules-default.json +27 -0
  85. package/templates/multi-skill-settings.json +64 -0
  86. package/templates/single-skill-settings.json +58 -0
@@ -12,6 +12,24 @@ import { join } from "node:path";
12
12
 
13
13
  import { AGENT_CANDIDATES } from "../constants.js";
14
14
 
15
// ---------------------------------------------------------------------------
// Model alias resolution
// ---------------------------------------------------------------------------

/**
 * Friendly model names mapped to the full model IDs passed to `claude --model`.
 *
 * The claude CLI --model flag only accepts "sonnet" and "opus" as aliases.
 * "haiku" is NOT a valid --model alias (only valid in --agents subagent config),
 * so it is expanded here to let callers keep using the short name.
 */
const CLAUDE_MODEL_ALIASES: Record<string, string> = {
  haiku: "claude-haiku-4-5-20251001",
};

/**
 * Resolve a model alias to its full ID for the claude CLI --model flag.
 *
 * @param flag - Model name as supplied by the caller (alias or full model ID).
 * @returns The mapped full model ID when `flag` is a known alias, otherwise
 *   `flag` unchanged.
 */
function resolveModelFlag(flag: string): string {
  // Only own keys count as aliases. A bare `CLAUDE_MODEL_ALIASES[flag]` lookup
  // would also hit inherited members such as "constructor" or "toString" and
  // return a function instead of a model name.
  if (Object.prototype.hasOwnProperty.call(CLAUDE_MODEL_ALIASES, flag)) {
    return CLAUDE_MODEL_ALIASES[flag] ?? flag;
  }
  return flag;
}
32
+
15
33
  // ---------------------------------------------------------------------------
16
34
  // Agent detection
17
35
  // ---------------------------------------------------------------------------
@@ -77,6 +95,7 @@ export async function callViaAgent(
77
95
  systemPrompt: string,
78
96
  userPrompt: string,
79
97
  agent: string,
98
+ modelFlag?: string,
80
99
  ): Promise<string> {
81
100
  // Write prompt to temp file to avoid shell quoting issues
82
101
  const promptFile = join(tmpdir(), `selftune-llm-${Date.now()}.txt`);
@@ -88,6 +107,10 @@ export async function callViaAgent(
88
107
 
89
108
  if (agent === "claude") {
90
109
  cmd = ["claude", "-p", promptContent];
110
+ if (modelFlag) {
111
+ const resolved = resolveModelFlag(modelFlag);
112
+ cmd.push("--model", resolved);
113
+ }
91
114
  } else if (agent === "codex") {
92
115
  cmd = ["codex", "exec", "--skip-git-repo-check", promptContent];
93
116
  } else if (agent === "opencode") {
@@ -102,8 +125,10 @@ export async function callViaAgent(
102
125
  env: { ...process.env, CLAUDECODE: "" },
103
126
  });
104
127
 
105
- // 120s timeout
106
- const timeout = setTimeout(() => proc.kill(), 120_000);
128
+ // Longer timeout for heavier models (sonnet/opus take longer than haiku)
129
+ const isLightModel = modelFlag === "haiku" || modelFlag?.includes("haiku");
130
+ const timeoutMs = isLightModel ? 120_000 : 300_000;
131
+ const timeout = setTimeout(() => proc.kill(), timeoutMs);
107
132
  const exitCode = await proc.exited;
108
133
  clearTimeout(timeout);
109
134
 
@@ -135,9 +160,10 @@ export async function callLlm(
135
160
  systemPrompt: string,
136
161
  userPrompt: string,
137
162
  agent: string,
163
+ modelFlag?: string,
138
164
  ): Promise<string> {
139
165
  if (!agent) {
140
166
  throw new Error("Agent must be specified for callLlm");
141
167
  }
142
- return callViaAgent(systemPrompt, userPrompt, agent);
168
+ return callViaAgent(systemPrompt, userPrompt, agent, modelFlag);
143
169
  }
@@ -228,6 +228,41 @@ export function readExcerpt(transcriptPath: string, maxChars = 8000): string {
228
228
  return `${full.slice(0, head)}\n\n... [truncated] ...\n\n${full.slice(-tail)}`;
229
229
  }
230
230
 
231
/**
 * Extract token usage from a transcript JSONL by summing usage fields.
 *
 * Each non-empty line is parsed as JSON. For every entry that is an object
 * with a top-level `usage` object, numeric `input_tokens` and `output_tokens`
 * are added to the running totals. Lines that are blank, are not valid JSON,
 * or parse to a non-object value (e.g. `null`, a number) are skipped.
 *
 * NOTE(review): only a top-level `usage` key is inspected; if the transcript
 * format nests usage deeper (e.g. under a `message` field), those entries are
 * not counted — confirm against real transcripts.
 *
 * @param transcriptPath - Path to the JSONL transcript file.
 * @returns Summed `{ input, output }` token counts; both are 0 when the file
 *   does not exist or contains no usable usage entries.
 */
export function extractTokenUsage(transcriptPath: string): { input: number; output: number } {
  if (!existsSync(transcriptPath)) return { input: 0, output: 0 };

  let input = 0;
  let output = 0;

  for (const raw of readFileSync(transcriptPath, "utf-8").split("\n")) {
    const line = raw.trim();
    if (!line) continue;

    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      // Malformed line: transcripts are read best-effort, so skip it.
      continue;
    }

    // JSON.parse can legitimately yield null or a primitive; reading `.usage`
    // off null would throw, so narrow before touching properties.
    if (typeof entry !== "object" || entry === null) continue;

    const usage = (entry as Record<string, unknown>).usage;
    if (typeof usage !== "object" || usage === null) continue;

    const { input_tokens, output_tokens } = usage as Record<string, unknown>;
    if (typeof input_tokens === "number") input += input_tokens;
    if (typeof output_tokens === "number") output += output_tokens;
  }

  return { input, output };
}
265
+
231
266
  function emptyMetrics(): TranscriptMetrics {
232
267
  return {
233
268
  tool_calls: {},
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Shared trigger-check utilities.
3
+ *
4
+ * Extracted from validate-proposal.ts so other modules (e.g. body validation,
5
+ * routing validation) can reuse the same prompt-building and response-parsing
6
+ * logic without depending on the evolution layer.
7
+ */
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // Prompt building
11
+ // ---------------------------------------------------------------------------
12
+
13
/**
 * Compose the single-query trigger check prompt for the LLM.
 *
 * The prompt asks for a bare YES/NO answer, then lists the skill description
 * followed by the user query, each under its own heading.
 *
 * @param description - Skill description text, inserted verbatim.
 * @param query - User query text, inserted verbatim.
 * @returns The newline-joined prompt string.
 */
export function buildTriggerCheckPrompt(description: string, query: string): string {
  const instructions =
    "Given this skill description, would the following user query trigger this skill?\n" +
    "Respond YES or NO only.";
  return `${instructions}\n\nSkill description:\n${description}\n\nUser query:\n${query}`;
}
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Response parsing
29
+ // ---------------------------------------------------------------------------
30
+
31
/**
 * Parse YES/NO from an LLM response.
 *
 * The answer is affirmative only when the first word of the response is
 * "YES" (case-insensitive). Leading whitespace, quotes, and markdown emphasis
 * are skipped, so `**Yes**` and `"yes."` both count. Requiring a word boundary
 * keeps words that merely begin with those letters (e.g. "Yesterday") from
 * being read as an affirmative.
 *
 * @param response - Raw LLM output.
 * @returns `true` for an affirmative answer; `false` for "NO" and for anything
 *   unrecognized (conservative default).
 */
export function parseTriggerResponse(response: string): boolean {
  return /^[^A-Za-z0-9]*YES\b/i.test(response);
}
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Batch prompt building
41
+ // ---------------------------------------------------------------------------
42
+
43
/**
 * Compose one trigger check prompt covering several queries.
 *
 * Queries are listed 1-based, each wrapped in double quotes, and the LLM is
 * asked to answer with the query number followed by YES or NO, one per line.
 *
 * @param description - Skill description text, inserted verbatim.
 * @param queries - Queries to evaluate, in the order they will be numbered.
 * @returns The newline-joined prompt string.
 */
export function buildBatchTriggerCheckPrompt(description: string, queries: string[]): string {
  const instructions =
    "Given this skill description, would each query trigger this skill?\n" +
    "Respond with the query number followed by YES or NO, one per line.";
  const listing = queries.map((text, position) => `${position + 1}. "${text}"`).join("\n");
  return `${instructions}\n\nSkill description:\n${description}\n\nQueries:\n${listing}`;
}
57
+
58
+ // ---------------------------------------------------------------------------
59
+ // Batch response parsing
60
+ // ---------------------------------------------------------------------------
61
+
62
/**
 * Parse a batch YES/NO response. Returns a boolean array aligned to the
 * original query order. Defaults to false for unparseable or missing lines.
 *
 * Each line is expected to begin with a 1-based query number, then at least
 * one separator character, then the answer. Any run of non-alphanumeric
 * characters is accepted as the separator, so "1. YES", "1: YES", "1) YES",
 * "1 YES", "1 - YES" and "1. **YES**" are all understood. The answer counts as
 * affirmative only when its first word is "YES" (case-insensitive).
 *
 * @param response - Raw LLM output, one answer per line.
 * @param queryCount - Number of queries that were asked; sets the result length.
 * @returns Array of length `queryCount`; `true` where the line for that query
 *   answered YES. Numbers outside 1..queryCount are ignored.
 */
export function parseBatchTriggerResponse(response: string, queryCount: number): boolean[] {
  const results = new Array<boolean>(queryCount).fill(false);

  for (const rawLine of response.split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;

    // <optional decoration><number><one or more separators><answer>
    const match = /^[^A-Za-z0-9]*(\d+)[^A-Za-z0-9]+(.*)$/.exec(line);
    if (!match) continue;

    const idx = Number.parseInt(match[1] ?? "", 10) - 1; // 1-based to 0-based
    if (!(idx >= 0 && idx < queryCount)) continue;

    // Word boundary so e.g. "YESTERDAY" is not mistaken for an affirmative.
    if (/^YES\b/i.test(match[2] ?? "")) {
      results[idx] = true;
    }
    // NO or anything else stays false (the default)
  }

  return results;
}
@@ -0,0 +1,156 @@
1
+ /**
2
+ * tui.ts
3
+ *
4
+ * Zero-dependency TUI primitives for the selftune evolve pipeline.
5
+ * Uses raw ANSI escape codes for spinners, timers, and step progression.
6
+ * All output goes to stderr to keep stdout clean for JSON results.
7
+ */
8
+
9
/** Braille glyphs cycled through to animate the spinner. */
const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
/** Milliseconds between spinner redraws. */
const TICK_MS = 80;

/** Step-progress reporter used by the evolve pipeline. */
export interface EvolveTUI {
  /** Begin a new spinner step; any step still running is first marked as succeeded. */
  step(label: string): void;
  /** Mark the running step as succeeded, printing `label` in place of the step label. */
  done(label: string): void;
  /** Mark the running step as failed, printing `label` in place of the step label. */
  fail(label: string): void;
  /** Close out any running step, stop timers, and print a summary line. */
  finish(summary: string): void;
  /** Stop timers and erase the spinner without printing anything. For error paths. */
  destroy(): void;
}

/** Build a reporter whose methods all do nothing. */
function createNoopTUI(): EvolveTUI {
  const ignore = (): void => {};
  return { step: ignore, done: ignore, fail: ignore, finish: ignore, destroy: ignore };
}

/**
 * Create the stderr progress reporter for an evolve run.
 *
 * Returns a no-op reporter when stderr is not a TTY, unless the
 * SELFTUNE_TUI_FORCE environment variable is set. Otherwise it immediately
 * prints a header naming the skill and model, and each step is drawn as a
 * spinner row that is redrawn every TICK_MS and then replaced by a completed
 * row showing the elapsed time. When NO_COLOR is set, ASCII markers
 * (">", "+", "x") replace the spinner glyphs and the check/cross marks.
 *
 * @param opts - Skill name and model, shown in the header line.
 * @returns The reporter; once `finish` or `destroy` has run, every method is a no-op.
 */
export function createEvolveTUI(opts: { skillName: string; model: string }): EvolveTUI {
  const plain = !!process.env.NO_COLOR;
  const interactive = !!process.stderr.isTTY;

  // Keep ANSI noise out of pipes and tests unless explicitly forced.
  if (!interactive && !process.env.SELFTUNE_TUI_FORCE) {
    return createNoopTUI();
  }

  const LABEL_COLUMN = 48;
  const emit = (text: string): void => {
    process.stderr.write(text);
  };

  let frameNo = 0;
  let startedAt = Date.now();
  let activeLabel = "";
  let spinnerShown = false; // true while a spinner row is the last line printed
  let ticker: ReturnType<typeof setInterval> | null = null;
  let closed = false;

  const okMark = plain ? "+" : "\u2713";
  const errMark = plain ? "x" : "\u2717";

  // Header
  emit(`\n selftune evolve \u2500\u2500 ${opts.skillName} \u2500\u2500 ${opts.model}\n\n`);

  const seconds = (ms: number): string => `${(ms / 1000).toFixed(1)}s`;

  // Spaces needed to push the time column out to LABEL_COLUMN (at least one).
  const gapAfter = (label: string): string => " ".repeat(Math.max(1, LABEL_COLUMN - label.length));

  const eraseSpinnerRow = (): void => {
    // Cursor up one line, then erase that whole line.
    if (spinnerShown) emit("\x1b[A\x1b[2K");
  };

  const drawSpinnerRow = (): void => {
    const glyph = plain ? ">" : SPINNER_FRAMES[frameNo % SPINNER_FRAMES.length];
    const shown = seconds(Date.now() - startedAt);
    emit(` ${glyph} ${activeLabel}${gapAfter(activeLabel)}${shown}\n`);
    spinnerShown = true;
  };

  const drawFinishedRow = (marker: string, label: string, elapsedMs: number): void => {
    const shown = seconds(elapsedMs);
    emit(` ${marker} ${label}${gapAfter(label)}${shown}\n`);
  };

  const haltTicker = (): void => {
    if (ticker === null) return;
    clearInterval(ticker);
    ticker = null;
  };

  const beginSpinner = (label: string): void => {
    activeLabel = label;
    startedAt = Date.now();
    frameNo = 0;
    drawSpinnerRow();
    ticker = setInterval(() => {
      frameNo += 1;
      eraseSpinnerRow();
      drawSpinnerRow();
    }, TICK_MS);
  };

  // Replace the live spinner row with a completed row for the same step.
  const closeActiveStep = (marker: string, label: string): void => {
    const elapsedMs = Date.now() - startedAt;
    haltTicker();
    eraseSpinnerRow();
    spinnerShown = false;
    drawFinishedRow(marker, label, elapsedMs);
  };

  // Shared body of done()/fail(): settle the running step, or print an instant one.
  const settle = (marker: string, label: string): void => {
    if (closed) return;
    if (spinnerShown) {
      closeActiveStep(marker, label);
    } else {
      drawFinishedRow(marker, label, 0);
    }
    activeLabel = "";
  };

  return {
    step(label: string): void {
      if (closed) return;
      // A step still spinning is implicitly successful.
      if (spinnerShown) closeActiveStep(okMark, activeLabel);
      beginSpinner(label);
    },

    done(label: string): void {
      settle(okMark, label);
    },

    fail(label: string): void {
      settle(errMark, label);
    },

    finish(summary: string): void {
      if (closed) return;
      if (spinnerShown) closeActiveStep(okMark, activeLabel);
      haltTicker();
      emit(`\n ${summary}\n`);
      closed = true;
    },

    destroy(): void {
      if (closed) return;
      haltTicker();
      if (spinnerShown) {
        eraseSpinnerRow();
        spinnerShown = false;
      }
      closed = true;
    },
  };
}