selftune 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/README.md +259 -0
  3. package/bin/selftune.cjs +29 -0
  4. package/cli/selftune/constants.ts +71 -0
  5. package/cli/selftune/eval/hooks-to-evals.ts +422 -0
  6. package/cli/selftune/evolution/audit.ts +44 -0
  7. package/cli/selftune/evolution/deploy-proposal.ts +244 -0
  8. package/cli/selftune/evolution/evolve.ts +406 -0
  9. package/cli/selftune/evolution/extract-patterns.ts +145 -0
  10. package/cli/selftune/evolution/propose-description.ts +146 -0
  11. package/cli/selftune/evolution/rollback.ts +242 -0
  12. package/cli/selftune/evolution/stopping-criteria.ts +69 -0
  13. package/cli/selftune/evolution/validate-proposal.ts +137 -0
  14. package/cli/selftune/grading/grade-session.ts +459 -0
  15. package/cli/selftune/hooks/prompt-log.ts +52 -0
  16. package/cli/selftune/hooks/session-stop.ts +54 -0
  17. package/cli/selftune/hooks/skill-eval.ts +73 -0
  18. package/cli/selftune/index.ts +104 -0
  19. package/cli/selftune/ingestors/codex-rollout.ts +416 -0
  20. package/cli/selftune/ingestors/codex-wrapper.ts +332 -0
  21. package/cli/selftune/ingestors/opencode-ingest.ts +565 -0
  22. package/cli/selftune/init.ts +297 -0
  23. package/cli/selftune/monitoring/watch.ts +328 -0
  24. package/cli/selftune/observability.ts +255 -0
  25. package/cli/selftune/types.ts +255 -0
  26. package/cli/selftune/utils/jsonl.ts +75 -0
  27. package/cli/selftune/utils/llm-call.ts +192 -0
  28. package/cli/selftune/utils/logging.ts +40 -0
  29. package/cli/selftune/utils/schema-validator.ts +47 -0
  30. package/cli/selftune/utils/seeded-random.ts +31 -0
  31. package/cli/selftune/utils/transcript.ts +260 -0
  32. package/package.json +29 -0
  33. package/skill/SKILL.md +120 -0
  34. package/skill/Workflows/Doctor.md +145 -0
  35. package/skill/Workflows/Evals.md +193 -0
  36. package/skill/Workflows/Evolve.md +159 -0
  37. package/skill/Workflows/Grade.md +157 -0
  38. package/skill/Workflows/Ingest.md +159 -0
  39. package/skill/Workflows/Initialize.md +125 -0
  40. package/skill/Workflows/Rollback.md +131 -0
  41. package/skill/Workflows/Watch.md +128 -0
  42. package/skill/references/grading-methodology.md +176 -0
  43. package/skill/references/invocation-taxonomy.md +144 -0
  44. package/skill/references/logs.md +168 -0
  45. package/skill/settings_snippet.json +41 -0
@@ -0,0 +1,244 @@
1
+ /**
2
+ * deploy-proposal.ts
3
+ *
4
+ * Deploys a validated evolution proposal by updating SKILL.md, creating a
5
+ * backup, building a commit message with metrics, and optionally creating
6
+ * a git branch and PR via `gh pr create`.
7
+ */
8
+
9
+ import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
10
+ import type { EvolutionProposal } from "../types.js";
11
+ import type { ValidationResult } from "./validate-proposal.js";
12
+
13
+ // ---------------------------------------------------------------------------
14
+ // Types
15
+ // ---------------------------------------------------------------------------
16
+
17
/** Inputs accepted by `deployProposal()`. */
export interface DeployOptions {
  /** The proposal whose `proposed_description` is written into SKILL.md. */
  proposal: EvolutionProposal;
  /** Validation outcome; its `net_change` feeds the commit message and PR body. */
  validation: ValidationResult;
  /** Path of the SKILL.md file to back up and rewrite. */
  skillPath: string;
  /** When true, a branch is created, committed, pushed, and a PR is opened via `gh`. */
  createPr: boolean;
  /** Prefix for the generated branch name. Defaults to "selftune/evolve". */
  branchPrefix?: string;
}
24
+
25
/** Summary returned by `deployProposal()`. */
export interface DeployResult {
  /** Whether SKILL.md was rewritten with the proposed description. */
  skillMdUpdated: boolean;
  /** Location of the backup copy of the previous SKILL.md, or null if none was made. */
  backupPath: string | null;
  /** Name of the git branch generated for the PR flow, or null when no PR was requested. */
  branchName: string | null;
  /** Commit message built for this deployment (also used as the PR title). */
  commitMessage: string;
}
31
+
32
+ // ---------------------------------------------------------------------------
33
+ // SKILL.md reading
34
+ // ---------------------------------------------------------------------------
35
+
36
/**
 * Load a SKILL.md file as UTF-8 text.
 *
 * @param skillPath - Path of the file to read.
 * @returns The full file contents.
 * @throws Error with a "SKILL.md not found" message when nothing exists at `skillPath`.
 */
export function readSkillMd(skillPath: string): string {
  const present = existsSync(skillPath);
  if (present) {
    return readFileSync(skillPath, "utf-8");
  }
  throw new Error(`SKILL.md not found at ${skillPath}`);
}
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // Description replacement
46
+ // ---------------------------------------------------------------------------
47
+
48
/** Matches a Markdown code-fence line: up to 3 spaces, 3+ backticks or tildes, then the rest. */
const FENCE_LINE = /^ {0,3}(`{3,}|~{3,})(.*)$/;

/**
 * Flag the lines that must not be interpreted as Markdown headings:
 * a leading YAML frontmatter block (`---` on the first line through the next
 * `---`) and the contents of closed fenced code blocks.
 *
 * An unterminated frontmatter block or code fence is treated as ordinary text,
 * so malformed input is scanned line by line as if no such block existed.
 */
function markNonProseLines(lines: readonly string[]): boolean[] {
  const skip = new Array<boolean>(lines.length).fill(false);
  let cursor = 0;

  if ((lines[0] ?? "").trimEnd() === "---") {
    const close = lines.findIndex((line, i) => i > 0 && line.trimEnd() === "---");
    if (close !== -1) {
      skip.fill(true, 0, close + 1);
      cursor = close + 1;
    }
  }

  let fenceMarker = "";
  let fenceStart = -1;
  for (let i = cursor; i < lines.length; i++) {
    // trimEnd() drops a trailing "\r" so CRLF files match the fence pattern.
    const match = FENCE_LINE.exec((lines[i] ?? "").trimEnd());

    if (fenceStart === -1) {
      if (match) {
        fenceMarker = match[1] ?? "";
        fenceStart = i;
        skip[i] = true;
      }
      continue;
    }

    skip[i] = true;
    const marker = match?.[1] ?? "";
    const trailing = match?.[2] ?? "";
    // A closing fence uses the same character, is at least as long as the
    // opening fence, and carries no info string.
    const closes =
      marker !== "" &&
      marker[0] === fenceMarker[0] &&
      marker.length >= fenceMarker.length &&
      trailing.trim() === "";
    if (closes) {
      fenceStart = -1;
    }
  }

  // Unclosed fence: un-flag everything from its opening line onward.
  if (fenceStart !== -1) {
    skip.fill(false, fenceStart);
  }
  return skip;
}

/**
 * Replace the description section of a SKILL.md file.
 *
 * The description is the content between the first `# ` heading and the first
 * `## ` heading that follows it. Lines inside YAML frontmatter or inside a
 * closed fenced code block are never treated as headings, so `#` comments in
 * frontmatter or shell snippets do not truncate the document.
 *
 * - No `# ` heading: the new description is prepended to the original content.
 * - No `## ` heading after the title: everything after the title is replaced.
 * - Anything before the title (e.g. frontmatter) is preserved verbatim.
 *
 * @param currentContent - Existing SKILL.md text.
 * @param newDescription - Replacement description; may be empty.
 * @returns The rewritten document.
 */
export function replaceDescription(currentContent: string, newDescription: string): string {
  const lines = currentContent.split("\n");
  const skip = markNonProseLines(lines);

  // "## ..." never starts with "# ", so one prefix test identifies a level-1 heading.
  const headingIndex = lines.findIndex((line, i) => !skip[i] && line.startsWith("# "));
  if (headingIndex === -1) {
    return `${newDescription}\n${currentContent}`;
  }

  const subHeadingIndex = lines.findIndex(
    (line, i) => i > headingIndex && !skip[i] && line.startsWith("## "),
  );

  const preamble = headingIndex > 0 ? `${lines.slice(0, headingIndex).join("\n")}\n` : "";
  const headingLine = lines[headingIndex];
  const descriptionBlock = newDescription.length > 0 ? `\n${newDescription}\n` : "\n";

  if (subHeadingIndex === -1) {
    return `${preamble}${headingLine}\n${descriptionBlock}\n`;
  }

  const afterSubHeading = lines.slice(subHeadingIndex).join("\n");
  return `${preamble}${headingLine}\n${descriptionBlock}\n${afterSubHeading}`;
}
95
+
96
+ // ---------------------------------------------------------------------------
97
+ // Commit message builder
98
+ // ---------------------------------------------------------------------------
99
+
100
/**
 * Compose the one-line commit message for an evolution deploy.
 *
 * The message has the form `evolve(<skill>): <signed percent>% pass rate`,
 * where the percent is `validation.net_change * 100` rounded to the nearest
 * integer and non-negative values carry an explicit `+`.
 */
export function buildCommitMessage(
  proposal: EvolutionProposal,
  validation: ValidationResult,
): string {
  const roundedPercent = Math.round(validation.net_change * 100);
  const signedPercent = roundedPercent >= 0 ? `+${roundedPercent}` : `${roundedPercent}`;
  return `evolve(${proposal.skill_name}): ${signedPercent}% pass rate`;
}
111
+
112
+ // ---------------------------------------------------------------------------
113
+ // Git/GH operations (PR creation)
114
+ // ---------------------------------------------------------------------------
115
+
116
/**
 * Sanitize a string for use as one component of a git branch name.
 *
 * Steps, in order:
 * 1. Every character outside `[a-zA-Z0-9._-]` becomes `-`.
 * 2. Runs of dots collapse to a single `.` (git forbids `..` in ref names).
 * 3. Runs of dashes collapse to a single `-`.
 * 4. All leading and trailing `.`/`-` characters are removed, so the result
 *    never starts with a dot (invalid as a ref path component) and
 *    punctuation-only input yields an empty string.
 *
 * @returns The sanitized name; may be empty, so callers should supply a fallback.
 */
function sanitizeForGitRef(name: string): string {
  return name
    .replace(/[^a-zA-Z0-9._-]/g, "-")
    .replace(/\.{2,}/g, ".")
    .replace(/-{2,}/g, "-")
    .replace(/^[.-]+|[.-]+$/g, "");
}
124
+
125
/**
 * Build a branch name of the form `<prefix>/<sanitized-skill>-<epoch-ms>`.
 *
 * The skill name is passed through `sanitizeForGitRef`; if nothing usable is
 * left, "untitled" is used. The millisecond timestamp keeps repeated
 * deployments of the same skill on distinct branches.
 */
function makeBranchName(prefix: string, skillName: string): string {
  const sanitized = sanitizeForGitRef(skillName);
  const slug = sanitized || "untitled";
  const createdAtMs = Date.now();
  return `${prefix}/${slug}-${createdAtMs}`;
}
131
+
132
/**
 * Spawn a command with `Bun.spawn` and resolve with its trimmed stdout.
 *
 * @param args - Executable followed by its arguments.
 * @param cwd - Working directory passed to `Bun.spawn`; may be undefined.
 * @param timeoutMs - Milliseconds after which the process is killed (default 30s).
 * @throws Error when the process was killed by the timeout, or exited non-zero
 *   (the message then includes the captured stderr).
 */
async function runCommand(args: string[], cwd?: string, timeoutMs = 30_000): Promise<string> {
  const child = Bun.spawn(args, { cwd, stdout: "pipe", stderr: "pipe" });

  let killedByTimeout = false;
  const watchdog = setTimeout(() => {
    killedByTimeout = true;
    child.kill();
  }, timeoutMs);

  try {
    // Drain both pipes concurrently so a full stderr buffer cannot stall stdout (or vice versa).
    const pendingStdout = new Response(child.stdout).text();
    const pendingStderr = new Response(child.stderr).text();
    const [stdout, stderr] = await Promise.all([pendingStdout, pendingStderr]);
    const exitCode = await child.exited;

    // The timeout check comes first: a killed process also has a non-zero exit code.
    if (killedByTimeout) {
      throw new Error(`Command timed out after ${timeoutMs}ms: ${args.join(" ")}`);
    }
    if (exitCode !== 0) {
      throw new Error(`Command failed (exit ${exitCode}): ${args.join(" ")}\n${stderr}`);
    }

    return stdout.trim();
  } finally {
    clearTimeout(watchdog);
  }
}
170
+
171
+ // ---------------------------------------------------------------------------
172
+ // Main deploy function
173
+ // ---------------------------------------------------------------------------
174
+
175
/**
 * Deploy a validated evolution proposal to SKILL.md and optionally open a PR.
 *
 * Sequence:
 * 1. Read the current SKILL.md (throws if it does not exist).
 * 2. Copy it to `<skillPath>.<ISO timestamp>.bak` so each deploy keeps its own backup.
 * 3. Replace the description section and write the file back.
 * 4. Build the commit message.
 * 5. If `createPr` is set: create a branch, stage SKILL.md, commit, push, and
 *    run `gh pr create`. Any failure in this step is logged as a warning and
 *    does NOT fail the deploy; the generated branch name is still returned.
 *
 * @throws Error if SKILL.md is missing, or if the backup or write fails.
 */
export async function deployProposal(options: DeployOptions): Promise<DeployResult> {
  const { proposal, validation, skillPath, createPr, branchPrefix = "selftune/evolve" } = options;

  // Step 1: Read current SKILL.md
  const currentContent = readSkillMd(skillPath);

  // Step 2: Create backup (unique per deploy to avoid overwriting previous backups)
  const backupTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
  const backupPath = `${skillPath}.${backupTimestamp}.bak`;
  copyFileSync(skillPath, backupPath);

  // Step 3: Replace description and write
  const updatedContent = replaceDescription(currentContent, proposal.proposed_description);
  writeFileSync(skillPath, updatedContent, "utf-8");

  // Step 4: Build commit message
  const commitMessage = buildCommitMessage(proposal, validation);

  // Step 5: Optionally create branch and PR
  let branchName: string | null = null;

  if (createPr) {
    branchName = makeBranchName(branchPrefix, proposal.skill_name);

    // Sign the ROUNDED percent with ">= 0", matching the commit message/PR title,
    // so a zero or near-zero change reads "+0%" in both places.
    const netPercent = Math.round(validation.net_change * 100);
    const netChangeStr = `${netPercent >= 0 ? "+" : ""}${netPercent}%`;
    const prBody = `Proposal: ${proposal.proposal_id}\nRationale: ${proposal.rationale}\nNet change: ${netChangeStr}`;

    try {
      // Create and checkout branch
      await runCommand(["git", "checkout", "-b", branchName]);

      // Stage the SKILL.md
      await runCommand(["git", "add", skillPath]);

      // Commit
      await runCommand(["git", "commit", "-m", commitMessage]);

      // Push
      await runCommand(["git", "push", "-u", "origin", branchName]);

      // Create PR
      await runCommand(["gh", "pr", "create", "--title", commitMessage, "--body", prBody]);
    } catch (err) {
      // Git/GH operations are best-effort in test environments.
      // The branch name is still returned for tracking.
      console.error(`[WARN] Git/GH operation failed: ${err instanceof Error ? err.message : err}`);
    }
  }

  return {
    skillMdUpdated: true,
    backupPath,
    branchName,
    commitMessage,
  };
}
237
+
238
+ // ---------------------------------------------------------------------------
239
+ // CLI entry guard
240
+ // ---------------------------------------------------------------------------
241
+
242
// Executed directly: this module has no CLI of its own, so just print a pointer.
if (import.meta.main) {
  const usageHint = "deploy-proposal: use deployProposal() programmatically or via evolve CLI";
  console.log(usageHint);
}
@@ -0,0 +1,406 @@
1
+ /**
2
+ * evolve.ts
3
+ *
4
+ * Evolution orchestrator: coordinates failure pattern extraction, proposal
5
+ * generation, validation, and deployment into a single pipeline with retry
6
+ * logic and comprehensive audit tracking.
7
+ */
8
+
9
+ import { existsSync, readFileSync } from "node:fs";
10
+ import { parseArgs } from "node:util";
11
+
12
+ import { QUERY_LOG, SKILL_LOG } from "../constants.js";
13
+ import { buildEvalSet } from "../eval/hooks-to-evals.js";
14
+ import type {
15
+ EvalEntry,
16
+ EvalPassRate,
17
+ EvolutionAuditEntry,
18
+ EvolutionProposal,
19
+ QueryLogRecord,
20
+ SkillUsageRecord,
21
+ } from "../types.js";
22
+ import { readJsonl } from "../utils/jsonl.js";
23
+ import { appendAuditEntry } from "./audit.js";
24
+ import { extractFailurePatterns } from "./extract-patterns.js";
25
+ import { generateProposal } from "./propose-description.js";
26
+ import { validateProposal } from "./validate-proposal.js";
27
+ import type { ValidationResult } from "./validate-proposal.js";
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // Types
31
+ // ---------------------------------------------------------------------------
32
+
33
/** Configuration for a single `evolve()` run. */
export interface EvolveOptions {
  /** Name of the skill being evolved. */
  skillName: string;
  /** Path of the skill's SKILL.md file. */
  skillPath: string;
  /** Optional path to an eval-set JSON file; when absent or missing on disk, the eval set is built from logs. */
  evalSetPath?: string;
  /** Execution mode forwarded to proposal generation and validation. */
  mode: "agent" | "api";
  /** Optional agent identifier forwarded to proposal generation and validation. */
  agent?: string;
  /** When true, the pipeline stops after validation and nothing is marked as deployed. */
  dryRun: boolean;
  /** Proposals with confidence below this value are rejected (CLI default 0.6). */
  confidenceThreshold: number;
  /** Maximum number of generate/validate attempts (CLI default 3). */
  maxIterations: number;
}
43
+
44
/** Outcome of an `evolve()` run. */
export interface EvolveResult {
  /** The last proposal generated, or null if none was produced. */
  proposal: EvolutionProposal | null;
  /** The last validation result, or null if validation never ran. */
  validation: ValidationResult | null;
  /** True only when the run reached the deploy step. */
  deployed: boolean;
  /** Every audit entry recorded during this run, in order. */
  auditEntries: EvolutionAuditEntry[];
  /** Human-readable explanation of how the run ended. */
  reason: string;
}
51
+
52
/**
 * Optional overrides for the collaborators `evolve()` calls.
 *
 * Any member left undefined falls back to the real imported implementation,
 * which lets tests substitute fakes without resorting to `mock.module()`.
 */
export interface EvolveDeps {
  extractFailurePatterns?: typeof extractFailurePatterns;
  generateProposal?: typeof generateProposal;
  validateProposal?: typeof validateProposal;
  appendAuditEntry?: typeof appendAuditEntry;
  buildEvalSet?: typeof buildEvalSet;
}
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // Audit helper
66
+ // ---------------------------------------------------------------------------
67
+
68
/**
 * Assemble an audit entry stamped with the current time (ISO-8601).
 *
 * The `eval_snapshot` key is only present on the result when a snapshot is
 * supplied; it is omitted entirely otherwise.
 */
function createAuditEntry(
  proposalId: string,
  action: EvolutionAuditEntry["action"],
  details: string,
  evalSnapshot?: EvalPassRate,
): EvolutionAuditEntry {
  const entry: EvolutionAuditEntry = {
    timestamp: new Date().toISOString(),
    proposal_id: proposalId,
    action,
    details,
  };
  if (evalSnapshot) {
    entry.eval_snapshot = evalSnapshot;
  }
  return entry;
}
82
+
83
+ // ---------------------------------------------------------------------------
84
+ // Main orchestrator
85
+ // ---------------------------------------------------------------------------
86
+
87
/** Derive the pass/fail counts stored in audit entries from a pass rate over `total` evals. */
function snapshotFromPassRate(total: number, passRate: number): EvalPassRate {
  const passed = Math.round(passRate * total);
  return { total, passed, failed: total - passed, pass_rate: passRate };
}

/**
 * Run the evolution pipeline for one skill.
 *
 * Flow: read SKILL.md, load (or build from logs) the eval set, extract failure
 * patterns, then up to `maxIterations` times generate a proposal, gate it on
 * `confidenceThreshold`, and validate it. Each attempt's rejection reason is
 * fed into the next attempt as an extra "missed query" line.
 *
 * The function never throws: unexpected errors are caught and reported through
 * `reason` with `deployed: false`. Audit-log write failures are deliberately
 * ignored so they cannot break the pipeline; entries are still returned in
 * `auditEntries`.
 *
 * NOTE: the deploy step here only records a "deployed" audit entry; this
 * function does not itself rewrite SKILL.md (see the TASK-14 note below).
 *
 * @param options - Run configuration.
 * @param _deps - Optional collaborator overrides, intended for tests.
 * @returns The last proposal/validation, whether the run reached the deploy
 *   step, all audit entries recorded, and a human-readable reason.
 */
export async function evolve(
  options: EvolveOptions,
  _deps: EvolveDeps = {},
): Promise<EvolveResult> {
  const {
    skillName,
    skillPath,
    evalSetPath,
    mode,
    agent,
    dryRun,
    confidenceThreshold,
    maxIterations,
  } = options;

  // Resolve injectable dependencies with real-import fallbacks
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
  const _generateProposal = _deps.generateProposal ?? generateProposal;
  const _validateProposal = _deps.validateProposal ?? validateProposal;
  const _appendAuditEntry = _deps.appendAuditEntry ?? appendAuditEntry;
  const _buildEvalSet = _deps.buildEvalSet ?? buildEvalSet;

  const auditEntries: EvolutionAuditEntry[] = [];

  function recordAudit(
    proposalId: string,
    action: EvolutionAuditEntry["action"],
    details: string,
    evalSnapshot?: EvalPassRate,
  ): void {
    const entry = createAuditEntry(proposalId, action, details, evalSnapshot);
    auditEntries.push(entry);
    try {
      _appendAuditEntry(entry);
    } catch {
      // Fail-open: audit write failures should not break the pipeline
    }
  }

  try {
    // -----------------------------------------------------------------------
    // Step 1: Read current SKILL.md
    // -----------------------------------------------------------------------
    if (!existsSync(skillPath)) {
      return {
        proposal: null,
        validation: null,
        deployed: false,
        auditEntries,
        reason: `SKILL.md not found at ${skillPath}`,
      };
    }

    const currentDescription = readFileSync(skillPath, "utf-8");

    // -----------------------------------------------------------------------
    // Step 2: Load eval set
    // -----------------------------------------------------------------------
    let evalSet: EvalEntry[];

    if (evalSetPath && existsSync(evalSetPath)) {
      const raw = readFileSync(evalSetPath, "utf-8");
      // NOTE(review): the parsed JSON is trusted to be an EvalEntry[]; it is not validated here.
      evalSet = JSON.parse(raw) as EvalEntry[];
    } else {
      // Build from logs
      const skillRecords = readJsonl<SkillUsageRecord>(SKILL_LOG);
      const queryRecords = readJsonl<QueryLogRecord>(QUERY_LOG);
      evalSet = _buildEvalSet(skillRecords, queryRecords, skillName);
    }

    // -----------------------------------------------------------------------
    // Step 3: Load skill usage records
    // -----------------------------------------------------------------------
    const skillUsage = readJsonl<SkillUsageRecord>(SKILL_LOG);

    // -----------------------------------------------------------------------
    // Step 4: Extract failure patterns
    // -----------------------------------------------------------------------
    const failurePatterns = _extractFailurePatterns(evalSet, skillUsage, skillName);

    // -----------------------------------------------------------------------
    // Step 5: Early exit if no patterns
    // -----------------------------------------------------------------------
    if (failurePatterns.length === 0) {
      return {
        proposal: null,
        validation: null,
        deployed: false,
        auditEntries,
        reason: "No failure patterns found",
      };
    }

    // -----------------------------------------------------------------------
    // Step 6: Collect all missed queries
    // -----------------------------------------------------------------------
    const missedQueries = failurePatterns.flatMap((p) => p.missed_queries);

    // -----------------------------------------------------------------------
    // Steps 7-12: Retry loop for proposal generation and validation
    // -----------------------------------------------------------------------
    let lastProposal: EvolutionProposal | null = null;
    let lastValidation: ValidationResult | null = null;
    let feedbackReason = "";

    for (let iteration = 0; iteration < maxIterations; iteration++) {
      // Step 7: Generate proposal
      const effectiveMissedQueries = feedbackReason
        ? [...missedQueries, `[Previous attempt failed: ${feedbackReason}]`]
        : missedQueries;

      const proposal = await _generateProposal(
        currentDescription,
        failurePatterns,
        effectiveMissedQueries,
        skillName,
        skillPath,
        mode,
        agent,
      );

      lastProposal = proposal;

      // Step 8: Audit "created"
      recordAudit(
        proposal.proposal_id,
        "created",
        `Proposal created for ${skillName} (iteration ${iteration + 1})`,
      );

      // Step 9: Check confidence threshold
      if (proposal.confidence < confidenceThreshold) {
        feedbackReason = `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`;
        recordAudit(
          proposal.proposal_id,
          "rejected",
          `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`,
        );

        // If this is the last iteration, return early with rejection
        if (iteration === maxIterations - 1) {
          return {
            proposal: lastProposal,
            validation: null,
            deployed: false,
            auditEntries,
            reason: `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`,
          };
        }

        continue;
      }

      // Step 10: Validate against eval set
      const validation = await _validateProposal(proposal, evalSet, mode, agent);
      lastValidation = validation;

      // Step 11: Audit "validated"
      recordAudit(
        proposal.proposal_id,
        "validated",
        `Validation complete: improved=${validation.improved}`,
        snapshotFromPassRate(evalSet.length, validation.after_pass_rate),
      );

      // Step 12: Check validation result
      if (!validation.improved) {
        feedbackReason = `Validation failed: net_change=${validation.net_change.toFixed(3)}, improved=false`;
        recordAudit(
          proposal.proposal_id,
          "rejected",
          `Validation failed: net_change=${validation.net_change.toFixed(3)}`,
        );

        // If this is the last iteration, return with rejection
        if (iteration === maxIterations - 1) {
          return {
            proposal: lastProposal,
            validation: lastValidation,
            deployed: false,
            auditEntries,
            reason: `Validation failed after ${maxIterations} iterations: net_change=${validation.net_change.toFixed(3)}`,
          };
        }

        continue;
      }

      // Validation passed - break out of retry loop
      break;
    }

    // -----------------------------------------------------------------------
    // Guard: the loop can finish without a validated proposal when it never
    // runs (maxIterations <= 0 or NaN) or when maxIterations is not an integer
    // (the "last iteration" checks above never match). Never report success
    // in that case.
    // -----------------------------------------------------------------------
    if (!lastProposal || !lastValidation || !lastValidation.improved) {
      return {
        proposal: lastProposal,
        validation: lastValidation,
        deployed: false,
        auditEntries,
        reason: `No validated proposal produced (maxIterations=${maxIterations})`,
      };
    }

    // -----------------------------------------------------------------------
    // Step 13: Dry run check
    // -----------------------------------------------------------------------
    if (dryRun) {
      return {
        proposal: lastProposal,
        validation: lastValidation,
        deployed: false,
        auditEntries,
        reason: "Dry run - proposal validated but not deployed",
      };
    }

    // -----------------------------------------------------------------------
    // Step 14: Deploy (actual deploy wired in TASK-14)
    // -----------------------------------------------------------------------
    recordAudit(
      lastProposal.proposal_id,
      "deployed",
      `Deployed proposal for ${skillName}`,
      snapshotFromPassRate(evalSet.length, lastValidation.after_pass_rate),
    );

    // -----------------------------------------------------------------------
    // Step 15-16: Return complete result
    // -----------------------------------------------------------------------
    return {
      proposal: lastProposal,
      validation: lastValidation,
      deployed: true,
      auditEntries,
      reason: "Evolution deployed successfully",
    };
  } catch (error) {
    // Robust error handling: catch any unexpected errors and return gracefully
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      proposal: null,
      validation: null,
      deployed: false,
      auditEntries,
      reason: `Error during evolution: ${errorMessage}`,
    };
  }
}
339
+
340
+ // ---------------------------------------------------------------------------
341
+ // CLI entry point
342
+ // ---------------------------------------------------------------------------
343
+
344
/**
 * CLI entry point for `selftune evolve`.
 *
 * Parses flags, validates them, runs `evolve()`, prints the result as JSON on
 * stdout, and exits with status 0 when the run reports `deployed: true` and 1
 * otherwise (a dry run therefore also exits 1).
 *
 * Exits with status 1 and an `[ERROR]` message on stderr when:
 * - `--skill` or `--skill-path` is missing;
 * - `--confidence` is not a number in the range 0.0-1.0;
 * - `--max-iterations` is not an integer >= 1.
 *
 * Any `--mode` value other than "api" selects "agent".
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      skill: { type: "string" },
      "skill-path": { type: "string" },
      "eval-set": { type: "string" },
      mode: { type: "string", default: "agent" },
      agent: { type: "string" },
      "dry-run": { type: "boolean", default: false },
      confidence: { type: "string", default: "0.6" },
      "max-iterations": { type: "string", default: "3" },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });

  if (values.help) {
    console.log(`selftune evolve — Evolve a skill description via failure patterns

Usage:
  selftune evolve --skill <name> --skill-path <path> [options]

Options:
  --skill             Skill name (required)
  --skill-path        Path to SKILL.md (required)
  --eval-set          Path to eval set JSON (optional, builds from logs if omitted)
  --mode              Execution mode: "agent" or "api" (default: "agent")
  --agent             Agent CLI to use (claude, codex, opencode)
  --dry-run           Validate proposal without deploying
  --confidence        Confidence threshold 0.0-1.0 (default: 0.6)
  --max-iterations    Max retry iterations (default: 3)
  --help              Show this help message`);
    process.exit(0);
  }

  if (!values.skill || !values["skill-path"]) {
    console.error("[ERROR] --skill and --skill-path are required");
    process.exit(1);
  }

  // A NaN threshold would make every `confidence < threshold` comparison false,
  // silently disabling the confidence gate, so reject it up front.
  const confidenceThreshold = Number.parseFloat(values.confidence ?? "0.6");
  if (
    !Number.isFinite(confidenceThreshold) ||
    confidenceThreshold < 0 ||
    confidenceThreshold > 1
  ) {
    console.error(
      `[ERROR] --confidence must be a number between 0.0 and 1.0 (got "${values.confidence}")`,
    );
    process.exit(1);
  }

  // A NaN or non-positive count would skip the generate/validate loop entirely.
  const maxIterations = Number.parseInt(values["max-iterations"] ?? "3", 10);
  if (!Number.isInteger(maxIterations) || maxIterations < 1) {
    console.error(
      `[ERROR] --max-iterations must be an integer >= 1 (got "${values["max-iterations"]}")`,
    );
    process.exit(1);
  }

  const mode = values.mode === "api" ? "api" : "agent";

  const result = await evolve({
    skillName: values.skill,
    skillPath: values["skill-path"],
    evalSetPath: values["eval-set"],
    mode,
    agent: values.agent,
    dryRun: values["dry-run"] ?? false,
    confidenceThreshold,
    maxIterations,
  });

  console.log(JSON.stringify(result, null, 2));
  process.exit(result.deployed ? 0 : 1);
}
400
+
401
// Executed directly: run the CLI and turn any escaped rejection into a fatal exit.
if (import.meta.main) {
  const onFatal = (err: unknown): void => {
    console.error(`[FATAL] ${err}`);
    process.exit(1);
  };
  cliMain().catch(onFatal);
}