selftune 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/README.md +259 -0
  3. package/bin/selftune.cjs +29 -0
  4. package/cli/selftune/constants.ts +71 -0
  5. package/cli/selftune/eval/hooks-to-evals.ts +422 -0
  6. package/cli/selftune/evolution/audit.ts +44 -0
  7. package/cli/selftune/evolution/deploy-proposal.ts +244 -0
  8. package/cli/selftune/evolution/evolve.ts +406 -0
  9. package/cli/selftune/evolution/extract-patterns.ts +145 -0
  10. package/cli/selftune/evolution/propose-description.ts +146 -0
  11. package/cli/selftune/evolution/rollback.ts +242 -0
  12. package/cli/selftune/evolution/stopping-criteria.ts +69 -0
  13. package/cli/selftune/evolution/validate-proposal.ts +137 -0
  14. package/cli/selftune/grading/grade-session.ts +459 -0
  15. package/cli/selftune/hooks/prompt-log.ts +52 -0
  16. package/cli/selftune/hooks/session-stop.ts +54 -0
  17. package/cli/selftune/hooks/skill-eval.ts +73 -0
  18. package/cli/selftune/index.ts +104 -0
  19. package/cli/selftune/ingestors/codex-rollout.ts +416 -0
  20. package/cli/selftune/ingestors/codex-wrapper.ts +332 -0
  21. package/cli/selftune/ingestors/opencode-ingest.ts +565 -0
  22. package/cli/selftune/init.ts +297 -0
  23. package/cli/selftune/monitoring/watch.ts +328 -0
  24. package/cli/selftune/observability.ts +255 -0
  25. package/cli/selftune/types.ts +255 -0
  26. package/cli/selftune/utils/jsonl.ts +75 -0
  27. package/cli/selftune/utils/llm-call.ts +192 -0
  28. package/cli/selftune/utils/logging.ts +40 -0
  29. package/cli/selftune/utils/schema-validator.ts +47 -0
  30. package/cli/selftune/utils/seeded-random.ts +31 -0
  31. package/cli/selftune/utils/transcript.ts +260 -0
  32. package/package.json +29 -0
  33. package/skill/SKILL.md +120 -0
  34. package/skill/Workflows/Doctor.md +145 -0
  35. package/skill/Workflows/Evals.md +193 -0
  36. package/skill/Workflows/Evolve.md +159 -0
  37. package/skill/Workflows/Grade.md +157 -0
  38. package/skill/Workflows/Ingest.md +159 -0
  39. package/skill/Workflows/Initialize.md +125 -0
  40. package/skill/Workflows/Rollback.md +131 -0
  41. package/skill/Workflows/Watch.md +128 -0
  42. package/skill/references/grading-methodology.md +176 -0
  43. package/skill/references/invocation-taxonomy.md +144 -0
  44. package/skill/references/logs.md +168 -0
  45. package/skill/settings_snippet.json +41 -0
@@ -0,0 +1,297 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * selftune init — Bootstrap agent identity and write config.
4
+ *
5
+ * Detects the coding agent environment, resolves the CLI path,
6
+ * determines LLM mode, checks hook installation, and writes
7
+ * the result to ~/.selftune/config.json.
8
+ *
9
+ * Usage:
10
+ * selftune init [--agent <type>] [--cli-path <path>] [--llm-mode <mode>] [--force]
11
+ */
12
+
13
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
14
+ import { homedir } from "node:os";
15
+ import { dirname, join, resolve } from "node:path";
16
+ import { fileURLToPath } from "node:url";
17
+ import { parseArgs } from "node:util";
18
+
19
+ import { SELFTUNE_CONFIG_DIR, SELFTUNE_CONFIG_PATH } from "./constants.js";
20
+ import type { SelftuneConfig } from "./types.js";
21
+ import { detectAgent } from "./utils/llm-call.js";
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Agent type detection
25
+ // ---------------------------------------------------------------------------
26
+
/** Agent identifiers accepted as an explicit override. */
const VALID_AGENT_TYPES: SelftuneConfig["agent_type"][] = [
  "claude_code",
  "codex",
  "opencode",
  "unknown",
];

/**
 * Work out which coding agent environment selftune is running inside.
 *
 * A recognised `override` is returned as-is. An unrecognised one is reported
 * on stderr and then ignored. Otherwise the first matching probe wins:
 *   1. Claude Code — `<home>/.claude` exists AND (`claude` is on PATH OR
 *      `CLAUDE_CODE_ENTRYPOINT` is set)
 *   2. Codex — `CODEX_HOME` is set OR `codex` is on PATH
 *   3. OpenCode — `<home>/.local/share/opencode/opencode.db` exists OR
 *      `opencode` is on PATH
 *   4. `"unknown"` when nothing matches
 *
 * @param override - Agent type requested by the caller (e.g. from `--agent`).
 * @param homeOverride - Home directory to probe instead of `os.homedir()`.
 * @returns The resolved agent type.
 */
export function detectAgentType(
  override?: string,
  homeOverride?: string,
): SelftuneConfig["agent_type"] {
  if (override) {
    const requested = override as SelftuneConfig["agent_type"];
    if (VALID_AGENT_TYPES.includes(requested)) return requested;
    console.error(`[WARN] Unknown agent type "${override}", falling back to detection`);
  }

  const homeDirectory = homeOverride ?? homedir();

  // Claude Code needs both the config directory and a second signal.
  const hasClaudeDir = existsSync(join(homeDirectory, ".claude"));
  if (hasClaudeDir && (Bun.which("claude") || process.env.CLAUDE_CODE_ENTRYPOINT)) {
    return "claude_code";
  }

  // Codex: either signal alone is enough.
  if (process.env.CODEX_HOME || Bun.which("codex")) return "codex";

  // OpenCode: database file or binary.
  const opencodeDbPath = join(homeDirectory, ".local", "share", "opencode", "opencode.db");
  return existsSync(opencodeDbPath) || Bun.which("opencode") ? "opencode" : "unknown";
}
77
+
78
+ // ---------------------------------------------------------------------------
79
+ // CLI path resolution
80
+ // ---------------------------------------------------------------------------
81
+
/**
 * Resolve the absolute path to the selftune CLI entry point.
 *
 * Without an override this is `index.ts` in the directory of this file
 * (init.ts lives alongside index.ts). An override is resolved against the
 * current working directory, so the returned path is always absolute and
 * stays valid when used from a different directory.
 *
 * @param override - Caller-supplied CLI path (e.g. from `--cli-path`).
 * @returns Absolute path to the CLI entry point.
 */
export function determineCliPath(override?: string): string {
  // resolve() leaves absolute paths unchanged (apart from normalisation).
  if (override) return resolve(override);
  return resolve(dirname(import.meta.path), "index.ts");
}
90
+
91
+ // ---------------------------------------------------------------------------
92
+ // LLM mode determination
93
+ // ---------------------------------------------------------------------------
94
+
/**
 * Choose the LLM mode and agent CLI from the available signals.
 *
 * Precedence:
 *   1. A valid `modeOverride` is used as-is, paired with `agentCli`.
 *   2. A detected agent CLI selects `"agent"` mode.
 *   3. An available API key selects `"api"` mode.
 *   4. Otherwise `"agent"` mode with no CLI (setup still required).
 *
 * @param agentCli - Detected agent CLI, or `null` when none was found.
 * @param hasApiKey - Whether an API key is available.
 * @param modeOverride - Mode requested by the caller (e.g. from `--llm-mode`).
 * @returns The chosen mode and the agent CLI to record alongside it.
 * @throws Error when `modeOverride` is non-empty and not `"agent"` or `"api"`.
 */
export function determineLlmMode(
  agentCli: string | null,
  hasApiKey?: boolean,
  modeOverride?: string,
): { llm_mode: "agent" | "api"; agent_cli: string | null } {
  const allowedModes = ["agent", "api"] as const;
  type LlmMode = (typeof allowedModes)[number];

  if (modeOverride) {
    if (!allowedModes.includes(modeOverride as LlmMode)) {
      throw new Error(
        `Invalid --llm-mode "${modeOverride}". Allowed values: ${allowedModes.join(", ")}`,
      );
    }
    return { llm_mode: modeOverride as LlmMode, agent_cli: agentCli };
  }

  if (agentCli) return { llm_mode: "agent", agent_cli: agentCli };

  // No agent CLI: use the API if a key exists, else fall back to agent mode.
  return { llm_mode: hasApiKey ? "api" : "agent", agent_cli: null };
}
127
+
128
+ // ---------------------------------------------------------------------------
129
+ // Hook detection (Claude Code only)
130
+ // ---------------------------------------------------------------------------
131
+
/**
 * Hook event keys that must each reference selftune for hooks to count as
 * installed.
 *
 * NOTE(review): Claude Code's own documentation names hook events in
 * PascalCase (e.g. `UserPromptSubmit`, `PostToolUse`, `Stop`) — confirm these
 * kebab-case keys match what selftune's settings snippet actually installs.
 */
const REQUIRED_HOOK_KEYS = ["prompt-submit", "post-tool-use", "session-stop"] as const;

/**
 * True when a hook entry references selftune, either directly through a
 * `command` string or through a nested `hooks` array (matcher-group shape).
 * Non-object entries never match and never throw.
 */
function hookEntryReferencesSelftune(entry: unknown): boolean {
  if (typeof entry !== "object" || entry === null) return false;
  const { command, hooks } = entry as { command?: unknown; hooks?: unknown };
  if (typeof command === "string" && command.includes("selftune")) return true;
  return Array.isArray(hooks) && hooks.some(hookEntryReferencesSelftune);
}

/**
 * Check whether the selftune hooks are configured in a Claude Code settings
 * file.
 *
 * Every key in `REQUIRED_HOOK_KEYS` must map to a non-empty array in which at
 * least one entry references selftune. Malformed entries are skipped rather
 * than failing the whole check.
 *
 * @param settingsPath - Path to the Claude Code `settings.json` file.
 * @returns `true` only when every required hook references selftune; `false`
 *          when the file is missing, unreadable, not valid JSON, or incomplete.
 */
export function checkClaudeCodeHooks(settingsPath: string): boolean {
  if (!existsSync(settingsPath)) return false;

  try {
    const settings: unknown = JSON.parse(readFileSync(settingsPath, "utf-8"));
    const hooks = (settings as { hooks?: unknown } | null)?.hooks;
    if (!hooks || typeof hooks !== "object") return false;

    const hookMap = hooks as Record<string, unknown>;
    return REQUIRED_HOOK_KEYS.every((key) => {
      const entries = hookMap[key];
      return Array.isArray(entries) && entries.some(hookEntryReferencesSelftune);
    });
  } catch {
    // Best-effort probe: unreadable or unparsable settings mean "not installed".
    return false;
  }
}
162
+
163
+ // ---------------------------------------------------------------------------
164
+ // Init options (for testability)
165
+ // ---------------------------------------------------------------------------
166
+
/** Inputs to `runInit`; paths and overrides are injectable for testing. */
export interface InitOptions {
  /** Directory that will hold the config file (created if missing). */
  configDir: string;
  /** Full path of the config file to read or write. */
  configPath: string;
  /** When true, overwrite an existing config instead of returning it. */
  force: boolean;
  /** Agent type requested by the caller; bypasses detection when valid. */
  agentOverride?: string;
  /** CLI entry-point path to record instead of the auto-resolved one. */
  cliPathOverride?: string;
  /** LLM mode requested by the caller (`"agent"` or `"api"`). */
  llmModeOverride?: string;
  /** Home directory to probe instead of `os.homedir()`. */
  homeDir?: string;
}
176
+
177
+ // ---------------------------------------------------------------------------
178
+ // Core init logic
179
+ // ---------------------------------------------------------------------------
180
+
/**
 * Read and sanity-check an existing config file.
 *
 * @throws Error when the file is not valid JSON or its top-level value is not
 *         a JSON object (e.g. `null`, an array, or a primitive).
 */
function loadExistingConfig(configPath: string): SelftuneConfig {
  const raw = readFileSync(configPath, "utf-8");

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    throw new Error(
      `Config file at ${configPath} contains invalid JSON. Delete it or use --force to reinitialize. Cause: ${err instanceof Error ? err.message : String(err)}`,
    );
  }

  // Syntactically valid JSON can still be unusable as a config.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error(
      `Config file at ${configPath} does not contain a JSON object. Delete it or use --force to reinitialize.`,
    );
  }

  return parsed as SelftuneConfig;
}

/**
 * Run the init flow and return the written (or already existing) config.
 *
 * When `opts.force` is false and a config file already exists, that file is
 * parsed and returned without being modified. Otherwise the agent type, CLI
 * path, LLM mode and hook status are determined, and the result is written to
 * `opts.configPath` (creating `opts.configDir` if needed).
 *
 * Not a pure function: it reads the environment and the filesystem and writes
 * the config file. All paths come from `opts` so tests can redirect them.
 *
 * @param opts - Paths, the force flag and optional overrides.
 * @returns The existing or newly written config.
 * @throws Error when an existing config is not a valid JSON object, or when
 *         `opts.llmModeOverride` is not an allowed mode.
 */
export function runInit(opts: InitOptions): SelftuneConfig {
  const { configDir, configPath, force } = opts;

  // Existing config wins unless the caller forces reinitialisation.
  if (!force && existsSync(configPath)) {
    return loadExistingConfig(configPath);
  }

  const agentType = detectAgentType(opts.agentOverride, opts.homeDir);
  const cliPath = determineCliPath(opts.cliPathOverride);

  // Agent CLI as reported by detectAgent(); feeds the LLM-mode decision.
  const agentCli = detectAgent();
  const hasApiKey = Boolean(process.env.ANTHROPIC_API_KEY);
  const { llm_mode, agent_cli } = determineLlmMode(agentCli, hasApiKey, opts.llmModeOverride);

  // Hooks are only checked for Claude Code; other agents report false.
  const home = opts.homeDir ?? homedir();
  const settingsPath = join(home, ".claude", "settings.json");
  const hooksInstalled = agentType === "claude_code" ? checkClaudeCodeHooks(settingsPath) : false;

  const config: SelftuneConfig = {
    agent_type: agentType,
    cli_path: cliPath,
    llm_mode,
    agent_cli,
    hooks_installed: hooksInstalled,
    initialized_at: new Date().toISOString(),
  };

  mkdirSync(configDir, { recursive: true });
  writeFileSync(configPath, JSON.stringify(config, null, 2), "utf-8");

  return config;
}
233
+
234
+ // ---------------------------------------------------------------------------
235
+ // CLI entry point
236
+ // ---------------------------------------------------------------------------
237
+
/**
 * CLI entry point for `selftune init`.
 *
 * Parses `--agent`, `--cli-path`, `--llm-mode` and `--force`. If a readable
 * config already exists and `--force` was not given, the existing config is
 * printed and the process exits with code 0. If the existing config cannot be
 * parsed, a warning is logged and the config is rebuilt. After writing, the
 * config is printed to stdout and a doctor summary to stderr.
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      agent: { type: "string" },
      "cli-path": { type: "string" },
      "llm-mode": { type: "string" },
      force: { type: "boolean", default: false },
    },
    strict: true,
  });

  const configDir = SELFTUNE_CONFIG_DIR;
  const configPath = SELFTUNE_CONFIG_PATH;
  let force = values.force ?? false;

  // Existing config without --force: show it and stop, unless it is corrupted.
  if (!force && existsSync(configPath)) {
    try {
      const raw = readFileSync(configPath, "utf-8");
      const existing = JSON.parse(raw) as SelftuneConfig;
      console.log(JSON.stringify(existing, null, 2));
      console.error("Already initialized. Use --force to reinitialize.");
      process.exit(0);
    } catch (err) {
      console.error(
        `[WARN] Config at ${configPath} is corrupted: ${err instanceof Error ? err.message : String(err)}. Reinitializing...`,
      );
      // runInit would otherwise re-read the same corrupted file and throw,
      // so the promised reinitialisation must overwrite it.
      force = true;
    }
  }

  const config = runInit({
    configDir,
    configPath,
    force,
    agentOverride: values.agent,
    cliPathOverride: values["cli-path"],
    llmModeOverride: values["llm-mode"],
  });

  console.log(JSON.stringify(config, null, 2));

  // Run doctor as a post-check; loaded lazily so it is only imported here.
  const { doctor } = await import("./observability.js");
  const doctorResult = doctor();
  console.error(
    `\n[doctor] ${doctorResult.summary.pass}/${doctorResult.summary.total} checks pass`,
  );
}
286
+
// Entry-point guard: run the CLI only when this file is executed directly,
// not when it is imported. `import.meta.main` is checked first; otherwise the
// script path in argv is compared with this module's own file path.
const importMeta = import.meta as Record<string, unknown>;
let isMain = importMeta.main === true;
if (!isMain) {
  isMain = process.argv[1] === fileURLToPath(import.meta.url);
}

if (isMain) {
  const reportFatal = (err: unknown): void => {
    console.error(`[FATAL] ${err}`);
    process.exit(1);
  };
  cliMain().then(undefined, reportFatal);
}
@@ -0,0 +1,328 @@
1
+ /**
2
+ * Post-deploy monitoring: compute snapshots and detect regressions (TASK-16).
3
+ *
4
+ * Exports:
5
+ * - computeMonitoringSnapshot (pure function, deterministic)
6
+ * - watch (reads log files, computes snapshot, optionally rolls back)
7
+ */
8
+
9
+ import { parseArgs } from "node:util";
10
+
11
+ import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
12
+ import { getLastDeployedProposal } from "../evolution/audit.js";
13
+ import type {
14
+ InvocationType,
15
+ MonitoringSnapshot,
16
+ QueryLogRecord,
17
+ SessionTelemetryRecord,
18
+ SkillUsageRecord,
19
+ } from "../types.js";
20
+ import { readJsonl } from "../utils/jsonl.js";
21
+
22
+ // ---------------------------------------------------------------------------
23
+ // Public interfaces
24
+ // ---------------------------------------------------------------------------
25
+
/**
 * Options accepted by `watch`.
 *
 * `windowSessions`, `regressionThreshold` and `autoRollback` are optional
 * because `watch` supplies defaults for them (20, the default regression
 * threshold, and `false`).
 */
export interface WatchOptions {
  /** Name of the skill to monitor. */
  skillName: string;
  /** Path to the skill file, passed on to rollback. */
  skillPath: string;
  /** Number of most recent telemetry entries to consider. */
  windowSessions?: number;
  /** Allowed drop below the baseline pass rate before a regression is flagged. */
  regressionThreshold?: number;
  /** When true, invoke rollback as soon as a regression is detected. */
  autoRollback?: boolean;
  /** Injected log paths for testing (override defaults). */
  _telemetryLogPath?: string;
  _skillLogPath?: string;
  _queryLogPath?: string;
  _auditLogPath?: string;
  /** Injected rollback function for testing. */
  _rollbackFn?: (opts: {
    skillName: string;
    skillPath: string;
    proposalId?: string;
  }) => Promise<{ rolledBack: boolean; restoredDescription: string; reason: string }>;
}
44
+
/** Outcome of a single `watch` run. */
export interface WatchResult {
  /** Monitoring snapshot computed for the skill. */
  snapshot: MonitoringSnapshot;
  /** Regression alert message, or `null` when no regression was detected. */
  alert: string | null;
  /** Whether a rollback was performed during this run. */
  rolledBack: boolean;
  /** Human-readable next step for the operator. */
  recommendation: string;
}
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Constants
54
+ // ---------------------------------------------------------------------------
55
+
/** Baseline pass rate used when the last deployed proposal carries no eval snapshot pass rate. */
const DEFAULT_BASELINE_PASS_RATE = 0.5;

/** Default allowed drop below the baseline pass rate before a regression is flagged. */
const DEFAULT_REGRESSION_THRESHOLD = 0.1;
58
+
59
+ // ---------------------------------------------------------------------------
60
+ // computeMonitoringSnapshot - pure function
61
+ // ---------------------------------------------------------------------------
62
+
/**
 * Build a monitoring snapshot for one skill from raw log records.
 *
 * Telemetry is windowed to its last `windowSessions` entries (array order is
 * assumed chronological). Skill and query records are then restricted to the
 * session IDs in that window. If the window is empty, or neither the skill's
 * records nor the query records share a session ID with it, all of the
 * skill's records and all query records are used unfiltered.
 *
 * Metrics:
 *   - pass rate = triggered skill records / query records (1.0 when there are
 *     no query records)
 *   - false negative rate = non-triggered skill records / skill records
 *     (0 when there are none)
 *   - a regression is flagged when the pass rate is strictly below
 *     `baselinePassRate - regressionThreshold`
 *
 * @param skillName - The skill to monitor
 * @param telemetry - All session telemetry records
 * @param skillRecords - All skill usage records
 * @param queryRecords - All query log records
 * @param windowSessions - Max number of recent telemetry entries to consider
 * @param baselinePassRate - The baseline pass rate for regression detection
 * @param regressionThreshold - Allowed drop below baseline (default 0.10)
 * @returns The snapshot, timestamped with the current time.
 */
export function computeMonitoringSnapshot(
  skillName: string,
  telemetry: SessionTelemetryRecord[],
  skillRecords: SkillUsageRecord[],
  queryRecords: QueryLogRecord[],
  windowSessions: number,
  baselinePassRate: number,
  regressionThreshold: number = DEFAULT_REGRESSION_THRESHOLD,
): MonitoringSnapshot {
  // Session IDs of the most recent telemetry entries.
  const recentSessionIds = new Set(
    telemetry.slice(-windowSessions).map(({ session_id }) => session_id),
  );

  const recordsForSkill = skillRecords.filter((record) => record.skill_name === skillName);

  // Start unfiltered; narrow to the window only when something overlaps it.
  let scopedSkillRecords = recordsForSkill;
  let scopedQueryRecords = queryRecords;
  if (recentSessionIds.size > 0) {
    const skillInWindow = recordsForSkill.filter((record) =>
      recentSessionIds.has(record.session_id),
    );
    const queriesInWindow = queryRecords.filter((record) =>
      recentSessionIds.has(record.session_id),
    );
    if (skillInWindow.length > 0 || queriesInWindow.length > 0) {
      scopedSkillRecords = skillInWindow;
      scopedQueryRecords = queriesInWindow;
    }
  }

  // Count triggered records; every remaining skill record is a false negative.
  let triggeredCount = 0;
  for (const record of scopedSkillRecords) {
    if (record.triggered) triggeredCount += 1;
  }
  const totalSkillChecks = scopedSkillRecords.length;
  const falseNegatives = totalSkillChecks - triggeredCount;
  const totalQueries = scopedQueryRecords.length;

  const passRate = totalQueries === 0 ? 1.0 : triggeredCount / totalQueries;
  const falseNegativeRate = totalSkillChecks === 0 ? 0 : falseNegatives / totalSkillChecks;

  // Round both sides to 10 decimals so floating-point noise at the boundary
  // (e.g. 0.8 - 0.1 = 0.7000000000000001) does not flip the comparison.
  const roundTo10Places = (value: number): number => Math.round(value * 1e10) / 1e10;
  const regressionFloor = roundTo10Places(baselinePassRate - regressionThreshold);
  const regressionDetected = roundTo10Places(passRate) < regressionFloor;

  // MVP: every check is classified as "implicit".
  const byInvocationType: Record<InvocationType, { passed: number; total: number }> = {
    explicit: { passed: 0, total: 0 },
    implicit: { passed: triggeredCount, total: totalSkillChecks },
    contextual: { passed: 0, total: 0 },
    negative: { passed: 0, total: 0 },
  };

  return {
    timestamp: new Date().toISOString(),
    skill_name: skillName,
    window_sessions: windowSessions,
    pass_rate: passRate,
    false_negative_rate: falseNegativeRate,
    by_invocation_type: byInvocationType,
    regression_detected: regressionDetected,
    baseline_pass_rate: baselinePassRate,
  };
}
146
+
147
+ // ---------------------------------------------------------------------------
148
+ // watch - reads logs, computes snapshot, optionally rolls back
149
+ // ---------------------------------------------------------------------------
150
+
/**
 * Run the post-deploy monitoring check for a skill.
 *
 * Reads the telemetry, skill and query logs, takes the baseline pass rate
 * from the last deployed proposal's eval snapshot (falling back to
 * `DEFAULT_BASELINE_PASS_RATE`), and computes a monitoring snapshot. When a
 * regression is detected an alert is produced and, if `autoRollback` is set,
 * the rollback function is invoked for the last deployed proposal.
 *
 * @param options - Skill identity, window/threshold settings and test hooks.
 * @returns The snapshot, an alert (or `null`), whether a rollback happened,
 *          and a recommendation. If an attempted auto-rollback does not
 *          succeed, the recommendation includes the reported reason.
 */
export async function watch(options: WatchOptions): Promise<WatchResult> {
  const {
    skillName,
    skillPath,
    windowSessions = 20,
    regressionThreshold = DEFAULT_REGRESSION_THRESHOLD,
    autoRollback = false,
    _telemetryLogPath = TELEMETRY_LOG,
    _skillLogPath = SKILL_LOG,
    _queryLogPath = QUERY_LOG,
    _auditLogPath,
    _rollbackFn,
  } = options;

  // 1. Read log files
  const telemetry = readJsonl<SessionTelemetryRecord>(_telemetryLogPath);
  const skillRecords = readJsonl<SkillUsageRecord>(_skillLogPath);
  const queryRecords = readJsonl<QueryLogRecord>(_queryLogPath);

  // 2. Determine baseline pass rate from last deployed audit entry
  const lastDeployed = getLastDeployedProposal(skillName, _auditLogPath);
  const baselinePassRate = lastDeployed?.eval_snapshot?.pass_rate ?? DEFAULT_BASELINE_PASS_RATE;

  // 3. Compute the monitoring snapshot (includes regression detection)
  const snapshot = computeMonitoringSnapshot(
    skillName,
    telemetry,
    skillRecords,
    queryRecords,
    windowSessions,
    baselinePassRate,
    regressionThreshold,
  );

  // 4. Stable: nothing to alert on
  if (!snapshot.regression_detected) {
    return {
      snapshot,
      alert: null,
      rolledBack: false,
      recommendation: `Skill "${skillName}" is stable. Pass rate ${snapshot.pass_rate.toFixed(2)} is within acceptable range of baseline ${baselinePassRate.toFixed(2)}.`,
    };
  }

  const alert = `regression detected for "${skillName}": pass_rate=${snapshot.pass_rate.toFixed(2)} below baseline=${baselinePassRate.toFixed(2)} minus threshold=${regressionThreshold.toFixed(2)}`;

  // 5. Auto-rollback if enabled; keep the reason when it does not succeed
  let rolledBack = false;
  let rollbackFailureReason: string | null = null;
  if (autoRollback) {
    const rollbackFn = _rollbackFn ?? (await loadRollbackFn());
    const proposalId = lastDeployed?.proposal_id;
    const rollbackResult = await rollbackFn({
      skillName,
      skillPath,
      proposalId,
    });
    rolledBack = rollbackResult.rolledBack;
    if (!rolledBack) {
      rollbackFailureReason = rollbackResult.reason || "no reason reported";
    }
  }

  const manualRollbackHint = `Consider running: selftune rollback --skill "${skillName}" --skill-path "${skillPath}"`;

  let recommendation: string;
  if (rolledBack) {
    recommendation = `Rolled back "${skillName}" to previous version. Monitor to confirm recovery.`;
  } else if (rollbackFailureReason !== null) {
    // Surface why the automatic attempt failed instead of dropping it.
    recommendation = `Auto-rollback did not succeed (${rollbackFailureReason}). ${manualRollbackHint}`;
  } else {
    recommendation = manualRollbackHint;
  }

  return {
    snapshot,
    alert,
    rolledBack,
    recommendation,
  };
}
222
+
223
+ // ---------------------------------------------------------------------------
224
+ // Lazy rollback loader (avoids import if rollback.ts doesn't exist yet)
225
+ // ---------------------------------------------------------------------------
226
+
/**
 * Lazily load the rollback implementation from `../evolution/rollback.js`.
 *
 * If the module cannot be resolved, a stub is returned that reports
 * "Rollback module not available" without rolling anything back. Any other
 * import failure (syntax or runtime error in the module) is rethrown.
 *
 * @returns The module's `rollback` function, or the no-op stub.
 */
async function loadRollbackFn(): Promise<
  (opts: {
    skillName: string;
    skillPath: string;
    proposalId?: string;
  }) => Promise<{ rolledBack: boolean; restoredDescription: string; reason: string }>
> {
  try {
    const { rollback } = await import("../evolution/rollback.js");
    return rollback;
  } catch (error: unknown) {
    const code = (error as NodeJS.ErrnoException)?.code;
    const moduleMissing = code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND";

    // Only a missing module is tolerated; everything else is a real failure.
    if (!moduleMissing) throw error;

    return async () => ({
      rolledBack: false,
      restoredDescription: "",
      reason: "Rollback module not available",
    });
  }
}
250
+
251
+ // ---------------------------------------------------------------------------
252
+ // CLI entry point
253
+ // ---------------------------------------------------------------------------
254
+
/**
 * CLI entry point for `selftune watch`.
 *
 * Parses and validates the flags, runs `watch`, prints the result as JSON on
 * stdout, and exits with code 1 when an alert was raised (0 otherwise).
 * Invalid or missing arguments print an `[ERROR]` line and exit with code 1;
 * `--help` prints usage and exits with code 0.
 */
export async function cliMain(): Promise<void> {
  // Print an error and terminate; typed `never` so callers narrow after it.
  function fail(message: string): never {
    console.error(message);
    process.exit(1);
  }

  const { values } = parseArgs({
    options: {
      skill: { type: "string" },
      "skill-path": { type: "string" },
      window: { type: "string", default: "20" },
      threshold: { type: "string", default: "0.1" },
      "auto-rollback": { type: "boolean", default: false },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });

  if (values.help) {
    console.log(`selftune watch — Monitor post-deploy skill health

Usage:
selftune watch --skill <name> --skill-path <path> [options]

Options:
--skill Skill name (required)
--skill-path Path to SKILL.md (required)
--window Number of recent sessions to consider (default: 20)
--threshold Regression threshold below baseline (default: 0.1)
--auto-rollback Automatically rollback on regression detection
--help Show this help message`);
    process.exit(0);
  }

  const skillName = values.skill;
  const skillPath = values["skill-path"];
  if (!skillName || !skillPath) {
    fail("[ERROR] --skill and --skill-path are required");
  }

  // --window: digits only, and at least 1.
  const rawWindow = values.window ?? "20";
  const windowSessions = /^\d+$/.test(rawWindow) ? Number.parseInt(rawWindow, 10) : Number.NaN;
  if (Number.isNaN(windowSessions) || windowSessions < 1) {
    fail("[ERROR] --window must be a positive integer >= 1");
  }

  // --threshold: plain decimal within [0, 1].
  const rawThreshold = values.threshold ?? "0.1";
  const regressionThreshold = /^\d+(\.\d+)?$/.test(rawThreshold)
    ? Number.parseFloat(rawThreshold)
    : Number.NaN;
  if (Number.isNaN(regressionThreshold) || regressionThreshold < 0 || regressionThreshold > 1) {
    fail("[ERROR] --threshold must be a finite number between 0 and 1");
  }

  const result = await watch({
    skillName,
    skillPath,
    windowSessions,
    regressionThreshold,
    autoRollback: values["auto-rollback"] ?? false,
  });

  console.log(JSON.stringify(result, null, 2));
  process.exit(result.alert ? 1 : 0);
}
322
+
// Run the CLI only when this module is the program entry point; any rejection
// from cliMain is reported as fatal and ends the process with exit code 1.
if (import.meta.main) {
  cliMain().then(undefined, (err) => {
    console.error(`[FATAL] ${err}`);
    process.exit(1);
  });
}
+ }