selftune 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/README.md +259 -0
  3. package/bin/selftune.cjs +29 -0
  4. package/cli/selftune/constants.ts +71 -0
  5. package/cli/selftune/eval/hooks-to-evals.ts +422 -0
  6. package/cli/selftune/evolution/audit.ts +44 -0
  7. package/cli/selftune/evolution/deploy-proposal.ts +244 -0
  8. package/cli/selftune/evolution/evolve.ts +406 -0
  9. package/cli/selftune/evolution/extract-patterns.ts +145 -0
  10. package/cli/selftune/evolution/propose-description.ts +146 -0
  11. package/cli/selftune/evolution/rollback.ts +242 -0
  12. package/cli/selftune/evolution/stopping-criteria.ts +69 -0
  13. package/cli/selftune/evolution/validate-proposal.ts +137 -0
  14. package/cli/selftune/grading/grade-session.ts +459 -0
  15. package/cli/selftune/hooks/prompt-log.ts +52 -0
  16. package/cli/selftune/hooks/session-stop.ts +54 -0
  17. package/cli/selftune/hooks/skill-eval.ts +73 -0
  18. package/cli/selftune/index.ts +104 -0
  19. package/cli/selftune/ingestors/codex-rollout.ts +416 -0
  20. package/cli/selftune/ingestors/codex-wrapper.ts +332 -0
  21. package/cli/selftune/ingestors/opencode-ingest.ts +565 -0
  22. package/cli/selftune/init.ts +297 -0
  23. package/cli/selftune/monitoring/watch.ts +328 -0
  24. package/cli/selftune/observability.ts +255 -0
  25. package/cli/selftune/types.ts +255 -0
  26. package/cli/selftune/utils/jsonl.ts +75 -0
  27. package/cli/selftune/utils/llm-call.ts +192 -0
  28. package/cli/selftune/utils/logging.ts +40 -0
  29. package/cli/selftune/utils/schema-validator.ts +47 -0
  30. package/cli/selftune/utils/seeded-random.ts +31 -0
  31. package/cli/selftune/utils/transcript.ts +260 -0
  32. package/package.json +29 -0
  33. package/skill/SKILL.md +120 -0
  34. package/skill/Workflows/Doctor.md +145 -0
  35. package/skill/Workflows/Evals.md +193 -0
  36. package/skill/Workflows/Evolve.md +159 -0
  37. package/skill/Workflows/Grade.md +157 -0
  38. package/skill/Workflows/Ingest.md +159 -0
  39. package/skill/Workflows/Initialize.md +125 -0
  40. package/skill/Workflows/Rollback.md +131 -0
  41. package/skill/Workflows/Watch.md +128 -0
  42. package/skill/references/grading-methodology.md +176 -0
  43. package/skill/references/invocation-taxonomy.md +144 -0
  44. package/skill/references/logs.md +168 -0
  45. package/skill/settings_snippet.json +41 -0
@@ -0,0 +1,255 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Observability and diagnosability surfaces for selftune.
4
+ *
5
+ * Provides:
6
+ * - Structured health checks (doctor command)
7
+ * - Log file integrity verification
8
+ * - Hook installation checks
9
+ */
10
+
11
+ import { execSync } from "node:child_process";
12
+ import { existsSync, readFileSync } from "node:fs";
13
+ import { homedir } from "node:os";
14
+ import { join } from "node:path";
15
+ import { LOG_DIR, REQUIRED_FIELDS, SELFTUNE_CONFIG_PATH } from "./constants.js";
16
+ import type { DoctorResult, HealthCheck, HealthStatus, SelftuneConfig } from "./types.js";
17
+
18
/** Values accepted for the `agent_type` field by `checkConfigHealth`. */
const VALID_AGENT_TYPES = new Set<string>(["claude_code", "codex", "opencode", "unknown"]);

/** Values accepted for the `llm_mode` field by `checkConfigHealth`. */
const VALID_LLM_MODES = new Set<string>(["agent", "api"]);

/** File names of the hook scripts that `checkHookInstallation` looks for. */
const HOOK_FILES: string[] = ["prompt-log.ts", "session-stop.ts", "skill-eval.ts"];

/**
 * Logical log name -> path of the corresponding JSONL file inside `LOG_DIR`.
 * The keys are also used to index `REQUIRED_FIELDS` in `checkLogHealth`.
 */
const LOG_FILES: Record<string, string> = {
  session_telemetry: join(LOG_DIR, "session_telemetry_log.jsonl"),
  skill_usage: join(LOG_DIR, "skill_usage_log.jsonl"),
  all_queries: join(LOG_DIR, "all_queries_log.jsonl"),
  evolution_audit: join(LOG_DIR, "evolution_audit_log.jsonl"),
};
29
+
30
/**
 * Validate a JSONL file: parse every non-blank line as JSON and check that
 * each name in `requiredFields` is a key of the parsed record.
 *
 * Never throws: a file that cannot be read is reported as a failed check so
 * that one unreadable log does not abort the whole doctor run.
 *
 * @param filePath - Path of the JSONL file; read as UTF-8.
 * @param requiredFields - Keys every record must contain. A missing
 *   (undefined) set is treated as "no required keys".
 * @returns A status/message pair suitable for embedding in a {@link HealthCheck}:
 *   `"pass"` when every line parsed and satisfied the schema, `"fail"` otherwise.
 */
function validateJsonlFile(
  filePath: string,
  requiredFields: Set<string>,
): { status: HealthStatus; message: string } {
  let content: string;
  try {
    content = readFileSync(filePath, "utf-8");
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return { status: "fail", message: `Could not read log file: ${reason}` };
  }

  // Callers index a lookup table by log name; guard against a missing entry
  // instead of letting the iteration below throw.
  const required = [...(requiredFields ?? [])];

  let lineCount = 0;
  let parseErrors = 0;
  let schemaErrors = 0;

  for (const line of content.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    lineCount++;

    let record: unknown;
    try {
      record = JSON.parse(trimmed);
    } catch {
      parseErrors++;
      continue;
    }

    // Valid JSON that is not a plain object (null, number, string, array)
    // parsed fine but cannot be a log record: that is a schema problem.
    if (typeof record !== "object" || record === null || Array.isArray(record)) {
      schemaErrors++;
      continue;
    }

    const keys = new Set(Object.keys(record));
    // At most one schema error is counted per record.
    if (required.some((field) => !keys.has(field))) {
      schemaErrors++;
    }
  }

  if (parseErrors > 0 || schemaErrors > 0) {
    return {
      status: "fail",
      message: `${lineCount} records, ${parseErrors} parse errors, ${schemaErrors} schema errors`,
    };
  }
  return { status: "pass", message: `${lineCount} records, all valid` };
}
70
+
71
/**
 * Produce one health check per entry of `LOG_FILES`.
 *
 * A log that does not exist yet yields `warn`; an existing log is validated
 * line by line against the `REQUIRED_FIELDS` entry with the same key.
 *
 * @returns Checks named `log_<key>`, in `LOG_FILES` iteration order.
 */
export function checkLogHealth(): HealthCheck[] {
  return Object.entries(LOG_FILES).map(([logName, logPath]): HealthCheck => {
    if (!existsSync(logPath)) {
      return {
        name: `log_${logName}`,
        path: logPath,
        status: "warn",
        message: "Log file does not exist yet (no sessions captured)",
      };
    }

    const outcome = validateJsonlFile(logPath, REQUIRED_FIELDS[logName]);
    return {
      name: `log_${logName}`,
      path: logPath,
      status: outcome.status,
      message: outcome.message,
    };
  });
}
91
+
92
/**
 * Report whether a hook entry from settings.json runs a selftune command.
 * Accepts a flat `{ command }` entry as well as an entry that nests its
 * commands under a `hooks` array; null/primitive entries simply do not match.
 */
function hookEntryRunsSelftune(entry: unknown): boolean {
  if (typeof entry !== "object" || entry === null) return false;
  const { command, hooks } = entry as { command?: unknown; hooks?: unknown };
  if (typeof command === "string" && command.includes("selftune")) return true;
  return Array.isArray(hooks) && hooks.some((nested) => hookEntryRunsSelftune(nested));
}

/** Build the `hook_settings` check for the settings.json at `settingsPath`. */
function hookSettingsCheck(settingsPath: string): HealthCheck {
  const check: HealthCheck = {
    name: "hook_settings",
    path: settingsPath,
    status: "warn",
    message: "",
  };

  if (!existsSync(settingsPath)) {
    check.message = "Claude Code settings.json not found";
    return check;
  }

  let settings: unknown;
  try {
    settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
  } catch {
    check.message = "Could not parse settings.json";
    return check;
  }

  const hooks = (settings as { hooks?: unknown } | null)?.hooks;
  if (!hooks || typeof hooks !== "object") {
    check.message = "No hooks section in settings.json";
    return check;
  }

  // Reported label -> settings keys accepted for it. The kebab-case key is
  // the one this check has always looked for; the second is the event name
  // used by the Claude Code hooks documentation.
  const eventKeys: Record<string, string[]> = {
    "prompt-submit": ["prompt-submit", "UserPromptSubmit"],
    "post-tool-use": ["post-tool-use", "PostToolUse"],
    "session-stop": ["session-stop", "Stop"],
  };
  const byEvent = hooks as Record<string, unknown>;
  const missing = Object.entries(eventKeys)
    .filter(
      ([, keys]) =>
        !keys.some((key) => {
          const entries = byEvent[key];
          return Array.isArray(entries) && entries.some((e) => hookEntryRunsSelftune(e));
        }),
    )
    .map(([label]) => label);

  if (missing.length > 0) {
    check.message = `Selftune hooks not configured for: ${missing.join(", ")}`;
  } else {
    check.status = "pass";
    check.message = "All selftune hooks configured in settings.json";
  }
  return check;
}

/**
 * Check hook installation: one check per file in `HOOK_FILES`, followed by a
 * `hook_settings` check of `~/.claude/settings.json`.
 *
 * @returns `hook_<file>` checks (`pass`/`fail`) and a final `hook_settings`
 *   check (`pass`/`warn`).
 */
export function checkHookInstallation(): HealthCheck[] {
  const checks: HealthCheck[] = [];

  let repoRoot: string;
  try {
    repoRoot = execSync("git rev-parse --show-toplevel", {
      encoding: "utf-8",
      timeout: 5000,
      // Without an explicit stdio, git's stderr ("fatal: not a git
      // repository") is forwarded to our own stderr.
      stdio: ["ignore", "pipe", "ignore"],
    }).trim();
  } catch {
    // Not inside a git repo (or git unavailable) -- fall back to cwd
    repoRoot = process.cwd();
  }

  // NOTE(review): this looks for the hook scripts under <repo>/.git/hooks,
  // i.e. where git's own hooks live. Confirm that is where selftune installs
  // them; otherwise these checks will always report "missing".
  for (const hook of HOOK_FILES) {
    const hookPath = join(repoRoot, ".git", "hooks", hook);
    const present = existsSync(hookPath);
    checks.push({
      name: `hook_${hook}`,
      path: hookPath,
      status: present ? "pass" : "fail",
      message: present ? "Hook file present" : "Hook file missing",
    });
  }

  // Also check if hooks are configured in Claude Code settings
  checks.push(hookSettingsCheck(join(homedir(), ".claude", "settings.json")));

  return checks;
}
171
+
172
/**
 * Check the evolution audit log (`LOG_FILES.evolution_audit`).
 *
 * @returns A single-element array: `warn` when the log does not exist yet,
 *   otherwise the result of validating it against
 *   `REQUIRED_FIELDS.evolution_audit`.
 */
export function checkEvolutionHealth(): HealthCheck[] {
  const auditPath = LOG_FILES.evolution_audit;

  if (!existsSync(auditPath)) {
    return [
      {
        name: "evolution_audit",
        path: auditPath,
        status: "warn",
        message: "Evolution audit log does not exist yet (no evolution runs)",
      },
    ];
  }

  const { status, message } = validateJsonlFile(auditPath, REQUIRED_FIELDS.evolution_audit);
  return [{ name: "evolution_audit", path: auditPath, status, message }];
}
192
+
193
/**
 * Check the selftune config file at `SELFTUNE_CONFIG_PATH`.
 *
 * @returns A single-element array:
 *   - `warn` when the file does not exist,
 *   - `fail` when it cannot be read/parsed, or when `agent_type` / `llm_mode`
 *     is missing or not one of the accepted values,
 *   - `pass` otherwise, with both values echoed in the message.
 */
export function checkConfigHealth(): HealthCheck[] {
  const report = (status: HealthCheck["status"], message: string): HealthCheck[] => [
    { name: "config", path: SELFTUNE_CONFIG_PATH, status, message },
  ];

  if (!existsSync(SELFTUNE_CONFIG_PATH)) {
    return report("warn", "Config not found. Run 'selftune init' to bootstrap.");
  }

  try {
    const config = JSON.parse(readFileSync(SELFTUNE_CONFIG_PATH, "utf-8")) as SelftuneConfig;

    const problems: string[] = [];
    const agentTypeOk = Boolean(config.agent_type) && VALID_AGENT_TYPES.has(config.agent_type);
    if (!agentTypeOk) {
      problems.push(`invalid agent_type: ${JSON.stringify(config.agent_type)}`);
    }
    const llmModeOk = Boolean(config.llm_mode) && VALID_LLM_MODES.has(config.llm_mode);
    if (!llmModeOk) {
      problems.push(`invalid llm_mode: ${JSON.stringify(config.llm_mode)}`);
    }

    return problems.length > 0
      ? report("fail", problems.join("; "))
      : report("pass", `agent_type=${config.agent_type}, llm_mode=${config.llm_mode}`);
  } catch {
    // Any failure while reading, parsing or inspecting the file lands here.
    return report("fail", "Config file exists but is not valid JSON");
  }
}
230
+
231
/**
 * Run every health check (config, logs, hooks, evolution audit -- in that
 * order) and summarise the outcome.
 *
 * @returns The collected checks, per-status counts, and `healthy`, which is
 *   true when no check has status `fail` (warnings do not count).
 */
export function doctor(): DoctorResult {
  const checks: HealthCheck[] = [
    ...checkConfigHealth(),
    ...checkLogHealth(),
    ...checkHookInstallation(),
    ...checkEvolutionHealth(),
  ];

  const countOf = (wanted: HealthCheck["status"]): number =>
    checks.reduce((n, check) => (check.status === wanted ? n + 1 : n), 0);

  const pass = countOf("pass");
  const fail = countOf("fail");
  const warn = countOf("warn");

  return {
    command: "doctor",
    timestamp: new Date().toISOString(),
    checks,
    summary: { pass, fail, warn, total: checks.length },
    healthy: fail === 0,
  };
}
250
+
251
// Executed directly (import.meta.main): print the doctor report as
// pretty-printed JSON and exit 0 when healthy, 1 otherwise.
if (import.meta.main) {
  const report = doctor();
  const exitCode = report.healthy ? 0 : 1;
  console.log(JSON.stringify(report, null, 2));
  process.exit(exitCode);
}
@@ -0,0 +1,255 @@
1
+ /**
2
+ * Shared interfaces for selftune telemetry, eval, and grading.
3
+ */
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Config types (written to ~/.selftune/config.json)
7
+ // ---------------------------------------------------------------------------
8
+
9
/** Shape of the persisted selftune configuration object. */
export interface SelftuneConfig {
  /** Agent the installation is set up for. */
  agent_type:
    | "claude_code"
    | "codex"
    | "opencode"
    | "unknown";
  cli_path: string;
  /** How LLM calls are made. */
  llm_mode:
    | "agent"
    | "api";
  /** Nullable; presumably null when no agent CLI was detected -- confirm in init. */
  agent_cli: string | null;
  hooks_installed: boolean;
  /** Presumably an ISO-8601 timestamp -- confirm against the writer. */
  initialized_at: string;
}
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Log record types (written to ~/.claude/*.jsonl)
20
+ // ---------------------------------------------------------------------------
21
+
22
/** One logged user query. */
export interface QueryLogRecord {
  timestamp: string;
  session_id: string;
  query: string;
  /** Optional origin tag for the record. */
  source?: string;
}

/** One record of a skill being considered for a query. */
export interface SkillUsageRecord {
  timestamp: string;
  session_id: string;
  skill_name: string;
  skill_path: string;
  query: string;
  /** Whether the skill was triggered for `query`. */
  triggered: boolean;
  /** Optional origin tag for the record. */
  source?: string;
}

/**
 * Per-session telemetry record. The required metric fields have the same
 * names and types as those of `TranscriptMetrics`.
 */
export interface SessionTelemetryRecord {
  timestamp: string;
  session_id: string;
  cwd: string;
  transcript_path: string;

  /** Call count keyed by tool name. */
  tool_calls: Record<string, number>;
  total_tool_calls: number;
  bash_commands: string[];
  skills_triggered: string[];
  assistant_turns: number;
  errors_encountered: number;
  transcript_chars: number;
  last_user_query: string;

  // Optional extras; not every producer supplies them.
  source?: string;
  input_tokens?: number;
  output_tokens?: number;
  agent_summary?: string;
  rollout_path?: string;
}
58
+
59
+ // ---------------------------------------------------------------------------
60
+ // Transcript parsing
61
+ // ---------------------------------------------------------------------------
62
+
63
/** Metrics extracted from a session transcript. */
export interface TranscriptMetrics {
  /** Call count keyed by tool name. */
  tool_calls: Record<string, number>;
  total_tool_calls: number;
  bash_commands: string[];
  skills_triggered: string[];
  assistant_turns: number;
  errors_encountered: number;
  transcript_chars: number;
  last_user_query: string;
}
73
+
74
+ // ---------------------------------------------------------------------------
75
+ // Hook payloads (received via stdin from Claude Code)
76
+ // ---------------------------------------------------------------------------
77
+
78
/** Payload for the prompt-submit hook. */
export interface PromptSubmitPayload {
  user_prompt: string;
  session_id?: string;
}

/** Payload for the post-tool-use hook. */
export interface PostToolUsePayload {
  tool_name: string;
  /** Tool arguments; values are untyped and must be narrowed before use. */
  tool_input: Record<string, unknown>;
  session_id?: string;
  transcript_path?: string;
}

/** Payload for the stop hook; every field is optional. */
export interface StopPayload {
  session_id?: string;
  transcript_path?: string;
  cwd?: string;
}
95
+
96
+ // ---------------------------------------------------------------------------
97
+ // Eval types
98
+ // ---------------------------------------------------------------------------
99
+
100
/** Category of an eval query with respect to how it invokes a skill. */
export type InvocationType =
  | "explicit"
  | "implicit"
  | "contextual"
  | "negative";

/** One eval case: a query and whether the skill is expected to trigger for it. */
export interface EvalEntry {
  query: string;
  should_trigger: boolean;
  invocation_type?: InvocationType;
}
107
+
108
+ // ---------------------------------------------------------------------------
109
+ // Grading types
110
+ // ---------------------------------------------------------------------------
111
+
112
/** Outcome of checking one expectation, with the supporting evidence. */
export interface GradingExpectation {
  text: string;
  passed: boolean;
  evidence: string;
}

/** A claim examined by the grader and whether it was verified. */
export interface GradingClaim {
  claim: string;
  type:
    | "factual"
    | "process"
    | "quality";
  verified: boolean;
  evidence: string;
}

/** Pass/fail tallies for a set of expectations. */
export interface GradingSummary {
  passed: number;
  failed: number;
  total: number;
  pass_rate: number;
}

/** Grader feedback about the eval itself. */
export interface EvalFeedback {
  suggestions: { assertion: string; reason: string }[];
  overall: string;
}

/** Raw output from the LLM grader (before assembly into GradingResult). */
export interface GraderOutput {
  expectations: GradingExpectation[];
  summary: GradingSummary;
  claims: GradingClaim[];
  eval_feedback: EvalFeedback;
}

/** Execution statistics stored alongside a grading result. */
export interface ExecutionMetrics {
  /** Call count keyed by tool name. */
  tool_calls: Record<string, number>;
  total_tool_calls: number;
  total_steps: number;
  bash_commands_run: number;
  errors_encountered: number;
  skills_triggered: string[];
  transcript_chars: number;
}

/** Complete grading record: the grader's output plus session identity and execution metrics. */
export interface GradingResult {
  session_id: string;
  skill_name: string;
  transcript_path: string;
  graded_at: string;
  expectations: GradingExpectation[];
  summary: GradingSummary;
  execution_metrics: ExecutionMetrics;
  claims: GradingClaim[];
  eval_feedback: EvalFeedback;
}
166
+
167
+ // ---------------------------------------------------------------------------
168
+ // Health check types
169
+ // ---------------------------------------------------------------------------
170
+
171
/** Outcome of a single health check. */
export type HealthStatus =
  | "pass"
  | "fail"
  | "warn";

/** One health check: what was checked, where, and the outcome. */
export interface HealthCheck {
  name: string;
  /** File system path the check refers to. */
  path: string;
  status: HealthStatus;
  message: string;
}

/** Aggregated report of a doctor run. */
export interface DoctorResult {
  command: string;
  timestamp: string;
  checks: HealthCheck[];
  /** Number of checks per status, plus the overall `total`. */
  summary: Record<HealthStatus | "total", number>;
  healthy: boolean;
}
187
+
188
+ // ---------------------------------------------------------------------------
189
+ // Evolution types (v0.3)
190
+ // ---------------------------------------------------------------------------
191
+
192
/** Pass/fail tallies of an eval run. */
export interface EvalPassRate {
  total: number;
  passed: number;
  failed: number;
  pass_rate: number; // 0.0 to 1.0
}

/** A group of missed queries for one skill and invocation type. */
export interface FailurePattern {
  pattern_id: string;
  skill_name: string;
  invocation_type: InvocationType;
  missed_queries: string[];
  frequency: number;
  sample_sessions: string[];
  extracted_at: string;
}

/** A proposed replacement for a skill description, with its eval evidence. */
export interface EvolutionProposal {
  proposal_id: string;
  skill_name: string;
  skill_path: string;
  original_description: string;
  proposed_description: string;
  rationale: string;
  failure_patterns: string[]; // pattern_ids
  /** Eval pass rates `before` and `after` the proposed change. */
  eval_results: Record<"before" | "after", EvalPassRate>;
  confidence: number; // 0.0 - 1.0
  created_at: string;
  status:
    | "pending"
    | "validated"
    | "deployed"
    | "rolled_back";
}

/** One entry of the evolution audit log. */
export interface EvolutionAuditEntry {
  timestamp: string;
  proposal_id: string;
  action:
    | "created"
    | "validated"
    | "deployed"
    | "rolled_back"
    | "rejected";
  details: string;
  eval_snapshot?: EvalPassRate;
}

/** Tunable parameters of an evolution run. */
export interface EvolutionConfig {
  min_sessions: number;
  min_improvement: number; // e.g., 0.10 = 10 percentage points
  max_iterations: number;
  confidence_threshold: number; // e.g., 0.60
  dry_run: boolean;
}
241
+
242
+ // ---------------------------------------------------------------------------
243
+ // Monitoring types (v0.4)
244
+ // ---------------------------------------------------------------------------
245
+
246
/** Point-in-time monitoring figures for one skill over a window of sessions. */
export interface MonitoringSnapshot {
  timestamp: string;
  skill_name: string;
  window_sessions: number;
  pass_rate: number;
  false_negative_rate: number;
  /** Passed/total counts for every invocation type. */
  by_invocation_type: Record<
    InvocationType,
    { passed: number; total: number }
  >;
  regression_detected: boolean;
  baseline_pass_rate: number;
}
@@ -0,0 +1,75 @@
1
+ /**
2
+ * JSONL read/write/append utilities.
3
+ */
4
+
5
+ import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
6
+ import { dirname } from "node:path";
7
+ import { createLogger } from "./logging.js";
8
+ import type { LogType } from "./schema-validator.js";
9
+ import { validateRecord } from "./schema-validator.js";
10
+
11
/**
 * Load every parseable record from a JSONL file.
 *
 * @param path - File to read as UTF-8.
 * @returns The parsed records in file order; an empty array when the file
 *   does not exist. Blank lines and lines that are not valid JSON are
 *   dropped silently. Records are cast to `T` without validation.
 */
export function readJsonl<T = Record<string, unknown>>(path: string): T[] {
  if (!existsSync(path)) return [];

  return readFileSync(path, "utf-8")
    .split("\n")
    .flatMap((rawLine): T[] => {
      const text = rawLine.trim();
      if (text.length === 0) return [];
      try {
        // Wrapped in an array so that flatMap keeps the record as one element.
        return [JSON.parse(text) as T];
      } catch {
        // malformed line: contribute nothing
        return [];
      }
    });
}
30
+
31
/**
 * Append one record, serialized as a single JSON line, to a JSONL file.
 * The parent directory is created when it does not exist.
 *
 * Validation is advisory: when `logType` is given the record is validated
 * and each validation error is logged as a warning, but the record is
 * written regardless (fail-open: hooks must never block).
 *
 * @param path - Target JSONL file.
 * @param record - Value to serialize with `JSON.stringify`.
 * @param logType - Optional schema to validate `record` against.
 */
export function appendJsonl(path: string, record: unknown, logType?: LogType): void {
  if (logType) {
    const verdict = validateRecord(record, logType);
    if (!verdict.valid) {
      const log = createLogger("jsonl");
      for (const problem of verdict.errors) {
        log.warn(`Validation warning for ${logType}: ${problem}`);
      }
    }
  }

  const parentDir = dirname(path);
  if (!existsSync(parentDir)) {
    mkdirSync(parentDir, { recursive: true });
  }

  const serialized = JSON.stringify(record);
  appendFileSync(path, `${serialized}\n`, "utf-8");
}
52
+
53
/**
 * Load a marker file (JSON array of strings) for idempotent ingestion.
 *
 * @param path - Marker file to read as UTF-8.
 * @returns The string entries of the stored array. An empty set is returned
 *   when the file does not exist, cannot be read or parsed, or does not
 *   contain an array. Non-string array elements are discarded so that the
 *   result really is a `Set<string>`.
 */
export function loadMarker(path: string): Set<string> {
  if (!existsSync(path)) return new Set();
  try {
    const data: unknown = JSON.parse(readFileSync(path, "utf-8"));
    if (!Array.isArray(data)) return new Set();
    return new Set(data.filter((item): item is string => typeof item === "string"));
  } catch {
    // Unreadable or corrupt marker: behave as if nothing was ingested yet.
    return new Set();
  }
}
65
+
66
/**
 * Persist a marker set as a sorted, pretty-printed (2-space) JSON array.
 * The parent directory is created when it does not exist; an existing file
 * is overwritten.
 *
 * @param path - Marker file to write.
 * @param ingested - Identifiers to store.
 */
export function saveMarker(path: string, ingested: Set<string>): void {
  const parentDir = dirname(path);
  if (!existsSync(parentDir)) {
    mkdirSync(parentDir, { recursive: true });
  }

  const sortedIds = Array.from(ingested).sort();
  writeFileSync(path, JSON.stringify(sortedIds, null, 2), "utf-8");
}