selftune 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/.claude/agents/diagnosis-analyst.md +146 -0
  2. package/.claude/agents/evolution-reviewer.md +167 -0
  3. package/.claude/agents/integration-guide.md +200 -0
  4. package/.claude/agents/pattern-analyst.md +147 -0
  5. package/CHANGELOG.md +37 -0
  6. package/README.md +96 -256
  7. package/assets/BeforeAfter.gif +0 -0
  8. package/assets/FeedbackLoop.gif +0 -0
  9. package/assets/logo.svg +9 -0
  10. package/assets/skill-health-badge.svg +20 -0
  11. package/cli/selftune/activation-rules.ts +171 -0
  12. package/cli/selftune/badge/badge-data.ts +108 -0
  13. package/cli/selftune/badge/badge-svg.ts +212 -0
  14. package/cli/selftune/badge/badge.ts +103 -0
  15. package/cli/selftune/constants.ts +75 -1
  16. package/cli/selftune/contribute/bundle.ts +314 -0
  17. package/cli/selftune/contribute/contribute.ts +214 -0
  18. package/cli/selftune/contribute/sanitize.ts +162 -0
  19. package/cli/selftune/cron/setup.ts +266 -0
  20. package/cli/selftune/dashboard-server.ts +582 -0
  21. package/cli/selftune/dashboard.ts +25 -3
  22. package/cli/selftune/eval/baseline.ts +247 -0
  23. package/cli/selftune/eval/composability.ts +117 -0
  24. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  25. package/cli/selftune/eval/hooks-to-evals.ts +68 -2
  26. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  28. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  29. package/cli/selftune/eval/unit-test.ts +196 -0
  30. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  31. package/cli/selftune/evolution/evolve-body.ts +492 -0
  32. package/cli/selftune/evolution/evolve.ts +466 -103
  33. package/cli/selftune/evolution/extract-patterns.ts +32 -1
  34. package/cli/selftune/evolution/pareto.ts +314 -0
  35. package/cli/selftune/evolution/propose-body.ts +171 -0
  36. package/cli/selftune/evolution/propose-description.ts +100 -2
  37. package/cli/selftune/evolution/propose-routing.ts +166 -0
  38. package/cli/selftune/evolution/refine-body.ts +141 -0
  39. package/cli/selftune/evolution/rollback.ts +19 -2
  40. package/cli/selftune/evolution/validate-body.ts +254 -0
  41. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  42. package/cli/selftune/evolution/validate-routing.ts +177 -0
  43. package/cli/selftune/grading/grade-session.ts +138 -18
  44. package/cli/selftune/grading/pre-gates.ts +104 -0
  45. package/cli/selftune/hooks/auto-activate.ts +185 -0
  46. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  47. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  48. package/cli/selftune/index.ts +88 -0
  49. package/cli/selftune/ingestors/claude-replay.ts +351 -0
  50. package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
  51. package/cli/selftune/init.ts +150 -3
  52. package/cli/selftune/memory/writer.ts +447 -0
  53. package/cli/selftune/monitoring/watch.ts +25 -2
  54. package/cli/selftune/status.ts +17 -13
  55. package/cli/selftune/types.ts +377 -5
  56. package/cli/selftune/utils/frontmatter.ts +217 -0
  57. package/cli/selftune/utils/llm-call.ts +29 -3
  58. package/cli/selftune/utils/transcript.ts +35 -0
  59. package/cli/selftune/utils/trigger-check.ts +89 -0
  60. package/cli/selftune/utils/tui.ts +156 -0
  61. package/dashboard/index.html +569 -8
  62. package/package.json +8 -4
  63. package/skill/SKILL.md +124 -8
  64. package/skill/Workflows/AutoActivation.md +144 -0
  65. package/skill/Workflows/Badge.md +118 -0
  66. package/skill/Workflows/Baseline.md +121 -0
  67. package/skill/Workflows/Composability.md +100 -0
  68. package/skill/Workflows/Contribute.md +91 -0
  69. package/skill/Workflows/Cron.md +155 -0
  70. package/skill/Workflows/Dashboard.md +203 -0
  71. package/skill/Workflows/Doctor.md +37 -1
  72. package/skill/Workflows/Evals.md +69 -1
  73. package/skill/Workflows/EvolutionMemory.md +152 -0
  74. package/skill/Workflows/Evolve.md +111 -6
  75. package/skill/Workflows/EvolveBody.md +159 -0
  76. package/skill/Workflows/ImportSkillsBench.md +111 -0
  77. package/skill/Workflows/Ingest.md +117 -3
  78. package/skill/Workflows/Initialize.md +57 -3
  79. package/skill/Workflows/Replay.md +70 -0
  80. package/skill/Workflows/Rollback.md +20 -1
  81. package/skill/Workflows/UnitTest.md +138 -0
  82. package/skill/Workflows/Watch.md +22 -0
  83. package/skill/settings_snippet.json +23 -0
  84. package/templates/activation-rules-default.json +27 -0
  85. package/templates/multi-skill-settings.json +64 -0
  86. package/templates/single-skill-settings.json +58 -0
@@ -16,6 +16,7 @@ import { TELEMETRY_LOG } from "../constants.js";
16
16
  import type {
17
17
  ExecutionMetrics,
18
18
  GraderOutput,
19
+ GradingExpectation,
19
20
  GradingResult,
20
21
  SessionTelemetryRecord,
21
22
  } from "../types.js";
@@ -26,6 +27,7 @@ import {
26
27
  callViaAgent,
27
28
  } from "../utils/llm-call.js";
28
29
  import { readExcerpt } from "../utils/transcript.js";
30
+ import { type PreGateContext, runPreGates } from "./pre-gates.js";
29
31
 
30
32
  // Re-export for backward compatibility
31
33
  export { detectAgent, stripMarkdownFences } from "../utils/llm-call.js";
@@ -48,24 +50,36 @@ export const GRADER_SYSTEM = `You are a rigorous skill session evaluator. You re
48
50
  Grade each expectation and output ONLY valid JSON matching this schema:
49
51
  {
50
52
  "expectations": [
51
- {"text": "...", "passed": true/false, "evidence": "specific quote or metric"}
53
+ {"text": "...", "passed": true/false, "evidence": "specific quote or metric", "score": 0.0-1.0}
52
54
  ],
53
- "summary": {"passed": N, "failed": N, "total": N, "pass_rate": 0.0},
55
+ "summary": {"passed": N, "failed": N, "total": N, "pass_rate": 0.0, "mean_score": 0.0},
54
56
  "claims": [
55
57
  {"claim": "...", "type": "factual|process|quality", "verified": true/false, "evidence": "..."}
56
58
  ],
57
59
  "eval_feedback": {
58
60
  "suggestions": [{"assertion": "...", "reason": "..."}],
59
61
  "overall": "one sentence"
60
- }
62
+ },
63
+ "failure_feedback": [
64
+ {"query": "the user query that failed", "failure_reason": "why it failed", "improvement_hint": "how to fix", "invocation_type": "explicit|implicit|contextual|negative"}
65
+ ]
61
66
  }
62
67
 
68
+ Score guide:
69
+ - 1.0: Clear, specific evidence of full completion
70
+ - 0.7-0.9: Strong evidence with minor gaps
71
+ - 0.4-0.6: Partial evidence or partial completion
72
+ - 0.1-0.3: Weak evidence, mostly not met
73
+ - 0.0: No evidence or clearly not met
74
+
63
75
  Rules:
64
76
  - PASS only when there is clear, specific evidence — not assumptions
65
77
  - FAIL when evidence is absent or contradictory
66
78
  - Cite exact quotes or specific metric values
67
79
  - Extract 2-4 implicit claims from the transcript and verify them
68
- - Suggest eval improvements only for clear gaps`;
80
+ - Suggest eval improvements only for clear gaps
81
+ - Set score to reflect confidence level (0.0-1.0)
82
+ - For each FAILED expectation, provide a failure_feedback entry with the relevant query, specific reason for failure, and actionable improvement hint`;
69
83
 
70
84
  // ---------------------------------------------------------------------------
71
85
  // Data lookup helpers
@@ -159,6 +173,39 @@ export function buildExecutionMetrics(telemetry: SessionTelemetryRecord): Execut
159
173
  };
160
174
  }
161
175
 
176
+ // ---------------------------------------------------------------------------
177
+ // Graduated scoring
178
+ // ---------------------------------------------------------------------------
179
+
180
+ /**
181
+ * Compute graduated scoring summary from expectations.
182
+ * Uses score field if present, defaults to 1.0 for pass, 0.0 for fail.
183
+ */
184
+ export function buildGraduatedSummary(expectations: GradingExpectation[]): {
185
+ mean_score: number;
186
+ score_std_dev: number;
187
+ } {
188
+ if (expectations.length === 0) {
189
+ return { mean_score: 0, score_std_dev: 0 };
190
+ }
191
+
192
+ const scores = expectations.map((e) => {
193
+ const fallback = e.passed ? 1.0 : 0.0;
194
+ const raw = e.score ?? fallback;
195
+ if (!Number.isFinite(raw)) return fallback;
196
+ return Math.min(1, Math.max(0, raw));
197
+ });
198
+ const mean = scores.reduce((sum, s) => sum + s, 0) / scores.length;
199
+
200
+ const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / scores.length;
201
+ const stdDev = Math.sqrt(variance);
202
+
203
+ return {
204
+ mean_score: Math.round(mean * 1000) / 1000,
205
+ score_std_dev: Math.round(stdDev * 1000) / 1000,
206
+ };
207
+ }
208
+
162
209
  // ---------------------------------------------------------------------------
163
210
  // Prompt building
164
211
  // ---------------------------------------------------------------------------
@@ -234,16 +281,31 @@ export function assembleResult(
234
281
  skillName: string,
235
282
  transcriptPath: string,
236
283
  ): GradingResult {
284
+ // Default missing scores on expectations
285
+ const expectations = (graderOutput?.expectations ?? []).map((e) => ({
286
+ ...e,
287
+ score: e.score ?? (e.passed ? 1.0 : 0.0),
288
+ source: e.source ?? ("llm" as const),
289
+ }));
290
+
291
+ const baseSummary = graderOutput?.summary ?? { passed: 0, failed: 0, total: 0, pass_rate: 0 };
292
+ const graduated = buildGraduatedSummary(expectations);
293
+
237
294
  return {
238
295
  session_id: sessionId ?? "unknown",
239
296
  skill_name: skillName ?? "unknown",
240
297
  transcript_path: transcriptPath ?? "",
241
298
  graded_at: new Date().toISOString(),
242
- expectations: graderOutput?.expectations ?? [],
243
- summary: graderOutput?.summary ?? { passed: 0, failed: 0, total: 0, pass_rate: 0 },
299
+ expectations,
300
+ summary: {
301
+ ...baseSummary,
302
+ mean_score: graduated.mean_score,
303
+ score_std_dev: graduated.score_std_dev,
304
+ },
244
305
  execution_metrics: buildExecutionMetrics(telemetry ?? ({} as SessionTelemetryRecord)),
245
306
  claims: graderOutput?.claims ?? [],
246
307
  eval_feedback: graderOutput?.eval_feedback ?? { suggestions: [], overall: "" },
308
+ failure_feedback: graderOutput?.failure_feedback,
247
309
  };
248
310
  }
249
311
 
@@ -254,10 +316,16 @@ export function assembleResult(
254
316
  function printSummary(result: GradingResult): void {
255
317
  const { summary } = result;
256
318
  const rate = summary.pass_rate ?? 0;
257
- console.log(`\nResults: ${summary.passed}/${summary.total} passed (${Math.round(rate * 100)}%)`);
319
+ const meanStr =
320
+ summary.mean_score != null ? ` | mean score: ${summary.mean_score.toFixed(2)}` : "";
321
+ console.log(
322
+ `\nResults: ${summary.passed}/${summary.total} passed (${Math.round(rate * 100)}%)${meanStr}`,
323
+ );
258
324
  for (const exp of result.expectations ?? []) {
259
325
  const icon = exp.passed ? "\u2713" : "\u2717";
260
- console.log(` ${icon} ${String(exp.text ?? "").slice(0, 70)}`);
326
+ const scoreStr = exp.score != null ? ` [${exp.score.toFixed(1)}]` : "";
327
+ const sourceStr = exp.source ? ` (${exp.source})` : "";
328
+ console.log(` ${icon}${scoreStr}${sourceStr} ${String(exp.text ?? "").slice(0, 70)}`);
261
329
  if (!exp.passed) {
262
330
  console.log(` -> ${String(exp.evidence ?? "").slice(0, 100)}`);
263
331
  }
@@ -380,20 +448,72 @@ export async function cliMain(): Promise<void> {
380
448
  console.log("==========================\n");
381
449
  }
382
450
 
383
- // --- Build prompt and grade ---
384
- const prompt = buildGradingPrompt(expectations, telemetry, transcriptExcerpt, skill);
451
+ // --- Run pre-gates first ---
452
+ const preGateCtx: PreGateContext = {
453
+ telemetry,
454
+ skillName: skill,
455
+ transcriptExcerpt,
456
+ };
457
+ const preGateResult = runPreGates(expectations, preGateCtx);
385
458
 
386
- console.error(`Grading ${expectations.length} expectations for skill '${skill}'...`);
459
+ let allExpectations: GradingExpectation[];
387
460
 
388
- let graderOutput: GraderOutput;
389
- try {
390
- graderOutput = await gradeViaAgent(prompt, agent);
391
- } catch (e) {
392
- console.error(`[ERROR] Grading failed: ${e}`);
393
- process.exit(1);
461
+ if (preGateResult.remaining.length === 0) {
462
+ // All expectations resolved by pre-gates — skip LLM entirely
463
+ console.error(
464
+ `[INFO] All ${expectations.length} expectations resolved by pre-gates, skipping LLM`,
465
+ );
466
+ allExpectations = preGateResult.resolved;
467
+ } else {
468
+ // Build prompt and grade remaining via LLM
469
+ console.error(
470
+ `[INFO] Pre-gates resolved ${preGateResult.resolved.length}/${expectations.length} expectations`,
471
+ );
472
+ const prompt = buildGradingPrompt(preGateResult.remaining, telemetry, transcriptExcerpt, skill);
473
+ console.error(`Grading ${preGateResult.remaining.length} expectations for skill '${skill}'...`);
474
+
475
+ let graderOutput: GraderOutput;
476
+ try {
477
+ graderOutput = await gradeViaAgent(prompt, agent);
478
+ } catch (e) {
479
+ console.error(`[ERROR] Grading failed: ${e}`);
480
+ process.exit(1);
481
+ }
482
+
483
+ // Default scores on LLM results
484
+ const llmExpectations = (graderOutput.expectations ?? []).map((e) => ({
485
+ ...e,
486
+ score: e.score ?? (e.passed ? 1.0 : 0.0),
487
+ source: e.source ?? ("llm" as const),
488
+ }));
489
+
490
+ // Merge pre-gate + LLM results
491
+ allExpectations = [...preGateResult.resolved, ...llmExpectations];
394
492
  }
395
493
 
396
- const result = assembleResult(graderOutput, telemetry, sessionId, skill, transcriptPath);
494
+ // Compute graduated summary
495
+ const graduated = buildGraduatedSummary(allExpectations);
496
+ const passedCount = allExpectations.filter((e) => e.passed).length;
497
+ const totalCount = allExpectations.length;
498
+
499
+ const result: GradingResult = {
500
+ session_id: sessionId,
501
+ skill_name: skill,
502
+ transcript_path: transcriptPath,
503
+ graded_at: new Date().toISOString(),
504
+ expectations: allExpectations,
505
+ summary: {
506
+ passed: passedCount,
507
+ failed: totalCount - passedCount,
508
+ total: totalCount,
509
+ pass_rate: totalCount > 0 ? passedCount / totalCount : 0,
510
+ mean_score: graduated.mean_score,
511
+ score_std_dev: graduated.score_std_dev,
512
+ },
513
+ execution_metrics: buildExecutionMetrics(telemetry),
514
+ claims: [],
515
+ eval_feedback: { suggestions: [], overall: "" },
516
+ };
397
517
 
398
518
  const outputPath = values.output ?? "grading.json";
399
519
  const outputDir = dirname(outputPath);
@@ -0,0 +1,104 @@
1
+ /**
2
+ * pre-gates.ts
3
+ *
4
+ * Deterministic pre-gate checks that resolve grading expectations without LLM.
5
+ * Each gate matches an expectation text pattern and resolves it using telemetry data.
6
+ */
7
+
8
+ import type { GradingExpectation, SessionTelemetryRecord } from "../types.js";
9
+
10
+ // ---------------------------------------------------------------------------
11
+ // Gate definitions
12
+ // ---------------------------------------------------------------------------
13
+
14
/** A deterministic check that can settle an expectation without an LLM call. */
export interface PreGate {
  /** Identifier reported in the evidence string of expectations this gate resolves. */
  name: string;
  /** Tested against the expectation text to decide whether this gate applies. */
  pattern: RegExp;
  /** Returns true when the expectation should be recorded as passed. */
  check: (ctx: PreGateContext) => boolean;
}
19
+
20
/** Session data handed to every gate's `check` function. */
export interface PreGateContext {
  /** Telemetry record of the session being graded. */
  telemetry: SessionTelemetryRecord;
  /** Name of the skill under evaluation. */
  skillName: string;
  /** Optional transcript excerpt that gates may inspect. */
  transcriptExcerpt?: string;
}
25
+
26
/** Outcome of running the pre-gates over a list of expectation texts. */
export interface PreGateResult {
  /** Expectations that a gate settled deterministically. */
  resolved: GradingExpectation[];
  /** Expectation texts no gate matched; these still need LLM grading. */
  remaining: string[];
}
30
+
31
/**
 * Built-in pre-gates. Each entry pairs a pattern for the expectation text
 * with a check that reads only telemetry and the optional transcript excerpt.
 */
export const DEFAULT_GATES: PreGate[] = [
  {
    name: "skill_md_read",
    pattern: /(read.*skill\.md|skill\.md.*read)/i,
    check: ({ telemetry, skillName, transcriptExcerpt }) => {
      // Telemetry listing the skill as triggered counts as having read it.
      const triggeredSkills = telemetry.skills_triggered ?? [];
      if (triggeredSkills.includes(skillName)) {
        return true;
      }
      // Otherwise fall back to a transcript mention of reading SKILL.md.
      return transcriptExcerpt ? /Read.*SKILL\.md/i.test(transcriptExcerpt) : false;
    },
  },
  {
    name: "expected_tools_called",
    pattern: /tool[s]?\s+(were\s+)?called/i,
    check: ({ telemetry }) => {
      const toolCalls = telemetry.total_tool_calls ?? 0;
      return toolCalls > 0;
    },
  },
  {
    name: "error_count",
    pattern: /error[s]?\s*(count|encountered)/i,
    check: ({ telemetry }) => {
      // Passes with up to two recorded errors.
      const errors = telemetry.errors_encountered ?? 0;
      return errors <= 2;
    },
  },
  {
    name: "session_completed",
    pattern: /session\s*(completed|finished)/i,
    check: ({ telemetry }) => {
      const turns = telemetry.assistant_turns ?? 0;
      return turns > 0;
    },
  },
];
61
+
62
+ // ---------------------------------------------------------------------------
63
+ // Pre-gate runner
64
+ // ---------------------------------------------------------------------------
65
+
66
/**
 * Words that negate an expectation ("no tools were called", "did not read
 * SKILL.md"). Gate checks only test the positive condition, so a negated
 * expectation must not be resolved by them.
 */
const NEGATION_PATTERN = /\b(?:no|not|never|none|without|zero)\b|n['’]t\b/i;

/**
 * Run pre-gate checks against expectations.
 *
 * For each expectation text the first gate whose pattern matches decides the
 * outcome: the expectation is recorded in `resolved` with
 * `source: "pre-gate"` and a score of 1.0 (pass) or 0.0 (fail). Texts that no
 * gate matches are returned in `remaining` for LLM grading.
 *
 * Expectations that contain a negation are always placed in `remaining`:
 * a gate's check verifies that something DID happen, so applying it to
 * "no tools were called" would report the opposite of the truth.
 *
 * @param expectations Expectation texts to triage.
 * @param ctx Telemetry, skill name and optional transcript excerpt for the checks.
 * @param gates Gates to apply, in priority order. Defaults to `DEFAULT_GATES`.
 * @returns Resolved expectations and the texts still needing LLM grading,
 *          each in input order.
 */
export function runPreGates(
  expectations: string[],
  ctx: PreGateContext,
  gates: PreGate[] = DEFAULT_GATES,
): PreGateResult {
  const resolved: GradingExpectation[] = [];
  const remaining: string[] = [];

  for (const text of expectations) {
    // Negated expectations cannot be settled by a positive-condition check.
    if (NEGATION_PATTERN.test(text)) {
      remaining.push(text);
      continue;
    }

    const gate = gates.find((candidate) => {
      // Global/sticky regexes keep state between test() calls; reset it so
      // every expectation is matched from the start of its text.
      if (candidate.pattern.global || candidate.pattern.sticky) {
        candidate.pattern.lastIndex = 0;
      }
      return candidate.pattern.test(text);
    });

    if (gate === undefined) {
      remaining.push(text);
      continue;
    }

    // First matching gate wins.
    const passed = gate.check(ctx);
    resolved.push({
      text,
      passed,
      evidence: `Pre-gate "${gate.name}": ${passed ? "PASS" : "FAIL"}`,
      score: passed ? 1.0 : 0.0,
      source: "pre-gate",
    });
  }

  return { resolved, remaining };
}
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Claude Code UserPromptSubmit hook: auto-activate.ts
4
+ *
5
+ * Evaluates activation rules against the current session context and
6
+ * outputs suggestions to stderr (shown to Claude as system messages).
7
+ * Suggestions are advisory — exit code is always 0.
8
+ *
9
+ * Session state is tracked to avoid repeated nags within a session.
10
+ */
11
+
12
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
13
+ import { dirname } from "node:path";
14
+ import {
15
+ CLAUDE_SETTINGS_PATH,
16
+ EVOLUTION_AUDIT_LOG,
17
+ QUERY_LOG,
18
+ SELFTUNE_CONFIG_DIR,
19
+ sessionStatePath,
20
+ TELEMETRY_LOG,
21
+ } from "../constants.js";
22
+ import type {
23
+ ActivationContext,
24
+ ActivationRule,
25
+ PromptSubmitPayload,
26
+ SessionState,
27
+ } from "../types.js";
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // Session state persistence
31
+ // ---------------------------------------------------------------------------
32
+
33
/**
 * Load the persisted suggestion state for a session.
 *
 * The stored state is returned only when the file can be read and parsed,
 * belongs to `sessionId`, and carries an array in `suggestions_shown`.
 * In every other case (missing file, unreadable or corrupt content, state of
 * a different session) a fresh, empty state for `sessionId` is returned.
 *
 * @param path Location of the session state JSON file.
 * @param sessionId Session the caller is interested in.
 */
export function loadSessionState(path: string, sessionId: string): SessionState {
  const freshState = (): SessionState => ({
    session_id: sessionId,
    suggestions_shown: [],
    updated_at: new Date().toISOString(),
  });

  let stored: SessionState;
  try {
    // A missing file makes readFileSync throw, which lands in the same
    // "start fresh" path as a corrupt file.
    stored = JSON.parse(readFileSync(path, "utf-8")) as SessionState;
    const matchesSession = stored.session_id === sessionId;
    if (!matchesSession || !Array.isArray(stored.suggestions_shown)) {
      return freshState();
    }
  } catch {
    return freshState();
  }

  return stored;
}
49
+
50
/**
 * Persist session state as pretty-printed JSON (two-space indent).
 *
 * The parent directory is created, including intermediate directories,
 * when it does not exist yet.
 *
 * @param path Destination file.
 * @param state State to serialise.
 */
export function saveSessionState(path: string, state: SessionState): void {
  const parentDir = dirname(path);
  const parentMissing = !existsSync(parentDir);
  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true });
  }

  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(path, serialized, "utf-8");
}
57
+
58
+ // ---------------------------------------------------------------------------
59
+ // PAI coexistence check
60
+ // ---------------------------------------------------------------------------
61
+
62
/**
 * Check if PAI's skill-activation-prompt hook is registered in settings.
 * If so, selftune defers skill-level suggestions.
 *
 * Both hook layouts are recognised: a flat `entry.command` and a nested
 * `entry.hooks[].command`. Malformed entries (null, primitives, objects
 * without a string command) are skipped individually, so one bad entry
 * cannot hide a PAI hook registered elsewhere in the file.
 *
 * Fail-open: a missing, unreadable or unparsable settings file yields false.
 *
 * @param settingsPath Path to the Claude settings JSON file.
 * @returns true when any hook command mentions "skill-activation-prompt".
 */
export function checkPaiCoexistence(settingsPath: string): boolean {
  if (!existsSync(settingsPath)) return false;

  const PAI_HOOK_MARKER = "skill-activation-prompt";

  // True when `candidate` is an object whose `command` string mentions the PAI hook.
  const mentionsPaiHook = (candidate: unknown): boolean => {
    if (typeof candidate !== "object" || candidate === null) return false;
    const command = (candidate as { command?: unknown }).command;
    return typeof command === "string" && command.includes(PAI_HOOK_MARKER);
  };

  try {
    const settings: unknown = JSON.parse(readFileSync(settingsPath, "utf-8"));
    if (typeof settings !== "object" || settings === null) return false;

    const hooks = (settings as { hooks?: unknown }).hooks;
    if (typeof hooks !== "object" || hooks === null) return false;

    // Search all hook entries for skill-activation-prompt
    for (const hookEntries of Object.values(hooks)) {
      if (!Array.isArray(hookEntries)) continue;

      for (const entry of hookEntries as unknown[]) {
        // Skip malformed entries instead of letting them abort the scan.
        if (typeof entry !== "object" || entry === null) continue;

        // Flat layout: entry.command
        if (mentionsPaiHook(entry)) return true;

        // Nested layout: entry.hooks[].command
        const nested = (entry as { hooks?: unknown }).hooks;
        if (Array.isArray(nested) && nested.some(mentionsPaiHook)) return true;
      }
    }
  } catch {
    // fail-open
  }

  return false;
}
106
+
107
+ // ---------------------------------------------------------------------------
108
+ // Rule evaluation engine
109
+ // ---------------------------------------------------------------------------
110
+
111
/**
 * Run activation rules for the current session and collect their suggestions.
 *
 * Rules whose id is already recorded in the session state are skipped, as
 * are rules that throw (fail-open). When at least one rule fires, the ids of
 * the rules that fired are appended to the session state and the state is
 * saved back to `statePath`.
 *
 * @param rules Rules to evaluate, in order.
 * @param ctx Activation context passed to each rule.
 * @param statePath File holding the per-session suggestion state.
 * @returns Suggestion strings of the rules that fired, in rule order.
 */
export function evaluateRules(
  rules: ActivationRule[],
  ctx: ActivationContext,
  statePath: string,
): string[] {
  const state = loadSessionState(statePath, ctx.session_id);
  const fired: Array<[ruleId: string, suggestion: string]> = [];

  for (const rule of rules) {
    const alreadyShown = state.suggestions_shown.includes(rule.id);
    if (alreadyShown) continue;

    try {
      const outcome = rule.evaluate(ctx);
      if (outcome === null) continue;
      fired.push([rule.id, outcome]);
    } catch {
      // fail-open: a rule that throws is simply skipped
    }
  }

  // Only touch the state file when something new was shown.
  if (fired.length > 0) {
    state.suggestions_shown.push(...fired.map(([ruleId]) => ruleId));
    state.updated_at = new Date().toISOString();
    saveSessionState(statePath, state);
  }

  return fired.map(([, suggestion]) => suggestion);
}
148
+
149
+ // ---------------------------------------------------------------------------
150
+ // stdin main (only when executed directly, not when imported)
151
+ // ---------------------------------------------------------------------------
152
+
153
// Hook entry point: runs only when this file is executed directly.
// Reads the UserPromptSubmit payload from stdin, evaluates the default
// activation rules and writes any suggestions to stderr. Always exits 0.
if (import.meta.main) {
  try {
    const stdinText = await Bun.stdin.text();
    const payload: PromptSubmitPayload = JSON.parse(stdinText);
    const sessionId = payload.session_id ?? "unknown";

    // Loaded lazily so importing this module stays lightweight.
    const { DEFAULT_RULES } = await import("../activation-rules.js");

    const ctx: ActivationContext = {
      session_id: sessionId,
      query_log_path: QUERY_LOG,
      telemetry_log_path: TELEMETRY_LOG,
      evolution_audit_log_path: EVOLUTION_AUDIT_LOG,
      selftune_dir: SELFTUNE_CONFIG_DIR,
      settings_path: CLAUDE_SETTINGS_PATH,
    };

    // When PAI's activation hook is registered, selftune stays quiet and
    // leaves skill-level suggestions to PAI.
    const paiHandlesActivation = checkPaiCoexistence(CLAUDE_SETTINGS_PATH);
    if (!paiHandlesActivation) {
      const statePath = sessionStatePath(sessionId);
      evaluateRules(DEFAULT_RULES, ctx, statePath).forEach((suggestion) => {
        // Output to stderr — Claude Code shows stderr as system messages
        process.stderr.write(`[selftune] 💡 Suggestion: ${suggestion}\n`);
      });
    }
  } catch {
    // silent — hooks must never block Claude
  }
  process.exit(0);
}
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Claude Code PreToolUse hook: evolution-guard.ts
4
+ *
5
+ * Fires before Write/Edit tool calls. If the target is a SKILL.md file
6
+ * that has a deployed evolution (i.e., is under active monitoring), and
7
+ * no recent `selftune watch` snapshot exists, this hook BLOCKS the write
8
+ * with exit code 2 and a message suggesting to run watch first.
9
+ *
10
+ * Exit codes:
11
+ * 0 = allow (not a SKILL.md, not monitored, or watch is recent)
12
+ * 2 = block with message (Claude Code convention for PreToolUse hooks)
13
+ *
14
+ * Fail-open: any error → exit 0 (never block accidentally).
15
+ */
16
+
17
+ import { existsSync, readFileSync } from "node:fs";
18
+ import { basename, dirname, join } from "node:path";
19
+ import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
20
+ import type { PreToolUsePayload } from "../types.js";
21
+ import { readJsonl } from "../utils/jsonl.js";
22
+
23
+ // ---------------------------------------------------------------------------
24
+ // Detection helpers (same pattern as skill-change-guard)
25
+ // ---------------------------------------------------------------------------
26
+
27
/**
 * True when the tool call is a Write or Edit whose target file is named
 * SKILL.md (file name compared case-insensitively).
 */
function isSkillMdWrite(toolName: string, filePath: string): boolean {
  const isWriteTool = toolName === "Write" || toolName === "Edit";
  return isWriteTool && basename(filePath).toUpperCase() === "SKILL.MD";
}
31
+
32
/**
 * Derive the skill name from a SKILL.md path: the name of the directory that
 * contains the file, or "unknown" when that name is empty.
 */
function extractSkillName(filePath: string): string {
  const parentDir = dirname(filePath);
  const folderName = basename(parentDir);
  return folderName === "" ? "unknown" : folderName;
}
35
+
36
+ // ---------------------------------------------------------------------------
37
+ // Active monitoring check (reads audit log directly — no evolution imports)
38
+ // ---------------------------------------------------------------------------
39
+
40
/**
 * Decide whether a skill is currently under active monitoring.
 *
 * The evolution audit JSONL is read directly. Only entries whose
 * `skill_name` equals `skillName` are considered, and the skill counts as
 * monitored when the last such entry has the action "deployed". Any other
 * last action (for example "rolled_back"), or no entries at all, means the
 * skill is not monitored.
 *
 * @param skillName Skill to look up.
 * @param auditLogPath Path of the evolution audit log.
 */
export function checkActiveMonitoring(skillName: string, auditLogPath: string): boolean {
  const auditEntries = readJsonl<{
    skill_name?: string;
    action: string;
  }>(auditLogPath);

  // Walk the whole log and remember the action of the latest entry for this skill.
  let latestAction: string | undefined;
  for (const entry of auditEntries) {
    if (entry.skill_name === skillName) {
      latestAction = entry.action;
    }
  }

  return latestAction === "deployed";
}
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // Recent watch snapshot check (reads monitoring dir directly)
63
+ // ---------------------------------------------------------------------------
64
+
65
/**
 * Report whether `monitoring/latest-snapshot.json` under `selftuneDir` is a
 * recent snapshot of the given skill.
 *
 * Returns false when the file is missing, cannot be read or parsed, belongs
 * to a different skill, or is older than `maxAgeHours` hours.
 *
 * @param skillName Skill the snapshot must belong to.
 * @param selftuneDir selftune configuration directory.
 * @param maxAgeHours Maximum accepted snapshot age, in hours.
 */
export function hasRecentWatchSnapshot(
  skillName: string,
  selftuneDir: string,
  maxAgeHours: number,
): boolean {
  const snapshotFile = join(selftuneDir, "monitoring", "latest-snapshot.json");
  if (!existsSync(snapshotFile)) {
    return false;
  }

  try {
    const rawSnapshot = readFileSync(snapshotFile, "utf-8");
    const parsed = JSON.parse(rawSnapshot) as {
      timestamp: string;
      skill_name?: string;
    };

    // A snapshot of another skill says nothing about this one.
    if (parsed.skill_name !== skillName) {
      return false;
    }

    // An unparsable timestamp yields NaN, which fails the comparison below.
    const ageMs = Date.now() - new Date(parsed.timestamp).getTime();
    const limitMs = maxAgeHours * 60 * 60 * 1000;
    return ageMs <= limitMs;
  } catch {
    return false;
  }
}
94
+
95
+ // ---------------------------------------------------------------------------
96
+ // Guard result type
97
+ // ---------------------------------------------------------------------------
98
+
99
/** Describes a blocked write: the exit code to use and the message to show. */
export interface GuardResult {
  /** Process exit code for the hook (2 when the write is blocked). */
  exitCode: number;
  /** Explanation written to stderr when the write is blocked. */
  message: string;
}
103
+
104
+ // ---------------------------------------------------------------------------
105
+ // Core processing logic
106
+ // ---------------------------------------------------------------------------
107
+
108
/** Locations and limits used by the evolution guard. */
export interface GuardOptions {
  /** Path of the evolution audit log (JSONL). */
  auditLogPath: string;
  /** selftune configuration directory that holds the monitoring snapshot. */
  selftuneDir: string;
  /** Maximum accepted age of the watch snapshot, in hours. */
  maxSnapshotAgeHours?: number;
}
113
+
114
/**
 * Decide whether a PreToolUse payload may proceed.
 *
 * The write is blocked only when all of the following hold: the tool call
 * is a Write/Edit of a SKILL.md file, the skill's last audit action is
 * "deployed", and no sufficiently recent watch snapshot exists for it.
 *
 * @param payload PreToolUse payload received from Claude Code.
 * @param options Audit log path, selftune directory and the optional
 *                snapshot age limit (24 hours when omitted).
 * @returns null to allow the write, or a GuardResult with exitCode 2 and an
 *          explanatory message to block it.
 */
export function processEvolutionGuard(
  payload: PreToolUsePayload,
  options: GuardOptions,
): GuardResult | null {
  const candidatePath = payload.tool_input?.file_path;
  const filePath = typeof candidatePath === "string" ? candidatePath : "";

  if (!isSkillMdWrite(payload.tool_name, filePath)) {
    return null;
  }

  const skillName = extractSkillName(filePath);
  const maxSnapshotAgeHours =
    options.maxSnapshotAgeHours === undefined ? 24 : options.maxSnapshotAgeHours;

  // Skills without a deployed evolution are not guarded.
  const monitored = checkActiveMonitoring(skillName, options.auditLogPath);
  if (!monitored) {
    return null;
  }

  // A recent watch snapshot means health was checked; let the write through.
  const recentlyWatched = hasRecentWatchSnapshot(
    skillName,
    options.selftuneDir,
    maxSnapshotAgeHours,
  );
  if (recentlyWatched) {
    return null;
  }

  // Monitored skill with no recent watch: block the write.
  return {
    exitCode: 2,
    message: `[selftune] Skill "${skillName}" has a deployed evolution and is under active monitoring. Run \`selftune watch --skill ${skillName}\` before modifying SKILL.md to check current health.`,
  };
}
142
+
143
+ // ---------------------------------------------------------------------------
144
+ // stdin main (only when executed directly, not when imported)
145
+ // ---------------------------------------------------------------------------
146
+
147
// Hook entry point: runs only when this file is executed directly.
// Reads the PreToolUse payload from stdin and exits with 2 (block, message
// on stderr) or 0 (allow). Any error results in exit 0.
if (import.meta.main) {
  let exitCode = 0;

  try {
    const stdinText = await Bun.stdin.text();
    const payload: PreToolUsePayload = JSON.parse(stdinText);

    const verdict = processEvolutionGuard(payload, {
      auditLogPath: EVOLUTION_AUDIT_LOG,
      selftuneDir: SELFTUNE_CONFIG_DIR,
    });

    if (verdict) {
      // Exit code 2 = block with message
      process.stderr.write(`${verdict.message}\n`);
      exitCode = 2;
    }
  } catch {
    // Fail-open: any error → allow the write
  }

  process.exit(exitCode);
}