npm - selftune - Versions diffs - 0.1.4 → 0.2.0 - Mend

selftune 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

package/.claude/agents/diagnosis-analyst.md +146 -0
package/.claude/agents/evolution-reviewer.md +167 -0
package/.claude/agents/integration-guide.md +200 -0
package/.claude/agents/pattern-analyst.md +147 -0
package/CHANGELOG.md +37 -0
package/README.md +96 -256
package/assets/BeforeAfter.gif +0 -0
package/assets/FeedbackLoop.gif +0 -0
package/assets/logo.svg +9 -0
package/assets/skill-health-badge.svg +20 -0
package/cli/selftune/activation-rules.ts +171 -0
package/cli/selftune/badge/badge-data.ts +108 -0
package/cli/selftune/badge/badge-svg.ts +212 -0
package/cli/selftune/badge/badge.ts +103 -0
package/cli/selftune/constants.ts +75 -1
package/cli/selftune/contribute/bundle.ts +314 -0
package/cli/selftune/contribute/contribute.ts +214 -0
package/cli/selftune/contribute/sanitize.ts +162 -0
package/cli/selftune/cron/setup.ts +266 -0
package/cli/selftune/dashboard-server.ts +582 -0
package/cli/selftune/dashboard.ts +25 -3
package/cli/selftune/eval/baseline.ts +247 -0
package/cli/selftune/eval/composability.ts +117 -0
package/cli/selftune/eval/generate-unit-tests.ts +143 -0
package/cli/selftune/eval/hooks-to-evals.ts +68 -2
package/cli/selftune/eval/import-skillsbench.ts +221 -0
package/cli/selftune/eval/synthetic-evals.ts +172 -0
package/cli/selftune/eval/unit-test-cli.ts +152 -0
package/cli/selftune/eval/unit-test.ts +196 -0
package/cli/selftune/evolution/deploy-proposal.ts +142 -1
package/cli/selftune/evolution/evolve-body.ts +492 -0
package/cli/selftune/evolution/evolve.ts +466 -103
package/cli/selftune/evolution/extract-patterns.ts +32 -1
package/cli/selftune/evolution/pareto.ts +314 -0
package/cli/selftune/evolution/propose-body.ts +171 -0
package/cli/selftune/evolution/propose-description.ts +100 -2
package/cli/selftune/evolution/propose-routing.ts +166 -0
package/cli/selftune/evolution/refine-body.ts +141 -0
package/cli/selftune/evolution/rollback.ts +19 -2
package/cli/selftune/evolution/validate-body.ts +254 -0
package/cli/selftune/evolution/validate-proposal.ts +257 -35
package/cli/selftune/evolution/validate-routing.ts +177 -0
package/cli/selftune/grading/grade-session.ts +138 -18
package/cli/selftune/grading/pre-gates.ts +104 -0
package/cli/selftune/hooks/auto-activate.ts +185 -0
package/cli/selftune/hooks/evolution-guard.ts +165 -0
package/cli/selftune/hooks/skill-change-guard.ts +112 -0
package/cli/selftune/index.ts +88 -0
package/cli/selftune/ingestors/claude-replay.ts +351 -0
package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
package/cli/selftune/init.ts +150 -3
package/cli/selftune/memory/writer.ts +447 -0
package/cli/selftune/monitoring/watch.ts +25 -2
package/cli/selftune/status.ts +17 -13
package/cli/selftune/types.ts +377 -5
package/cli/selftune/utils/frontmatter.ts +217 -0
package/cli/selftune/utils/llm-call.ts +29 -3
package/cli/selftune/utils/transcript.ts +35 -0
package/cli/selftune/utils/trigger-check.ts +89 -0
package/cli/selftune/utils/tui.ts +156 -0
package/dashboard/index.html +569 -8
package/package.json +8 -4
package/skill/SKILL.md +124 -8
package/skill/Workflows/AutoActivation.md +144 -0
package/skill/Workflows/Badge.md +118 -0
package/skill/Workflows/Baseline.md +121 -0
package/skill/Workflows/Composability.md +100 -0
package/skill/Workflows/Contribute.md +91 -0
package/skill/Workflows/Cron.md +155 -0
package/skill/Workflows/Dashboard.md +203 -0
package/skill/Workflows/Doctor.md +37 -1
package/skill/Workflows/Evals.md +69 -1
package/skill/Workflows/EvolutionMemory.md +152 -0
package/skill/Workflows/Evolve.md +111 -6
package/skill/Workflows/EvolveBody.md +159 -0
package/skill/Workflows/ImportSkillsBench.md +111 -0
package/skill/Workflows/Ingest.md +117 -3
package/skill/Workflows/Initialize.md +57 -3
package/skill/Workflows/Replay.md +70 -0
package/skill/Workflows/Rollback.md +20 -1
package/skill/Workflows/UnitTest.md +138 -0
package/skill/Workflows/Watch.md +22 -0
package/skill/settings_snippet.json +23 -0
package/templates/activation-rules-default.json +27 -0
package/templates/multi-skill-settings.json +64 -0
package/templates/single-skill-settings.json +58 -0

package/cli/selftune/grading/grade-session.ts CHANGED Viewed

@@ -16,6 +16,7 @@ import { TELEMETRY_LOG } from "../constants.js";
 import type {
   ExecutionMetrics,
   GraderOutput,
+  GradingExpectation,
   GradingResult,
   SessionTelemetryRecord,
 } from "../types.js";
@@ -26,6 +27,7 @@ import {
   callViaAgent,
 } from "../utils/llm-call.js";
 import { readExcerpt } from "../utils/transcript.js";
+import { type PreGateContext, runPreGates } from "./pre-gates.js";
 // Re-export for backward compatibility
 export { detectAgent, stripMarkdownFences } from "../utils/llm-call.js";
@@ -48,24 +50,36 @@ export const GRADER_SYSTEM = `You are a rigorous skill session evaluator. You re
 Grade each expectation and output ONLY valid JSON matching this schema:
 {
   "expectations": [
-    {"text": "...", "passed": true/false, "evidence": "specific quote or metric"}
+    {"text": "...", "passed": true/false, "evidence": "specific quote or metric", "score": 0.0-1.0}
   ],
-  "summary": {"passed": N, "failed": N, "total": N, "pass_rate": 0.0},
+  "summary": {"passed": N, "failed": N, "total": N, "pass_rate": 0.0, "mean_score": 0.0},
   "claims": [
     {"claim": "...", "type": "factual|process|quality", "verified": true/false, "evidence": "..."}
   ],
   "eval_feedback": {
     "suggestions": [{"assertion": "...", "reason": "..."}],
     "overall": "one sentence"
-  }
+  },
+  "failure_feedback": [
+    {"query": "the user query that failed", "failure_reason": "why it failed", "improvement_hint": "how to fix", "invocation_type": "explicit|implicit|contextual|negative"}
+  ]
 }
+Score guide:
+- 1.0: Clear, specific evidence of full completion
+- 0.7-0.9: Strong evidence with minor gaps
+- 0.4-0.6: Partial evidence or partial completion
+- 0.1-0.3: Weak evidence, mostly not met
+- 0.0: No evidence or clearly not met
 Rules:
 - PASS only when there is clear, specific evidence — not assumptions
 - FAIL when evidence is absent or contradictory
 - Cite exact quotes or specific metric values
 - Extract 2-4 implicit claims from the transcript and verify them
-- Suggest eval improvements only for clear gaps`;
+- Suggest eval improvements only for clear gaps
+- Set score to reflect confidence level (0.0-1.0)
+- For each FAILED expectation, provide a failure_feedback entry with the relevant query, specific reason for failure, and actionable improvement hint`;
 // ---------------------------------------------------------------------------
 // Data lookup helpers
@@ -159,6 +173,39 @@ export function buildExecutionMetrics(telemetry: SessionTelemetryRecord): Execut
   };
 }
+// ---------------------------------------------------------------------------
+// Graduated scoring
+// ---------------------------------------------------------------------------
+/**
+ * Compute graduated scoring summary from expectations.
+ * Uses score field if present, defaults to 1.0 for pass, 0.0 for fail.
+ *
+ * Every score is clamped into [0, 1]; a score that is not a finite number
+ * is replaced by the pass/fail fallback. The standard deviation is the
+ * population form (variance divided by the number of expectations).
+ *
+ * @param expectations - Graded expectations to summarise.
+ * @returns Mean and standard deviation of the scores, each rounded to three
+ *   decimal places. Both are 0 when `expectations` is empty.
+ */
+export function buildGraduatedSummary(expectations: GradingExpectation[]): {
+  mean_score: number;
+  score_std_dev: number;
+} {
+  if (expectations.length === 0) {
+    return { mean_score: 0, score_std_dev: 0 }; // avoids dividing by zero below
+  }
+  const scores = expectations.map((e) => {
+    const fallback = e.passed ? 1.0 : 0.0;
+    const raw = e.score ?? fallback; // a null/undefined score falls back to pass/fail
+    if (!Number.isFinite(raw)) return fallback; // NaN or an infinity
+    return Math.min(1, Math.max(0, raw)); // clamp into [0, 1]
+  });
+  const mean = scores.reduce((sum, s) => sum + s, 0) / scores.length;
+  const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / scores.length; // population variance
+  const stdDev = Math.sqrt(variance);
+  return {
+    mean_score: Math.round(mean * 1000) / 1000, // round to 3 decimal places
+    score_std_dev: Math.round(stdDev * 1000) / 1000,
+  };
+}
 // ---------------------------------------------------------------------------
 // Prompt building
 // ---------------------------------------------------------------------------
@@ -234,16 +281,31 @@ export function assembleResult(
   skillName: string,
   transcriptPath: string,
 ): GradingResult {
+  // Default missing scores on expectations
+  const expectations = (graderOutput?.expectations ?? []).map((e) => ({
+    ...e,
+    score: e.score ?? (e.passed ? 1.0 : 0.0),
+    source: e.source ?? ("llm" as const),
+  }));
+  const baseSummary = graderOutput?.summary ?? { passed: 0, failed: 0, total: 0, pass_rate: 0 };
+  const graduated = buildGraduatedSummary(expectations);
   return {
     session_id: sessionId ?? "unknown",
     skill_name: skillName ?? "unknown",
     transcript_path: transcriptPath ?? "",
     graded_at: new Date().toISOString(),
-    expectations: graderOutput?.expectations ?? [],
-    summary: graderOutput?.summary ?? { passed: 0, failed: 0, total: 0, pass_rate: 0 },
+    expectations,
+    summary: {
+      ...baseSummary,
+      mean_score: graduated.mean_score,
+      score_std_dev: graduated.score_std_dev,
+    },
     execution_metrics: buildExecutionMetrics(telemetry ?? ({} as SessionTelemetryRecord)),
     claims: graderOutput?.claims ?? [],
     eval_feedback: graderOutput?.eval_feedback ?? { suggestions: [], overall: "" },
+    failure_feedback: graderOutput?.failure_feedback,
   };
 }
@@ -254,10 +316,16 @@ export function assembleResult(
 function printSummary(result: GradingResult): void {
   const { summary } = result;
   const rate = summary.pass_rate ?? 0;
-  console.log(`\nResults: ${summary.passed}/${summary.total} passed (${Math.round(rate * 100)}%)`);
+  const meanStr =
+    summary.mean_score != null ? ` | mean score: ${summary.mean_score.toFixed(2)}` : "";
+  console.log(
+    `\nResults: ${summary.passed}/${summary.total} passed (${Math.round(rate * 100)}%)${meanStr}`,
+  );
   for (const exp of result.expectations ?? []) {
     const icon = exp.passed ? "\u2713" : "\u2717";
-    console.log(`  ${icon} ${String(exp.text ?? "").slice(0, 70)}`);
+    const scoreStr = exp.score != null ? ` [${exp.score.toFixed(1)}]` : "";
+    const sourceStr = exp.source ? ` (${exp.source})` : "";
+    console.log(`  ${icon}${scoreStr}${sourceStr} ${String(exp.text ?? "").slice(0, 70)}`);
     if (!exp.passed) {
       console.log(`      -> ${String(exp.evidence ?? "").slice(0, 100)}`);
     }
@@ -380,20 +448,72 @@ export async function cliMain(): Promise<void> {
     console.log("==========================\n");
   }
-  // --- Build prompt and grade ---
-  const prompt = buildGradingPrompt(expectations, telemetry, transcriptExcerpt, skill);
+  // --- Run pre-gates first ---
+  const preGateCtx: PreGateContext = {
+    telemetry,
+    skillName: skill,
+    transcriptExcerpt,
+  };
+  const preGateResult = runPreGates(expectations, preGateCtx);
-  console.error(`Grading ${expectations.length} expectations for skill '${skill}'...`);
+  let allExpectations: GradingExpectation[];
-  let graderOutput: GraderOutput;
-  try {
-    graderOutput = await gradeViaAgent(prompt, agent);
-  } catch (e) {
-    console.error(`[ERROR] Grading failed: ${e}`);
-    process.exit(1);
+  if (preGateResult.remaining.length === 0) {
+    // All expectations resolved by pre-gates — skip LLM entirely
+    console.error(
+      `[INFO] All ${expectations.length} expectations resolved by pre-gates, skipping LLM`,
+    );
+    allExpectations = preGateResult.resolved;
+  } else {
+    // Build prompt and grade remaining via LLM
+    console.error(
+      `[INFO] Pre-gates resolved ${preGateResult.resolved.length}/${expectations.length} expectations`,
+    );
+    const prompt = buildGradingPrompt(preGateResult.remaining, telemetry, transcriptExcerpt, skill);
+    console.error(`Grading ${preGateResult.remaining.length} expectations for skill '${skill}'...`);
+    let graderOutput: GraderOutput;
+    try {
+      graderOutput = await gradeViaAgent(prompt, agent);
+    } catch (e) {
+      console.error(`[ERROR] Grading failed: ${e}`);
+      process.exit(1);
+    }
+    // Default scores on LLM results
+    const llmExpectations = (graderOutput.expectations ?? []).map((e) => ({
+      ...e,
+      score: e.score ?? (e.passed ? 1.0 : 0.0),
+      source: e.source ?? ("llm" as const),
+    }));
+    // Merge pre-gate + LLM results
+    allExpectations = [...preGateResult.resolved, ...llmExpectations];
   }
-  const result = assembleResult(graderOutput, telemetry, sessionId, skill, transcriptPath);
+  // Compute graduated summary
+  const graduated = buildGraduatedSummary(allExpectations);
+  const passedCount = allExpectations.filter((e) => e.passed).length;
+  const totalCount = allExpectations.length;
+  const result: GradingResult = {
+    session_id: sessionId,
+    skill_name: skill,
+    transcript_path: transcriptPath,
+    graded_at: new Date().toISOString(),
+    expectations: allExpectations,
+    summary: {
+      passed: passedCount,
+      failed: totalCount - passedCount,
+      total: totalCount,
+      pass_rate: totalCount > 0 ? passedCount / totalCount : 0,
+      mean_score: graduated.mean_score,
+      score_std_dev: graduated.score_std_dev,
+    },
+    execution_metrics: buildExecutionMetrics(telemetry),
+    claims: [],
+    eval_feedback: { suggestions: [], overall: "" },
+  };
   const outputPath = values.output ?? "grading.json";
   const outputDir = dirname(outputPath);

package/cli/selftune/grading/pre-gates.ts ADDED Viewed

@@ -0,0 +1,104 @@
+/**
+ * pre-gates.ts
+ *
+ * Deterministic pre-gate checks that resolve grading expectations without LLM.
+ * Each gate matches an expectation text pattern and resolves it using telemetry data.
+ */
+import type { GradingExpectation, SessionTelemetryRecord } from "../types.js";
+// ---------------------------------------------------------------------------
+// Gate definitions
+// ---------------------------------------------------------------------------
+export interface PreGate { // One deterministic check that can settle an expectation without the LLM.
+  name: string; // Label quoted in the evidence string of an expectation this gate resolves.
+  pattern: RegExp; // Tested against the expectation text to decide whether this gate applies.
+  check: (ctx: PreGateContext) => boolean; // Returns true when the expectation passes for the given context.
+}
+export interface PreGateContext { // Data handed to every gate's check function.
+  telemetry: SessionTelemetryRecord; // Session telemetry; the default gates read its counters and skills_triggered.
+  skillName: string; // Skill being graded; the skill_md_read gate looks for it in telemetry.skills_triggered.
+  transcriptExcerpt?: string; // Optional transcript text; the skill_md_read gate searches it for a SKILL.md read.
+}
+export interface PreGateResult { // Outcome of runPreGates.
+  resolved: GradingExpectation[]; // Expectations decided by a gate (pass or fail).
+  remaining: string[]; // Expectation texts that matched no gate.
+}
+/**
+ * Default set of pre-gates.
+ *
+ * Array order is significant for runPreGates, which stops at the first gate
+ * whose pattern matches an expectation text. Patterns are unanchored and
+ * case-insensitive, so they match anywhere inside the text.
+ */
+export const DEFAULT_GATES: PreGate[] = [
+  {
+    name: "skill_md_read",
+    pattern: /(read.*skill\.md|skill\.md.*read)/i, // "read" and "skill.md" in either order
+    check: (ctx) => {
+      // Check if skills_triggered contains the skill name
+      const triggered = ctx.telemetry.skills_triggered ?? [];
+      if (triggered.includes(ctx.skillName)) return true;
+      // Also check if transcript mentions reading SKILL.md
+      if (ctx.transcriptExcerpt && /Read.*SKILL\.md/i.test(ctx.transcriptExcerpt)) return true;
+      return false;
+    },
+  },
+  {
+    name: "expected_tools_called",
+    pattern: /tool[s]?\s+(were\s+)?called/i,
+    check: (ctx) => (ctx.telemetry.total_tool_calls ?? 0) > 0, // passes when any tool call was recorded; does not inspect which tools
+  },
+  {
+    name: "error_count",
+    pattern: /error[s]?\s*(count|encountered)/i, // NOTE(review): also matches phrasings like "no errors encountered"
+    check: (ctx) => (ctx.telemetry.errors_encountered ?? 0) <= 2, // tolerates up to 2 errors; a missing count is treated as 0
+  },
+  {
+    name: "session_completed",
+    pattern: /session\s*(completed|finished)/i,
+    check: (ctx) => (ctx.telemetry.assistant_turns ?? 0) > 0, // passes when at least one assistant turn was recorded
+  },
+];
+// ---------------------------------------------------------------------------
+// Pre-gate runner
+// ---------------------------------------------------------------------------
+/**
+ * Run pre-gate checks against expectations. Returns resolved expectations
+ * (with source: "pre-gate" and score: 1.0 or 0.0) and remaining expectation
+ * texts that need LLM grading.
+ *
+ * For each expectation text the gates are tried in array order and only the
+ * first gate whose pattern matches is consulted. A gate that matches always
+ * resolves the expectation, whether its check passes or fails.
+ *
+ * @param expectations - Expectation texts to classify.
+ * @param ctx - Context passed unchanged to each matching gate's check.
+ * @param gates - Gates to apply; defaults to DEFAULT_GATES.
+ * @returns The resolved expectations and the unmatched texts, each list in
+ *   the order the texts were supplied.
+ */
+export function runPreGates(
+  expectations: string[],
+  ctx: PreGateContext,
+  gates: PreGate[] = DEFAULT_GATES,
+): PreGateResult {
+  const resolved: GradingExpectation[] = [];
+  const remaining: string[] = [];
+  for (const text of expectations) {
+    let matched = false;
+    for (const gate of gates) {
+      if (gate.pattern.global || gate.pattern.sticky) {
+        gate.pattern.lastIndex = 0; // g/y regexes carry lastIndex between test() calls; restart from 0
+      }
+      if (gate.pattern.test(text)) {
+        const passed = gate.check(ctx);
+        resolved.push({
+          text,
+          passed,
+          evidence: `Pre-gate "${gate.name}": ${passed ? "PASS" : "FAIL"}`,
+          score: passed ? 1.0 : 0.0, // pre-gates are binary: no partial credit
+          source: "pre-gate",
+        });
+        matched = true;
+        break; // first matching gate wins
+      }
+    }
+    if (!matched) {
+      remaining.push(text); // no gate applies
+    }
+  }
+  return { resolved, remaining };
+}

package/cli/selftune/hooks/auto-activate.ts ADDED Viewed

@@ -0,0 +1,185 @@
+#!/usr/bin/env bun
+/**
+ * Claude Code UserPromptSubmit hook: auto-activate.ts
+ *
+ * Evaluates activation rules against the current session context and
+ * outputs suggestions to stderr (shown to Claude as system messages).
+ * Suggestions are advisory — exit code is always 0.
+ *
+ * Session state is tracked to avoid repeated nags within a session.
+ */
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname } from "node:path";
+import {
+  CLAUDE_SETTINGS_PATH,
+  EVOLUTION_AUDIT_LOG,
+  QUERY_LOG,
+  SELFTUNE_CONFIG_DIR,
+  sessionStatePath,
+  TELEMETRY_LOG,
+} from "../constants.js";
+import type {
+  ActivationContext,
+  ActivationRule,
+  PromptSubmitPayload,
+  SessionState,
+} from "../types.js";
+// ---------------------------------------------------------------------------
+// Session state persistence
+// ---------------------------------------------------------------------------
+export function loadSessionState(path: string, sessionId: string): SessionState { // Loads the state stored at path for sessionId, or returns a fresh empty state.
+  if (!existsSync(path)) {
+    return { session_id: sessionId, suggestions_shown: [], updated_at: new Date().toISOString() }; // no state file yet
+  }
+  try {
+    const data = JSON.parse(readFileSync(path, "utf-8")) as SessionState; // cast only; the shape is checked on the next line
+    if (data.session_id === sessionId && Array.isArray(data.suggestions_shown)) { // reuse only state written for this same session
+      return data;
+    }
+  } catch {
+    // corrupt file — start fresh
+  }
+  return { session_id: sessionId, suggestions_shown: [], updated_at: new Date().toISOString() }; // unreadable, malformed, or from another session
+}
+export function saveSessionState(path: string, state: SessionState): void { // Writes state to path as pretty-printed JSON; filesystem errors are not caught here.
+  const dir = dirname(path);
+  if (!existsSync(dir)) {
+    mkdirSync(dir, { recursive: true }); // create missing parent directories
+  }
+  writeFileSync(path, JSON.stringify(state, null, 2), "utf-8"); // replaces any existing file content
+}
+// ---------------------------------------------------------------------------
+// PAI coexistence check
+// ---------------------------------------------------------------------------
+/**
+ * Check if PAI's skill-activation-prompt hook is registered in settings.
+ * If so, selftune defers skill-level suggestions.
+ *
+ * Every hook event in `settings.hooks` is scanned. A match is any command
+ * string containing "skill-activation-prompt", either directly on an entry
+ * or on one of the entry's nested `hooks`.
+ *
+ * @param settingsPath - Path of the JSON settings file to inspect.
+ * @returns true when a matching command is found; false when the file is
+ *   missing, has no `hooks`, contains no match, or cannot be read or parsed.
+ *   NOTE(review): an exception raised mid-scan (for example a null entry)
+ *   also ends the scan and yields false, even if a later entry would match.
+ */
+export function checkPaiCoexistence(settingsPath: string): boolean {
+  if (!existsSync(settingsPath)) return false;
+  try {
+    const settings = JSON.parse(readFileSync(settingsPath, "utf-8")) as {
+      hooks?: Record<string, Array<{ command?: string; hooks?: Array<{ command?: string }> }>>;
+    };
+    if (!settings.hooks) return false;
+    // Search all hook entries for skill-activation-prompt
+    for (const hookEntries of Object.values(settings.hooks)) {
+      if (!Array.isArray(hookEntries)) continue; // the cast above is unchecked, so skip non-array values
+      for (const entry of hookEntries) {
+        // Check flat entry.command
+        if (
+          typeof entry.command === "string" &&
+          entry.command.includes("skill-activation-prompt")
+        ) {
+          return true;
+        }
+        // Check nested entry.hooks[].command
+        if (entry.hooks && Array.isArray(entry.hooks)) {
+          for (const hook of entry.hooks) {
+            if (
+              typeof hook.command === "string" &&
+              hook.command.includes("skill-activation-prompt")
+            ) {
+              return true;
+            }
+          }
+        }
+      }
+    }
+  } catch {
+    // fail-open
+  }
+  return false;
+}
+// ---------------------------------------------------------------------------
+// Rule evaluation engine
+// ---------------------------------------------------------------------------
+/**
+ * Evaluate all rules against the current context, respecting session state.
+ * Returns array of suggestion strings for rules that fired.
+ *
+ * A rule whose id is already recorded in the session state is skipped. A rule
+ * "fires" when its evaluate() returns anything other than null; its id is
+ * then recorded so it is not shown again in the same session. Rules that
+ * throw are skipped silently.
+ *
+ * @param rules - Rules to evaluate, in order.
+ * @param ctx - Activation context; `ctx.session_id` selects the session state.
+ * @param statePath - File used to load and persist the session state.
+ * @returns Suggestions from the rules that fired, in rule order.
+ *
+ * The state file is rewritten only when at least one rule fired. Errors from
+ * saving the state are not caught here and propagate to the caller.
+ */
+export function evaluateRules(
+  rules: ActivationRule[],
+  ctx: ActivationContext,
+  statePath: string,
+): string[] {
+  const state = loadSessionState(statePath, ctx.session_id);
+  const suggestions: string[] = [];
+  const newlyShown: string[] = [];
+  for (const rule of rules) {
+    // Skip rules already shown this session
+    if (state.suggestions_shown.includes(rule.id)) continue;
+    try {
+      const suggestion = rule.evaluate(ctx);
+      if (suggestion !== null) { // null means the rule did not fire
+        suggestions.push(suggestion);
+        newlyShown.push(rule.id);
+      }
+    } catch {
+      // fail-open: skip rules that throw
+    }
+  }
+  // Persist updated session state
+  if (newlyShown.length > 0) {
+    state.suggestions_shown.push(...newlyShown);
+    state.updated_at = new Date().toISOString();
+    saveSessionState(statePath, state); // outside the per-rule try: a write failure throws to the caller
+  }
+  return suggestions;
+}
+// ---------------------------------------------------------------------------
+// stdin main (only when executed directly, not when imported)
+// ---------------------------------------------------------------------------
+if (import.meta.main) { // hook entry point: reads the JSON payload from stdin and always exits 0
+  try {
+    const payload: PromptSubmitPayload = JSON.parse(await Bun.stdin.text());
+    const sessionId = payload.session_id ?? "unknown"; // payloads without an id share the "unknown" session
+    // Dynamically import default rules (keeps hook file lightweight)
+    const { DEFAULT_RULES } = await import("../activation-rules.js");
+    const ctx: ActivationContext = {
+      session_id: sessionId,
+      query_log_path: QUERY_LOG,
+      telemetry_log_path: TELEMETRY_LOG,
+      evolution_audit_log_path: EVOLUTION_AUDIT_LOG,
+      selftune_dir: SELFTUNE_CONFIG_DIR,
+      settings_path: CLAUDE_SETTINGS_PATH,
+    };
+    // Check PAI coexistence — if PAI is active, skip selftune suggestions
+    // (PAI handles skill-level activation; selftune handles observability)
+    if (!checkPaiCoexistence(CLAUDE_SETTINGS_PATH)) {
+      const statePath = sessionStatePath(sessionId);
+      const suggestions = evaluateRules(DEFAULT_RULES, ctx, statePath);
+      for (const s of suggestions) {
+        // Output to stderr — Claude Code shows stderr as system messages
+        process.stderr.write(`[selftune] 💡 Suggestion: ${s}\n`);
+      }
+    }
+  } catch {
+    // silent — hooks must never block Claude
+  }
+  process.exit(0); // advisory hook: exit code is 0 on success and on failure
+}

package/cli/selftune/hooks/evolution-guard.ts ADDED Viewed

@@ -0,0 +1,165 @@
+#!/usr/bin/env bun
+/**
+ * Claude Code PreToolUse hook: evolution-guard.ts
+ *
+ * Fires before Write/Edit tool calls. If the target is a SKILL.md file
+ * that has a deployed evolution (i.e., is under active monitoring), and
+ * no recent `selftune watch` snapshot exists, this hook BLOCKS the write
+ * with exit code 2 and a message suggesting to run watch first.
+ *
+ * Exit codes:
+ *   0 = allow (not a SKILL.md, not monitored, or watch is recent)
+ *   2 = block with message (Claude Code convention for PreToolUse hooks)
+ *
+ * Fail-open: any error → exit 0 (never block accidentally).
+ */
+import { existsSync, readFileSync } from "node:fs";
+import { basename, dirname, join } from "node:path";
+import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
+import type { PreToolUsePayload } from "../types.js";
+import { readJsonl } from "../utils/jsonl.js";
+// ---------------------------------------------------------------------------
+// Detection helpers (same pattern as skill-change-guard)
+// ---------------------------------------------------------------------------
+function isSkillMdWrite(toolName: string, filePath: string): boolean { // True when a Write or Edit tool call targets a file named SKILL.md.
+  if (toolName !== "Write" && toolName !== "Edit") return false; // other tools are never guarded
+  return basename(filePath).toUpperCase() === "SKILL.MD"; // file name compared case-insensitively; directory is ignored
+}
+function extractSkillName(filePath: string): string { // Skill name is taken to be the name of the directory containing the file.
+  return basename(dirname(filePath)) || "unknown"; // an empty directory name falls back to "unknown"
+}
+// ---------------------------------------------------------------------------
+// Active monitoring check (reads audit log directly — no evolution imports)
+// ---------------------------------------------------------------------------
+/**
+ * Check if a skill has an active deployed evolution (meaning it's under monitoring).
+ * Reads the evolution audit JSONL directly to respect architecture lint rules.
+ *
+ * A skill is "actively monitored" if its last audit action is "deployed".
+ * If the last action is "rolled_back", it's no longer monitored.
+ *
+ * Any last action other than "deployed" yields false, as does a log with no
+ * entries for the skill.
+ *
+ * @param skillName - Compared for exact equality with each entry's `skill_name`.
+ * @param auditLogPath - Path of the evolution audit JSONL file.
+ * @returns true only when the final entry for the skill has action "deployed".
+ *   NOTE(review): "final" assumes readJsonl returns entries in file order — confirm.
+ */
+export function checkActiveMonitoring(skillName: string, auditLogPath: string): boolean {
+  const entries = readJsonl<{
+    skill_name?: string;
+    action: string;
+  }>(auditLogPath);
+  // Filter entries for this skill by skill_name field
+  const skillEntries = entries.filter((e) => e.skill_name === skillName);
+  if (skillEntries.length === 0) return false; // skill never appears in the audit log
+  const lastEntry = skillEntries[skillEntries.length - 1];
+  return lastEntry.action === "deployed";
+}
+// ---------------------------------------------------------------------------
+// Recent watch snapshot check (reads monitoring dir directly)
+// ---------------------------------------------------------------------------
+/**
+ * Check if there's a recent monitoring snapshot for the given skill.
+ * "Recent" means within `maxAgeHours` hours.
+ *
+ * Only the single file `monitoring/latest-snapshot.json` under `selftuneDir`
+ * is consulted, so a snapshot written for a different skill counts as
+ * "no snapshot" for this one.
+ *
+ * @param skillName - Must equal the snapshot's `skill_name`.
+ * @param selftuneDir - Directory that contains the `monitoring` folder.
+ * @param maxAgeHours - Maximum accepted snapshot age, in hours (inclusive).
+ * @returns true when a snapshot for the skill exists and is recent enough;
+ *   false when the file is missing, unreadable, unparsable, or for another skill.
+ */
+export function hasRecentWatchSnapshot(
+  skillName: string,
+  selftuneDir: string,
+  maxAgeHours: number,
+): boolean {
+  const snapshotPath = join(selftuneDir, "monitoring", "latest-snapshot.json");
+  if (!existsSync(snapshotPath)) return false;
+  try {
+    const snapshot = JSON.parse(readFileSync(snapshotPath, "utf-8")) as {
+      timestamp: string;
+      skill_name?: string;
+    };
+    // Must be for the same skill
+    if (snapshot.skill_name !== skillName) return false;
+    // Must be recent
+    const snapshotAge = Date.now() - new Date(snapshot.timestamp).getTime(); // NaN if the timestamp cannot be parsed
+    const maxAgeMs = maxAgeHours * 60 * 60 * 1000;
+    return snapshotAge <= maxAgeMs; // NaN compares false; a future timestamp (negative age) compares true
+  } catch {
+    return false;
+  }
+}
+// ---------------------------------------------------------------------------
+// Guard result type
+// ---------------------------------------------------------------------------
+export interface GuardResult { // Decision returned by processEvolutionGuard when a write is blocked.
+  exitCode: number; // Exit status the hook process should use; processEvolutionGuard sets 2.
+  message: string; // Text the stdin entry point writes to stderr before exiting.
+}
+// ---------------------------------------------------------------------------
+// Core processing logic
+// ---------------------------------------------------------------------------
+export interface GuardOptions { // Paths and thresholds used by processEvolutionGuard.
+  auditLogPath: string; // Evolution audit JSONL file read by checkActiveMonitoring.
+  selftuneDir: string; // Directory searched for monitoring/latest-snapshot.json.
+  maxSnapshotAgeHours?: number; // Snapshot freshness window in hours; processEvolutionGuard defaults it to 24.
+}
+/**
+ * Process a PreToolUse payload. Returns null if the write should be allowed,
+ * or a GuardResult with exitCode 2 if the write should be blocked.
+ *
+ * A write is blocked only when all of the following hold: the tool is Write
+ * or Edit, the target file is named SKILL.md, the skill's last audit action
+ * is "deployed", and no sufficiently recent watch snapshot exists for it.
+ *
+ * @param payload - Hook payload; `tool_name` and `tool_input.file_path` are read.
+ * @param options - Audit log path, selftune directory and optional snapshot
+ *   age limit (24 hours when omitted).
+ * @returns null to allow the write, or the blocking result and message.
+ *
+ * Exceptions raised while reading the audit log are not caught here; the
+ * stdin entry point in this file catches them and allows the write.
+ */
+export function processEvolutionGuard(
+  payload: PreToolUsePayload,
+  options: GuardOptions,
+): GuardResult | null {
+  const filePath =
+    typeof payload.tool_input?.file_path === "string" ? payload.tool_input.file_path : ""; // missing or non-string path becomes ""
+  if (!isSkillMdWrite(payload.tool_name, filePath)) return null;
+  const skillName = extractSkillName(filePath);
+  const { auditLogPath, selftuneDir, maxSnapshotAgeHours = 24 } = options;
+  // Check if this skill is under active monitoring
+  if (!checkActiveMonitoring(skillName, auditLogPath)) return null;
+  // Check if there's a recent watch snapshot
+  if (hasRecentWatchSnapshot(skillName, selftuneDir, maxSnapshotAgeHours)) return null;
+  // Block: skill is monitored but no recent watch
+  return {
+    exitCode: 2,
+    message: `[selftune] Skill "${skillName}" has a deployed evolution and is under active monitoring. Run \`selftune watch --skill ${skillName}\` before modifying SKILL.md to check current health.`,
+  };
+}
+// ---------------------------------------------------------------------------
+// stdin main (only when executed directly, not when imported)
+// ---------------------------------------------------------------------------
+if (import.meta.main) { // hook entry point: reads the PreToolUse payload from stdin
+  try {
+    const payload: PreToolUsePayload = JSON.parse(await Bun.stdin.text());
+    const result = processEvolutionGuard(payload, {
+      auditLogPath: EVOLUTION_AUDIT_LOG,
+      selftuneDir: SELFTUNE_CONFIG_DIR,
+    }); // maxSnapshotAgeHours is omitted, so the 24-hour default applies
+    if (result) {
+      // Exit code 2 = block with message
+      process.stderr.write(`${result.message}\n`);
+      process.exit(2);
+    }
+  } catch {
+    // Fail-open: any error → allow the write
+  }
+  process.exit(0); // reached when the write is allowed or an error was swallowed
+}