npm - selftune - Versions diffs - 0.2.22 → 0.2.23 - Mend

selftune 0.2.22 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/README.md +4 -2
package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +1 -0
package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +59 -0
package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +12 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/adapters/pi/hook.ts +273 -0
package/cli/selftune/adapters/pi/install.ts +207 -0
package/cli/selftune/constants.ts +10 -1
package/cli/selftune/dashboard-contract.ts +14 -0
package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
package/cli/selftune/evolution/engines/replay-engine.ts +158 -0
package/cli/selftune/evolution/evidence.ts +2 -6
package/cli/selftune/evolution/evolve-body.ts +73 -20
package/cli/selftune/evolution/validate-body.ts +78 -42
package/cli/selftune/evolution/validate-routing.ts +45 -104
package/cli/selftune/hooks/skill-eval.ts +2 -1
package/cli/selftune/hooks-shared/types.ts +1 -0
package/cli/selftune/index.ts +23 -5
package/cli/selftune/ingestors/pi-ingest.ts +726 -0
package/cli/selftune/init.ts +11 -1
package/cli/selftune/localdb/direct-write.ts +85 -0
package/cli/selftune/localdb/materialize.ts +6 -7
package/cli/selftune/localdb/queries.ts +126 -0
package/cli/selftune/localdb/schema.ts +38 -0
package/cli/selftune/observability.ts +8 -1
package/cli/selftune/orchestrate.ts +43 -0
package/cli/selftune/registry/client.ts +74 -0
package/cli/selftune/registry/history.ts +54 -0
package/cli/selftune/registry/index.ts +90 -0
package/cli/selftune/registry/install.ts +141 -0
package/cli/selftune/registry/list.ts +44 -0
package/cli/selftune/registry/push.ts +171 -0
package/cli/selftune/registry/rollback.ts +49 -0
package/cli/selftune/registry/status.ts +62 -0
package/cli/selftune/registry/sync.ts +125 -0
package/cli/selftune/repair/skill-usage.ts +4 -1
package/cli/selftune/status.ts +31 -0
package/cli/selftune/sync.ts +127 -23
package/cli/selftune/types.ts +2 -1
package/cli/selftune/utils/jsonl.ts +1 -30
package/cli/selftune/utils/skill-discovery.ts +22 -0
package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
package/node_modules/@selftune/telemetry-contract/package.json +1 -1
package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
package/node_modules/@selftune/telemetry-contract/src/schemas.ts +22 -4
package/node_modules/@selftune/telemetry-contract/src/types.ts +1 -12
package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
package/package.json +1 -1
package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
package/packages/telemetry-contract/package.json +1 -1
package/packages/telemetry-contract/src/index.ts +1 -0
package/packages/telemetry-contract/src/schemas.ts +22 -4
package/packages/telemetry-contract/src/types.ts +1 -12
package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
package/packages/ui/AGENTS.md +16 -0
package/packages/ui/README.md +1 -1
package/packages/ui/package.json +1 -1
package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
package/packages/ui/src/components/EvidenceViewer.tsx +153 -443
package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
package/packages/ui/src/components/InfoTip.tsx +1 -2
package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
package/packages/ui/src/components/OverviewPanels.tsx +652 -0
package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
package/packages/ui/src/components/index.ts +56 -1
package/packages/ui/src/components/section-cards.tsx +18 -35
package/packages/ui/src/components/skill-health-grid.tsx +47 -37
package/packages/ui/src/lib/constants.tsx +0 -1
package/packages/ui/src/primitives/card.tsx +1 -1
package/packages/ui/src/primitives/checkbox.tsx +1 -1
package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
package/packages/ui/src/primitives/select.tsx +2 -2
package/packages/ui/src/types.ts +172 -4
package/skill/SKILL.md +18 -4
package/skill/Workflows/Ingest.md +60 -2
package/skill/Workflows/Initialize.md +8 -5
package/skill/Workflows/PlatformHooks.md +19 -3
package/skill/Workflows/Registry.md +99 -0
package/skill/Workflows/Sync.md +3 -1
package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
package/cli/selftune/utils/html.ts +0 -27
package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117

package/cli/selftune/adapters/pi/hook.ts ADDED Viewed

@@ -0,0 +1,273 @@
+#!/usr/bin/env bun
+/**
+ * Pi hook adapter for selftune.
+ *
+ * Reads Pi hook payloads from stdin and delegates to shared selftune hook logic.
+ * Pi extensions emit events for tool calls, tool results, and session lifecycle.
+ *
+ * Usage: echo '$HOOK_PAYLOAD' | selftune pi hook
+ *
+ * Event routing:
+ *   tool_call        -> skill-change-guard + evolution-guard (PreToolUse)
+ *   tool_result      -> skill-eval (processToolUse) + commit-track (processCommitTrack)
+ *   message (user)   -> prompt-log (processPrompt) + auto-activate
+ *   session_shutdown -> session-stop (processSessionStop)
+ *
+ * Fail-open: any unhandled error -> exit 0, never crash the host agent.
+ */
+import type {
+  PostToolUsePayload,
+  PreToolUsePayload,
+  PromptSubmitPayload,
+  StopPayload,
+} from "../../types.js";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/**
+ * Pi hook payload — superset of all event fields.
+ *
+ * Every field is optional because one shape covers all event types; each
+ * handler in this file reads only the fields relevant to its event.
+ */
+export interface PiHookPayload {
+  /** Event discriminator; `cliMain` switches on this to pick a handler. */
+  event_type?: string;
+  session_id?: string;
+  cwd?: string;
+  /** Tool fields, forwarded by the tool_call / tool_result handlers. */
+  tool_name?: string;
+  tool_input?: Record<string, unknown>;
+  tool_use_id?: string;
+  /** Forwarded to the shared post-tool hooks as `tool_response`. */
+  tool_output?: Record<string, unknown>;
+  /** Prompt text; `prompt` and `user_prompt` act as fallbacks for each other. */
+  prompt?: string;
+  user_prompt?: string;
+  model?: string;
+  provider?: string;
+  /** Passed to the session-stop hook only when it is a string. */
+  last_assistant_message?: string;
+  /** Additional fields are accepted; the handlers in this file do not read them. */
+  [key: string]: unknown;
+}
+/** Response written to stdout (serialized with JSON.stringify). Empty object = no-op. */
+type HookResponse = Record<string, unknown>;
+/** Shared no-op response returned whenever a handler has nothing to report. */
+const EMPTY_RESPONSE: HookResponse = {};
+// ---------------------------------------------------------------------------
+// Event handlers (dynamic imports for fast startup)
+// ---------------------------------------------------------------------------
+/**
+ * Handles a `message` event: records the prompt, then collects auto-activate
+ * suggestions for the session.
+ *
+ * The two steps are independent and each is fail-open: an error in one is
+ * swallowed and does not prevent the other from running.
+ *
+ * @param payload Raw Pi hook payload.
+ * @returns `{ additionalContext }` when suggestions exist, otherwise the empty response.
+ */
+async function handlePromptSubmit(payload: PiHookPayload): Promise<HookResponse> {
+  // Step 1 — prompt logging.
+  try {
+    const { processPrompt: logPrompt } = await import("../../hooks/prompt-log.js");
+    const { session_id, cwd, prompt, user_prompt } = payload;
+    const submitted: PromptSubmitPayload = {
+      session_id,
+      cwd,
+      // Either field may carry the text; fill each from the other when absent.
+      prompt: prompt ?? user_prompt,
+      user_prompt: user_prompt ?? prompt,
+      hook_event_name: "UserPromptSubmit",
+    };
+    await logPrompt(submitted);
+  } catch {
+    // fail-open
+  }
+  // Step 2 — auto-activate suggestions.
+  try {
+    const { processAutoActivate: suggestFor } = await import("../../hooks/auto-activate.js");
+    const hints = await suggestFor(payload.session_id ?? "unknown");
+    if (hints.length > 0) {
+      const lines = hints.map((hint) => `[selftune] Suggestion: ${hint}`);
+      return { additionalContext: lines.join("\n") };
+    }
+  } catch {
+    // fail-open
+  }
+  return EMPTY_RESPONSE;
+}
+/**
+ * Handles a `tool_call` event by running the pre-tool guards.
+ *
+ * The evolution guard runs first and may block: when it returns a result, its
+ * message goes to stderr and its exit code is returned to the caller. The
+ * skill change guard is advisory only and just prints a suggestion to stderr.
+ * Every step is fail-open; if the constants module cannot be loaded, both
+ * guards are skipped.
+ *
+ * @param payload Raw Pi hook payload.
+ * @returns The response to print plus the process exit code to use.
+ */
+async function handlePreToolUse(
+  payload: PiHookPayload,
+): Promise<{ response: HookResponse; exitCode: number }> {
+  const toolCall: PreToolUsePayload = {
+    tool_name: payload.tool_name ?? "",
+    tool_input: payload.tool_input ?? {},
+    tool_use_id: payload.tool_use_id,
+    session_id: payload.session_id,
+    cwd: payload.cwd,
+    hook_event_name: "PreToolUse",
+  };
+  let paths:
+    | { EVOLUTION_AUDIT_LOG: string; SELFTUNE_CONFIG_DIR: string; SESSION_STATE_DIR: string }
+    | undefined;
+  try {
+    paths = await import("../../constants.js");
+  } catch {
+    // fail-open
+  }
+  // Both guards need the paths; without them there is nothing to check.
+  if (!paths) {
+    return { response: EMPTY_RESPONSE, exitCode: 0 };
+  }
+  // 1. Evolution guard (can block with exit 2)
+  try {
+    const { processEvolutionGuard: evolutionGuard } = await import(
+      "../../hooks/evolution-guard.js"
+    );
+    const verdict = await evolutionGuard(toolCall, {
+      auditLogPath: paths.EVOLUTION_AUDIT_LOG,
+      selftuneDir: paths.SELFTUNE_CONFIG_DIR,
+    });
+    if (verdict) {
+      process.stderr.write(`${verdict.message}\n`);
+      return { response: EMPTY_RESPONSE, exitCode: verdict.exitCode };
+    }
+  } catch {
+    // fail-open
+  }
+  // 2. Skill change guard (advisory only, never blocks)
+  try {
+    const { processPreToolUse: skillChangeGuard } = await import(
+      "../../hooks/skill-change-guard.js"
+    );
+    // The session id becomes part of a file name, so replace anything outside [a-zA-Z0-9_-].
+    const stateKey = (payload.session_id ?? "unknown").replace(/[^a-zA-Z0-9_-]/g, "_");
+    const hint = skillChangeGuard(
+      toolCall,
+      `${paths.SESSION_STATE_DIR}/guard-state-${stateKey}.json`,
+    );
+    if (hint) {
+      process.stderr.write(`[selftune] Suggestion: ${hint}\n`);
+    }
+  } catch {
+    // fail-open
+  }
+  return { response: EMPTY_RESPONSE, exitCode: 0 };
+}
+/**
+ * Handles a `tool_result` event: forwards the tool invocation to the
+ * skill-eval hook and then to the commit-track hook.
+ *
+ * Each hook runs in its own fail-open try block, so an error in the first
+ * does not stop the second.
+ *
+ * @param payload Raw Pi hook payload; `tool_output` is passed on as `tool_response`.
+ * @returns Always the empty (no-op) response.
+ */
+async function handlePostToolUse(payload: PiHookPayload): Promise<HookResponse> {
+  const { tool_name, tool_input, tool_use_id, tool_output, session_id, cwd } = payload;
+  const toolResult: PostToolUsePayload = {
+    tool_name: tool_name ?? "",
+    tool_input: tool_input ?? {},
+    tool_use_id,
+    tool_response: tool_output,
+    session_id,
+    cwd,
+    hook_event_name: "PostToolUse",
+  };
+  // 1. Skill eval (Read/Skill tool usage tracking)
+  try {
+    const { processToolUse: evaluateSkillUse } = await import("../../hooks/skill-eval.js");
+    await evaluateSkillUse(toolResult);
+  } catch {
+    // fail-open
+  }
+  // 2. Commit tracking (git commit detection in Bash output)
+  try {
+    const { processCommitTrack: trackCommit } = await import("../../hooks/commit-track.js");
+    await trackCommit(toolResult);
+  } catch {
+    // fail-open
+  }
+  return EMPTY_RESPONSE;
+}
+/**
+ * Handles a `session_shutdown` event by passing a Stop payload to the
+ * session-stop hook. Fail-open: any error is swallowed.
+ *
+ * @param payload Raw Pi hook payload.
+ * @returns Always the empty (no-op) response.
+ */
+async function handleSessionEnd(payload: PiHookPayload): Promise<HookResponse> {
+  try {
+    const { processSessionStop: recordStop } = await import("../../hooks/session-stop.js");
+    const { session_id, cwd, last_assistant_message: lastMessage } = payload;
+    const stop: StopPayload = {
+      session_id,
+      cwd,
+      // Only forward the last message when it really is a string.
+      last_assistant_message: typeof lastMessage === "string" ? lastMessage : undefined,
+      hook_event_name: "Stop",
+    };
+    await recordStop(stop);
+  } catch {
+    // fail-open
+  }
+  return EMPTY_RESPONSE;
+}
+// ---------------------------------------------------------------------------
+// Main entry point
+// ---------------------------------------------------------------------------
+/**
+ * Serializes the response to stdout and exits with the given code.
+ * The exit happens inside the write callback, i.e. only after the write has
+ * been handed off, so the JSON is not cut short by an early exit.
+ */
+function writeResponseAndExit(response: HookResponse, code: number): void {
+  const exitAfterWrite = (): void => {
+    process.exit(code);
+  };
+  process.stdout.write(JSON.stringify(response), exitAfterWrite);
+}
+/**
+ * CLI entry point. Reads stdin, routes to the correct handler, writes response.
+ *
+ * Routing by `event_type`:
+ *   message          -> handlePromptSubmit
+ *   tool_call        -> handlePreToolUse (the only handler that can set a non-zero exit code)
+ *   tool_result      -> handlePostToolUse
+ *   session_shutdown -> handleSessionEnd
+ *
+ * Empty stdin, invalid JSON, a JSON value that is not a plain object, a
+ * missing or non-string `event_type`, and unknown event types all yield the
+ * empty response with exit code 0. Any unexpected error does the same
+ * (fail-open).
+ */
+export async function cliMain(): Promise<void> {
+  try {
+    const raw = await Bun.stdin.text();
+    // Fast-path: empty stdin -> no-op
+    if (!raw.trim()) {
+      writeResponseAndExit(EMPTY_RESPONSE, 0);
+      return;
+    }
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(raw);
+    } catch {
+      writeResponseAndExit(EMPTY_RESPONSE, 0);
+      return;
+    }
+    // JSON.parse can legally return null, a primitive, or an array. Reject
+    // those explicitly instead of relying on a TypeError reaching the outer catch.
+    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+      writeResponseAndExit(EMPTY_RESPONSE, 0);
+      return;
+    }
+    const payload = parsed as PiHookPayload;
+    const eventType = typeof payload.event_type === "string" ? payload.event_type : "";
+    if (!eventType) {
+      writeResponseAndExit(EMPTY_RESPONSE, 0);
+      return;
+    }
+    let response: HookResponse = EMPTY_RESPONSE;
+    let exitCode = 0;
+    switch (eventType) {
+      case "message": {
+        response = await handlePromptSubmit(payload);
+        break;
+      }
+      case "tool_call": {
+        const result = await handlePreToolUse(payload);
+        response = result.response;
+        exitCode = result.exitCode;
+        break;
+      }
+      case "tool_result": {
+        response = await handlePostToolUse(payload);
+        break;
+      }
+      case "session_shutdown": {
+        response = await handleSessionEnd(payload);
+        break;
+      }
+      default: {
+        // Unknown event — no-op
+        break;
+      }
+    }
+    writeResponseAndExit(response, exitCode);
+  } catch {
+    // Fail-open: never crash
+    writeResponseAndExit(EMPTY_RESPONSE, 0);
+  }
+}
+// --- stdin main (only when executed directly, not when imported) ---
+// cliMain catches its own errors (fail-open), so no try/catch is needed here.
+if (import.meta.main) {
+  await cliMain();
+}

package/cli/selftune/adapters/pi/install.ts ADDED Viewed

@@ -0,0 +1,207 @@
+#!/usr/bin/env bun
+/**
+ * Install selftune hooks into Pi coding agent environment.
+ *
+ * Pi supports extensions that hook into its lifecycle. This installer
+ * creates a selftune extension that pipes events to `selftune pi hook`.
+ *
+ * Extension location: ~/.pi/extensions/selftune/
+ *
+ * Events hooked:
+ *   - tool_call        (pre-tool — skill guards, inline)
+ *   - tool_result      (post-tool — skill eval + commit tracking, inline)
+ *   - message          (prompt submit — prompt logging + auto-activate, inline)
+ *   - session_shutdown (session end — session telemetry, background)
+ *
+ * Usage: selftune pi install [--dry-run] [--uninstall]
+ */
+import { chmodSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+/** Pi home directory; the SELFTUNE_PI_DIR environment variable overrides the default `~/.pi`. */
+const PI_DIR = process.env.SELFTUNE_PI_DIR ?? join(homedir(), ".pi");
+/** Directory the generated selftune hook scripts are written to. */
+const PI_EXTENSIONS_DIR = join(PI_DIR, "extensions", "selftune");
+/** Comment line embedded in every generated script; its presence marks a file as selftune-managed. */
+const MARKER = "# selftune-managed";
+// ---------------------------------------------------------------------------
+// Hook script generators
+// ---------------------------------------------------------------------------
+/**
+ * Shell snippet that runs `pi hook`: uses "$SELFTUNE_CLI_PATH" when that
+ * variable is non-empty, otherwise falls back to `npx selftune`.
+ */
+const HOOK_CMD =
+  'if [ -n "$SELFTUNE_CLI_PATH" ]; then "$SELFTUNE_CLI_PATH" pi hook; else npx selftune pi hook; fi';
+/**
+ * Generates the bash script installed for one Pi event.
+ *
+ * Inline scripts wait for the hook, print its stdout (or `{}` when it printed
+ * nothing) and exit with the hook's exit code, so a guard block (exit 2) is
+ * preserved. Background scripts start the hook detached with all output
+ * discarded and immediately print `{}`.
+ *
+ * NOTE(review): `eventType` is not used in the generated script.
+ * NOTE(review): the inline script sends the hook's stderr to /dev/null, so
+ * messages the hook writes to stderr are not surfaced — confirm this is intended.
+ *
+ * @param eventType Name of the Pi event the script is installed for (currently unused).
+ * @param inline    true = run synchronously; false = run in the background.
+ * @returns Full script text, ending with a newline.
+ */
+function hookScript(eventType: string, inline: boolean): string {
+  // Lines shared by both variants: shebang, managed marker, stdin capture.
+  const preamble = ["#!/usr/bin/env bash", MARKER, "input=$(cat)"];
+  const tail = inline
+    ? [
+        // Inline — fast path; finish before Pi moves on.
+        // Capture output and exit code separately to avoid double JSON and preserve guard blocks (exit 2).
+        `result=$(echo "$input" | (${HOOK_CMD}) 2>/dev/null)`,
+        "rc=$?",
+        `[ -z "$result" ] && result='{}'`,
+        'echo "$result"',
+        "exit $rc",
+      ]
+    : [
+        // Background — don't block Pi
+        `echo "$input" | (${HOOK_CMD}) &>/dev/null &`,
+        "echo '{}'",
+      ];
+  return `${[...preamble, ...tail].join("\n")}\n`;
+}
+// ---------------------------------------------------------------------------
+// Hook definitions
+// ---------------------------------------------------------------------------
+/**
+ * Hooks managed by this installer. `name` is both the Pi event name and the
+ * script file name inside PI_EXTENSIONS_DIR; `inline` selects the synchronous
+ * or the background script variant (see hookScript).
+ */
+const HOOKS: Array<{ name: string; description: string; inline: boolean }> = [
+  { name: "tool_call", description: "Pre-tool guards (evolution, skill change)", inline: true },
+  { name: "tool_result", description: "Post-tool eval + commit tracking", inline: true },
+  { name: "message", description: "Prompt logging + auto-activate", inline: true },
+  { name: "session_shutdown", description: "Session telemetry recording", inline: false },
+];
+// ---------------------------------------------------------------------------
+// Install
+// ---------------------------------------------------------------------------
+/**
+ * Writes one hook script per entry in HOOKS into PI_EXTENSIONS_DIR.
+ *
+ * - A missing script is created.
+ * - An existing script containing MARKER is overwritten (updated).
+ * - An existing script without MARKER is left untouched and counted as skipped.
+ *
+ * Every written script is explicitly chmod-ed to 0o755. The `mode` option of
+ * writeFileSync only applies when the file is created and is filtered by the
+ * process umask, so the explicit chmod keeps new and updated scripts consistent.
+ *
+ * @param dryRun When true, only prints what would happen; nothing is written.
+ */
+function installHooks(dryRun: boolean): void {
+  console.log("Setting up selftune hooks for Pi...");
+  console.log(`Extensions directory: ${PI_EXTENSIONS_DIR}`);
+  console.log("");
+  if (!dryRun) {
+    mkdirSync(PI_EXTENSIONS_DIR, { recursive: true });
+  }
+  let installed = 0;
+  let skipped = 0;
+  for (const hook of HOOKS) {
+    const hookPath = join(PI_EXTENSIONS_DIR, hook.name);
+    const exists = existsSync(hookPath);
+    // Never overwrite a script that selftune did not create.
+    if (exists && !readFileSync(hookPath, "utf-8").includes(MARKER)) {
+      console.log(`  Skipped: ${hook.name} (existing hook not managed by selftune)`);
+      skipped++;
+      continue;
+    }
+    if (dryRun) {
+      console.log(`  Would ${exists ? "update" : "create"}: ${hook.name}`);
+    } else {
+      writeFileSync(hookPath, hookScript(hook.name, hook.inline), { mode: 0o755 });
+      // Force the executable bits on both the create and the update path.
+      chmodSync(hookPath, 0o755);
+      console.log(`  ${exists ? "Updated" : "Created"}: ${hook.name}`);
+    }
+    installed++;
+  }
+  console.log("");
+  if (dryRun) {
+    console.log(`Dry run: ${installed} hook(s) would be installed.`);
+  } else if (installed > 0) {
+    console.log(`Installed ${installed} hook(s).`);
+  }
+  if (skipped > 0) {
+    console.log(`Skipped ${skipped} hook(s) with existing non-selftune content.`);
+  }
+  if (!dryRun && installed > 0) {
+    console.log("");
+    if (skipped === 0) {
+      console.log("Pi will now track commits and record session telemetry.");
+    } else {
+      console.log("Partial install: some hooks were skipped. Telemetry may be incomplete.");
+    }
+    console.log("Run `selftune status` to verify setup.");
+  }
+}
+// ---------------------------------------------------------------------------
+// Uninstall
+// ---------------------------------------------------------------------------
+/**
+ * Removes the selftune-managed hook scripts from PI_EXTENSIONS_DIR.
+ *
+ * Only files containing MARKER are deleted; missing files are reported and
+ * files without the marker are counted as skipped.
+ *
+ * @param dryRun When true, only prints what would be removed.
+ */
+function uninstallHooks(dryRun: boolean): void {
+  console.log("Removing selftune hooks from Pi...");
+  console.log("");
+  let removed = 0;
+  let skipped = 0;
+  for (const { name } of HOOKS) {
+    const target = join(PI_EXTENSIONS_DIR, name);
+    if (!existsSync(target)) {
+      console.log(`  Not found: ${name}`);
+      continue;
+    }
+    const managed = readFileSync(target, "utf-8").includes(MARKER);
+    if (!managed) {
+      console.log(`  Skipped: ${name} (not managed by selftune)`);
+      skipped += 1;
+      continue;
+    }
+    if (!dryRun) {
+      rmSync(target);
+    }
+    console.log(dryRun ? `  Would remove: ${name}` : `  Removed: ${name}`);
+    removed += 1;
+  }
+  console.log("");
+  if (dryRun) {
+    console.log(`Dry run: ${removed} hook(s) would be removed.`);
+  } else if (removed > 0) {
+    console.log(`Removed ${removed} hook(s).`);
+  }
+  if (skipped > 0) {
+    console.log(`Skipped ${skipped} hook(s) not managed by selftune.`);
+  }
+}
+// ---------------------------------------------------------------------------
+// Main entry point
+// ---------------------------------------------------------------------------
+/**
+ * CLI entry point for `selftune pi install`.
+ * `--uninstall` selects removal instead of installation; `--dry-run` is
+ * passed through to whichever action runs.
+ */
+export async function cliMain(): Promise<void> {
+  const flags = process.argv.slice(2);
+  const action = flags.includes("--uninstall") ? uninstallHooks : installHooks;
+  action(flags.includes("--dry-run"));
+}
+// --- CLI main (only when executed directly, not when imported) ---
+// A failure is reported on stderr and the process exits with code 1.
+if (import.meta.main) {
+  try {
+    await cliMain();
+  } catch (err) {
+    console.error(
+      `[selftune] Pi install failed: ${err instanceof Error ? err.message : String(err)}`,
+    );
+    process.exit(1);
+  }
+}

package/cli/selftune/constants.ts CHANGED Viewed

@@ -13,6 +13,8 @@ const claudeHomeDir =
 const openclawHomeDir =
   process.env.SELFTUNE_OPENCLAW_DIR ??
   (resolvedHome ? join(defaultHome, ".openclaw") : join(homedir(), ".openclaw"));
+const piHomeDir =
+  process.env.SELFTUNE_PI_DIR ?? (resolvedHome ? join(defaultHome, ".pi") : join(homedir(), ".pi"));
 export const SELFTUNE_CONFIG_DIR =
   (process.env.SELFTUNE_CONFIG_DIR || undefined) ??
@@ -100,7 +102,7 @@ export const REQUIRED_FIELDS: Record<string, Set<string>> = {
 };
 /** Agent CLI candidates in detection order. */
-export const AGENT_CANDIDATES = ["claude", "codex", "opencode", "openclaw"] as const;
+export const AGENT_CANDIDATES = ["claude", "codex", "opencode", "openclaw", "pi"] as const;
 /** Required Claude Code hook keys in settings.json. */
 export const CLAUDE_CODE_HOOK_KEYS = [
@@ -158,6 +160,13 @@ export const OPENCLAW_AGENTS_DIR =
 /** Marker file tracking which OpenClaw sessions have been ingested. */
 export const OPENCLAW_INGEST_MARKER = join(SELFTUNE_CONFIG_DIR, "openclaw-ingest-marker.json");
+/** Pi sessions directory. */
+export const PI_SESSIONS_DIR =
+  process.env.SELFTUNE_PI_SESSIONS_DIR ?? join(piHomeDir, "agent", "sessions");
+/** Marker file tracking which Pi sessions have been ingested. */
+export const PI_INGEST_MARKER = join(SELFTUNE_CONFIG_DIR, "pi-ingest-marker.json");
 /** Default output directory for contribution bundles. */
 export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
 /** Creator-directed contribution preferences (per-skill opt-in state). */

package/cli/selftune/dashboard-contract.ts CHANGED Viewed

@@ -397,6 +397,20 @@ export interface HealthResponse {
   port: number;
 }
+// -- Replay entry result types ------------------------------------------------
+/**
+ * Result record for a single replayed eval query.
+ * NOTE(review): the producer of these records is not visible here; the field
+ * notes below are inferred from the names and should be confirmed.
+ */
+export interface ReplayEntryResult {
+  proposal_id: string;
+  skill_name: string;
+  validation_mode: string;
+  phase: string;
+  query: string;
+  // presumably the expected outcome for `query` — TODO confirm
+  should_trigger: boolean;
+  // presumably the observed outcome — TODO confirm
+  triggered: boolean;
+  // presumably true when `triggered` matches `should_trigger` — TODO confirm
+  passed: boolean;
+  evidence: string | null;
+}
 // -- Doctor / health check types ----------------------------------------------
 export type { DoctorResult, HealthCheck, HealthStatus } from "./types.js";

package/cli/selftune/evolution/engines/judge-engine.ts ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * judge-engine.ts
+ *
+ * LLM judge validation engine: runs trigger accuracy checks using
+ * an LLM as a YES/NO judge for each eval entry.
+ *
+ * Extracted from validate-routing.ts and validate-body.ts to isolate
+ * LLM-judge-specific concerns from replay-specific concerns.
+ */
+import type { EvalEntry, ValidationMode } from "../../types.js";
+import { callLlm } from "../../utils/llm-call.js";
+import { buildTriggerCheckPrompt, parseTriggerResponse } from "../../utils/trigger-check.js";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/** Aggregate outcome of one judge validation run (see runJudgeValidation). */
+export interface JudgeValidationResult {
+  /** Fraction of eval entries judged correctly with the original content (0 for an empty eval set). */
+  before_pass_rate: number;
+  /** Fraction of eval entries judged correctly with the proposed content (0 for an empty eval set). */
+  after_pass_rate: number;
+  /** True only when after_pass_rate is strictly greater than before_pass_rate. */
+  improved: boolean;
+  /** Queries that passed with the original content but fail with the proposed content. */
+  regressions: string[];
+  validation_mode: ValidationMode;
+  /** The agent name that was passed to the LLM calls. */
+  validation_agent: string;
+}
+// ---------------------------------------------------------------------------
+// Judge validation engine
+// ---------------------------------------------------------------------------
+/**
+ * Scores trigger accuracy of original vs proposed content with an LLM judge.
+ *
+ * For every eval entry the judge is asked twice — first about the original
+ * content, then about the proposed content — whether the content would
+ * trigger for the entry's query. An entry passes when the judge's answer
+ * matches `should_trigger`. Calls are made sequentially, in eval-set order.
+ *
+ * @param originalContent Content currently in use.
+ * @param proposedContent Candidate replacement content.
+ * @param evalSet         Entries to judge; an empty set yields zero rates and `improved: false`.
+ * @param agent           Agent passed to `callLlm` and echoed back as `validation_agent`.
+ * @param modelFlag       Optional model flag passed through to `callLlm`.
+ * @returns Pass rates before/after, the improvement flag, and the regressed queries.
+ */
+export async function runJudgeValidation(
+  originalContent: string,
+  proposedContent: string,
+  evalSet: EvalEntry[],
+  agent: string,
+  modelFlag?: string,
+): Promise<JudgeValidationResult> {
+  // Single place that assembles the result so both exits share one shape.
+  const summarize = (
+    beforeRate: number,
+    afterRate: number,
+    regressed: string[],
+  ): JudgeValidationResult => ({
+    before_pass_rate: beforeRate,
+    after_pass_rate: afterRate,
+    improved: afterRate > beforeRate,
+    regressions: regressed,
+    validation_mode: "llm_judge",
+    validation_agent: agent,
+  });
+  if (evalSet.length === 0) {
+    return summarize(0, 0, []);
+  }
+  const judgeInstructions = "You are an evaluation assistant. Answer only YES or NO.";
+  // Asks the judge about one content/query pair and reports whether the
+  // verdict agrees with the expected trigger behaviour.
+  const judgedCorrectly = async (content: string, entry: EvalEntry): Promise<boolean> => {
+    const question = buildTriggerCheckPrompt(content, entry.query);
+    const reply = await callLlm(judgeInstructions, question, agent, modelFlag);
+    return !parseTriggerResponse(reply) === !entry.should_trigger;
+  };
+  let passedBefore = 0;
+  let passedAfter = 0;
+  const regressed: string[] = [];
+  for (const entry of evalSet) {
+    const okBefore = await judgedCorrectly(originalContent, entry);
+    const okAfter = await judgedCorrectly(proposedContent, entry);
+    if (okBefore) passedBefore += 1;
+    if (okAfter) passedAfter += 1;
+    // A regression is an entry the original handled correctly but the proposal does not.
+    if (okBefore && !okAfter) {
+      regressed.push(entry.query);
+    }
+  }
+  const entryCount = evalSet.length;
+  return summarize(passedBefore / entryCount, passedAfter / entryCount, regressed);
+}