npm - selftune - Versions diffs - 0.2.8 → 0.2.10 - Mend

selftune 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/README.md +35 -35
package/apps/local-dashboard/dist/assets/index-BZVLv70T.js +16 -0
package/apps/local-dashboard/dist/assets/{index-CRtLkBTi.css → index-Bs3Y4ixf.css} +1 -1
package/apps/local-dashboard/dist/assets/{vendor-react-BQH_6WrG.js → vendor-react-BXP54cYo.js} +4 -4
package/apps/local-dashboard/dist/assets/{vendor-table-dK1QMLq9.js → vendor-table-DTF_SXoy.js} +1 -1
package/apps/local-dashboard/dist/assets/{vendor-ui-CO2mrx6e.js → vendor-ui-CWU0d1wd.js} +66 -66
package/apps/local-dashboard/dist/index.html +15 -15
package/bin/selftune.cjs +1 -1
package/cli/selftune/activation-rules.ts +37 -18
package/cli/selftune/agent-guidance.ts +16 -16
package/cli/selftune/alpha-identity.ts +1 -2
package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
package/cli/selftune/alpha-upload/flush.ts +2 -2
package/cli/selftune/alpha-upload/stage-canonical.ts +106 -3
package/cli/selftune/auth/device-code.ts +32 -0
package/cli/selftune/auto-update.ts +12 -0
package/cli/selftune/badge/badge.ts +1 -0
package/cli/selftune/canonical-export.ts +5 -0
package/cli/selftune/claude-agents.ts +154 -0
package/cli/selftune/contribute/bundle.ts +2 -0
package/cli/selftune/contribute/contribute.ts +1 -0
package/cli/selftune/cron/setup.ts +2 -2
package/cli/selftune/dashboard-contract.ts +1 -1
package/cli/selftune/dashboard-server.ts +11 -52
package/cli/selftune/eval/hooks-to-evals.ts +13 -6
package/cli/selftune/eval/import-skillsbench.ts +1 -0
package/cli/selftune/eval/synthetic-evals.ts +2 -3
package/cli/selftune/eval/unit-test.ts +1 -0
package/cli/selftune/evolution/deploy-proposal.ts +1 -0
package/cli/selftune/evolution/evolve-body.ts +93 -6
package/cli/selftune/evolution/evolve.ts +0 -1
package/cli/selftune/evolution/propose-body.ts +3 -2
package/cli/selftune/evolution/propose-routing.ts +3 -2
package/cli/selftune/evolution/refine-body.ts +3 -2
package/cli/selftune/export.ts +1 -0
package/cli/selftune/grading/auto-grade.ts +1 -0
package/cli/selftune/grading/grade-session.ts +9 -0
package/cli/selftune/hooks/auto-activate.ts +6 -0
package/cli/selftune/hooks/evolution-guard.ts +12 -15
package/cli/selftune/hooks/prompt-log.ts +1 -0
package/cli/selftune/hooks/session-stop.ts +34 -40
package/cli/selftune/hooks/skill-change-guard.ts +1 -0
package/cli/selftune/hooks/skill-eval.ts +1 -1
package/cli/selftune/index.ts +23 -14
package/cli/selftune/ingestors/claude-replay.ts +1 -0
package/cli/selftune/ingestors/codex-rollout.ts +1 -0
package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
package/cli/selftune/init.ts +197 -96
package/cli/selftune/localdb/db.ts +1 -0
package/cli/selftune/localdb/direct-write.ts +93 -12
package/cli/selftune/localdb/materialize.ts +2 -0
package/cli/selftune/localdb/queries.ts +210 -0
package/cli/selftune/localdb/schema.ts +72 -1
package/cli/selftune/monitoring/watch.ts +1 -0
package/cli/selftune/normalization.ts +4 -0
package/cli/selftune/observability.ts +14 -7
package/cli/selftune/orchestrate.ts +15 -37
package/cli/selftune/repair/skill-usage.ts +7 -3
package/cli/selftune/routes/orchestrate-runs.ts +1 -0
package/cli/selftune/routes/overview.ts +1 -0
package/cli/selftune/routes/skill-report.ts +1 -0
package/cli/selftune/sync.ts +31 -1
package/cli/selftune/types.ts +2 -2
package/cli/selftune/uninstall.ts +412 -0
package/cli/selftune/utils/canonical-log.ts +2 -0
package/cli/selftune/utils/jsonl.ts +1 -0
package/cli/selftune/utils/llm-call.ts +131 -3
package/cli/selftune/utils/skill-log.ts +1 -0
package/cli/selftune/utils/transcript.ts +1 -0
package/cli/selftune/utils/trigger-check.ts +1 -1
package/cli/selftune/workflows/skill-md-writer.ts +5 -5
package/cli/selftune/workflows/workflows.ts +1 -0
package/package.json +38 -33
package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
package/packages/telemetry-contract/package.json +3 -3
package/packages/telemetry-contract/src/index.ts +0 -1
package/packages/telemetry-contract/src/schemas.ts +6 -24
package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
package/packages/ui/README.md +35 -34
package/packages/ui/package.json +3 -3
package/packages/ui/src/components/ActivityTimeline.tsx +49 -42
package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
package/packages/ui/src/components/InfoTip.tsx +4 -3
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
package/packages/ui/src/components/section-cards.tsx +19 -24
package/packages/ui/src/components/skill-health-grid.tsx +213 -193
package/packages/ui/src/lib/constants.tsx +1 -0
package/packages/ui/src/primitives/badge.tsx +12 -15
package/packages/ui/src/primitives/button.tsx +7 -7
package/packages/ui/src/primitives/card.tsx +15 -26
package/packages/ui/src/primitives/checkbox.tsx +7 -8
package/packages/ui/src/primitives/collapsible.tsx +5 -5
package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
package/packages/ui/src/primitives/label.tsx +6 -6
package/packages/ui/src/primitives/select.tsx +28 -37
package/packages/ui/src/primitives/table.tsx +17 -44
package/packages/ui/src/primitives/tabs.tsx +14 -21
package/packages/ui/src/primitives/tooltip.tsx +10 -22
package/skill/SKILL.md +72 -59
package/skill/Workflows/AlphaUpload.md +4 -4
package/skill/Workflows/AutoActivation.md +11 -6
package/skill/Workflows/Badge.md +22 -16
package/skill/Workflows/Baseline.md +34 -36
package/skill/Workflows/Composability.md +16 -11
package/skill/Workflows/Contribute.md +26 -21
package/skill/Workflows/Cron.md +23 -22
package/skill/Workflows/Dashboard.md +40 -40
package/skill/Workflows/Doctor.md +40 -34
package/skill/Workflows/Evals.md +48 -47
package/skill/Workflows/EvolutionMemory.md +31 -21
package/skill/Workflows/Evolve.md +84 -82
package/skill/Workflows/EvolveBody.md +58 -47
package/skill/Workflows/Grade.md +16 -13
package/skill/Workflows/ImportSkillsBench.md +9 -6
package/skill/Workflows/Ingest.md +36 -21
package/skill/Workflows/Initialize.md +138 -97
package/skill/Workflows/Orchestrate.md +22 -16
package/skill/Workflows/Replay.md +12 -7
package/skill/Workflows/Rollback.md +13 -6
package/skill/Workflows/Schedule.md +6 -6
package/skill/Workflows/Sync.md +18 -11
package/skill/Workflows/UnitTest.md +28 -17
package/skill/Workflows/Watch.md +28 -21
package/skill/agents/diagnosis-analyst.md +11 -0
package/skill/agents/evolution-reviewer.md +15 -1
package/skill/agents/integration-guide.md +10 -0
package/skill/agents/pattern-analyst.md +12 -1
package/skill/references/grading-methodology.md +23 -24
package/skill/references/interactive-config.md +7 -7
package/skill/references/invocation-taxonomy.md +22 -20
package/skill/references/logs.md +20 -6
package/skill/references/setup-patterns.md +4 -2
package/.claude/agents/diagnosis-analyst.md +0 -156
package/.claude/agents/evolution-reviewer.md +0 -180
package/.claude/agents/integration-guide.md +0 -212
package/.claude/agents/pattern-analyst.md +0 -160
package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +0 -15

package/cli/selftune/claude-agents.ts ADDED Viewed

@@ -0,0 +1,154 @@
+import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { dirname, join, resolve } from "node:path";
+// Name of the bookkeeping file kept inside the Claude agents directory; it
+// records which agent files the last sync installed.
+const MANIFEST_FILENAME = ".selftune-manifest.json";
+// Agent file names that are always treated as selftune-managed, even when no
+// manifest is present (install and removal both seed their managed set with
+// these names).
+const LEGACY_SELFTUNE_AGENT_FILES = [
+  "diagnosis-analyst.md",
+  "evolution-reviewer.md",
+  "integration-guide.md",
+  "pattern-analyst.md",
+] as const;
+// Directory holding the agent files shipped with the package: two levels above
+// this module's directory, then `skill/agents`. `import.meta.path` is the Bun
+// runtime's absolute path of the current module.
+const BUNDLED_AGENT_DIR = resolve(dirname(import.meta.path), "..", "..", "skill", "agents");
+/** On-disk shape of the manifest that tracks selftune-installed agent files. */
+interface AgentManifest {
+  /** Manifest format version; always the literal 1. */
+  version: 1;
+  /** File names (not paths) recorded by the last sync. */
+  files: string[];
+  /** ISO-8601 timestamp of the last sync; empty string when the stored value is missing or not a string. */
+  synced_at: string;
+}
+/**
+ * Tell whether a manifest entry is a bare file name that is safe to join onto
+ * the agents directory.
+ *
+ * Rejects non-strings, the empty string, "." and "..", and anything containing
+ * a path separator (either slash style).
+ */
+function isPlainFileName(name: unknown): name is string {
+  return (
+    typeof name === "string" &&
+    name !== "" &&
+    name !== "." &&
+    name !== ".." &&
+    !/[\\/]/.test(name)
+  );
+}
+/**
+ * Load and sanitize the agent manifest stored at `path`.
+ *
+ * The manifest is an on-disk JSON file, so its contents are not trusted: the
+ * install and removal routines join every returned name onto the agents
+ * directory and delete the result, which means an entry such as
+ * "../settings.json" would otherwise remove a file outside that directory and
+ * an entry such as "" or "." would resolve to the directory itself. Only bare
+ * file names are therefore kept.
+ *
+ * @param path Location of the manifest file.
+ * @returns The normalized manifest, or `null` when the file is missing,
+ *   unreadable, not valid JSON, or has no `files` array.
+ */
+function readManifest(path: string): AgentManifest | null {
+  try {
+    if (!existsSync(path)) return null;
+    const parsed = JSON.parse(readFileSync(path, "utf-8")) as Partial<AgentManifest>;
+    if (!Array.isArray(parsed.files)) return null;
+    return {
+      version: 1,
+      // Drop anything that is not a plain file name (see isPlainFileName).
+      files: parsed.files.filter(isPlainFileName),
+      synced_at: typeof parsed.synced_at === "string" ? parsed.synced_at : "",
+    };
+  } catch {
+    // Best-effort read: a corrupt or unreadable manifest is treated as absent.
+    return null;
+  }
+}
+/**
+ * Persist a manifest listing `files` to `path` as pretty-printed JSON.
+ *
+ * The names are written in sorted order (the caller's array is not mutated)
+ * and the manifest is stamped with the current time in ISO-8601 form.
+ */
+function writeManifest(path: string, files: string[]): void {
+  const sortedNames = files.slice().sort();
+  const payload: AgentManifest = {
+    version: 1,
+    files: sortedNames,
+    synced_at: new Date().toISOString(),
+  };
+  const serialized = JSON.stringify(payload, null, 2);
+  writeFileSync(path, serialized, "utf-8");
+}
+/**
+ * Return the UTF-8 contents of `path`, or `null` when the file does not exist
+ * or cannot be read.
+ */
+function readTextIfExists(path: string): string | null {
+  try {
+    return existsSync(path) ? readFileSync(path, "utf-8") : null;
+  } catch {
+    // Any read failure is reported the same way as a missing file.
+    return null;
+  }
+}
+/**
+ * Path of the Claude agents directory (`.claude/agents`) under `homeDir`,
+ * which defaults to the current user's home directory.
+ */
+export function getClaudeAgentsDir(homeDir = homedir()): string {
+  const claudeRoot = join(homeDir, ".claude");
+  return join(claudeRoot, "agents");
+}
+/**
+ * Path of the selftune manifest file inside the Claude agents directory for
+ * `homeDir` (defaults to the current user's home directory).
+ */
+export function getClaudeAgentManifestPath(homeDir = homedir()): string {
+  const agentsDir = getClaudeAgentsDir(homeDir);
+  return join(agentsDir, MANIFEST_FILENAME);
+}
+/**
+ * List the markdown (`.md`) file names found directly in `sourceDir`, sorted.
+ *
+ * Returns an empty array when the directory is missing or cannot be read.
+ */
+export function listBundledAgentFiles(sourceDir = BUNDLED_AGENT_DIR): string[] {
+  try {
+    if (existsSync(sourceDir)) {
+      const markdownNames: string[] = [];
+      for (const entry of readdirSync(sourceDir)) {
+        if (entry.endsWith(".md")) markdownNames.push(entry);
+      }
+      markdownNames.sort();
+      return markdownNames;
+    }
+    return [];
+  } catch {
+    return [];
+  }
+}
+/**
+ * Sync the bundled agent markdown files into the Claude agents directory.
+ *
+ * Managed files that are no longer bundled are deleted, bundled files whose
+ * content differs from the installed copy (or all of them when `force` is set)
+ * are written, and the manifest is rewritten with the current bundled list.
+ *
+ * @param options.homeDir   Home directory to install under (default: `os.homedir()`).
+ * @param options.force     Rewrite every bundled file even when the content is unchanged.
+ * @param options.sourceDir Directory to copy agent files from (default: the bundled agents directory).
+ * @returns Sorted names of files that were removed or written. Empty when the
+ *   source directory yields no `.md` files, in which case nothing is touched.
+ *   Errors from `mkdirSync`, `rmSync`, and `writeFileSync` are not caught here.
+ */
+export function installAgentFiles(options?: {
+  homeDir?: string;
+  force?: boolean;
+  sourceDir?: string;
+}): string[] {
+  const homeDir = options?.homeDir ?? homedir();
+  const targetDir = getClaudeAgentsDir(homeDir);
+  const manifestPath = getClaudeAgentManifestPath(homeDir);
+  const sourceDir = options?.sourceDir ?? BUNDLED_AGENT_DIR;
+  const sourceFiles = listBundledAgentFiles(sourceDir);
+  // Nothing to install: bail out before creating the directory or touching the manifest.
+  if (sourceFiles.length === 0) return [];
+  mkdirSync(targetDir, { recursive: true });
+  const manifest = readManifest(manifestPath);
+  // Files selftune considers its own: the legacy names plus whatever the previous sync recorded.
+  const managedFiles = new Set<string>([
+    ...LEGACY_SELFTUNE_AGENT_FILES,
+    ...(manifest?.files ?? []),
+  ]);
+  const sourceSet = new Set(sourceFiles);
+  const changed = new Set<string>();
+  // Pass 1: delete managed files that are no longer part of the bundle.
+  for (const staleFile of managedFiles) {
+    if (sourceSet.has(staleFile)) continue;
+    const stalePath = join(targetDir, staleFile);
+    if (existsSync(stalePath)) {
+      rmSync(stalePath, { force: true });
+      changed.add(staleFile);
+    }
+  }
+  // Pass 2: write bundled files that are new, changed, or forced.
+  for (const fileName of sourceFiles) {
+    const sourcePath = join(sourceDir, fileName);
+    const targetPath = join(targetDir, fileName);
+    const sourceContent = readTextIfExists(sourcePath);
+    // Unreadable source: skip the copy (the name is still recorded in the manifest below).
+    if (sourceContent === null) continue;
+    const existingContent = readTextIfExists(targetPath);
+    if (options?.force || existingContent !== sourceContent) {
+      writeFileSync(targetPath, sourceContent, "utf-8");
+      changed.add(fileName);
+    }
+  }
+  // The manifest always reflects the full bundled list, not just what changed.
+  writeManifest(manifestPath, sourceFiles);
+  return [...changed].sort();
+}
+/**
+ * Remove every selftune-managed agent file, and the manifest itself, from the
+ * Claude agents directory.
+ *
+ * @param options.homeDir Home directory to operate on (default: `os.homedir()`).
+ * @param options.dryRun  When true, nothing is deleted; the result lists what would be removed.
+ * @returns `files`: the target paths that exist and were (or would be) removed,
+ *   manifest last; `removed`: the length of that list.
+ */
+export function removeInstalledAgentFiles(options?: { homeDir?: string; dryRun?: boolean }): {
+  removed: number;
+  files: string[];
+} {
+  const homeDir = options?.homeDir ?? homedir();
+  const targetDir = getClaudeAgentsDir(homeDir);
+  const manifestPath = getClaudeAgentManifestPath(homeDir);
+  const manifest = readManifest(manifestPath);
+  // Union of every name selftune may have installed: legacy names, the
+  // currently bundled files, and whatever the manifest recorded.
+  const managedFiles = new Set<string>([
+    ...LEGACY_SELFTUNE_AGENT_FILES,
+    ...listBundledAgentFiles(),
+    ...(manifest?.files ?? []),
+  ]);
+  const removed: string[] = [];
+  for (const fileName of managedFiles) {
+    const targetPath = join(targetDir, fileName);
+    if (!existsSync(targetPath)) continue;
+    if (!options?.dryRun) {
+      rmSync(targetPath, { force: true });
+    }
+    // Recorded in dry-run mode too, so the caller can preview the removal.
+    removed.push(targetPath);
+  }
+  // The manifest is handled after the agent files it describes.
+  if (existsSync(manifestPath)) {
+    if (!options?.dryRun) {
+      rmSync(manifestPath, { force: true });
+    }
+    removed.push(manifestPath);
+  }
+  return { removed: removed.length, files: removed };
+}

package/cli/selftune/contribute/bundle.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { randomUUID } from "node:crypto";
 import { existsSync, readdirSync, readFileSync } from "node:fs";
 import { homedir } from "node:os";
 import { join } from "node:path";
 import {
   EVOLUTION_AUDIT_LOG,
   QUERY_LOG,
@@ -224,6 +225,7 @@ export function assembleBundle(options: {
   let allEvolutionRecords: EvolutionAuditEntry[];
   if (useJsonl) {
+    // JSONL fallback: only used when custom (non-default) log paths are provided (test isolation)
     allSkillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
     allQueryRecords = readJsonl<QueryLogRecord>(queryLogPath);
     allTelemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);

package/cli/selftune/contribute/contribute.ts CHANGED Viewed

@@ -10,6 +10,7 @@
 import { spawnSync } from "node:child_process";
 import { existsSync, mkdirSync, writeFileSync } from "node:fs";
 import { parseArgs } from "node:util";
 import { CONTRIBUTIONS_DIR } from "../constants.js";
 import { assembleBundle } from "./bundle.js";
 import { sanitizeBundle } from "./sanitize.js";

package/cli/selftune/cron/setup.ts CHANGED Viewed

@@ -46,10 +46,10 @@ export const DEFAULT_CRON_JOBS: CronJobConfig[] = [
   },
   {
     name: "selftune-orchestrate",
-    cron: "0 */6 * * *",
+    cron: "0 */2 * * *",
     message:
       "Run selftune orchestrate --max-skills 3. This performs source-truth sync, selects candidate skills, evolves validated low-risk descriptions autonomously, and watches recent deployments for regressions.",
-    description: "Autonomous improvement loop every 6 hours",
+    description: "Autonomous improvement loop every 2 hours",
   },
 ];

package/cli/selftune/dashboard-contract.ts CHANGED Viewed

@@ -199,7 +199,7 @@ export interface HealthResponse {
   db_path: string;
   log_dir: string;
   config_dir: string;
-  watcher_mode: "jsonl" | "none";
+  watcher_mode: "wal" | "jsonl" | "none";
   process_mode: "standalone" | "dev-server" | "test";
   host: string;
   port: number;

package/cli/selftune/dashboard-server.ts CHANGED Viewed

@@ -17,16 +17,11 @@
  */
 import type { Database } from "bun:sqlite";
-import { existsSync, type FSWatcher, watch as fsWatch, readFileSync } from "node:fs";
+import { existsSync, readFileSync, unwatchFile, watchFile } from "node:fs";
 import { dirname, extname, isAbsolute, join, relative, resolve } from "node:path";
 import type { BadgeFormat } from "./badge/badge-svg.js";
-import {
-  EVOLUTION_AUDIT_LOG,
-  LOG_DIR,
-  QUERY_LOG,
-  SELFTUNE_CONFIG_DIR,
-  TELEMETRY_LOG,
-} from "./constants.js";
+import { LOG_DIR, SELFTUNE_CONFIG_DIR } from "./constants.js";
 import type {
   HealthResponse,
   OverviewResponse,
@@ -237,14 +232,14 @@ export async function startDashboardServer(
     }
   }, SSE_KEEPALIVE_MS);
-  // -- File watchers on JSONL logs for push-based updates ---------------------
-  const WATCHED_LOGS = [TELEMETRY_LOG, QUERY_LOG, EVOLUTION_AUDIT_LOG];
-  const watchedLogPaths = new Set(WATCHED_LOGS);
+  // -- SQLite WAL watcher for push-based updates ------------------------------
+  const walPath = `${DB_PATH}-wal`;
+  let walWatcherActive = false;
   let fsDebounceTimer: ReturnType<typeof setTimeout> | null = null;
   const FS_DEBOUNCE_MS = 500;
-  function onLogFileChange(): void {
+  function onWALChange(): void {
     if (fsDebounceTimer) return;
     fsDebounceTimer = setTimeout(() => {
       fsDebounceTimer = null;
@@ -253,47 +248,11 @@ export async function startDashboardServer(
     }, FS_DEBOUNCE_MS);
   }
-  const fileWatchers: FSWatcher[] = [];
-  const watchedFiles = new Set<string>();
-  let directoryWatcherActive = false;
-  function registerFileWatcher(logPath: string): void {
-    if (watchedFiles.has(logPath) || !existsSync(logPath)) return;
-    try {
-      fileWatchers.push(fsWatch(logPath, onLogFileChange));
-      watchedFiles.add(logPath);
-    } catch {
-      // Non-fatal: fall back to polling if watch fails
-    }
-  }
-  for (const logPath of WATCHED_LOGS) {
-    registerFileWatcher(logPath);
-  }
-  try {
-    fileWatchers.push(
-      fsWatch(LOG_DIR, (_eventType, filename) => {
-        if (typeof filename !== "string" || filename.length === 0) return;
-        const fullPath = join(LOG_DIR, filename);
-        if (!watchedLogPaths.has(fullPath)) return;
-        registerFileWatcher(fullPath);
-        onLogFileChange();
-      }),
-    );
-    directoryWatcherActive = true;
-  } catch {
-    directoryWatcherActive = false;
-  }
+  watchFile(walPath, { interval: 500 }, onWALChange);
+  walWatcherActive = true;
   function getWatcherMode(): HealthResponse["watcher_mode"] {
-    return directoryWatcherActive || watchedFiles.size > 0 ? "jsonl" : "none";
-  }
-  if (runtimeMode !== "test" && getWatcherMode() === "jsonl") {
-    console.warn(
-      "Dashboard freshness mode: JSONL watcher invalidation (legacy). Live updates can miss SQLite-only writes until WAL cutover lands.",
-    );
+    return walWatcherActive ? "wal" : "none";
   }
   let cachedStatusResult: StatusResult | null = null;
@@ -572,7 +531,7 @@ export async function startDashboardServer(
   // Graceful shutdown
   const shutdownHandler = () => {
-    for (const w of fileWatchers) w.close();
+    unwatchFile(walPath, onWALChange);
     clearInterval(sseKeepaliveTimer);
     for (const c of sseClients) {
       try {

package/cli/selftune/eval/hooks-to-evals.ts CHANGED Viewed

@@ -4,19 +4,24 @@
  *
  * Converts hook logs into trigger eval sets compatible with run_eval / run_loop.
  *
- * Three input logs (all written automatically by hooks):
- *   ~/.claude/skill_usage_log.jsonl      - queries that DID trigger a skill
- *   ~/.claude/all_queries_log.jsonl      - ALL queries, triggered or not
- *   ~/.claude/session_telemetry_log.jsonl - per-session process metrics (Stop hook)
+ * Default read path is SQLite (via localdb/queries). JSONL fallback is used only
+ * when custom --skill-log / --query-log / --telemetry-log paths are supplied
+ * (test/custom-path override).
+ *
+ * Three underlying log sources (all written automatically by hooks):
+ *   skill_usage     - queries that DID trigger a skill
+ *   query_log       - ALL queries, triggered or not
+ *   session_telemetry - per-session process metrics (Stop hook)
  *
  * For a given skill:
- *   Positives (should_trigger=true)  -> queries in skill_usage_log for that skill
- *   Negatives (should_trigger=false) -> queries in all_queries_log that never triggered
+ *   Positives (should_trigger=true)  -> queries in skill_usage for that skill
+ *   Negatives (should_trigger=false) -> queries in query_log that never triggered
  *                                       that skill (cross-skill AND untriggered queries)
  */
 import { writeFileSync } from "node:fs";
 import { parseArgs } from "node:util";
 import { GENERIC_NEGATIVES, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
 import { getDb } from "../localdb/db.js";
 import {
@@ -468,6 +473,7 @@ export async function cliMain(): Promise<void> {
   let queryRecords: QueryLogRecord[];
   let telemetryRecords: SessionTelemetryRecord[];
+  // SQLite is the default path; JSONL fallback only for custom --*-log overrides
   if (
     skillLogPath === SKILL_LOG &&
     queryLogPath === QUERY_LOG &&
@@ -478,6 +484,7 @@ export async function cliMain(): Promise<void> {
     queryRecords = queryQueryLog(db) as QueryLogRecord[];
     telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
   } else {
+    // test/custom-path fallback
     skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
     queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
     telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);

package/cli/selftune/eval/import-skillsbench.ts CHANGED Viewed

@@ -13,6 +13,7 @@
 import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import { parseArgs } from "node:util";
 import type { EvalEntry, SkillsBenchTask } from "../types.js";
 // ---------------------------------------------------------------------------

package/cli/selftune/eval/synthetic-evals.ts CHANGED Viewed

@@ -181,9 +181,8 @@ export async function generateSyntheticEvals(
   try {
     const { getDb } = await import("../localdb/db.js");
     const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js");
-    const { isHighConfidencePositiveSkillRecord } = await import(
-      "../utils/skill-usage-confidence.js"
-    );
+    const { isHighConfidencePositiveSkillRecord } =
+      await import("../utils/skill-usage-confidence.js");
     const db = getDb();

package/cli/selftune/eval/unit-test.ts CHANGED Viewed

@@ -12,6 +12,7 @@
  */
 import { existsSync, readFileSync } from "node:fs";
 import type {
   SkillAssertion,
   SkillUnitTest,

package/cli/selftune/evolution/deploy-proposal.ts CHANGED Viewed

@@ -7,6 +7,7 @@
  */
 import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
 import type { EvolutionProposal, SkillSections } from "../types.js";
 import type { ValidationResult } from "./validate-proposal.js";

package/cli/selftune/evolution/evolve-body.ts CHANGED Viewed

@@ -25,7 +25,8 @@ import type {
   QueryLogRecord,
   SkillUsageRecord,
 } from "../types.js";
+import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
+import { callViaSubagent } from "../utils/llm-call.js";
 import { appendAuditEntry } from "./audit.js";
 import { checkConstitutionSizeOnly } from "./constitutional.js";
 import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
@@ -57,6 +58,9 @@ export interface EvolveBodyOptions {
   fewShotExamples?: string[];
   gradingResults?: GradingResult[];
   validationModel?: string;
+  teacherEffort?: EffortLevel;
+  /** Run evolution-reviewer subagent as Gate 4 before deployment. */
+  useReviewer?: boolean;
 }
 export interface EvolveBodyResult {
@@ -89,6 +93,7 @@ export interface EvolveBodyDeps {
   readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
   readFileSync?: typeof readFileSync;
   writeFileSync?: (path: string, data: string, encoding: string) => void;
+  callViaSubagent?: (options: SubagentCallOptions) => Promise<string>;
 }
 // ---------------------------------------------------------------------------
@@ -110,6 +115,19 @@ function createAuditEntry(
   };
 }
+// ---------------------------------------------------------------------------
+// Pipeline defaults — enforced even when the calling agent omits flags
+// ---------------------------------------------------------------------------
+/** Default teacher model: Opus 4.6 for highest-quality proposals. */
+const DEFAULT_TEACHER_MODEL = "opus";
+/** Default student model: Haiku for cheap, fast validation gates. */
+const DEFAULT_STUDENT_MODEL = "haiku";
+/** Default teacher effort: extended thinking for multi-constraint reasoning. */
+const DEFAULT_TEACHER_EFFORT: EffortLevel = "high";
 // ---------------------------------------------------------------------------
 // Main orchestrator
 // ---------------------------------------------------------------------------
@@ -124,8 +142,6 @@ export async function evolveBody(
     target,
     teacherAgent,
     studentAgent,
-    teacherModel,
-    studentModel,
     evalSetPath,
     dryRun,
     maxIterations,
@@ -133,6 +149,11 @@ export async function evolveBody(
     fewShotExamples,
   } = options;
+  // Apply pipeline defaults for models/effort when not explicitly provided
+  const teacherModel = options.teacherModel ?? DEFAULT_TEACHER_MODEL;
+  const studentModel = options.studentModel ?? DEFAULT_STUDENT_MODEL;
+  const teacherEffort = options.teacherEffort ?? DEFAULT_TEACHER_EFFORT;
   // Resolve injectable dependencies
   const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
   const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
@@ -151,6 +172,7 @@ export async function evolveBody(
     });
   const _readFileSync = _deps.readFileSync ?? readFileSync;
   const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
+  const _callViaSubagent = _deps.callViaSubagent ?? callViaSubagent;
   const auditEntries: EvolutionAuditEntry[] = [];
@@ -306,6 +328,7 @@ export async function evolveBody(
             skillPath,
             teacherAgent,
             teacherModel,
+            teacherEffort,
           );
         } else {
           proposal = await _generateBodyProposal(
@@ -318,6 +341,7 @@ export async function evolveBody(
             teacherModel,
             fewShotExamples,
             executionContext,
+            teacherEffort,
           );
         }
       } else if (lastProposal && lastValidation) {
@@ -327,6 +351,7 @@ export async function evolveBody(
           lastValidation,
           teacherAgent,
           teacherModel,
+          options.teacherEffort,
         );
       } else {
         break;
@@ -496,7 +521,63 @@ export async function evolveBody(
       }
     }
-    // Step 5: Deploy or dry-run
+    // Step 5: Optional evolution-reviewer gate (Gate 4)
+    if (options.useReviewer && lastProposal && lastValidation?.improved) {
+      try {
+        const reviewPrompt = [
+          `Review this ${target} evolution proposal for the "${skillName}" skill.`,
+          ``,
+          `Proposal ID: ${lastProposal.proposal_id}`,
+          `Skill path: ${skillPath}`,
+          `Target: ${target}`,
+          `Confidence: ${lastProposal.confidence}`,
+          `Validation: ${lastValidation.gates_passed}/${lastValidation.gates_total} gates passed`,
+          `Regressions: ${lastValidation.regressions.length > 0 ? lastValidation.regressions.join(", ") : "none"}`,
+          ``,
+          `Original content:`,
+          lastProposal.original_body,
+          ``,
+          `Proposed content:`,
+          lastProposal.proposed_body,
+          ``,
+          `Rationale: ${lastProposal.rationale}`,
+        ].join("\n");
+        const reviewOutput = await _callViaSubagent({
+          agentName: "evolution-reviewer",
+          prompt: reviewPrompt,
+          maxTurns: 8,
+          allowedTools: ["Read", "Grep", "Glob", "Bash"],
+        });
+        const isRejected = /\bREJECT\b/.test(reviewOutput) && !/\bAPPROVE\b/.test(reviewOutput);
+        recordAudit(
+          lastProposal.proposal_id,
+          isRejected ? "rejected" : "validated",
+          `Evolution reviewer: ${isRejected ? "REJECTED" : "APPROVED"}`,
+        );
+        if (isRejected) {
+          return {
+            proposal: lastProposal,
+            validation: lastValidation,
+            deployed: false,
+            auditEntries,
+            reason: `Evolution reviewer rejected proposal: ${reviewOutput.slice(0, 500)}`,
+          };
+        }
+      } catch (reviewError) {
+        // Fail-open: if reviewer crashes, log it and continue to deploy
+        const msg = reviewError instanceof Error ? reviewError.message : String(reviewError);
+        recordAudit(
+          lastProposal.proposal_id,
+          "validated",
+          `Evolution reviewer failed (fail-open): ${msg}`,
+        );
+      }
+    }
+    // Step 6: Deploy or dry-run
     if (dryRun) {
       return {
         proposal: lastProposal,
@@ -594,6 +675,8 @@ export async function cliMain(): Promise<void> {
       "task-description": { type: "string" },
       "few-shot": { type: "string" },
       "validation-model": { type: "string" },
+      "teacher-effort": { type: "string", default: "high" },
+      review: { type: "boolean", default: false },
       help: { type: "boolean", default: false },
     },
     strict: true,
@@ -611,8 +694,8 @@ Options:
   --target            Evolution target: body, routing (default: body)
   --teacher-agent     Teacher agent CLI (claude, codex, etc.)
   --student-agent     Student agent CLI for validation
-  --teacher-model     Model flag for teacher agent
-  --student-model     Model flag for student agent
+  --teacher-model     Model flag for teacher agent (default: opus)
+  --student-model     Model flag for student agent (default: haiku)
   --eval-set          Path to eval set JSON
   --dry-run           Validate without deploying
   --max-iterations    Max refinement iterations (default: 3)
@@ -620,6 +703,8 @@ Options:
   --task-description  Optional task description context
   --few-shot          Comma-separated paths to example skill files
   --validation-model  Model for trigger-check validation calls (overrides --student-model for validation)
+  --teacher-effort    Effort level for teacher LLM: low, medium, high, max (default: high)
+  --review            Run evolution-reviewer subagent before deployment (Gate 4)
   --help              Show this help message`);
     process.exit(0);
   }
@@ -669,6 +754,8 @@ Options:
     fewShotExamples,
     gradingResults,
     validationModel: values["validation-model"],
+    teacherEffort: (values["teacher-effort"] as EffortLevel) ?? "high",
+    useReviewer: values.review ?? false,
   });
   console.log(JSON.stringify(result, null, 2));

package/cli/selftune/evolution/evolve.ts CHANGED Viewed

@@ -37,7 +37,6 @@ import type {
   SkillUsageRecord,
 } from "../types.js";
 import { parseFrontmatter, replaceFrontmatterDescription } from "../utils/frontmatter.js";
 import { createEvolveTUI } from "../utils/tui.js";
 import { appendAuditEntry } from "./audit.js";
 import { checkConstitution } from "./constitutional.js";

package/cli/selftune/evolution/propose-body.ts CHANGED Viewed

@@ -7,7 +7,7 @@
  */
 import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
-import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
+import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
 // ---------------------------------------------------------------------------
 // System prompt
@@ -160,6 +160,7 @@ export async function generateBodyProposal(
   modelFlag?: string,
   fewShotExamples?: string[],
   executionContext?: ExecutionContext,
+  effort?: EffortLevel,
 ): Promise<BodyEvolutionProposal> {
   const prompt = buildBodyGenerationPrompt(
     currentContent,
@@ -169,7 +170,7 @@ export async function generateBodyProposal(
     fewShotExamples,
     executionContext,
   );
-  const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag);
+  const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag, effort);
   const { proposed_body, rationale, confidence } = parseBodyProposalResponse(rawResponse);
   return {

package/cli/selftune/evolution/propose-routing.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
-import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
+import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
 // ---------------------------------------------------------------------------
 // System prompt
@@ -139,6 +139,7 @@ export async function generateRoutingProposal(
   skillPath: string,
   agent: string,
   modelFlag?: string,
+  effort?: EffortLevel,
 ): Promise<BodyEvolutionProposal> {
   const prompt = buildRoutingProposalPrompt(
     currentRouting,
@@ -147,7 +148,7 @@ export async function generateRoutingProposal(
     missedQueries,
     skillName,
   );
-  const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag);
+  const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag, effort);
   const { proposed_routing, rationale, confidence } = parseRoutingProposalResponse(rawResponse);
   return {

package/cli/selftune/evolution/refine-body.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
-import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
+import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
 // ---------------------------------------------------------------------------
 // System prompt
@@ -118,6 +118,7 @@ export async function refineBodyProposal(
   validationResult: BodyValidationResult,
   agent: string,
   modelFlag?: string,
+  effort?: EffortLevel,
 ): Promise<BodyEvolutionProposal> {
   const prompt = buildRefinementPrompt(
     proposal.proposed_body,
@@ -126,7 +127,7 @@ export async function refineBodyProposal(
     validationResult.regressions,
   );
-  const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag);
+  const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag, effort);
   const { refined_body, changes_made, confidence } = parseRefinementResponse(rawResponse);
   return {