selftune 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/README.md +35 -35
  2. package/apps/local-dashboard/dist/assets/index-BZVLv70T.js +16 -0
  3. package/apps/local-dashboard/dist/assets/{index-CRtLkBTi.css → index-Bs3Y4ixf.css} +1 -1
  4. package/apps/local-dashboard/dist/assets/{vendor-react-BQH_6WrG.js → vendor-react-BXP54cYo.js} +4 -4
  5. package/apps/local-dashboard/dist/assets/{vendor-table-dK1QMLq9.js → vendor-table-DTF_SXoy.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-CO2mrx6e.js → vendor-ui-CWU0d1wd.js} +66 -66
  7. package/apps/local-dashboard/dist/index.html +15 -15
  8. package/bin/selftune.cjs +1 -1
  9. package/cli/selftune/activation-rules.ts +37 -18
  10. package/cli/selftune/agent-guidance.ts +16 -16
  11. package/cli/selftune/alpha-identity.ts +1 -2
  12. package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
  13. package/cli/selftune/alpha-upload/flush.ts +2 -2
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +106 -3
  15. package/cli/selftune/auth/device-code.ts +32 -0
  16. package/cli/selftune/auto-update.ts +12 -0
  17. package/cli/selftune/badge/badge.ts +1 -0
  18. package/cli/selftune/canonical-export.ts +5 -0
  19. package/cli/selftune/claude-agents.ts +154 -0
  20. package/cli/selftune/contribute/bundle.ts +2 -0
  21. package/cli/selftune/contribute/contribute.ts +1 -0
  22. package/cli/selftune/cron/setup.ts +2 -2
  23. package/cli/selftune/dashboard-contract.ts +1 -1
  24. package/cli/selftune/dashboard-server.ts +11 -52
  25. package/cli/selftune/eval/hooks-to-evals.ts +13 -6
  26. package/cli/selftune/eval/import-skillsbench.ts +1 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +2 -3
  28. package/cli/selftune/eval/unit-test.ts +1 -0
  29. package/cli/selftune/evolution/deploy-proposal.ts +1 -0
  30. package/cli/selftune/evolution/evolve-body.ts +93 -6
  31. package/cli/selftune/evolution/evolve.ts +0 -1
  32. package/cli/selftune/evolution/propose-body.ts +3 -2
  33. package/cli/selftune/evolution/propose-routing.ts +3 -2
  34. package/cli/selftune/evolution/refine-body.ts +3 -2
  35. package/cli/selftune/export.ts +1 -0
  36. package/cli/selftune/grading/auto-grade.ts +1 -0
  37. package/cli/selftune/grading/grade-session.ts +9 -0
  38. package/cli/selftune/hooks/auto-activate.ts +6 -0
  39. package/cli/selftune/hooks/evolution-guard.ts +12 -15
  40. package/cli/selftune/hooks/prompt-log.ts +1 -0
  41. package/cli/selftune/hooks/session-stop.ts +34 -40
  42. package/cli/selftune/hooks/skill-change-guard.ts +1 -0
  43. package/cli/selftune/hooks/skill-eval.ts +1 -1
  44. package/cli/selftune/index.ts +23 -14
  45. package/cli/selftune/ingestors/claude-replay.ts +1 -0
  46. package/cli/selftune/ingestors/codex-rollout.ts +1 -0
  47. package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
  48. package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
  49. package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
  50. package/cli/selftune/init.ts +197 -96
  51. package/cli/selftune/localdb/db.ts +1 -0
  52. package/cli/selftune/localdb/direct-write.ts +93 -12
  53. package/cli/selftune/localdb/materialize.ts +2 -0
  54. package/cli/selftune/localdb/queries.ts +210 -0
  55. package/cli/selftune/localdb/schema.ts +72 -1
  56. package/cli/selftune/monitoring/watch.ts +1 -0
  57. package/cli/selftune/normalization.ts +4 -0
  58. package/cli/selftune/observability.ts +14 -7
  59. package/cli/selftune/orchestrate.ts +15 -37
  60. package/cli/selftune/repair/skill-usage.ts +7 -3
  61. package/cli/selftune/routes/orchestrate-runs.ts +1 -0
  62. package/cli/selftune/routes/overview.ts +1 -0
  63. package/cli/selftune/routes/skill-report.ts +1 -0
  64. package/cli/selftune/sync.ts +31 -1
  65. package/cli/selftune/types.ts +2 -2
  66. package/cli/selftune/uninstall.ts +412 -0
  67. package/cli/selftune/utils/canonical-log.ts +2 -0
  68. package/cli/selftune/utils/jsonl.ts +1 -0
  69. package/cli/selftune/utils/llm-call.ts +131 -3
  70. package/cli/selftune/utils/skill-log.ts +1 -0
  71. package/cli/selftune/utils/transcript.ts +1 -0
  72. package/cli/selftune/utils/trigger-check.ts +1 -1
  73. package/cli/selftune/workflows/skill-md-writer.ts +5 -5
  74. package/cli/selftune/workflows/workflows.ts +1 -0
  75. package/package.json +38 -33
  76. package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
  77. package/packages/telemetry-contract/package.json +3 -3
  78. package/packages/telemetry-contract/src/index.ts +0 -1
  79. package/packages/telemetry-contract/src/schemas.ts +6 -24
  80. package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
  81. package/packages/ui/README.md +35 -34
  82. package/packages/ui/package.json +3 -3
  83. package/packages/ui/src/components/ActivityTimeline.tsx +49 -42
  84. package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
  85. package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
  86. package/packages/ui/src/components/InfoTip.tsx +4 -3
  87. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
  88. package/packages/ui/src/components/section-cards.tsx +19 -24
  89. package/packages/ui/src/components/skill-health-grid.tsx +213 -193
  90. package/packages/ui/src/lib/constants.tsx +1 -0
  91. package/packages/ui/src/primitives/badge.tsx +12 -15
  92. package/packages/ui/src/primitives/button.tsx +7 -7
  93. package/packages/ui/src/primitives/card.tsx +15 -26
  94. package/packages/ui/src/primitives/checkbox.tsx +7 -8
  95. package/packages/ui/src/primitives/collapsible.tsx +5 -5
  96. package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
  97. package/packages/ui/src/primitives/label.tsx +6 -6
  98. package/packages/ui/src/primitives/select.tsx +28 -37
  99. package/packages/ui/src/primitives/table.tsx +17 -44
  100. package/packages/ui/src/primitives/tabs.tsx +14 -21
  101. package/packages/ui/src/primitives/tooltip.tsx +10 -22
  102. package/skill/SKILL.md +72 -59
  103. package/skill/Workflows/AlphaUpload.md +4 -4
  104. package/skill/Workflows/AutoActivation.md +11 -6
  105. package/skill/Workflows/Badge.md +22 -16
  106. package/skill/Workflows/Baseline.md +34 -36
  107. package/skill/Workflows/Composability.md +16 -11
  108. package/skill/Workflows/Contribute.md +26 -21
  109. package/skill/Workflows/Cron.md +23 -22
  110. package/skill/Workflows/Dashboard.md +40 -40
  111. package/skill/Workflows/Doctor.md +40 -34
  112. package/skill/Workflows/Evals.md +48 -47
  113. package/skill/Workflows/EvolutionMemory.md +31 -21
  114. package/skill/Workflows/Evolve.md +84 -82
  115. package/skill/Workflows/EvolveBody.md +58 -47
  116. package/skill/Workflows/Grade.md +16 -13
  117. package/skill/Workflows/ImportSkillsBench.md +9 -6
  118. package/skill/Workflows/Ingest.md +36 -21
  119. package/skill/Workflows/Initialize.md +138 -97
  120. package/skill/Workflows/Orchestrate.md +22 -16
  121. package/skill/Workflows/Replay.md +12 -7
  122. package/skill/Workflows/Rollback.md +13 -6
  123. package/skill/Workflows/Schedule.md +6 -6
  124. package/skill/Workflows/Sync.md +18 -11
  125. package/skill/Workflows/UnitTest.md +28 -17
  126. package/skill/Workflows/Watch.md +28 -21
  127. package/skill/agents/diagnosis-analyst.md +11 -0
  128. package/skill/agents/evolution-reviewer.md +15 -1
  129. package/skill/agents/integration-guide.md +10 -0
  130. package/skill/agents/pattern-analyst.md +12 -1
  131. package/skill/references/grading-methodology.md +23 -24
  132. package/skill/references/interactive-config.md +7 -7
  133. package/skill/references/invocation-taxonomy.md +22 -20
  134. package/skill/references/logs.md +20 -6
  135. package/skill/references/setup-patterns.md +4 -2
  136. package/.claude/agents/diagnosis-analyst.md +0 -156
  137. package/.claude/agents/evolution-reviewer.md +0 -180
  138. package/.claude/agents/integration-guide.md +0 -212
  139. package/.claude/agents/pattern-analyst.md +0 -160
  140. package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +0 -15
@@ -0,0 +1,154 @@
1
+ import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from "node:fs";
2
+ import { homedir } from "node:os";
3
+ import { dirname, join, resolve } from "node:path";
4
+
5
+ const MANIFEST_FILENAME = ".selftune-manifest.json";
6
+
7
+ const LEGACY_SELFTUNE_AGENT_FILES = [
8
+ "diagnosis-analyst.md",
9
+ "evolution-reviewer.md",
10
+ "integration-guide.md",
11
+ "pattern-analyst.md",
12
+ ] as const;
13
+
14
+ const BUNDLED_AGENT_DIR = resolve(dirname(import.meta.path), "..", "..", "skill", "agents");
15
+
16
+ interface AgentManifest {
17
+ version: 1;
18
+ files: string[];
19
+ synced_at: string;
20
+ }
21
+
22
/**
 * Load and sanitize the agent manifest stored at `path`.
 *
 * Entries in `files` are later joined onto the agents directory and passed to
 * `rmSync`, so only plain file names are kept. Anything that is not a string,
 * is empty, is `.` or `..`, or contains a path separator is dropped; this keeps
 * a corrupted or hand-edited manifest from pointing at the directory itself or
 * at files outside it.
 *
 * @param path - Absolute path of the manifest JSON file.
 * @returns The normalized manifest, or `null` when the file is missing,
 *          unreadable, not valid JSON, or has no `files` array.
 */
function readManifest(path: string): AgentManifest | null {
  // A managed entry must be a bare file name that lives directly in the agents directory.
  const isPlainFileName = (name: unknown): name is string =>
    typeof name === "string" &&
    name !== "" &&
    name !== "." &&
    name !== ".." &&
    !name.includes("/") &&
    !name.includes("\\");

  try {
    if (!existsSync(path)) return null;
    const parsed = JSON.parse(readFileSync(path, "utf-8")) as Partial<AgentManifest>;
    if (!Array.isArray(parsed.files)) return null;
    return {
      version: 1,
      files: parsed.files.filter(isPlainFileName),
      synced_at: typeof parsed.synced_at === "string" ? parsed.synced_at : "",
    };
  } catch {
    // Best-effort read: any I/O or parse failure is treated as "no manifest".
    return null;
  }
}
36
+
37
/**
 * Persist a version-1 manifest listing `files` to `path`.
 *
 * The file list is written in sorted order (the caller's array is not
 * mutated) and `synced_at` is stamped with the current time in ISO-8601 form.
 * The JSON is pretty-printed with a two-space indent.
 *
 * @param path - Destination of the manifest file.
 * @param files - File names to record.
 */
function writeManifest(path: string, files: string[]): void {
  const sortedFiles = files.slice();
  sortedFiles.sort();
  const syncedAt = new Date().toISOString();
  const manifest: AgentManifest = { version: 1, files: sortedFiles, synced_at: syncedAt };
  const serialized = JSON.stringify(manifest, null, 2);
  writeFileSync(path, serialized, "utf-8");
}
45
+
46
/**
 * Read a file as UTF-8 text without ever throwing.
 *
 * @param path - File to read.
 * @returns The file contents, or `null` when the path does not exist or the
 *          read fails for any reason.
 */
function readTextIfExists(path: string): string | null {
  let text: string | null = null;
  try {
    if (existsSync(path)) {
      text = readFileSync(path, "utf-8");
    }
  } catch {
    text = null;
  }
  return text;
}
54
+
55
/**
 * Build the path of the `.claude/agents` directory under a home directory.
 *
 * @param homeDir - Home directory to resolve against; defaults to `os.homedir()`.
 * @returns `<homeDir>/.claude/agents`.
 */
export function getClaudeAgentsDir(homeDir = homedir()): string {
  const segments = [homeDir, ".claude", "agents"];
  return join(...segments);
}
58
+
59
/**
 * Build the path of the manifest file kept inside the agents directory.
 *
 * @param homeDir - Home directory to resolve against; defaults to `os.homedir()`.
 * @returns The agents directory for `homeDir` joined with the manifest file name.
 */
export function getClaudeAgentManifestPath(homeDir = homedir()): string {
  const agentsDir = getClaudeAgentsDir(homeDir);
  return join(agentsDir, MANIFEST_FILENAME);
}
62
+
63
/**
 * List the `.md` entries found directly inside `sourceDir`, sorted.
 *
 * @param sourceDir - Directory to scan; defaults to the bundled agent directory.
 * @returns Sorted entry names ending in `.md`, or an empty array when the
 *          directory is missing or cannot be read.
 */
export function listBundledAgentFiles(sourceDir = BUNDLED_AGENT_DIR): string[] {
  let markdownFiles: string[] = [];
  try {
    if (existsSync(sourceDir)) {
      const entries = readdirSync(sourceDir);
      markdownFiles = entries.filter((entry) => entry.endsWith(".md"));
      markdownFiles.sort();
    }
  } catch {
    // Unreadable directory is treated the same as an empty one.
    markdownFiles = [];
  }
  return markdownFiles;
}
73
+
74
/**
 * Synchronize the bundled agent files into the user's agents directory.
 *
 * Steps, in order:
 *  1. List the bundled `.md` files; return `[]` immediately if there are none.
 *  2. Ensure the target directory exists.
 *  3. Delete previously managed files (legacy names plus manifest entries)
 *     that are no longer bundled.
 *  4. Write each bundled file whose content differs from the installed copy,
 *     or every bundled file when `force` is set.
 *  5. Rewrite the manifest with the current bundled file list.
 *
 * @param options.homeDir - Home directory to install under; defaults to `os.homedir()`.
 * @param options.force - Rewrite files even when their content is unchanged.
 * @param options.sourceDir - Directory holding the bundled files.
 * @returns Sorted names of files that were removed or written.
 */
export function installAgentFiles(options?: {
  homeDir?: string;
  force?: boolean;
  sourceDir?: string;
}): string[] {
  const home = options?.homeDir ?? homedir();
  const agentsDir = getClaudeAgentsDir(home);
  const manifestFile = getClaudeAgentManifestPath(home);
  const bundleDir = options?.sourceDir ?? BUNDLED_AGENT_DIR;
  const bundled = listBundledAgentFiles(bundleDir);
  if (bundled.length === 0) return [];

  mkdirSync(agentsDir, { recursive: true });

  // Everything selftune may have installed before: legacy names first, then manifest entries.
  const previouslyManaged = new Set<string>(LEGACY_SELFTUNE_AGENT_FILES);
  for (const recorded of readManifest(manifestFile)?.files ?? []) {
    previouslyManaged.add(recorded);
  }
  const touched = new Set<string>();

  // Prune managed files that are no longer part of the bundle.
  for (const name of previouslyManaged) {
    if (bundled.includes(name)) continue;
    const obsoletePath = join(agentsDir, name);
    if (!existsSync(obsoletePath)) continue;
    rmSync(obsoletePath, { force: true });
    touched.add(name);
  }

  // Copy bundled files that are missing, outdated, or forced.
  for (const name of bundled) {
    const desired = readTextIfExists(join(bundleDir, name));
    if (desired === null) continue;
    const destination = join(agentsDir, name);
    const current = readTextIfExists(destination);
    if (!options?.force && current === desired) continue;
    writeFileSync(destination, desired, "utf-8");
    touched.add(name);
  }

  writeManifest(manifestFile, bundled);
  return Array.from(touched).sort();
}
121
+
122
/**
 * Remove every selftune-managed agent file, and the manifest, from the
 * user's agents directory.
 *
 * Managed names are the union of the legacy file names, the currently
 * bundled files, and the entries recorded in the manifest. Only paths that
 * exist are reported. With `dryRun` nothing is deleted, but the same paths
 * are still returned.
 *
 * @param options.homeDir - Home directory to clean; defaults to `os.homedir()`.
 * @param options.dryRun - Report what would be removed without deleting.
 * @returns The count and full paths of removed (or would-be-removed) files.
 */
export function removeInstalledAgentFiles(options?: { homeDir?: string; dryRun?: boolean }): {
  removed: number;
  files: string[];
} {
  const home = options?.homeDir ?? homedir();
  const agentsDir = getClaudeAgentsDir(home);
  const manifestFile = getClaudeAgentManifestPath(home);
  const recorded = readManifest(manifestFile)?.files ?? [];
  const managedNames = new Set<string>([
    ...LEGACY_SELFTUNE_AGENT_FILES,
    ...listBundledAgentFiles(),
    ...recorded,
  ]);

  // Agent files first, the manifest itself last.
  const candidatePaths = Array.from(managedNames, (name) => join(agentsDir, name));
  candidatePaths.push(manifestFile);

  const deleted: string[] = [];
  for (const candidate of candidatePaths) {
    if (!existsSync(candidate)) continue;
    if (!options?.dryRun) {
      rmSync(candidate, { force: true });
    }
    deleted.push(candidate);
  }

  return { removed: deleted.length, files: deleted };
}
@@ -8,6 +8,7 @@ import { randomUUID } from "node:crypto";
8
8
  import { existsSync, readdirSync, readFileSync } from "node:fs";
9
9
  import { homedir } from "node:os";
10
10
  import { join } from "node:path";
11
+
11
12
  import {
12
13
  EVOLUTION_AUDIT_LOG,
13
14
  QUERY_LOG,
@@ -224,6 +225,7 @@ export function assembleBundle(options: {
224
225
  let allEvolutionRecords: EvolutionAuditEntry[];
225
226
 
226
227
  if (useJsonl) {
228
+ // JSONL fallback: only used when custom (non-default) log paths are provided (test isolation)
227
229
  allSkillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
228
230
  allQueryRecords = readJsonl<QueryLogRecord>(queryLogPath);
229
231
  allTelemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
@@ -10,6 +10,7 @@
10
10
  import { spawnSync } from "node:child_process";
11
11
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
12
12
  import { parseArgs } from "node:util";
13
+
13
14
  import { CONTRIBUTIONS_DIR } from "../constants.js";
14
15
  import { assembleBundle } from "./bundle.js";
15
16
  import { sanitizeBundle } from "./sanitize.js";
@@ -46,10 +46,10 @@ export const DEFAULT_CRON_JOBS: CronJobConfig[] = [
46
46
  },
47
47
  {
48
48
  name: "selftune-orchestrate",
49
- cron: "0 */6 * * *",
49
+ cron: "0 */2 * * *",
50
50
  message:
51
51
  "Run selftune orchestrate --max-skills 3. This performs source-truth sync, selects candidate skills, evolves validated low-risk descriptions autonomously, and watches recent deployments for regressions.",
52
- description: "Autonomous improvement loop every 6 hours",
52
+ description: "Autonomous improvement loop every 2 hours",
53
53
  },
54
54
  ];
55
55
 
@@ -199,7 +199,7 @@ export interface HealthResponse {
199
199
  db_path: string;
200
200
  log_dir: string;
201
201
  config_dir: string;
202
- watcher_mode: "jsonl" | "none";
202
+ watcher_mode: "wal" | "jsonl" | "none";
203
203
  process_mode: "standalone" | "dev-server" | "test";
204
204
  host: string;
205
205
  port: number;
@@ -17,16 +17,11 @@
17
17
  */
18
18
 
19
19
  import type { Database } from "bun:sqlite";
20
- import { existsSync, type FSWatcher, watch as fsWatch, readFileSync } from "node:fs";
20
+ import { existsSync, readFileSync, unwatchFile, watchFile } from "node:fs";
21
21
  import { dirname, extname, isAbsolute, join, relative, resolve } from "node:path";
22
+
22
23
  import type { BadgeFormat } from "./badge/badge-svg.js";
23
- import {
24
- EVOLUTION_AUDIT_LOG,
25
- LOG_DIR,
26
- QUERY_LOG,
27
- SELFTUNE_CONFIG_DIR,
28
- TELEMETRY_LOG,
29
- } from "./constants.js";
24
+ import { LOG_DIR, SELFTUNE_CONFIG_DIR } from "./constants.js";
30
25
  import type {
31
26
  HealthResponse,
32
27
  OverviewResponse,
@@ -237,14 +232,14 @@ export async function startDashboardServer(
237
232
  }
238
233
  }, SSE_KEEPALIVE_MS);
239
234
 
240
- // -- File watchers on JSONL logs for push-based updates ---------------------
241
- const WATCHED_LOGS = [TELEMETRY_LOG, QUERY_LOG, EVOLUTION_AUDIT_LOG];
242
- const watchedLogPaths = new Set(WATCHED_LOGS);
235
+ // -- SQLite WAL watcher for push-based updates ------------------------------
236
+ const walPath = `${DB_PATH}-wal`;
237
+ let walWatcherActive = false;
243
238
 
244
239
  let fsDebounceTimer: ReturnType<typeof setTimeout> | null = null;
245
240
  const FS_DEBOUNCE_MS = 500;
246
241
 
247
- function onLogFileChange(): void {
242
+ function onWALChange(): void {
248
243
  if (fsDebounceTimer) return;
249
244
  fsDebounceTimer = setTimeout(() => {
250
245
  fsDebounceTimer = null;
@@ -253,47 +248,11 @@ export async function startDashboardServer(
253
248
  }, FS_DEBOUNCE_MS);
254
249
  }
255
250
 
256
- const fileWatchers: FSWatcher[] = [];
257
- const watchedFiles = new Set<string>();
258
- let directoryWatcherActive = false;
259
-
260
- function registerFileWatcher(logPath: string): void {
261
- if (watchedFiles.has(logPath) || !existsSync(logPath)) return;
262
- try {
263
- fileWatchers.push(fsWatch(logPath, onLogFileChange));
264
- watchedFiles.add(logPath);
265
- } catch {
266
- // Non-fatal: fall back to polling if watch fails
267
- }
268
- }
269
-
270
- for (const logPath of WATCHED_LOGS) {
271
- registerFileWatcher(logPath);
272
- }
273
-
274
- try {
275
- fileWatchers.push(
276
- fsWatch(LOG_DIR, (_eventType, filename) => {
277
- if (typeof filename !== "string" || filename.length === 0) return;
278
- const fullPath = join(LOG_DIR, filename);
279
- if (!watchedLogPaths.has(fullPath)) return;
280
- registerFileWatcher(fullPath);
281
- onLogFileChange();
282
- }),
283
- );
284
- directoryWatcherActive = true;
285
- } catch {
286
- directoryWatcherActive = false;
287
- }
251
+ watchFile(walPath, { interval: 500 }, onWALChange);
252
+ walWatcherActive = true;
288
253
 
289
254
  function getWatcherMode(): HealthResponse["watcher_mode"] {
290
- return directoryWatcherActive || watchedFiles.size > 0 ? "jsonl" : "none";
291
- }
292
-
293
- if (runtimeMode !== "test" && getWatcherMode() === "jsonl") {
294
- console.warn(
295
- "Dashboard freshness mode: JSONL watcher invalidation (legacy). Live updates can miss SQLite-only writes until WAL cutover lands.",
296
- );
255
+ return walWatcherActive ? "wal" : "none";
297
256
  }
298
257
 
299
258
  let cachedStatusResult: StatusResult | null = null;
@@ -572,7 +531,7 @@ export async function startDashboardServer(
572
531
 
573
532
  // Graceful shutdown
574
533
  const shutdownHandler = () => {
575
- for (const w of fileWatchers) w.close();
534
+ unwatchFile(walPath, onWALChange);
576
535
  clearInterval(sseKeepaliveTimer);
577
536
  for (const c of sseClients) {
578
537
  try {
@@ -4,19 +4,24 @@
4
4
  *
5
5
  * Converts hook logs into trigger eval sets compatible with run_eval / run_loop.
6
6
  *
7
- * Three input logs (all written automatically by hooks):
8
- * ~/.claude/skill_usage_log.jsonl - queries that DID trigger a skill
9
- * ~/.claude/all_queries_log.jsonl - ALL queries, triggered or not
10
- * ~/.claude/session_telemetry_log.jsonl - per-session process metrics (Stop hook)
7
+ * Default read path is SQLite (via localdb/queries). JSONL fallback is used only
8
+ * when custom --skill-log / --query-log / --telemetry-log paths are supplied
9
+ * (test/custom-path override).
10
+ *
11
+ * Three underlying log sources (all written automatically by hooks):
12
+ * skill_usage - queries that DID trigger a skill
13
+ * query_log - ALL queries, triggered or not
14
+ * session_telemetry - per-session process metrics (Stop hook)
11
15
  *
12
16
  * For a given skill:
13
- * Positives (should_trigger=true) -> queries in skill_usage_log for that skill
14
- * Negatives (should_trigger=false) -> queries in all_queries_log that never triggered
17
+ * Positives (should_trigger=true) -> queries in skill_usage for that skill
18
+ * Negatives (should_trigger=false) -> queries in query_log that never triggered
15
19
  * that skill (cross-skill AND untriggered queries)
16
20
  */
17
21
 
18
22
  import { writeFileSync } from "node:fs";
19
23
  import { parseArgs } from "node:util";
24
+
20
25
  import { GENERIC_NEGATIVES, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
21
26
  import { getDb } from "../localdb/db.js";
22
27
  import {
@@ -468,6 +473,7 @@ export async function cliMain(): Promise<void> {
468
473
  let queryRecords: QueryLogRecord[];
469
474
  let telemetryRecords: SessionTelemetryRecord[];
470
475
 
476
+ // SQLite is the default path; JSONL fallback only for custom --*-log overrides
471
477
  if (
472
478
  skillLogPath === SKILL_LOG &&
473
479
  queryLogPath === QUERY_LOG &&
@@ -478,6 +484,7 @@ export async function cliMain(): Promise<void> {
478
484
  queryRecords = queryQueryLog(db) as QueryLogRecord[];
479
485
  telemetryRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
480
486
  } else {
487
+ // test/custom-path fallback
481
488
  skillRecords = readJsonl<SkillUsageRecord>(skillLogPath);
482
489
  queryRecords = readJsonl<QueryLogRecord>(queryLogPath);
483
490
  telemetryRecords = readJsonl<SessionTelemetryRecord>(telemetryLogPath);
@@ -13,6 +13,7 @@
13
13
  import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
14
14
  import { join } from "node:path";
15
15
  import { parseArgs } from "node:util";
16
+
16
17
  import type { EvalEntry, SkillsBenchTask } from "../types.js";
17
18
 
18
19
  // ---------------------------------------------------------------------------
@@ -181,9 +181,8 @@ export async function generateSyntheticEvals(
181
181
  try {
182
182
  const { getDb } = await import("../localdb/db.js");
183
183
  const { querySkillUsageRecords, queryQueryLog } = await import("../localdb/queries.js");
184
- const { isHighConfidencePositiveSkillRecord } = await import(
185
- "../utils/skill-usage-confidence.js"
186
- );
184
+ const { isHighConfidencePositiveSkillRecord } =
185
+ await import("../utils/skill-usage-confidence.js");
187
186
 
188
187
  const db = getDb();
189
188
 
@@ -12,6 +12,7 @@
12
12
  */
13
13
 
14
14
  import { existsSync, readFileSync } from "node:fs";
15
+
15
16
  import type {
16
17
  SkillAssertion,
17
18
  SkillUnitTest,
@@ -7,6 +7,7 @@
7
7
  */
8
8
 
9
9
  import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
10
+
10
11
  import type { EvolutionProposal, SkillSections } from "../types.js";
11
12
  import type { ValidationResult } from "./validate-proposal.js";
12
13
 
@@ -25,7 +25,8 @@ import type {
25
25
  QueryLogRecord,
26
26
  SkillUsageRecord,
27
27
  } from "../types.js";
28
-
28
+ import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
29
+ import { callViaSubagent } from "../utils/llm-call.js";
29
30
  import { appendAuditEntry } from "./audit.js";
30
31
  import { checkConstitutionSizeOnly } from "./constitutional.js";
31
32
  import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
@@ -57,6 +58,9 @@ export interface EvolveBodyOptions {
57
58
  fewShotExamples?: string[];
58
59
  gradingResults?: GradingResult[];
59
60
  validationModel?: string;
61
+ teacherEffort?: EffortLevel;
62
+ /** Run evolution-reviewer subagent as Gate 4 before deployment. */
63
+ useReviewer?: boolean;
60
64
  }
61
65
 
62
66
  export interface EvolveBodyResult {
@@ -89,6 +93,7 @@ export interface EvolveBodyDeps {
89
93
  readEffectiveSkillUsageRecords?: () => SkillUsageRecord[];
90
94
  readFileSync?: typeof readFileSync;
91
95
  writeFileSync?: (path: string, data: string, encoding: string) => void;
96
+ callViaSubagent?: (options: SubagentCallOptions) => Promise<string>;
92
97
  }
93
98
 
94
99
  // ---------------------------------------------------------------------------
@@ -110,6 +115,19 @@ function createAuditEntry(
110
115
  };
111
116
  }
112
117
 
118
+ // ---------------------------------------------------------------------------
119
+ // Pipeline defaults — enforced even when the calling agent omits flags
120
+ // ---------------------------------------------------------------------------
121
+
122
+ /** Default teacher model: Opus 4.6 for highest-quality proposals. */
123
+ const DEFAULT_TEACHER_MODEL = "opus";
124
+
125
+ /** Default student model: Haiku for cheap, fast validation gates. */
126
+ const DEFAULT_STUDENT_MODEL = "haiku";
127
+
128
+ /** Default teacher effort: extended thinking for multi-constraint reasoning. */
129
+ const DEFAULT_TEACHER_EFFORT: EffortLevel = "high";
130
+
113
131
  // ---------------------------------------------------------------------------
114
132
  // Main orchestrator
115
133
  // ---------------------------------------------------------------------------
@@ -124,8 +142,6 @@ export async function evolveBody(
124
142
  target,
125
143
  teacherAgent,
126
144
  studentAgent,
127
- teacherModel,
128
- studentModel,
129
145
  evalSetPath,
130
146
  dryRun,
131
147
  maxIterations,
@@ -133,6 +149,11 @@ export async function evolveBody(
133
149
  fewShotExamples,
134
150
  } = options;
135
151
 
152
+ // Apply pipeline defaults for models/effort when not explicitly provided
153
+ const teacherModel = options.teacherModel ?? DEFAULT_TEACHER_MODEL;
154
+ const studentModel = options.studentModel ?? DEFAULT_STUDENT_MODEL;
155
+ const teacherEffort = options.teacherEffort ?? DEFAULT_TEACHER_EFFORT;
156
+
136
157
  // Resolve injectable dependencies
137
158
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
138
159
  const _generateBodyProposal = _deps.generateBodyProposal ?? generateBodyProposal;
@@ -151,6 +172,7 @@ export async function evolveBody(
151
172
  });
152
173
  const _readFileSync = _deps.readFileSync ?? readFileSync;
153
174
  const _writeFileSync = _deps.writeFileSync ?? (await import("node:fs")).writeFileSync;
175
+ const _callViaSubagent = _deps.callViaSubagent ?? callViaSubagent;
154
176
 
155
177
  const auditEntries: EvolutionAuditEntry[] = [];
156
178
 
@@ -306,6 +328,7 @@ export async function evolveBody(
306
328
  skillPath,
307
329
  teacherAgent,
308
330
  teacherModel,
331
+ teacherEffort,
309
332
  );
310
333
  } else {
311
334
  proposal = await _generateBodyProposal(
@@ -318,6 +341,7 @@ export async function evolveBody(
318
341
  teacherModel,
319
342
  fewShotExamples,
320
343
  executionContext,
344
+ teacherEffort,
321
345
  );
322
346
  }
323
347
  } else if (lastProposal && lastValidation) {
@@ -327,6 +351,7 @@ export async function evolveBody(
327
351
  lastValidation,
328
352
  teacherAgent,
329
353
  teacherModel,
354
+ options.teacherEffort,
330
355
  );
331
356
  } else {
332
357
  break;
@@ -496,7 +521,63 @@ export async function evolveBody(
496
521
  }
497
522
  }
498
523
 
499
- // Step 5: Deploy or dry-run
524
+ // Step 5: Optional evolution-reviewer gate (Gate 4)
525
+ if (options.useReviewer && lastProposal && lastValidation?.improved) {
526
+ try {
527
+ const reviewPrompt = [
528
+ `Review this ${target} evolution proposal for the "${skillName}" skill.`,
529
+ ``,
530
+ `Proposal ID: ${lastProposal.proposal_id}`,
531
+ `Skill path: ${skillPath}`,
532
+ `Target: ${target}`,
533
+ `Confidence: ${lastProposal.confidence}`,
534
+ `Validation: ${lastValidation.gates_passed}/${lastValidation.gates_total} gates passed`,
535
+ `Regressions: ${lastValidation.regressions.length > 0 ? lastValidation.regressions.join(", ") : "none"}`,
536
+ ``,
537
+ `Original content:`,
538
+ lastProposal.original_body,
539
+ ``,
540
+ `Proposed content:`,
541
+ lastProposal.proposed_body,
542
+ ``,
543
+ `Rationale: ${lastProposal.rationale}`,
544
+ ].join("\n");
545
+
546
+ const reviewOutput = await _callViaSubagent({
547
+ agentName: "evolution-reviewer",
548
+ prompt: reviewPrompt,
549
+ maxTurns: 8,
550
+ allowedTools: ["Read", "Grep", "Glob", "Bash"],
551
+ });
552
+
553
+ const isRejected = /\bREJECT\b/.test(reviewOutput) && !/\bAPPROVE\b/.test(reviewOutput);
554
+ recordAudit(
555
+ lastProposal.proposal_id,
556
+ isRejected ? "rejected" : "validated",
557
+ `Evolution reviewer: ${isRejected ? "REJECTED" : "APPROVED"}`,
558
+ );
559
+
560
+ if (isRejected) {
561
+ return {
562
+ proposal: lastProposal,
563
+ validation: lastValidation,
564
+ deployed: false,
565
+ auditEntries,
566
+ reason: `Evolution reviewer rejected proposal: ${reviewOutput.slice(0, 500)}`,
567
+ };
568
+ }
569
+ } catch (reviewError) {
570
+ // Fail-open: if reviewer crashes, log it and continue to deploy
571
+ const msg = reviewError instanceof Error ? reviewError.message : String(reviewError);
572
+ recordAudit(
573
+ lastProposal.proposal_id,
574
+ "validated",
575
+ `Evolution reviewer failed (fail-open): ${msg}`,
576
+ );
577
+ }
578
+ }
579
+
580
+ // Step 6: Deploy or dry-run
500
581
  if (dryRun) {
501
582
  return {
502
583
  proposal: lastProposal,
@@ -594,6 +675,8 @@ export async function cliMain(): Promise<void> {
594
675
  "task-description": { type: "string" },
595
676
  "few-shot": { type: "string" },
596
677
  "validation-model": { type: "string" },
678
+ "teacher-effort": { type: "string", default: "high" },
679
+ review: { type: "boolean", default: false },
597
680
  help: { type: "boolean", default: false },
598
681
  },
599
682
  strict: true,
@@ -611,8 +694,8 @@ Options:
611
694
  --target Evolution target: body, routing (default: body)
612
695
  --teacher-agent Teacher agent CLI (claude, codex, etc.)
613
696
  --student-agent Student agent CLI for validation
614
- --teacher-model Model flag for teacher agent
615
- --student-model Model flag for student agent
697
+ --teacher-model Model flag for teacher agent (default: opus)
698
+ --student-model Model flag for student agent (default: haiku)
616
699
  --eval-set Path to eval set JSON
617
700
  --dry-run Validate without deploying
618
701
  --max-iterations Max refinement iterations (default: 3)
@@ -620,6 +703,8 @@ Options:
620
703
  --task-description Optional task description context
621
704
  --few-shot Comma-separated paths to example skill files
622
705
  --validation-model Model for trigger-check validation calls (overrides --student-model for validation)
706
+ --teacher-effort Effort level for teacher LLM: low, medium, high, max (default: high)
707
+ --review Run evolution-reviewer subagent before deployment (Gate 4)
623
708
  --help Show this help message`);
624
709
  process.exit(0);
625
710
  }
@@ -669,6 +754,8 @@ Options:
669
754
  fewShotExamples,
670
755
  gradingResults,
671
756
  validationModel: values["validation-model"],
757
+ teacherEffort: (values["teacher-effort"] as EffortLevel) ?? "high",
758
+ useReviewer: values.review ?? false,
672
759
  });
673
760
 
674
761
  console.log(JSON.stringify(result, null, 2));
@@ -37,7 +37,6 @@ import type {
37
37
  SkillUsageRecord,
38
38
  } from "../types.js";
39
39
  import { parseFrontmatter, replaceFrontmatterDescription } from "../utils/frontmatter.js";
40
-
41
40
  import { createEvolveTUI } from "../utils/tui.js";
42
41
  import { appendAuditEntry } from "./audit.js";
43
42
  import { checkConstitution } from "./constitutional.js";
@@ -7,7 +7,7 @@
7
7
  */
8
8
 
9
9
  import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
10
- import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
+ import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
11
11
 
12
12
  // ---------------------------------------------------------------------------
13
13
  // System prompt
@@ -160,6 +160,7 @@ export async function generateBodyProposal(
160
160
  modelFlag?: string,
161
161
  fewShotExamples?: string[],
162
162
  executionContext?: ExecutionContext,
163
+ effort?: EffortLevel,
163
164
  ): Promise<BodyEvolutionProposal> {
164
165
  const prompt = buildBodyGenerationPrompt(
165
166
  currentContent,
@@ -169,7 +170,7 @@ export async function generateBodyProposal(
169
170
  fewShotExamples,
170
171
  executionContext,
171
172
  );
172
- const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag);
173
+ const rawResponse = await callLlm(BODY_GENERATOR_SYSTEM, prompt, agent, modelFlag, effort);
173
174
  const { proposed_body, rationale, confidence } = parseBodyProposalResponse(rawResponse);
174
175
 
175
176
  return {
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { BodyEvolutionProposal, EvolutionTarget, FailurePattern } from "../types.js";
9
- import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
9
+ import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
10
 
11
11
  // ---------------------------------------------------------------------------
12
12
  // System prompt
@@ -139,6 +139,7 @@ export async function generateRoutingProposal(
139
139
  skillPath: string,
140
140
  agent: string,
141
141
  modelFlag?: string,
142
+ effort?: EffortLevel,
142
143
  ): Promise<BodyEvolutionProposal> {
143
144
  const prompt = buildRoutingProposalPrompt(
144
145
  currentRouting,
@@ -147,7 +148,7 @@ export async function generateRoutingProposal(
147
148
  missedQueries,
148
149
  skillName,
149
150
  );
150
- const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag);
151
+ const rawResponse = await callLlm(ROUTING_PROPOSER_SYSTEM, prompt, agent, modelFlag, effort);
151
152
  const { proposed_routing, rationale, confidence } = parseRoutingProposalResponse(rawResponse);
152
153
 
153
154
  return {
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { BodyEvolutionProposal, BodyValidationResult } from "../types.js";
9
- import { callLlm, stripMarkdownFences } from "../utils/llm-call.js";
9
+ import { type EffortLevel, callLlm, stripMarkdownFences } from "../utils/llm-call.js";
10
10
 
11
11
  // ---------------------------------------------------------------------------
12
12
  // System prompt
@@ -118,6 +118,7 @@ export async function refineBodyProposal(
118
118
  validationResult: BodyValidationResult,
119
119
  agent: string,
120
120
  modelFlag?: string,
121
+ effort?: EffortLevel,
121
122
  ): Promise<BodyEvolutionProposal> {
122
123
  const prompt = buildRefinementPrompt(
123
124
  proposal.proposed_body,
@@ -126,7 +127,7 @@ export async function refineBodyProposal(
126
127
  validationResult.regressions,
127
128
  );
128
129
 
129
- const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag);
130
+ const rawResponse = await callLlm(BODY_REFINER_SYSTEM, prompt, agent, modelFlag, effort);
130
131
  const { refined_body, changes_made, confidence } = parseRefinementResponse(rawResponse);
131
132
 
132
133
  return {