selftune 0.2.23 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +93 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dhgv5BQO.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  13. package/cli/selftune/auto-update.ts +200 -8
  14. package/cli/selftune/canonical-export.ts +55 -25
  15. package/cli/selftune/command-surface.ts +397 -0
  16. package/cli/selftune/contribute/contribute.ts +64 -13
  17. package/cli/selftune/contribution-config.ts +57 -3
  18. package/cli/selftune/contribution-preferences.ts +117 -0
  19. package/cli/selftune/contribution-signals.ts +8 -4
  20. package/cli/selftune/contribution-staging.ts +13 -2
  21. package/cli/selftune/contributions.ts +55 -121
  22. package/cli/selftune/creator-contributions.ts +29 -10
  23. package/cli/selftune/cron/setup.ts +7 -3
  24. package/cli/selftune/dashboard-contract.ts +73 -0
  25. package/cli/selftune/dashboard-server.ts +168 -17
  26. package/cli/selftune/dashboard.ts +350 -17
  27. package/cli/selftune/eval/baseline.ts +21 -5
  28. package/cli/selftune/eval/execution-eval.ts +170 -0
  29. package/cli/selftune/eval/family-overlap.ts +2 -2
  30. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  31. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  32. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  33. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  34. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  35. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  36. package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
  37. package/cli/selftune/evolution/evolve-body.ts +100 -39
  38. package/cli/selftune/evolution/evolve.ts +244 -52
  39. package/cli/selftune/evolution/rollback.ts +0 -1
  40. package/cli/selftune/evolution/validate-body.ts +68 -42
  41. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  42. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  43. package/cli/selftune/evolution/validate-routing.ts +43 -41
  44. package/cli/selftune/evolution/validation-contract.ts +91 -0
  45. package/cli/selftune/grading/auto-grade.ts +11 -7
  46. package/cli/selftune/grading/grade-session.ts +10 -16
  47. package/cli/selftune/index.ts +35 -10
  48. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  49. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  50. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  51. package/cli/selftune/ingestors/pi-ingest.ts +3 -2
  52. package/cli/selftune/init.ts +27 -3
  53. package/cli/selftune/localdb/direct-write.ts +35 -1
  54. package/cli/selftune/localdb/queries/cron.ts +34 -0
  55. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  56. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  57. package/cli/selftune/localdb/queries/execution.ts +133 -0
  58. package/cli/selftune/localdb/queries/json.ts +18 -0
  59. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  60. package/cli/selftune/localdb/queries/raw.ts +95 -0
  61. package/cli/selftune/localdb/queries/staging.ts +270 -0
  62. package/cli/selftune/localdb/queries/trust.ts +392 -0
  63. package/cli/selftune/localdb/queries.ts +60 -2288
  64. package/cli/selftune/localdb/schema.ts +21 -0
  65. package/cli/selftune/monitoring/watch.ts +96 -29
  66. package/cli/selftune/normalization.ts +3 -0
  67. package/cli/selftune/observability.ts +4 -2
  68. package/cli/selftune/orchestrate/cli.ts +161 -0
  69. package/cli/selftune/orchestrate/execute.ts +295 -0
  70. package/cli/selftune/orchestrate/finalize.ts +157 -0
  71. package/cli/selftune/orchestrate/locks.ts +40 -0
  72. package/cli/selftune/orchestrate/plan.ts +131 -0
  73. package/cli/selftune/orchestrate/post-run.ts +59 -0
  74. package/cli/selftune/orchestrate/prepare.ts +334 -0
  75. package/cli/selftune/orchestrate/report.ts +182 -0
  76. package/cli/selftune/orchestrate/runtime.ts +120 -0
  77. package/cli/selftune/orchestrate/signals.ts +48 -0
  78. package/cli/selftune/orchestrate.ts +150 -1173
  79. package/cli/selftune/repair/skill-usage.ts +5 -2
  80. package/cli/selftune/routes/overview.ts +5 -2
  81. package/cli/selftune/routes/skill-report.ts +15 -2
  82. package/cli/selftune/schedule.ts +5 -5
  83. package/cli/selftune/status.ts +39 -2
  84. package/cli/selftune/testing-readiness.ts +597 -0
  85. package/cli/selftune/types.ts +44 -4
  86. package/cli/selftune/uninstall.ts +2 -1
  87. package/cli/selftune/utils/canonical-log.ts +1 -9
  88. package/cli/selftune/utils/cli-error.ts +9 -0
  89. package/cli/selftune/utils/llm-call.ts +126 -6
  90. package/cli/selftune/utils/skill-discovery.ts +2 -0
  91. package/cli/selftune/workflows/proposals.ts +184 -0
  92. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  93. package/cli/selftune/workflows/workflows.ts +100 -26
  94. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  95. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  96. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  97. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  98. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
  99. package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
  100. package/package.json +25 -9
  101. package/packages/dashboard-core/AGENTS.md +18 -0
  102. package/packages/dashboard-core/README.md +30 -0
  103. package/packages/dashboard-core/index.ts +3 -0
  104. package/packages/dashboard-core/package.json +39 -0
  105. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  106. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  107. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  108. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  109. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  110. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  111. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  112. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  113. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  114. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  115. package/packages/dashboard-core/src/gates/index.ts +3 -0
  116. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  117. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  118. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  119. package/packages/dashboard-core/src/host/index.ts +3 -0
  120. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  121. package/packages/dashboard-core/src/models/index.ts +4 -0
  122. package/packages/dashboard-core/src/models/overview.ts +98 -0
  123. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  124. package/packages/dashboard-core/src/models/skills.ts +34 -0
  125. package/packages/dashboard-core/src/routes/index.ts +2 -0
  126. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  127. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  128. package/packages/dashboard-core/src/routes/types.ts +39 -0
  129. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  130. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  131. package/packages/dashboard-core/src/screens/index.ts +37 -0
  132. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  133. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  134. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  135. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  136. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  137. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  138. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  139. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  140. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  141. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  142. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  143. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  144. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  145. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  146. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  147. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  148. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  149. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  150. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  151. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  152. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  153. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  154. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  155. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  156. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  157. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  158. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  159. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  160. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  161. package/packages/telemetry-contract/src/schemas.ts +41 -1
  162. package/packages/telemetry-contract/src/types.ts +103 -2
  163. package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
  164. package/packages/ui/src/components/OverviewPanels.tsx +67 -26
  165. package/packages/ui/src/primitives/tabs.tsx +7 -6
  166. package/packages/ui/src/types.ts +10 -0
  167. package/skill/SKILL.md +130 -332
  168. package/skill/agents/diagnosis-analyst.md +3 -3
  169. package/skill/agents/evolution-reviewer.md +3 -3
  170. package/skill/agents/integration-guide.md +3 -3
  171. package/skill/agents/pattern-analyst.md +2 -2
  172. package/skill/references/cli-quick-reference.md +89 -0
  173. package/skill/references/creator-playbook.md +131 -0
  174. package/skill/references/examples.md +48 -0
  175. package/skill/references/troubleshooting.md +47 -0
  176. package/skill/references/version-history.md +1 -1
  177. package/skill/selftune.contribute.json +11 -0
  178. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  179. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  180. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  181. package/skill/workflows/CreateTestDeploy.md +170 -0
  182. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  183. package/skill/{Workflows → workflows}/Cron.md +1 -1
  184. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  185. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  186. package/skill/{Workflows → workflows}/Evals.md +67 -2
  187. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  188. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  189. package/skill/{Workflows → workflows}/Grade.md +1 -1
  190. package/skill/{Workflows → workflows}/Initialize.md +8 -4
  191. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  192. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  193. package/skill/workflows/SignalsDashboard.md +87 -0
  194. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  195. package/skill/{Workflows → workflows}/Watch.md +42 -2
  196. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  197. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
  198. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
  199. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  200. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
  201. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  202. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  203. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  204. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  205. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  206. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  207. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  208. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  209. /package/skill/{Workflows → workflows}/Ingest.md +0 -0
  210. /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
  211. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  212. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  213. /package/skill/{Workflows → workflows}/Registry.md +0 -0
  214. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  215. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  216. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  217. /package/skill/{Workflows → workflows}/Sync.md +0 -0
  218. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  219. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -27,12 +27,11 @@ import type {
27
27
  FailurePattern,
28
28
  GradingResult,
29
29
  QueryLogRecord,
30
- RoutingReplayFixture,
31
30
  SkillUsageRecord,
32
31
  } from "../types.js";
33
32
  import { CLIError, handleCLIError } from "../utils/cli-error.js";
34
33
  import type { EffortLevel, SubagentCallOptions } from "../utils/llm-call.js";
35
- import { callViaSubagent } from "../utils/llm-call.js";
34
+ import { callViaSubagent, detectLlmAgent } from "../utils/llm-call.js";
36
35
  import { appendAuditEntry } from "./audit.js";
37
36
  import { checkConstitutionSizeOnly } from "./constitutional.js";
38
37
  import { parseSkillSections, replaceBody, replaceSection } from "./deploy-proposal.js";
@@ -43,11 +42,9 @@ import { generateRoutingProposal } from "./propose-routing.js";
43
42
  import { refineBodyProposal } from "./refine-body.js";
44
43
  import type { BodyValidationOptions } from "./validate-body.js";
45
44
  import { validateBodyProposal } from "./validate-body.js";
46
- import {
47
- buildRoutingReplayFixture,
48
- runClaudeRuntimeReplayFixture,
49
- } from "./validate-host-replay.js";
45
+ import { buildRuntimeReplayValidationOptions } from "./validate-host-replay.js";
50
46
  import { validateRoutingProposal } from "./validate-routing.js";
47
+ import { DEFAULT_VALIDATION_STRATEGY, type ValidationStrategy } from "./validation-contract.js";
51
48
 
52
49
  // ---------------------------------------------------------------------------
53
50
  // Types
@@ -69,6 +66,7 @@ export interface EvolveBodyOptions {
69
66
  fewShotExamples?: string[];
70
67
  gradingResults?: GradingResult[];
71
68
  validationModel?: string;
69
+ validationMode?: ValidationStrategy;
72
70
  teacherEffort?: EffortLevel;
73
71
  /** Run evolution-reviewer subagent as Gate 4 before deployment. */
74
72
  useReviewer?: boolean;
@@ -176,6 +174,7 @@ export async function evolveBody(
176
174
  const teacherModel = options.teacherModel ?? DEFAULT_TEACHER_MODEL;
177
175
  const studentModel = options.studentModel ?? DEFAULT_STUDENT_MODEL;
178
176
  const teacherEffort = options.teacherEffort ?? DEFAULT_TEACHER_EFFORT;
177
+ const effectiveValidationMode = options.validationMode ?? DEFAULT_VALIDATION_STRATEGY;
179
178
 
180
179
  // Resolve injectable dependencies
181
180
  const _extractFailurePatterns = _deps.extractFailurePatterns ?? extractFailurePatterns;
@@ -468,30 +467,17 @@ export async function evolveBody(
468
467
  // Validate (validationModel overrides studentModel for validation calls)
469
468
  const validationModelFlag = options.validationModel ?? studentModel;
470
469
  let validation: BodyValidationResult;
470
+ let replayFallbackReason: string | undefined;
471
471
 
472
- // Build replay fixture + runner for ALL targets (not just routing)
473
- const replayFixture = buildRoutingReplayFixture({
472
+ // Build replay fixture + runner for targets that can use runtime replay.
473
+ const replayOptions = buildRuntimeReplayValidationOptions({
474
474
  skillName,
475
475
  skillPath,
476
- platform: studentAgent === "codex" ? "codex" : "claude_code",
476
+ agent: studentAgent,
477
+ contentTarget: target === "body" ? "body" : "routing",
477
478
  });
478
- const replayRunner =
479
- replayFixture.platform === "claude_code" && studentAgent === "claude"
480
- ? async ({
481
- routing,
482
- evalSet,
483
- fixture,
484
- }: {
485
- routing: string;
486
- evalSet: EvalEntry[];
487
- fixture: RoutingReplayFixture;
488
- }) =>
489
- await runClaudeRuntimeReplayFixture({
490
- routing,
491
- evalSet,
492
- fixture,
493
- })
494
- : undefined;
479
+ const replayFixture = replayOptions?.replayFixture;
480
+ const replayRunner = replayOptions?.replayRunner;
495
481
 
496
482
  if (target === "routing") {
497
483
  validation = await _validateRoutingProposal(
@@ -500,14 +486,47 @@ export async function evolveBody(
500
486
  studentAgent,
501
487
  validationModelFlag,
502
488
  {
503
- replayFixture,
489
+ ...(replayFixture ? { replayFixture } : {}),
504
490
  ...(replayRunner ? { replayRunner } : {}),
491
+ mode: effectiveValidationMode,
492
+ onReplayFallback: (reason) => {
493
+ replayFallbackReason = reason;
494
+ if (reason) {
495
+ console.error(
496
+ `[evolve-body] Replay not available (${reason}), falling back to LLM judge validation.`,
497
+ );
498
+ return;
499
+ }
500
+ console.error(
501
+ "[evolve-body] Replay not available, falling back to LLM judge validation.",
502
+ );
503
+ },
505
504
  },
506
505
  );
507
506
  } else {
508
- const bodyReplayOptions: BodyValidationOptions = replayRunner
509
- ? { replay: { replayFixture, replayRunner } }
510
- : {};
507
+ const bodyReplayOptions: BodyValidationOptions = {
508
+ ...(replayFixture
509
+ ? {
510
+ replay: {
511
+ replayFixture,
512
+ ...(replayRunner ? { replayRunner } : {}),
513
+ },
514
+ }
515
+ : {}),
516
+ mode: effectiveValidationMode,
517
+ onReplayFallback: (reason) => {
518
+ replayFallbackReason = reason;
519
+ if (reason) {
520
+ console.error(
521
+ `[evolve-body] Replay not available (${reason}), falling back to LLM judge validation.`,
522
+ );
523
+ return;
524
+ }
525
+ console.error(
526
+ "[evolve-body] Replay not available, falling back to LLM judge validation.",
527
+ );
528
+ },
529
+ };
511
530
  validation = await _validateBodyProposal(
512
531
  proposal,
513
532
  evalSet,
@@ -517,13 +536,23 @@ export async function evolveBody(
517
536
  bodyReplayOptions,
518
537
  );
519
538
  }
539
+ if (replayFallbackReason && !validation.validation_fallback_reason) {
540
+ validation = {
541
+ ...validation,
542
+ validation_fallback_reason: replayFallbackReason,
543
+ };
544
+ }
520
545
  lastValidation = validation;
521
546
  const validatedEvidenceRef = buildValidationEvidenceRef(proposal.proposal_id, "validated");
522
547
 
523
548
  recordAudit(
524
549
  proposal.proposal_id,
525
550
  "validated",
526
- `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed`,
551
+ `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed${
552
+ validation.validation_fallback_reason
553
+ ? ` (replay fallback: ${validation.validation_fallback_reason})`
554
+ : ""
555
+ }`,
527
556
  {
528
557
  validation_mode: validation.validation_mode,
529
558
  validation_agent: validation.validation_agent,
@@ -540,7 +569,11 @@ export async function evolveBody(
540
569
  stage: "validated",
541
570
  rationale: proposal.rationale,
542
571
  confidence: proposal.confidence,
543
- details: `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed`,
572
+ details: `Validation: ${validation.gates_passed}/${validation.gates_total} gates passed${
573
+ validation.validation_fallback_reason
574
+ ? ` (replay fallback: ${validation.validation_fallback_reason})`
575
+ : ""
576
+ }`,
544
577
  validation: {
545
578
  improved: validation.improved,
546
579
  gates_passed: validation.gates_passed,
@@ -552,6 +585,7 @@ export async function evolveBody(
552
585
  validation_mode: validation.validation_mode,
553
586
  validation_agent: validation.validation_agent,
554
587
  validation_fixture_id: validation.validation_fixture_id,
588
+ validation_fallback_reason: validation.validation_fallback_reason,
555
589
  validation_evidence_ref: validatedEvidenceRef,
556
590
  },
557
591
  });
@@ -603,7 +637,11 @@ export async function evolveBody(
603
637
  recordAudit(
604
638
  proposal.proposal_id,
605
639
  "rejected",
606
- `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates`,
640
+ `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates${
641
+ validation.validation_fallback_reason
642
+ ? ` (replay fallback: ${validation.validation_fallback_reason})`
643
+ : ""
644
+ }`,
607
645
  {
608
646
  validation_mode: validation.validation_mode,
609
647
  validation_agent: validation.validation_agent,
@@ -620,7 +658,11 @@ export async function evolveBody(
620
658
  stage: "rejected",
621
659
  rationale: proposal.rationale,
622
660
  confidence: proposal.confidence,
623
- details: `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates`,
661
+ details: `Validation failed: ${validation.gates_passed}/${validation.gates_total} gates${
662
+ validation.validation_fallback_reason
663
+ ? ` (replay fallback: ${validation.validation_fallback_reason})`
664
+ : ""
665
+ }`,
624
666
  validation: {
625
667
  improved: validation.improved,
626
668
  gates_passed: validation.gates_passed,
@@ -632,6 +674,7 @@ export async function evolveBody(
632
674
  validation_mode: validation.validation_mode,
633
675
  validation_agent: validation.validation_agent,
634
676
  validation_fixture_id: validation.validation_fixture_id,
677
+ validation_fallback_reason: validation.validation_fallback_reason,
635
678
  validation_evidence_ref: buildValidationEvidenceRef(proposal.proposal_id, "rejected"),
636
679
  },
637
680
  });
@@ -731,7 +774,11 @@ export async function evolveBody(
731
774
  recordAudit(
732
775
  lastProposal.proposal_id,
733
776
  "deployed",
734
- `Deployed ${target} proposal for ${skillName}`,
777
+ `Deployed ${target} proposal for ${skillName}${
778
+ lastValidation.validation_fallback_reason
779
+ ? ` (replay fallback: ${lastValidation.validation_fallback_reason})`
780
+ : ""
781
+ }`,
735
782
  {
736
783
  validation_mode: lastValidation.validation_mode,
737
784
  validation_agent: lastValidation.validation_agent,
@@ -760,6 +807,7 @@ export async function evolveBody(
760
807
  validation_mode: lastValidation.validation_mode,
761
808
  validation_agent: lastValidation.validation_agent,
762
809
  validation_fixture_id: lastValidation.validation_fixture_id,
810
+ validation_fallback_reason: lastValidation.validation_fallback_reason,
763
811
  validation_evidence_ref: buildValidationEvidenceRef(lastProposal.proposal_id, "deployed"),
764
812
  },
765
813
  });
@@ -813,6 +861,7 @@ export async function cliMain(): Promise<void> {
813
861
  "task-description": { type: "string" },
814
862
  "few-shot": { type: "string" },
815
863
  "validation-model": { type: "string" },
864
+ "validation-mode": { type: "string", default: DEFAULT_VALIDATION_STRATEGY },
816
865
  "teacher-effort": { type: "string", default: "high" },
817
866
  review: { type: "boolean", default: false },
818
867
  help: { type: "boolean", default: false },
@@ -841,6 +890,7 @@ Options:
841
890
  --task-description Optional task description context
842
891
  --few-shot Comma-separated paths to example skill files
843
892
  --validation-model Model for trigger-check validation calls (overrides --student-model for validation)
893
+ --validation-mode Validation strategy: auto, replay, or judge (default: auto)
844
894
  --teacher-effort Effort level for teacher LLM: low, medium, high, max (default: high)
845
895
  --review Run evolution-reviewer subagent before deployment (Gate 4)
846
896
  --help Show this help message`);
@@ -855,15 +905,24 @@ Options:
855
905
  );
856
906
  }
857
907
 
858
- const { detectAgent } = await import("../utils/llm-call.js");
859
- const teacherAgent = values["teacher-agent"] ?? detectAgent() ?? "";
908
+ if (
909
+ values["validation-mode"] &&
910
+ !["auto", "replay", "judge"].includes(values["validation-mode"])
911
+ ) {
912
+ throw new CLIError(
913
+ `Invalid --validation-mode value: ${values["validation-mode"]}`,
914
+ "INVALID_FLAG",
915
+ "Use one of: auto, replay, judge",
916
+ );
917
+ }
918
+ const teacherAgent = values["teacher-agent"] ?? detectLlmAgent() ?? "";
860
919
  const studentAgent = values["student-agent"] ?? teacherAgent;
861
920
 
862
921
  if (!teacherAgent) {
863
922
  throw new CLIError(
864
- "No agent CLI found. Install Claude Code, Codex, or OpenCode.",
923
+ "No agent CLI found. Install Claude Code, Codex, OpenCode, or Pi.",
865
924
  "AGENT_NOT_FOUND",
866
- "Install Claude Code, Codex, or OpenCode.",
925
+ "Install Claude Code, Codex, OpenCode, or Pi.",
867
926
  );
868
927
  }
869
928
 
@@ -901,6 +960,8 @@ Options:
901
960
  fewShotExamples,
902
961
  gradingResults,
903
962
  validationModel: values["validation-model"],
963
+ validationMode:
964
+ (values["validation-mode"] as ValidationStrategy | undefined) ?? DEFAULT_VALIDATION_STRATEGY,
904
965
  teacherEffort: (values["teacher-effort"] as EffortLevel) ?? "high",
905
966
  useReviewer: values.review ?? false,
906
967
  });