npm - selftune - Versions diffs - 0.2.16 → 0.2.18 - Mend

selftune 0.2.16 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

package/README.md +24 -19
package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
package/cli/selftune/alpha-upload/client.ts +51 -1
package/cli/selftune/alpha-upload/flush.ts +46 -5
package/cli/selftune/alpha-upload/stage-canonical.ts +25 -4
package/cli/selftune/alpha-upload-contract.ts +9 -0
package/cli/selftune/constants.ts +82 -5
package/cli/selftune/contribute/sanitize.ts +52 -5
package/cli/selftune/dashboard-contract.ts +100 -0
package/cli/selftune/dashboard-server.ts +2 -2
package/cli/selftune/evolution/description-quality.ts +12 -11
package/cli/selftune/evolution/evolve.ts +214 -51
package/cli/selftune/evolution/validate-proposal.ts +9 -6
package/cli/selftune/grading/grade-session.ts +20 -0
package/cli/selftune/hooks/commit-track.ts +188 -0
package/cli/selftune/hooks/prompt-log.ts +10 -1
package/cli/selftune/hooks/session-stop.ts +2 -2
package/cli/selftune/hooks/skill-eval.ts +15 -1
package/cli/selftune/hooks/stdin-preview.ts +32 -0
package/cli/selftune/localdb/direct-write.ts +69 -6
package/cli/selftune/localdb/queries.ts +552 -7
package/cli/selftune/localdb/schema.ts +46 -0
package/cli/selftune/orchestrate.ts +32 -4
package/cli/selftune/routes/overview.ts +41 -3
package/cli/selftune/routes/skill-report.ts +88 -17
package/cli/selftune/types.ts +31 -0
package/cli/selftune/utils/transcript.ts +210 -1
package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
package/package.json +1 -1
package/packages/telemetry-contract/src/types.ts +11 -0
package/skill/SKILL.md +29 -1
package/skill/Workflows/Evolve.md +31 -13
package/skill/Workflows/ExportCanonical.md +121 -0
package/skill/Workflows/Hook.md +131 -0
package/skill/Workflows/Initialize.md +9 -8
package/skill/Workflows/Orchestrate.md +27 -5
package/skill/Workflows/Quickstart.md +94 -0
package/skill/Workflows/RepairSkillUsage.md +87 -0
package/skill/Workflows/Uninstall.md +82 -0
package/skill/settings_snippet.json +11 -0

package/cli/selftune/dashboard-contract.ts CHANGED

@@ -1,3 +1,71 @@
+// -- Cursor-based pagination types -------------------------------------------
+export interface PaginationCursor {
+  timestamp: string;
+  id: number | string;
+}
+export interface PaginatedResult<T> {
+  items: T[];
+  next_cursor: PaginationCursor | null;
+  has_more: boolean;
+}
+/** Parse a JSON cursor param from a URL search string. Returns null on invalid input. */
+export function parseCursorParam(value: string | null | undefined): PaginationCursor | null {
+  if (!value) return null;
+  try {
+    const parsed: unknown = JSON.parse(value);
+    if (parsed && typeof parsed === "object" && "timestamp" in parsed && "id" in parsed) {
+      const { timestamp, id } = parsed as { timestamp: unknown; id: unknown };
+      if (
+        typeof timestamp === "string" &&
+        (typeof id === "string" || (typeof id === "number" && Number.isFinite(id)))
+      ) {
+        return { timestamp, id };
+      }
+    }
+  } catch {
+    // Invalid cursor JSON — treat as no cursor
+  }
+  return null;
+}
+/** Parse an integer query param with bounds clamping. */
+export function parseIntParam(value: string | null | undefined, defaultValue: number): number {
+  if (value == null) return defaultValue;
+  const n = Number.parseInt(value, 10);
+  return Number.isNaN(n) ? defaultValue : Math.max(1, Math.min(n, 10000));
+}
+// -- Paginated overview payload (returned when cursor params are provided) ----
+export interface OverviewPaginatedPayload {
+  telemetry_page: PaginatedResult<TelemetryRecord>;
+  skills_page: PaginatedResult<SkillUsageRecord>;
+  evolution: EvolutionEntry[];
+  counts: OverviewPayload["counts"];
+  unmatched_queries: UnmatchedQuery[];
+  pending_proposals: PendingProposal[];
+  active_sessions: number;
+  recent_activity: RecentActivityItem[];
+}
+export interface SkillReportPaginatedPayload extends Omit<
+  SkillReportPayload,
+  "recent_invocations"
+> {
+  invocations_page: PaginatedResult<{
+    timestamp: string;
+    session_id: string;
+    query: string;
+    triggered: boolean;
+    source: string | null;
+  }>;
+}
+// -- Core record types -------------------------------------------------------
 export interface TelemetryRecord {
   timestamp: string;
   session_id: string;
@@ -220,6 +288,36 @@ export interface HealthResponse {
 // -- Doctor / health check types ----------------------------------------------
 export type { DoctorResult, HealthCheck, HealthStatus } from "./types.js";
+// -- Execution metrics (aggregated from execution_facts enrichment columns) ---
+export interface ExecutionMetrics {
+  avg_files_changed: number;
+  total_lines_added: number;
+  total_lines_removed: number;
+  total_cost_usd: number;
+  avg_cost_usd: number;
+  cached_input_tokens_total: number;
+  reasoning_output_tokens_total: number;
+  artifact_count: number;
+  session_type_distribution: Record<string, number>;
+}
+// -- Commit summary (aggregated from commit_tracking table) -------------------
+export interface CommitRecord {
+  commit_sha: string;
+  commit_title: string | null;
+  branch: string | null;
+  repo_remote: string | null;
+  timestamp: string;
+}
+export interface CommitSummary {
+  total_commits: number;
+  unique_branches: number;
+  recent_commits: Array<{ sha: string; title: string; branch: string; timestamp: string }>;
+}
 export interface SkillReportResponse extends SkillReportPayload {
   evolution: EvolutionEntry[];
   pending_proposals: PendingProposal[];
@@ -242,6 +340,8 @@ export interface SkillReportResponse extends SkillReportPayload {
   };
   prompt_samples: PromptSample[];
   session_metadata: SessionMeta[];
+  execution_metrics?: ExecutionMetrics | null;
+  commit_summary?: CommitSummary | null;
   description_quality?: {
     composite: number;
     criteria: {

package/cli/selftune/dashboard-server.ts CHANGED

@@ -448,7 +448,7 @@ export async function startDashboardServer(
           );
         }
         refreshV2Data();
-        return withCors(handleOverview(db, selftuneVersion));
+        return withCors(handleOverview(db, selftuneVersion, url.searchParams));
       }
       // ---- GET /api/v2/orchestrate-runs ----
@@ -495,7 +495,7 @@ export async function startDashboardServer(
           );
         }
         refreshV2Data();
-        return withCors(handleSkillReport(db, skillName));
+        return withCors(handleSkillReport(db, skillName, url.searchParams));
       }
       // ---- SPA fallback ----

package/cli/selftune/evolution/description-quality.ts CHANGED

@@ -139,27 +139,27 @@ export function scoreLengthCriterion(description: string): number {
 }
 /** Score presence of trigger context words (when/if/before/after etc). */
-export function scoreTriggerContextCriterion(description: string): number {
-  const matches = countWordMatches(description.toLowerCase(), TRIGGER_PATTERNS);
+export function scoreTriggerContextCriterion(description: string, lower?: string): number {
+  const matches = countWordMatches(lower ?? description.toLowerCase(), TRIGGER_PATTERNS);
   if (matches === 0) return 0.0;
   if (matches === 1) return 0.7;
   return Math.min(1.0, 0.7 + 0.15 * (matches - 1));
 }
 /** Score absence of vague words (lower is worse). */
-export function scoreVaguenessCriterion(description: string): number {
-  const matches = countWordMatches(description.toLowerCase(), VAGUE_PATTERNS);
+export function scoreVaguenessCriterion(description: string, lower?: string): number {
+  const matches = countWordMatches(lower ?? description.toLowerCase(), VAGUE_PATTERNS);
   if (matches === 0) return 1.0;
   if (matches === 1) return 0.6;
   return Math.max(0.1, 0.6 - 0.15 * (matches - 1));
 }
 /** Score whether description specifies at least one concrete action or domain. */
-export function scoreSpecificityCriterion(description: string): number {
-  const lower = description.toLowerCase();
-  const hasAction = ACTION_PATTERNS.some((p) => p.test(lower));
+export function scoreSpecificityCriterion(description: string, lower?: string): number {
+  const l = lower ?? description.toLowerCase();
+  const hasAction = ACTION_PATTERNS.some((p) => p.test(l));
-  const fillerCount = FILLER_PHRASES.filter((f) => lower.includes(f)).length;
+  const fillerCount = FILLER_PHRASES.filter((f) => l.includes(f)).length;
   const words = description.split(/\s+/).length;
   const fillerRatio = fillerCount > 0 ? fillerCount / Math.max(1, words / 10) : 0;
@@ -204,11 +204,12 @@ const WEIGHTS = {
  * Pure function — no I/O, no LLM calls.
  */
 export function scoreDescription(description: string, skillName?: string): DescriptionQualityScore {
+  const lower = description.toLowerCase();
   const criteria = {
     length: scoreLengthCriterion(description),
-    trigger_context: scoreTriggerContextCriterion(description),
-    vagueness: scoreVaguenessCriterion(description),
-    specificity: scoreSpecificityCriterion(description),
+    trigger_context: scoreTriggerContextCriterion(description, lower),
+    vagueness: scoreVaguenessCriterion(description, lower),
+    specificity: scoreSpecificityCriterion(description, lower),
     not_just_name: scoreNotJustNameCriterion(description, skillName),
   };

package/cli/selftune/evolution/evolve.ts CHANGED

@@ -38,6 +38,7 @@ import type {
 } from "../types.js";
 import { CLIError, handleCLIError } from "../utils/cli-error.js";
 import { parseFrontmatter, replaceDescription } from "../utils/frontmatter.js";
+import type { EffortLevel } from "../utils/llm-call.js";
 import { createEvolveTUI } from "../utils/tui.js";
 import { appendAuditEntry } from "./audit.js";
 import { checkConstitution } from "./constitutional.js";
@@ -51,6 +52,7 @@ import {
   selectFromFrontier,
 } from "./pareto.js";
 import { generateMultipleProposals, generateProposal } from "./propose-description.js";
+import { evaluateStoppingCriteria } from "./stopping-criteria.js";
 import { buildUnblockSuggestions } from "./unblock-suggestions.js";
 import type { ValidationResult } from "./validate-proposal.js";
 import {
@@ -80,7 +82,9 @@ export interface EvolveOptions {
   validationModel?: string;
   cheapLoop?: boolean;
   gateModel?: string;
+  gateEffort?: EffortLevel;
   proposalModel?: string;
+  adaptiveGate?: boolean;
   syncFirst?: boolean;
   syncForce?: boolean;
 }
@@ -174,6 +178,73 @@ function formatSimpleDiff(oldText: string, newText: string): string {
   return output.join("\n");
 }
+function countValidationLlmCalls(evalSetSize: number): number {
+  if (evalSetSize === 0) return 0;
+  return Math.ceil(evalSetSize / TRIGGER_CHECK_BATCH_SIZE) * 2 * VALIDATION_RUNS;
+}
+interface GateDecision {
+  model: string;
+  effort?: EffortLevel;
+  riskSignals: string[];
+}
+function countWords(text: string): number {
+  return text
+    .trim()
+    .split(/\s+/)
+    .filter((token) => token.length > 0).length;
+}
+function resolveGateDecision(
+  options: EvolveOptions,
+  proposal: EvolutionProposal,
+  validation: ValidationResult,
+  currentDescription: string,
+  confidenceThreshold: number,
+): GateDecision | undefined {
+  const baseModel = options.gateModel;
+  if (!baseModel) return undefined;
+  const baseDecision: GateDecision = {
+    model: baseModel,
+    effort: options.gateEffort,
+    riskSignals: [],
+  };
+  if (!options.adaptiveGate) return baseDecision;
+  const riskSignals: string[] = [];
+  const originalWords = countWords(currentDescription);
+  const proposedWords = countWords(proposal.proposed_description);
+  const wordGrowth = originalWords === 0 ? 1 : proposedWords / originalWords;
+  const lowLift = validation.net_change < 0.15;
+  const hasRegressions = validation.regressions.length > 0;
+  const lowConfidence = proposal.confidence < Math.max(confidenceThreshold + 0.05, 0.75);
+  const broadeningRisk = wordGrowth > 1.8 || proposedWords - originalWords > 32;
+  const notYetStrong = validation.after_pass_rate < 0.9;
+  if (hasRegressions) riskSignals.push(`regressions=${validation.regressions.length}`);
+  if (lowLift) riskSignals.push(`low_lift=${validation.net_change.toFixed(3)}`);
+  if (lowConfidence) riskSignals.push(`confidence=${proposal.confidence.toFixed(2)}`);
+  if (broadeningRisk) riskSignals.push(`word_growth=${wordGrowth.toFixed(2)}x`);
+  if (notYetStrong) riskSignals.push(`after_pass_rate=${validation.after_pass_rate.toFixed(2)}`);
+  const shouldEscalate = hasRegressions || validation.net_change < 0.1 || riskSignals.length >= 2;
+  if (!shouldEscalate) {
+    return {
+      ...baseDecision,
+      riskSignals,
+    };
+  }
+  return {
+    model: "opus",
+    effort: options.gateEffort === "max" ? "max" : "high",
+    riskSignals,
+  };
+}
 // ---------------------------------------------------------------------------
 // Main orchestrator
 // ---------------------------------------------------------------------------
@@ -456,7 +527,7 @@ export async function evolve(
     // -----------------------------------------------------------------------
     // Pareto multi-candidate path
     // -----------------------------------------------------------------------
-    const paretoEnabled = options.paretoEnabled ?? false;
+    const paretoEnabled = options.paretoEnabled ?? true;
     const candidateCount = options.candidateCount ?? 3;
     const tokenEfficiencyEnabled = options.tokenEfficiencyEnabled ?? false;
     const telemetryRecords =
@@ -494,6 +565,7 @@ export async function evolve(
         options.proposalModel,
         aggregateMetrics,
       );
+      llmCallCount += candidateCount;
       // Filter by confidence threshold
       const viableCandidates = candidates.filter((c) => c.confidence >= confidenceThreshold);
@@ -564,6 +636,7 @@ export async function evolve(
           agent,
           options.validationModel,
         );
+        llmCallCount += countValidationLlmCalls(evalSet.length);
         recordAudit(
           proposal.proposal_id,
           "validated",
@@ -628,6 +701,7 @@ export async function evolve(
     } else {
       // Standard single-candidate retry loop
       let feedbackReason = "";
+      const previousPassRates: number[] = [];
       for (let iteration = 0; iteration < maxIterations; iteration++) {
         iterationsCompleted = iteration + 1;
@@ -681,7 +755,24 @@ export async function evolve(
         );
         if (!constitution.passed) {
           feedbackReason = `Constitutional: ${constitution.violations.join("; ")}`;
-          recordAudit(proposal.proposal_id, "rejected", feedbackReason);
+          // Re-evaluate stopping after a constitutional rejection by treating the
+          // last entry in previousPassRates as the currentPassRate (or 0 on the
+          // first iteration) and slicing it out of history before calling
+          // evaluateStoppingCriteria() with the current iteration/maxIterations,
+          // confidenceThreshold, and proposal.confidence.
+          const constitutionStop = evaluateStoppingCriteria(
+            previousPassRates.at(-1) ?? 0,
+            previousPassRates.slice(0, -1),
+            iteration + 1,
+            maxIterations,
+            confidenceThreshold,
+            proposal.confidence,
+          );
+          recordAudit(
+            proposal.proposal_id,
+            "rejected",
+            `${feedbackReason} (stopping: ${constitutionStop.reason})`,
+          );
           recordEvidence({
             timestamp: new Date().toISOString(),
             proposal_id: proposal.proposal_id,
@@ -691,54 +782,64 @@ export async function evolve(
             stage: "rejected",
             rationale: proposal.rationale,
             confidence: proposal.confidence,
-            details: feedbackReason,
+            details: `${feedbackReason} (stopping: ${constitutionStop.reason})`,
           });
-          if (iteration === maxIterations - 1) {
+          if (constitutionStop.shouldStop) {
             finishTui();
             return withStats({
               proposal: lastProposal,
               validation: null,
               deployed: false,
               auditEntries,
-              reason: feedbackReason,
+              reason: `${feedbackReason} (${constitutionStop.reason})`,
             });
           }
           continue;
         }
-        // Step 9: Check confidence threshold
-        if (proposal.confidence < confidenceThreshold) {
-          feedbackReason = `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`;
-          recordAudit(
-            proposal.proposal_id,
-            "rejected",
-            `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`,
+        // Step 9: Check confidence threshold via stopping criteria
+        {
+          const preValidationStop = evaluateStoppingCriteria(
+            previousPassRates.at(-1) ?? 0,
+            previousPassRates.slice(0, -1),
+            iteration + 1,
+            maxIterations,
+            confidenceThreshold,
+            proposal.confidence,
           );
-          recordEvidence({
-            timestamp: new Date().toISOString(),
-            proposal_id: proposal.proposal_id,
-            skill_name: skillName,
-            skill_path: skillPath,
-            target: "description",
-            stage: "rejected",
-            rationale: proposal.rationale,
-            confidence: proposal.confidence,
-            details: `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`,
-          });
-          // If this is the last iteration, return early with rejection
-          if (iteration === maxIterations - 1) {
-            finishTui();
-            return withStats({
-              proposal: lastProposal,
-              validation: null,
-              deployed: false,
-              auditEntries,
-              reason: `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`,
+          if (proposal.confidence < confidenceThreshold) {
+            feedbackReason = `Confidence ${proposal.confidence} below threshold ${confidenceThreshold}`;
+            recordAudit(
+              proposal.proposal_id,
+              "rejected",
+              `${feedbackReason} (stopping: ${preValidationStop.reason})`,
+            );
+            recordEvidence({
+              timestamp: new Date().toISOString(),
+              proposal_id: proposal.proposal_id,
+              skill_name: skillName,
+              skill_path: skillPath,
+              target: "description",
+              stage: "rejected",
+              rationale: proposal.rationale,
+              confidence: proposal.confidence,
+              details: `${feedbackReason} (stopping: ${preValidationStop.reason})`,
             });
-          }
-          continue;
+            // Use stopping criteria to decide whether to return or retry
+            if (preValidationStop.shouldStop) {
+              finishTui();
+              return withStats({
+                proposal: lastProposal,
+                validation: null,
+                deployed: false,
+                auditEntries,
+                reason: `${feedbackReason} (${preValidationStop.reason})`,
+              });
+            }
+            continue;
+          }
         }
         // Step 10: Validate against eval set
@@ -753,7 +854,7 @@ export async function evolve(
           options.validationModel,
         );
         lastValidation = validation;
-        llmCallCount += batchCount * 2 * VALIDATION_RUNS;
+        llmCallCount += countValidationLlmCalls(evalSet.length);
         tui.done(
           `Validation: ${(validation.before_pass_rate * 100).toFixed(1)}% \u2192 ${(validation.after_pass_rate * 100).toFixed(1)}% (improved: ${validation.improved})`,
         );
@@ -792,13 +893,23 @@ export async function evolve(
           },
         });
-        // Step 12: Check validation result
+        // Step 12: Evaluate stopping criteria after validation
+        const stopping = evaluateStoppingCriteria(
+          validation.after_pass_rate,
+          previousPassRates,
+          iteration + 1,
+          maxIterations,
+          confidenceThreshold,
+          proposal.confidence,
+        );
+        previousPassRates.push(validation.after_pass_rate);
         if (!validation.improved) {
           feedbackReason = `Validation failed: net_change=${validation.net_change.toFixed(3)}, improved=false`;
           recordAudit(
             proposal.proposal_id,
             "rejected",
-            `Validation failed: net_change=${validation.net_change.toFixed(3)}`,
+            `Validation failed: net_change=${validation.net_change.toFixed(3)} (stopping: ${stopping.reason})`,
           );
           recordEvidence({
             timestamp: new Date().toISOString(),
@@ -809,7 +920,7 @@ export async function evolve(
             stage: "rejected",
             rationale: proposal.rationale,
             confidence: proposal.confidence,
-            details: `Validation failed: net_change=${validation.net_change.toFixed(3)}`,
+            details: `Validation failed: net_change=${validation.net_change.toFixed(3)} (stopping: ${stopping.reason})`,
             validation: {
               improved: validation.improved,
               before_pass_rate: validation.before_pass_rate,
@@ -821,21 +932,26 @@ export async function evolve(
             },
           });
-          // If this is the last iteration, return with rejection
-          if (iteration === maxIterations - 1) {
+          // Use stopping criteria to decide whether to return or retry
+          if (stopping.shouldStop) {
             finishTui();
             return withStats({
               proposal: lastProposal,
               validation: lastValidation,
               deployed: false,
               auditEntries,
-              reason: `Validation failed after ${maxIterations} iterations: net_change=${validation.net_change.toFixed(3)}`,
+              reason: `Validation failed (${stopping.reason}): net_change=${validation.net_change.toFixed(3)}`,
             });
           }
           continue;
         }
+        // Validation passed — check if converged or continue
+        if (stopping.shouldStop && stopping.reason.includes("Converged")) {
+          recordAudit(proposal.proposal_id, "validated", `Stopping early: ${stopping.reason}`);
+        }
         // Validation passed - break out of retry loop
         break;
       }
@@ -916,18 +1032,39 @@ export async function evolve(
     // -----------------------------------------------------------------------
     let gateValidation: ValidationResult | undefined;
     if (options.gateModel && lastProposal && lastValidation?.improved) {
-      tui.step(`Gate validation (${options.gateModel})...`);
-      gateValidation = await _gateValidateProposal(lastProposal, evalSet, agent, options.gateModel);
-      llmCallCount++;
+      const gateDecision = resolveGateDecision(
+        options,
+        lastProposal,
+        lastValidation,
+        currentDescription,
+        confidenceThreshold,
+      );
+      const gateLabel = gateDecision?.effort
+        ? `${gateDecision.model}, effort=${gateDecision.effort}`
+        : (gateDecision?.model ?? options.gateModel);
+      tui.step(`Gate validation (${gateLabel})...`);
+      gateValidation = await _gateValidateProposal(
+        lastProposal,
+        evalSet,
+        agent,
+        gateDecision?.model ?? options.gateModel,
+        gateDecision?.effort,
+      );
+      llmCallCount += countValidationLlmCalls(evalSet.length);
       tui.done(
-        `Gate (${options.gateModel}): improved=${gateValidation.improved}, net_change=${gateValidation.net_change.toFixed(3)}`,
+        `Gate (${gateLabel}): improved=${gateValidation.improved}, net_change=${gateValidation.net_change.toFixed(3)}`,
       );
+      const gatePrefix =
+        gateDecision && gateDecision.riskSignals.length > 0
+          ? `Adaptive gate [${gateDecision.riskSignals.join(", ")}]`
+          : "Gate validation";
       if (!gateValidation.improved) {
         recordAudit(
           lastProposal.proposal_id,
           "rejected",
-          `Gate validation failed (${options.gateModel}): net_change=${gateValidation.net_change.toFixed(3)}`,
+          `${gatePrefix} failed (${gateLabel}): net_change=${gateValidation.net_change.toFixed(3)}`,
         );
         recordEvidence({
           timestamp: new Date().toISOString(),
@@ -938,7 +1075,7 @@ export async function evolve(
           stage: "rejected",
           rationale: lastProposal.rationale,
           confidence: lastProposal.confidence,
-          details: `Gate validation failed (${options.gateModel}): net_change=${gateValidation.net_change.toFixed(3)}`,
+          details: `${gatePrefix} failed (${gateLabel}): net_change=${gateValidation.net_change.toFixed(3)}`,
           validation: {
             improved: gateValidation.improved,
             before_pass_rate: gateValidation.before_pass_rate,
@@ -955,7 +1092,7 @@ export async function evolve(
           validation: lastValidation,
           deployed: false,
           auditEntries,
-          reason: `Gate validation failed (${options.gateModel}): net_change=${gateValidation.net_change.toFixed(3)}`,
+          reason: `${gatePrefix} failed (${gateLabel}): net_change=${gateValidation.net_change.toFixed(3)}`,
           gateValidation,
           ...(baselineResult ? { baselineResult } : {}),
         });
@@ -964,7 +1101,7 @@ export async function evolve(
       recordAudit(
         lastProposal.proposal_id,
         "validated",
-        `Gate validation (${options.gateModel}): improved=${gateValidation.improved}, net_change=${gateValidation.net_change.toFixed(3)}`,
+        `${gatePrefix} (${gateLabel}): improved=${gateValidation.improved}, net_change=${gateValidation.net_change.toFixed(3)}`,
       );
     }
@@ -1082,7 +1219,7 @@ export async function cliMain(): Promise<void> {
       "dry-run": { type: "boolean", default: false },
       confidence: { type: "string", default: "0.6" },
       "max-iterations": { type: "string", default: "3" },
-      pareto: { type: "boolean", default: false },
+      pareto: { type: "boolean", default: true },
       candidates: { type: "string", default: "3" },
       "token-efficiency": { type: "boolean", default: false },
       "with-baseline": { type: "boolean", default: false },
@@ -1090,7 +1227,9 @@ export async function cliMain(): Promise<void> {
       "cheap-loop": { type: "boolean", default: true },
       "full-model": { type: "boolean", default: false },
       "gate-model": { type: "string" },
+      "gate-effort": { type: "string" },
       "proposal-model": { type: "string" },
+      "adaptive-gate": { type: "boolean", default: false },
       "sync-first": { type: "boolean", default: false },
       "sync-force": { type: "boolean", default: false },
       verbose: { type: "boolean", default: false },
@@ -1121,6 +1260,8 @@ Options:
   --cheap-loop        Use cheap models for loop, expensive for gate (default: on)
   --full-model        Use same model for all stages (disables cheap-loop)
   --gate-model        Model for final gate validation (default: sonnet)
+  --gate-effort       Thinking effort for final gate (low|medium|high|max)
+  --adaptive-gate     Escalate risky gate checks to opus + high effort
   --proposal-model    Model for proposal generation LLM calls
   --sync-first        Refresh source-truth telemetry before building evals/failure patterns
   --sync-force        Force a full rescan during --sync-first
@@ -1143,6 +1284,24 @@ Options:
       "Add --sync-first when using --sync-force",
     );
   }
+  if (values["gate-effort"] && !["low", "medium", "high", "max"].includes(values["gate-effort"])) {
+    throw new CLIError(
+      `Invalid --gate-effort value: ${values["gate-effort"]}`,
+      "INVALID_FLAG",
+      "Use one of: low, medium, high, max",
+    );
+  }
+  if (
+    (values["gate-effort"] || values["adaptive-gate"]) &&
+    (values["full-model"] ?? false) &&
+    !values["gate-model"]
+  ) {
+    throw new CLIError(
+      "--gate-effort and --adaptive-gate require --gate-model when --full-model is set",
+      "INVALID_FLAG",
+      "Add --gate-model <model> or drop --full-model",
+    );
+  }
   const { detectAgent } = await import("../utils/llm-call.js");
   const requestedAgent = values.agent;
@@ -1223,6 +1382,8 @@ Options:
     console.error(`[verbose] Dry run: ${values["dry-run"] ?? false}`);
     console.error(`[verbose] Sync first: ${values["sync-first"] ?? false}`);
     console.error(`[verbose] Sync force: ${values["sync-force"] ?? false}`);
+    console.error(`[verbose] Adaptive gate: ${values["adaptive-gate"] ?? false}`);
+    console.error(`[verbose] Gate effort: ${values["gate-effort"] ?? "(default)"}`);
   }
   const result = await evolve({
@@ -1241,7 +1402,9 @@ Options:
     validationModel: values["validation-model"],
     cheapLoop: (values["cheap-loop"] ?? true) && !(values["full-model"] ?? false),
     gateModel: values["gate-model"],
+    gateEffort: values["gate-effort"] as EffortLevel | undefined,
     proposalModel: values["proposal-model"],
+    adaptiveGate: values["adaptive-gate"] ?? false,
     gradingResults,
     syncFirst: values["sync-first"] ?? false,
     syncForce: values["sync-force"] ?? false,