npm - selftune - Versions diffs - 0.2.30 → 0.2.32 - Mend

selftune 0.2.30 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

package/README.md +83 -56
package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/command-surface.ts +613 -2
package/cli/selftune/create/baseline.ts +429 -0
package/cli/selftune/create/check.ts +35 -0
package/cli/selftune/create/init.ts +115 -0
package/cli/selftune/create/package-candidate-state.ts +771 -0
package/cli/selftune/create/package-evaluator.ts +710 -0
package/cli/selftune/create/package-fingerprint.ts +142 -0
package/cli/selftune/create/package-search.ts +377 -0
package/cli/selftune/create/publish.ts +431 -0
package/cli/selftune/create/readiness.ts +495 -0
package/cli/selftune/create/replay.ts +330 -0
package/cli/selftune/create/report.ts +74 -0
package/cli/selftune/create/scaffold.ts +121 -0
package/cli/selftune/create/skills-ref-adapter.ts +177 -0
package/cli/selftune/create/status.ts +33 -0
package/cli/selftune/create/templates.ts +249 -0
package/cli/selftune/cron/setup.ts +1 -1
package/cli/selftune/dashboard-action-events.ts +4 -1
package/cli/selftune/dashboard-action-result.ts +789 -24
package/cli/selftune/dashboard-action-stream.ts +80 -0
package/cli/selftune/dashboard-contract.ts +146 -3
package/cli/selftune/dashboard-server.ts +5 -4
package/cli/selftune/eval/hooks-to-evals.ts +58 -35
package/cli/selftune/eval/synthetic-evals.ts +145 -17
package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
package/cli/selftune/evolution/evolve-body.ts +9 -36
package/cli/selftune/evolution/evolve.ts +8 -72
package/cli/selftune/evolution/stopping-criteria.ts +5 -13
package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
package/cli/selftune/evolution/validate-host-replay.ts +115 -15
package/cli/selftune/improve.ts +206 -0
package/cli/selftune/index.ts +123 -6
package/cli/selftune/init.ts +1 -1
package/cli/selftune/localdb/queries/dashboard.ts +30 -0
package/cli/selftune/localdb/schema.ts +52 -0
package/cli/selftune/monitoring/watch.ts +257 -23
package/cli/selftune/orchestrate/execute.ts +300 -1
package/cli/selftune/orchestrate/finalize.ts +14 -0
package/cli/selftune/orchestrate/plan.ts +22 -5
package/cli/selftune/orchestrate/prepare.ts +59 -4
package/cli/selftune/orchestrate/report.ts +1 -1
package/cli/selftune/orchestrate.ts +34 -1
package/cli/selftune/publish.ts +35 -0
package/cli/selftune/registry/github-install.ts +256 -0
package/cli/selftune/registry/index.ts +1 -1
package/cli/selftune/registry/install.ts +58 -7
package/cli/selftune/routes/actions.ts +81 -15
package/cli/selftune/routes/overview.ts +1 -1
package/cli/selftune/routes/skill-report.ts +147 -2
package/cli/selftune/run.ts +18 -0
package/cli/selftune/schedule.ts +3 -3
package/cli/selftune/search-run.ts +703 -0
package/cli/selftune/status.ts +35 -11
package/cli/selftune/testing-readiness.ts +431 -40
package/cli/selftune/types.ts +316 -0
package/cli/selftune/utils/eval-readiness.ts +1 -0
package/cli/selftune/utils/json-output.ts +11 -0
package/cli/selftune/utils/lifecycle-surface.ts +48 -0
package/cli/selftune/utils/query-filter.ts +82 -1
package/cli/selftune/utils/tui.ts +85 -2
package/cli/selftune/verify.ts +205 -0
package/cli/selftune/workflows/proposals.ts +1 -1
package/cli/selftune/workflows/skill-scaffold.ts +141 -63
package/cli/selftune/workflows/workflows.ts +4 -4
package/package.json +1 -1
package/packages/dashboard-core/src/routes/manifest.ts +2 -2
package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
package/packages/ui/src/primitives/button.tsx +5 -0
package/skill/SKILL.md +148 -85
package/skill/references/cli-quick-reference.md +16 -1
package/skill/references/creator-playbook.md +31 -10
package/skill/workflows/Baseline.md +8 -9
package/skill/workflows/Contributions.md +4 -4
package/skill/workflows/Create.md +173 -0
package/skill/workflows/CreateTestDeploy.md +34 -30
package/skill/workflows/Cron.md +2 -2
package/skill/workflows/Dashboard.md +3 -3
package/skill/workflows/Evals.md +13 -7
package/skill/workflows/Evolve.md +75 -32
package/skill/workflows/EvolveBody.md +22 -15
package/skill/workflows/Hook.md +1 -1
package/skill/workflows/Improve.md +168 -0
package/skill/workflows/Initialize.md +3 -3
package/skill/workflows/Orchestrate.md +49 -12
package/skill/workflows/Publish.md +100 -0
package/skill/workflows/Registry.md +19 -13
package/skill/workflows/Run.md +72 -0
package/skill/workflows/Schedule.md +2 -2
package/skill/workflows/SearchRun.md +89 -0
package/skill/workflows/SignalsDashboard.md +2 -2
package/skill/workflows/UnitTest.md +13 -4
package/skill/workflows/Verify.md +136 -0
package/skill/workflows/Watch.md +114 -47
package/skill/workflows/Workflows.md +13 -8
package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +0 -1
package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +0 -15
package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1

package/cli/selftune/dashboard-action-stream.ts CHANGED Viewed

@@ -63,6 +63,70 @@ function detectDashboardAction(argv: string[]): {
     };
   }
+  if (command === "create" && subcommand === "replay") {
+    return {
+      action: "replay-dry-run",
+      skillName: null,
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
+  if (command === "create" && subcommand === "check") {
+    return {
+      action: "create-check",
+      skillName: null,
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
+  if (command === "create" && subcommand === "baseline") {
+    return {
+      action: "measure-baseline",
+      skillName: null,
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
+  if (command === "create" && subcommand === "report") {
+    return {
+      action: "report-package",
+      skillName: null,
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
+  if (command === "create" && subcommand === "publish") {
+    return {
+      action: hasFlag(argv, "--watch") ? "watch" : "deploy-candidate",
+      skillName: null,
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
+  if (command === "verify") {
+    return {
+      action: "report-package",
+      skillName: null,
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
+  if (command === "publish") {
+    return {
+      action: hasFlag(argv, "--no-watch") ? "deploy-candidate" : "watch",
+      skillName: null,
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
+  if (command === "search-run") {
+    return {
+      action: "search-run",
+      skillName: readFlagValue(argv, "--skill"),
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
   if (command === "orchestrate") {
     return {
       action: "orchestrate",
@@ -71,6 +135,14 @@ function detectDashboardAction(argv: string[]): {
     };
   }
+  if (command === "run") {
+    return {
+      action: "orchestrate",
+      skillName: null,
+      skillPath: null,
+    };
+  }
   if (command === "evolve" && subcommand === "rollback") {
     return {
       action: "rollback",
@@ -87,6 +159,14 @@ function detectDashboardAction(argv: string[]): {
     };
   }
+  if (command === "improve") {
+    return {
+      action: hasFlag(argv, "--dry-run") ? "replay-dry-run" : "deploy-candidate",
+      skillName: readFlagValue(argv, "--skill"),
+      skillPath: readFlagValue(argv, "--skill-path"),
+    };
+  }
   return null;
 }

package/cli/selftune/dashboard-contract.ts CHANGED Viewed

@@ -1,3 +1,17 @@
+import type {
+  CreatePackageBodySummary,
+  CreatePackageCandidateAcceptanceDecision,
+  CreateCheckReadiness,
+  CreatePackageEvaluationEfficiencySummary,
+  CreatePackageEvaluationEvidenceSummary,
+  CreatePackageEvaluationGradingSummary,
+  CreatePackageEvaluationSource,
+  CreatePackageReplaySummary,
+  CreatePackageEvaluationStatus,
+  CreatePackageEvaluationUnitTestSummary,
+  CreatePackageEvaluationWatchSummary,
+} from "./types.js";
 // -- Cursor-based pagination types -------------------------------------------
 export interface PaginationCursor {
@@ -151,6 +165,7 @@ export interface SkillSummary {
   routing_confidence: number | null;
   confidence_coverage: number;
   testing_readiness?: SkillTestingReadiness;
+  create_readiness?: CreateCheckReadiness;
 }
 // -- Autonomy-first overview types -------------------------------------------
@@ -346,6 +361,9 @@ export interface SkillTestingReadiness {
   baseline_sample_size: number;
   baseline_pass_rate: number | null;
   latest_baseline_at: string | null;
+  package_evaluation_status?: CreatePackageEvaluationStatus | null;
+  package_evaluation_passed?: boolean | null;
+  latest_package_evaluation_at?: string | null;
   deployment_readiness: DeploymentReadiness;
   deployment_summary: string;
   deployment_command: string | null;
@@ -354,6 +372,8 @@ export interface SkillTestingReadiness {
 }
 export type DashboardActionName =
+  | "create-check"
+  | "report-package"
   | "generate-evals"
   | "generate-unit-tests"
   | "replay-dry-run"
@@ -361,7 +381,8 @@ export type DashboardActionName =
   | "deploy-candidate"
   | "watch"
   | "orchestrate"
-  | "rollback";
+  | "rollback"
+  | "search-run";
 export type DashboardActionEventStage =
   | "started"
@@ -376,9 +397,49 @@ export interface DashboardActionResultSummary {
   improved: boolean | null;
   deployed: boolean | null;
   before_pass_rate: number | null;
+  before_label?: string | null;
   after_pass_rate: number | null;
+  after_label?: string | null;
   net_change: number | null;
+  net_change_label?: string | null;
   validation_mode: string | null;
+  validation_label?: string | null;
+  recommended_command?: string | null;
+  package_evaluation_source?: CreatePackageEvaluationSource | null;
+  package_candidate_id?: string | null;
+  package_parent_candidate_id?: string | null;
+  package_candidate_generation?: number | null;
+  package_candidate_acceptance_decision?: CreatePackageCandidateAcceptanceDecision | null;
+  package_candidate_acceptance_rationale?: string | null;
+  package_evidence?: CreatePackageEvaluationEvidenceSummary | null;
+  package_efficiency?: CreatePackageEvaluationEfficiencySummary | null;
+  package_routing?: CreatePackageReplaySummary | null;
+  package_body?: CreatePackageBodySummary | null;
+  package_grading?: CreatePackageEvaluationGradingSummary | null;
+  package_unit_tests?: CreatePackageEvaluationUnitTestSummary | null;
+  package_watch?: CreatePackageEvaluationWatchSummary | null;
+  /** Search run provenance — populated only for search-run actions. */
+  search_run?: DashboardSearchRunSummary | null;
+  /** Whether the watch gate passed for publish actions (null for non-publish actions). */
+  watch_gate_passed?: boolean | null;
+}
+/** Compact search run result surfaced in the action result summary. */
+export interface DashboardSearchRunSummary {
+  search_id: string;
+  parent_candidate_id: string | null;
+  winner_candidate_id: string | null; // nullable; presumably null when the search picked no winner — confirm with the producer
+  winner_rationale: string | null;
+  candidates_evaluated: number;
+  frontier_size: number; // flattened here; DashboardSearchRunView carries the same-named field under `provenance`
+  parent_selection_method: string; // flattened here; DashboardSearchRunView carries the same-named field under `provenance`
+  surface_plan?: { // same inline shape as DashboardSearchProvenance.surface_plan — keep the two in sync
+    routing_count: number;
+    body_count: number;
+    weakness_source: string;
+    routing_weakness: number | null;
+    body_weakness: number | null;
+  } | null;
 }
 export interface DashboardActionMetrics {
@@ -424,9 +485,21 @@ export interface DashboardActionEvent {
   progress?: DashboardActionProgress | null;
 }
+export type CreatorOverviewStep = // step identifiers; used as the type of CreatorTestingOverview.priorities[].step
+  | "run_create_check" // also a key of CreatorTestingOverview.counts
+  | "finish_package" // also a key of CreatorTestingOverview.counts
+  | "generate_evals"
+  | "run_unit_tests"
+  | "run_replay_dry_run"
+  | "measure_baseline"
+  | "deploy_candidate"
+  | "watch_deployment";
 export interface CreatorTestingOverview {
   summary: string;
   counts: {
+    run_create_check: number;
+    finish_package: number;
     generate_evals: number;
     run_unit_tests: number;
     run_replay_dry_run: number;
@@ -436,7 +509,7 @@ export interface CreatorTestingOverview {
   };
   priorities: Array<{
     skill_name: string;
-    next_step: CreatorLoopNextStep;
+    step: CreatorOverviewStep;
     summary: string;
     recommended_command: string;
   }>;
@@ -446,7 +519,7 @@ export interface CreatorTestingOverview {
 export interface OrchestrateRunSkillAction {
   skill: string;
-  action: "evolve" | "watch" | "skip";
+  action: "evolve" | "package-search" | "watch" | "skip";
   reason: string;
   deployed?: boolean;
   rolledBack?: boolean;
@@ -468,6 +541,8 @@ export interface OrchestrateRunReport {
   watched: number;
   skipped: number;
   auto_graded?: number;
+  package_searched?: number;
+  package_improved?: number;
   skill_actions: OrchestrateRunSkillAction[];
 }
@@ -558,6 +633,69 @@ export interface ReplayEntryResult {
   evidence: string | null;
 }
+// -- Package search / frontier types (bounded package evolution) ---------------
+/**
+ * Dashboard-facing view of a package search run result.
+ * References `PackageSearchRunResult` from types.ts — does not redefine search
+ * semantics, only surfaces what the search runner provides.
+ */
+export interface DashboardSearchRunView {
+  search_id: string;
+  skill_name: string;
+  parent_candidate_id: string | null;
+  candidates_evaluated: number;
+  winner_candidate_id: string | null; // nullable; presumably null when no candidate won — confirm with the producer
+  winner_rationale: string | null;
+  started_at: string; // timestamp as string; exact format not visible here (presumably ISO 8601 — confirm)
+  completed_at: string; // timestamp as string; exact format not visible here (presumably ISO 8601 — confirm)
+  provenance: DashboardSearchProvenance; // frontier size, parent selection method, fingerprints and per-candidate summaries
+}
+/** Provenance detail surfaced in the dashboard for a search run. */
+export interface DashboardSearchProvenance {
+  frontier_size: number;
+  parent_selection_method: string;
+  candidate_fingerprints: string[];
+  surface_plan?: { // same inline shape as DashboardSearchRunSummary.surface_plan — keep the two in sync
+    routing_count: number;
+    body_count: number;
+    weakness_source: string;
+    routing_weakness: number | null;
+    body_weakness: number | null;
+  } | null;
+  evaluation_summaries: Array<{ // presumably one entry per evaluated candidate — confirm with the producer
+    candidate_id: string;
+    decision: string; // plain string here, unlike the literal union on DashboardFrontierMember.decision
+    rationale: string;
+  }>;
+}
+/** A frontier member shown in the skill report's frontier state panel. */
+export interface DashboardFrontierMember {
+  candidate_id: string;
+  skill_name: string;
+  fingerprint: string;
+  decision: "accepted" | "rejected" | "pending"; // the three values line up with the *_count fields on DashboardFrontierState
+  measured_delta: number | null; // nullable; presumably null until a delta has been measured — confirm
+  created_at: string; // timestamp as string; exact format not visible here
+  parent_candidate_id: string | null;
+  /** True when this candidate was demoted by watch-fed evidence. */
+  watch_demoted?: boolean;
+  /** Evidence rank within the accepted frontier (1 = best). */
+  evidence_rank?: number | null;
+}
+/** Frontier state summary surfaced in the skill report. */
+export interface DashboardFrontierState {
+  skill_name: string;
+  accepted_count: number; // presumably the number of members whose decision is "accepted" — confirm with the producer
+  rejected_count: number; // presumably the number of members whose decision is "rejected" — confirm with the producer
+  pending_count: number; // presumably the number of members whose decision is "pending" — confirm with the producer
+  members: DashboardFrontierMember[];
+  latest_search_run: DashboardSearchRunView | null; // nullable; presumably null when no search run has been recorded — confirm
+}
 // -- Doctor / health check types ----------------------------------------------
 export type { DoctorResult, HealthCheck, HealthStatus } from "./types.js";
@@ -693,6 +831,8 @@ export interface TrustFields {
 }
 export interface SkillReportResponse extends SkillReportPayload, TrustFields {
+  /** Watch trust score (0-1) from the most recent watch cycle, null if never watched. */
+  watch_trust_score: number | null;
   evolution: EvolutionEntry[];
   pending_proposals: PendingProposal[];
   token_usage: {
@@ -727,4 +867,7 @@ export interface SkillReportResponse extends SkillReportPayload, TrustFields {
     };
   } | null;
   testing_readiness?: SkillTestingReadiness;
+  create_readiness?: CreateCheckReadiness;
+  /** Package frontier state — populated when bounded package evolution data exists. */
+  frontier_state?: DashboardFrontierState | null;
 }

package/cli/selftune/dashboard-server.ts CHANGED Viewed

@@ -10,9 +10,10 @@
  *   GET  /api/v2/overview      — SQLite-backed overview payload
  *   GET  /api/v2/analytics     — Performance analytics (trends, rankings, heatmap)
  *   GET  /api/v2/skills/:name  — SQLite-backed per-skill report
- *   POST /api/actions/watch    — Trigger `selftune watch` for a skill
- *   POST /api/actions/evolve   — Trigger `selftune evolve` for a skill
- *   POST /api/actions/rollback — Trigger `selftune rollback` for a skill
+ *   POST /api/actions/create-check — Trigger `selftune create check` for a draft package
+ *   POST /api/actions/watch        — Trigger `selftune watch` for a skill
+ *   POST /api/actions/evolve       — Trigger `selftune evolve` for a skill
+ *   POST /api/actions/rollback     — Trigger `selftune rollback` for a skill
  *   POST /api/actions/watchlist — Persist creator watchlist preferences
  *   GET  /badge/:name          — Skill health badge
  *   GET  /report/:name         — Skill health report HTML
@@ -676,7 +677,7 @@ export async function startDashboardServer(options?: DashboardServerOptions): Pr
         return serveSpaShell(spaDir);
       }
-      // ---- POST /api/actions/{watch,evolve,rollback,watchlist} ----
+      // ---- POST /api/actions/{create-check,watch,evolve,rollback,watchlist} ----
       if (url.pathname.startsWith("/api/actions/") && req.method === "POST") {
         const trustedActionOrigins = allowedDashboardOrigins(hostname, boundPort);
         const origin = req.headers.get("origin");

package/cli/selftune/eval/hooks-to-evals.ts CHANGED Viewed

@@ -43,8 +43,10 @@ import type {
   SkillUsageRecord,
 } from "../types.js";
 import { CLIError, handleCLIError } from "../utils/cli-error.js";
-import { detectLlmAgent } from "../utils/llm-call.js";
+import { MIN_LOG_READY_POSITIVES } from "../utils/eval-readiness.js";
+import { detectLlmAgent, isLlmBackedAgent } from "../utils/llm-call.js";
 import {
+  extractPositiveEvalQueryText,
   filterActionableQueryRecords,
   filterActionableSkillUsageRecords,
 } from "../utils/query-filter.js";
@@ -63,6 +65,36 @@ import { writeCanonicalEvalSet } from "../testing-readiness.js";
 export { classifyInvocation } from "./invocation-classifier.js";
+function resolveEvalGenerateAgent(requestedAgent?: string | null): string { // Returns the agent name to use: a validated explicit request, else the auto-detected agent; throws CLIError otherwise.
+  if (requestedAgent) { // explicit request path; null/undefined/"" fall through to auto-detection below
+    if (!isLlmBackedAgent(requestedAgent)) { // reject values isLlmBackedAgent does not accept (INVALID_FLAG)
+      throw new CLIError(
+        `Unsupported --agent value "${requestedAgent}".`,
+        "INVALID_FLAG",
+        "Use claude, codex, opencode, or pi.",
+      );
+    }
+    if (!Bun.which(requestedAgent)) { // a valid name is not enough: the executable must also be resolvable (AGENT_NOT_FOUND)
+      throw new CLIError(
+        `Agent CLI '${requestedAgent}' not found in PATH`,
+        "AGENT_NOT_FOUND",
+        "Install it or omit --agent to use auto-detection",
+      );
+    }
+    return requestedAgent;
+  }
+  const detected = detectLlmAgent(); // no explicit request: use whatever detectLlmAgent() reports
+  if (!detected) { // nothing detected: same AGENT_NOT_FOUND code as the explicit path, different message
+    throw new CLIError(
+      "No agent CLI found (claude/codex/opencode/pi)",
+      "AGENT_NOT_FOUND",
+      "Install one of the supported agent CLIs",
+    );
+  }
+  return detected;
+}
 // ---------------------------------------------------------------------------
 // Query truncation
 // ---------------------------------------------------------------------------
@@ -97,8 +129,8 @@ export function buildEvalSet(
   for (const r of actionableSkillRecords) {
     if (!r || typeof r.skill_name !== "string" || typeof r.query !== "string") continue;
     if (isHighConfidencePositiveSkillRecord(r, skillName)) {
-      const q = (r.query ?? "").trim();
-      if (q && q !== "(query not found)") {
+      const q = extractPositiveEvalQueryText(r.query, skillName);
+      if (q) {
         positiveQueries.add(q);
       }
     }
@@ -110,8 +142,8 @@ export function buildEvalSet(
   for (const r of actionableSkillRecords) {
     if (!r || typeof r.skill_name !== "string" || typeof r.query !== "string") continue;
     if (!isHighConfidencePositiveSkillRecord(r, skillName)) continue;
-    const q = (r.query ?? "").trim();
-    if (!q || q === "(query not found)" || seen.has(q)) continue;
+    const q = extractPositiveEvalQueryText(r.query, skillName);
+    if (!q || seen.has(q)) continue;
     seen.add(q);
     const entry: EvalEntry = {
       query: truncateQuery(q),
@@ -331,6 +363,7 @@ export function listEvalSkillReadiness(
     if (r.session_id) rawSessionCounts.get(name)?.add(r.session_id);
     if (!isHighConfidencePositiveSkillRecord(r, name)) continue;
+    if (!extractPositiveEvalQueryText(r.query ?? "", name)) continue;
     trustedTriggerCounts.set(name, (trustedTriggerCounts.get(name) ?? 0) + 1);
     if (!trustedSessionCounts.has(name)) trustedSessionCounts.set(name, new Set<string>());
     if (r.session_id) trustedSessionCounts.get(name)?.add(r.session_id);
@@ -354,7 +387,11 @@ export function listEvalSkillReadiness(
         installed,
         skill_path: installed ? findInstalledSkillPath(name, searchDirs) : undefined,
         readiness:
-          trustedTriggerCount > 0 ? "log_ready" : installed ? "cold_start_ready" : "telemetry_only",
+          trustedTriggerCount >= MIN_LOG_READY_POSITIVES
+            ? "log_ready"
+            : installed
+              ? "cold_start_ready"
+              : "telemetry_only",
       } satisfies EvalSkillReadiness;
     });
 }
@@ -392,9 +429,9 @@ export function listSkills(
     }
     console.log("");
     console.log("Legend:");
-    console.log("  log-ready    real triggers exist; run eval generate normally");
+    console.log("  log-ready    enough clean real triggers exist; run eval generate normally");
     console.log(
-      "  cold-start   installed locally but no trusted triggers yet; use --auto-synthetic",
+      "  cold-start   installed locally but not enough clean trusted triggers yet; use --auto-synthetic",
     );
     console.log("  telemetry-only  trigger data exists but local SKILL.md was not found");
   } else {
@@ -566,6 +603,7 @@ export async function cliMain(): Promise<void> {
       skill: { type: "string" },
       output: { type: "string" },
       out: { type: "string" },
+      agent: { type: "string" },
       max: { type: "string", default: "50" },
       seed: { type: "string", default: "42" },
       "list-skills": { type: "boolean", default: false },
@@ -607,14 +645,7 @@ export async function cliMain(): Promise<void> {
       );
     }
-    const agent = detectLlmAgent();
-    if (!agent) {
-      throw new CLIError(
-        "No agent CLI found (claude/codex/opencode/pi)",
-        "AGENT_NOT_FOUND",
-        "Install one of the supported agent CLIs",
-      );
-    }
+    const agent = resolveEvalGenerateAgent(values.agent);
     const maxPerSide = Number.parseInt(values.max ?? "50", 10);
     const effectiveMax = Number.isNaN(maxPerSide) || maxPerSide <= 0 ? 50 : maxPerSide;
@@ -781,24 +812,17 @@ export async function cliMain(): Promise<void> {
   });
   const positiveCount = evalSet.filter((entry) => entry.should_trigger).length;
-  if (positiveCount === 0 && values["auto-synthetic"]) {
+  if (positiveCount < MIN_LOG_READY_POSITIVES && values["auto-synthetic"]) {
     const skillPath = values["skill-path"] ?? detectedSkillPath;
     if (!skillPath) {
       throw new CLIError(
-        `No trusted triggers found for '${values.skill}', and no SKILL.md path could be resolved for synthetic fallback.`,
+        `Not enough clean trusted triggers found for '${values.skill}', and no SKILL.md path could be resolved for synthetic fallback.`,
         "FILE_NOT_FOUND",
         `Run 'selftune eval generate --list-skills' or rerun with --skill-path /path/to/SKILL.md`,
       );
     }
-    const agent = detectLlmAgent();
-    if (!agent) {
-      throw new CLIError(
-        "No agent CLI found (claude/codex/opencode/pi)",
-        "AGENT_NOT_FOUND",
-        "Install one of the supported agent CLIs",
-      );
-    }
+    const agent = resolveEvalGenerateAgent(values.agent);
     emitDashboardStepProgress({
       current: 1,
@@ -808,7 +832,7 @@ export async function cliMain(): Promise<void> {
       label: "Load skill content",
     });
     console.log(
-      `No trusted triggers found for '${values.skill}'. Falling back to synthetic cold-start eval generation...`,
+      `Only ${positiveCount} clean trusted positive eval candidate(s) found for '${values.skill}'. Falling back to synthetic cold-start eval generation...`,
     );
     const effectiveMax = Number.isNaN(maxPerSide) || maxPerSide <= 0 ? 50 : maxPerSide;
     const syntheticEvalSet = await generateSyntheticEvals(skillPath, values.skill, agent, {
@@ -860,6 +884,12 @@ export async function cliMain(): Promise<void> {
     return;
   }
+  if (positiveCount > 0 && positiveCount < MIN_LOG_READY_POSITIVES) {
+    console.warn(
+      `[WARN] Only ${positiveCount} clean positive eval candidate(s) were found for '${values.skill}'. The log-derived eval set may be low-confidence. Consider rerunning with --auto-synthetic or --blend.`,
+    );
+  }
   // --- Blend mode: merge log-based evals with synthetic gap-fillers ---
   let finalEvalSet = evalSet;
   if (values.blend) {
@@ -872,14 +902,7 @@ export async function cliMain(): Promise<void> {
       );
     }
-    const agent = detectLlmAgent();
-    if (!agent) {
-      throw new CLIError(
-        "No agent CLI found (claude/codex/opencode/pi)",
-        "AGENT_NOT_FOUND",
-        "Install one of the supported agent CLIs",
-      );
-    }
+    const agent = resolveEvalGenerateAgent(values.agent);
     // Fail fast before expensive LLM calls — blending with zero logs always produces []
     if (evalSet.length === 0) {