npm - selftune - Versions diffs - 0.2.22 → 0.2.24 - Mend

selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (270) hide show

package/CHANGELOG.md +6 -0
package/README.md +95 -15
package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
package/apps/local-dashboard/dist/index.html +5 -5
package/cli/selftune/adapters/codex/install.ts +310 -78
package/cli/selftune/adapters/opencode/install.ts +3 -4
package/cli/selftune/adapters/pi/hook.ts +273 -0
package/cli/selftune/adapters/pi/install.ts +207 -0
package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
package/cli/selftune/auto-update.ts +200 -8
package/cli/selftune/canonical-export.ts +55 -25
package/cli/selftune/command-surface.ts +397 -0
package/cli/selftune/constants.ts +10 -1
package/cli/selftune/contribute/contribute.ts +64 -13
package/cli/selftune/contribution-config.ts +57 -3
package/cli/selftune/contribution-preferences.ts +117 -0
package/cli/selftune/contribution-signals.ts +8 -4
package/cli/selftune/contribution-staging.ts +13 -2
package/cli/selftune/contributions.ts +55 -121
package/cli/selftune/creator-contributions.ts +29 -10
package/cli/selftune/cron/setup.ts +7 -3
package/cli/selftune/dashboard-contract.ts +87 -0
package/cli/selftune/dashboard-server.ts +168 -17
package/cli/selftune/dashboard.ts +350 -17
package/cli/selftune/eval/baseline.ts +21 -5
package/cli/selftune/eval/execution-eval.ts +170 -0
package/cli/selftune/eval/family-overlap.ts +2 -2
package/cli/selftune/eval/hooks-to-evals.ts +228 -82
package/cli/selftune/eval/import-skillsbench.ts +2 -2
package/cli/selftune/eval/invocation-classifier.ts +56 -0
package/cli/selftune/eval/synthetic-evals.ts +5 -3
package/cli/selftune/eval/unit-test-cli.ts +7 -4
package/cli/selftune/evolution/apply-proposal.ts +295 -0
package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
package/cli/selftune/evolution/evidence.ts +2 -6
package/cli/selftune/evolution/evolve-body.ts +152 -38
package/cli/selftune/evolution/evolve.ts +244 -52
package/cli/selftune/evolution/rollback.ts +0 -1
package/cli/selftune/evolution/validate-body.ts +111 -49
package/cli/selftune/evolution/validate-host-replay.ts +510 -60
package/cli/selftune/evolution/validate-proposal.ts +11 -150
package/cli/selftune/evolution/validate-routing.ts +51 -108
package/cli/selftune/evolution/validation-contract.ts +91 -0
package/cli/selftune/grading/auto-grade.ts +11 -7
package/cli/selftune/grading/grade-session.ts +10 -16
package/cli/selftune/hooks/skill-eval.ts +2 -1
package/cli/selftune/hooks-shared/types.ts +1 -0
package/cli/selftune/index.ts +58 -15
package/cli/selftune/ingestors/claude-replay.ts +15 -10
package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
package/cli/selftune/ingestors/pi-ingest.ts +727 -0
package/cli/selftune/init.ts +38 -4
package/cli/selftune/localdb/direct-write.ts +120 -1
package/cli/selftune/localdb/materialize.ts +6 -7
package/cli/selftune/localdb/queries/cron.ts +34 -0
package/cli/selftune/localdb/queries/dashboard.ts +834 -0
package/cli/selftune/localdb/queries/evolution.ts +158 -0
package/cli/selftune/localdb/queries/execution.ts +133 -0
package/cli/selftune/localdb/queries/json.ts +18 -0
package/cli/selftune/localdb/queries/monitoring.ts +263 -0
package/cli/selftune/localdb/queries/raw.ts +95 -0
package/cli/selftune/localdb/queries/staging.ts +270 -0
package/cli/selftune/localdb/queries/trust.ts +392 -0
package/cli/selftune/localdb/queries.ts +60 -2162
package/cli/selftune/localdb/schema.ts +59 -0
package/cli/selftune/monitoring/watch.ts +96 -29
package/cli/selftune/normalization.ts +3 -0
package/cli/selftune/observability.ts +12 -3
package/cli/selftune/orchestrate/cli.ts +161 -0
package/cli/selftune/orchestrate/execute.ts +295 -0
package/cli/selftune/orchestrate/finalize.ts +157 -0
package/cli/selftune/orchestrate/locks.ts +40 -0
package/cli/selftune/orchestrate/plan.ts +131 -0
package/cli/selftune/orchestrate/post-run.ts +59 -0
package/cli/selftune/orchestrate/prepare.ts +334 -0
package/cli/selftune/orchestrate/report.ts +182 -0
package/cli/selftune/orchestrate/runtime.ts +120 -0
package/cli/selftune/orchestrate/signals.ts +48 -0
package/cli/selftune/orchestrate.ts +162 -1142
package/cli/selftune/registry/client.ts +74 -0
package/cli/selftune/registry/history.ts +54 -0
package/cli/selftune/registry/index.ts +90 -0
package/cli/selftune/registry/install.ts +141 -0
package/cli/selftune/registry/list.ts +44 -0
package/cli/selftune/registry/push.ts +171 -0
package/cli/selftune/registry/rollback.ts +49 -0
package/cli/selftune/registry/status.ts +62 -0
package/cli/selftune/registry/sync.ts +125 -0
package/cli/selftune/repair/skill-usage.ts +9 -3
package/cli/selftune/routes/overview.ts +5 -2
package/cli/selftune/routes/skill-report.ts +15 -2
package/cli/selftune/schedule.ts +5 -5
package/cli/selftune/status.ts +70 -2
package/cli/selftune/sync.ts +127 -23
package/cli/selftune/testing-readiness.ts +597 -0
package/cli/selftune/types.ts +46 -5
package/cli/selftune/uninstall.ts +2 -1
package/cli/selftune/utils/canonical-log.ts +1 -9
package/cli/selftune/utils/cli-error.ts +9 -0
package/cli/selftune/utils/jsonl.ts +1 -30
package/cli/selftune/utils/llm-call.ts +126 -6
package/cli/selftune/utils/skill-discovery.ts +24 -0
package/cli/selftune/workflows/proposals.ts +184 -0
package/cli/selftune/workflows/skill-scaffold.ts +241 -0
package/cli/selftune/workflows/workflows.ts +100 -26
package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
package/node_modules/@selftune/telemetry-contract/package.json +1 -1
package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
package/package.json +25 -9
package/packages/dashboard-core/AGENTS.md +18 -0
package/packages/dashboard-core/README.md +30 -0
package/packages/dashboard-core/index.ts +3 -0
package/packages/dashboard-core/package.json +39 -0
package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
package/packages/dashboard-core/src/chrome/index.ts +14 -0
package/packages/dashboard-core/src/chrome/types.ts +81 -0
package/packages/dashboard-core/src/chrome/utils.ts +23 -0
package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
package/packages/dashboard-core/src/gates/index.ts +3 -0
package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
package/packages/dashboard-core/src/host/adapter.ts +47 -0
package/packages/dashboard-core/src/host/capabilities.ts +55 -0
package/packages/dashboard-core/src/host/index.ts +3 -0
package/packages/dashboard-core/src/models/analytics.ts +39 -0
package/packages/dashboard-core/src/models/index.ts +4 -0
package/packages/dashboard-core/src/models/overview.ts +98 -0
package/packages/dashboard-core/src/models/runtime.ts +7 -0
package/packages/dashboard-core/src/models/skills.ts +34 -0
package/packages/dashboard-core/src/routes/index.ts +2 -0
package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
package/packages/dashboard-core/src/routes/manifest.ts +451 -0
package/packages/dashboard-core/src/routes/types.ts +39 -0
package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
package/packages/dashboard-core/src/screens/index.ts +37 -0
package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
package/packages/telemetry-contract/package.json +1 -1
package/packages/telemetry-contract/src/index.ts +1 -0
package/packages/telemetry-contract/src/schemas.ts +63 -5
package/packages/telemetry-contract/src/types.ts +97 -7
package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
package/packages/ui/AGENTS.md +16 -0
package/packages/ui/README.md +1 -1
package/packages/ui/package.json +1 -1
package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
package/packages/ui/src/components/InfoTip.tsx +1 -2
package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
package/packages/ui/src/components/OverviewPanels.tsx +693 -0
package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
package/packages/ui/src/components/index.ts +56 -1
package/packages/ui/src/components/section-cards.tsx +18 -35
package/packages/ui/src/components/skill-health-grid.tsx +47 -37
package/packages/ui/src/lib/constants.tsx +0 -1
package/packages/ui/src/primitives/card.tsx +1 -1
package/packages/ui/src/primitives/checkbox.tsx +1 -1
package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
package/packages/ui/src/primitives/select.tsx +2 -2
package/packages/ui/src/primitives/tabs.tsx +7 -6
package/packages/ui/src/types.ts +182 -4
package/skill/SKILL.md +130 -318
package/skill/agents/diagnosis-analyst.md +3 -3
package/skill/agents/evolution-reviewer.md +3 -3
package/skill/agents/integration-guide.md +3 -3
package/skill/agents/pattern-analyst.md +2 -2
package/skill/references/cli-quick-reference.md +89 -0
package/skill/references/creator-playbook.md +131 -0
package/skill/references/examples.md +48 -0
package/skill/references/troubleshooting.md +47 -0
package/skill/references/version-history.md +1 -1
package/skill/selftune.contribute.json +11 -0
package/skill/{Workflows → workflows}/Baseline.md +20 -1
package/skill/{Workflows → workflows}/Contribute.md +23 -10
package/skill/{Workflows → workflows}/Contributions.md +13 -5
package/skill/workflows/CreateTestDeploy.md +170 -0
package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
package/skill/{Workflows → workflows}/Cron.md +1 -1
package/skill/{Workflows → workflows}/Dashboard.md +20 -0
package/skill/{Workflows → workflows}/Doctor.md +1 -1
package/skill/{Workflows → workflows}/Evals.md +67 -2
package/skill/{Workflows → workflows}/Evolve.md +119 -30
package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
package/skill/{Workflows → workflows}/Grade.md +1 -1
package/skill/{Workflows → workflows}/Ingest.md +60 -2
package/skill/{Workflows → workflows}/Initialize.md +16 -9
package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
package/skill/workflows/Registry.md +99 -0
package/skill/{Workflows → workflows}/Schedule.md +3 -3
package/skill/workflows/SignalsDashboard.md +87 -0
package/skill/{Workflows → workflows}/Sync.md +3 -1
package/skill/{Workflows → workflows}/UnitTest.md +19 -0
package/skill/{Workflows → workflows}/Watch.md +42 -2
package/skill/{Workflows → workflows}/Workflows.md +39 -2
package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
package/cli/selftune/utils/html.ts +0 -27
package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
/package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
/package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
/package/skill/{Workflows → workflows}/Badge.md +0 -0
/package/skill/{Workflows → workflows}/Composability.md +0 -0
/package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
/package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
/package/skill/{Workflows → workflows}/Hook.md +0 -0
/package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
/package/skill/{Workflows → workflows}/Quickstart.md +0 -0
/package/skill/{Workflows → workflows}/Recover.md +0 -0
/package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
/package/skill/{Workflows → workflows}/Replay.md +0 -0
/package/skill/{Workflows → workflows}/Rollback.md +0 -0
/package/skill/{Workflows → workflows}/Telemetry.md +0 -0
/package/skill/{Workflows → workflows}/Uninstall.md +0 -0

package/cli/selftune/evolution/apply-proposal.ts ADDED Viewed

@@ -0,0 +1,295 @@
+/**
+ * apply-proposal.ts
+ *
+ * Fetches an approved contributor proposal from the cloud API, applies the
+ * proposed update to the local SKILL.md, and marks the proposal as applied.
+ *
+ * Usage:
+ *   selftune evolve apply-proposal --id <proposal-id> --skill-path <path>
+ */
+import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
+import { parseArgs } from "node:util";
+import { readAlphaIdentity } from "../alpha-identity.js";
+import { SELFTUNE_CONFIG_PATH } from "../constants.js";
+import { CLIError, handleCLIError } from "../utils/cli-error.js";
+import { replaceDescription } from "../utils/frontmatter.js";
+import { getSelftuneVersion } from "../utils/selftune-meta.js";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+interface ProposalRecord { // Proposal object read from the `proposal` key of the GET /api/v1/proposals/:id response.
+  id: string;
+  skill_id: string;
+  skill_name: string;
+  proposal_type: string; // applyProposalToSkill handles only "description" and "body".
+  current_value: string; // Printed (first 500 characters) in the CLI summary.
+  proposed_value: string; // Text written into SKILL.md when the proposal is applied.
+  reason: string | null;
+  pass_rate_before: number | null; // Shown as a percentage (value * 100) — presumably a 0..1 fraction; confirm with the API.
+  projected_pass_rate: number | null; // Same scale as pass_rate_before; shown as "?" when null.
+  status: "pending" | "approved" | "rejected" | "applied"; // cliMain only applies proposals whose status is "approved".
+  proposed_by: string; // cliMain requires the value "contributor_aggregate".
+  reviewed_by: string | null;
+  reviewed_at: string | null;
+  applied_at: string | null;
+  created_at: string;
+}
+// ---------------------------------------------------------------------------
+// Cloud API helpers (follows registry/client.ts pattern)
+// ---------------------------------------------------------------------------
+/**
+ * Resolve the cloud API endpoint and key from the local alpha identity.
+ *
+ * @returns The API URL (defaulting to https://api.selftune.dev when the identity
+ *   carries no `cloud_api_url`) together with the API key, or `null` when no API
+ *   key is stored or reading the identity throws.
+ */
+function getCloudConfig(): { apiUrl: string; apiKey: string } | null {
+  try {
+    const alphaIdentity = readAlphaIdentity(SELFTUNE_CONFIG_PATH);
+    if (alphaIdentity?.api_key) {
+      return {
+        apiUrl: alphaIdentity.cloud_api_url || "https://api.selftune.dev",
+        apiKey: alphaIdentity.api_key,
+      };
+    }
+    return null;
+  } catch {
+    // Any failure while reading the identity is treated as "not configured".
+    return null;
+  }
+}
+/**
+ * Fetch a single proposal from the cloud API.
+ *
+ * @param proposalId - Proposal identifier; URL-encoded before being placed in the path.
+ * @param config - API base URL and bearer key.
+ * @returns The object found under `proposal` in the JSON response body.
+ * @throws CLIError with code `NOT_FOUND` on HTTP 404, and with code `API_ERROR` on any
+ *   other non-2xx status or when the response body is not JSON containing a
+ *   `proposal` object.
+ */
+async function fetchProposal(
+  proposalId: string,
+  config: { apiUrl: string; apiKey: string },
+): Promise<ProposalRecord> {
+  const url = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;
+  const response = await fetch(url, {
+    method: "GET",
+    headers: {
+      Authorization: `Bearer ${config.apiKey}`,
+      "User-Agent": `selftune/${getSelftuneVersion()}`,
+      Accept: "application/json",
+    },
+    // Abort rather than hang the CLI if the API does not answer within 15 seconds.
+    signal: AbortSignal.timeout(15_000),
+  });
+  if (!response.ok) {
+    const text = await response.text().catch(() => "unknown error");
+    if (response.status === 404) {
+      throw new CLIError(
+        `Proposal ${proposalId} not found.`,
+        "NOT_FOUND",
+        "Check the proposal ID and try again.",
+      );
+    }
+    throw new CLIError(
+      `Failed to fetch proposal: HTTP ${response.status}: ${text.slice(0, 200)}`,
+      "API_ERROR",
+      "Check your credentials and network connection.",
+    );
+  }
+  // Validate the payload shape here: a 2xx response that is not JSON, or that lacks
+  // a `proposal` object, would otherwise surface later as an opaque SyntaxError or
+  // TypeError when the caller reads proposal fields.
+  const body: unknown = await response.json().catch(() => null);
+  const proposal =
+    typeof body === "object" && body !== null
+      ? (body as { proposal?: unknown }).proposal
+      : undefined;
+  if (typeof proposal !== "object" || proposal === null) {
+    throw new CLIError(
+      "Failed to fetch proposal: the API response did not contain a proposal object.",
+      "API_ERROR",
+      "Check the configured cloud API URL and try again.",
+    );
+  }
+  return proposal as ProposalRecord;
+}
+/**
+ * Best-effort PATCH that sets the proposal's status to "applied" in the cloud.
+ *
+ * HTTP and network failures are reported as warnings on stderr instead of being
+ * thrown, so a failed status update does not undo a local apply.
+ *
+ * @param proposalId - Proposal identifier; URL-encoded before being placed in the path.
+ * @param config - API base URL and bearer key.
+ * @returns `true` when the API answered with a 2xx status, `false` otherwise.
+ */
+async function markProposalApplied(
+  proposalId: string,
+  config: { apiUrl: string; apiKey: string },
+): Promise<boolean> {
+  const endpoint = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;
+  // Emit the shared warning prefix and yield the failure result in one step.
+  const warn = (detail: string): boolean => {
+    console.error(`Warning: Failed to mark proposal as applied: ${detail}`);
+    return false;
+  };
+  try {
+    const response = await fetch(endpoint, {
+      method: "PATCH",
+      headers: {
+        Authorization: `Bearer ${config.apiKey}`,
+        "User-Agent": `selftune/${getSelftuneVersion()}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ status: "applied" }),
+      signal: AbortSignal.timeout(15_000),
+    });
+    if (response.ok) {
+      return true;
+    }
+    const detail = await response.text().catch(() => "unknown error");
+    return warn(`HTTP ${response.status}: ${detail.slice(0, 200)}`);
+  } catch (error) {
+    return warn(error instanceof Error ? error.message : String(error));
+  }
+}
+// ---------------------------------------------------------------------------
+// Apply logic
+// ---------------------------------------------------------------------------
+/**
+ * Apply a proposal to the SKILL.md at `skillPath`, keeping a `.bak` copy of the
+ * previous file next to it.
+ *
+ * - `description` proposals are applied through `replaceDescription`.
+ * - `body` proposals keep the leading `---` frontmatter block (when the first line
+ *   is `---` and a closing `---` line exists) and replace everything after it; with
+ *   no frontmatter the whole file is replaced.
+ *
+ * @param skillPath - Path to the SKILL.md to modify.
+ * @param proposal - Proposal whose `proposed_value` is written.
+ * @returns The path of the backup file that was written.
+ * @throws CLIError with code `FILE_NOT_FOUND` when `skillPath` does not exist, and
+ *   with code `UNSUPPORTED_TYPE` for any other `proposal_type`.
+ */
+function applyProposalToSkill(skillPath: string, proposal: ProposalRecord): { backupPath: string } {
+  if (!existsSync(skillPath)) {
+    throw new CLIError(
+      `Skill file not found: ${skillPath}`,
+      "FILE_NOT_FOUND",
+      "Verify the --skill-path argument points to your SKILL.md.",
+    );
+  }
+  const content = readFileSync(skillPath, "utf-8");
+  // Compute the new content BEFORE touching the disk. If the proposal type is
+  // unsupported (or replaceDescription throws), no backup is written, so an
+  // earlier backup is not overwritten by a run that changed nothing.
+  let updated: string;
+  if (proposal.proposal_type === "description") {
+    updated = replaceDescription(content, proposal.proposed_value);
+  } else if (proposal.proposal_type === "body") {
+    const lines = content.split("\n");
+    // Index of the line closing the frontmatter, or -1 when there is none.
+    let endIdx = -1;
+    if (lines[0]?.trim() === "---") {
+      for (let i = 1; i < lines.length; i++) {
+        if (lines[i].trim() === "---") {
+          endIdx = i;
+          break;
+        }
+      }
+    }
+    if (endIdx >= 0) {
+      updated = lines.slice(0, endIdx + 1).join("\n") + "\n\n" + proposal.proposed_value;
+    } else {
+      // No frontmatter -- replace entire content
+      updated = proposal.proposed_value;
+    }
+  } else {
+    throw new CLIError(
+      `Unsupported proposal type: ${proposal.proposal_type}`,
+      "UNSUPPORTED_TYPE",
+      "Only 'description' and 'body' proposal types can be applied.",
+    );
+  }
+  // Back up only once we know there is something to write.
+  const backupPath = `${skillPath}.bak`;
+  copyFileSync(skillPath, backupPath);
+  writeFileSync(skillPath, updated, "utf-8");
+  return { backupPath };
+}
+// ---------------------------------------------------------------------------
+// CLI entry point
+// ---------------------------------------------------------------------------
+/**
+ * Entry point for `selftune evolve apply-proposal`.
+ *
+ * Parses `--id`, `--skill-path`, `--dry-run` and `--help`; fetches the proposal,
+ * checks that it was proposed by "contributor_aggregate" and is "approved", prints
+ * a summary, and (unless `--dry-run`) writes it to the skill file and marks it as
+ * applied in the cloud. Missing required flags throw a CLIError directly; errors
+ * raised after flag validation are passed to `handleCLIError`.
+ */
+export async function cliMain(): Promise<void> {
+  const usage = "selftune evolve apply-proposal --id <proposal-id> --skill-path <path>";
+  const { values: flags } = parseArgs({
+    options: {
+      id: { type: "string" },
+      "skill-path": { type: "string" },
+      "dry-run": { type: "boolean", default: false },
+      help: { type: "boolean", default: false },
+    },
+    strict: true,
+  });
+  if (flags.help) {
+    console.log(`selftune evolve apply-proposal -- Apply an approved contributor proposal
+Usage:
+  selftune evolve apply-proposal --id <proposal-id> --skill-path <path> [options]
+Options:
+  --id            Proposal UUID (required)
+  --skill-path    Path to the target SKILL.md (required)
+  --dry-run       Preview the proposal without applying
+  --help          Show this help message
+The proposal must be proposed by "contributor_aggregate" and have status
+"approved". The command fetches the proposal from the cloud API, applies
+the proposed change to the local SKILL.md, and marks the proposal as applied.`);
+    process.exit(0);
+  }
+  const proposalId = flags.id;
+  if (!proposalId) {
+    throw new CLIError("--id is required", "MISSING_FLAG", usage);
+  }
+  const skillPath = flags["skill-path"];
+  if (!skillPath) {
+    throw new CLIError("--skill-path is required", "MISSING_FLAG", usage);
+  }
+  const dryRun = flags["dry-run"] ?? false;
+  // Render a 0..1 rate the way the summary shows it, e.g. 0.5 -> "50.0%".
+  const asPercent = (rate: number): string => `${(rate * 100).toFixed(1)}%`;
+  try {
+    // One credential lookup serves both the fetch and the later status update.
+    const cloud = getCloudConfig();
+    if (!cloud) {
+      throw new CLIError(
+        "Not authenticated. Run 'selftune init' to set up cloud credentials.",
+        "AUTH_MISSING",
+        "selftune init",
+      );
+    }
+    // Step 1: fetch.
+    console.log(`Fetching proposal ${proposalId}...`);
+    const proposal = await fetchProposal(proposalId, cloud);
+    // Step 2: validate origin, then review status.
+    if (proposal.proposed_by !== "contributor_aggregate") {
+      throw new CLIError(
+        `Proposal was proposed by "${proposal.proposed_by}", not "contributor_aggregate".`,
+        "INVALID_PROPOSAL",
+        "Only contributor aggregate proposals can be applied via this command.",
+      );
+    }
+    if (proposal.status !== "approved") {
+      throw new CLIError(
+        `Proposal status is "${proposal.status}", expected "approved".`,
+        "INVALID_STATUS",
+        "Approve the proposal in the dashboard first, then apply it.",
+      );
+    }
+    // Step 3: summary (values are truncated to 500 characters for display only).
+    console.log(`\nProposal: ${proposal.id}`);
+    console.log(`  Skill:         ${proposal.skill_name}`);
+    console.log(`  Type:          ${proposal.proposal_type}`);
+    console.log(`  Proposed by:   ${proposal.proposed_by}`);
+    console.log(`  Reason:        ${proposal.reason ?? "(none)"}`);
+    if (proposal.pass_rate_before != null) {
+      let projected = "?";
+      if (proposal.projected_pass_rate != null) {
+        projected = asPercent(proposal.projected_pass_rate);
+      }
+      console.log(`  Pass rate:     ${asPercent(proposal.pass_rate_before)} -> ${projected}`);
+    }
+    console.log(`\n--- Current Value ---`);
+    console.log(proposal.current_value.slice(0, 500));
+    console.log(`\n--- Proposed Value ---`);
+    console.log(proposal.proposed_value.slice(0, 500));
+    if (dryRun) {
+      console.log("\n[dry-run] No changes written.");
+      return;
+    }
+    // Step 4: write the change locally.
+    const applied = applyProposalToSkill(skillPath, proposal);
+    console.log(`\nApplied proposal to ${skillPath}`);
+    console.log(`Backup saved to ${applied.backupPath}`);
+    // Step 5: record the apply in the cloud; a failure there only warns.
+    if (await markProposalApplied(proposalId, cloud)) {
+      console.log(`Proposal ${proposalId} marked as applied.`);
+    }
+  } catch (err) {
+    handleCLIError(err);
+  }
+}

package/cli/selftune/evolution/engines/judge-engine.ts ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * judge-engine.ts
+ *
+ * LLM judge validation engine: runs trigger accuracy checks using
+ * an LLM as a YES/NO judge for each eval entry.
+ *
+ * Extracted from validate-routing.ts and validate-body.ts to isolate
+ * LLM-judge-specific concerns from replay-specific concerns.
+ */
+import type { EvalEntry, ValidationMode } from "../../types.js";
+import { callLlm } from "../../utils/llm-call.js";
+import { buildTriggerCheckPrompt, parseTriggerResponse } from "../../utils/trigger-check.js";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+export interface JudgeValidationResult { // Outcome returned by runJudgeValidation.
+  before_pass_rate: number; // Fraction of eval entries that pass with the original content (0 for an empty eval set).
+  after_pass_rate: number; // Fraction of eval entries that pass with the proposed content (0 for an empty eval set).
+  improved: boolean; // True only when after_pass_rate is strictly greater than before_pass_rate.
+  regressions: string[]; // Queries that passed with the original content but fail with the proposed content.
+  validation_mode: ValidationMode; // runJudgeValidation always sets this to "llm_judge".
+  validation_agent: string; // The `agent` argument that was passed to runJudgeValidation.
+}
+// ---------------------------------------------------------------------------
+// Judge validation engine
+// ---------------------------------------------------------------------------
+/**
+ * Compare the trigger accuracy of two skill contents using an LLM as a YES/NO judge.
+ *
+ * Each eval entry is judged twice, one call after the other: first against
+ * `originalContent`, then against `proposedContent`. An entry passes when the
+ * judge's verdict agrees with `entry.should_trigger`.
+ *
+ * @param originalContent - Content currently in use.
+ * @param proposedContent - Candidate replacement content.
+ * @param evalSet - Entries to judge; an empty set yields rates of 0 and
+ *   `improved: false` without calling the LLM.
+ * @param agent - Agent forwarded to `callLlm` and echoed as `validation_agent`.
+ * @param modelFlag - Optional model selector forwarded to `callLlm`.
+ * @returns Pass rates before and after, whether the rate strictly improved, and the
+ *   queries that passed before but fail after.
+ */
+export async function runJudgeValidation(
+  originalContent: string,
+  proposedContent: string,
+  evalSet: EvalEntry[],
+  agent: string,
+  modelFlag?: string,
+): Promise<JudgeValidationResult> {
+  const judgeSystemPrompt = "You are an evaluation assistant. Answer only YES or NO.";
+  // Ask the judge about one content/entry pair; report whether the verdict matches the expectation.
+  const entryPasses = async (content: string, entry: EvalEntry) => {
+    const question = buildTriggerCheckPrompt(content, entry.query);
+    const answer = await callLlm(judgeSystemPrompt, question, agent, modelFlag);
+    const triggered = parseTriggerResponse(answer);
+    return entry.should_trigger ? triggered : !triggered;
+  };
+  let passedBefore = 0;
+  let passedAfter = 0;
+  const regressedQueries: string[] = [];
+  for (const entry of evalSet) {
+    const okBefore = await entryPasses(originalContent, entry);
+    const okAfter = await entryPasses(proposedContent, entry);
+    if (okBefore) passedBefore += 1;
+    if (okAfter) passedAfter += 1;
+    // A regression is an entry the original content got right and the proposal gets wrong.
+    if (okBefore && !okAfter) {
+      regressedQueries.push(entry.query);
+    }
+  }
+  const entryCount = evalSet.length;
+  // With no entries both rates are reported as 0 rather than dividing by zero.
+  const toRate = (passed: number): number => (entryCount === 0 ? 0 : passed / entryCount);
+  const rateBefore = toRate(passedBefore);
+  const rateAfter = toRate(passedAfter);
+  return {
+    before_pass_rate: rateBefore,
+    after_pass_rate: rateAfter,
+    improved: rateAfter > rateBefore,
+    regressions: regressedQueries,
+    validation_mode: "llm_judge",
+    validation_agent: agent,
+  };
+}

package/cli/selftune/evolution/engines/replay-engine.ts ADDED Viewed

@@ -0,0 +1,180 @@
+/**
+ * replay-engine.ts
+ *
+ * Cohesive module for all replay-based validation logic:
+ *   - Host/runtime replay (PRIMARY path — real agent routing decisions)
+ *   - Custom replay runner support
+ *
+ * Host/runtime replay is preferred because it captures actual agent routing
+ * behavior. If the runtime path is unavailable or fails, callers must fall
+ * back explicitly to another validation mode instead of treating simulated
+ * fixture matching as equivalent replay evidence.
+ *
+ * Extracted from validate-routing.ts and validate-body.ts to isolate
+ * replay-specific concerns from judge-specific concerns.
+ */
+import type {
+  EvalEntry,
+  RoutingReplayEntryResult,
+  RoutingReplayFixture,
+  ValidationMode,
+} from "../../types.js";
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/** Argument object handed to a {@link ReplayRunner} for one replay pass. */
+export interface ReplayRunnerInput {
+  /** Content to replay against; the caller passes the original or the proposed content here. */
+  routing: string;
+  /** Eval entries the runner is asked to produce results for. */
+  evalSet: EvalEntry[];
+  /** Agent identifier, forwarded unchanged from the validation call. */
+  agent: string;
+  /** Replay fixture supplied through the validation options. */
+  fixture: RoutingReplayFixture;
+}
+/** Async replay callback: takes one {@link ReplayRunnerInput} and resolves to per-entry results. */
+export type ReplayRunner = (input: ReplayRunnerInput) => Promise<RoutingReplayEntryResult[]>;
+/** Optional inputs for replay validation; both must be present for a replay to run. */
+export interface ReplayValidationOptions {
+  /** Fixture passed to the runner; its `fixture_id` is recorded on the result. */
+  replayFixture?: RoutingReplayFixture;
+  /** Host/runtime replay runner — PRIMARY validation path when provided. */
+  replayRunner?: ReplayRunner;
+}
+/** Outcome of a completed replay comparison between original and proposed content. */
+export interface ReplayValidationResult {
+  /** Passed before-phase results divided by the eval-set size. */
+  before_pass_rate: number;
+  /** Passed after-phase results divided by the eval-set size. */
+  after_pass_rate: number;
+  /** Whether the proposed content met the improvement criteria. */
+  improved: boolean;
+  validation_mode: ValidationMode;
+  validation_agent: string;
+  /** Identifier of the fixture the replay ran against. */
+  validation_fixture_id?: string;
+  /** After-phase per-entry results. */
+  per_entry_results?: RoutingReplayEntryResult[];
+  /** Before-phase per-entry results for structured persistence. */
+  before_entry_results?: RoutingReplayEntryResult[];
+}
+/** Wrapper returned by a replay attempt, which may not have produced a result. */
+export interface ReplayValidationAttempt {
+  /** The replay result, or `null` when no replay was performed or it failed. */
+  result: ReplayValidationResult | null;
+  /** Why replay did not run or failed; not set for an empty eval set. */
+  fallbackReason?: string;
+}
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+/**
+ * Fold before/after replay results into a single validation result.
+ *
+ * `improved` requires all three of: a strictly higher after pass rate, fewer
+ * regressions than 5% of `total`, and either a pass-rate gain of at least 0.1
+ * or at least two entries that newly pass.
+ *
+ * @param beforeResults - Per-entry results replayed against the original content.
+ * @param afterResults - Per-entry results replayed against the proposed content.
+ * @param total - Denominator for both pass rates.
+ * @param mode - Validation mode recorded on the result.
+ * @param agent - Agent recorded on the result.
+ * @param fixtureId - Fixture identifier recorded on the result.
+ */
+function computeReplayResult(
+  beforeResults: RoutingReplayEntryResult[],
+  afterResults: RoutingReplayEntryResult[],
+  total: number,
+  mode: ValidationMode,
+  agent: string,
+  fixtureId: string,
+): ReplayValidationResult {
+  const passRate = (results: RoutingReplayEntryResult[]): number =>
+    results.reduce((count, entry) => (entry.passed ? count + 1 : count), 0) / total;
+  const rateBefore = passRate(beforeResults);
+  const rateAfter = passRate(afterResults);
+  // Before-phase outcome per query; a repeated query keeps its last outcome.
+  const baselineByQuery = new Map<string, boolean>(
+    beforeResults.map((entry): [string, boolean] => [entry.query, entry.passed]),
+  );
+  let regressed = 0;
+  let newlyPassing = 0;
+  for (const { query, passed } of afterResults) {
+    // A query with no before-phase result counts as previously failing.
+    const passedBefore = baselineByQuery.get(query) ?? false;
+    if (passedBefore && !passed) {
+      regressed += 1;
+    } else if (!passedBefore && passed) {
+      newlyPassing += 1;
+    }
+  }
+  const rateWentUp = rateAfter > rateBefore;
+  const fewRegressions = regressed < total * 0.05;
+  const meaningfulGain = rateAfter - rateBefore >= 0.1 || newlyPassing >= 2;
+  return {
+    before_pass_rate: rateBefore,
+    after_pass_rate: rateAfter,
+    improved: rateWentUp && fewRegressions && meaningfulGain,
+    validation_mode: mode,
+    validation_agent: agent,
+    validation_fixture_id: fixtureId,
+    per_entry_results: afterResults,
+    before_entry_results: beforeResults,
+  };
+}
+// ---------------------------------------------------------------------------
+// Replay validation engine
+// ---------------------------------------------------------------------------
+/**
+ * Try to validate a proposed change by replaying the eval set through a real
+ * host/runtime runner, once for the original content and once for the
+ * proposed content.
+ *
+ * The attempt resolves with `result: null` when the eval set is empty, when
+ * no fixture or runner is configured, or when the runner throws. In the last
+ * three cases `fallbackReason` explains why. Callers decide whether to fall
+ * back to a judge-based validator (`auto`) or surface an explicit
+ * unavailable error (`replay`).
+ */
+export async function runReplayValidation(
+  originalContent: string,
+  proposedContent: string,
+  evalSet: EvalEntry[],
+  agent: string,
+  options: ReplayValidationOptions = {},
+): Promise<ReplayValidationAttempt> {
+  const unavailable = (reason: string): ReplayValidationAttempt => ({
+    result: null,
+    fallbackReason: reason,
+  });
+  if (evalSet.length === 0) {
+    return { result: null };
+  }
+  const fixture = options.replayFixture;
+  if (!fixture) {
+    return unavailable("no replay fixture is available for runtime validation");
+  }
+  if (!options.replayRunner) {
+    return unavailable("no real host/runtime replay runner is configured");
+  }
+  // Captured before the runner is invoked, so both rates share one denominator.
+  const total = evalSet.length;
+  const inputFor = (routing: string): ReplayRunnerInput => ({
+    routing,
+    evalSet,
+    agent,
+    fixture,
+  });
+  try {
+    // The two passes run sequentially: original content first, then proposed.
+    const beforeResults = await options.replayRunner(inputFor(originalContent));
+    const afterResults = await options.replayRunner(inputFor(proposedContent));
+    const result = computeReplayResult(
+      beforeResults,
+      afterResults,
+      total,
+      "host_replay",
+      agent,
+      fixture.fixture_id,
+    );
+    return { result };
+  } catch (error) {
+    // Only a non-blank Error message is surfaced; anything else gets the generic text.
+    let detail = "runtime replay failed before producing a routing decision";
+    if (error instanceof Error) {
+      const trimmed = error.message.trim();
+      if (trimmed) {
+        detail = trimmed;
+      }
+    }
+    return unavailable(`real host/runtime replay failed: ${detail}`);
+  }
+}

package/cli/selftune/evolution/evidence.ts CHANGED Viewed

@@ -12,11 +12,7 @@ import { queryEvolutionEvidence } from "../localdb/queries.js";
 import type { EvolutionEvidenceEntry } from "../types.js";
 /** Append a structured evidence artifact to the evolution evidence log (SQLite). */
-export function appendEvidenceEntry(
-  entry: EvolutionEvidenceEntry,
-  /** @deprecated Unused; retained for API compatibility during migration */
-  _logPath?: string,
-): void {
+export function appendEvidenceEntry(entry: EvolutionEvidenceEntry): void {
   writeEvolutionEvidenceToDb(entry);
 }
@@ -25,7 +21,7 @@ export function appendEvidenceEntry(
  *
  * @param skillName - Optional skill name to filter by
  */
-export function readEvidenceTrail(skillName?: string, _logPath?: string): EvolutionEvidenceEntry[] {
+export function readEvidenceTrail(skillName?: string): EvolutionEvidenceEntry[] {
   const db = getDb();
   return queryEvolutionEvidence(db, skillName) as EvolutionEvidenceEntry[];
 }