selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +95 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/adapters/pi/hook.ts +273 -0
  12. package/cli/selftune/adapters/pi/install.ts +207 -0
  13. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  15. package/cli/selftune/auto-update.ts +200 -8
  16. package/cli/selftune/canonical-export.ts +55 -25
  17. package/cli/selftune/command-surface.ts +397 -0
  18. package/cli/selftune/constants.ts +10 -1
  19. package/cli/selftune/contribute/contribute.ts +64 -13
  20. package/cli/selftune/contribution-config.ts +57 -3
  21. package/cli/selftune/contribution-preferences.ts +117 -0
  22. package/cli/selftune/contribution-signals.ts +8 -4
  23. package/cli/selftune/contribution-staging.ts +13 -2
  24. package/cli/selftune/contributions.ts +55 -121
  25. package/cli/selftune/creator-contributions.ts +29 -10
  26. package/cli/selftune/cron/setup.ts +7 -3
  27. package/cli/selftune/dashboard-contract.ts +87 -0
  28. package/cli/selftune/dashboard-server.ts +168 -17
  29. package/cli/selftune/dashboard.ts +350 -17
  30. package/cli/selftune/eval/baseline.ts +21 -5
  31. package/cli/selftune/eval/execution-eval.ts +170 -0
  32. package/cli/selftune/eval/family-overlap.ts +2 -2
  33. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  34. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  35. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  36. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  37. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  38. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  39. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  40. package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
  41. package/cli/selftune/evolution/evidence.ts +2 -6
  42. package/cli/selftune/evolution/evolve-body.ts +152 -38
  43. package/cli/selftune/evolution/evolve.ts +244 -52
  44. package/cli/selftune/evolution/rollback.ts +0 -1
  45. package/cli/selftune/evolution/validate-body.ts +111 -49
  46. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  47. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  48. package/cli/selftune/evolution/validate-routing.ts +51 -108
  49. package/cli/selftune/evolution/validation-contract.ts +91 -0
  50. package/cli/selftune/grading/auto-grade.ts +11 -7
  51. package/cli/selftune/grading/grade-session.ts +10 -16
  52. package/cli/selftune/hooks/skill-eval.ts +2 -1
  53. package/cli/selftune/hooks-shared/types.ts +1 -0
  54. package/cli/selftune/index.ts +58 -15
  55. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  56. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  57. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  58. package/cli/selftune/ingestors/pi-ingest.ts +727 -0
  59. package/cli/selftune/init.ts +38 -4
  60. package/cli/selftune/localdb/direct-write.ts +120 -1
  61. package/cli/selftune/localdb/materialize.ts +6 -7
  62. package/cli/selftune/localdb/queries/cron.ts +34 -0
  63. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  64. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  65. package/cli/selftune/localdb/queries/execution.ts +133 -0
  66. package/cli/selftune/localdb/queries/json.ts +18 -0
  67. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  68. package/cli/selftune/localdb/queries/raw.ts +95 -0
  69. package/cli/selftune/localdb/queries/staging.ts +270 -0
  70. package/cli/selftune/localdb/queries/trust.ts +392 -0
  71. package/cli/selftune/localdb/queries.ts +60 -2162
  72. package/cli/selftune/localdb/schema.ts +59 -0
  73. package/cli/selftune/monitoring/watch.ts +96 -29
  74. package/cli/selftune/normalization.ts +3 -0
  75. package/cli/selftune/observability.ts +12 -3
  76. package/cli/selftune/orchestrate/cli.ts +161 -0
  77. package/cli/selftune/orchestrate/execute.ts +295 -0
  78. package/cli/selftune/orchestrate/finalize.ts +157 -0
  79. package/cli/selftune/orchestrate/locks.ts +40 -0
  80. package/cli/selftune/orchestrate/plan.ts +131 -0
  81. package/cli/selftune/orchestrate/post-run.ts +59 -0
  82. package/cli/selftune/orchestrate/prepare.ts +334 -0
  83. package/cli/selftune/orchestrate/report.ts +182 -0
  84. package/cli/selftune/orchestrate/runtime.ts +120 -0
  85. package/cli/selftune/orchestrate/signals.ts +48 -0
  86. package/cli/selftune/orchestrate.ts +162 -1142
  87. package/cli/selftune/registry/client.ts +74 -0
  88. package/cli/selftune/registry/history.ts +54 -0
  89. package/cli/selftune/registry/index.ts +90 -0
  90. package/cli/selftune/registry/install.ts +141 -0
  91. package/cli/selftune/registry/list.ts +44 -0
  92. package/cli/selftune/registry/push.ts +171 -0
  93. package/cli/selftune/registry/rollback.ts +49 -0
  94. package/cli/selftune/registry/status.ts +62 -0
  95. package/cli/selftune/registry/sync.ts +125 -0
  96. package/cli/selftune/repair/skill-usage.ts +9 -3
  97. package/cli/selftune/routes/overview.ts +5 -2
  98. package/cli/selftune/routes/skill-report.ts +15 -2
  99. package/cli/selftune/schedule.ts +5 -5
  100. package/cli/selftune/status.ts +70 -2
  101. package/cli/selftune/sync.ts +127 -23
  102. package/cli/selftune/testing-readiness.ts +597 -0
  103. package/cli/selftune/types.ts +46 -5
  104. package/cli/selftune/uninstall.ts +2 -1
  105. package/cli/selftune/utils/canonical-log.ts +1 -9
  106. package/cli/selftune/utils/cli-error.ts +9 -0
  107. package/cli/selftune/utils/jsonl.ts +1 -30
  108. package/cli/selftune/utils/llm-call.ts +126 -6
  109. package/cli/selftune/utils/skill-discovery.ts +24 -0
  110. package/cli/selftune/workflows/proposals.ts +184 -0
  111. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  112. package/cli/selftune/workflows/workflows.ts +100 -26
  113. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  114. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  115. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  116. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  117. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  118. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  119. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  120. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
  121. package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
  122. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  123. package/package.json +25 -9
  124. package/packages/dashboard-core/AGENTS.md +18 -0
  125. package/packages/dashboard-core/README.md +30 -0
  126. package/packages/dashboard-core/index.ts +3 -0
  127. package/packages/dashboard-core/package.json +39 -0
  128. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  129. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  130. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  131. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  132. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  133. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  134. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  135. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  136. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  137. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  138. package/packages/dashboard-core/src/gates/index.ts +3 -0
  139. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  140. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  141. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  142. package/packages/dashboard-core/src/host/index.ts +3 -0
  143. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  144. package/packages/dashboard-core/src/models/index.ts +4 -0
  145. package/packages/dashboard-core/src/models/overview.ts +98 -0
  146. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  147. package/packages/dashboard-core/src/models/skills.ts +34 -0
  148. package/packages/dashboard-core/src/routes/index.ts +2 -0
  149. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  150. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  151. package/packages/dashboard-core/src/routes/types.ts +39 -0
  152. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  153. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  154. package/packages/dashboard-core/src/screens/index.ts +37 -0
  155. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  156. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  157. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  158. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  159. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  160. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  161. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  162. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  163. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  164. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  165. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  166. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  167. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  168. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  169. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  170. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  171. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  172. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  173. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  174. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  175. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  176. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  177. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  178. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  179. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  180. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  181. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  182. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  183. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  184. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  185. package/packages/telemetry-contract/package.json +1 -1
  186. package/packages/telemetry-contract/src/index.ts +1 -0
  187. package/packages/telemetry-contract/src/schemas.ts +63 -5
  188. package/packages/telemetry-contract/src/types.ts +97 -7
  189. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  190. package/packages/ui/AGENTS.md +16 -0
  191. package/packages/ui/README.md +1 -1
  192. package/packages/ui/package.json +1 -1
  193. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  194. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  195. package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
  196. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  197. package/packages/ui/src/components/InfoTip.tsx +1 -2
  198. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  199. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  200. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  201. package/packages/ui/src/components/OverviewPanels.tsx +693 -0
  202. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  203. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  204. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  205. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  206. package/packages/ui/src/components/index.ts +56 -1
  207. package/packages/ui/src/components/section-cards.tsx +18 -35
  208. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  209. package/packages/ui/src/lib/constants.tsx +0 -1
  210. package/packages/ui/src/primitives/card.tsx +1 -1
  211. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  212. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  213. package/packages/ui/src/primitives/select.tsx +2 -2
  214. package/packages/ui/src/primitives/tabs.tsx +7 -6
  215. package/packages/ui/src/types.ts +182 -4
  216. package/skill/SKILL.md +130 -318
  217. package/skill/agents/diagnosis-analyst.md +3 -3
  218. package/skill/agents/evolution-reviewer.md +3 -3
  219. package/skill/agents/integration-guide.md +3 -3
  220. package/skill/agents/pattern-analyst.md +2 -2
  221. package/skill/references/cli-quick-reference.md +89 -0
  222. package/skill/references/creator-playbook.md +131 -0
  223. package/skill/references/examples.md +48 -0
  224. package/skill/references/troubleshooting.md +47 -0
  225. package/skill/references/version-history.md +1 -1
  226. package/skill/selftune.contribute.json +11 -0
  227. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  228. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  229. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  230. package/skill/workflows/CreateTestDeploy.md +170 -0
  231. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  232. package/skill/{Workflows → workflows}/Cron.md +1 -1
  233. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  234. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  235. package/skill/{Workflows → workflows}/Evals.md +67 -2
  236. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  237. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  238. package/skill/{Workflows → workflows}/Grade.md +1 -1
  239. package/skill/{Workflows → workflows}/Ingest.md +60 -2
  240. package/skill/{Workflows → workflows}/Initialize.md +16 -9
  241. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  242. package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
  243. package/skill/workflows/Registry.md +99 -0
  244. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  245. package/skill/workflows/SignalsDashboard.md +87 -0
  246. package/skill/{Workflows → workflows}/Sync.md +3 -1
  247. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  248. package/skill/{Workflows → workflows}/Watch.md +42 -2
  249. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  250. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  251. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  252. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  253. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  254. package/cli/selftune/utils/html.ts +0 -27
  255. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
  256. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  257. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  258. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  259. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  260. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  261. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  262. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  263. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  264. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  265. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  266. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  267. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  268. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  269. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  270. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -0,0 +1,295 @@
1
+ /**
2
+ * apply-proposal.ts
3
+ *
4
+ * Fetches an approved contributor proposal from the cloud API, applies the
5
+ * proposed update to the local SKILL.md, and marks the proposal as applied.
6
+ *
7
+ * Usage:
8
+ * selftune evolve apply-proposal --id <proposal-id> --skill-path <path>
9
+ */
10
+
11
+ import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
12
+ import { parseArgs } from "node:util";
13
+
14
+ import { readAlphaIdentity } from "../alpha-identity.js";
15
+ import { SELFTUNE_CONFIG_PATH } from "../constants.js";
16
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
17
+ import { replaceDescription } from "../utils/frontmatter.js";
18
+ import { getSelftuneVersion } from "../utils/selftune-meta.js";
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Types
22
+ // ---------------------------------------------------------------------------
23
+
24
/**
 * Proposal record as read from the `proposal` field of the cloud API's
 * `GET /api/v1/proposals/:id` response.
 */
interface ProposalRecord {
  // --- Identity ---
  /** Proposal identifier (the value supplied via `--id`). */
  id: string;
  skill_id: string;
  skill_name: string;

  // --- Proposed change ---
  /** Kind of change; this module can apply "description" and "body". */
  proposal_type: string;
  current_value: string;
  /** Replacement text written into the skill file when the proposal is applied. */
  proposed_value: string;
  reason: string | null;

  // --- Metrics (printed as percentages, i.e. multiplied by 100) ---
  pass_rate_before: number | null;
  projected_pass_rate: number | null;

  // --- Lifecycle ---
  /** Only "approved" proposals are applied by this command. */
  status: "pending" | "approved" | "rejected" | "applied";
  /** Only "contributor_aggregate" proposals are applied by this command. */
  proposed_by: string;
  reviewed_by: string | null;
  reviewed_at: string | null;
  applied_at: string | null;
  created_at: string;
}
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Cloud API helpers (follows registry/client.ts pattern)
44
+ // ---------------------------------------------------------------------------
45
+
46
/**
 * Resolve cloud API settings from the alpha identity stored at
 * `SELFTUNE_CONFIG_PATH`.
 *
 * @returns The API base URL (falling back to https://api.selftune.dev when
 *   the identity has no `cloud_api_url`) and the API key, or `null` when no
 *   API key is present or reading the identity throws.
 */
function getCloudConfig(): { apiUrl: string; apiKey: string } | null {
  try {
    const identity = readAlphaIdentity(SELFTUNE_CONFIG_PATH);
    const apiKey = identity?.api_key;
    if (!apiKey) {
      return null;
    }
    return {
      apiUrl: identity?.cloud_api_url || "https://api.selftune.dev",
      apiKey,
    };
  } catch {
    // Any failure while reading the identity is treated as "not configured".
    return null;
  }
}
56
+
57
/**
 * Fetch a single proposal from the cloud API.
 *
 * @param proposalId - Proposal identifier; URL-encoded before being placed in the path.
 * @param config - API base URL and bearer API key.
 * @returns The object found under the response's `proposal` field.
 * @throws CLIError with code `NOT_FOUND` on HTTP 404, and `API_ERROR` on any
 *   other non-OK status, on a body that is not valid JSON, or on a JSON body
 *   that does not contain a `proposal` object.
 */
async function fetchProposal(
  proposalId: string,
  config: { apiUrl: string; apiKey: string },
): Promise<ProposalRecord> {
  const url = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;
  const response = await fetch(url, {
    method: "GET",
    headers: {
      Authorization: `Bearer ${config.apiKey}`,
      "User-Agent": `selftune/${getSelftuneVersion()}`,
      Accept: "application/json",
    },
    signal: AbortSignal.timeout(15_000),
  });

  if (!response.ok) {
    const text = await response.text().catch(() => "unknown error");
    if (response.status === 404) {
      throw new CLIError(
        `Proposal ${proposalId} not found.`,
        "NOT_FOUND",
        "Check the proposal ID and try again.",
      );
    }
    throw new CLIError(
      `Failed to fetch proposal: HTTP ${response.status}: ${text.slice(0, 200)}`,
      "API_ERROR",
      "Check your credentials and network connection.",
    );
  }

  // Treat the payload as untrusted: a 2xx response with an unexpected body
  // must surface as a CLI error rather than an `undefined` proposal that
  // crashes later with a TypeError.
  let payload: unknown;
  try {
    payload = await response.json();
  } catch {
    throw new CLIError(
      "Failed to fetch proposal: response body was not valid JSON.",
      "API_ERROR",
      "Check your credentials and network connection.",
    );
  }

  const proposal =
    typeof payload === "object" && payload !== null
      ? (payload as { proposal?: unknown }).proposal
      : undefined;
  if (typeof proposal !== "object" || proposal === null) {
    throw new CLIError(
      "Failed to fetch proposal: response did not contain a proposal object.",
      "API_ERROR",
      "Check your credentials and network connection.",
    );
  }

  return proposal as ProposalRecord;
}
91
+
92
/**
 * Best-effort PATCH that sets the proposal's status to "applied" in the cloud.
 *
 * Failures (non-OK HTTP status or a thrown fetch error) are reported on
 * stderr as warnings and never thrown.
 *
 * @param proposalId - Proposal identifier; URL-encoded before being placed in the path.
 * @param config - API base URL and bearer API key.
 * @returns `true` when the API answered with an OK status, otherwise `false`.
 */
async function markProposalApplied(
  proposalId: string,
  config: { apiUrl: string; apiKey: string },
): Promise<boolean> {
  const endpoint = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;

  // Single place that formats the warning and yields the failure result.
  const warn = (detail: string): false => {
    console.error(`Warning: Failed to mark proposal as applied: ${detail}`);
    return false;
  };

  try {
    const response = await fetch(endpoint, {
      method: "PATCH",
      headers: {
        Authorization: `Bearer ${config.apiKey}`,
        "User-Agent": `selftune/${getSelftuneVersion()}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ status: "applied" }),
      signal: AbortSignal.timeout(15_000),
    });

    if (response.ok) {
      return true;
    }

    const text = await response.text().catch(() => "unknown error");
    return warn(`HTTP ${response.status}: ${text.slice(0, 200)}`);
  } catch (error) {
    return warn(error instanceof Error ? error.message : String(error));
  }
}
125
+
126
+ // ---------------------------------------------------------------------------
127
+ // Apply logic
128
+ // ---------------------------------------------------------------------------
129
+
130
/**
 * Apply a proposal to the skill file at `skillPath`, keeping a copy of the
 * previous content at `<skillPath>.bak`.
 *
 * - "description" proposals are delegated to `replaceDescription`.
 * - "body" proposals keep the leading `---` ... `---` frontmatter block (when
 *   the first line is `---` and a closing `---` line exists) and replace
 *   everything after it; otherwise the whole file content is replaced.
 *
 * The new content is computed before anything is written, so an unsupported
 * proposal type (or a failure while building the content) leaves the skill
 * file untouched and does not create or overwrite a backup.
 *
 * @param skillPath - Path to the target SKILL.md.
 * @param proposal - Proposal whose `proposed_value` is applied.
 * @returns The path of the backup file that was written.
 * @throws CLIError with code `FILE_NOT_FOUND` when `skillPath` does not exist,
 *   or `UNSUPPORTED_TYPE` for any other proposal type.
 */
function applyProposalToSkill(skillPath: string, proposal: ProposalRecord): { backupPath: string } {
  if (!existsSync(skillPath)) {
    throw new CLIError(
      `Skill file not found: ${skillPath}`,
      "FILE_NOT_FOUND",
      "Verify the --skill-path argument points to your SKILL.md.",
    );
  }

  const content = readFileSync(skillPath, "utf-8");

  let updated: string;
  if (proposal.proposal_type === "description") {
    updated = replaceDescription(content, proposal.proposed_value);
  } else if (proposal.proposal_type === "body") {
    const lines = content.split("\n");
    // Index of the closing frontmatter fence, or -1 when there is no frontmatter.
    const endIdx =
      lines[0]?.trim() === "---"
        ? lines.findIndex((line, index) => index > 0 && line.trim() === "---")
        : -1;
    updated =
      endIdx >= 0
        ? `${lines.slice(0, endIdx + 1).join("\n")}\n\n${proposal.proposed_value}`
        : proposal.proposed_value; // No frontmatter -- replace entire content
  } else {
    throw new CLIError(
      `Unsupported proposal type: ${proposal.proposal_type}`,
      "UNSUPPORTED_TYPE",
      "Only 'description' and 'body' proposal types can be applied.",
    );
  }

  // Back up only once we know there is valid new content to write.
  const backupPath = `${skillPath}.bak`;
  copyFileSync(skillPath, backupPath);

  writeFileSync(skillPath, updated, "utf-8");
  return { backupPath };
}
176
+
177
+ // ---------------------------------------------------------------------------
178
+ // CLI entry point
179
+ // ---------------------------------------------------------------------------
180
+
181
/**
 * CLI entry point for `selftune evolve apply-proposal`.
 *
 * Parses the flags, fetches the proposal from the cloud API, checks that it
 * was proposed by "contributor_aggregate" and has status "approved", prints a
 * summary, and -- unless `--dry-run` is set -- applies it to the skill file
 * and marks it as applied in the cloud.
 *
 * Missing `--id` / `--skill-path` raise a CLIError (`MISSING_FLAG`) to the
 * caller; errors raised afterwards are passed to `handleCLIError`.
 */
export async function cliMain(): Promise<void> {
  const { values: flags } = parseArgs({
    options: {
      id: { type: "string" },
      "skill-path": { type: "string" },
      "dry-run": { type: "boolean", default: false },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });

  if (flags.help) {
    console.log(`selftune evolve apply-proposal -- Apply an approved contributor proposal

Usage:
selftune evolve apply-proposal --id <proposal-id> --skill-path <path> [options]

Options:
--id Proposal UUID (required)
--skill-path Path to the target SKILL.md (required)
--dry-run Preview the proposal without applying
--help Show this help message

The proposal must be proposed by "contributor_aggregate" and have status
"approved". The command fetches the proposal from the cloud API, applies
the proposed change to the local SKILL.md, and marks the proposal as applied.`);
    process.exit(0);
  }

  // Shared hint shown with both missing-flag errors.
  const usageHint = "selftune evolve apply-proposal --id <proposal-id> --skill-path <path>";

  const proposalId = flags.id;
  if (!proposalId) {
    throw new CLIError("--id is required", "MISSING_FLAG", usageHint);
  }
  const skillPath = flags["skill-path"];
  if (!skillPath) {
    throw new CLIError("--skill-path is required", "MISSING_FLAG", usageHint);
  }
  const dryRun = flags["dry-run"] ?? false;

  // Formats a 0..1 rate as a percentage with one decimal place.
  const asPercent = (rate: number): string => `${(rate * 100).toFixed(1)}%`;

  try {
    // Resolve cloud config once for both fetch and mark calls
    const config = getCloudConfig();
    if (!config) {
      throw new CLIError(
        "Not authenticated. Run 'selftune init' to set up cloud credentials.",
        "AUTH_MISSING",
        "selftune init",
      );
    }

    // 1. Fetch the proposal from the cloud API
    console.log(`Fetching proposal ${proposalId}...`);
    const proposal = await fetchProposal(proposalId, config);

    // 2. Validate the proposal
    if (proposal.proposed_by !== "contributor_aggregate") {
      throw new CLIError(
        `Proposal was proposed by "${proposal.proposed_by}", not "contributor_aggregate".`,
        "INVALID_PROPOSAL",
        "Only contributor aggregate proposals can be applied via this command.",
      );
    }
    if (proposal.status !== "approved") {
      throw new CLIError(
        `Proposal status is "${proposal.status}", expected "approved".`,
        "INVALID_STATUS",
        "Approve the proposal in the dashboard first, then apply it.",
      );
    }

    // 3. Print proposal summary
    console.log(`\nProposal: ${proposal.id}`);
    console.log(` Skill: ${proposal.skill_name}`);
    console.log(` Type: ${proposal.proposal_type}`);
    console.log(` Proposed by: ${proposal.proposed_by}`);
    console.log(` Reason: ${proposal.reason ?? "(none)"}`);
    if (proposal.pass_rate_before != null) {
      const projected =
        proposal.projected_pass_rate != null ? asPercent(proposal.projected_pass_rate) : "?";
      console.log(` Pass rate: ${asPercent(proposal.pass_rate_before)} -> ${projected}`);
    }
    // Values are truncated to their first 500 characters for display only.
    console.log(`\n--- Current Value ---`);
    console.log(proposal.current_value.slice(0, 500));
    console.log(`\n--- Proposed Value ---`);
    console.log(proposal.proposed_value.slice(0, 500));

    if (dryRun) {
      console.log("\n[dry-run] No changes written.");
      return;
    }

    // 4. Apply the proposal to the local SKILL.md
    const { backupPath } = applyProposalToSkill(skillPath, proposal);
    console.log(`\nApplied proposal to ${skillPath}`);
    console.log(`Backup saved to ${backupPath}`);

    // 5. Mark the proposal as applied in the cloud
    if (await markProposalApplied(proposalId, config)) {
      console.log(`Proposal ${proposalId} marked as applied.`);
    }
  } catch (err) {
    handleCLIError(err);
  }
}
@@ -0,0 +1,96 @@
1
+ /**
2
+ * judge-engine.ts
3
+ *
4
+ * LLM judge validation engine: runs trigger accuracy checks using
5
+ * an LLM as a YES/NO judge for each eval entry.
6
+ *
7
+ * Extracted from validate-routing.ts and validate-body.ts to isolate
8
+ * LLM-judge-specific concerns from replay-specific concerns.
9
+ */
10
+
11
+ import type { EvalEntry, ValidationMode } from "../../types.js";
12
+ import { callLlm } from "../../utils/llm-call.js";
13
+ import { buildTriggerCheckPrompt, parseTriggerResponse } from "../../utils/trigger-check.js";
14
+
15
+ // ---------------------------------------------------------------------------
16
+ // Types
17
+ // ---------------------------------------------------------------------------
18
+
19
/** Outcome of an LLM-judge comparison between original and proposed content. */
export interface JudgeValidationResult {
  /** Fraction of eval entries judged correctly with the original content. */
  before_pass_rate: number;
  /** Fraction of eval entries judged correctly with the proposed content. */
  after_pass_rate: number;
  /** True when `after_pass_rate` is strictly greater than `before_pass_rate`. */
  improved: boolean;
  /** Queries that passed with the original content but fail with the proposed content. */
  regressions: string[];
  /** Set to "llm_judge" by `runJudgeValidation`. */
  validation_mode: ValidationMode;
  /** The agent that was used for the judge calls. */
  validation_agent: string;
}
27
+
28
+ // ---------------------------------------------------------------------------
29
+ // Judge validation engine
30
+ // ---------------------------------------------------------------------------
31
+
32
/**
 * Compare trigger accuracy of original vs. proposed content using an LLM as
 * a YES/NO judge.
 *
 * Each eval entry is judged twice, one after the other: first against
 * `originalContent`, then against `proposedContent`. An entry passes when
 * the judge's trigger decision agrees with `entry.should_trigger`.
 *
 * @param originalContent - Content used for the "before" checks.
 * @param proposedContent - Content used for the "after" checks.
 * @param evalSet - Entries to evaluate; an empty set yields zero pass rates
 *   and `improved: false` without calling the LLM.
 * @param agent - Agent passed through to `callLlm` and echoed in the result.
 * @param modelFlag - Optional model flag passed through to `callLlm`.
 * @returns Pass rates, the improvement flag, and the regressed queries.
 */
export async function runJudgeValidation(
  originalContent: string,
  proposedContent: string,
  evalSet: EvalEntry[],
  agent: string,
  modelFlag?: string,
): Promise<JudgeValidationResult> {
  // Builds the result object; shared by the empty-set and normal paths.
  const buildResult = (
    beforeRate: number,
    afterRate: number,
    regressed: string[],
  ): JudgeValidationResult => ({
    before_pass_rate: beforeRate,
    after_pass_rate: afterRate,
    improved: afterRate > beforeRate,
    regressions: regressed,
    validation_mode: "llm_judge",
    validation_agent: agent,
  });

  const total = evalSet.length;
  if (total === 0) {
    return buildResult(0, 0, []);
  }

  const systemPrompt = "You are an evaluation assistant. Answer only YES or NO.";

  // Asks the judge about one entry and reports whether its decision matches
  // the entry's expected trigger behavior.
  const judge = async (content: string, entry: EvalEntry): Promise<boolean> => {
    const prompt = buildTriggerCheckPrompt(content, entry.query);
    const raw = await callLlm(systemPrompt, prompt, agent, modelFlag);
    return Boolean(entry.should_trigger) === Boolean(parseTriggerResponse(raw));
  };

  const regressions: string[] = [];
  let passedBeforeCount = 0;
  let passedAfterCount = 0;

  for (const entry of evalSet) {
    const passedBefore = await judge(originalContent, entry);
    const passedAfter = await judge(proposedContent, entry);

    if (passedBefore) passedBeforeCount += 1;
    if (passedAfter) passedAfterCount += 1;

    // A regression is an entry that used to pass and no longer does.
    if (passedBefore && !passedAfter) {
      regressions.push(entry.query);
    }
  }

  return buildResult(passedBeforeCount / total, passedAfterCount / total, regressions);
}
@@ -0,0 +1,180 @@
1
+ /**
2
+ * replay-engine.ts
3
+ *
4
+ * Cohesive module for all replay-based validation logic:
5
+ * - Host/runtime replay (PRIMARY path — real agent routing decisions)
6
+ * - Custom replay runner support
7
+ *
8
+ * Host/runtime replay is preferred because it captures actual agent routing
9
+ * behavior. If the runtime path is unavailable or fails, callers must fall
10
+ * back explicitly to another validation mode instead of treating simulated
11
+ * fixture matching as equivalent replay evidence.
12
+ *
13
+ * Extracted from validate-routing.ts and validate-body.ts to isolate
14
+ * replay-specific concerns from judge-specific concerns.
15
+ */
16
+
17
+ import type {
18
+ EvalEntry,
19
+ RoutingReplayEntryResult,
20
+ RoutingReplayFixture,
21
+ ValidationMode,
22
+ } from "../../types.js";
23
+
24
+ // ---------------------------------------------------------------------------
25
+ // Types
26
+ // ---------------------------------------------------------------------------
27
+
28
/** Arguments handed to a replay runner for a single replay pass. */
export interface ReplayRunnerInput {
  /**
   * Content to replay. `runReplayValidation` invokes the runner once with the
   * original content and once with the proposed content.
   */
  routing: string;
  /** Eval entries to replay. */
  evalSet: EvalEntry[];
  /** Agent identifier forwarded unchanged from the validation call. */
  agent: string;
  /** Replay fixture taken from the validation options. */
  fixture: RoutingReplayFixture;
}
34
+
35
/**
 * Async callback that performs one replay pass over the supplied input and
 * resolves to the per-entry replay results.
 */
export type ReplayRunner = (
  input: ReplayRunnerInput,
) => Promise<RoutingReplayEntryResult[]>;
36
+
37
/** Optional inputs for replay validation; both must be present for a replay to run. */
export interface ReplayValidationOptions {
  /** Fixture passed through to the runner; its `fixture_id` is recorded on the result. */
  replayFixture?: RoutingReplayFixture;
  /** Host/runtime replay runner — PRIMARY validation path when provided. */
  replayRunner?: ReplayRunner;
}
42
+
43
/** Outcome of comparing a before-phase replay against an after-phase replay. */
export interface ReplayValidationResult {
  /** Passed before-phase results divided by the eval-set size. */
  before_pass_rate: number;
  /** Passed after-phase results divided by the eval-set size. */
  after_pass_rate: number;
  /** Whether the after phase counts as an improvement over the before phase. */
  improved: boolean;
  /** Validation mode that produced this result. */
  validation_mode: ValidationMode;
  /** Agent identifier the validation was run for. */
  validation_agent: string;
  /** Identifier of the replay fixture that was used, when one is recorded. */
  validation_fixture_id?: string;
  /** After-phase per-entry results. */
  per_entry_results?: RoutingReplayEntryResult[];
  /** Before-phase per-entry results for structured persistence. */
  before_entry_results?: RoutingReplayEntryResult[];
}
54
+
55
/** Result of trying to run replay validation. */
export interface ReplayValidationAttempt {
  /** The replay result, or `null` when no replay result was produced. */
  result: ReplayValidationResult | null;
  /** Human-readable explanation of why replay was unavailable or failed, when one is given. */
  fallbackReason?: string;
}
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // Internal helpers
62
+ // ---------------------------------------------------------------------------
63
+
64
/**
 * Fold a before/after pair of replay runs into a single validation result.
 *
 * Pass rates are the number of passed results divided by `total` (not by the
 * length of the result arrays). Entries are paired across phases by `query`;
 * an after-phase query with no before-phase counterpart is treated as having
 * failed before.
 *
 * @param beforeResults - Per-entry results for the original content.
 * @param afterResults - Per-entry results for the proposed content.
 * @param total - Denominator for both pass rates; callers must pass a non-zero value.
 * @param mode - Validation mode recorded on the result.
 * @param agent - Agent identifier recorded on the result.
 * @param fixtureId - Fixture identifier recorded on the result.
 * @returns The aggregated replay validation result.
 */
function computeReplayResult(
  beforeResults: RoutingReplayEntryResult[],
  afterResults: RoutingReplayEntryResult[],
  total: number,
  mode: ValidationMode,
  agent: string,
  fixtureId: string,
): ReplayValidationResult {
  const countPassed = (results: RoutingReplayEntryResult[]): number =>
    results.reduce((count, entry) => (entry.passed ? count + 1 : count), 0);

  const beforeRate = countPassed(beforeResults) / total;
  const afterRate = countPassed(afterResults) / total;
  const rateGain = afterRate - beforeRate;

  // Before-phase outcome per query; when a query repeats, the last result wins.
  const baseline = new Map<string, boolean>(
    beforeResults.map((entry): [string, boolean] => [entry.query, entry.passed]),
  );

  let regressed = 0;
  let newlyPassing = 0;
  for (const { query, passed } of afterResults) {
    const passedBefore = baseline.get(query) ?? false;
    if (passedBefore && !passed) {
      regressed += 1;
    } else if (!passedBefore && passed) {
      newlyPassing += 1;
    }
  }

  // Improvement requires a higher pass rate, regressions below 5% of the eval
  // set, and either a gain of at least 0.1 in pass rate or two or more new passes.
  const fewRegressions = regressed < total * 0.05;
  const meaningfulGain = rateGain >= 0.1 || newlyPassing >= 2;
  const isImproved = afterRate > beforeRate && fewRegressions && meaningfulGain;

  return {
    before_pass_rate: beforeRate,
    after_pass_rate: afterRate,
    improved: isImproved,
    validation_mode: mode,
    validation_agent: agent,
    validation_fixture_id: fixtureId,
    per_entry_results: afterResults,
    before_entry_results: beforeResults,
  };
}
106
+
107
+ // ---------------------------------------------------------------------------
108
+ // Replay validation engine
109
+ // ---------------------------------------------------------------------------
110
+
111
/**
 * Try to validate a proposed change by replaying the eval set through a real
 * host/runtime runner, once for the original content and once for the proposal.
 *
 * The attempt carries a `null` result when replay cannot produce evidence:
 * - empty eval set: `null` result with no fallback reason;
 * - missing fixture or missing runner: `null` result with a reason;
 * - the runner (or result aggregation) throws: `null` result with a reason
 *   that includes the error message when one is available.
 *
 * Callers decide whether to fall back to a judge-based validator (`auto`) or
 * surface an explicit unavailable error (`replay`).
 *
 * @param originalContent - Content replayed for the before phase.
 * @param proposedContent - Content replayed for the after phase.
 * @param evalSet - Eval entries to replay.
 * @param agent - Agent identifier forwarded to the runner and recorded on the result.
 * @param options - Replay fixture and runner; both are required for replay to run.
 * @returns The replay attempt, with either a result or a `null` result.
 */
export async function runReplayValidation(
  originalContent: string,
  proposedContent: string,
  evalSet: EvalEntry[],
  agent: string,
  options: ReplayValidationOptions = {},
): Promise<ReplayValidationAttempt> {
  const total = evalSet.length;
  if (total === 0) {
    return { result: null };
  }

  const fixture = options.replayFixture;
  if (!fixture) {
    return {
      result: null,
      fallbackReason: "no replay fixture is available for runtime validation",
    };
  }

  if (!options.replayRunner) {
    return {
      result: null,
      fallbackReason: "no real host/runtime replay runner is configured",
    };
  }

  // Everything except `routing` is identical for the two replay passes.
  const sharedInput = { evalSet, agent, fixture };

  try {
    const beforeResults = await options.replayRunner({
      routing: originalContent,
      ...sharedInput,
    });
    const afterResults = await options.replayRunner({
      routing: proposedContent,
      ...sharedInput,
    });

    const result = computeReplayResult(
      beforeResults,
      afterResults,
      total,
      "host_replay",
      agent,
      fixture.fixture_id,
    );
    return { result };
  } catch (error) {
    const detail = error instanceof Error ? error.message.trim() : "";
    // `||` is deliberate: an empty or whitespace-only message must use the generic text.
    const message = detail || "runtime replay failed before producing a routing decision";
    return {
      result: null,
      fallbackReason: `real host/runtime replay failed: ${message}`,
    };
  }
}
@@ -12,11 +12,7 @@ import { queryEvolutionEvidence } from "../localdb/queries.js";
12
12
  import type { EvolutionEvidenceEntry } from "../types.js";
13
13
 
14
14
  /** Append a structured evidence artifact to the evolution evidence log (SQLite). */
15
- export function appendEvidenceEntry(
16
- entry: EvolutionEvidenceEntry,
17
- /** @deprecated Unused; retained for API compatibility during migration */
18
- _logPath?: string,
19
- ): void {
15
+ export function appendEvidenceEntry(entry: EvolutionEvidenceEntry): void {
20
16
  writeEvolutionEvidenceToDb(entry);
21
17
  }
22
18
 
@@ -25,7 +21,7 @@ export function appendEvidenceEntry(
25
21
  *
26
22
  * @param skillName - Optional skill name to filter by
27
23
  */
28
- export function readEvidenceTrail(skillName?: string, _logPath?: string): EvolutionEvidenceEntry[] {
24
+ export function readEvidenceTrail(skillName?: string): EvolutionEvidenceEntry[] {
29
25
  const db = getDb();
30
26
  return queryEvolutionEvidence(db, skillName) as EvolutionEvidenceEntry[];
31
27
  }