selftune 0.2.23 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +93 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dhgv5BQO.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  13. package/cli/selftune/auto-update.ts +200 -8
  14. package/cli/selftune/canonical-export.ts +55 -25
  15. package/cli/selftune/command-surface.ts +397 -0
  16. package/cli/selftune/contribute/contribute.ts +64 -13
  17. package/cli/selftune/contribution-config.ts +57 -3
  18. package/cli/selftune/contribution-preferences.ts +117 -0
  19. package/cli/selftune/contribution-signals.ts +8 -4
  20. package/cli/selftune/contribution-staging.ts +13 -2
  21. package/cli/selftune/contributions.ts +55 -121
  22. package/cli/selftune/creator-contributions.ts +29 -10
  23. package/cli/selftune/cron/setup.ts +7 -3
  24. package/cli/selftune/dashboard-contract.ts +73 -0
  25. package/cli/selftune/dashboard-server.ts +168 -17
  26. package/cli/selftune/dashboard.ts +350 -17
  27. package/cli/selftune/eval/baseline.ts +21 -5
  28. package/cli/selftune/eval/execution-eval.ts +170 -0
  29. package/cli/selftune/eval/family-overlap.ts +2 -2
  30. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  31. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  32. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  33. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  34. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  35. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  36. package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
  37. package/cli/selftune/evolution/evolve-body.ts +100 -39
  38. package/cli/selftune/evolution/evolve.ts +244 -52
  39. package/cli/selftune/evolution/rollback.ts +0 -1
  40. package/cli/selftune/evolution/validate-body.ts +68 -42
  41. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  42. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  43. package/cli/selftune/evolution/validate-routing.ts +43 -41
  44. package/cli/selftune/evolution/validation-contract.ts +91 -0
  45. package/cli/selftune/grading/auto-grade.ts +11 -7
  46. package/cli/selftune/grading/grade-session.ts +10 -16
  47. package/cli/selftune/index.ts +35 -10
  48. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  49. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  50. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  51. package/cli/selftune/ingestors/pi-ingest.ts +3 -2
  52. package/cli/selftune/init.ts +27 -3
  53. package/cli/selftune/localdb/direct-write.ts +35 -1
  54. package/cli/selftune/localdb/queries/cron.ts +34 -0
  55. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  56. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  57. package/cli/selftune/localdb/queries/execution.ts +133 -0
  58. package/cli/selftune/localdb/queries/json.ts +18 -0
  59. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  60. package/cli/selftune/localdb/queries/raw.ts +95 -0
  61. package/cli/selftune/localdb/queries/staging.ts +270 -0
  62. package/cli/selftune/localdb/queries/trust.ts +392 -0
  63. package/cli/selftune/localdb/queries.ts +60 -2288
  64. package/cli/selftune/localdb/schema.ts +21 -0
  65. package/cli/selftune/monitoring/watch.ts +96 -29
  66. package/cli/selftune/normalization.ts +3 -0
  67. package/cli/selftune/observability.ts +4 -2
  68. package/cli/selftune/orchestrate/cli.ts +161 -0
  69. package/cli/selftune/orchestrate/execute.ts +295 -0
  70. package/cli/selftune/orchestrate/finalize.ts +157 -0
  71. package/cli/selftune/orchestrate/locks.ts +40 -0
  72. package/cli/selftune/orchestrate/plan.ts +131 -0
  73. package/cli/selftune/orchestrate/post-run.ts +59 -0
  74. package/cli/selftune/orchestrate/prepare.ts +334 -0
  75. package/cli/selftune/orchestrate/report.ts +182 -0
  76. package/cli/selftune/orchestrate/runtime.ts +120 -0
  77. package/cli/selftune/orchestrate/signals.ts +48 -0
  78. package/cli/selftune/orchestrate.ts +150 -1173
  79. package/cli/selftune/repair/skill-usage.ts +5 -2
  80. package/cli/selftune/routes/overview.ts +5 -2
  81. package/cli/selftune/routes/skill-report.ts +15 -2
  82. package/cli/selftune/schedule.ts +5 -5
  83. package/cli/selftune/status.ts +39 -2
  84. package/cli/selftune/testing-readiness.ts +597 -0
  85. package/cli/selftune/types.ts +44 -4
  86. package/cli/selftune/uninstall.ts +2 -1
  87. package/cli/selftune/utils/canonical-log.ts +1 -9
  88. package/cli/selftune/utils/cli-error.ts +9 -0
  89. package/cli/selftune/utils/llm-call.ts +126 -6
  90. package/cli/selftune/utils/skill-discovery.ts +2 -0
  91. package/cli/selftune/workflows/proposals.ts +184 -0
  92. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  93. package/cli/selftune/workflows/workflows.ts +100 -26
  94. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  95. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  96. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  97. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  98. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
  99. package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
  100. package/package.json +25 -9
  101. package/packages/dashboard-core/AGENTS.md +18 -0
  102. package/packages/dashboard-core/README.md +30 -0
  103. package/packages/dashboard-core/index.ts +3 -0
  104. package/packages/dashboard-core/package.json +39 -0
  105. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  106. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  107. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  108. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  109. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  110. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  111. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  112. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  113. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  114. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  115. package/packages/dashboard-core/src/gates/index.ts +3 -0
  116. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  117. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  118. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  119. package/packages/dashboard-core/src/host/index.ts +3 -0
  120. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  121. package/packages/dashboard-core/src/models/index.ts +4 -0
  122. package/packages/dashboard-core/src/models/overview.ts +98 -0
  123. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  124. package/packages/dashboard-core/src/models/skills.ts +34 -0
  125. package/packages/dashboard-core/src/routes/index.ts +2 -0
  126. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  127. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  128. package/packages/dashboard-core/src/routes/types.ts +39 -0
  129. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  130. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  131. package/packages/dashboard-core/src/screens/index.ts +37 -0
  132. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  133. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  134. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  135. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  136. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  137. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  138. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  139. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  140. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  141. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  142. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  143. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  144. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  145. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  146. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  147. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  148. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  149. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  150. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  151. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  152. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  153. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  154. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  155. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  156. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  157. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  158. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  159. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  160. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  161. package/packages/telemetry-contract/src/schemas.ts +41 -1
  162. package/packages/telemetry-contract/src/types.ts +103 -2
  163. package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
  164. package/packages/ui/src/components/OverviewPanels.tsx +67 -26
  165. package/packages/ui/src/primitives/tabs.tsx +7 -6
  166. package/packages/ui/src/types.ts +10 -0
  167. package/skill/SKILL.md +130 -332
  168. package/skill/agents/diagnosis-analyst.md +3 -3
  169. package/skill/agents/evolution-reviewer.md +3 -3
  170. package/skill/agents/integration-guide.md +3 -3
  171. package/skill/agents/pattern-analyst.md +2 -2
  172. package/skill/references/cli-quick-reference.md +89 -0
  173. package/skill/references/creator-playbook.md +131 -0
  174. package/skill/references/examples.md +48 -0
  175. package/skill/references/troubleshooting.md +47 -0
  176. package/skill/references/version-history.md +1 -1
  177. package/skill/selftune.contribute.json +11 -0
  178. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  179. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  180. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  181. package/skill/workflows/CreateTestDeploy.md +170 -0
  182. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  183. package/skill/{Workflows → workflows}/Cron.md +1 -1
  184. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  185. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  186. package/skill/{Workflows → workflows}/Evals.md +67 -2
  187. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  188. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  189. package/skill/{Workflows → workflows}/Grade.md +1 -1
  190. package/skill/{Workflows → workflows}/Initialize.md +8 -4
  191. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  192. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  193. package/skill/workflows/SignalsDashboard.md +87 -0
  194. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  195. package/skill/{Workflows → workflows}/Watch.md +42 -2
  196. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  197. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
  198. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
  199. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  200. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
  201. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  202. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  203. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  204. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  205. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  206. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  207. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  208. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  209. /package/skill/{Workflows → workflows}/Ingest.md +0 -0
  210. /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
  211. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  212. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  213. /package/skill/{Workflows → workflows}/Registry.md +0 -0
  214. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  215. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  216. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  217. /package/skill/{Workflows → workflows}/Sync.md +0 -0
  218. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  219. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -0,0 +1,295 @@
1
+ /**
2
+ * apply-proposal.ts
3
+ *
4
+ * Fetches an approved contributor proposal from the cloud API, applies the
5
+ * proposed update to the local SKILL.md, and marks the proposal as applied.
6
+ *
7
+ * Usage:
8
+ * selftune evolve apply-proposal --id <proposal-id> --skill-path <path>
9
+ */
10
+
11
+ import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
12
+ import { parseArgs } from "node:util";
13
+
14
+ import { readAlphaIdentity } from "../alpha-identity.js";
15
+ import { SELFTUNE_CONFIG_PATH } from "../constants.js";
16
+ import { CLIError, handleCLIError } from "../utils/cli-error.js";
17
+ import { replaceDescription } from "../utils/frontmatter.js";
18
+ import { getSelftuneVersion } from "../utils/selftune-meta.js";
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Types
22
+ // ---------------------------------------------------------------------------
23
+
24
/** States a proposal can report; this command only applies proposals that are "approved". */
type ProposalStatus = "pending" | "approved" | "rejected" | "applied";

/**
 * Proposal record as read from the `proposal` field of the cloud API response.
 */
interface ProposalRecord {
  /** Proposal identifier, echoed in the printed summary. */
  id: string;
  skill_id: string;
  /** Skill name shown in the printed summary. */
  skill_name: string;
  /** Kind of change; only "description" and "body" can be applied locally. */
  proposal_type: string;
  /** Value being replaced; the first 500 characters are previewed before applying. */
  current_value: string;
  /** Replacement text written into the skill file. */
  proposed_value: string;
  /** Optional rationale; printed as "(none)" when null. */
  reason: string | null;
  /** Pass rate before the change, as a fraction (multiplied by 100 for display). */
  pass_rate_before: number | null;
  /** Projected pass rate after the change, as a fraction; printed as "?" when null. */
  projected_pass_rate: number | null;
  status: ProposalStatus;
  /** Origin of the proposal; must equal "contributor_aggregate" to be applied. */
  proposed_by: string;
  reviewed_by: string | null;
  reviewed_at: string | null;
  applied_at: string | null;
  created_at: string;
}
41
+
42
+ // ---------------------------------------------------------------------------
43
+ // Cloud API helpers (follows registry/client.ts pattern)
44
+ // ---------------------------------------------------------------------------
45
+
46
/**
 * Resolve the cloud API base URL and API key from the stored alpha identity.
 *
 * @returns The connection settings, or null when no API key is stored or the
 *   identity cannot be read (any error while reading is treated as "no config").
 */
function getCloudConfig(): { apiUrl: string; apiKey: string } | null {
  const defaultApiUrl = "https://api.selftune.dev";
  try {
    const stored = readAlphaIdentity(SELFTUNE_CONFIG_PATH);
    const apiKey = stored?.api_key;
    if (!apiKey) {
      return null;
    }
    // `||` (not `??`) so an empty configured URL also falls back to the default.
    return { apiUrl: stored?.cloud_api_url || defaultApiUrl, apiKey };
  } catch {
    return null;
  }
}
56
+
57
/**
 * Fetch a single proposal from the cloud API.
 *
 * @param proposalId - Proposal identifier; URL-encoded before being placed in the path.
 * @param config - API base URL and bearer key.
 * @returns The record found under the response's `proposal` field.
 * @throws CLIError `NOT_FOUND` on HTTP 404; `API_ERROR` on any other non-OK
 *   status, on a body that is not valid JSON, or on a body that does not carry
 *   a `proposal` object.
 */
async function fetchProposal(
  proposalId: string,
  config: { apiUrl: string; apiKey: string },
): Promise<ProposalRecord> {
  const url = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;
  const response = await fetch(url, {
    method: "GET",
    headers: {
      Authorization: `Bearer ${config.apiKey}`,
      "User-Agent": `selftune/${getSelftuneVersion()}`,
      Accept: "application/json",
    },
    signal: AbortSignal.timeout(15_000),
  });

  if (!response.ok) {
    const text = await response.text().catch(() => "unknown error");
    if (response.status === 404) {
      throw new CLIError(
        `Proposal ${proposalId} not found.`,
        "NOT_FOUND",
        "Check the proposal ID and try again.",
      );
    }
    throw new CLIError(
      `Failed to fetch proposal: HTTP ${response.status}: ${text.slice(0, 200)}`,
      "API_ERROR",
      "Check your credentials and network connection.",
    );
  }

  // A 2xx status does not guarantee a usable payload (proxies, captive portals,
  // API changes), so parse defensively instead of trusting a type assertion.
  let payload: unknown;
  try {
    payload = await response.json();
  } catch {
    throw new CLIError(
      "Failed to fetch proposal: response body was not valid JSON.",
      "API_ERROR",
      "Check your credentials and network connection.",
    );
  }

  const proposal =
    typeof payload === "object" && payload !== null
      ? (payload as { proposal?: unknown }).proposal
      : undefined;
  if (typeof proposal !== "object" || proposal === null) {
    // Without this guard the caller would receive `undefined` and crash later
    // with an unrelated TypeError when reading the proposal's fields.
    throw new CLIError(
      "Failed to fetch proposal: response did not contain a proposal record.",
      "API_ERROR",
      "Check the proposal ID and your cloud API URL, then try again.",
    );
  }

  return proposal as ProposalRecord;
}
91
+
92
/**
 * Best-effort notification to the cloud API that a proposal has been applied.
 *
 * Failures never throw from the request itself: a non-OK response or a
 * network/timeout error is reported on stderr as a warning and yields `false`.
 *
 * @param proposalId - Proposal identifier; URL-encoded before being placed in the path.
 * @param config - API base URL and bearer key.
 * @returns `true` when the API accepted the status change, otherwise `false`.
 */
async function markProposalApplied(
  proposalId: string,
  config: { apiUrl: string; apiKey: string },
): Promise<boolean> {
  const endpoint = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;

  // Single place that formats the warning so both failure paths stay in sync.
  const warn = (detail: string): false => {
    console.error(`Warning: Failed to mark proposal as applied: ${detail}`);
    return false;
  };

  try {
    const res = await fetch(endpoint, {
      method: "PATCH",
      headers: {
        Authorization: `Bearer ${config.apiKey}`,
        "User-Agent": `selftune/${getSelftuneVersion()}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ status: "applied" }),
      signal: AbortSignal.timeout(15_000),
    });

    if (res.ok) {
      return true;
    }

    const detail = await res.text().catch(() => "unknown error");
    return warn(`HTTP ${res.status}: ${detail.slice(0, 200)}`);
  } catch (error) {
    return warn(error instanceof Error ? error.message : String(error));
  }
}
125
+
126
+ // ---------------------------------------------------------------------------
127
+ // Apply logic
128
+ // ---------------------------------------------------------------------------
129
+
130
/**
 * Replace everything after the leading `---` frontmatter block with `proposedBody`.
 * When the content has no opening fence on its first line, or the fence is
 * never closed, the whole content is replaced.
 */
function replaceSkillBody(content: string, proposedBody: string): string {
  const lines = content.split("\n");
  if (lines[0]?.trim() !== "---") {
    return proposedBody;
  }
  const closingIdx = lines.findIndex((line, idx) => idx > 0 && line.trim() === "---");
  if (closingIdx < 0) {
    return proposedBody;
  }
  return `${lines.slice(0, closingIdx + 1).join("\n")}\n\n${proposedBody}`;
}

/**
 * Apply a proposal to the skill file on disk, keeping a `.bak` copy of the
 * previous content next to it.
 *
 * The new content is computed before anything is written, so a proposal that
 * cannot be applied leaves both the skill file and any existing backup untouched.
 *
 * @param skillPath - Path to the SKILL.md to modify.
 * @param proposal - Proposal whose `proposed_value` is written into the file.
 * @returns The path of the backup that holds the pre-change content.
 * @throws CLIError `FILE_NOT_FOUND` when `skillPath` does not exist and
 *   `UNSUPPORTED_TYPE` when `proposal_type` is neither "description" nor "body".
 */
function applyProposalToSkill(skillPath: string, proposal: ProposalRecord): { backupPath: string } {
  if (!existsSync(skillPath)) {
    throw new CLIError(
      `Skill file not found: ${skillPath}`,
      "FILE_NOT_FOUND",
      "Verify the --skill-path argument points to your SKILL.md.",
    );
  }

  const content = readFileSync(skillPath, "utf-8");

  let updated: string;
  if (proposal.proposal_type === "description") {
    updated = replaceDescription(content, proposal.proposed_value);
  } else if (proposal.proposal_type === "body") {
    updated = replaceSkillBody(content, proposal.proposed_value);
  } else {
    throw new CLIError(
      `Unsupported proposal type: ${proposal.proposal_type}`,
      "UNSUPPORTED_TYPE",
      "Only 'description' and 'body' proposal types can be applied.",
    );
  }

  // Back up only once the new content is known to be valid; backing up earlier
  // would overwrite a previous good backup even when nothing gets applied.
  const backupPath = `${skillPath}.bak`;
  copyFileSync(skillPath, backupPath);

  writeFileSync(skillPath, updated, "utf-8");
  return { backupPath };
}
176
+
177
+ // ---------------------------------------------------------------------------
178
+ // CLI entry point
179
+ // ---------------------------------------------------------------------------
180
+
181
/**
 * CLI entry point for `selftune evolve apply-proposal`.
 *
 * Parses `--id`, `--skill-path`, `--dry-run` and `--help`, fetches the proposal,
 * checks that it is an approved "contributor_aggregate" proposal, prints a
 * summary, and (unless `--dry-run`) applies it to the skill file and then tries
 * to mark it as applied in the cloud.
 *
 * Missing required flags are thrown as `CLIError` before the guarded section;
 * every failure after argument validation is routed through `handleCLIError`.
 */
export async function cliMain(): Promise<void> {
  const usageHint = "selftune evolve apply-proposal --id <proposal-id> --skill-path <path>";
  const asPercent = (rate: number): string => `${(rate * 100).toFixed(1)}%`;

  const parsed = parseArgs({
    options: {
      id: { type: "string" },
      "skill-path": { type: "string" },
      "dry-run": { type: "boolean", default: false },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });
  const flags = parsed.values;

  if (flags.help) {
    // NOTE(review): line-leading whitespace of this help text is reproduced
    // exactly as it appears in the reviewed source — confirm against the repo copy.
    const helpLines = [
      "selftune evolve apply-proposal -- Apply an approved contributor proposal",
      "",
      "Usage:",
      "selftune evolve apply-proposal --id <proposal-id> --skill-path <path> [options]",
      "",
      "Options:",
      "--id Proposal UUID (required)",
      "--skill-path Path to the target SKILL.md (required)",
      "--dry-run Preview the proposal without applying",
      "--help Show this help message",
      "",
      'The proposal must be proposed by "contributor_aggregate" and have status',
      '"approved". The command fetches the proposal from the cloud API, applies',
      "the proposed change to the local SKILL.md, and marks the proposal as applied.",
    ];
    console.log(helpLines.join("\n"));
    process.exit(0);
  }

  const proposalId = flags.id;
  if (!proposalId) {
    throw new CLIError("--id is required", "MISSING_FLAG", usageHint);
  }
  const skillPath = flags["skill-path"];
  if (!skillPath) {
    throw new CLIError("--skill-path is required", "MISSING_FLAG", usageHint);
  }
  const dryRun = flags["dry-run"] ?? false;

  try {
    // One config lookup serves both the fetch and the later "mark applied" call.
    const config = getCloudConfig();
    if (!config) {
      throw new CLIError(
        "Not authenticated. Run 'selftune init' to set up cloud credentials.",
        "AUTH_MISSING",
        "selftune init",
      );
    }

    // Step 1: retrieve the proposal.
    console.log(`Fetching proposal ${proposalId}...`);
    const proposal = await fetchProposal(proposalId, config);

    // Step 2: only approved contributor-aggregate proposals may be applied.
    if (proposal.proposed_by !== "contributor_aggregate") {
      throw new CLIError(
        `Proposal was proposed by "${proposal.proposed_by}", not "contributor_aggregate".`,
        "INVALID_PROPOSAL",
        "Only contributor aggregate proposals can be applied via this command.",
      );
    }
    if (proposal.status !== "approved") {
      throw new CLIError(
        `Proposal status is "${proposal.status}", expected "approved".`,
        "INVALID_STATUS",
        "Approve the proposal in the dashboard first, then apply it.",
      );
    }

    // Step 3: show what is about to change (values are previewed up to 500 chars).
    console.log(`\nProposal: ${proposal.id}`);
    console.log(` Skill: ${proposal.skill_name}`);
    console.log(` Type: ${proposal.proposal_type}`);
    console.log(` Proposed by: ${proposal.proposed_by}`);
    console.log(` Reason: ${proposal.reason ?? "(none)"}`);
    if (proposal.pass_rate_before != null) {
      const projected =
        proposal.projected_pass_rate != null ? asPercent(proposal.projected_pass_rate) : "?";
      console.log(` Pass rate: ${asPercent(proposal.pass_rate_before)} -> ${projected}`);
    }
    console.log(`\n--- Current Value ---`);
    console.log(proposal.current_value.slice(0, 500));
    console.log(`\n--- Proposed Value ---`);
    console.log(proposal.proposed_value.slice(0, 500));

    if (dryRun) {
      console.log("\n[dry-run] No changes written.");
      return;
    }

    // Step 4: write the change locally.
    const applied = applyProposalToSkill(skillPath, proposal);
    console.log(`\nApplied proposal to ${skillPath}`);
    console.log(`Backup saved to ${applied.backupPath}`);

    // Step 5: report back to the cloud; a failure here only produces a warning.
    if (await markProposalApplied(proposalId, config)) {
      console.log(`Proposal ${proposalId} marked as applied.`);
    }
  } catch (err) {
    handleCLIError(err);
  }
}
@@ -3,12 +3,12 @@
3
3
  *
4
4
  * Cohesive module for all replay-based validation logic:
5
5
  * - Host/runtime replay (PRIMARY path — real agent routing decisions)
6
- * - Fixture-backed replay (FALLBACK — surface similarity matching)
7
6
  * - Custom replay runner support
8
7
  *
9
8
  * Host/runtime replay is preferred because it captures actual agent routing
10
- * behavior. Fixture-backed replay is used as a fallback when no invoker is
11
- * provided or when the invoker returns an error.
9
+ * behavior. If the runtime path is unavailable or fails, callers must fall
10
+ * back explicitly to another validation mode instead of treating simulated
11
+ * fixture matching as equivalent replay evidence.
12
12
  *
13
13
  * Extracted from validate-routing.ts and validate-body.ts to isolate
14
14
  * replay-specific concerns from judge-specific concerns.
@@ -20,7 +20,6 @@ import type {
20
20
  RoutingReplayFixture,
21
21
  ValidationMode,
22
22
  } from "../../types.js";
23
- import { runHostReplayFixture } from "../validate-host-replay.js";
24
23
 
25
24
  // ---------------------------------------------------------------------------
26
25
  // Types
@@ -53,6 +52,11 @@ export interface ReplayValidationResult {
53
52
  before_entry_results?: RoutingReplayEntryResult[];
54
53
  }
55
54
 
55
/**
 * Outcome of one attempt at runtime replay validation.
 */
export interface ReplayValidationAttempt {
  /** The replay result, or null when runtime replay was skipped, unavailable, or failed. */
  result: ReplayValidationResult | null;
  /**
   * Why no result was produced (missing fixture, missing runner, or the runner's
   * error message). Left unset for an empty eval set and whenever `result` is present.
   */
  fallbackReason?: string;
}
59
+
56
60
  // ---------------------------------------------------------------------------
57
61
  // Internal helpers
58
62
  // ---------------------------------------------------------------------------
@@ -67,11 +71,31 @@ function computeReplayResult(
67
71
  ): ReplayValidationResult {
68
72
  const beforePassed = beforeResults.filter((result) => result.passed).length;
69
73
  const afterPassed = afterResults.filter((result) => result.passed).length;
74
+ const beforePassRate = beforePassed / total;
75
+ const afterPassRate = afterPassed / total;
76
+ const netChange = afterPassRate - beforePassRate;
77
+ const beforePassedByQuery = new Map<string, boolean>();
78
+ let regressionCount = 0;
79
+ let newPassCount = 0;
80
+
81
+ for (const result of beforeResults) {
82
+ beforePassedByQuery.set(result.query, result.passed);
83
+ }
84
+
85
+ for (const result of afterResults) {
86
+ const beforePass = beforePassedByQuery.get(result.query) ?? false;
87
+ const afterPass = result.passed;
88
+ if (beforePass && !afterPass) regressionCount++;
89
+ if (!beforePass && afterPass) newPassCount++;
90
+ }
70
91
 
71
92
  return {
72
- before_pass_rate: beforePassed / total,
73
- after_pass_rate: afterPassed / total,
74
- improved: afterPassed > beforePassed,
93
+ before_pass_rate: beforePassRate,
94
+ after_pass_rate: afterPassRate,
95
+ improved:
96
+ afterPassRate > beforePassRate &&
97
+ regressionCount < total * 0.05 &&
98
+ (netChange >= 0.1 || newPassCount >= 2),
75
99
  validation_mode: mode,
76
100
  validation_agent: agent,
77
101
  validation_fixture_id: fixtureId,
@@ -85,12 +109,11 @@ function computeReplayResult(
85
109
  // ---------------------------------------------------------------------------
86
110
 
87
111
  /**
88
- * Attempt replay-backed validation. Prefers host/runtime replay when a
89
- * replayRunner is provided; falls back to fixture-based replay when:
90
- * - No replayRunner is provided
91
- * - The replayRunner throws an error
112
+ * Attempt replay-backed validation using a real host/runtime runner.
92
113
  *
93
- * Returns null if no replay path is available (no fixture provided).
114
+ * Returns a null result with a fallback reason when runtime replay is
115
+ * unavailable or fails. Callers decide whether to fall back to a judge-based
116
+ * validator (`auto`) or surface an explicit unavailable error (`replay`).
94
117
  */
95
118
  export async function runReplayValidation(
96
119
  originalContent: string,
@@ -98,61 +121,60 @@ export async function runReplayValidation(
98
121
  evalSet: EvalEntry[],
99
122
  agent: string,
100
123
  options: ReplayValidationOptions = {},
101
- ): Promise<ReplayValidationResult | null> {
102
- if (evalSet.length === 0 || !options.replayFixture) {
103
- return null;
124
+ ): Promise<ReplayValidationAttempt> {
125
+ if (evalSet.length === 0) {
126
+ return { result: null };
127
+ }
128
+
129
+ if (!options.replayFixture) {
130
+ return {
131
+ result: null,
132
+ fallbackReason: "no replay fixture is available for runtime validation",
133
+ };
134
+ }
135
+
136
+ if (!options.replayRunner) {
137
+ return {
138
+ result: null,
139
+ fallbackReason: "no real host/runtime replay runner is configured",
140
+ };
104
141
  }
105
142
 
106
143
  const fixture = options.replayFixture;
107
144
  const total = evalSet.length;
108
145
 
109
- // PRIMARY path: Host/runtime replay when a runner is provided
110
- if (options.replayRunner) {
111
- try {
112
- const beforeResults = await options.replayRunner({
113
- routing: originalContent,
114
- evalSet,
115
- agent,
116
- fixture,
117
- });
118
- const afterResults = await options.replayRunner({
119
- routing: proposedContent,
120
- evalSet,
121
- agent,
122
- fixture,
123
- });
124
-
125
- return computeReplayResult(
146
+ try {
147
+ const beforeResults = await options.replayRunner({
148
+ routing: originalContent,
149
+ evalSet,
150
+ agent,
151
+ fixture,
152
+ });
153
+ const afterResults = await options.replayRunner({
154
+ routing: proposedContent,
155
+ evalSet,
156
+ agent,
157
+ fixture,
158
+ });
159
+
160
+ return {
161
+ result: computeReplayResult(
126
162
  beforeResults,
127
163
  afterResults,
128
164
  total,
129
165
  "host_replay",
130
166
  agent,
131
167
  fixture.fixture_id,
132
- );
133
- } catch {
134
- // Host replay failed — fall through to fixture-based fallback
135
- }
168
+ ),
169
+ };
170
+ } catch (error) {
171
+ const message =
172
+ error instanceof Error && error.message.trim()
173
+ ? error.message.trim()
174
+ : "runtime replay failed before producing a routing decision";
175
+ return {
176
+ result: null,
177
+ fallbackReason: `real host/runtime replay failed: ${message}`,
178
+ };
136
179
  }
137
-
138
- // FALLBACK path: Fixture-backed replay (surface similarity matching)
139
- const beforeResults = runHostReplayFixture({
140
- routing: originalContent,
141
- evalSet,
142
- fixture,
143
- });
144
- const afterResults = runHostReplayFixture({
145
- routing: proposedContent,
146
- evalSet,
147
- fixture,
148
- });
149
-
150
- return computeReplayResult(
151
- beforeResults,
152
- afterResults,
153
- total,
154
- "fixture_replay",
155
- agent,
156
- fixture.fixture_id,
157
- );
158
180
  }