selftune 0.2.23 → 0.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +93 -15
- package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
- package/apps/local-dashboard/dist/assets/index-Dhgv5BQO.js +15 -0
- package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
- package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
- package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/adapters/codex/install.ts +310 -78
- package/cli/selftune/adapters/opencode/install.ts +3 -4
- package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
- package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
- package/cli/selftune/auto-update.ts +200 -8
- package/cli/selftune/canonical-export.ts +55 -25
- package/cli/selftune/command-surface.ts +397 -0
- package/cli/selftune/contribute/contribute.ts +64 -13
- package/cli/selftune/contribution-config.ts +57 -3
- package/cli/selftune/contribution-preferences.ts +117 -0
- package/cli/selftune/contribution-signals.ts +8 -4
- package/cli/selftune/contribution-staging.ts +13 -2
- package/cli/selftune/contributions.ts +55 -121
- package/cli/selftune/creator-contributions.ts +29 -10
- package/cli/selftune/cron/setup.ts +7 -3
- package/cli/selftune/dashboard-contract.ts +73 -0
- package/cli/selftune/dashboard-server.ts +168 -17
- package/cli/selftune/dashboard.ts +350 -17
- package/cli/selftune/eval/baseline.ts +21 -5
- package/cli/selftune/eval/execution-eval.ts +170 -0
- package/cli/selftune/eval/family-overlap.ts +2 -2
- package/cli/selftune/eval/hooks-to-evals.ts +228 -82
- package/cli/selftune/eval/import-skillsbench.ts +2 -2
- package/cli/selftune/eval/invocation-classifier.ts +56 -0
- package/cli/selftune/eval/synthetic-evals.ts +5 -3
- package/cli/selftune/eval/unit-test-cli.ts +7 -4
- package/cli/selftune/evolution/apply-proposal.ts +295 -0
- package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
- package/cli/selftune/evolution/evolve-body.ts +100 -39
- package/cli/selftune/evolution/evolve.ts +244 -52
- package/cli/selftune/evolution/rollback.ts +0 -1
- package/cli/selftune/evolution/validate-body.ts +68 -42
- package/cli/selftune/evolution/validate-host-replay.ts +510 -60
- package/cli/selftune/evolution/validate-proposal.ts +11 -150
- package/cli/selftune/evolution/validate-routing.ts +43 -41
- package/cli/selftune/evolution/validation-contract.ts +91 -0
- package/cli/selftune/grading/auto-grade.ts +11 -7
- package/cli/selftune/grading/grade-session.ts +10 -16
- package/cli/selftune/index.ts +35 -10
- package/cli/selftune/ingestors/claude-replay.ts +15 -10
- package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
- package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
- package/cli/selftune/ingestors/pi-ingest.ts +3 -2
- package/cli/selftune/init.ts +27 -3
- package/cli/selftune/localdb/direct-write.ts +35 -1
- package/cli/selftune/localdb/queries/cron.ts +34 -0
- package/cli/selftune/localdb/queries/dashboard.ts +834 -0
- package/cli/selftune/localdb/queries/evolution.ts +158 -0
- package/cli/selftune/localdb/queries/execution.ts +133 -0
- package/cli/selftune/localdb/queries/json.ts +18 -0
- package/cli/selftune/localdb/queries/monitoring.ts +263 -0
- package/cli/selftune/localdb/queries/raw.ts +95 -0
- package/cli/selftune/localdb/queries/staging.ts +270 -0
- package/cli/selftune/localdb/queries/trust.ts +392 -0
- package/cli/selftune/localdb/queries.ts +60 -2288
- package/cli/selftune/localdb/schema.ts +21 -0
- package/cli/selftune/monitoring/watch.ts +96 -29
- package/cli/selftune/normalization.ts +3 -0
- package/cli/selftune/observability.ts +4 -2
- package/cli/selftune/orchestrate/cli.ts +161 -0
- package/cli/selftune/orchestrate/execute.ts +295 -0
- package/cli/selftune/orchestrate/finalize.ts +157 -0
- package/cli/selftune/orchestrate/locks.ts +40 -0
- package/cli/selftune/orchestrate/plan.ts +131 -0
- package/cli/selftune/orchestrate/post-run.ts +59 -0
- package/cli/selftune/orchestrate/prepare.ts +334 -0
- package/cli/selftune/orchestrate/report.ts +182 -0
- package/cli/selftune/orchestrate/runtime.ts +120 -0
- package/cli/selftune/orchestrate/signals.ts +48 -0
- package/cli/selftune/orchestrate.ts +150 -1173
- package/cli/selftune/repair/skill-usage.ts +5 -2
- package/cli/selftune/routes/overview.ts +5 -2
- package/cli/selftune/routes/skill-report.ts +15 -2
- package/cli/selftune/schedule.ts +5 -5
- package/cli/selftune/status.ts +39 -2
- package/cli/selftune/testing-readiness.ts +597 -0
- package/cli/selftune/types.ts +44 -4
- package/cli/selftune/uninstall.ts +2 -1
- package/cli/selftune/utils/canonical-log.ts +1 -9
- package/cli/selftune/utils/cli-error.ts +9 -0
- package/cli/selftune/utils/llm-call.ts +126 -6
- package/cli/selftune/utils/skill-discovery.ts +2 -0
- package/cli/selftune/workflows/proposals.ts +184 -0
- package/cli/selftune/workflows/skill-scaffold.ts +241 -0
- package/cli/selftune/workflows/workflows.ts +100 -26
- package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
- package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
- package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
- package/package.json +25 -9
- package/packages/dashboard-core/AGENTS.md +18 -0
- package/packages/dashboard-core/README.md +30 -0
- package/packages/dashboard-core/index.ts +3 -0
- package/packages/dashboard-core/package.json +39 -0
- package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
- package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
- package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
- package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
- package/packages/dashboard-core/src/chrome/index.ts +14 -0
- package/packages/dashboard-core/src/chrome/types.ts +81 -0
- package/packages/dashboard-core/src/chrome/utils.ts +23 -0
- package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
- package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
- package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
- package/packages/dashboard-core/src/gates/index.ts +3 -0
- package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
- package/packages/dashboard-core/src/host/adapter.ts +47 -0
- package/packages/dashboard-core/src/host/capabilities.ts +55 -0
- package/packages/dashboard-core/src/host/index.ts +3 -0
- package/packages/dashboard-core/src/models/analytics.ts +39 -0
- package/packages/dashboard-core/src/models/index.ts +4 -0
- package/packages/dashboard-core/src/models/overview.ts +98 -0
- package/packages/dashboard-core/src/models/runtime.ts +7 -0
- package/packages/dashboard-core/src/models/skills.ts +34 -0
- package/packages/dashboard-core/src/routes/index.ts +2 -0
- package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
- package/packages/dashboard-core/src/routes/manifest.ts +451 -0
- package/packages/dashboard-core/src/routes/types.ts +39 -0
- package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
- package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
- package/packages/dashboard-core/src/screens/index.ts +37 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
- package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
- package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
- package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
- package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
- package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
- package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
- package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
- package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
- package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
- package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
- package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
- package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
- package/packages/telemetry-contract/src/schemas.ts +41 -1
- package/packages/telemetry-contract/src/types.ts +103 -2
- package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
- package/packages/ui/src/components/OverviewPanels.tsx +67 -26
- package/packages/ui/src/primitives/tabs.tsx +7 -6
- package/packages/ui/src/types.ts +10 -0
- package/skill/SKILL.md +130 -332
- package/skill/agents/diagnosis-analyst.md +3 -3
- package/skill/agents/evolution-reviewer.md +3 -3
- package/skill/agents/integration-guide.md +3 -3
- package/skill/agents/pattern-analyst.md +2 -2
- package/skill/references/cli-quick-reference.md +89 -0
- package/skill/references/creator-playbook.md +131 -0
- package/skill/references/examples.md +48 -0
- package/skill/references/troubleshooting.md +47 -0
- package/skill/references/version-history.md +1 -1
- package/skill/selftune.contribute.json +11 -0
- package/skill/{Workflows → workflows}/Baseline.md +20 -1
- package/skill/{Workflows → workflows}/Contribute.md +23 -10
- package/skill/{Workflows → workflows}/Contributions.md +13 -5
- package/skill/workflows/CreateTestDeploy.md +170 -0
- package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
- package/skill/{Workflows → workflows}/Cron.md +1 -1
- package/skill/{Workflows → workflows}/Dashboard.md +20 -0
- package/skill/{Workflows → workflows}/Doctor.md +1 -1
- package/skill/{Workflows → workflows}/Evals.md +67 -2
- package/skill/{Workflows → workflows}/Evolve.md +119 -30
- package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
- package/skill/{Workflows → workflows}/Grade.md +1 -1
- package/skill/{Workflows → workflows}/Initialize.md +8 -4
- package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
- package/skill/{Workflows → workflows}/Schedule.md +3 -3
- package/skill/workflows/SignalsDashboard.md +87 -0
- package/skill/{Workflows → workflows}/UnitTest.md +19 -0
- package/skill/{Workflows → workflows}/Watch.md +42 -2
- package/skill/{Workflows → workflows}/Workflows.md +39 -2
- package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
- package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
- package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
- package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
- /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
- /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
- /package/skill/{Workflows → workflows}/Badge.md +0 -0
- /package/skill/{Workflows → workflows}/Composability.md +0 -0
- /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
- /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
- /package/skill/{Workflows → workflows}/Hook.md +0 -0
- /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
- /package/skill/{Workflows → workflows}/Ingest.md +0 -0
- /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
- /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
- /package/skill/{Workflows → workflows}/Recover.md +0 -0
- /package/skill/{Workflows → workflows}/Registry.md +0 -0
- /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
- /package/skill/{Workflows → workflows}/Replay.md +0 -0
- /package/skill/{Workflows → workflows}/Rollback.md +0 -0
- /package/skill/{Workflows → workflows}/Sync.md +0 -0
- /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
- /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* apply-proposal.ts
|
|
3
|
+
*
|
|
4
|
+
* Fetches an approved contributor proposal from the cloud API, applies the
|
|
5
|
+
* proposed update to the local SKILL.md, and marks the proposal as applied.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* selftune evolve apply-proposal --id <proposal-id> --skill-path <path>
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { copyFileSync, existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
12
|
+
import { parseArgs } from "node:util";
|
|
13
|
+
|
|
14
|
+
import { readAlphaIdentity } from "../alpha-identity.js";
|
|
15
|
+
import { SELFTUNE_CONFIG_PATH } from "../constants.js";
|
|
16
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
17
|
+
import { replaceDescription } from "../utils/frontmatter.js";
|
|
18
|
+
import { getSelftuneVersion } from "../utils/selftune-meta.js";
|
|
19
|
+
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Types
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
/** Lifecycle states a proposal record can carry. */
type ProposalStatus = "pending" | "approved" | "rejected" | "applied";

/**
 * Proposal record as read from the cloud proposals endpoint.
 *
 * Field names are snake_case because the parsed JSON response is cast
 * directly to this shape without any renaming.
 */
interface ProposalRecord {
  /** Proposal identifier (the value passed via `--id`). */
  id: string;
  skill_id: string;
  /** Skill name; printed in the proposal summary. */
  skill_name: string;
  /** Kind of change; this module only applies "description" and "body". */
  proposal_type: string;
  /** Text the proposal intends to replace (previewed, truncated, in the CLI). */
  current_value: string;
  /** Replacement text written into the skill file. */
  proposed_value: string;
  reason: string | null;
  /** Pass rate as a fraction; multiplied by 100 when printed. */
  pass_rate_before: number | null;
  /** Projected pass rate as a fraction; multiplied by 100 when printed. */
  projected_pass_rate: number | null;
  /** Must be "approved" before the CLI will apply the proposal. */
  status: ProposalStatus;
  /** Origin of the proposal; the CLI only accepts "contributor_aggregate". */
  proposed_by: string;
  reviewed_by: string | null;
  reviewed_at: string | null;
  applied_at: string | null;
  created_at: string;
}
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Cloud API helpers (follows registry/client.ts pattern)
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
/**
 * Look up the cloud API endpoint and key from the stored alpha identity.
 *
 * @returns `{ apiUrl, apiKey }`, or `null` when the identity has no API key
 *   or reading it throws. A missing or empty `cloud_api_url` falls back to
 *   the default endpoint.
 */
function getCloudConfig(): { apiUrl: string; apiKey: string } | null {
  try {
    const identity = readAlphaIdentity(SELFTUNE_CONFIG_PATH);
    const apiKey = identity?.api_key;
    if (!apiKey) {
      return null;
    }
    const apiUrl = identity?.cloud_api_url || "https://api.selftune.dev";
    return { apiUrl, apiKey };
  } catch {
    // Any failure while reading the identity is reported as "no config".
    return null;
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Fetch a single proposal from the cloud API.
 *
 * @param proposalId - Proposal identifier; URL-encoded before use.
 * @param config - Cloud endpoint and bearer key.
 * @returns The proposal record contained in the response body.
 * @throws CLIError `NOT_FOUND` on HTTP 404, `API_ERROR` on any other
 *   non-2xx status, on an unreadable/non-JSON body, or when the body does
 *   not contain a `proposal` object.
 */
async function fetchProposal(
  proposalId: string,
  config: { apiUrl: string; apiKey: string },
): Promise<ProposalRecord> {
  const url = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;
  const response = await fetch(url, {
    method: "GET",
    headers: {
      Authorization: `Bearer ${config.apiKey}`,
      "User-Agent": `selftune/${getSelftuneVersion()}`,
      Accept: "application/json",
    },
    signal: AbortSignal.timeout(15_000),
  });

  if (!response.ok) {
    const text = await response.text().catch(() => "unknown error");
    if (response.status === 404) {
      throw new CLIError(
        `Proposal ${proposalId} not found.`,
        "NOT_FOUND",
        "Check the proposal ID and try again.",
      );
    }
    throw new CLIError(
      `Failed to fetch proposal: HTTP ${response.status}: ${text.slice(0, 200)}`,
      "API_ERROR",
      "Check your credentials and network connection.",
    );
  }

  // A 2xx status does not guarantee a usable payload (proxy pages, empty
  // bodies, schema drift). Surface those as CLI errors here instead of
  // letting callers dereference `undefined`.
  let payload: { proposal?: ProposalRecord } | null;
  try {
    payload = (await response.json()) as { proposal?: ProposalRecord } | null;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new CLIError(
      `Failed to read proposal response: ${message}`,
      "API_ERROR",
      "Check your network connection and the configured cloud API URL.",
    );
  }

  const proposal = payload?.proposal;
  if (proposal == null || typeof proposal !== "object") {
    throw new CLIError(
      `Proposal ${proposalId} response did not contain a proposal.`,
      "API_ERROR",
      "Check the configured cloud API URL and try again.",
    );
  }
  return proposal;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Best-effort PATCH that flips a proposal's status to "applied".
 *
 * Failures are written to stderr as warnings and reported through the
 * return value; errors raised while sending the request are not rethrown.
 *
 * @param proposalId - Proposal identifier; URL-encoded before use.
 * @param config - Cloud endpoint and bearer key.
 * @returns `true` when the API answered with a 2xx status, otherwise `false`.
 */
async function markProposalApplied(
  proposalId: string,
  config: { apiUrl: string; apiKey: string },
): Promise<boolean> {
  const endpoint = `${config.apiUrl}/api/v1/proposals/${encodeURIComponent(proposalId)}`;

  try {
    const reply = await fetch(endpoint, {
      method: "PATCH",
      headers: {
        Authorization: `Bearer ${config.apiKey}`,
        "User-Agent": `selftune/${getSelftuneVersion()}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ status: "applied" }),
      signal: AbortSignal.timeout(15_000),
    });

    if (reply.ok) {
      return true;
    }

    const detail = await reply.text().catch(() => "unknown error");
    console.error(
      `Warning: Failed to mark proposal as applied: HTTP ${reply.status}: ${detail.slice(0, 200)}`,
    );
    return false;
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Warning: Failed to mark proposal as applied: ${reason}`);
    return false;
  }
}
|
|
125
|
+
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Apply logic
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
|
|
130
|
+
/**
 * Replace everything after the leading frontmatter block with `newBody`.
 *
 * Frontmatter is recognised when the first line is `---` and a later line
 * is also `---` (both compared after trimming). Without a complete
 * frontmatter block the whole content is replaced.
 */
function replaceSkillBody(content: string, newBody: string): string {
  const lines = content.split("\n");
  if (lines[0]?.trim() !== "---") {
    return newBody;
  }
  const closingIdx = lines.findIndex((line, idx) => idx > 0 && line.trim() === "---");
  if (closingIdx < 0) {
    return newBody;
  }
  return `${lines.slice(0, closingIdx + 1).join("\n")}\n\n${newBody}`;
}

/**
 * Apply a proposal to the skill file at `skillPath`.
 *
 * The new content is computed first; only when that succeeds is the
 * current file copied to `<skillPath>.bak` and then overwritten. A
 * proposal that cannot be applied therefore neither leaves a stray backup
 * nor clobbers a backup from an earlier run.
 *
 * @param skillPath - Path to the skill file to modify.
 * @param proposal - Proposal whose `proposed_value` is applied.
 * @returns The path of the backup that was written.
 * @throws CLIError `FILE_NOT_FOUND` when `skillPath` does not exist, or
 *   `UNSUPPORTED_TYPE` for proposal types other than "description"/"body".
 */
function applyProposalToSkill(skillPath: string, proposal: ProposalRecord): { backupPath: string } {
  if (!existsSync(skillPath)) {
    throw new CLIError(
      `Skill file not found: ${skillPath}`,
      "FILE_NOT_FOUND",
      "Verify the --skill-path argument points to your SKILL.md.",
    );
  }

  const content = readFileSync(skillPath, "utf-8");

  let updated: string;
  if (proposal.proposal_type === "description") {
    updated = replaceDescription(content, proposal.proposed_value);
  } else if (proposal.proposal_type === "body") {
    updated = replaceSkillBody(content, proposal.proposed_value);
  } else {
    throw new CLIError(
      `Unsupported proposal type: ${proposal.proposal_type}`,
      "UNSUPPORTED_TYPE",
      "Only 'description' and 'body' proposal types can be applied.",
    );
  }

  // Back up only once we know there is something valid to write.
  const backupPath = `${skillPath}.bak`;
  copyFileSync(skillPath, backupPath);

  writeFileSync(skillPath, updated, "utf-8");
  return { backupPath };
}
|
|
176
|
+
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
// CLI entry point
|
|
179
|
+
// ---------------------------------------------------------------------------
|
|
180
|
+
|
|
181
|
+
/**
 * CLI entry point for `selftune evolve apply-proposal`.
 *
 * Parses the flags, then fetches the proposal, checks that it was proposed
 * by "contributor_aggregate" and is "approved", prints a summary, and
 * (unless `--dry-run` is set) writes the change to the skill file and marks
 * the proposal as applied. Errors raised after flag parsing are passed to
 * `handleCLIError`; the missing-flag errors are thrown to the caller.
 */
export async function cliMain(): Promise<void> {
  const { values } = parseArgs({
    options: {
      id: { type: "string" },
      "skill-path": { type: "string" },
      "dry-run": { type: "boolean", default: false },
      help: { type: "boolean", default: false },
    },
    strict: true,
  });

  if (values.help) {
    console.log(`selftune evolve apply-proposal -- Apply an approved contributor proposal

Usage:
selftune evolve apply-proposal --id <proposal-id> --skill-path <path> [options]

Options:
--id Proposal UUID (required)
--skill-path Path to the target SKILL.md (required)
--dry-run Preview the proposal without applying
--help Show this help message

The proposal must be proposed by "contributor_aggregate" and have status
"approved". The command fetches the proposal from the cloud API, applies
the proposed change to the local SKILL.md, and marks the proposal as applied.`);
    process.exit(0);
  }

  const usageHint = "selftune evolve apply-proposal --id <proposal-id> --skill-path <path>";
  const proposalId = values.id;
  const skillPath = values["skill-path"];

  if (!proposalId) {
    throw new CLIError("--id is required", "MISSING_FLAG", usageHint);
  }
  if (!skillPath) {
    throw new CLIError("--skill-path is required", "MISSING_FLAG", usageHint);
  }

  const dryRun = values["dry-run"] ?? false;

  try {
    // One config lookup serves both the fetch and the mark-applied call.
    const config = getCloudConfig();
    if (!config) {
      throw new CLIError(
        "Not authenticated. Run 'selftune init' to set up cloud credentials.",
        "AUTH_MISSING",
        "selftune init",
      );
    }

    // Step 1: retrieve the proposal.
    console.log(`Fetching proposal ${proposalId}...`);
    const proposal = await fetchProposal(proposalId, config);

    // Step 2: only approved contributor-aggregate proposals are eligible.
    if (proposal.proposed_by !== "contributor_aggregate") {
      throw new CLIError(
        `Proposal was proposed by "${proposal.proposed_by}", not "contributor_aggregate".`,
        "INVALID_PROPOSAL",
        "Only contributor aggregate proposals can be applied via this command.",
      );
    }
    if (proposal.status !== "approved") {
      throw new CLIError(
        `Proposal status is "${proposal.status}", expected "approved".`,
        "INVALID_STATUS",
        "Approve the proposal in the dashboard first, then apply it.",
      );
    }

    // Step 3: show what is about to change (values truncated to 500 chars).
    console.log(`\nProposal: ${proposal.id}`);
    console.log(` Skill: ${proposal.skill_name}`);
    console.log(` Type: ${proposal.proposal_type}`);
    console.log(` Proposed by: ${proposal.proposed_by}`);
    console.log(` Reason: ${proposal.reason ?? "(none)"}`);
    if (proposal.pass_rate_before != null) {
      const beforePct = (proposal.pass_rate_before * 100).toFixed(1);
      const projected =
        proposal.projected_pass_rate != null
          ? (proposal.projected_pass_rate * 100).toFixed(1) + "%"
          : "?";
      console.log(` Pass rate: ${beforePct}% -> ${projected}`);
    }
    console.log(`\n--- Current Value ---`);
    console.log(proposal.current_value.slice(0, 500));
    console.log(`\n--- Proposed Value ---`);
    console.log(proposal.proposed_value.slice(0, 500));

    if (dryRun) {
      console.log("\n[dry-run] No changes written.");
      return;
    }

    // Step 4: write the change locally.
    const { backupPath } = applyProposalToSkill(skillPath, proposal);
    console.log(`\nApplied proposal to ${skillPath}`);
    console.log(`Backup saved to ${backupPath}`);

    // Step 5: record the application in the cloud (best effort).
    if (await markProposalApplied(proposalId, config)) {
      console.log(`Proposal ${proposalId} marked as applied.`);
    }
  } catch (err) {
    handleCLIError(err);
  }
}
|
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Cohesive module for all replay-based validation logic:
|
|
5
5
|
* - Host/runtime replay (PRIMARY path — real agent routing decisions)
|
|
6
|
-
* - Fixture-backed replay (FALLBACK — surface similarity matching)
|
|
7
6
|
* - Custom replay runner support
|
|
8
7
|
*
|
|
9
8
|
* Host/runtime replay is preferred because it captures actual agent routing
|
|
10
|
-
* behavior.
|
|
11
|
-
*
|
|
9
|
+
* behavior. If the runtime path is unavailable or fails, callers must fall
|
|
10
|
+
* back explicitly to another validation mode instead of treating simulated
|
|
11
|
+
* fixture matching as equivalent replay evidence.
|
|
12
12
|
*
|
|
13
13
|
* Extracted from validate-routing.ts and validate-body.ts to isolate
|
|
14
14
|
* replay-specific concerns from judge-specific concerns.
|
|
@@ -20,7 +20,6 @@ import type {
|
|
|
20
20
|
RoutingReplayFixture,
|
|
21
21
|
ValidationMode,
|
|
22
22
|
} from "../../types.js";
|
|
23
|
-
import { runHostReplayFixture } from "../validate-host-replay.js";
|
|
24
23
|
|
|
25
24
|
// ---------------------------------------------------------------------------
|
|
26
25
|
// Types
|
|
@@ -53,6 +52,11 @@ export interface ReplayValidationResult {
|
|
|
53
52
|
before_entry_results?: RoutingReplayEntryResult[];
|
|
54
53
|
}
|
|
55
54
|
|
|
55
|
+
/**
 * Outcome of a single attempt at replay-backed validation.
 */
export interface ReplayValidationAttempt {
  /** Replay metrics, or `null` when the attempt produced no replay result. */
  result: ReplayValidationResult | null;
  /**
   * Human-readable explanation of why no replay result was produced.
   * Optional: a null `result` may come without a reason.
   */
  fallbackReason?: string;
}
|
|
59
|
+
|
|
56
60
|
// ---------------------------------------------------------------------------
|
|
57
61
|
// Internal helpers
|
|
58
62
|
// ---------------------------------------------------------------------------
|
|
@@ -67,11 +71,31 @@ function computeReplayResult(
|
|
|
67
71
|
): ReplayValidationResult {
|
|
68
72
|
const beforePassed = beforeResults.filter((result) => result.passed).length;
|
|
69
73
|
const afterPassed = afterResults.filter((result) => result.passed).length;
|
|
74
|
+
const beforePassRate = beforePassed / total;
|
|
75
|
+
const afterPassRate = afterPassed / total;
|
|
76
|
+
const netChange = afterPassRate - beforePassRate;
|
|
77
|
+
const beforePassedByQuery = new Map<string, boolean>();
|
|
78
|
+
let regressionCount = 0;
|
|
79
|
+
let newPassCount = 0;
|
|
80
|
+
|
|
81
|
+
for (const result of beforeResults) {
|
|
82
|
+
beforePassedByQuery.set(result.query, result.passed);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
for (const result of afterResults) {
|
|
86
|
+
const beforePass = beforePassedByQuery.get(result.query) ?? false;
|
|
87
|
+
const afterPass = result.passed;
|
|
88
|
+
if (beforePass && !afterPass) regressionCount++;
|
|
89
|
+
if (!beforePass && afterPass) newPassCount++;
|
|
90
|
+
}
|
|
70
91
|
|
|
71
92
|
return {
|
|
72
|
-
before_pass_rate:
|
|
73
|
-
after_pass_rate:
|
|
74
|
-
improved:
|
|
93
|
+
before_pass_rate: beforePassRate,
|
|
94
|
+
after_pass_rate: afterPassRate,
|
|
95
|
+
improved:
|
|
96
|
+
afterPassRate > beforePassRate &&
|
|
97
|
+
regressionCount < total * 0.05 &&
|
|
98
|
+
(netChange >= 0.1 || newPassCount >= 2),
|
|
75
99
|
validation_mode: mode,
|
|
76
100
|
validation_agent: agent,
|
|
77
101
|
validation_fixture_id: fixtureId,
|
|
@@ -85,12 +109,11 @@ function computeReplayResult(
|
|
|
85
109
|
// ---------------------------------------------------------------------------
|
|
86
110
|
|
|
87
111
|
/**
|
|
88
|
-
* Attempt replay-backed validation
|
|
89
|
-
* replayRunner is provided; falls back to fixture-based replay when:
|
|
90
|
-
* - No replayRunner is provided
|
|
91
|
-
* - The replayRunner throws an error
|
|
112
|
+
* Attempt replay-backed validation using a real host/runtime runner.
|
|
92
113
|
*
|
|
93
|
-
* Returns null
|
|
114
|
+
* Returns a null result with a fallback reason when runtime replay is
|
|
115
|
+
* unavailable or fails. Callers decide whether to fall back to a judge-based
|
|
116
|
+
* validator (`auto`) or surface an explicit unavailable error (`replay`).
|
|
94
117
|
*/
|
|
95
118
|
export async function runReplayValidation(
|
|
96
119
|
originalContent: string,
|
|
@@ -98,61 +121,60 @@ export async function runReplayValidation(
|
|
|
98
121
|
evalSet: EvalEntry[],
|
|
99
122
|
agent: string,
|
|
100
123
|
options: ReplayValidationOptions = {},
|
|
101
|
-
): Promise<
|
|
102
|
-
if (evalSet.length === 0
|
|
103
|
-
return null;
|
|
124
|
+
): Promise<ReplayValidationAttempt> {
|
|
125
|
+
if (evalSet.length === 0) {
|
|
126
|
+
return { result: null };
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
if (!options.replayFixture) {
|
|
130
|
+
return {
|
|
131
|
+
result: null,
|
|
132
|
+
fallbackReason: "no replay fixture is available for runtime validation",
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
if (!options.replayRunner) {
|
|
137
|
+
return {
|
|
138
|
+
result: null,
|
|
139
|
+
fallbackReason: "no real host/runtime replay runner is configured",
|
|
140
|
+
};
|
|
104
141
|
}
|
|
105
142
|
|
|
106
143
|
const fixture = options.replayFixture;
|
|
107
144
|
const total = evalSet.length;
|
|
108
145
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
return computeReplayResult(
|
|
146
|
+
try {
|
|
147
|
+
const beforeResults = await options.replayRunner({
|
|
148
|
+
routing: originalContent,
|
|
149
|
+
evalSet,
|
|
150
|
+
agent,
|
|
151
|
+
fixture,
|
|
152
|
+
});
|
|
153
|
+
const afterResults = await options.replayRunner({
|
|
154
|
+
routing: proposedContent,
|
|
155
|
+
evalSet,
|
|
156
|
+
agent,
|
|
157
|
+
fixture,
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
return {
|
|
161
|
+
result: computeReplayResult(
|
|
126
162
|
beforeResults,
|
|
127
163
|
afterResults,
|
|
128
164
|
total,
|
|
129
165
|
"host_replay",
|
|
130
166
|
agent,
|
|
131
167
|
fixture.fixture_id,
|
|
132
|
-
)
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
|
|
168
|
+
),
|
|
169
|
+
};
|
|
170
|
+
} catch (error) {
|
|
171
|
+
const message =
|
|
172
|
+
error instanceof Error && error.message.trim()
|
|
173
|
+
? error.message.trim()
|
|
174
|
+
: "runtime replay failed before producing a routing decision";
|
|
175
|
+
return {
|
|
176
|
+
result: null,
|
|
177
|
+
fallbackReason: `real host/runtime replay failed: ${message}`,
|
|
178
|
+
};
|
|
136
179
|
}
|
|
137
|
-
|
|
138
|
-
// FALLBACK path: Fixture-backed replay (surface similarity matching)
|
|
139
|
-
const beforeResults = runHostReplayFixture({
|
|
140
|
-
routing: originalContent,
|
|
141
|
-
evalSet,
|
|
142
|
-
fixture,
|
|
143
|
-
});
|
|
144
|
-
const afterResults = runHostReplayFixture({
|
|
145
|
-
routing: proposedContent,
|
|
146
|
-
evalSet,
|
|
147
|
-
fixture,
|
|
148
|
-
});
|
|
149
|
-
|
|
150
|
-
return computeReplayResult(
|
|
151
|
-
beforeResults,
|
|
152
|
-
afterResults,
|
|
153
|
-
total,
|
|
154
|
-
"fixture_replay",
|
|
155
|
-
agent,
|
|
156
|
-
fixture.fixture_id,
|
|
157
|
-
);
|
|
158
180
|
}
|