selftune 0.2.30 → 0.2.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -56
- package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
- package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/command-surface.ts +613 -2
- package/cli/selftune/create/baseline.ts +429 -0
- package/cli/selftune/create/check.ts +35 -0
- package/cli/selftune/create/init.ts +115 -0
- package/cli/selftune/create/package-candidate-state.ts +771 -0
- package/cli/selftune/create/package-evaluator.ts +710 -0
- package/cli/selftune/create/package-fingerprint.ts +142 -0
- package/cli/selftune/create/package-search.ts +377 -0
- package/cli/selftune/create/publish.ts +431 -0
- package/cli/selftune/create/readiness.ts +495 -0
- package/cli/selftune/create/replay.ts +330 -0
- package/cli/selftune/create/report.ts +74 -0
- package/cli/selftune/create/scaffold.ts +121 -0
- package/cli/selftune/create/skills-ref-adapter.ts +177 -0
- package/cli/selftune/create/status.ts +33 -0
- package/cli/selftune/create/templates.ts +249 -0
- package/cli/selftune/cron/setup.ts +1 -1
- package/cli/selftune/dashboard-action-events.ts +4 -1
- package/cli/selftune/dashboard-action-result.ts +789 -24
- package/cli/selftune/dashboard-action-stream.ts +80 -0
- package/cli/selftune/dashboard-contract.ts +146 -3
- package/cli/selftune/dashboard-server.ts +5 -4
- package/cli/selftune/eval/hooks-to-evals.ts +58 -35
- package/cli/selftune/eval/synthetic-evals.ts +145 -17
- package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
- package/cli/selftune/evolution/evolve-body.ts +9 -36
- package/cli/selftune/evolution/evolve.ts +8 -72
- package/cli/selftune/evolution/stopping-criteria.ts +5 -13
- package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
- package/cli/selftune/evolution/validate-host-replay.ts +115 -15
- package/cli/selftune/improve.ts +206 -0
- package/cli/selftune/index.ts +123 -6
- package/cli/selftune/init.ts +1 -1
- package/cli/selftune/localdb/queries/dashboard.ts +30 -0
- package/cli/selftune/localdb/schema.ts +52 -0
- package/cli/selftune/monitoring/watch.ts +257 -23
- package/cli/selftune/orchestrate/execute.ts +300 -1
- package/cli/selftune/orchestrate/finalize.ts +14 -0
- package/cli/selftune/orchestrate/plan.ts +22 -5
- package/cli/selftune/orchestrate/prepare.ts +59 -4
- package/cli/selftune/orchestrate/report.ts +1 -1
- package/cli/selftune/orchestrate.ts +34 -1
- package/cli/selftune/publish.ts +35 -0
- package/cli/selftune/registry/github-install.ts +256 -0
- package/cli/selftune/registry/index.ts +1 -1
- package/cli/selftune/registry/install.ts +58 -7
- package/cli/selftune/routes/actions.ts +81 -15
- package/cli/selftune/routes/overview.ts +1 -1
- package/cli/selftune/routes/skill-report.ts +147 -2
- package/cli/selftune/run.ts +18 -0
- package/cli/selftune/schedule.ts +3 -3
- package/cli/selftune/search-run.ts +703 -0
- package/cli/selftune/status.ts +35 -11
- package/cli/selftune/testing-readiness.ts +431 -40
- package/cli/selftune/types.ts +316 -0
- package/cli/selftune/utils/eval-readiness.ts +1 -0
- package/cli/selftune/utils/json-output.ts +11 -0
- package/cli/selftune/utils/lifecycle-surface.ts +48 -0
- package/cli/selftune/utils/query-filter.ts +82 -1
- package/cli/selftune/utils/tui.ts +85 -2
- package/cli/selftune/verify.ts +205 -0
- package/cli/selftune/workflows/proposals.ts +1 -1
- package/cli/selftune/workflows/skill-scaffold.ts +141 -63
- package/cli/selftune/workflows/workflows.ts +4 -4
- package/package.json +1 -1
- package/packages/dashboard-core/src/routes/manifest.ts +2 -2
- package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
- package/packages/ui/src/primitives/button.tsx +5 -0
- package/skill/SKILL.md +148 -85
- package/skill/references/cli-quick-reference.md +16 -1
- package/skill/references/creator-playbook.md +31 -10
- package/skill/workflows/Baseline.md +8 -9
- package/skill/workflows/Contributions.md +4 -4
- package/skill/workflows/Create.md +173 -0
- package/skill/workflows/CreateTestDeploy.md +34 -30
- package/skill/workflows/Cron.md +2 -2
- package/skill/workflows/Dashboard.md +3 -3
- package/skill/workflows/Evals.md +13 -7
- package/skill/workflows/Evolve.md +75 -32
- package/skill/workflows/EvolveBody.md +22 -15
- package/skill/workflows/Hook.md +1 -1
- package/skill/workflows/Improve.md +168 -0
- package/skill/workflows/Initialize.md +3 -3
- package/skill/workflows/Orchestrate.md +49 -12
- package/skill/workflows/Publish.md +100 -0
- package/skill/workflows/Registry.md +19 -13
- package/skill/workflows/Run.md +72 -0
- package/skill/workflows/Schedule.md +2 -2
- package/skill/workflows/SearchRun.md +89 -0
- package/skill/workflows/SignalsDashboard.md +2 -2
- package/skill/workflows/UnitTest.md +13 -4
- package/skill/workflows/Verify.md +136 -0
- package/skill/workflows/Watch.md +114 -47
- package/skill/workflows/Workflows.md +13 -8
- package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +0 -1
- package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
|
@@ -0,0 +1,703 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { basename } from "node:path";
|
|
3
|
+
import { parseArgs } from "node:util";
|
|
4
|
+
import type { Database } from "bun:sqlite";
|
|
5
|
+
|
|
6
|
+
import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "./command-surface.js";
|
|
7
|
+
import {
|
|
8
|
+
readPackageCandidateArtifact,
|
|
9
|
+
selectAcceptedPackageFrontierCandidate,
|
|
10
|
+
} from "./create/package-candidate-state.js";
|
|
11
|
+
import type { CreatePackageEvaluationResult } from "./create/package-evaluator.js";
|
|
12
|
+
import { resolveCreateSkillPath } from "./create/readiness.js";
|
|
13
|
+
import { computeCreatePackageFingerprint } from "./create/package-fingerprint.js";
|
|
14
|
+
import { runPackageSearch } from "./create/package-search.js";
|
|
15
|
+
import {
|
|
16
|
+
type BoundedMutationResult,
|
|
17
|
+
cleanupVariants,
|
|
18
|
+
extractMutationWeaknesses,
|
|
19
|
+
generateBodyMutations,
|
|
20
|
+
generateReflectiveBodyMutations,
|
|
21
|
+
generateReflectiveRoutingMutations,
|
|
22
|
+
generateRoutingMutations,
|
|
23
|
+
generateTargetedBodyMutations,
|
|
24
|
+
generateTargetedRoutingMutations,
|
|
25
|
+
} from "./evolution/bounded-mutations.js";
|
|
26
|
+
import { getDb } from "./localdb/db.js";
|
|
27
|
+
import {
|
|
28
|
+
readCanonicalPackageEvaluationArtifact,
|
|
29
|
+
writeCanonicalPackageEvaluation,
|
|
30
|
+
writeCanonicalPackageEvaluationArtifact,
|
|
31
|
+
} from "./testing-readiness.js";
|
|
32
|
+
import type { CreatePackageEvaluationSummary, PackageSearchRunResult } from "./types.js";
|
|
33
|
+
import { CLIError, handleCLIError } from "./utils/cli-error.js";
|
|
34
|
+
|
|
35
|
+
/** Mutation surface(s) a search run may target; these are the values accepted by the `--surface` flag. */
type SearchSurface = "routing" | "body" | "both";
|
|
36
|
+
|
|
37
|
+
/**
 * A generated skill variant that was selected for evaluation in a search run.
 *
 * Built from a bounded-mutation result once its fingerprint has been confirmed
 * to be unique within the run.
 */
export interface SearchRunVariant {
  /** Path of the variant skill, copied from the mutation result's `variantSkillPath`. */
  skill_path: string;
  /** Surface the mutation was applied to. */
  mutation_surface: "routing" | "body";
  /** Description of the mutation, copied from the mutation result. */
  mutation_description: string;
  /** Package fingerprint of the variant, used to skip duplicates. */
  fingerprint: string;
}
|
|
43
|
+
|
|
44
|
+
/**
 * How many variants to generate per surface, plus the evidence the split was
 * based on.
 */
interface SearchSurfacePlan {
  /** Number of routing variants to generate. */
  routing_count: number;
  /** Number of body variants to generate. */
  body_count: number;
  /**
   * Where the weakness scores came from: the accepted frontier candidate, the
   * canonical package evaluation, or nowhere (`"default_even_split"`).
   */
  weakness_source: "accepted_frontier" | "canonical_package_evaluation" | "default_even_split";
  /** Measured routing weakness, or `null` when nothing was measured. */
  routing_weakness: number | null;
  /** Measured body weakness, or `null` when nothing was measured. */
  body_weakness: number | null;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Optional overrides for the helpers used while generating search-run variants.
 *
 * Every member is optional; when one is left undefined the module-level import
 * of the same name is used instead.
 */
export interface SearchRunVariantGenerationDeps {
  // Evidence gathering.
  extractMutationWeaknesses?: typeof extractMutationWeaknesses;

  // Reflective generators (only invoked when an agent is supplied).
  generateReflectiveRoutingMutations?: typeof generateReflectiveRoutingMutations;
  generateReflectiveBodyMutations?: typeof generateReflectiveBodyMutations;

  // Deterministic generators (last stage, used to fill any remaining slots).
  generateRoutingMutations?: typeof generateRoutingMutations;
  generateBodyMutations?: typeof generateBodyMutations;

  // Targeted generators (driven by the extracted weaknesses).
  generateTargetedRoutingMutations?: typeof generateTargetedRoutingMutations;
  generateTargetedBodyMutations?: typeof generateTargetedBodyMutations;

  // Fingerprinting used to de-duplicate variants.
  computeCreatePackageFingerprint?: typeof computeCreatePackageFingerprint;
}
|
|
62
|
+
|
|
63
|
+
/** Result of a variant-generation pass. */
export interface GeneratedSearchRunVariants {
  /** Unique variants chosen for evaluation. */
  generated_variants: SearchRunVariant[];
  /**
   * Every mutation result produced along the way, selected or not. Callers
   * hand this list to `cleanupVariants` once they are done with the variants.
   */
  cleanup_variants: BoundedMutationResult[];
}
|
|
67
|
+
|
|
68
|
+
/** Outcome of attempting to apply a search-run winner to the target skill. */
export interface ApplySearchRunWinnerResult {
  /** `true` when a winner's content was written to the target skill path. */
  applied_winner: boolean;
  /** Id of the candidate that was applied, or `null` when nothing was applied. */
  applied_candidate_id: string | null;
  /** Suggested follow-up CLI command, or `null` when nothing was applied. */
  next_command: string | null;
  /** Evaluation summary recorded for the applied winner, or `null` when nothing was applied. */
  package_evaluation: CreatePackageEvaluationSummary | null;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Optional overrides for the side-effecting helpers used by
 * `applySearchRunWinner`. Undefined members fall back to the real
 * implementations.
 */
export interface ApplySearchRunWinnerDeps {
  /** Loads the stored artifact for a candidate id. */
  readPackageCandidateArtifact?: typeof readPackageCandidateArtifact;
  /** Reads a skill's content; the default reads the path as UTF-8 text. */
  readSkillContent?: (skillPath: string) => string;
  /** Writes a skill's content; the default writes the path as UTF-8 text. */
  writeSkillContent?: (skillPath: string, content: string) => void;
  /** Persists the canonical evaluation summary for a skill. */
  writeCanonicalPackageEvaluation?: typeof writeCanonicalPackageEvaluation;
  /** Persists the canonical evaluation artifact for a skill. */
  writeCanonicalPackageEvaluationArtifact?: typeof writeCanonicalPackageEvaluationArtifact;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Everything the `search-run` command reports: the package-search result, the
 * winner-application outcome, and details about how variants were generated.
 */
interface SearchRunPayload extends PackageSearchRunResult, ApplySearchRunWinnerResult {
  /** Variants that were generated and submitted to the search. */
  generated_variants: SearchRunVariant[];
  /** `true` when the search produced a winner candidate. */
  improved: boolean;
  /** Per-surface variant counts and the evidence behind them. */
  surface_plan: SearchSurfacePlan;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Derive a skill name from a skill path.
 *
 * The path is resolved with `resolveCreateSkillPath`; the last segment of the
 * resolved `skill_dir` is used as the name.
 */
function inferSkillName(skillPath: string): string {
  const { skill_dir: skillDir } = resolveCreateSkillPath(skillPath);
  return basename(skillDir);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Parse the `--surface` flag.
 *
 * @param rawSurface Raw flag value, or `undefined` when the flag was omitted.
 * @returns The requested surface; defaults to `"both"` when omitted.
 * @throws CLIError (`INVALID_FLAG`) when the value is not `routing`, `body`, or `both`.
 */
function readSurface(rawSurface: string | undefined): SearchSurface {
  const requested = rawSurface ?? "both";
  switch (requested) {
    case "routing":
    case "body":
    case "both":
      return requested;
    default:
      throw new CLIError(
        `Invalid --surface value: ${rawSurface}`,
        "INVALID_FLAG",
        "Use one of: routing, body, both",
      );
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Parse the `--max-candidates` flag.
 *
 * @param rawMax Raw flag value, or `undefined` when the flag was omitted.
 * @returns The requested candidate count; defaults to 5 when omitted.
 * @throws CLIError (`INVALID_FLAG`) when the value is not a positive integer
 *   that can be represented exactly as a JavaScript number.
 */
function readMaxCandidates(rawMax: string | undefined): number {
  if (rawMax == null) return 5;

  // Digits only with no leading zero, so "0", "-3", "2.5" and "1e3" are rejected.
  const parsed = /^[1-9]\d*$/.test(rawMax) ? Number(rawMax) : Number.NaN;

  // The digit pattern alone admits arbitrarily long strings, which convert to
  // imprecise values or Infinity; neither is a usable candidate count.
  if (!Number.isSafeInteger(parsed)) {
    throw new CLIError(
      "Invalid --max-candidates value. Use a positive integer.",
      "INVALID_FLAG",
      "selftune search-run --skill-path <path> --max-candidates 5",
    );
  }

  return parsed;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Score how weak routing is for an evaluated package, as `1 - pass rate`
 * clamped to `[0, 1]`.
 *
 * Uses the routing pass rate when the summary has one and falls back to the
 * replay pass rate otherwise.
 */
function computeRoutingWeakness(summary: CreatePackageEvaluationSummary): number {
  const passRate = summary.routing?.pass_rate ?? summary.replay.pass_rate;
  const shortfall = 1 - passRate;
  return Math.max(0, Math.min(1, shortfall));
}
|
|
121
|
+
|
|
122
|
+
/**
 * Score how weak the skill body is for an evaluated package.
 *
 * - no body evaluation, or no quality score: 0.5
 * - body evaluation marked invalid: 1
 * - otherwise: `1 - quality_score`, clamped to `[0, 1]`
 */
export function computeBodyWeakness(summary: CreatePackageEvaluationSummary): number {
  const unmeasured = 0.5;
  const body = summary.body;

  if (!body) return unmeasured;
  if (!body.valid) return 1;

  const quality = body.quality_score;
  if (quality == null) return unmeasured;

  return Math.max(0, Math.min(1, 1 - quality));
}
|
|
128
|
+
|
|
129
|
+
/**
 * Look up measured routing/body weakness for a skill.
 *
 * Sources are tried in order: the accepted package frontier candidate, then
 * the canonical package evaluation artifact. When neither exists, both scores
 * are `null` and the source is reported as `"default_even_split"`.
 */
function readMeasuredSurfaceWeakness(
  skillName: string,
  db: Database,
): Omit<SearchSurfacePlan, "routing_count" | "body_count"> {
  const scoreSummary = (
    weakness_source: SearchSurfacePlan["weakness_source"],
    summary: CreatePackageEvaluationSummary,
  ): Omit<SearchSurfacePlan, "routing_count" | "body_count"> => ({
    weakness_source,
    routing_weakness: computeRoutingWeakness(summary),
    body_weakness: computeBodyWeakness(summary),
  });

  const acceptedParent = selectAcceptedPackageFrontierCandidate(skillName, { db });
  if (acceptedParent) {
    return scoreSummary("accepted_frontier", acceptedParent.summary);
  }

  // Only consulted when there is no accepted frontier candidate.
  const canonicalSummary = readCanonicalPackageEvaluationArtifact(skillName)?.summary;
  if (canonicalSummary) {
    return scoreSummary("canonical_package_evaluation", canonicalSummary);
  }

  return {
    weakness_source: "default_even_split",
    routing_weakness: null,
    body_weakness: null,
  };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Decide how many routing and body variants to generate.
 *
 * - `"routing"` / `"body"`: the whole budget goes to that surface.
 * - `"both"` without usable weakness scores, or with a budget of at most one:
 *   even split, with routing receiving the extra variant on odd budgets.
 * - `"both"` with usable scores: each surface gets one variant, and the rest
 *   of the budget is divided in proportion to the weakness scores.
 *
 * Scores that are `null` or non-finite (for example NaN derived from a
 * malformed pass rate) count as unusable, so the plan never contains NaN counts.
 *
 * @param surface Surface(s) requested for the run.
 * @param maxCandidates Total number of variants to plan for.
 * @param weakness Measured weakness evidence; echoed unchanged into the plan.
 * @returns The per-surface counts together with the supplied evidence.
 */
export function planVariantCounts(
  surface: SearchSurface,
  maxCandidates: number,
  weakness: {
    weakness_source: SearchSurfacePlan["weakness_source"];
    routing_weakness: number | null;
    body_weakness: number | null;
  } = {
    weakness_source: "default_even_split",
    routing_weakness: null,
    body_weakness: null,
  },
): SearchSurfacePlan {
  if (surface === "routing") {
    return { routing_count: maxCandidates, body_count: 0, ...weakness };
  }
  if (surface === "body") {
    return { routing_count: 0, body_count: maxCandidates, ...weakness };
  }

  const evenSplit = (): SearchSurfacePlan => ({
    routing_count: Math.ceil(maxCandidates / 2),
    body_count: Math.floor(maxCandidates / 2),
    ...weakness,
  });

  const { routing_weakness: routingWeakness, body_weakness: bodyWeakness } = weakness;
  if (
    routingWeakness == null ||
    bodyWeakness == null ||
    !Number.isFinite(routingWeakness) ||
    !Number.isFinite(bodyWeakness) ||
    maxCandidates <= 1
  ) {
    return evenSplit();
  }

  const totalWeakness = routingWeakness + bodyWeakness;
  if (totalWeakness <= Number.EPSILON) {
    // Neither surface shows measurable weakness, so there is nothing to weight by.
    return evenSplit();
  }

  // The budget is at least two here: reserve one variant per surface and share
  // out the remainder by relative weakness.
  const remaining = Math.max(0, maxCandidates - 2);
  const weightedShare = Math.round((remaining * routingWeakness) / totalWeakness);
  // Clamp so out-of-range scores can never push either count below its reserved one.
  const routingExtra = Math.min(remaining, Math.max(0, weightedShare));
  const routingCount = 1 + routingExtra;

  return {
    routing_count: routingCount,
    body_count: maxCandidates - routingCount,
    ...weakness,
  };
}
|
|
215
|
+
|
|
216
|
+
/**
 * Pick up to `count` variants whose fingerprints have not been seen yet.
 *
 * Variants are considered in order. Each accepted fingerprint is added to
 * `seenFingerprints`, so later calls sharing that set skip the same content.
 *
 * @param variants Candidate mutation results, in priority order.
 * @param count Maximum number of variants to select.
 * @param seenFingerprints Fingerprints already taken; mutated in place.
 * @param deps Optional override for the fingerprint function.
 * @returns The selected variants in search-run form.
 * @throws CLIError (`RUNTIME_ERROR`) when a variant cannot be fingerprinted.
 */
function selectUniqueVariants(
  variants: readonly BoundedMutationResult[],
  count: number,
  seenFingerprints: Set<string>,
  deps: SearchRunVariantGenerationDeps,
): SearchRunVariant[] {
  const fingerprintOf = deps.computeCreatePackageFingerprint ?? computeCreatePackageFingerprint;
  const picked: SearchRunVariant[] = [];

  for (let index = 0; index < variants.length && !(picked.length >= count); index += 1) {
    const { variantSkillPath, mutationSurface, mutationDescription } = variants[index];

    const fingerprint = fingerprintOf(variantSkillPath);
    if (!fingerprint) {
      throw new CLIError(
        `Failed to fingerprint ${mutationSurface} variant at ${variantSkillPath}`,
        "RUNTIME_ERROR",
      );
    }

    if (!seenFingerprints.has(fingerprint)) {
      seenFingerprints.add(fingerprint);
      picked.push({
        skill_path: variantSkillPath,
        mutation_surface: mutationSurface,
        mutation_description: mutationDescription,
        fingerprint,
      });
    }
  }

  return picked;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Generate up to `targetCount` unique variants for one mutation surface.
 *
 * Generators run in stages, stopping as soon as enough unique variants exist:
 *
 * 1. Reflective mutations (only when `agent` is set, at most one variant).
 *    A failure here is treated as "no variants" so the later stages still run.
 * 2. Targeted mutations driven by the extracted weaknesses, asked for the
 *    number of variants still missing.
 * 3. Deterministic mutations, asked for the full `targetCount` because some
 *    of their output may duplicate earlier variants.
 *
 * Every variant produced by any stage is recorded in `cleanup_variants`,
 * whether or not it was selected. If a stage throws (other than the reflective
 * one), the variants produced so far are cleaned up before the error is rethrown.
 *
 * @param skillPath Path of the parent skill being mutated.
 * @param skillName Skill name used for weakness lookup and reflective generation.
 * @param targetCount Number of unique variants wanted; non-positive or NaN yields nothing.
 * @param surface Surface to mutate.
 * @param agent Agent used for reflective generation; falsy skips that stage.
 * @param db Database handle passed to the weakness extractor.
 * @param seenFingerprints Fingerprints already taken in this run; mutated in place.
 * @param deps Optional overrides for the generators and helpers.
 */
async function generateSurfaceVariants(
  skillPath: string,
  skillName: string,
  targetCount: number,
  surface: "routing" | "body",
  agent: string | undefined,
  db: Database,
  seenFingerprints: Set<string>,
  deps: SearchRunVariantGenerationDeps,
): Promise<GeneratedSearchRunVariants> {
  // Written as a negated `>` so that NaN is rejected along with zero and negatives.
  if (!(targetCount > 0)) {
    return {
      generated_variants: [],
      cleanup_variants: [],
    };
  }

  const isRouting = surface === "routing";
  const extractWeaknesses = deps.extractMutationWeaknesses ?? extractMutationWeaknesses;
  const weaknesses = extractWeaknesses(skillName, db);
  const cleanupResults: BoundedMutationResult[] = [];
  const selections: SearchRunVariant[] = [];

  // Track everything a stage produced for cleanup, keep the unique variants
  // still needed, and report whether the target has been reached.
  const collect = (produced: readonly BoundedMutationResult[]): boolean => {
    cleanupResults.push(...produced);
    selections.push(
      ...selectUniqueVariants(
        produced,
        targetCount - selections.length,
        seenFingerprints,
        deps,
      ),
    );
    return selections.length >= targetCount;
  };
  const finish = (): GeneratedSearchRunVariants => ({
    generated_variants: selections,
    cleanup_variants: cleanupResults,
  });

  try {
    if (agent) {
      let reflectiveVariants: readonly BoundedMutationResult[];
      try {
        const reflectiveOptions = { maxVariants: 1, skillName, agent };
        reflectiveVariants = isRouting
          ? await (deps.generateReflectiveRoutingMutations ?? generateReflectiveRoutingMutations)(
              skillPath,
              weaknesses,
              reflectiveOptions,
            )
          : await (deps.generateReflectiveBodyMutations ?? generateReflectiveBodyMutations)(
              skillPath,
              weaknesses,
              reflectiveOptions,
            );
      } catch {
        // Best effort: a failed reflective attempt falls through to the other stages.
        reflectiveVariants = [];
      }
      if (collect(reflectiveVariants)) {
        return finish();
      }
    }

    const targetedOptions = { maxVariants: targetCount - selections.length };
    const targetedVariants = isRouting
      ? (deps.generateTargetedRoutingMutations ?? generateTargetedRoutingMutations)(
          skillPath,
          weaknesses,
          targetedOptions,
        )
      : (deps.generateTargetedBodyMutations ?? generateTargetedBodyMutations)(
          skillPath,
          weaknesses,
          targetedOptions,
        );
    if (collect(targetedVariants)) {
      return finish();
    }

    const deterministicOptions = {
      mutationSurface: surface,
      maxVariants: targetCount,
      parentSkillPath: skillPath,
      agent,
    } as const;
    const deterministicVariants = isRouting
      ? await (deps.generateRoutingMutations ?? generateRoutingMutations)(
          skillPath,
          deterministicOptions,
        )
      : await (deps.generateBodyMutations ?? generateBodyMutations)(
          skillPath,
          deterministicOptions,
        );
    collect(deterministicVariants);

    // May hold fewer than targetCount variants if the generators ran dry.
    return finish();
  } catch (error) {
    cleanupVariants(cleanupResults);
    throw error;
  }
}
|
|
444
|
+
|
|
445
|
+
/**
 * Generate the variants for a search run according to a surface plan.
 *
 * Routing variants are generated first, then body variants. Both passes share
 * one fingerprint set, so a body variant that duplicates a routing variant is
 * skipped. If either pass throws, the variants collected from completed passes
 * are cleaned up before the error is rethrown.
 *
 * @param skillPath Path of the parent skill being mutated.
 * @param skillName Name of the skill.
 * @param surfacePlan Per-surface variant counts.
 * @param agent Optional agent forwarded to the generators.
 * @param db Database handle forwarded to the generators.
 * @param deps Optional overrides for the generators and helpers.
 * @returns The selected variants plus every produced variant for later cleanup.
 */
export async function generateSearchRunVariants(
  skillPath: string,
  skillName: string,
  surfacePlan: SearchSurfacePlan,
  agent: string | undefined,
  db: Database,
  deps: SearchRunVariantGenerationDeps = {},
): Promise<GeneratedSearchRunVariants> {
  const sharedFingerprints = new Set<string>();
  const produced: BoundedMutationResult[] = [];
  const selected: SearchRunVariant[] = [];
  const surfacesInOrder = ["routing", "body"] as const;

  try {
    for (const surface of surfacesInOrder) {
      const wanted = surface === "routing" ? surfacePlan.routing_count : surfacePlan.body_count;
      const outcome = await generateSurfaceVariants(
        skillPath,
        skillName,
        wanted,
        surface,
        agent,
        db,
        sharedFingerprints,
        deps,
      );
      produced.push(...outcome.cleanup_variants);
      selected.push(...outcome.generated_variants);
    }

    return {
      generated_variants: selected,
      cleanup_variants: produced,
    };
  } catch (error) {
    cleanupVariants(produced);
    throw error;
  }
}
|
|
493
|
+
|
|
494
|
+
/**
 * Build the follow-up command suggested after applying a winner: `publish`
 * when the evaluation passed, `verify` otherwise.
 */
function buildSearchRunNextCommand(skillPath: string, passed: boolean): string {
  if (passed) {
    return `selftune publish --skill-path ${skillPath}`;
  }
  return `selftune verify --skill-path ${skillPath}`;
}
|
|
499
|
+
|
|
500
|
+
/**
 * Re-target a candidate's evaluation at another skill path.
 *
 * Returns a copy in which the summary and replay `skill_path` point at
 * `skillPath`, the summary's `evaluation_source` is `"candidate_cache"`, and
 * `next_command` is rebuilt for that path. The input is not modified.
 */
function normalizeCandidateEvaluationForSkillPath(
  evaluation: CreatePackageEvaluationResult,
  skillPath: string,
): CreatePackageEvaluationResult {
  const { summary: candidateSummary, replay: candidateReplay } = evaluation;
  const followUp = buildSearchRunNextCommand(skillPath, candidateSummary.evaluation_passed);

  const summary: CreatePackageEvaluationResult["summary"] = {
    ...candidateSummary,
    skill_path: skillPath,
    evaluation_source: "candidate_cache",
    next_command: followUp,
  };
  const replay: CreatePackageEvaluationResult["replay"] = {
    ...candidateReplay,
    skill_path: skillPath,
  };

  return { ...evaluation, summary, replay };
}
|
|
519
|
+
|
|
520
|
+
/**
 * Apply a search-run winner to the target skill.
 *
 * With no winner id, nothing is touched and a "not applied" result is
 * returned. Otherwise the winner's content is copied over the target skill
 * path, and the winner's evaluation (re-targeted at that path) is recorded as
 * the skill's canonical evaluation artifact and summary.
 *
 * @param skillName Name of the skill being updated.
 * @param targetSkillPath Skill path that receives the winner's content.
 * @param winnerCandidateId Id of the winning candidate, or `null` when there is none.
 * @param deps Optional overrides for artifact lookup, file access, and persistence.
 * @throws CLIError (`RUNTIME_ERROR`) when the winner's artifact cannot be found.
 */
export function applySearchRunWinner(
  skillName: string,
  targetSkillPath: string,
  winnerCandidateId: string | null,
  deps: ApplySearchRunWinnerDeps = {},
): ApplySearchRunWinnerResult {
  if (!winnerCandidateId) {
    return {
      applied_winner: false,
      applied_candidate_id: null,
      next_command: null,
      package_evaluation: null,
    };
  }

  const loadArtifact = deps.readPackageCandidateArtifact ?? readPackageCandidateArtifact;
  const winnerArtifact = loadArtifact(skillName, winnerCandidateId);
  if (!winnerArtifact) {
    throw new CLIError(
      `Winner candidate artifact is missing for ${winnerCandidateId}.`,
      "RUNTIME_ERROR",
      "Re-run selftune search-run to regenerate candidate artifacts.",
    );
  }

  // Copy the winner's skill content over the target skill.
  const readSkill =
    deps.readSkillContent ?? ((skillPath: string) => readFileSync(skillPath, "utf-8"));
  const winnerContent = readSkill(winnerArtifact.summary.skill_path);
  const writeSkill =
    deps.writeSkillContent ??
    ((skillPath: string, content: string) => writeFileSync(skillPath, content, "utf-8"));
  writeSkill(targetSkillPath, winnerContent);

  // Record the winner's evaluation, re-targeted at the skill it now lives in.
  const retargeted = normalizeCandidateEvaluationForSkillPath(winnerArtifact, targetSkillPath);
  const persistArtifact =
    deps.writeCanonicalPackageEvaluationArtifact ?? writeCanonicalPackageEvaluationArtifact;
  persistArtifact(skillName, retargeted);
  const persistSummary = deps.writeCanonicalPackageEvaluation ?? writeCanonicalPackageEvaluation;
  persistSummary(skillName, retargeted.summary);

  return {
    applied_winner: true,
    applied_candidate_id: winnerCandidateId,
    next_command: retargeted.summary.next_command,
    package_evaluation: retargeted.summary,
  };
}
|
|
576
|
+
|
|
577
|
+
/**
 * Render the human-readable summary of a search run.
 *
 * The fixed header lines are always present. The surface-evidence, winner
 * rationale, applied-winner, and next-command lines are added only when the
 * payload carries them.
 *
 * @returns The summary as a single newline-joined string.
 */
function formatSearchRunSummary(result: SearchRunPayload): string {
  const plan = result.surface_plan;
  const formatWeakness = (score: number | null): string => score?.toFixed(2) ?? "n/a";

  const report: string[] = [];
  report.push(`Bounded package search complete for ${result.skill_name}`);
  report.push("");
  report.push(`Candidates evaluated: ${result.candidates_evaluated}`);
  report.push(`Surface plan: routing ${plan.routing_count}, body ${plan.body_count}`);
  report.push(`Frontier size: ${result.provenance.frontier_size}`);
  report.push(`Parent selection: ${result.provenance.parent_selection_method}`);
  report.push(`Parent candidate: ${result.parent_candidate_id ?? "root"}`);
  report.push(`Winner candidate: ${result.winner_candidate_id ?? "none"}`);

  const hasMeasuredEvidence = plan.weakness_source !== "default_even_split";
  if (hasMeasuredEvidence) {
    report.push(
      `Surface evidence: ${plan.weakness_source} (routing ${formatWeakness(plan.routing_weakness)}, body ${formatWeakness(plan.body_weakness)})`,
    );
  }
  if (result.winner_rationale) {
    report.push(`Winner rationale: ${result.winner_rationale}`);
  }
  if (result.applied_winner) {
    report.push(`Winner applied: ${result.applied_candidate_id ?? result.winner_candidate_id}`);
  }
  if (result.next_command) {
    report.push(`Next command: ${result.next_command}`);
  }

  return report.join("\n");
}
|
|
604
|
+
|
|
605
|
+
/**
 * Entry point for `selftune search-run`.
 *
 * Parses the flags, plans how many variants to generate per surface (using
 * measured weakness only when the surface is `both`), generates the variants,
 * runs the package search over them, and optionally applies the winner when
 * `--apply-winner` is set. The result is printed as JSON when `--json` is
 * passed or stdout is not a TTY, and as a text summary otherwise.
 *
 * Generated variants are cleaned up whether or not the run succeeds. On
 * success, and after printing `--help`, the process exits with status 0.
 *
 * @throws CLIError (`MISSING_FLAG`) when `--skill-path` is absent or blank.
 */
export async function cliMain(): Promise<void> {
  const { values: flags } = parseArgs({
    options: {
      skill: { type: "string" },
      "skill-path": { type: "string" },
      surface: { type: "string" },
      "max-candidates": { type: "string" },
      agent: { type: "string" },
      "eval-set": { type: "string" },
      "apply-winner": { type: "boolean", default: false },
      json: { type: "boolean", default: false },
      help: { type: "boolean", short: "h", default: false },
    },
    strict: true,
  });

  if (flags.help) {
    console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.searchRun));
    process.exit(0);
  }

  const rawSkillPath = flags["skill-path"] ?? "";
  if (rawSkillPath.trim() === "") {
    throw new CLIError(
      "--skill-path <path> is required.",
      "MISSING_FLAG",
      "selftune search-run --skill-path <path>",
    );
  }

  const surface = readSurface(flags.surface);
  const maxCandidates = readMaxCandidates(flags["max-candidates"]);
  const skillPath = resolveCreateSkillPath(rawSkillPath).skill_path;
  // An explicit --skill wins; otherwise the name is inferred from the path.
  const skill = flags.skill?.trim() || inferSkillName(skillPath);
  const db = getDb();

  // Measured weakness is only looked up when the budget is split across both surfaces.
  const measuredWeakness = surface === "both" ? readMeasuredSurfaceWeakness(skill, db) : undefined;
  const surfacePlan = planVariantCounts(surface, maxCandidates, measuredWeakness);

  // Declared outside the try so the finally block can clean up whatever was produced.
  let cleanupGeneratedVariants: BoundedMutationResult[] = [];

  try {
    const prepared = await generateSearchRunVariants(
      skillPath,
      skill,
      surfacePlan,
      flags.agent,
      db,
    );
    const generatedVariants = prepared.generated_variants;
    cleanupGeneratedVariants = prepared.cleanup_variants;

    const result = await runPackageSearch({
      skill_name: skill,
      candidate_paths: generatedVariants.map(({ skill_path, fingerprint }) => ({
        skill_path,
        fingerprint,
      })),
      max_candidates: maxCandidates,
      surface_plan: surfacePlan,
      agent: flags.agent,
      evalSetPath: flags["eval-set"],
      db,
    });

    let winnerApplication: ApplySearchRunWinnerResult;
    if (flags["apply-winner"]) {
      winnerApplication = applySearchRunWinner(skill, skillPath, result.winner_candidate_id);
    } else {
      winnerApplication = {
        applied_winner: false,
        applied_candidate_id: null,
        next_command: null,
        package_evaluation: null,
      };
    }

    const payload: SearchRunPayload = {
      ...result,
      generated_variants: generatedVariants,
      improved: result.winner_candidate_id != null,
      surface_plan: surfacePlan,
      ...winnerApplication,
    };

    const emitJson = flags.json || !process.stdout.isTTY;
    console.log(emitJson ? JSON.stringify(payload, null, 2) : formatSearchRunSummary(payload));
  } finally {
    cleanupVariants(cleanupGeneratedVariants);
  }

  process.exit(0);
}
|
|
700
|
+
|
|
701
|
+
// Run the CLI only when this module is the program's entry point; failures
// are handed to the shared CLI error handler.
if (import.meta.main) {
  void cliMain().catch(handleCLIError);
}
|