selftune 0.2.30 → 0.2.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -56
- package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
- package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/command-surface.ts +613 -2
- package/cli/selftune/create/baseline.ts +429 -0
- package/cli/selftune/create/check.ts +35 -0
- package/cli/selftune/create/init.ts +115 -0
- package/cli/selftune/create/package-candidate-state.ts +771 -0
- package/cli/selftune/create/package-evaluator.ts +710 -0
- package/cli/selftune/create/package-fingerprint.ts +142 -0
- package/cli/selftune/create/package-search.ts +377 -0
- package/cli/selftune/create/publish.ts +431 -0
- package/cli/selftune/create/readiness.ts +495 -0
- package/cli/selftune/create/replay.ts +330 -0
- package/cli/selftune/create/report.ts +74 -0
- package/cli/selftune/create/scaffold.ts +121 -0
- package/cli/selftune/create/skills-ref-adapter.ts +177 -0
- package/cli/selftune/create/status.ts +33 -0
- package/cli/selftune/create/templates.ts +249 -0
- package/cli/selftune/cron/setup.ts +1 -1
- package/cli/selftune/dashboard-action-events.ts +4 -1
- package/cli/selftune/dashboard-action-result.ts +789 -24
- package/cli/selftune/dashboard-action-stream.ts +80 -0
- package/cli/selftune/dashboard-contract.ts +146 -3
- package/cli/selftune/dashboard-server.ts +5 -4
- package/cli/selftune/eval/hooks-to-evals.ts +58 -35
- package/cli/selftune/eval/synthetic-evals.ts +145 -17
- package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
- package/cli/selftune/evolution/evolve-body.ts +9 -36
- package/cli/selftune/evolution/evolve.ts +8 -72
- package/cli/selftune/evolution/stopping-criteria.ts +5 -13
- package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
- package/cli/selftune/evolution/validate-host-replay.ts +115 -15
- package/cli/selftune/improve.ts +206 -0
- package/cli/selftune/index.ts +123 -6
- package/cli/selftune/init.ts +1 -1
- package/cli/selftune/localdb/queries/dashboard.ts +30 -0
- package/cli/selftune/localdb/schema.ts +52 -0
- package/cli/selftune/monitoring/watch.ts +257 -23
- package/cli/selftune/orchestrate/execute.ts +300 -1
- package/cli/selftune/orchestrate/finalize.ts +14 -0
- package/cli/selftune/orchestrate/plan.ts +22 -5
- package/cli/selftune/orchestrate/prepare.ts +59 -4
- package/cli/selftune/orchestrate/report.ts +1 -1
- package/cli/selftune/orchestrate.ts +34 -1
- package/cli/selftune/publish.ts +35 -0
- package/cli/selftune/registry/github-install.ts +256 -0
- package/cli/selftune/registry/index.ts +1 -1
- package/cli/selftune/registry/install.ts +58 -7
- package/cli/selftune/routes/actions.ts +81 -15
- package/cli/selftune/routes/overview.ts +1 -1
- package/cli/selftune/routes/skill-report.ts +147 -2
- package/cli/selftune/run.ts +18 -0
- package/cli/selftune/schedule.ts +3 -3
- package/cli/selftune/search-run.ts +703 -0
- package/cli/selftune/status.ts +35 -11
- package/cli/selftune/testing-readiness.ts +431 -40
- package/cli/selftune/types.ts +316 -0
- package/cli/selftune/utils/eval-readiness.ts +1 -0
- package/cli/selftune/utils/json-output.ts +11 -0
- package/cli/selftune/utils/lifecycle-surface.ts +48 -0
- package/cli/selftune/utils/query-filter.ts +82 -1
- package/cli/selftune/utils/tui.ts +85 -2
- package/cli/selftune/verify.ts +205 -0
- package/cli/selftune/workflows/proposals.ts +1 -1
- package/cli/selftune/workflows/skill-scaffold.ts +141 -63
- package/cli/selftune/workflows/workflows.ts +4 -4
- package/package.json +1 -1
- package/packages/dashboard-core/src/routes/manifest.ts +2 -2
- package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
- package/packages/ui/src/primitives/button.tsx +5 -0
- package/skill/SKILL.md +148 -85
- package/skill/references/cli-quick-reference.md +16 -1
- package/skill/references/creator-playbook.md +31 -10
- package/skill/workflows/Baseline.md +8 -9
- package/skill/workflows/Contributions.md +4 -4
- package/skill/workflows/Create.md +173 -0
- package/skill/workflows/CreateTestDeploy.md +34 -30
- package/skill/workflows/Cron.md +2 -2
- package/skill/workflows/Dashboard.md +3 -3
- package/skill/workflows/Evals.md +13 -7
- package/skill/workflows/Evolve.md +75 -32
- package/skill/workflows/EvolveBody.md +22 -15
- package/skill/workflows/Hook.md +1 -1
- package/skill/workflows/Improve.md +168 -0
- package/skill/workflows/Initialize.md +3 -3
- package/skill/workflows/Orchestrate.md +49 -12
- package/skill/workflows/Publish.md +100 -0
- package/skill/workflows/Registry.md +19 -13
- package/skill/workflows/Run.md +72 -0
- package/skill/workflows/Schedule.md +2 -2
- package/skill/workflows/SearchRun.md +89 -0
- package/skill/workflows/SignalsDashboard.md +2 -2
- package/skill/workflows/UnitTest.md +13 -4
- package/skill/workflows/Verify.md +136 -0
- package/skill/workflows/Watch.md +114 -47
- package/skill/workflows/Workflows.md +13 -8
- package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +0 -1
- package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
|
@@ -8,6 +8,7 @@ import { hostname } from "node:os";
|
|
|
8
8
|
import { join } from "node:path";
|
|
9
9
|
|
|
10
10
|
import { registryRequest } from "./client.js";
|
|
11
|
+
import { installFromGithubTarget, parseGithubRegistryInstallTarget } from "./github-install.js";
|
|
11
12
|
|
|
12
13
|
export async function cliMain() {
|
|
13
14
|
const args = process.argv.slice(2);
|
|
@@ -17,13 +18,45 @@ export async function cliMain() {
|
|
|
17
18
|
if (!name) {
|
|
18
19
|
console.error(
|
|
19
20
|
JSON.stringify({
|
|
20
|
-
error: "Usage: selftune registry install <name>",
|
|
21
|
+
error: "Usage: selftune registry install <name|github:owner/repo[@ref][//path]>",
|
|
21
22
|
guidance: { next_command: "selftune registry list" },
|
|
22
23
|
}),
|
|
23
24
|
);
|
|
24
25
|
process.exit(1);
|
|
25
26
|
}
|
|
26
27
|
|
|
28
|
+
let githubTarget = null;
|
|
29
|
+
try {
|
|
30
|
+
githubTarget = parseGithubRegistryInstallTarget(name);
|
|
31
|
+
} catch (error) {
|
|
32
|
+
console.error(
|
|
33
|
+
JSON.stringify({
|
|
34
|
+
error: error instanceof Error ? error.message : "Invalid GitHub install target",
|
|
35
|
+
guidance: {
|
|
36
|
+
next_command: "selftune registry install github:owner/repo//path",
|
|
37
|
+
},
|
|
38
|
+
}),
|
|
39
|
+
);
|
|
40
|
+
process.exit(1);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
if (githubTarget) {
|
|
44
|
+
try {
|
|
45
|
+
await installFromGithubTarget(name, globalFlag);
|
|
46
|
+
return;
|
|
47
|
+
} catch (error) {
|
|
48
|
+
console.error(
|
|
49
|
+
JSON.stringify({
|
|
50
|
+
error: error instanceof Error ? error.message : "GitHub install failed",
|
|
51
|
+
guidance: {
|
|
52
|
+
next_command: "selftune registry install github:owner/repo//path",
|
|
53
|
+
},
|
|
54
|
+
}),
|
|
55
|
+
);
|
|
56
|
+
process.exit(1);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
27
60
|
// Find entry by name
|
|
28
61
|
const listResult = await registryRequest<{
|
|
29
62
|
entries: Array<{
|
|
@@ -49,7 +82,12 @@ export async function cliMain() {
|
|
|
49
82
|
// Get detail with versions
|
|
50
83
|
const detailResult = await registryRequest<{
|
|
51
84
|
entry: { id: string; name: string };
|
|
52
|
-
versions: Array<{
|
|
85
|
+
versions: Array<{
|
|
86
|
+
id: string;
|
|
87
|
+
version: string;
|
|
88
|
+
content_hash: string;
|
|
89
|
+
is_current: boolean;
|
|
90
|
+
}>;
|
|
53
91
|
}>("GET", `/${entryId}`);
|
|
54
92
|
|
|
55
93
|
if (!detailResult.success) {
|
|
@@ -71,7 +109,9 @@ export async function cliMain() {
|
|
|
71
109
|
latest_content_hash: string;
|
|
72
110
|
}>;
|
|
73
111
|
}>("POST", "/sync", {
|
|
74
|
-
body: {
|
|
112
|
+
body: {
|
|
113
|
+
installations: [{ entry_id: entryId, current_version_hash: "none" }],
|
|
114
|
+
},
|
|
75
115
|
});
|
|
76
116
|
|
|
77
117
|
const downloadUrl = syncResult.data?.entries?.[0]?.download_url;
|
|
@@ -82,7 +122,9 @@ export async function cliMain() {
|
|
|
82
122
|
|
|
83
123
|
// Download archive
|
|
84
124
|
console.log(`Installing ${name} v${currentVersion.version}...`);
|
|
85
|
-
const response = await fetch(downloadUrl, {
|
|
125
|
+
const response = await fetch(downloadUrl, {
|
|
126
|
+
signal: AbortSignal.timeout(60_000),
|
|
127
|
+
});
|
|
86
128
|
if (!response.ok) {
|
|
87
129
|
console.error(JSON.stringify({ error: `Download failed: HTTP ${response.status}` }));
|
|
88
130
|
process.exit(1);
|
|
@@ -119,13 +161,22 @@ export async function cliMain() {
|
|
|
119
161
|
|
|
120
162
|
// Update local state
|
|
121
163
|
const statePath = join(process.env.HOME || "~", ".selftune", "registry-state.json");
|
|
122
|
-
let state: Array<{
|
|
123
|
-
|
|
164
|
+
let state: Array<{
|
|
165
|
+
entryId: string;
|
|
166
|
+
name: string;
|
|
167
|
+
versionHash: string;
|
|
168
|
+
installPath: string;
|
|
169
|
+
}> = [];
|
|
124
170
|
try {
|
|
125
171
|
state = JSON.parse(readFileSync(statePath, "utf-8"));
|
|
126
172
|
} catch {}
|
|
127
173
|
state = state.filter((s) => s.entryId !== entryId);
|
|
128
|
-
state.push({
|
|
174
|
+
state.push({
|
|
175
|
+
entryId,
|
|
176
|
+
name,
|
|
177
|
+
versionHash: currentVersion.content_hash,
|
|
178
|
+
installPath: targetDir,
|
|
179
|
+
});
|
|
129
180
|
await mkdir(join(process.env.HOME || "~", ".selftune"), { recursive: true });
|
|
130
181
|
await writeFile(statePath, JSON.stringify(state, null, 2));
|
|
131
182
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Route handler: POST /api/actions/{watch,evolve,rollback,watchlist}
|
|
2
|
+
* Route handler: POST /api/actions/{create-check,report-package,search-run,watch,evolve,rollback,watchlist}
|
|
3
3
|
*
|
|
4
4
|
* Triggers selftune CLI commands as child processes and returns the result.
|
|
5
5
|
*/
|
|
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
} from "../dashboard-action-events.js";
|
|
14
14
|
import { resolveDashboardActionOutcome } from "../dashboard-action-result.js";
|
|
15
15
|
import type { DashboardActionEvent, DashboardActionName } from "../dashboard-contract.js";
|
|
16
|
+
import { isCreateSkillDraft } from "../create/readiness.js";
|
|
16
17
|
import { getCanonicalEvalSetPath, getUnitTestPath } from "../testing-readiness.js";
|
|
17
18
|
import { saveWatchedSkills } from "../watchlist.js";
|
|
18
19
|
|
|
@@ -91,7 +92,10 @@ export async function runAction(
|
|
|
91
92
|
stdoutPromise,
|
|
92
93
|
stderrPromise,
|
|
93
94
|
]);
|
|
94
|
-
const action =
|
|
95
|
+
const action =
|
|
96
|
+
(command === "evolve" || command === "improve") && args.includes("--dry-run")
|
|
97
|
+
? "replay-dry-run"
|
|
98
|
+
: null;
|
|
95
99
|
const outcome = action
|
|
96
100
|
? resolveDashboardActionOutcome({
|
|
97
101
|
action,
|
|
@@ -136,6 +140,7 @@ function buildActionExecution(
|
|
|
136
140
|
const skillInput = requireSkillInput(body);
|
|
137
141
|
if (skillInput instanceof Response) return skillInput;
|
|
138
142
|
const { skill, skillPath } = skillInput;
|
|
143
|
+
const isDraftPackage = isCreateSkillDraft(skillPath);
|
|
139
144
|
|
|
140
145
|
if (action === "generate-evals") {
|
|
141
146
|
const args = [
|
|
@@ -171,7 +176,24 @@ function buildActionExecution(
|
|
|
171
176
|
};
|
|
172
177
|
}
|
|
173
178
|
|
|
179
|
+
if (action === "create-check") {
|
|
180
|
+
return {
|
|
181
|
+
command: "create",
|
|
182
|
+
args: ["check", "--skill-path", skillPath],
|
|
183
|
+
skill,
|
|
184
|
+
skillPath,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
|
|
174
188
|
if (action === "replay-dry-run") {
|
|
189
|
+
if (isDraftPackage) {
|
|
190
|
+
return {
|
|
191
|
+
command: "create",
|
|
192
|
+
args: ["replay", "--skill-path", skillPath, "--mode", "package"],
|
|
193
|
+
skill,
|
|
194
|
+
skillPath,
|
|
195
|
+
};
|
|
196
|
+
}
|
|
175
197
|
return {
|
|
176
198
|
command: "evolve",
|
|
177
199
|
args: [
|
|
@@ -190,6 +212,14 @@ function buildActionExecution(
|
|
|
190
212
|
}
|
|
191
213
|
|
|
192
214
|
if (action === "measure-baseline") {
|
|
215
|
+
if (isDraftPackage) {
|
|
216
|
+
return {
|
|
217
|
+
command: "create",
|
|
218
|
+
args: ["baseline", "--skill-path", skillPath, "--mode", "package"],
|
|
219
|
+
skill,
|
|
220
|
+
skillPath,
|
|
221
|
+
};
|
|
222
|
+
}
|
|
193
223
|
return {
|
|
194
224
|
command: "grade",
|
|
195
225
|
args: ["baseline", "--skill", skill, "--skill-path", skillPath],
|
|
@@ -198,9 +228,35 @@ function buildActionExecution(
|
|
|
198
228
|
};
|
|
199
229
|
}
|
|
200
230
|
|
|
231
|
+
if (action === "report-package") {
|
|
232
|
+
return {
|
|
233
|
+
command: "create",
|
|
234
|
+
args: ["report", "--skill-path", skillPath],
|
|
235
|
+
skill,
|
|
236
|
+
skillPath,
|
|
237
|
+
};
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
if (action === "search-run") {
|
|
241
|
+
return {
|
|
242
|
+
command: "search-run",
|
|
243
|
+
args: ["--skill", skill, "--skill-path", skillPath],
|
|
244
|
+
skill,
|
|
245
|
+
skillPath,
|
|
246
|
+
};
|
|
247
|
+
}
|
|
248
|
+
|
|
201
249
|
if (action === "deploy-candidate") {
|
|
250
|
+
if (isDraftPackage) {
|
|
251
|
+
return {
|
|
252
|
+
command: "publish",
|
|
253
|
+
args: ["--skill-path", skillPath, "--no-watch"],
|
|
254
|
+
skill,
|
|
255
|
+
skillPath,
|
|
256
|
+
};
|
|
257
|
+
}
|
|
202
258
|
return {
|
|
203
|
-
command: "
|
|
259
|
+
command: "improve",
|
|
204
260
|
args: ["--skill", skill, "--skill-path", skillPath, "--sync-first"],
|
|
205
261
|
skill,
|
|
206
262
|
skillPath,
|
|
@@ -208,6 +264,14 @@ function buildActionExecution(
|
|
|
208
264
|
}
|
|
209
265
|
|
|
210
266
|
if (action === "watch") {
|
|
267
|
+
if (isDraftPackage) {
|
|
268
|
+
return {
|
|
269
|
+
command: "publish",
|
|
270
|
+
args: ["--skill-path", skillPath],
|
|
271
|
+
skill,
|
|
272
|
+
skillPath,
|
|
273
|
+
};
|
|
274
|
+
}
|
|
211
275
|
return {
|
|
212
276
|
command: "watch",
|
|
213
277
|
args: ["--skill", skill, "--skill-path", skillPath, "--sync-first"],
|
|
@@ -316,6 +380,12 @@ export async function handleAction(
|
|
|
316
380
|
});
|
|
317
381
|
},
|
|
318
382
|
});
|
|
383
|
+
const outcome = resolveDashboardActionOutcome({
|
|
384
|
+
action: normalizedAction as DashboardActionName,
|
|
385
|
+
stdout: result.output,
|
|
386
|
+
stderr: result.error,
|
|
387
|
+
exitCode: result.exitCode ?? 0,
|
|
388
|
+
});
|
|
319
389
|
|
|
320
390
|
emitEvent?.({
|
|
321
391
|
event_id: eventId,
|
|
@@ -324,19 +394,15 @@ export async function handleAction(
|
|
|
324
394
|
skill_name: executable.skill,
|
|
325
395
|
skill_path: executable.skillPath,
|
|
326
396
|
ts: Date.now(),
|
|
327
|
-
success:
|
|
397
|
+
success: outcome.success,
|
|
328
398
|
exit_code: result.exitCode,
|
|
329
|
-
error:
|
|
330
|
-
summary:
|
|
331
|
-
executable.command === "evolve" && executable.args.includes("--dry-run")
|
|
332
|
-
? resolveDashboardActionOutcome({
|
|
333
|
-
action: "replay-dry-run",
|
|
334
|
-
stdout: result.output,
|
|
335
|
-
stderr: result.error,
|
|
336
|
-
exitCode: result.exitCode ?? 0,
|
|
337
|
-
}).summary
|
|
338
|
-
: null,
|
|
399
|
+
error: outcome.error,
|
|
400
|
+
summary: outcome.summary,
|
|
339
401
|
});
|
|
340
402
|
|
|
341
|
-
return Response.json(
|
|
403
|
+
return Response.json({
|
|
404
|
+
...result,
|
|
405
|
+
success: outcome.success,
|
|
406
|
+
error: outcome.error,
|
|
407
|
+
});
|
|
342
408
|
}
|
|
@@ -42,7 +42,7 @@ export function handleOverview(
|
|
|
42
42
|
const pendingReviews = attentionQueue.filter((a) => a.category === "needs_review").length;
|
|
43
43
|
|
|
44
44
|
const trustWatchlist = buildTrustWatchlist(trustSummaries);
|
|
45
|
-
const creatorTesting = buildCreatorTestingOverview(
|
|
45
|
+
const creatorTesting = buildCreatorTestingOverview(skills);
|
|
46
46
|
const autonomyStatus = buildAutonomyStatus(
|
|
47
47
|
db,
|
|
48
48
|
attentionQueue,
|
|
@@ -9,6 +9,12 @@
|
|
|
9
9
|
import type { Database } from "bun:sqlite";
|
|
10
10
|
|
|
11
11
|
import { parseCursorParam } from "../dashboard-contract.js";
|
|
12
|
+
import {
|
|
13
|
+
listAcceptedPackageFrontierCandidates,
|
|
14
|
+
listPackageCandidates,
|
|
15
|
+
} from "../create/package-candidate-state.js";
|
|
16
|
+
import { readSearchRuns } from "../create/package-search.js";
|
|
17
|
+
import { computeCreateDashboardReadiness, isCreateSkillDraft } from "../create/readiness.js";
|
|
12
18
|
import { scoreDescription } from "../evolution/description-quality.js";
|
|
13
19
|
import {
|
|
14
20
|
getExecutionMetrics,
|
|
@@ -17,7 +23,129 @@ import {
|
|
|
17
23
|
getSkillReportPayload,
|
|
18
24
|
safeParseJson,
|
|
19
25
|
} from "../localdb/queries.js";
|
|
20
|
-
import {
|
|
26
|
+
import { computeWatchTrustScore } from "../monitoring/watch.js";
|
|
27
|
+
import type { WatchResult } from "../monitoring/watch.js";
|
|
28
|
+
import {
|
|
29
|
+
getSkillTestingReadiness,
|
|
30
|
+
readCanonicalPackageEvaluationArtifact,
|
|
31
|
+
} from "../testing-readiness.js";
|
|
32
|
+
import type { CreatePackageEvaluationWatchSummary } from "../types.js";
|
|
33
|
+
|
|
34
|
+
function readMeasuredDelta(summary: {
|
|
35
|
+
candidate_acceptance?: {
|
|
36
|
+
replay_pass_rate_delta: number | null;
|
|
37
|
+
routing_pass_rate_delta: number | null;
|
|
38
|
+
baseline_lift_delta: number | null;
|
|
39
|
+
body_quality_delta: number | null;
|
|
40
|
+
unit_test_pass_rate_delta: number | null;
|
|
41
|
+
};
|
|
42
|
+
}): number | null {
|
|
43
|
+
const acceptance = summary.candidate_acceptance;
|
|
44
|
+
if (!acceptance) return null;
|
|
45
|
+
|
|
46
|
+
const deltas = [
|
|
47
|
+
acceptance.replay_pass_rate_delta,
|
|
48
|
+
acceptance.routing_pass_rate_delta,
|
|
49
|
+
acceptance.baseline_lift_delta,
|
|
50
|
+
acceptance.body_quality_delta,
|
|
51
|
+
acceptance.unit_test_pass_rate_delta,
|
|
52
|
+
];
|
|
53
|
+
return deltas.find((delta) => delta != null) ?? null;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
function isWatchDemoted(summary: {
|
|
57
|
+
watch?: {
|
|
58
|
+
rolled_back?: boolean | null;
|
|
59
|
+
alert?: string | null;
|
|
60
|
+
grade_regression?: boolean | null;
|
|
61
|
+
efficiency_regression?: boolean | null;
|
|
62
|
+
};
|
|
63
|
+
}): boolean {
|
|
64
|
+
const watch = summary.watch;
|
|
65
|
+
return Boolean(
|
|
66
|
+
watch?.rolled_back || watch?.alert || watch?.grade_regression || watch?.efficiency_regression,
|
|
67
|
+
);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
function buildFrontierState(db: Database, skillName: string) {
|
|
71
|
+
const candidates = listPackageCandidates(skillName, db);
|
|
72
|
+
if (candidates.length === 0) return null;
|
|
73
|
+
|
|
74
|
+
const acceptedFrontier = listAcceptedPackageFrontierCandidates(skillName, db);
|
|
75
|
+
const evidenceRanks = new Map(
|
|
76
|
+
acceptedFrontier.map((candidate, index) => [candidate.candidate_id, index + 1]),
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
const members = candidates.map((candidate) => ({
|
|
80
|
+
candidate_id: candidate.candidate_id,
|
|
81
|
+
skill_name: candidate.skill_name,
|
|
82
|
+
fingerprint: candidate.package_fingerprint,
|
|
83
|
+
decision:
|
|
84
|
+
candidate.latest_acceptance_decision === "root" ||
|
|
85
|
+
candidate.latest_acceptance_decision === "accepted"
|
|
86
|
+
? "accepted"
|
|
87
|
+
: candidate.latest_acceptance_decision === "rejected"
|
|
88
|
+
? "rejected"
|
|
89
|
+
: "pending",
|
|
90
|
+
measured_delta: readMeasuredDelta(candidate.summary),
|
|
91
|
+
created_at: candidate.first_evaluated_at,
|
|
92
|
+
parent_candidate_id: candidate.parent_candidate_id,
|
|
93
|
+
watch_demoted: isWatchDemoted(candidate.summary),
|
|
94
|
+
evidence_rank: evidenceRanks.get(candidate.candidate_id) ?? null,
|
|
95
|
+
}));
|
|
96
|
+
|
|
97
|
+
const latestSearchRun = readSearchRuns(db, skillName)[0] ?? null;
|
|
98
|
+
|
|
99
|
+
return {
|
|
100
|
+
skill_name: skillName,
|
|
101
|
+
accepted_count: members.filter((member) => member.decision === "accepted").length,
|
|
102
|
+
rejected_count: members.filter((member) => member.decision === "rejected").length,
|
|
103
|
+
pending_count: members.filter((member) => member.decision === "pending").length,
|
|
104
|
+
members,
|
|
105
|
+
latest_search_run: latestSearchRun,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
function hydrateWatchResult(summary: CreatePackageEvaluationWatchSummary): WatchResult {
|
|
110
|
+
return {
|
|
111
|
+
snapshot: summary.snapshot,
|
|
112
|
+
alert: summary.alert,
|
|
113
|
+
rolledBack: summary.rolled_back,
|
|
114
|
+
recommendation: summary.recommendation,
|
|
115
|
+
recommended_command: summary.recommended_command,
|
|
116
|
+
gradeAlert: summary.grade_alert,
|
|
117
|
+
gradeRegression: summary.grade_regression,
|
|
118
|
+
...(summary.efficiency_alert || summary.efficiency_regression
|
|
119
|
+
? {
|
|
120
|
+
efficiencyAlert: summary.efficiency_alert ?? null,
|
|
121
|
+
efficiencyRegression: summary.efficiency_regression ?? null,
|
|
122
|
+
}
|
|
123
|
+
: {}),
|
|
124
|
+
};
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
function readWatchTrustScore(db: Database, skillName: string): number | null {
|
|
128
|
+
const row = db
|
|
129
|
+
.query(
|
|
130
|
+
`SELECT summary_json
|
|
131
|
+
FROM package_evaluation_reports
|
|
132
|
+
WHERE skill_name = ?`,
|
|
133
|
+
)
|
|
134
|
+
.get(skillName) as { summary_json: string } | null;
|
|
135
|
+
|
|
136
|
+
const parsedSummary = row?.summary_json ? safeParseJson(row.summary_json) : null;
|
|
137
|
+
const summaryWatch = parsedSummary?.watch as CreatePackageEvaluationWatchSummary | undefined;
|
|
138
|
+
if (summaryWatch?.snapshot) {
|
|
139
|
+
return computeWatchTrustScore(hydrateWatchResult(summaryWatch));
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
const artifactWatch = readCanonicalPackageEvaluationArtifact(skillName)?.summary.watch;
|
|
143
|
+
if (artifactWatch?.snapshot) {
|
|
144
|
+
return computeWatchTrustScore(hydrateWatchResult(artifactWatch));
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
return null;
|
|
148
|
+
}
|
|
21
149
|
|
|
22
150
|
export function handleSkillReport(
|
|
23
151
|
db: Database,
|
|
@@ -26,6 +154,18 @@ export function handleSkillReport(
|
|
|
26
154
|
): Response {
|
|
27
155
|
const report = getSkillReportPayload(db, skillName);
|
|
28
156
|
const testing_readiness = getSkillTestingReadiness(db, skillName);
|
|
157
|
+
const frontier_state = buildFrontierState(db, skillName);
|
|
158
|
+
const watch_trust_score = readWatchTrustScore(db, skillName);
|
|
159
|
+
let create_readiness = null;
|
|
160
|
+
if (testing_readiness?.skill_path && isCreateSkillDraft(testing_readiness.skill_path)) {
|
|
161
|
+
try {
|
|
162
|
+
create_readiness = computeCreateDashboardReadiness(testing_readiness.skill_path, {
|
|
163
|
+
getTestingReadiness: () => testing_readiness,
|
|
164
|
+
});
|
|
165
|
+
} catch {
|
|
166
|
+
create_readiness = null;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
29
169
|
|
|
30
170
|
// 1. Evolution audit with eval_snapshot
|
|
31
171
|
const evolution = db
|
|
@@ -187,7 +327,9 @@ export function handleSkillReport(
|
|
|
187
327
|
testing_readiness?.unit_test_cases ||
|
|
188
328
|
testing_readiness?.replay_check_count ||
|
|
189
329
|
testing_readiness?.baseline_sample_size,
|
|
190
|
-
)
|
|
330
|
+
) ||
|
|
331
|
+
Boolean(create_readiness) ||
|
|
332
|
+
Boolean(frontier_state);
|
|
191
333
|
if (!hasData) {
|
|
192
334
|
return Response.json({ error: "Skill not found" }, { status: 404 });
|
|
193
335
|
}
|
|
@@ -892,5 +1034,8 @@ export function handleSkillReport(
|
|
|
892
1034
|
data_hygiene,
|
|
893
1035
|
examples,
|
|
894
1036
|
testing_readiness,
|
|
1037
|
+
create_readiness,
|
|
1038
|
+
watch_trust_score,
|
|
1039
|
+
frontier_state,
|
|
895
1040
|
});
|
|
896
1041
|
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "./command-surface.js";
|
|
2
|
+
import { cliMain as orchestrateCliMain } from "./orchestrate.js";
|
|
3
|
+
import { handleCLIError } from "./utils/cli-error.js";
|
|
4
|
+
|
|
5
|
+
export async function cliMain(): Promise<void> {
|
|
6
|
+
const rawArgs = process.argv.slice(2);
|
|
7
|
+
|
|
8
|
+
if (rawArgs.includes("--help") || rawArgs.includes("-h")) {
|
|
9
|
+
console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.run));
|
|
10
|
+
process.exit(0);
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
await orchestrateCliMain();
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
if (import.meta.main) {
|
|
17
|
+
cliMain().catch(handleCLIError);
|
|
18
|
+
}
|
package/cli/selftune/schedule.ts
CHANGED
|
@@ -58,7 +58,7 @@ function commandForJob(jobName: string): string {
|
|
|
58
58
|
case "selftune-status":
|
|
59
59
|
return "selftune sync && selftune status";
|
|
60
60
|
case "selftune-orchestrate":
|
|
61
|
-
return "selftune
|
|
61
|
+
return "selftune run --max-skills 3";
|
|
62
62
|
default:
|
|
63
63
|
return `selftune ${jobName.replace("selftune-", "")}`;
|
|
64
64
|
}
|
|
@@ -162,8 +162,8 @@ export function generateCrontab(): string {
|
|
|
162
162
|
const lines = [
|
|
163
163
|
"# selftune automation — add to your crontab with: crontab -e",
|
|
164
164
|
"#",
|
|
165
|
-
"# The core loop: sync →
|
|
166
|
-
"# status remains a reporting job;
|
|
165
|
+
"# The core loop: sync → run",
|
|
166
|
+
"# status remains a reporting job; run handles sync, candidate",
|
|
167
167
|
"# selection, low-risk description evolution, and watch/rollback follow-up.",
|
|
168
168
|
"#",
|
|
169
169
|
`PATH=${home}/.bun/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin`,
|