selftune 0.2.28 → 0.2.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +1 -0
  2. package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +15 -0
  3. package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +1 -0
  4. package/apps/local-dashboard/dist/index.html +3 -3
  5. package/cli/selftune/auto-update.ts +40 -8
  6. package/cli/selftune/command-surface.ts +1 -1
  7. package/cli/selftune/constants.ts +5 -0
  8. package/cli/selftune/dashboard-action-events.ts +117 -0
  9. package/cli/selftune/dashboard-action-instrumentation.ts +103 -0
  10. package/cli/selftune/dashboard-action-result.ts +90 -0
  11. package/cli/selftune/dashboard-action-stream.ts +252 -0
  12. package/cli/selftune/dashboard-contract.ts +81 -1
  13. package/cli/selftune/dashboard-server.ts +133 -16
  14. package/cli/selftune/eval/hooks-to-evals.ts +157 -0
  15. package/cli/selftune/eval/synthetic-evals.ts +33 -2
  16. package/cli/selftune/eval/unit-test-cli.ts +53 -5
  17. package/cli/selftune/evolution/validate-host-replay.ts +191 -14
  18. package/cli/selftune/index.ts +4 -0
  19. package/cli/selftune/ingestors/opencode-ingest.ts +117 -8
  20. package/cli/selftune/localdb/schema.ts +34 -0
  21. package/cli/selftune/routes/actions.ts +273 -42
  22. package/cli/selftune/testing-readiness.ts +203 -10
  23. package/cli/selftune/utils/llm-call.ts +90 -1
  24. package/package.json +1 -1
  25. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +1 -1
  26. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +1 -5
  27. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +10 -18
  28. package/packages/ui/src/components/EvidenceViewer.tsx +15 -142
  29. package/packages/ui/src/components/EvolutionTimeline.tsx +20 -44
  30. package/packages/ui/src/components/SkillReportPanels.tsx +1 -4
  31. package/skill/SKILL.md +1 -1
  32. package/skill/workflows/Dashboard.md +50 -23
  33. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +0 -1
  34. package/apps/local-dashboard/dist/assets/index-MMLFlnVn.js +0 -15
  35. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +0 -1
@@ -4,44 +4,235 @@
4
4
  * Triggers selftune CLI commands as child processes and returns the result.
5
5
  */
6
6
 
7
+ import { randomUUID } from "node:crypto";
7
8
  import { join } from "node:path";
8
9
 
10
+ import {
11
+ dashboardActionContextEnv,
12
+ type DashboardActionContext,
13
+ } from "../dashboard-action-events.js";
14
+ import { resolveDashboardActionOutcome } from "../dashboard-action-result.js";
15
+ import type { DashboardActionEvent, DashboardActionName } from "../dashboard-contract.js";
16
+ import { getCanonicalEvalSetPath, getUnitTestPath } from "../testing-readiness.js";
9
17
  import { saveWatchedSkills } from "../watchlist.js";
10
18
 
19
/**
 * Optional per-invocation hooks accepted by an action runner.
 */
export interface ActionExecutionHooks {
  /** Context for the dashboard action being executed, forwarded by the runner. */
  actionContext?: DashboardActionContext;
  /** Called with each decoded chunk of the child process's standard output. */
  onStdout?: (chunk: string) => void;
  /** Called with each decoded chunk of the child process's standard error. */
  onStderr?: (chunk: string) => void;
}
24
+
11
25
  export type ActionRunner = (
12
26
  command: string,
13
27
  args: string[],
14
- ) => Promise<{ success: boolean; output: string; error: string | null }>;
28
+ hooks?: ActionExecutionHooks,
29
+ ) => Promise<{
30
+ success: boolean;
31
+ output: string;
32
+ error: string | null;
33
+ exitCode: number | null;
34
+ }>;
35
+
36
/** Callback that receives each dashboard action lifecycle event as it is produced. */
export type ActionEventEmitter = (event: DashboardActionEvent) => void;
37
+
38
/**
 * Drains a byte stream into a single string, optionally forwarding every
 * decoded chunk to a callback as it arrives.
 *
 * Decoding runs in streaming mode so a multi-byte UTF-8 sequence that is split
 * across two reads is reassembled instead of being replaced; any bytes still
 * buffered in the decoder are flushed once the stream ends.
 *
 * The reader lock is always released before returning or throwing, so the
 * stream is not left locked when a read fails or the callback throws.
 *
 * @param stream - Stream to read; `null`/`undefined` yields an empty string.
 * @param onChunk - Optional callback invoked with each non-empty decoded chunk.
 * @returns The concatenation of every decoded chunk.
 */
async function readProcessStream(
  stream: ReadableStream<Uint8Array> | null | undefined,
  onChunk?: (chunk: string) => void,
): Promise<string> {
  if (!stream) return "";
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let output = "";

  // Empty strings are skipped: the decoder returns "" while it is still
  // waiting for the remainder of a multi-byte sequence.
  const emit = (text: string): void => {
    if (!text) return;
    output += text;
    onChunk?.(text);
  };

  try {
    for (;;) {
      const { value, done } = await reader.read();
      if (done) break;
      emit(decoder.decode(value, { stream: true }));
    }
    // Flush whatever the decoder is still holding.
    emit(decoder.decode());
  } finally {
    reader.releaseLock();
  }

  return output;
}
15
64
 
16
65
  export async function runAction(
17
66
  command: string,
18
67
  args: string[],
19
- ): Promise<{ success: boolean; output: string; error: string | null }> {
68
+ hooks?: ActionExecutionHooks,
69
+ ): Promise<{
70
+ success: boolean;
71
+ output: string;
72
+ error: string | null;
73
+ exitCode: number | null;
74
+ }> {
20
75
  try {
21
76
  const indexPath = join(import.meta.dir, "..", "index.ts");
22
77
  const proc = Bun.spawn(["bun", "run", indexPath, command, ...args], {
23
78
  stdout: "pipe",
24
79
  stderr: "pipe",
80
+ env: {
81
+ ...process.env,
82
+ SELFTUNE_SKIP_AUTO_UPDATE: "1",
83
+ SELFTUNE_DASHBOARD_STREAM_DISABLE: "1",
84
+ ...dashboardActionContextEnv(hooks?.actionContext ?? null),
85
+ },
25
86
  });
26
- const [stdout, stderr] = await Promise.all([
27
- new Response(proc.stdout).text(),
28
- new Response(proc.stderr).text(),
87
+ const stdoutPromise = readProcessStream(proc.stdout, hooks?.onStdout);
88
+ const stderrPromise = readProcessStream(proc.stderr, hooks?.onStderr);
89
+ const [exitCode, stdout, stderr] = await Promise.all([
90
+ proc.exited,
91
+ stdoutPromise,
92
+ stderrPromise,
29
93
  ]);
30
- const exitCode = await proc.exited;
31
- if (exitCode !== 0) {
32
- return { success: false, output: stdout, error: stderr || `Exit code ${exitCode}` };
33
- }
34
- return { success: true, output: stdout, error: null };
94
+ const action = command === "evolve" && args.includes("--dry-run") ? "replay-dry-run" : null;
95
+ const outcome = action
96
+ ? resolveDashboardActionOutcome({
97
+ action,
98
+ stdout,
99
+ stderr,
100
+ exitCode,
101
+ })
102
+ : {
103
+ success: exitCode === 0,
104
+ error: exitCode === 0 ? null : stderr || `Exit code ${exitCode}`,
105
+ };
106
+ return {
107
+ success: outcome.success,
108
+ output: stdout,
109
+ error: outcome.error,
110
+ exitCode,
111
+ };
35
112
  } catch (err: unknown) {
36
113
  const message = err instanceof Error ? err.message : String(err);
37
- return { success: false, output: "", error: message };
114
+ return { success: false, output: "", error: message, exitCode: null };
115
+ }
116
+ }
117
+
118
/**
 * Extracts the `skill` and `skillPath` fields from a request body.
 *
 * Both fields must be non-empty strings. The body is untyped request data, so
 * the check is done at runtime rather than with a type cast: a truthy
 * non-string value (number, array, object) is rejected instead of being
 * passed on as if it were a string.
 *
 * @param body - Parsed request body.
 * @returns The validated pair, or a 400 JSON `Response` when either field is
 *   missing or not a non-empty string.
 */
function requireSkillInput(
  body: Record<string, unknown>,
): { skill: string; skillPath: string } | Response {
  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === "string" && value.length > 0;

  const { skill, skillPath } = body;
  if (!isNonEmptyString(skill) || !isNonEmptyString(skillPath)) {
    return Response.json(
      { success: false, error: "Missing required fields: skill, skillPath" },
      { status: 400 },
    );
  }
  return { skill, skillPath };
}
131
+
132
/**
 * Translates a dashboard action plus its request body into the CLI command
 * and argument list to execute.
 *
 * Unknown actions are rejected before the body is inspected, so a caller that
 * sends an unsupported action always gets the "Unknown action" error rather
 * than a misleading "Missing required fields" one.
 *
 * @param action - Dashboard action name. It is validated at runtime as well,
 *   because callers may pass an unchecked string cast to the union type.
 * @param body - Parsed request body; must carry `skill` and `skillPath`.
 *   `autoSynthetic` (for `generate-evals`) and `proposalId` (for `rollback`)
 *   are optional.
 * @returns The command, its arguments and the resolved skill identity, or a
 *   400 JSON `Response` describing why the request cannot be executed.
 */
function buildActionExecution(
  action: DashboardActionName,
  body: Record<string, unknown>,
): { command: string; args: string[]; skill: string; skillPath: string } | Response {
  // Widen to string so the runtime checks below also cover values outside the
  // declared union.
  const actionName: string = action;
  const unknownAction = (): Response =>
    Response.json({ success: false, error: `Unknown action: ${actionName}` }, { status: 400 });

  const supportedActions: ReadonlySet<string> = new Set([
    "generate-evals",
    "generate-unit-tests",
    "replay-dry-run",
    "measure-baseline",
    "deploy-candidate",
    "watch",
    "rollback",
  ]);
  if (!supportedActions.has(actionName)) return unknownAction();

  const skillInput = requireSkillInput(body);
  if (skillInput instanceof Response) return skillInput;
  const { skill, skillPath } = skillInput;
  const skillArgs = ["--skill", skill, "--skill-path", skillPath];

  switch (actionName) {
    case "generate-evals": {
      const args = ["generate", ...skillArgs, "--output", getCanonicalEvalSetPath(skill)];
      if (body.autoSynthetic === true) {
        args.push("--auto-synthetic");
      }
      return { command: "eval", args, skill, skillPath };
    }
    case "generate-unit-tests":
      return {
        command: "eval",
        args: [
          "unit-test",
          "--skill",
          skill,
          "--generate",
          "--skill-path",
          skillPath,
          "--tests",
          getUnitTestPath(skill),
        ],
        skill,
        skillPath,
      };
    case "replay-dry-run":
      return {
        command: "evolve",
        args: [...skillArgs, "--dry-run", "--validation-mode", "replay", "--sync-first"],
        skill,
        skillPath,
      };
    case "measure-baseline":
      return {
        command: "grade",
        args: ["baseline", ...skillArgs],
        skill,
        skillPath,
      };
    case "deploy-candidate":
      return {
        command: "evolve",
        args: [...skillArgs, "--sync-first"],
        skill,
        skillPath,
      };
    case "watch":
      return {
        command: "watch",
        args: [...skillArgs, "--sync-first"],
        skill,
        skillPath,
      };
    case "rollback": {
      const args = ["rollback", ...skillArgs];
      // Only forward a proposal id that really is a non-empty string; the body
      // is untyped request data.
      const proposalId = body.proposalId;
      if (typeof proposalId === "string" && proposalId.length > 0) {
        args.push("--proposal-id", proposalId);
      }
      return { command: "evolve", args, skill, skillPath };
    }
    default:
      return unknownAction();
  }
}
40
230
 
41
231
  export async function handleAction(
42
232
  action: string,
43
233
  body: Record<string, unknown>,
44
234
  executeAction: ActionRunner = runAction,
235
+ emitEvent?: ActionEventEmitter,
45
236
  ): Promise<Response> {
46
237
  if (action === "watchlist") {
47
238
  const skills = body.skills;
@@ -62,7 +253,11 @@ export async function handleAction(
62
253
  }
63
254
  try {
64
255
  const saved = saveWatchedSkills(skills);
65
- return Response.json({ success: true, watched_skills: saved, error: null });
256
+ return Response.json({
257
+ success: true,
258
+ watched_skills: saved,
259
+ error: null,
260
+ });
66
261
  } catch (error: unknown) {
67
262
  const message = error instanceof Error ? error.message : String(error);
68
263
  return Response.json(
@@ -75,37 +270,73 @@ export async function handleAction(
75
270
  }
76
271
  }
77
272
 
78
- if (action === "watch" || action === "evolve") {
79
- const skill = body.skill as string | undefined;
80
- const skillPath = body.skillPath as string | undefined;
81
- if (!skill || !skillPath) {
82
- return Response.json(
83
- { success: false, error: "Missing required fields: skill, skillPath" },
84
- { status: 400 },
85
- );
86
- }
87
- const args = ["--skill", skill, "--skill-path", skillPath, "--sync-first"];
88
- const result = await executeAction(action, args);
89
- return Response.json(result);
273
+ const normalizedAction = action === "evolve" ? "deploy-candidate" : action;
274
+ const executable = buildActionExecution(normalizedAction as DashboardActionName, body);
275
+ if (executable instanceof Response) {
276
+ return executable;
90
277
  }
91
278
 
92
- if (action === "rollback") {
93
- const skill = body.skill as string | undefined;
94
- const skillPath = body.skillPath as string | undefined;
95
- const proposalId = body.proposalId as string | undefined;
96
- if (!skill || !skillPath) {
97
- return Response.json(
98
- { success: false, error: "Missing required fields: skill, skillPath" },
99
- { status: 400 },
100
- );
101
- }
102
- const args = ["--skill", skill, "--skill-path", skillPath];
103
- if (proposalId) {
104
- args.push("--proposal-id", proposalId);
105
- }
106
- const result = await executeAction(action, args);
107
- return Response.json(result);
108
- }
279
+ const eventId = randomUUID();
280
+ emitEvent?.({
281
+ event_id: eventId,
282
+ action: normalizedAction as DashboardActionName,
283
+ stage: "started",
284
+ skill_name: executable.skill,
285
+ skill_path: executable.skillPath,
286
+ ts: Date.now(),
287
+ });
109
288
 
110
- return Response.json({ success: false, error: `Unknown action: ${action}` }, { status: 400 });
289
+ const result = await executeAction(executable.command, executable.args, {
290
+ actionContext: {
291
+ eventId,
292
+ action: normalizedAction as DashboardActionName,
293
+ skillName: executable.skill,
294
+ skillPath: executable.skillPath,
295
+ },
296
+ onStdout(chunk) {
297
+ emitEvent?.({
298
+ event_id: eventId,
299
+ action: normalizedAction as DashboardActionName,
300
+ stage: "stdout",
301
+ skill_name: executable.skill,
302
+ skill_path: executable.skillPath,
303
+ ts: Date.now(),
304
+ chunk,
305
+ });
306
+ },
307
+ onStderr(chunk) {
308
+ emitEvent?.({
309
+ event_id: eventId,
310
+ action: normalizedAction as DashboardActionName,
311
+ stage: "stderr",
312
+ skill_name: executable.skill,
313
+ skill_path: executable.skillPath,
314
+ ts: Date.now(),
315
+ chunk,
316
+ });
317
+ },
318
+ });
319
+
320
+ emitEvent?.({
321
+ event_id: eventId,
322
+ action: normalizedAction as DashboardActionName,
323
+ stage: "finished",
324
+ skill_name: executable.skill,
325
+ skill_path: executable.skillPath,
326
+ ts: Date.now(),
327
+ success: result.success,
328
+ exit_code: result.exitCode,
329
+ error: result.error,
330
+ summary:
331
+ executable.command === "evolve" && executable.args.includes("--dry-run")
332
+ ? resolveDashboardActionOutcome({
333
+ action: "replay-dry-run",
334
+ stdout: result.output,
335
+ stderr: result.error,
336
+ exitCode: result.exitCode ?? 0,
337
+ }).summary
338
+ : null,
339
+ });
340
+
341
+ return Response.json(result);
111
342
  }
@@ -1,7 +1,7 @@
1
1
  import type { Database } from "bun:sqlite";
2
2
 
3
3
  import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
4
- import { join } from "node:path";
4
+ import { dirname, join } from "node:path";
5
5
 
6
6
  import { SELFTUNE_CONFIG_DIR } from "./constants.js";
7
7
  import type {
@@ -11,7 +11,8 @@ import type {
11
11
  SkillEvalReadiness,
12
12
  SkillTestingReadiness,
13
13
  } from "./dashboard-contract.js";
14
- import type { EvalEntry, UnitTestSuiteResult } from "./types.js";
14
+ import { getDb } from "./localdb/db.js";
15
+ import type { EvalEntry, SkillUnitTest, UnitTestSuiteResult } from "./types.js";
15
16
  import { queryEvolutionEvidence } from "./localdb/queries/evolution.js";
16
17
  import { queryTrustedSkillObservationRows } from "./localdb/queries/trust.js";
17
18
  import {
@@ -27,6 +28,7 @@ interface TrustedSkillObservationSummary {
27
28
  }
28
29
 
29
30
  interface TestingReadinessContext {
31
+ db: Database;
30
32
  knownSkills: Set<string>;
31
33
  searchDirs: string[];
32
34
  trustedRowsBySkill: Map<string, TrustedSkillObservationSummary[]>;
@@ -64,14 +66,188 @@ export function getUnitTestResultPath(skillName: string): string {
64
66
  return join(getUnitTestDir(), `${skillName}.last-run.json`);
65
67
  }
66
68
 
69
/**
 * Best-effort database lookup.
 *
 * @returns The handle produced by `getDb()`, or `null` when `getDb()` throws.
 */
function getOptionalDb(): Database | null {
  let handle: Database | null = null;
  try {
    handle = getDb();
  } catch {
    // Deliberately swallowed: callers treat a missing database as optional.
    handle = null;
  }
  return handle;
}
76
+
77
/**
 * Parses a JSON string that is expected to hold an array.
 *
 * @param value - Raw JSON text; may be `null`, `undefined` or empty.
 * @returns The parsed array, or an empty array when the input is missing,
 *   is not valid JSON, or does not decode to an array.
 */
function parseJsonArray(value: string | null | undefined): unknown[] {
  if (value == null || value === "") return [];

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return [];
  }

  if (!Array.isArray(decoded)) return [];
  return decoded;
}
86
+
87
/**
 * Inserts or replaces the stored eval set for a skill in `canonical_eval_sets`.
 *
 * The row is keyed by `skill_name`; on conflict the timestamp and the JSON
 * payload are overwritten. The timestamp is the current time in ISO-8601 form.
 *
 * @param db - Database handle to write to.
 * @param skillName - Skill the eval set belongs to.
 * @param evalSet - Entries to serialize as JSON.
 */
function upsertCanonicalEvalSet(db: Database, skillName: string, evalSet: EvalEntry[]): void {
  const storedAt = new Date().toISOString();
  const serialized = JSON.stringify(evalSet);
  const statement = `INSERT INTO canonical_eval_sets (skill_name, stored_at, eval_set_json)
     VALUES (?, ?, ?)
     ON CONFLICT(skill_name) DO UPDATE SET
       stored_at = excluded.stored_at,
       eval_set_json = excluded.eval_set_json`;
  db.run(statement, [skillName, storedAt, serialized]);
}
97
+
98
/**
 * Inserts or replaces the stored unit tests for a skill in `unit_test_files`.
 *
 * The row is keyed by `skill_name`; on conflict the timestamp and the JSON
 * payload are overwritten. The timestamp is the current time in ISO-8601 form.
 *
 * @param db - Database handle to write to.
 * @param skillName - Skill the tests belong to.
 * @param tests - Unit tests to serialize as JSON.
 */
function upsertUnitTestFile(db: Database, skillName: string, tests: SkillUnitTest[]): void {
  const storedAt = new Date().toISOString();
  const serialized = JSON.stringify(tests);
  const statement = `INSERT INTO unit_test_files (skill_name, stored_at, tests_json)
     VALUES (?, ?, ?)
     ON CONFLICT(skill_name) DO UPDATE SET
       stored_at = excluded.stored_at,
       tests_json = excluded.tests_json`;
  db.run(statement, [skillName, storedAt, serialized]);
}
108
+
109
/**
 * Inserts or replaces the latest unit-test run summary for a skill in
 * `unit_test_run_results`.
 *
 * The summary counters are stored in their own columns and the whole suite
 * result is additionally stored as JSON. The row is keyed by `skill_name`;
 * on conflict every other column is overwritten.
 *
 * @param db - Database handle to write to.
 * @param skillName - Skill the run belongs to.
 * @param suite - Suite result to persist.
 */
function upsertUnitTestRunResult(
  db: Database,
  skillName: string,
  suite: UnitTestSuiteResult,
): void {
  const { run_at, total, passed, failed, pass_rate } = suite;
  const bindings = [skillName, run_at, total, passed, failed, pass_rate, JSON.stringify(suite)];
  const statement = `INSERT INTO unit_test_run_results
       (skill_name, run_at, total, passed, failed, pass_rate, result_json)
     VALUES (?, ?, ?, ?, ?, ?, ?)
     ON CONFLICT(skill_name) DO UPDATE SET
       run_at = excluded.run_at,
       total = excluded.total,
       passed = excluded.passed,
       failed = excluded.failed,
       pass_rate = excluded.pass_rate,
       result_json = excluded.result_json`;
  db.run(statement, bindings);
}
136
+
137
/**
 * Loads the stored eval set for a skill from `canonical_eval_sets`.
 *
 * @param db - Database handle to read from.
 * @param skillName - Skill to look up.
 * @returns The decoded entries and the stored timestamp, or `null` when the
 *   skill has no row. The JSON column is decoded with `parseJsonArray`; the
 *   entries are not validated beyond being array elements.
 */
function readCanonicalEvalSetFromDb(
  db: Database,
  skillName: string,
): {
  entries: EvalEntry[];
  storedAt: string | null;
} | null {
  const statement = db.query(
    `SELECT eval_set_json, stored_at
       FROM canonical_eval_sets
       WHERE skill_name = ?`,
  );
  const record = statement.get(skillName) as { eval_set_json: string; stored_at: string } | null;
  if (record == null) return null;

  const entries = parseJsonArray(record.eval_set_json) as EvalEntry[];
  const storedAt = record.stored_at ?? null;
  return { entries, storedAt };
}
157
+
158
/**
 * Loads the stored unit tests for a skill from `unit_test_files`.
 *
 * @param db - Database handle to read from.
 * @param skillName - Skill to look up.
 * @returns The decoded tests and the stored timestamp, or `null` when the
 *   skill has no row. The JSON column is decoded with `parseJsonArray`; the
 *   tests are not validated beyond being array elements.
 */
function readUnitTestsFromDb(
  db: Database,
  skillName: string,
): {
  tests: SkillUnitTest[];
  storedAt: string | null;
} | null {
  const statement = db.query(
    `SELECT tests_json, stored_at
       FROM unit_test_files
       WHERE skill_name = ?`,
  );
  const record = statement.get(skillName) as { tests_json: string; stored_at: string } | null;
  if (record == null) return null;

  const tests = parseJsonArray(record.tests_json) as SkillUnitTest[];
  const storedAt = record.stored_at ?? null;
  return { tests, storedAt };
}
178
+
179
/**
 * Loads the stored unit-test run result for a skill from
 * `unit_test_run_results`.
 *
 * The `result_json` column is parsed and shape-checked: `skill_name` and
 * `run_at` must be strings and `total`, `passed`, `failed` and `pass_rate`
 * must be numbers.
 *
 * @param db - Database handle to read from.
 * @param skillName - Skill to look up.
 * @returns The parsed suite result, or `null` when there is no row, the
 *   column is empty, the JSON is invalid, or the shape check fails.
 */
function readUnitTestRunResultFromDb(db: Database, skillName: string): UnitTestSuiteResult | null {
  const record = db
    .query(
      `SELECT result_json
         FROM unit_test_run_results
         WHERE skill_name = ?`,
    )
    .get(skillName) as { result_json: string } | null;

  const raw = record?.result_json;
  if (!raw) return null;

  let candidate: unknown;
  try {
    candidate = JSON.parse(raw);
  } catch {
    return null;
  }
  if (typeof candidate !== "object" || candidate === null) return null;

  const fields = candidate as Record<string, unknown>;
  const stringKeys = ["skill_name", "run_at"] as const;
  const numberKeys = ["total", "passed", "failed", "pass_rate"] as const;
  const shapeOk =
    stringKeys.every((key) => typeof fields[key] === "string") &&
    numberKeys.every((key) => typeof fields[key] === "number");

  return shapeOk ? (candidate as UnitTestSuiteResult) : null;
}
207
+
208
/**
 * Collects the distinct, non-empty `skill_name` values stored in a table.
 *
 * The table name is interpolated into the SQL text because identifiers cannot
 * be bound as parameters, so it is first checked to be a plain identifier
 * (letters, digits and underscores, not starting with a digit). This keeps an
 * unexpected value from being executed as SQL.
 *
 * @param db - Database handle to read from.
 * @param tableName - Name of a table that has a `skill_name` column.
 * @returns The set of skill names found in the table.
 * @throws Error when `tableName` is not a plain identifier.
 */
function listStoredSkillNames(db: Database, tableName: string): Set<string> {
  if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(tableName)) {
    throw new Error(`Invalid table name: ${tableName}`);
  }
  const rows = db.query(`SELECT skill_name FROM ${tableName}`).all() as Array<{
    skill_name: string;
  }>;
  // Drop empty or missing names so they never become known skills.
  return new Set(rows.map((row) => row.skill_name).filter(Boolean));
}
214
+
67
215
  export function writeCanonicalEvalSet(skillName: string, evalSet: EvalEntry[]): string {
68
- mkdirSync(getEvalSetDir(), { recursive: true });
69
216
  const path = getCanonicalEvalSetPath(skillName);
217
+ const db = getOptionalDb();
218
+ if (db) {
219
+ upsertCanonicalEvalSet(db, skillName, evalSet);
220
+ }
221
+ mkdirSync(getEvalSetDir(), { recursive: true });
70
222
  writeFileSync(path, JSON.stringify(evalSet, null, 2), "utf-8");
71
223
  return path;
72
224
  }
73
225
 
226
/**
 * Persists a skill's unit tests.
 *
 * The tests are written to the database when one is available, then to the
 * canonical unit-test file. When `outputPath` is given and differs from the
 * canonical path, an identical copy is also written there, creating its
 * parent directory if needed.
 *
 * @param skillName - Skill the tests belong to.
 * @param tests - Unit tests to store.
 * @param outputPath - Optional additional location for a copy of the file.
 * @returns `outputPath` when a separate copy was written, otherwise the
 *   canonical path.
 */
export function writeCanonicalUnitTests(
  skillName: string,
  tests: SkillUnitTest[],
  outputPath?: string,
): string {
  const canonicalPath = getUnitTestPath(skillName);

  const db = getOptionalDb();
  if (db) upsertUnitTestFile(db, skillName, tests);

  const fileContents = JSON.stringify(tests, null, 2);
  mkdirSync(getUnitTestDir(), { recursive: true });
  writeFileSync(canonicalPath, fileContents, "utf-8");

  const wantsSeparateCopy = Boolean(outputPath) && outputPath !== canonicalPath;
  if (!wantsSeparateCopy || !outputPath) return canonicalPath;

  mkdirSync(dirname(outputPath), { recursive: true });
  writeFileSync(outputPath, fileContents, "utf-8");
  return outputPath;
}
245
+
74
246
  export function writeUnitTestRunResult(skillName: string, suite: UnitTestSuiteResult): string {
247
+ const db = getOptionalDb();
248
+ if (db) {
249
+ upsertUnitTestRunResult(db, skillName, suite);
250
+ }
75
251
  mkdirSync(getUnitTestDir(), { recursive: true });
76
252
  const path = getUnitTestResultPath(skillName);
77
253
  writeFileSync(path, JSON.stringify(suite, null, 2), "utf-8");
@@ -188,14 +364,14 @@ function summarizeReadiness(
188
364
  switch (nextStep) {
189
365
  case "generate_evals":
190
366
  if (evalReadiness === "log_ready") {
191
- return "Trusted telemetry exists, but no canonical eval set is saved yet.";
367
+ return "Trusted telemetry exists, but no canonical eval set is stored yet.";
192
368
  }
193
369
  if (evalReadiness === "cold_start_ready") {
194
370
  return "Installed locally but still cold-start. Generate synthetic evals before you evolve it.";
195
371
  }
196
372
  return "Telemetry exists, but selftune cannot resolve a local SKILL.md yet. Point it at the skill and generate evals.";
197
373
  case "run_unit_tests":
198
- return `Eval coverage is present (${evalSetEntries} entries), but no unit test file is saved yet.`;
374
+ return `Eval coverage is present (${evalSetEntries} entries), but no unit tests are stored yet.`;
199
375
  case "run_replay_dry_run": {
200
376
  const passRateText =
201
377
  unitTestPassRate != null
@@ -331,6 +507,9 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
331
507
  if (!entry.endsWith(".json")) return null;
332
508
  return entry.slice(0, -".json".length);
333
509
  });
510
+ const storedEvalNames = listStoredSkillNames(db, "canonical_eval_sets");
511
+ const storedUnitTestNames = listStoredSkillNames(db, "unit_test_files");
512
+ const storedUnitTestRunNames = listStoredSkillNames(db, "unit_test_run_results");
334
513
 
335
514
  const evidenceRows = queryEvolutionEvidence(db);
336
515
  const evalEvidenceBySkill = new Map<string, { count: number; latestAt: string | null }>();
@@ -445,6 +624,9 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
445
624
  ...unitTestNames,
446
625
  ...unitTestResultNames,
447
626
  ...canonicalEvalNames,
627
+ ...storedEvalNames,
628
+ ...storedUnitTestNames,
629
+ ...storedUnitTestRunNames,
448
630
  ...evalEvidenceBySkill.keys(),
449
631
  ...replayBySkill.keys(),
450
632
  ...baselineBySkill.keys(),
@@ -452,6 +634,7 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
452
634
  ]);
453
635
 
454
636
  return {
637
+ db,
455
638
  knownSkills,
456
639
  searchDirs,
457
640
  trustedRowsBySkill,
@@ -480,16 +663,26 @@ function buildSkillTestingReadinessRow(
480
663
  const evalReadiness = deriveEvalReadiness(skillPath, trustedTriggerCount);
481
664
 
482
665
  const canonicalEvalPath = getCanonicalEvalSetPath(skillName);
483
- const canonicalEvalEntries = readJsonArrayFile(canonicalEvalPath);
484
- const canonicalEvalStat = existsSync(canonicalEvalPath) ? statSync(canonicalEvalPath) : null;
666
+ const storedEvalSet = readCanonicalEvalSetFromDb(context.db, skillName);
667
+ const canonicalEvalEntries =
668
+ storedEvalSet?.entries ?? (readJsonArrayFile(canonicalEvalPath) as EvalEntry[]);
669
+ const canonicalEvalStat =
670
+ !storedEvalSet && existsSync(canonicalEvalPath) ? statSync(canonicalEvalPath) : null;
485
671
  const evidenceEval = context.evalEvidenceBySkill.get(skillName) ?? { count: 0, latestAt: null };
486
672
  const evalSetEntries =
487
673
  canonicalEvalEntries.length > 0 ? canonicalEvalEntries.length : evidenceEval.count;
488
- const latestEvalAt = canonicalEvalStat?.mtime.toISOString?.() ?? evidenceEval.latestAt ?? null;
674
+ const latestEvalAt =
675
+ storedEvalSet?.storedAt ??
676
+ canonicalEvalStat?.mtime.toISOString?.() ??
677
+ evidenceEval.latestAt ??
678
+ null;
489
679
 
490
680
  const unitTestPath = getUnitTestPath(skillName);
491
- const unitTestCases = readJsonArrayFile(unitTestPath).length;
492
- const unitTestResult = readUnitTestResult(getUnitTestResultPath(skillName));
681
+ const storedUnitTests = readUnitTestsFromDb(context.db, skillName);
682
+ const unitTestCases = storedUnitTests?.tests.length ?? readJsonArrayFile(unitTestPath).length;
683
+ const unitTestResult =
684
+ readUnitTestRunResultFromDb(context.db, skillName) ??
685
+ readUnitTestResult(getUnitTestResultPath(skillName));
493
686
 
494
687
  const replay = context.replayBySkill.get(skillName) ?? {
495
688
  check_count: 0,