selftune 0.2.30 → 0.2.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -56
- package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
- package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/command-surface.ts +613 -2
- package/cli/selftune/create/baseline.ts +429 -0
- package/cli/selftune/create/check.ts +35 -0
- package/cli/selftune/create/init.ts +115 -0
- package/cli/selftune/create/package-candidate-state.ts +771 -0
- package/cli/selftune/create/package-evaluator.ts +710 -0
- package/cli/selftune/create/package-fingerprint.ts +142 -0
- package/cli/selftune/create/package-search.ts +377 -0
- package/cli/selftune/create/publish.ts +431 -0
- package/cli/selftune/create/readiness.ts +495 -0
- package/cli/selftune/create/replay.ts +330 -0
- package/cli/selftune/create/report.ts +74 -0
- package/cli/selftune/create/scaffold.ts +121 -0
- package/cli/selftune/create/skills-ref-adapter.ts +177 -0
- package/cli/selftune/create/status.ts +33 -0
- package/cli/selftune/create/templates.ts +249 -0
- package/cli/selftune/cron/setup.ts +1 -1
- package/cli/selftune/dashboard-action-events.ts +4 -1
- package/cli/selftune/dashboard-action-result.ts +789 -24
- package/cli/selftune/dashboard-action-stream.ts +80 -0
- package/cli/selftune/dashboard-contract.ts +146 -3
- package/cli/selftune/dashboard-server.ts +5 -4
- package/cli/selftune/eval/hooks-to-evals.ts +58 -35
- package/cli/selftune/eval/synthetic-evals.ts +145 -17
- package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
- package/cli/selftune/evolution/evolve-body.ts +9 -36
- package/cli/selftune/evolution/evolve.ts +8 -72
- package/cli/selftune/evolution/stopping-criteria.ts +5 -13
- package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
- package/cli/selftune/evolution/validate-host-replay.ts +115 -15
- package/cli/selftune/improve.ts +206 -0
- package/cli/selftune/index.ts +123 -6
- package/cli/selftune/init.ts +1 -1
- package/cli/selftune/localdb/queries/dashboard.ts +30 -0
- package/cli/selftune/localdb/schema.ts +52 -0
- package/cli/selftune/monitoring/watch.ts +257 -23
- package/cli/selftune/orchestrate/execute.ts +300 -1
- package/cli/selftune/orchestrate/finalize.ts +14 -0
- package/cli/selftune/orchestrate/plan.ts +22 -5
- package/cli/selftune/orchestrate/prepare.ts +59 -4
- package/cli/selftune/orchestrate/report.ts +1 -1
- package/cli/selftune/orchestrate.ts +34 -1
- package/cli/selftune/publish.ts +35 -0
- package/cli/selftune/registry/github-install.ts +256 -0
- package/cli/selftune/registry/index.ts +1 -1
- package/cli/selftune/registry/install.ts +58 -7
- package/cli/selftune/routes/actions.ts +81 -15
- package/cli/selftune/routes/overview.ts +1 -1
- package/cli/selftune/routes/skill-report.ts +147 -2
- package/cli/selftune/run.ts +18 -0
- package/cli/selftune/schedule.ts +3 -3
- package/cli/selftune/search-run.ts +703 -0
- package/cli/selftune/status.ts +35 -11
- package/cli/selftune/testing-readiness.ts +431 -40
- package/cli/selftune/types.ts +316 -0
- package/cli/selftune/utils/eval-readiness.ts +1 -0
- package/cli/selftune/utils/json-output.ts +11 -0
- package/cli/selftune/utils/lifecycle-surface.ts +48 -0
- package/cli/selftune/utils/query-filter.ts +82 -1
- package/cli/selftune/utils/tui.ts +85 -2
- package/cli/selftune/verify.ts +205 -0
- package/cli/selftune/workflows/proposals.ts +1 -1
- package/cli/selftune/workflows/skill-scaffold.ts +141 -63
- package/cli/selftune/workflows/workflows.ts +4 -4
- package/package.json +1 -1
- package/packages/dashboard-core/src/routes/manifest.ts +2 -2
- package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
- package/packages/ui/src/primitives/button.tsx +5 -0
- package/skill/SKILL.md +148 -85
- package/skill/references/cli-quick-reference.md +16 -1
- package/skill/references/creator-playbook.md +31 -10
- package/skill/workflows/Baseline.md +8 -9
- package/skill/workflows/Contributions.md +4 -4
- package/skill/workflows/Create.md +173 -0
- package/skill/workflows/CreateTestDeploy.md +34 -30
- package/skill/workflows/Cron.md +2 -2
- package/skill/workflows/Dashboard.md +3 -3
- package/skill/workflows/Evals.md +13 -7
- package/skill/workflows/Evolve.md +75 -32
- package/skill/workflows/EvolveBody.md +22 -15
- package/skill/workflows/Hook.md +1 -1
- package/skill/workflows/Improve.md +168 -0
- package/skill/workflows/Initialize.md +3 -3
- package/skill/workflows/Orchestrate.md +49 -12
- package/skill/workflows/Publish.md +100 -0
- package/skill/workflows/Registry.md +19 -13
- package/skill/workflows/Run.md +72 -0
- package/skill/workflows/Schedule.md +2 -2
- package/skill/workflows/SearchRun.md +89 -0
- package/skill/workflows/SignalsDashboard.md +2 -2
- package/skill/workflows/UnitTest.md +13 -4
- package/skill/workflows/Verify.md +136 -0
- package/skill/workflows/Watch.md +114 -47
- package/skill/workflows/Workflows.md +13 -8
- package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +0 -1
- package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
|
@@ -4,17 +4,25 @@ import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSy
|
|
|
4
4
|
import { dirname, join } from "node:path";
|
|
5
5
|
|
|
6
6
|
import { SELFTUNE_CONFIG_DIR } from "./constants.js";
|
|
7
|
+
import type { CreatePackageEvaluationResult } from "./create/package-evaluator.js";
|
|
7
8
|
import type {
|
|
9
|
+
CreatePackageEvaluationSummary,
|
|
10
|
+
CreatePackageEvaluationStatus,
|
|
11
|
+
CreatorOverviewStep,
|
|
8
12
|
CreatorLoopNextStep,
|
|
9
13
|
CreatorTestingOverview,
|
|
10
14
|
DeploymentReadiness,
|
|
11
15
|
SkillEvalReadiness,
|
|
16
|
+
SkillSummary,
|
|
12
17
|
SkillTestingReadiness,
|
|
13
18
|
} from "./dashboard-contract.js";
|
|
14
19
|
import { getDb } from "./localdb/db.js";
|
|
15
20
|
import type { EvalEntry, SkillUnitTest, UnitTestSuiteResult } from "./types.js";
|
|
21
|
+
import { computeCreatePackageFingerprint } from "./create/package-fingerprint.js";
|
|
16
22
|
import { queryEvolutionEvidence } from "./localdb/queries/evolution.js";
|
|
17
23
|
import { queryTrustedSkillObservationRows } from "./localdb/queries/trust.js";
|
|
24
|
+
import { MIN_LOG_READY_POSITIVES } from "./utils/eval-readiness.js";
|
|
25
|
+
import { extractPositiveEvalQueryText } from "./utils/query-filter.js";
|
|
18
26
|
import {
|
|
19
27
|
findInstalledSkillNames,
|
|
20
28
|
findInstalledSkillPath,
|
|
@@ -25,6 +33,7 @@ import {
|
|
|
25
33
|
/**
 * Compact shape of one trusted skill observation row: the session it belongs to,
 * a numeric `triggered` value, and the raw query text.
 * NOTE(review): `triggered` is compared against `1` elsewhere in this file to pick
 * positive rows — presumably a 0/1 flag; confirm against the trust query.
 */
interface TrustedSkillObservationSummary {
|
|
26
34
|
session_id: string;
|
|
27
35
|
triggered: number;
|
|
36
|
+
query_text: string;
|
|
28
37
|
}
|
|
29
38
|
|
|
30
39
|
interface TestingReadinessContext {
|
|
@@ -39,6 +48,10 @@ interface TestingReadinessContext {
|
|
|
39
48
|
string,
|
|
40
49
|
{ sample_size: number; pass_rate: number | null; measured_at: string | null }
|
|
41
50
|
>;
|
|
51
|
+
packageEvaluationBySkill: Map<
|
|
52
|
+
string,
|
|
53
|
+
{ summary: CreatePackageEvaluationSummary; storedAt: string | null }
|
|
54
|
+
>;
|
|
42
55
|
latestEvolutionBySkill: Map<string, { action: string | null; timestamp: string | null }>;
|
|
43
56
|
}
|
|
44
57
|
|
|
@@ -54,6 +67,10 @@ function getUnitTestDir(): string {
|
|
|
54
67
|
return join(getConfigDir(), "unit-tests");
|
|
55
68
|
}
|
|
56
69
|
|
|
70
|
+
/**
 * Build the path of the "package-evaluations" directory located inside the
 * directory returned by `getConfigDir()`.
 *
 * @returns The joined directory path (the directory is not created here).
 */
function getPackageEvaluationDir(): string {
  const configDir = getConfigDir();
  return join(configDir, "package-evaluations");
}
|
|
73
|
+
|
|
57
74
|
/**
 * Build the path of the canonical eval-set file for a skill.
 *
 * @param skillName - Skill identifier used as the file's base name.
 * @returns `<eval-set dir>/<skillName>.json`.
 */
export function getCanonicalEvalSetPath(skillName: string): string {
  const evalSetDir = getEvalSetDir();
  const fileName = `${skillName}.json`;
  return join(evalSetDir, fileName);
}
|
|
@@ -66,6 +83,14 @@ export function getUnitTestResultPath(skillName: string): string {
|
|
|
66
83
|
return join(getUnitTestDir(), `${skillName}.last-run.json`);
|
|
67
84
|
}
|
|
68
85
|
|
|
86
|
+
/**
 * Build the path of the canonical package-evaluation summary file for a skill.
 *
 * @param skillName - Skill identifier used as the file's base name.
 * @returns `<package-evaluation dir>/<skillName>.json`.
 */
export function getCanonicalPackageEvaluationPath(skillName: string): string {
  const evaluationDir = getPackageEvaluationDir();
  const fileName = `${skillName}.json`;
  return join(evaluationDir, fileName);
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build the path of the full package-evaluation artifact file for a skill.
 *
 * @param skillName - Skill identifier used as the file's base name.
 * @returns `<package-evaluation dir>/<skillName>.artifact.json`.
 */
export function getCanonicalPackageEvaluationArtifactPath(skillName: string): string {
  const evaluationDir = getPackageEvaluationDir();
  const fileName = `${skillName}.artifact.json`;
  return join(evaluationDir, fileName);
}
|
|
93
|
+
|
|
69
94
|
function getOptionalDb(): Database | null {
|
|
70
95
|
try {
|
|
71
96
|
return getDb();
|
|
@@ -84,6 +109,16 @@ function parseJsonArray(value: string | null | undefined): unknown[] {
|
|
|
84
109
|
}
|
|
85
110
|
}
|
|
86
111
|
|
|
112
|
+
/**
 * Parse a JSON string and return it only when the top-level value is a plain
 * JSON object.
 *
 * Arrays are rejected explicitly: `typeof [] === "object"`, so without the
 * `Array.isArray` guard a JSON array would be handed back typed as a
 * `Record<string, unknown>`.
 *
 * @param value - Raw JSON text; `null`, `undefined`, and `""` yield `null`.
 * @returns The parsed object, or `null` when the input is empty, is not valid
 *   JSON, or parses to anything other than a non-array object.
 */
function parseJsonObject(value: string | null | undefined): Record<string, unknown> | null {
  if (!value) return null;
  try {
    const parsed: unknown = JSON.parse(value);
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return null;
    }
    return parsed as Record<string, unknown>;
  } catch {
    // Malformed JSON is treated the same as "no object available".
    return null;
  }
}
|
|
121
|
+
|
|
87
122
|
function upsertCanonicalEvalSet(db: Database, skillName: string, evalSet: EvalEntry[]): void {
|
|
88
123
|
db.run(
|
|
89
124
|
`INSERT INTO canonical_eval_sets (skill_name, stored_at, eval_set_json)
|
|
@@ -134,6 +169,21 @@ function upsertUnitTestRunResult(
|
|
|
134
169
|
);
|
|
135
170
|
}
|
|
136
171
|
|
|
172
|
+
/**
 * Insert or replace the stored package-evaluation summary row for a skill.
 *
 * The row is keyed by `skill_name`; on conflict the existing row's `stored_at`
 * and `summary_json` are overwritten with the new values.
 *
 * @param db - Database handle to write to.
 * @param skillName - Skill whose summary is being stored.
 * @param summary - Summary object, serialized with `JSON.stringify`.
 */
function upsertPackageEvaluationReport(
  db: Database,
  skillName: string,
  summary: CreatePackageEvaluationSummary,
): void {
  const upsertSql = `INSERT INTO package_evaluation_reports (skill_name, stored_at, summary_json)
     VALUES (?, ?, ?)
     ON CONFLICT(skill_name) DO UPDATE SET
       stored_at = excluded.stored_at,
       summary_json = excluded.summary_json`;
  // Timestamp is taken at write time, as an ISO-8601 string.
  const storedAt = new Date().toISOString();
  const summaryJson = JSON.stringify(summary);
  db.run(upsertSql, [skillName, storedAt, summaryJson]);
}
|
|
186
|
+
|
|
137
187
|
function readCanonicalEvalSetFromDb(
|
|
138
188
|
db: Database,
|
|
139
189
|
skillName: string,
|
|
@@ -205,6 +255,35 @@ function readUnitTestRunResultFromDb(db: Database, skillName: string): UnitTestS
|
|
|
205
255
|
}
|
|
206
256
|
}
|
|
207
257
|
|
|
258
|
+
/**
 * Load the stored package-evaluation summary for a skill from the database.
 *
 * @param db - Database handle to read from.
 * @param skillName - Skill whose row should be fetched.
 * @returns The parsed summary plus its `stored_at` value, or `null` when no
 *   row exists, the JSON cannot be parsed into an object, or the object lacks
 *   a string `skill_name`, a string `status`, or a boolean `evaluation_passed`.
 */
function readPackageEvaluationFromDb(
  db: Database,
  skillName: string,
): { summary: CreatePackageEvaluationSummary; storedAt: string | null } | null {
  const statement = db.query(
    `SELECT summary_json, stored_at
       FROM package_evaluation_reports
       WHERE skill_name = ?`,
  );
  const record = statement.get(skillName) as { summary_json: string; stored_at: string } | null;
  if (!record?.summary_json) return null;

  const candidate = parseJsonObject(record.summary_json);
  if (!candidate) return null;

  // Only a minimal shape check is done before the cast to the summary type.
  const hasRequiredFields =
    typeof candidate["skill_name"] === "string" &&
    typeof candidate["status"] === "string" &&
    typeof candidate["evaluation_passed"] === "boolean";
  if (!hasRequiredFields) return null;

  return {
    summary: candidate as unknown as CreatePackageEvaluationSummary,
    storedAt: record.stored_at ?? null,
  };
}
|
|
286
|
+
|
|
208
287
|
function listStoredSkillNames(db: Database, tableName: string): Set<string> {
|
|
209
288
|
const rows = db.query(`SELECT skill_name FROM ${tableName}`).all() as Array<{
|
|
210
289
|
skill_name: string;
|
|
@@ -254,6 +333,75 @@ export function writeUnitTestRunResult(skillName: string, suite: UnitTestSuiteRe
|
|
|
254
333
|
return path;
|
|
255
334
|
}
|
|
256
335
|
|
|
336
|
+
/**
 * Persist a package-evaluation summary for a skill.
 *
 * When `getOptionalDb()` yields a database handle the summary is upserted
 * there first; the summary is then always written as pretty-printed JSON to
 * the canonical file path, creating the containing directory if needed.
 *
 * @param skillName - Skill the summary belongs to.
 * @param summary - Summary to store.
 * @returns The path of the JSON file that was written.
 */
export function writeCanonicalPackageEvaluation(
  skillName: string,
  summary: CreatePackageEvaluationSummary,
): string {
  const targetPath = getCanonicalPackageEvaluationPath(skillName);

  const database = getOptionalDb();
  if (database) upsertPackageEvaluationReport(database, skillName, summary);

  mkdirSync(getPackageEvaluationDir(), { recursive: true });
  const serialized = JSON.stringify(summary, null, 2);
  writeFileSync(targetPath, serialized, "utf-8");
  return targetPath;
}
|
|
349
|
+
|
|
350
|
+
/**
 * Write the full package-evaluation result for a skill to its artifact file
 * as pretty-printed JSON, creating the containing directory if needed.
 *
 * @param skillName - Skill the result belongs to.
 * @param result - Complete evaluation result to serialize.
 * @returns The path of the artifact file that was written.
 */
export function writeCanonicalPackageEvaluationArtifact(
  skillName: string,
  result: CreatePackageEvaluationResult,
): string {
  const artifactPath = getCanonicalPackageEvaluationArtifactPath(skillName);
  mkdirSync(getPackageEvaluationDir(), { recursive: true });
  const serialized = JSON.stringify(result, null, 2);
  writeFileSync(artifactPath, serialized, "utf-8");
  return artifactPath;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Read the latest unit-test run result for a skill, preferring the database
 * copy and falling back to the result file on disk.
 *
 * @param skillName - Skill whose result should be loaded.
 * @param db - Database handle, or `null` to skip the database lookup;
 *   defaults to `getOptionalDb()`.
 * @returns The database result when one is found, otherwise whatever
 *   `readUnitTestResult` returns for the skill's result file path.
 */
export function readCanonicalUnitTestRunResult(
  skillName: string,
  db: Database | null = getOptionalDb(),
): UnitTestSuiteResult | null {
  if (db) {
    const fromDb = readUnitTestRunResultFromDb(db, skillName);
    if (fromDb) return fromDb;
  }
  const resultPath = getUnitTestResultPath(skillName);
  return readUnitTestResult(resultPath);
}
|
|
368
|
+
|
|
369
|
+
/**
 * Load and minimally validate the package-evaluation artifact file for a skill.
 *
 * @param skillName - Skill whose artifact should be read.
 * @returns The parsed result, or `null` when the file is missing, cannot be
 *   read or parsed, or fails the shape check: `summary`, `replay`, and
 *   `baseline` must be non-null objects, with a string `summary.skill_name`,
 *   string `summary.status`, boolean `summary.evaluation_passed`, string
 *   `replay.skill`, and string `baseline.skill_name`.
 */
export function readCanonicalPackageEvaluationArtifact(
  skillName: string,
): CreatePackageEvaluationResult | null {
  try {
    const artifactPath = getCanonicalPackageEvaluationArtifactPath(skillName);
    if (!existsSync(artifactPath)) return null;

    const rawText = readFileSync(artifactPath, "utf-8");
    const artifact = JSON.parse(rawText) as Partial<CreatePackageEvaluationResult>;
    if (typeof artifact !== "object" || artifact == null) return null;

    // Each top-level section must be present as an object before its fields are inspected.
    const { summary, replay, baseline } = artifact;
    if (typeof summary !== "object" || summary == null) return null;
    if (typeof replay !== "object" || replay == null) return null;
    if (typeof baseline !== "object" || baseline == null) return null;

    const fieldsLookValid =
      typeof summary.skill_name === "string" &&
      typeof summary.status === "string" &&
      typeof summary.evaluation_passed === "boolean" &&
      typeof replay.skill === "string" &&
      typeof baseline.skill_name === "string";
    if (!fieldsLookValid) return null;

    return artifact as CreatePackageEvaluationResult;
  } catch {
    // Read or parse failures are reported as "no artifact".
    return null;
  }
}
|
|
404
|
+
|
|
257
405
|
function readJsonArrayFile(path: string): unknown[] {
|
|
258
406
|
try {
|
|
259
407
|
if (!existsSync(path)) return [];
|
|
@@ -319,7 +467,7 @@ function deriveEvalReadiness(
|
|
|
319
467
|
skillPath: string | null,
|
|
320
468
|
trustedTriggerCount: number,
|
|
321
469
|
): SkillEvalReadiness {
|
|
322
|
-
if (trustedTriggerCount
|
|
470
|
+
if (trustedTriggerCount >= MIN_LOG_READY_POSITIVES) return "log_ready";
|
|
323
471
|
if (skillPath) return "cold_start_ready";
|
|
324
472
|
return "telemetry_only";
|
|
325
473
|
}
|
|
@@ -328,12 +476,18 @@ function formatSkillPathArg(skillPath: string | null, skillName: string): string
|
|
|
328
476
|
return skillPath ?? `/path/to/skills/${skillName}/SKILL.md`;
|
|
329
477
|
}
|
|
330
478
|
|
|
479
|
+
/**
 * Report whether a skill path is treated as a draft package: true when a
 * `selftune.create.json` file exists in the same directory as the given path.
 *
 * @param skillPath - Path to the skill file, or `null` when unresolved.
 * @returns `false` for a `null`/empty path or when the marker file is absent.
 */
function isDraftSkillPath(skillPath: string | null): boolean {
  if (skillPath) {
    const markerPath = join(dirname(skillPath), "selftune.create.json");
    return existsSync(markerPath);
  }
  return false;
}
|
|
483
|
+
|
|
331
484
|
function recommendCommand(
|
|
332
485
|
skillName: string,
|
|
333
486
|
skillPath: string | null,
|
|
334
487
|
nextStep: CreatorLoopNextStep,
|
|
335
488
|
): string {
|
|
336
489
|
const pathArg = formatSkillPathArg(skillPath, skillName);
|
|
490
|
+
const draftPackage = isDraftSkillPath(skillPath);
|
|
337
491
|
switch (nextStep) {
|
|
338
492
|
case "generate_evals":
|
|
339
493
|
return skillPath
|
|
@@ -342,25 +496,41 @@ function recommendCommand(
|
|
|
342
496
|
case "run_unit_tests":
|
|
343
497
|
return `selftune eval unit-test --skill ${skillName} --generate --skill-path ${pathArg}`;
|
|
344
498
|
case "run_replay_dry_run":
|
|
345
|
-
return
|
|
499
|
+
return draftPackage
|
|
500
|
+
? `selftune create replay --skill-path ${pathArg} --mode package`
|
|
501
|
+
: `selftune evolve --skill ${skillName} --skill-path ${pathArg} --dry-run --validation-mode replay`;
|
|
346
502
|
case "measure_baseline":
|
|
347
|
-
return
|
|
503
|
+
return draftPackage
|
|
504
|
+
? `selftune create baseline --skill-path ${pathArg} --mode package`
|
|
505
|
+
: `selftune grade baseline --skill ${skillName} --skill-path ${pathArg}`;
|
|
348
506
|
case "deploy_candidate":
|
|
349
|
-
return
|
|
507
|
+
return draftPackage
|
|
508
|
+
? `selftune create publish --skill-path ${pathArg}`
|
|
509
|
+
: `selftune evolve --skill ${skillName} --skill-path ${pathArg} --with-baseline`;
|
|
350
510
|
case "watch_deployment":
|
|
351
|
-
return
|
|
511
|
+
return draftPackage
|
|
512
|
+
? `selftune watch --skill ${skillName} --skill-path ${pathArg}`
|
|
513
|
+
: `selftune watch --skill ${skillName}`;
|
|
352
514
|
}
|
|
353
515
|
}
|
|
354
516
|
|
|
355
517
|
function summarizeReadiness(
|
|
356
518
|
nextStep: CreatorLoopNextStep,
|
|
519
|
+
draftPackage: boolean,
|
|
357
520
|
evalReadiness: SkillEvalReadiness,
|
|
358
521
|
evalSetEntries: number,
|
|
359
522
|
unitTestCases: number,
|
|
360
523
|
replayCheckCount: number,
|
|
361
524
|
baselineSampleSize: number,
|
|
362
525
|
unitTestPassRate: number | null,
|
|
526
|
+
packageEvaluationStatus: CreatePackageEvaluationStatus | null,
|
|
527
|
+
latestPackageEvaluationAt: string | null,
|
|
363
528
|
): string {
|
|
529
|
+
const latestPackageEvaluationText =
|
|
530
|
+
latestPackageEvaluationAt && packageEvaluationStatus
|
|
531
|
+
? ` Latest measured package evaluation: ${packageEvaluationStatus} at ${latestPackageEvaluationAt}.`
|
|
532
|
+
: "";
|
|
533
|
+
|
|
364
534
|
switch (nextStep) {
|
|
365
535
|
case "generate_evals":
|
|
366
536
|
if (evalReadiness === "log_ready") {
|
|
@@ -371,20 +541,36 @@ function summarizeReadiness(
|
|
|
371
541
|
}
|
|
372
542
|
return "Telemetry exists, but selftune cannot resolve a local SKILL.md yet. Point it at the skill and generate evals.";
|
|
373
543
|
case "run_unit_tests":
|
|
374
|
-
return
|
|
544
|
+
return unitTestCases > 0 && unitTestPassRate != null && unitTestPassRate < 1
|
|
545
|
+
? `Deterministic unit tests exist (${unitTestCases} cases), but the latest run only passed ${Math.round(unitTestPassRate * 100)}%. Fix the failing tests and rerun them before moving on.`
|
|
546
|
+
: `Eval coverage is present (${evalSetEntries} entries), but no unit tests are stored yet.`;
|
|
375
547
|
case "run_replay_dry_run": {
|
|
376
548
|
const passRateText =
|
|
377
549
|
unitTestPassRate != null
|
|
378
550
|
? ` Last unit-test run passed ${Math.round(unitTestPassRate * 100)}%.`
|
|
379
551
|
: "";
|
|
380
|
-
|
|
552
|
+
if (draftPackage && packageEvaluationStatus === "replay_failed") {
|
|
553
|
+
return `A measured package evaluation already failed replay, so the draft is not publishable yet. Re-run package replay before publishing.${latestPackageEvaluationText}`;
|
|
554
|
+
}
|
|
555
|
+
return draftPackage
|
|
556
|
+
? `Unit tests are present (${unitTestCases} cases), but package replay validation has not been recorded yet.${passRateText}`
|
|
557
|
+
: `Unit tests are present (${unitTestCases} cases), but replay-backed dry-run validation has not been recorded yet.${passRateText}`;
|
|
381
558
|
}
|
|
382
559
|
case "measure_baseline":
|
|
383
|
-
|
|
560
|
+
if (draftPackage && packageEvaluationStatus === "baseline_failed") {
|
|
561
|
+
return `A measured package evaluation already failed the package baseline gate, so the draft is not publishable yet. Re-run the package baseline after improving the draft.${latestPackageEvaluationText}`;
|
|
562
|
+
}
|
|
563
|
+
return draftPackage
|
|
564
|
+
? `Package replay validation exists (${replayCheckCount} recorded checks), but no measured package baseline exists yet.`
|
|
565
|
+
: `Replay-backed validation exists (${replayCheckCount} recorded checks), but no stored no-skill baseline exists yet.`;
|
|
384
566
|
case "deploy_candidate":
|
|
385
|
-
return
|
|
567
|
+
return draftPackage
|
|
568
|
+
? `Evals, unit tests, package replay, and a package baseline are all present. Ready to run create publish and hand the draft into watch.${baselineSampleSize > 0 ? ` Latest baseline used ${baselineSampleSize} samples.` : ""}`
|
|
569
|
+
: `Evals, unit tests, replay validation, and a baseline are all present. Ready to run a live evolve and deploy a watched candidate.${baselineSampleSize > 0 ? ` Latest baseline used ${baselineSampleSize} samples.` : ""}`;
|
|
386
570
|
case "watch_deployment":
|
|
387
|
-
return
|
|
571
|
+
return draftPackage
|
|
572
|
+
? `This draft package has already been published. Keep watching live traffic and measured package lift before making another mutation.${baselineSampleSize > 0 ? ` Latest baseline used ${baselineSampleSize} samples.` : ""}`
|
|
573
|
+
: `A candidate has already been deployed for this skill. Keep watching live traffic and baseline lift before making another mutation.${baselineSampleSize > 0 ? ` Latest baseline used ${baselineSampleSize} samples.` : ""}`;
|
|
388
574
|
}
|
|
389
575
|
}
|
|
390
576
|
|
|
@@ -427,6 +613,7 @@ function summarizeDeploymentReadiness(
|
|
|
427
613
|
skillPath: string | null,
|
|
428
614
|
): { summary: string; command: string | null } {
|
|
429
615
|
const pathArg = formatSkillPathArg(skillPath, skillName);
|
|
616
|
+
const draftPackage = isDraftSkillPath(skillPath);
|
|
430
617
|
switch (deploymentReadiness) {
|
|
431
618
|
case "blocked":
|
|
432
619
|
return {
|
|
@@ -435,21 +622,30 @@ function summarizeDeploymentReadiness(
|
|
|
435
622
|
};
|
|
436
623
|
case "ready_to_deploy":
|
|
437
624
|
return {
|
|
438
|
-
summary:
|
|
439
|
-
"Tests and
|
|
440
|
-
|
|
625
|
+
summary: draftPackage
|
|
626
|
+
? "Tests and measured package checks are in place. Run create publish so selftune can re-run package replay and baseline before handing the draft into watch."
|
|
627
|
+
: "Tests and baseline are in place. Run a live evolve so selftune can validate and deploy the strongest candidate.",
|
|
628
|
+
command: draftPackage
|
|
629
|
+
? `selftune create publish --skill-path ${pathArg}`
|
|
630
|
+
: `selftune evolve --skill ${skillName} --skill-path ${pathArg} --with-baseline`,
|
|
441
631
|
};
|
|
442
632
|
case "watching":
|
|
443
633
|
return {
|
|
444
|
-
summary:
|
|
445
|
-
"
|
|
446
|
-
|
|
634
|
+
summary: draftPackage
|
|
635
|
+
? "This draft package is already published. Keep watching live trigger behavior and measured package lift before making another mutation."
|
|
636
|
+
: "A candidate is already deployed. Keep watching live trigger behavior and baseline lift before making another mutation.",
|
|
637
|
+
command: draftPackage
|
|
638
|
+
? `selftune watch --skill ${skillName} --skill-path ${pathArg}`
|
|
639
|
+
: `selftune watch --skill ${skillName}`,
|
|
447
640
|
};
|
|
448
641
|
case "rolled_back":
|
|
449
642
|
return {
|
|
450
|
-
summary:
|
|
451
|
-
"The last
|
|
452
|
-
|
|
643
|
+
summary: draftPackage
|
|
644
|
+
? "The last published draft rolled back. Review the failure evidence, rerun package replay and baseline if needed, then publish again once the package is trustworthy."
|
|
645
|
+
: "The last deployment rolled back. Review the failure evidence, rerun a replay dry-run if needed, then redeploy once the candidate is trustworthy again.",
|
|
646
|
+
command: draftPackage
|
|
647
|
+
? `selftune create replay --skill-path ${pathArg} --mode package`
|
|
648
|
+
: `selftune evolve --skill ${skillName} --skill-path ${pathArg} --dry-run --validation-mode replay`,
|
|
453
649
|
};
|
|
454
650
|
}
|
|
455
651
|
}
|
|
@@ -461,10 +657,10 @@ export function listSkillTestingReadiness(
|
|
|
461
657
|
const context = buildTestingReadinessContext(db, searchDirs);
|
|
462
658
|
|
|
463
659
|
return [...context.knownSkills]
|
|
464
|
-
.
|
|
660
|
+
.toSorted((a, b) => a.localeCompare(b))
|
|
465
661
|
.map((skillName) => buildSkillTestingReadinessRow(skillName, context))
|
|
466
662
|
.filter((row): row is SkillTestingReadiness => row != null)
|
|
467
|
-
.
|
|
663
|
+
.toSorted((a, b) => {
|
|
468
664
|
const priorityDiff = nextStepPriority(a.next_step) - nextStepPriority(b.next_step);
|
|
469
665
|
if (priorityDiff !== 0) return priorityDiff;
|
|
470
666
|
const trustedDiff = b.trusted_session_count - a.trusted_session_count;
|
|
@@ -487,7 +683,11 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
|
|
|
487
683
|
|
|
488
684
|
for (const row of trustedRows) {
|
|
489
685
|
const existing = trustedRowsBySkill.get(row.skill_name);
|
|
490
|
-
const compact = {
|
|
686
|
+
const compact = {
|
|
687
|
+
session_id: row.session_id,
|
|
688
|
+
triggered: row.triggered,
|
|
689
|
+
query_text: row.query_text,
|
|
690
|
+
};
|
|
491
691
|
if (existing) existing.push(compact);
|
|
492
692
|
else trustedRowsBySkill.set(row.skill_name, [compact]);
|
|
493
693
|
}
|
|
@@ -495,6 +695,7 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
|
|
|
495
695
|
const installedNames = findInstalledSkillNames(searchDirs);
|
|
496
696
|
const unitTestDir = getUnitTestDir();
|
|
497
697
|
const evalSetDir = getEvalSetDir();
|
|
698
|
+
const packageEvaluationDir = getPackageEvaluationDir();
|
|
498
699
|
const unitTestNames = scanSkillNamesFromDir(unitTestDir, (entry) => {
|
|
499
700
|
if (!entry.endsWith(".json") || entry.endsWith(".last-run.json")) return null;
|
|
500
701
|
return entry.slice(0, -".json".length);
|
|
@@ -507,9 +708,14 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
|
|
|
507
708
|
if (!entry.endsWith(".json")) return null;
|
|
508
709
|
return entry.slice(0, -".json".length);
|
|
509
710
|
});
|
|
711
|
+
const packageEvaluationNames = scanSkillNamesFromDir(packageEvaluationDir, (entry) => {
|
|
712
|
+
if (!entry.endsWith(".json")) return null;
|
|
713
|
+
return entry.slice(0, -".json".length);
|
|
714
|
+
});
|
|
510
715
|
const storedEvalNames = listStoredSkillNames(db, "canonical_eval_sets");
|
|
511
716
|
const storedUnitTestNames = listStoredSkillNames(db, "unit_test_files");
|
|
512
717
|
const storedUnitTestRunNames = listStoredSkillNames(db, "unit_test_run_results");
|
|
718
|
+
const storedPackageEvaluationNames = listStoredSkillNames(db, "package_evaluation_reports");
|
|
513
719
|
|
|
514
720
|
const evidenceRows = queryEvolutionEvidence(db);
|
|
515
721
|
const evalEvidenceBySkill = new Map<string, { count: number; latestAt: string | null }>();
|
|
@@ -580,6 +786,38 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
|
|
|
580
786
|
});
|
|
581
787
|
}
|
|
582
788
|
|
|
789
|
+
const packageEvaluationRows = db
|
|
790
|
+
.query(
|
|
791
|
+
`SELECT skill_name, stored_at, summary_json
|
|
792
|
+
FROM package_evaluation_reports
|
|
793
|
+
ORDER BY stored_at DESC`,
|
|
794
|
+
)
|
|
795
|
+
.all() as Array<{
|
|
796
|
+
skill_name: string;
|
|
797
|
+
stored_at: string;
|
|
798
|
+
summary_json: string;
|
|
799
|
+
}>;
|
|
800
|
+
const packageEvaluationBySkill = new Map<
|
|
801
|
+
string,
|
|
802
|
+
{ summary: CreatePackageEvaluationSummary; storedAt: string | null }
|
|
803
|
+
>();
|
|
804
|
+
for (const row of packageEvaluationRows) {
|
|
805
|
+
if (packageEvaluationBySkill.has(row.skill_name)) continue;
|
|
806
|
+
const parsed = parseJsonObject(row.summary_json);
|
|
807
|
+
if (
|
|
808
|
+
!parsed ||
|
|
809
|
+
typeof parsed["skill_name"] !== "string" ||
|
|
810
|
+
typeof parsed["status"] !== "string" ||
|
|
811
|
+
typeof parsed["evaluation_passed"] !== "boolean"
|
|
812
|
+
) {
|
|
813
|
+
continue;
|
|
814
|
+
}
|
|
815
|
+
packageEvaluationBySkill.set(row.skill_name, {
|
|
816
|
+
summary: parsed as unknown as CreatePackageEvaluationSummary,
|
|
817
|
+
storedAt: row.stored_at ?? null,
|
|
818
|
+
});
|
|
819
|
+
}
|
|
820
|
+
|
|
583
821
|
const latestEvolutionRows = db
|
|
584
822
|
.query(
|
|
585
823
|
`SELECT skill_name, action, timestamp
|
|
@@ -624,9 +862,11 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
|
|
|
624
862
|
...unitTestNames,
|
|
625
863
|
...unitTestResultNames,
|
|
626
864
|
...canonicalEvalNames,
|
|
865
|
+
...packageEvaluationNames,
|
|
627
866
|
...storedEvalNames,
|
|
628
867
|
...storedUnitTestNames,
|
|
629
868
|
...storedUnitTestRunNames,
|
|
869
|
+
...storedPackageEvaluationNames,
|
|
630
870
|
...evalEvidenceBySkill.keys(),
|
|
631
871
|
...replayBySkill.keys(),
|
|
632
872
|
...baselineBySkill.keys(),
|
|
@@ -642,6 +882,7 @@ function buildTestingReadinessContext(db: Database, searchDirs: string[]): Testi
|
|
|
642
882
|
fallbackSkillPathBySkill,
|
|
643
883
|
replayBySkill,
|
|
644
884
|
baselineBySkill,
|
|
885
|
+
packageEvaluationBySkill,
|
|
645
886
|
latestEvolutionBySkill,
|
|
646
887
|
};
|
|
647
888
|
}
|
|
@@ -651,8 +892,11 @@ function buildSkillTestingReadinessRow(
|
|
|
651
892
|
context: TestingReadinessContext,
|
|
652
893
|
): SkillTestingReadiness | null {
|
|
653
894
|
const trustRows = context.trustedRowsBySkill.get(skillName) ?? [];
|
|
654
|
-
const
|
|
655
|
-
|
|
895
|
+
const trustedPositiveRows = trustRows.filter(
|
|
896
|
+
(row) => row.triggered === 1 && extractPositiveEvalQueryText(row.query_text, skillName) != null,
|
|
897
|
+
);
|
|
898
|
+
const trustedTriggerCount = trustedPositiveRows.length;
|
|
899
|
+
const trustedSessionCount = new Set(trustedPositiveRows.map((row) => row.session_id)).size;
|
|
656
900
|
|
|
657
901
|
const installedSkillPath = findInstalledSkillPath(skillName, context.searchDirs) ?? null;
|
|
658
902
|
if (!context.knownSkills.has(skillName) && installedSkillPath == null) {
|
|
@@ -660,6 +904,7 @@ function buildSkillTestingReadinessRow(
|
|
|
660
904
|
}
|
|
661
905
|
|
|
662
906
|
const skillPath = installedSkillPath ?? context.fallbackSkillPathBySkill.get(skillName) ?? null;
|
|
907
|
+
const draftPackage = isDraftSkillPath(skillPath);
|
|
663
908
|
const evalReadiness = deriveEvalReadiness(skillPath, trustedTriggerCount);
|
|
664
909
|
|
|
665
910
|
const canonicalEvalPath = getCanonicalEvalSetPath(skillName);
|
|
@@ -683,6 +928,43 @@ function buildSkillTestingReadinessRow(
|
|
|
683
928
|
const unitTestResult =
|
|
684
929
|
readUnitTestRunResultFromDb(context.db, skillName) ??
|
|
685
930
|
readUnitTestResult(getUnitTestResultPath(skillName));
|
|
931
|
+
const storedPackageEvaluation =
|
|
932
|
+
context.packageEvaluationBySkill.get(skillName) ??
|
|
933
|
+
readPackageEvaluationFromDb(context.db, skillName);
|
|
934
|
+
const filePackageEvaluation =
|
|
935
|
+
storedPackageEvaluation == null && existsSync(getCanonicalPackageEvaluationPath(skillName))
|
|
936
|
+
? (() => {
|
|
937
|
+
const parsed = parseJsonObject(
|
|
938
|
+
readFileSync(getCanonicalPackageEvaluationPath(skillName), "utf-8"),
|
|
939
|
+
);
|
|
940
|
+
if (
|
|
941
|
+
!parsed ||
|
|
942
|
+
typeof parsed["skill_name"] !== "string" ||
|
|
943
|
+
typeof parsed["status"] !== "string" ||
|
|
944
|
+
typeof parsed["evaluation_passed"] !== "boolean"
|
|
945
|
+
) {
|
|
946
|
+
return null;
|
|
947
|
+
}
|
|
948
|
+
const stat = statSync(getCanonicalPackageEvaluationPath(skillName));
|
|
949
|
+
return {
|
|
950
|
+
summary: parsed as unknown as CreatePackageEvaluationSummary,
|
|
951
|
+
storedAt: stat.mtime.toISOString?.() ?? null,
|
|
952
|
+
};
|
|
953
|
+
})()
|
|
954
|
+
: null;
|
|
955
|
+
const packageEvaluation = storedPackageEvaluation ?? filePackageEvaluation;
|
|
956
|
+
const currentPackageFingerprint =
|
|
957
|
+
draftPackage && skillPath ? computeCreatePackageFingerprint(skillPath) : null;
|
|
958
|
+
const packageEvaluationMatchesCurrentPackage =
|
|
959
|
+
packageEvaluation?.summary.package_fingerprint != null &&
|
|
960
|
+
currentPackageFingerprint != null &&
|
|
961
|
+
packageEvaluation.summary.package_fingerprint === currentPackageFingerprint;
|
|
962
|
+
const effectivePackageEvaluation = packageEvaluationMatchesCurrentPackage
|
|
963
|
+
? packageEvaluation
|
|
964
|
+
: null;
|
|
965
|
+
const packageEvaluationStatus = effectivePackageEvaluation?.summary.status ?? null;
|
|
966
|
+
const packageEvaluationPassed = effectivePackageEvaluation?.summary.evaluation_passed ?? null;
|
|
967
|
+
const latestPackageEvaluationAt = effectivePackageEvaluation?.storedAt ?? null;
|
|
686
968
|
|
|
687
969
|
const replay = context.replayBySkill.get(skillName) ?? {
|
|
688
970
|
check_count: 0,
|
|
@@ -703,10 +985,16 @@ function buildSkillTestingReadinessRow(
|
|
|
703
985
|
nextStep = "generate_evals";
|
|
704
986
|
} else if (unitTestCases === 0) {
|
|
705
987
|
nextStep = "run_unit_tests";
|
|
988
|
+
} else if (unitTestResult != null && unitTestResult.pass_rate < 1) {
|
|
989
|
+
nextStep = "run_unit_tests";
|
|
706
990
|
} else if (replay.check_count === 0) {
|
|
707
991
|
nextStep = "run_replay_dry_run";
|
|
708
992
|
} else if (baseline.sample_size === 0) {
|
|
709
993
|
nextStep = "measure_baseline";
|
|
994
|
+
} else if (draftPackage && packageEvaluationStatus === "replay_failed") {
|
|
995
|
+
nextStep = "run_replay_dry_run";
|
|
996
|
+
} else if (draftPackage && packageEvaluationStatus === "baseline_failed") {
|
|
997
|
+
nextStep = "measure_baseline";
|
|
710
998
|
} else if (latestEvolution.action === "deployed") {
|
|
711
999
|
nextStep = "watch_deployment";
|
|
712
1000
|
} else {
|
|
@@ -718,12 +1006,15 @@ function buildSkillTestingReadinessRow(
|
|
|
718
1006
|
const recommended_command = recommendCommand(skillName, skillPath, nextStep);
|
|
719
1007
|
const summary = summarizeReadiness(
|
|
720
1008
|
nextStep,
|
|
1009
|
+
draftPackage,
|
|
721
1010
|
evalReadiness,
|
|
722
1011
|
evalSetEntries,
|
|
723
1012
|
unitTestCases,
|
|
724
1013
|
replay.check_count,
|
|
725
1014
|
baseline.sample_size,
|
|
726
1015
|
unitTestResult?.pass_rate ?? null,
|
|
1016
|
+
packageEvaluationStatus,
|
|
1017
|
+
latestPackageEvaluationAt,
|
|
727
1018
|
);
|
|
728
1019
|
|
|
729
1020
|
return {
|
|
@@ -750,6 +1041,9 @@ function buildSkillTestingReadinessRow(
|
|
|
750
1041
|
baseline_sample_size: baseline.sample_size,
|
|
751
1042
|
baseline_pass_rate: baseline.pass_rate,
|
|
752
1043
|
latest_baseline_at: baseline.measured_at,
|
|
1044
|
+
package_evaluation_status: packageEvaluationStatus,
|
|
1045
|
+
package_evaluation_passed: packageEvaluationPassed,
|
|
1046
|
+
latest_package_evaluation_at: latestPackageEvaluationAt,
|
|
753
1047
|
deployment_readiness: deploymentReadiness,
|
|
754
1048
|
deployment_summary: deployment.summary,
|
|
755
1049
|
deployment_command: deployment.command,
|
|
@@ -758,10 +1052,102 @@ function buildSkillTestingReadinessRow(
|
|
|
758
1052
|
} satisfies SkillTestingReadiness;
|
|
759
1053
|
}
|
|
760
1054
|
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
1055
|
+
/**
 * Converts a creator-loop next step into the equivalent overview step.
 *
 * Every loop step handled here has an identically named overview step, so the
 * matched value is returned unchanged. The switch is kept (rather than a bare
 * `return step`) so the compiler keeps checking the listed cases and so a
 * value outside them still yields `undefined` at runtime.
 *
 * @param step - Next step reported by the creator testing loop.
 * @returns The overview step carrying the same name.
 */
function mapCreatorLoopNextStep(step: CreatorLoopNextStep): CreatorOverviewStep {
  switch (step) {
    case "generate_evals":
    case "run_unit_tests":
    case "run_replay_dry_run":
    case "measure_baseline":
    case "deploy_candidate":
    case "watch_deployment":
      // Narrowed to the literal that matched, which is the value to return.
      return step;
  }
}
|
|
1071
|
+
|
|
1072
|
+
/**
 * Picks the overview step for a skill that has create-flow readiness data.
 *
 * Testing readiness takes precedence: when it reports `watch_deployment` as
 * the next step, or a latest evolution action of `deployed`, the result is
 * `watch_deployment` regardless of the create state. Otherwise the create
 * state alone decides the step.
 *
 * @param createReadiness - Create-flow readiness record; only `state` is read.
 * @param testingReadiness - Testing readiness for the same skill, if any.
 * @returns The overview step to show for the skill.
 */
function mapCreateStateToCreatorStep(
  createReadiness: NonNullable<SkillSummary["create_readiness"]>,
  testingReadiness: SkillTestingReadiness | undefined,
): CreatorOverviewStep {
  const underWatch =
    testingReadiness?.next_step === "watch_deployment" ||
    testingReadiness?.latest_evolution_action === "deployed";
  if (underWatch) {
    return "watch_deployment";
  }

  const { state } = createReadiness;
  switch (state) {
    case "ready_to_publish":
      return "deploy_candidate";
    case "needs_baseline":
      return "measure_baseline";
    case "needs_routing_replay":
      return "run_replay_dry_run";
    case "needs_unit_tests":
      return "run_unit_tests";
    case "needs_evals":
      return "generate_evals";
    case "needs_package_resources":
      return "finish_package";
    // Both spec-validation states are resolved by the same create check.
    case "needs_spec_validation":
    case "blocked_spec_validation":
      return "run_create_check";
  }
}
|
|
1101
|
+
|
|
1102
|
+
/**
 * Builds the overview priority entry for one skill.
 *
 * - Without create readiness, the entry comes entirely from testing readiness
 *   (or is `null` when that is missing too).
 * - With create readiness, the step is derived from the create state. A
 *   `watch_deployment` step backed by testing readiness reuses the testing
 *   summary and command; every other case uses the create summary, and the
 *   command falls back from the create flow's `next_command`, to the testing
 *   command, to a generated `selftune create check` invocation.
 *
 * @param skill - The skill's name plus its optional readiness records.
 * @returns The priority entry, or `null` when no readiness data exists.
 */
function deriveCreatorPriority(
  skill: Pick<SkillSummary, "skill_name" | "testing_readiness" | "create_readiness">,
): CreatorTestingOverview["priorities"][number] | null {
  const {
    skill_name,
    create_readiness: createState,
    testing_readiness: testingState,
  } = skill;

  if (!createState) {
    if (!testingState) return null;
    return {
      skill_name,
      step: mapCreatorLoopNextStep(testingState.next_step),
      summary: testingState.summary,
      recommended_command: testingState.recommended_command,
    };
  }

  const step = mapCreateStateToCreatorStep(createState, testingState);

  // A shipped skill is described by its testing data, not its create state.
  if (step === "watch_deployment" && testingState) {
    return {
      skill_name,
      step,
      summary: testingState.summary,
      recommended_command: testingState.recommended_command,
    };
  }

  const recommended_command =
    createState.next_command ??
    testingState?.recommended_command ??
    `selftune create check --skill-path ${createState.skill_path}`;

  return {
    skill_name,
    step,
    summary: createState.summary,
    recommended_command,
  };
}
|
|
1135
|
+
|
|
1136
|
+
/**
 * Sort rank for each overview step; a lower rank sorts first.
 *
 * The rank is the step's position in the list below, so the list order is the
 * display order: create check first, deployment watch last.
 */
const CREATOR_OVERVIEW_STEP_ORDER: Record<CreatorOverviewStep, number> = Object.fromEntries(
  (
    [
      "run_create_check",
      "finish_package",
      "generate_evals",
      "run_unit_tests",
      "run_replay_dry_run",
      "measure_baseline",
      "deploy_candidate",
      "watch_deployment",
    ] satisfies CreatorOverviewStep[]
  ).map((step, rank): [CreatorOverviewStep, number] => [step, rank]),
) as Record<CreatorOverviewStep, number>;
|
|
1146
|
+
|
|
1147
|
+
export function buildCreatorTestingOverview(skills: SkillSummary[]): CreatorTestingOverview {
|
|
764
1148
|
const counts = {
|
|
1149
|
+
run_create_check: 0,
|
|
1150
|
+
finish_package: 0,
|
|
765
1151
|
generate_evals: 0,
|
|
766
1152
|
run_unit_tests: 0,
|
|
767
1153
|
run_replay_dry_run: 0,
|
|
@@ -770,21 +1156,26 @@ export function buildCreatorTestingOverview(
|
|
|
770
1156
|
watch_deployment: 0,
|
|
771
1157
|
} satisfies CreatorTestingOverview["counts"];
|
|
772
1158
|
|
|
773
|
-
|
|
774
|
-
|
|
1159
|
+
const priorities = skills
|
|
1160
|
+
.map((skill) => deriveCreatorPriority(skill))
|
|
1161
|
+
.filter(
|
|
1162
|
+
(priority): priority is CreatorTestingOverview["priorities"][number] => priority != null,
|
|
1163
|
+
);
|
|
1164
|
+
|
|
1165
|
+
for (const priority of priorities) {
|
|
1166
|
+
counts[priority.step]++;
|
|
775
1167
|
}
|
|
776
1168
|
|
|
777
|
-
const
|
|
778
|
-
.filter((
|
|
779
|
-
.
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
}));
|
|
1169
|
+
const visiblePriorities = priorities
|
|
1170
|
+
.filter((priority) => priority.step !== "watch_deployment")
|
|
1171
|
+
.toSorted(
|
|
1172
|
+
(a, b) =>
|
|
1173
|
+
CREATOR_OVERVIEW_STEP_ORDER[a.step] - CREATOR_OVERVIEW_STEP_ORDER[b.step] ||
|
|
1174
|
+
a.skill_name.localeCompare(b.skill_name),
|
|
1175
|
+
)
|
|
1176
|
+
.slice(0, 5);
|
|
786
1177
|
|
|
787
|
-
const summary = `${counts.deploy_candidate} ready to deploy, ${counts.watch_deployment} already shipped and under watch, ${counts.generate_evals} still need evals, ${counts.run_unit_tests} need unit tests, ${counts.run_replay_dry_run} need replay dry-runs, ${counts.measure_baseline} need baselines.`;
|
|
1178
|
+
const summary = `${counts.deploy_candidate} ready to deploy, ${counts.watch_deployment} already shipped and under watch, ${counts.run_create_check} need create check, ${counts.finish_package} need package work, ${counts.generate_evals} still need evals, ${counts.run_unit_tests} need unit tests, ${counts.run_replay_dry_run} need replay dry-runs, ${counts.measure_baseline} need baselines.`;
|
|
788
1179
|
|
|
789
|
-
return { summary, counts, priorities };
|
|
1180
|
+
return { summary, counts, priorities: visiblePriorities };
|
|
790
1181
|
}
|