selftune 0.2.31 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/README.md +83 -56
  2. package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
  3. package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
  5. package/apps/local-dashboard/dist/index.html +3 -3
  6. package/cli/selftune/command-surface.ts +613 -2
  7. package/cli/selftune/create/baseline.ts +429 -0
  8. package/cli/selftune/create/check.ts +35 -0
  9. package/cli/selftune/create/init.ts +115 -0
  10. package/cli/selftune/create/package-candidate-state.ts +771 -0
  11. package/cli/selftune/create/package-evaluator.ts +710 -0
  12. package/cli/selftune/create/package-fingerprint.ts +142 -0
  13. package/cli/selftune/create/package-search.ts +377 -0
  14. package/cli/selftune/create/publish.ts +431 -0
  15. package/cli/selftune/create/readiness.ts +495 -0
  16. package/cli/selftune/create/replay.ts +330 -0
  17. package/cli/selftune/create/report.ts +74 -0
  18. package/cli/selftune/create/scaffold.ts +121 -0
  19. package/cli/selftune/create/skills-ref-adapter.ts +177 -0
  20. package/cli/selftune/create/status.ts +33 -0
  21. package/cli/selftune/create/templates.ts +249 -0
  22. package/cli/selftune/cron/setup.ts +1 -1
  23. package/cli/selftune/dashboard-action-events.ts +4 -1
  24. package/cli/selftune/dashboard-action-result.ts +789 -24
  25. package/cli/selftune/dashboard-action-stream.ts +80 -0
  26. package/cli/selftune/dashboard-contract.ts +146 -3
  27. package/cli/selftune/dashboard-server.ts +5 -4
  28. package/cli/selftune/eval/hooks-to-evals.ts +58 -35
  29. package/cli/selftune/eval/synthetic-evals.ts +145 -17
  30. package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
  31. package/cli/selftune/evolution/evolve-body.ts +9 -36
  32. package/cli/selftune/evolution/evolve.ts +8 -72
  33. package/cli/selftune/evolution/stopping-criteria.ts +5 -13
  34. package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
  35. package/cli/selftune/evolution/validate-host-replay.ts +115 -15
  36. package/cli/selftune/improve.ts +206 -0
  37. package/cli/selftune/index.ts +123 -6
  38. package/cli/selftune/init.ts +1 -1
  39. package/cli/selftune/localdb/queries/dashboard.ts +30 -0
  40. package/cli/selftune/localdb/schema.ts +52 -0
  41. package/cli/selftune/monitoring/watch.ts +257 -23
  42. package/cli/selftune/orchestrate/execute.ts +300 -1
  43. package/cli/selftune/orchestrate/finalize.ts +14 -0
  44. package/cli/selftune/orchestrate/plan.ts +22 -5
  45. package/cli/selftune/orchestrate/prepare.ts +59 -4
  46. package/cli/selftune/orchestrate/report.ts +1 -1
  47. package/cli/selftune/orchestrate.ts +34 -1
  48. package/cli/selftune/publish.ts +35 -0
  49. package/cli/selftune/routes/actions.ts +81 -15
  50. package/cli/selftune/routes/overview.ts +1 -1
  51. package/cli/selftune/routes/skill-report.ts +147 -2
  52. package/cli/selftune/run.ts +18 -0
  53. package/cli/selftune/schedule.ts +3 -3
  54. package/cli/selftune/search-run.ts +703 -0
  55. package/cli/selftune/status.ts +35 -11
  56. package/cli/selftune/testing-readiness.ts +431 -40
  57. package/cli/selftune/types.ts +316 -0
  58. package/cli/selftune/utils/eval-readiness.ts +1 -0
  59. package/cli/selftune/utils/json-output.ts +11 -0
  60. package/cli/selftune/utils/lifecycle-surface.ts +48 -0
  61. package/cli/selftune/utils/query-filter.ts +82 -1
  62. package/cli/selftune/utils/tui.ts +85 -2
  63. package/cli/selftune/verify.ts +205 -0
  64. package/cli/selftune/workflows/proposals.ts +1 -1
  65. package/cli/selftune/workflows/skill-scaffold.ts +141 -63
  66. package/cli/selftune/workflows/workflows.ts +4 -4
  67. package/package.json +1 -1
  68. package/skill/SKILL.md +148 -85
  69. package/skill/references/cli-quick-reference.md +16 -1
  70. package/skill/references/creator-playbook.md +31 -10
  71. package/skill/workflows/Baseline.md +8 -9
  72. package/skill/workflows/Contributions.md +4 -4
  73. package/skill/workflows/Create.md +173 -0
  74. package/skill/workflows/CreateTestDeploy.md +34 -30
  75. package/skill/workflows/Cron.md +2 -2
  76. package/skill/workflows/Dashboard.md +3 -3
  77. package/skill/workflows/Evals.md +13 -7
  78. package/skill/workflows/Evolve.md +75 -32
  79. package/skill/workflows/EvolveBody.md +22 -15
  80. package/skill/workflows/Hook.md +1 -1
  81. package/skill/workflows/Improve.md +168 -0
  82. package/skill/workflows/Initialize.md +3 -3
  83. package/skill/workflows/Orchestrate.md +49 -12
  84. package/skill/workflows/Publish.md +100 -0
  85. package/skill/workflows/Run.md +72 -0
  86. package/skill/workflows/Schedule.md +2 -2
  87. package/skill/workflows/SearchRun.md +89 -0
  88. package/skill/workflows/SignalsDashboard.md +2 -2
  89. package/skill/workflows/UnitTest.md +13 -4
  90. package/skill/workflows/Verify.md +136 -0
  91. package/skill/workflows/Watch.md +114 -47
  92. package/skill/workflows/Workflows.md +13 -8
  93. package/apps/local-dashboard/dist/assets/index-B7v_o1WC.js +0 -15
  94. package/apps/local-dashboard/dist/assets/index-CrO77SVi.css +0 -1
  95. package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
@@ -16,6 +16,8 @@ interface OrchestrateFinalTotals {
16
16
  watched: number;
17
17
  skipped: number;
18
18
  autoGraded: number;
19
+ packageSearched: number;
20
+ packageImproved: number;
19
21
  freshlyWatchedSkills: string[];
20
22
  }
21
23
 
@@ -27,6 +29,8 @@ export interface FinalizeOrchestrateRunInput {
27
29
  dryRun: boolean;
28
30
  approvalMode: "auto" | "review";
29
31
  autoGradedCount: number;
32
+ packageSearched: number;
33
+ packageImproved: number;
30
34
  freshlyWatchedSkills: string[];
31
35
  pendingSignals: ImprovementSignalRecord[];
32
36
  elapsedMs: number;
@@ -36,6 +40,8 @@ function buildFinalTotals(
36
40
  skills: SkillStatus[],
37
41
  candidates: SkillAction[],
38
42
  autoGradedCount: number,
43
+ packageSearched: number,
44
+ packageImproved: number,
39
45
  freshlyWatchedSkills: string[],
40
46
  ): OrchestrateFinalTotals {
41
47
  return {
@@ -50,6 +56,8 @@ function buildFinalTotals(
50
56
  freshlyWatchedSkills.length,
51
57
  skipped: candidates.filter((candidate) => candidate.action === "skip").length,
52
58
  autoGraded: autoGradedCount,
59
+ packageSearched,
60
+ packageImproved,
53
61
  freshlyWatchedSkills,
54
62
  };
55
63
  }
@@ -63,6 +71,8 @@ export function finalizeOrchestrateRun(input: FinalizeOrchestrateRunInput): Orch
63
71
  dryRun,
64
72
  approvalMode,
65
73
  autoGradedCount,
74
+ packageSearched,
75
+ packageImproved,
66
76
  freshlyWatchedSkills,
67
77
  pendingSignals,
68
78
  elapsedMs,
@@ -72,6 +82,8 @@ export function finalizeOrchestrateRun(input: FinalizeOrchestrateRunInput): Orch
72
82
  statusResult.skills,
73
83
  candidates,
74
84
  autoGradedCount,
85
+ packageSearched,
86
+ packageImproved,
75
87
  freshlyWatchedSkills,
76
88
  );
77
89
 
@@ -106,6 +118,8 @@ export function finalizeOrchestrateRun(input: FinalizeOrchestrateRunInput): Orch
106
118
  watched: finalTotals.watched,
107
119
  skipped: finalTotals.skipped,
108
120
  auto_graded: finalTotals.autoGraded,
121
+ package_searched: finalTotals.packageSearched,
122
+ package_improved: finalTotals.packageImproved,
109
123
  skill_actions: candidates.map(
110
124
  (candidate): OrchestrateRunSkillAction => ({
111
125
  skill: candidate.skill,
@@ -2,6 +2,22 @@ import type { CandidateContext, SkillAction } from "../orchestrate.js";
2
2
  import type { SkillStatus } from "../status.js";
3
3
  import type { EvolutionAuditEntry } from "../types.js";
4
4
 
5
+ /**
6
+ * Determines whether a skill should use package search instead of standard
7
+ * evolution. Returns true only when context.packageFrontierSkills is set
8
+ * and contains skill.name; a missing set always yields false.
9
+ *
10
+ * The function does not inspect package evidence itself: eligibility is
11
+ * decided by whoever populates packageFrontierSkills before selection.
12
+ */
13
+ export function shouldSelectPackageSearch(skill: SkillStatus, context: CandidateContext): boolean {
14
+ // Routing is a plain set-membership test on the candidate context's
15
+ // packageFrontierSkills set (populated by the caller before selection).
16
+ // When present and the skill is listed, we route through package search.
17
+ if (!context.packageFrontierSkills) return false;
18
+ return context.packageFrontierSkills.has(skill.name);
19
+ }
20
+
5
21
  /** Candidate selection criteria. */
6
22
  const CANDIDATE_STATUSES = new Set(["CRITICAL", "WARNING", "UNGRADED"]);
7
23
 
@@ -109,18 +125,19 @@ export function selectCandidates(skills: SkillStatus[], options: CandidateContex
109
125
  continue;
110
126
  }
111
127
 
128
+ const action = shouldSelectPackageSearch(skill, options) ? "package-search" : "evolve";
112
129
  actions.push({
113
130
  skill: skill.name,
114
- action: "evolve",
131
+ action,
115
132
  reason: `status=${skill.status}, passRate=${skill.passRate !== null ? `${(skill.passRate * 100).toFixed(0)}%` : "—"}, missed=${skill.missedQueries}, trend=${skill.trend}`,
116
133
  });
117
134
  }
118
135
 
119
- let evolveCount = 0;
136
+ let activeCount = 0;
120
137
  for (const action of actions) {
121
- if (action.action === "evolve") {
122
- evolveCount++;
123
- if (evolveCount > options.maxSkills) {
138
+ if (action.action === "evolve" || action.action === "package-search") {
139
+ activeCount++;
140
+ if (activeCount > options.maxSkills) {
124
141
  action.action = "skip";
125
142
  action.reason = `capped by --max-skills ${options.maxSkills}`;
126
143
  }
@@ -1,5 +1,5 @@
1
- import { mkdirSync, writeFileSync } from "node:fs";
2
- import { dirname } from "node:path";
1
+ import { existsSync, mkdirSync, writeFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
3
 
4
4
  import {
5
5
  buildDefaultGradingOutputPath,
@@ -7,8 +7,11 @@ import {
7
7
  gradeSession,
8
8
  resolveLatestSessionForSkill,
9
9
  } from "../grading/grade-session.js";
10
+ import { selectAcceptedPackageFrontierCandidate } from "../create/package-candidate-state.js";
10
11
  import { writeGradingResultToDb } from "../localdb/direct-write.js";
11
12
  import { createDefaultSyncOptions } from "../sync.js";
13
+ import { getDb } from "../localdb/db.js";
14
+ import { readCanonicalPackageEvaluationArtifact } from "../testing-readiness.js";
12
15
  import type {
13
16
  ImprovementSignalRecord,
14
17
  QueryLogRecord,
@@ -17,7 +20,7 @@ import type {
17
20
  } from "../types.js";
18
21
  import { readExcerpt } from "../utils/transcript.js";
19
22
  import type { OrchestrateOptions, SkillAction } from "../orchestrate.js";
20
- import { selectCandidates } from "./plan.js";
23
+ import { MIN_CANDIDATE_EVIDENCE, selectCandidates } from "./plan.js";
21
24
  import { groupSignalsBySkill, readPendingSignals } from "./signals.js";
22
25
  import type { ResolvedOrchestrateRuntime } from "./runtime.js";
23
26
 
@@ -33,6 +36,50 @@ export interface PreparedOrchestrateRun {
33
36
  autoGradedCount: number;
34
37
  }
35
38
 
39
+ export function collectPackageSearchEligibleSkills(
40
+ skillNames: string[],
41
+ options?: {
42
+ db?: import("bun:sqlite").Database;
43
+ resolveSkillPath?: (skillName: string) => string | undefined;
44
+ },
45
+ ): Set<string> {
// NOTE(review): Returns the subset of skillNames that qualify for package
// search. First tier: either selectAcceptedPackageFrontierCandidate or
// readCanonicalPackageEvaluationArtifact returns a non-null value for the
// skill. The second tier below only runs when both options.db and
// options.resolveSkillPath are supplied.
46
+ const eligible = new Set<string>();
47
+
48
+ for (const skillName of skillNames) {
49
+ if (
50
+ selectAcceptedPackageFrontierCandidate(skillName) != null ||
51
+ readCanonicalPackageEvaluationArtifact(skillName) != null
52
+ ) {
53
+ eligible.add(skillName);
54
+ continue;
55
+ }
56
+
57
+ // Second tier: a selftune.create.json next to the skill file plus at least MIN_CANDIDATE_EVIDENCE grading_results rows
58
+ if (!options?.db || !options?.resolveSkillPath) continue;
59
+
60
+ const skillPath = options.resolveSkillPath(skillName);
61
+ if (!skillPath) continue;
62
+
63
+ const hasDraft = existsSync(join(dirname(skillPath), "selftune.create.json"));
64
+ if (!hasDraft) continue;
65
+
66
+ try {
67
+ const row = options.db
68
+ .query<{ count: number }, [string]>(
69
+ "SELECT COUNT(*) as count FROM grading_results WHERE skill_name = ?",
70
+ )
71
+ .get(skillName);
72
+ if (row && row.count >= MIN_CANDIDATE_EVIDENCE) {
73
+ eligible.add(skillName);
74
+ }
75
+ } catch {
76
+ // Best-effort: a failing query (the table may not exist yet) is swallowed and the skill is simply not added
77
+ }
78
+ }
79
+
80
+ return eligible;
81
+ }
82
+
36
83
  /**
37
84
  * Detects significant overlap between the positive eval sets of evolution
38
85
  * candidates. When two skills share >30% of their positive queries, it
@@ -270,6 +317,10 @@ export async function prepareOrchestrateRun(
270
317
 
271
318
  const pendingSignals = readPendingSignals(runtime.readSignals);
272
319
  const signaledSkills = groupSignalsBySkill(pendingSignals);
320
+ const packageFrontierSkills = collectPackageSearchEligibleSkills(
321
+ statusResult.skills.map((skill) => skill.name),
322
+ { db: getDb(), resolveSkillPath: runtime.resolveSkillPath },
323
+ );
273
324
  if (signaledSkills.size > 0) {
274
325
  console.error(
275
326
  `[orchestrate] Improvement signals: ${pendingSignals.length} pending for ${signaledSkills.size} skill(s)`,
@@ -281,12 +332,16 @@ export async function prepareOrchestrateRun(
281
332
  maxSkills: options.maxSkills,
282
333
  auditEntries,
283
334
  signaledSkills,
335
+ packageFrontierSkills,
284
336
  });
285
337
 
286
338
  const evolveCandidates = candidates.filter((candidate) => candidate.action === "evolve");
339
+ const packageSearchCount = candidates.filter(
340
+ (candidate) => candidate.action === "package-search",
341
+ ).length;
287
342
  const skipCount = candidates.filter((candidate) => candidate.action === "skip").length;
288
343
  console.error(
289
- `[orchestrate] Candidates: ${evolveCandidates.length} to evolve, ${skipCount} skipped`,
344
+ `[orchestrate] Candidates: ${evolveCandidates.length} to evolve, ${packageSearchCount} to package-search, ${skipCount} skipped`,
290
345
  );
291
346
  for (const candidate of candidates) {
292
347
  console.error(
@@ -123,7 +123,7 @@ export function formatOrchestrateReport(result: OrchestrateResult): string {
123
123
  const lines: string[] = [];
124
124
 
125
125
  lines.push(separator);
126
- lines.push("selftune orchestrate — decision report");
126
+ lines.push("selftune run — decision report");
127
127
  lines.push(separator);
128
128
  lines.push("");
129
129
 
@@ -24,8 +24,11 @@ import {
24
24
  autoGradeFreshDeploys,
25
25
  buildReplayValidationOptions,
26
26
  runEvolutionPhase,
27
+ runPackageSearchPhase,
27
28
  watchRecentDeploys,
28
29
  } from "./orchestrate/execute.js";
30
+ export { runPackageSearchPhase } from "./orchestrate/execute.js";
31
+ export type { RunPackageSearchPhaseInput } from "./orchestrate/execute.js";
29
32
  import { finalizeOrchestrateRun } from "./orchestrate/finalize.js";
30
33
  import { acquireLock, releaseLock } from "./orchestrate/locks.js";
31
34
  import { runPostOrchestrateSideEffects } from "./orchestrate/post-run.js";
@@ -67,6 +70,7 @@ export {
67
70
  DEFAULT_COOLDOWN_HOURS,
68
71
  MIN_CANDIDATE_EVIDENCE,
69
72
  selectCandidates,
73
+ shouldSelectPackageSearch,
70
74
  } from "./orchestrate/plan.js";
71
75
  export { autoGradeTopUngraded, detectCrossSkillOverlap } from "./orchestrate/prepare.js";
72
76
  export { formatOrchestrateReport } from "./orchestrate/report.js";
@@ -93,12 +97,20 @@ export interface OrchestrateOptions {
93
97
  maxAutoGrade: number;
94
98
  }
95
99
 
100
+ export interface PackageSearchResult {
// NOTE(review): Carried on SkillAction through its optional
// packageSearchResult field. Presumably filled in by runPackageSearchPhase
// for "package-search" actions; that code is not part of this hunk, so the
// exact meaning of each field should be confirmed there.
101
+ searched: boolean;
102
+ winnerApplied: boolean;
103
+ candidateCount: number;
104
+ winnerCandidateId?: string;
105
+ }
106
+
96
107
  export interface SkillAction {
97
108
  skill: string;
98
- action: "evolve" | "watch" | "skip";
109
+ action: "evolve" | "package-search" | "watch" | "skip";
99
110
  reason: string;
100
111
  evolveResult?: EvolveResult;
101
112
  watchResult?: WatchResult;
113
+ packageSearchResult?: PackageSearchResult;
102
114
  }
103
115
 
104
116
  /** Context for candidate selection beyond simple status checks. */
@@ -110,6 +122,8 @@ export interface CandidateContext {
110
122
  cooldownHours?: number;
111
123
  /** Skill name (lowercase) to improvement signal count. */
112
124
  signaledSkills?: Map<string, number>;
125
+ /** Skills eligible for package search, e.g. those with an accepted package frontier candidate. */
126
+ packageFrontierSkills?: Set<string>;
113
127
  }
114
128
 
115
129
  export interface OrchestrateResult {
@@ -127,6 +141,8 @@ export interface OrchestrateResult {
127
141
  watched: number;
128
142
  skipped: number;
129
143
  autoGraded: number;
144
+ packageSearched: number;
145
+ packageImproved: number;
130
146
  freshlyWatchedSkills: string[];
131
147
  dryRun: boolean;
132
148
  approvalMode: "auto" | "review";
@@ -240,6 +256,8 @@ export async function orchestrate(
240
256
  watched: 0,
241
257
  skipped: 0,
242
258
  autoGraded: 0,
259
+ packageSearched: 0,
260
+ packageImproved: 0,
243
261
  freshlyWatchedSkills: [],
244
262
  dryRun: options.dryRun,
245
263
  approvalMode: options.approvalMode,
@@ -288,6 +306,19 @@ export async function orchestrate(
288
306
  readSkillRecords: runtime.readSkillRecords,
289
307
  });
290
308
 
309
+ // -------------------------------------------------------------------------
310
+ // Step 5c: Package search for candidates tagged with action "package-search"
311
+ // -------------------------------------------------------------------------
312
+ const packageSearchCandidates = candidates.filter(
313
+ (candidate) => candidate.action === "package-search",
314
+ );
315
+ const packageSearchImproved = await runPackageSearchPhase({
316
+ packageSearchCandidates,
317
+ dryRun: options.dryRun,
318
+ agent,
319
+ resolveSkillPath: runtime.resolveSkillPath,
320
+ });
321
+
291
322
  // -------------------------------------------------------------------------
292
323
  // Step 6: Watch recently evolved skills (including freshly deployed in this run)
293
324
  // -------------------------------------------------------------------------
@@ -336,6 +367,8 @@ export async function orchestrate(
336
367
  dryRun: options.dryRun,
337
368
  approvalMode: options.approvalMode,
338
369
  autoGradedCount,
370
+ packageSearched: packageSearchCandidates.length,
371
+ packageImproved: packageSearchImproved.length,
339
372
  freshlyWatchedSkills,
340
373
  pendingSignals,
341
374
  elapsedMs: Date.now() - startTime,
@@ -0,0 +1,35 @@
1
+ import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "./command-surface.js";
2
+ import { cliMain as createPublishCliMain } from "./create/publish.js";
3
+ import { CLIError, handleCLIError } from "./utils/cli-error.js";
4
+
5
+ export async function cliMain(): Promise<void> {
// NOTE(review): Thin wrapper around the create/publish CLI entry point
// (imported as createPublishCliMain). It reads process.argv, makes --watch
// the default, strips --no-watch, then rewrites process.argv so the
// delegate parses the adjusted argument list.
6
+ const rawArgs = process.argv.slice(2);
7
+
8
+ if (rawArgs.includes("--help") || rawArgs.includes("-h")) {
9
+ console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.publish));
10
+ process.exit(0);
11
+ }
12
+
// NOTE(review): "--watch=<value>" counts as --watch; "--no-watch" is only
// recognised as an exact match.
13
+ const hasWatch = rawArgs.includes("--watch") || rawArgs.some((arg) => arg.startsWith("--watch="));
14
+ const hasNoWatch = rawArgs.includes("--no-watch");
15
+
16
+ if (hasWatch && hasNoWatch) {
17
+ throw new CLIError(
18
+ "Use either --watch or --no-watch, not both.",
19
+ "INVALID_FLAG",
20
+ "selftune publish --skill-path <path> [--no-watch]",
21
+ );
22
+ }
23
+
// NOTE(review): --no-watch is never forwarded; it is expressed to the
// delegate purely by the absence of --watch. When neither flag was given,
// --watch is appended after the caller's arguments.
24
+ const delegatedArgs = rawArgs.filter((arg) => arg !== "--no-watch");
25
+ if (!hasWatch && !hasNoWatch) {
26
+ delegatedArgs.push("--watch");
27
+ }
28
+
29
+ process.argv = [process.argv[0], process.argv[1], ...delegatedArgs];
30
+ await createPublishCliMain();
31
+ }
32
+
33
// NOTE(review): Runs cliMain only when import.meta.main is truthy, routing
// any rejection to handleCLIError.
+ if (import.meta.main) {
34
+ cliMain().catch(handleCLIError);
35
+ }
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Route handler: POST /api/actions/{watch,evolve,rollback,watchlist}
2
+ * Route handler: POST /api/actions/{create-check,report-package,search-run,watch,evolve,rollback,watchlist}
3
3
  *
4
4
  * Triggers selftune CLI commands as child processes and returns the result.
5
5
  */
@@ -13,6 +13,7 @@ import {
13
13
  } from "../dashboard-action-events.js";
14
14
  import { resolveDashboardActionOutcome } from "../dashboard-action-result.js";
15
15
  import type { DashboardActionEvent, DashboardActionName } from "../dashboard-contract.js";
16
+ import { isCreateSkillDraft } from "../create/readiness.js";
16
17
  import { getCanonicalEvalSetPath, getUnitTestPath } from "../testing-readiness.js";
17
18
  import { saveWatchedSkills } from "../watchlist.js";
18
19
 
@@ -91,7 +92,10 @@ export async function runAction(
91
92
  stdoutPromise,
92
93
  stderrPromise,
93
94
  ]);
94
- const action = command === "evolve" && args.includes("--dry-run") ? "replay-dry-run" : null;
95
+ const action =
96
+ (command === "evolve" || command === "improve") && args.includes("--dry-run")
97
+ ? "replay-dry-run"
98
+ : null;
95
99
  const outcome = action
96
100
  ? resolveDashboardActionOutcome({
97
101
  action,
@@ -136,6 +140,7 @@ function buildActionExecution(
136
140
  const skillInput = requireSkillInput(body);
137
141
  if (skillInput instanceof Response) return skillInput;
138
142
  const { skill, skillPath } = skillInput;
143
+ const isDraftPackage = isCreateSkillDraft(skillPath);
139
144
 
140
145
  if (action === "generate-evals") {
141
146
  const args = [
@@ -171,7 +176,24 @@ function buildActionExecution(
171
176
  };
172
177
  }
173
178
 
179
+ if (action === "create-check") {
180
+ return {
181
+ command: "create",
182
+ args: ["check", "--skill-path", skillPath],
183
+ skill,
184
+ skillPath,
185
+ };
186
+ }
187
+
174
188
  if (action === "replay-dry-run") {
189
+ if (isDraftPackage) {
190
+ return {
191
+ command: "create",
192
+ args: ["replay", "--skill-path", skillPath, "--mode", "package"],
193
+ skill,
194
+ skillPath,
195
+ };
196
+ }
175
197
  return {
176
198
  command: "evolve",
177
199
  args: [
@@ -190,6 +212,14 @@ function buildActionExecution(
190
212
  }
191
213
 
192
214
  if (action === "measure-baseline") {
215
+ if (isDraftPackage) {
216
+ return {
217
+ command: "create",
218
+ args: ["baseline", "--skill-path", skillPath, "--mode", "package"],
219
+ skill,
220
+ skillPath,
221
+ };
222
+ }
193
223
  return {
194
224
  command: "grade",
195
225
  args: ["baseline", "--skill", skill, "--skill-path", skillPath],
@@ -198,9 +228,35 @@ function buildActionExecution(
198
228
  };
199
229
  }
200
230
 
231
+ if (action === "report-package") {
232
+ return {
233
+ command: "create",
234
+ args: ["report", "--skill-path", skillPath],
235
+ skill,
236
+ skillPath,
237
+ };
238
+ }
239
+
240
+ if (action === "search-run") {
241
+ return {
242
+ command: "search-run",
243
+ args: ["--skill", skill, "--skill-path", skillPath],
244
+ skill,
245
+ skillPath,
246
+ };
247
+ }
248
+
201
249
  if (action === "deploy-candidate") {
250
+ if (isDraftPackage) {
251
+ return {
252
+ command: "publish",
253
+ args: ["--skill-path", skillPath, "--no-watch"],
254
+ skill,
255
+ skillPath,
256
+ };
257
+ }
202
258
  return {
203
- command: "evolve",
259
+ command: "improve",
204
260
  args: ["--skill", skill, "--skill-path", skillPath, "--sync-first"],
205
261
  skill,
206
262
  skillPath,
@@ -208,6 +264,14 @@ function buildActionExecution(
208
264
  }
209
265
 
210
266
  if (action === "watch") {
267
+ if (isDraftPackage) {
268
+ return {
269
+ command: "publish",
270
+ args: ["--skill-path", skillPath],
271
+ skill,
272
+ skillPath,
273
+ };
274
+ }
211
275
  return {
212
276
  command: "watch",
213
277
  args: ["--skill", skill, "--skill-path", skillPath, "--sync-first"],
@@ -316,6 +380,12 @@ export async function handleAction(
316
380
  });
317
381
  },
318
382
  });
383
+ const outcome = resolveDashboardActionOutcome({
384
+ action: normalizedAction as DashboardActionName,
385
+ stdout: result.output,
386
+ stderr: result.error,
387
+ exitCode: result.exitCode ?? 0,
388
+ });
319
389
 
320
390
  emitEvent?.({
321
391
  event_id: eventId,
@@ -324,19 +394,15 @@ export async function handleAction(
324
394
  skill_name: executable.skill,
325
395
  skill_path: executable.skillPath,
326
396
  ts: Date.now(),
327
- success: result.success,
397
+ success: outcome.success,
328
398
  exit_code: result.exitCode,
329
- error: result.error,
330
- summary:
331
- executable.command === "evolve" && executable.args.includes("--dry-run")
332
- ? resolveDashboardActionOutcome({
333
- action: "replay-dry-run",
334
- stdout: result.output,
335
- stderr: result.error,
336
- exitCode: result.exitCode ?? 0,
337
- }).summary
338
- : null,
399
+ error: outcome.error,
400
+ summary: outcome.summary,
339
401
  });
340
402
 
341
- return Response.json(result);
403
+ return Response.json({
404
+ ...result,
405
+ success: outcome.success,
406
+ error: outcome.error,
407
+ });
342
408
  }
@@ -42,7 +42,7 @@ export function handleOverview(
42
42
  const pendingReviews = attentionQueue.filter((a) => a.category === "needs_review").length;
43
43
 
44
44
  const trustWatchlist = buildTrustWatchlist(trustSummaries);
45
- const creatorTesting = buildCreatorTestingOverview(testingReadiness);
45
+ const creatorTesting = buildCreatorTestingOverview(skills);
46
46
  const autonomyStatus = buildAutonomyStatus(
47
47
  db,
48
48
  attentionQueue,