selftune 0.2.23 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219):
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +93 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  13. package/cli/selftune/auto-update.ts +200 -8
  14. package/cli/selftune/canonical-export.ts +55 -25
  15. package/cli/selftune/command-surface.ts +397 -0
  16. package/cli/selftune/contribute/contribute.ts +64 -13
  17. package/cli/selftune/contribution-config.ts +57 -3
  18. package/cli/selftune/contribution-preferences.ts +117 -0
  19. package/cli/selftune/contribution-signals.ts +8 -4
  20. package/cli/selftune/contribution-staging.ts +13 -2
  21. package/cli/selftune/contributions.ts +55 -121
  22. package/cli/selftune/creator-contributions.ts +29 -10
  23. package/cli/selftune/cron/setup.ts +7 -3
  24. package/cli/selftune/dashboard-contract.ts +73 -0
  25. package/cli/selftune/dashboard-server.ts +168 -17
  26. package/cli/selftune/dashboard.ts +350 -17
  27. package/cli/selftune/eval/baseline.ts +21 -5
  28. package/cli/selftune/eval/execution-eval.ts +170 -0
  29. package/cli/selftune/eval/family-overlap.ts +2 -2
  30. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  31. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  32. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  33. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  34. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  35. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  36. package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
  37. package/cli/selftune/evolution/evolve-body.ts +100 -39
  38. package/cli/selftune/evolution/evolve.ts +244 -52
  39. package/cli/selftune/evolution/rollback.ts +0 -1
  40. package/cli/selftune/evolution/validate-body.ts +68 -42
  41. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  42. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  43. package/cli/selftune/evolution/validate-routing.ts +43 -41
  44. package/cli/selftune/evolution/validation-contract.ts +91 -0
  45. package/cli/selftune/grading/auto-grade.ts +11 -7
  46. package/cli/selftune/grading/grade-session.ts +10 -16
  47. package/cli/selftune/index.ts +35 -10
  48. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  49. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  50. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  51. package/cli/selftune/ingestors/pi-ingest.ts +3 -2
  52. package/cli/selftune/init.ts +27 -3
  53. package/cli/selftune/localdb/direct-write.ts +35 -1
  54. package/cli/selftune/localdb/queries/cron.ts +34 -0
  55. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  56. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  57. package/cli/selftune/localdb/queries/execution.ts +133 -0
  58. package/cli/selftune/localdb/queries/json.ts +18 -0
  59. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  60. package/cli/selftune/localdb/queries/raw.ts +95 -0
  61. package/cli/selftune/localdb/queries/staging.ts +270 -0
  62. package/cli/selftune/localdb/queries/trust.ts +392 -0
  63. package/cli/selftune/localdb/queries.ts +60 -2288
  64. package/cli/selftune/localdb/schema.ts +21 -0
  65. package/cli/selftune/monitoring/watch.ts +96 -29
  66. package/cli/selftune/normalization.ts +3 -0
  67. package/cli/selftune/observability.ts +4 -2
  68. package/cli/selftune/orchestrate/cli.ts +161 -0
  69. package/cli/selftune/orchestrate/execute.ts +295 -0
  70. package/cli/selftune/orchestrate/finalize.ts +157 -0
  71. package/cli/selftune/orchestrate/locks.ts +40 -0
  72. package/cli/selftune/orchestrate/plan.ts +131 -0
  73. package/cli/selftune/orchestrate/post-run.ts +59 -0
  74. package/cli/selftune/orchestrate/prepare.ts +334 -0
  75. package/cli/selftune/orchestrate/report.ts +182 -0
  76. package/cli/selftune/orchestrate/runtime.ts +120 -0
  77. package/cli/selftune/orchestrate/signals.ts +48 -0
  78. package/cli/selftune/orchestrate.ts +150 -1173
  79. package/cli/selftune/repair/skill-usage.ts +5 -2
  80. package/cli/selftune/routes/overview.ts +5 -2
  81. package/cli/selftune/routes/skill-report.ts +15 -2
  82. package/cli/selftune/schedule.ts +5 -5
  83. package/cli/selftune/status.ts +39 -2
  84. package/cli/selftune/testing-readiness.ts +597 -0
  85. package/cli/selftune/types.ts +44 -4
  86. package/cli/selftune/uninstall.ts +2 -1
  87. package/cli/selftune/utils/canonical-log.ts +1 -9
  88. package/cli/selftune/utils/cli-error.ts +9 -0
  89. package/cli/selftune/utils/llm-call.ts +126 -6
  90. package/cli/selftune/utils/skill-discovery.ts +2 -0
  91. package/cli/selftune/workflows/proposals.ts +184 -0
  92. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  93. package/cli/selftune/workflows/workflows.ts +100 -26
  94. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  95. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  96. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  97. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  98. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
  99. package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
  100. package/package.json +25 -9
  101. package/packages/dashboard-core/AGENTS.md +18 -0
  102. package/packages/dashboard-core/README.md +30 -0
  103. package/packages/dashboard-core/index.ts +3 -0
  104. package/packages/dashboard-core/package.json +39 -0
  105. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  106. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  107. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  108. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  109. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  110. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  111. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  112. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  113. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  114. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  115. package/packages/dashboard-core/src/gates/index.ts +3 -0
  116. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  117. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  118. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  119. package/packages/dashboard-core/src/host/index.ts +3 -0
  120. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  121. package/packages/dashboard-core/src/models/index.ts +4 -0
  122. package/packages/dashboard-core/src/models/overview.ts +98 -0
  123. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  124. package/packages/dashboard-core/src/models/skills.ts +34 -0
  125. package/packages/dashboard-core/src/routes/index.ts +2 -0
  126. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  127. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  128. package/packages/dashboard-core/src/routes/types.ts +39 -0
  129. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  130. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  131. package/packages/dashboard-core/src/screens/index.ts +37 -0
  132. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  133. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  134. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  135. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  136. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  137. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  138. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  139. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  140. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  141. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  142. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  143. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  144. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  145. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  146. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  147. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  148. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  149. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  150. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  151. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  152. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  153. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  154. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  155. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  156. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  157. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  158. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  159. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  160. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  161. package/packages/telemetry-contract/src/schemas.ts +41 -1
  162. package/packages/telemetry-contract/src/types.ts +103 -2
  163. package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
  164. package/packages/ui/src/components/OverviewPanels.tsx +67 -26
  165. package/packages/ui/src/primitives/tabs.tsx +7 -6
  166. package/packages/ui/src/types.ts +10 -0
  167. package/skill/SKILL.md +130 -332
  168. package/skill/agents/diagnosis-analyst.md +3 -3
  169. package/skill/agents/evolution-reviewer.md +3 -3
  170. package/skill/agents/integration-guide.md +3 -3
  171. package/skill/agents/pattern-analyst.md +2 -2
  172. package/skill/references/cli-quick-reference.md +89 -0
  173. package/skill/references/creator-playbook.md +131 -0
  174. package/skill/references/examples.md +48 -0
  175. package/skill/references/troubleshooting.md +47 -0
  176. package/skill/references/version-history.md +1 -1
  177. package/skill/selftune.contribute.json +11 -0
  178. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  179. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  180. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  181. package/skill/workflows/CreateTestDeploy.md +170 -0
  182. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  183. package/skill/{Workflows → workflows}/Cron.md +1 -1
  184. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  185. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  186. package/skill/{Workflows → workflows}/Evals.md +67 -2
  187. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  188. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  189. package/skill/{Workflows → workflows}/Grade.md +1 -1
  190. package/skill/{Workflows → workflows}/Initialize.md +8 -4
  191. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  192. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  193. package/skill/workflows/SignalsDashboard.md +87 -0
  194. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  195. package/skill/{Workflows → workflows}/Watch.md +42 -2
  196. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  197. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
  198. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
  199. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  200. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
  201. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  202. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  203. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  204. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  205. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  206. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  207. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  208. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  209. /package/skill/{Workflows → workflows}/Ingest.md +0 -0
  210. /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
  211. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  212. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  213. /package/skill/{Workflows → workflows}/Registry.md +0 -0
  214. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  215. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  216. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  217. /package/skill/{Workflows → workflows}/Sync.md +0 -0
  218. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  219. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -0,0 +1,295 @@
1
+ import {
2
+ deriveExpectationsFromSkill,
3
+ gradeSession,
4
+ resolveLatestSessionForSkill,
5
+ } from "../grading/grade-session.js";
6
+ import type { readGradingResultsForSkill } from "../grading/results.js";
7
+ import { writeGradingBaseline, writeGradingResultToDb } from "../localdb/direct-write.js";
8
+ import type { watch as watchSkill } from "../monitoring/watch.js";
9
+ import type { EvolveOptions, evolve as evolveSkill } from "../evolution/evolve.js";
10
+ import type { ReplayValidationOptions } from "../evolution/engines/replay-engine.js";
11
+ import { buildRuntimeReplayValidationOptions } from "../evolution/validate-host-replay.js";
12
+ import { findRecentlyDeployedSkills } from "./plan.js";
13
+ import type { OrchestrateOptions, SkillAction } from "../orchestrate.js";
14
+ import type { EvolutionAuditEntry, SessionTelemetryRecord, SkillUsageRecord } from "../types.js";
15
+ import { readExcerpt } from "../utils/transcript.js";
16
+
17
/**
 * Arguments accepted by `buildReplayValidationOptions`.
 */
export interface ReplayOptionBuildInput {
  /** Skill name, forwarded unchanged to the runtime replay option builder. */
  skillName: string;
  /** Path of the skill, forwarded unchanged to the runtime replay option builder. */
  skillPath: string;
  /** Agent identifier; a falsy value (`null` or `""`) means no replay options are built. */
  agent: string | null;
}
22
+
23
/**
 * Builds replay-validation options that target a skill's description.
 *
 * @param input - Skill name, skill path and the agent to replay with.
 * @returns `undefined` when `input.agent` is falsy; otherwise the value produced by
 *   `buildRuntimeReplayValidationOptions` with `contentTarget: "description"`.
 */
export function buildReplayValidationOptions(
  input: ReplayOptionBuildInput,
): ReplayValidationOptions | undefined {
  const { agent: replayAgent, skillName, skillPath } = input;

  return replayAgent
    ? buildRuntimeReplayValidationOptions({
        skillName,
        skillPath,
        agent: replayAgent,
        contentTarget: "description",
      })
    : undefined;
}
36
+
37
/**
 * Everything `runEvolutionPhase` needs. All collaborators are injected, so the
 * phase itself performs no direct imports of evolve/grading implementations.
 */
export interface RunEvolutionPhaseInput {
  /** Candidates to process in order; entries whose action is already `"skip"` are ignored. */
  evolveCandidates: SkillAction[];
  /** Agent passed to `evolve`; when falsy the phase returns an empty list immediately. */
  agent: string | null;
  /** `evolve` runs as a dry run when `dryRun` is truthy or `approvalMode` is `"review"`. */
  options: Pick<OrchestrateOptions, "approvalMode" | "dryRun">;
  /** Maps a skill name to its path; `undefined` marks the candidate as skipped. */
  resolveSkillPath: (skillName: string) => string | undefined;
  /** Called with the skill name; its result is handed to `evolve` as `gradingResults`. */
  readGradingResults: typeof readGradingResultsForSkill;
  /** The evolve implementation invoked once per non-skipped, resolvable candidate. */
  evolve: typeof evolveSkill;
  /** Produces the `replayOptions` value handed to `evolve`. */
  buildReplayOptions: typeof buildReplayValidationOptions;
  /** Extra evolve options, spread last into every `evolve` call. */
  evolveDefaults: Pick<
    EvolveOptions,
    | "paretoEnabled"
    | "candidateCount"
    | "tokenEfficiencyEnabled"
    | "withBaseline"
    | "validationModel"
    | "cheapLoop"
    | "gateModel"
    | "adaptiveGate"
    | "proposalModel"
  >;
}
58
+
59
/**
 * Runs `evolve` sequentially for every candidate that is not already skipped.
 *
 * Candidates are mutated in place: a missing skill path or a thrown error turns
 * the candidate into a `"skip"` with an explanatory `reason`; a completed run
 * stores its outcome on `candidate.evolveResult`. Progress is written to stderr.
 *
 * @param input - Candidates plus the injected collaborators and evolve defaults.
 * @returns The candidates that are still `"evolve"` actions and whose evolve
 *   result reports `deployed`. Empty when `input.agent` is falsy.
 */
export async function runEvolutionPhase(input: RunEvolutionPhaseInput): Promise<SkillAction[]> {
  const {
    evolveCandidates,
    agent,
    options,
    resolveSkillPath,
    readGradingResults,
    evolve: runEvolve,
    buildReplayOptions,
    evolveDefaults,
  } = input;

  // Without an agent nothing can be evolved, so nothing can have been deployed.
  if (!agent) {
    return [];
  }

  const markSkipped = (candidate: SkillAction, reason: string): void => {
    candidate.action = "skip";
    candidate.reason = reason;
  };

  for (const candidate of evolveCandidates) {
    if (candidate.action === "skip") {
      continue;
    }

    const skillPath = resolveSkillPath(candidate.skill);
    if (!skillPath) {
      markSkipped(candidate, `SKILL.md not found for "${candidate.skill}"`);
      console.error(` ⊘ ${candidate.skill}: ${candidate.reason}`);
      continue;
    }

    // Review mode never deploys, so it is treated exactly like an explicit dry run.
    const effectiveDryRun = options.dryRun || options.approvalMode === "review";
    const dryRunSuffix = effectiveDryRun ? " (dry-run)" : "";
    console.error(`[orchestrate] Evolving "${candidate.skill}"${dryRunSuffix}...`);

    try {
      const evolveResult = await runEvolve({
        skillName: candidate.skill,
        skillPath,
        agent,
        dryRun: effectiveDryRun,
        confidenceThreshold: 0.6,
        maxIterations: 3,
        gradingResults: readGradingResults(candidate.skill),
        syncFirst: false,
        replayOptions: buildReplayOptions({
          skillName: candidate.skill,
          skillPath,
          agent,
        }),
        ...evolveDefaults,
      });

      candidate.evolveResult = evolveResult;

      console.error(
        evolveResult.deployed
          ? ` ✓ ${candidate.skill}: deployed (${evolveResult.reason})`
          : ` ✗ ${candidate.skill}: not deployed (${evolveResult.reason})`,
      );
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      markSkipped(candidate, `evolve error: ${msg}`);
      console.error(` ✗ ${candidate.skill}: error — ${msg}`);
    }
  }

  const deployedCandidates: SkillAction[] = [];
  for (const candidate of evolveCandidates) {
    if (candidate.action === "evolve" && candidate.evolveResult?.deployed) {
      deployedCandidates.push(candidate);
    }
  }
  return deployedCandidates;
}
126
+
127
/**
 * Inputs for `autoGradeFreshDeploys`.
 */
export interface AutoGradeFreshDeploysInput {
  /** Candidates to grade; an empty list makes the call a no-op. */
  freshlyDeployedCandidates: SkillAction[];
  /** When true, no grading is attempted at all. */
  dryRun: boolean;
  /** Agent to grade with; when `null`/`undefined`, `detectAgent` is consulted instead. */
  agent: string | null;
  /** Fallback agent lookup; a falsy result aborts grading. */
  detectAgent: () => string | null;
  /** Telemetry reader, invoked once per candidate. */
  readTelemetry: () => SessionTelemetryRecord[];
  /** Skill-usage reader, invoked once per candidate. */
  readSkillRecords: () => SkillUsageRecord[];
}
135
+
136
/**
 * Grades the latest session of each freshly deployed skill and records the
 * outcome as a grading baseline.
 *
 * For every candidate the telemetry and skill-usage readers are invoked again,
 * the latest session for the skill is resolved, graded, and written to the
 * local DB. Only when that write reports success is a baseline row written and
 * a `[post-deploy]` line logged to stderr. Any error for one candidate is
 * logged and does not stop the remaining candidates.
 *
 * Does nothing when `dryRun` is true, when there are no candidates, or when no
 * agent is available from `input.agent` / `input.detectAgent()`.
 *
 * @param input - Candidates, run flags and injected readers.
 */
export async function autoGradeFreshDeploys(input: AutoGradeFreshDeploysInput): Promise<void> {
  const { freshlyDeployedCandidates, dryRun, agent, detectAgent, readTelemetry, readSkillRecords } =
    input;

  if (dryRun) return;
  if (freshlyDeployedCandidates.length === 0) return;

  const gradeAgent = agent ?? detectAgent();
  if (!gradeAgent) return;

  // Reads a transcript excerpt, falling back to a placeholder when the path is
  // missing or the excerpt cannot be read.
  const excerptOrPlaceholder = (transcriptPath: string | undefined | null): string => {
    if (!transcriptPath) return "(no transcript)";
    try {
      return readExcerpt(transcriptPath);
    } catch {
      return "(no transcript)";
    }
  };

  for (const candidate of freshlyDeployedCandidates) {
    try {
      const session = resolveLatestSessionForSkill(
        readTelemetry(),
        readSkillRecords(),
        candidate.skill,
      );
      if (!session) continue;

      const derived = deriveExpectationsFromSkill(candidate.skill);
      const transcriptExcerpt = excerptOrPlaceholder(session.transcriptPath);

      const grading = await gradeSession({
        expectations: derived.expectations,
        telemetry: session.telemetry,
        sessionId: session.sessionId,
        skillName: candidate.skill,
        transcriptExcerpt,
        transcriptPath: session.transcriptPath,
        agent: gradeAgent,
      });

      // A baseline is only meaningful if the grading result itself was stored.
      if (!writeGradingResultToDb(grading)) continue;

      const gradingPassRates: number[] = [grading.summary.pass_rate];
      const gradedCount = gradingPassRates.length;
      const avgPassRate =
        gradingPassRates.reduce((sum, passRate) => sum + passRate, 0) / gradingPassRates.length;

      const deployedEntry = candidate.evolveResult?.auditEntries?.find(
        (entry: { action: string }) => entry.action === "deployed",
      );
      const proposalId = deployedEntry?.proposal_id;

      writeGradingBaseline({
        skill_name: candidate.skill,
        proposal_id: proposalId ?? null,
        measured_at: new Date().toISOString(),
        pass_rate: avgPassRate,
        mean_score: null,
        sample_size: gradedCount,
        grading_results_json: JSON.stringify(gradingPassRates),
      });

      console.error(
        ` [post-deploy] ${candidate.skill}: graded ${gradedCount} session(s), baseline pass_rate=${avgPassRate.toFixed(2)}`,
      );
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.error(` [post-deploy] ${candidate.skill}: auto-grade error — ${msg}`);
    }
  }
}
212
+
213
/**
 * Inputs for `watchRecentDeploys`.
 */
export interface WatchRecentDeploysInput {
  /** Full candidate list; watch results are attached to it and new `"watch"` entries appended. */
  candidates: SkillAction[];
  /** Candidates deployed during this run; always watched, regardless of the audit window. */
  freshlyDeployedCandidates: SkillAction[];
  /** When set, only the skill with exactly this name is watched. */
  skillFilter?: string;
  /** Look-back window, in hours, used to pick deployed skills from the audit entries. */
  recentWindowHours: number;
  /** Reads the audit entries; called once and the result is returned to the caller. */
  readAuditEntries: () => EvolutionAuditEntry[];
  /** Maps a skill name to its path; skills without a path are not watched. */
  resolveSkillPath: (skillName: string) => string | undefined;
  /** The watch implementation invoked once per selected skill. */
  watch: typeof watchSkill;
}
222
+
223
/**
 * Runs `watch` for every skill deployed within the recent window plus every
 * skill deployed during the current run.
 *
 * Skills are watched one at a time. For a freshly deployed skill the watch
 * result is attached to its existing `"evolve"` candidate (if one is found)
 * and the name is recorded in `freshlyWatchedSkills`; for any other skill a new
 * `"watch"` candidate is appended to `candidates`. A failing watch is logged to
 * stderr and does not stop the remaining skills.
 *
 * @param input - Candidates, filters and injected collaborators.
 * @returns The audit entries that were read and the names of freshly deployed
 *   skills whose watch completed.
 */
export async function watchRecentDeploys(
  input: WatchRecentDeploysInput,
): Promise<{ freshAuditEntries: EvolutionAuditEntry[]; freshlyWatchedSkills: string[] }> {
  const {
    candidates,
    freshlyDeployedCandidates,
    skillFilter,
    recentWindowHours,
    readAuditEntries,
    resolveSkillPath,
    watch: runWatch,
  } = input;

  const freshAuditEntries = readAuditEntries();
  const skillsToWatch = findRecentlyDeployedSkills(freshAuditEntries, recentWindowHours);

  // Track fresh deploys separately so membership checks below are a Set lookup.
  const freshSkillNames = new Set<string>();
  for (const { skill } of freshlyDeployedCandidates) {
    freshSkillNames.add(skill);
    skillsToWatch.add(skill);
  }

  const freshlyWatchedSkills: string[] = [];

  for (const skillName of skillsToWatch) {
    if (skillFilter && skillFilter !== skillName) continue;

    const skillPath = resolveSkillPath(skillName);
    if (!skillPath) continue;

    const isFreshlyDeployed = freshSkillNames.has(skillName);
    console.error(
      `[orchestrate] Watching "${skillName}" (${isFreshlyDeployed ? "freshly deployed" : "recently evolved"})...`,
    );

    try {
      const watchResult = await runWatch({
        skillName,
        skillPath,
        windowSessions: 20,
        regressionThreshold: 0.1,
        autoRollback: true,
        enableGradeWatch: true,
        syncFirst: false,
      });

      if (!isFreshlyDeployed) {
        candidates.push({
          skill: skillName,
          action: "watch",
          reason: watchResult.alert ?? "stable",
          watchResult,
        });
      } else {
        const evolvedCandidate = candidates.find(
          (candidate) => candidate.skill === skillName && candidate.action === "evolve",
        );
        if (evolvedCandidate) {
          evolvedCandidate.watchResult = watchResult;
        }
        freshlyWatchedSkills.push(skillName);
      }

      const marker = watchResult.alert ? "⚠" : "✓";
      console.error(` ${marker} ${skillName}: ${watchResult.recommendation}`);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      console.error(` ✗ ${skillName}: watch error — ${msg}`);
    }
  }

  return { freshAuditEntries, freshlyWatchedSkills };
}
@@ -0,0 +1,157 @@
1
+ import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "../dashboard-contract.js";
2
+ import { getDb } from "../localdb/db.js";
3
+ import { writeCronRunToDb, writeOrchestrateRunToDb } from "../localdb/direct-write.js";
4
+ import type { OrchestrateResult, SkillAction } from "../orchestrate.js";
5
+ import type { SkillStatus, StatusResult } from "../status.js";
6
+ import type { SyncResult } from "../sync.js";
7
+ import type { ImprovementSignalRecord } from "../types.js";
8
+ import type { WorkflowSkillProposal } from "../workflows/proposals.js";
9
+ import { markSignalsConsumed } from "./signals.js";
10
+
11
/**
 * Aggregate counters produced by `buildFinalTotals` for one orchestrate run.
 */
interface OrchestrateFinalTotals {
  /** Number of skills in the status result. */
  totalSkills: number;
  /** Candidates whose action is `"evolve"`. */
  evaluated: number;
  /** `"evolve"` candidates that carry an `evolveResult`. */
  evolved: number;
  /** Candidates whose `evolveResult` reports `deployed`. */
  deployed: number;
  /** `"watch"` candidates plus the freshly watched skills. */
  watched: number;
  /** Candidates whose action is `"skip"`. */
  skipped: number;
  /** Auto-graded count, passed through from the caller. */
  autoGraded: number;
  /** Names of freshly deployed skills that were watched, passed through from the caller. */
  freshlyWatchedSkills: string[];
}
21
+
22
/**
 * Inputs for `finalizeOrchestrateRun`.
 */
export interface FinalizeOrchestrateRunInput {
  /** Sync outcome, copied onto the returned result. */
  syncResult: SyncResult;
  /** Status outcome, copied onto the result; its `skills` list drives `totalSkills`. */
  statusResult: StatusResult;
  /** Final candidate list used for all counters and the per-skill report rows. */
  candidates: SkillAction[];
  /** Workflow skill proposals, copied onto the result; their count is recorded as a metric. */
  workflowProposals: WorkflowSkillProposal[];
  /** Whether the run was a dry run; reported in the summary and the run records. */
  dryRun: boolean;
  /** Approval mode the run used; reported in the summary and the run report. */
  approvalMode: "auto" | "review";
  /** Number of auto-graded skills, reported as-is. */
  autoGradedCount: number;
  /** Names of freshly deployed skills that were watched. */
  freshlyWatchedSkills: string[];
  /** Signals to mark as consumed by this run; nothing is marked when empty. */
  pendingSignals: ImprovementSignalRecord[];
  /** Elapsed time of the run in milliseconds. */
  elapsedMs: number;
}
34
+
35
/**
 * Derives the summary counters for a run from its final candidate list.
 *
 * @param skills - Skills from the status result; only the length is used.
 * @param candidates - Final candidate list.
 * @param autoGradedCount - Passed through as `autoGraded`.
 * @param freshlyWatchedSkills - Passed through, and its length is added to `watched`.
 * @returns The aggregated totals.
 */
function buildFinalTotals(
  skills: SkillStatus[],
  candidates: SkillAction[],
  autoGradedCount: number,
  freshlyWatchedSkills: string[],
): OrchestrateFinalTotals {
  let evaluated = 0;
  let evolved = 0;
  let deployed = 0;
  let watchOnly = 0;
  let skipped = 0;

  for (const candidate of candidates) {
    switch (candidate.action) {
      case "evolve":
        evaluated += 1;
        if (candidate.evolveResult !== undefined) evolved += 1;
        break;
      case "watch":
        watchOnly += 1;
        break;
      case "skip":
        skipped += 1;
        break;
    }

    // Deployment is counted from the evolve result alone, whatever the action now is.
    if (candidate.evolveResult?.deployed) deployed += 1;
  }

  return {
    totalSkills: skills.length,
    evaluated,
    evolved,
    deployed,
    watched: watchOnly + freshlyWatchedSkills.length,
    skipped,
    autoGraded: autoGradedCount,
    freshlyWatchedSkills,
  };
}
56
+
57
/**
 * Assembles the final result of an orchestrate run and records it.
 *
 * Steps, in order: compute totals, build the result object, generate a run id,
 * mark pending signals as consumed by that run (only when there are any), then
 * write the run report and a cron-run record to the local DB. Both DB writes
 * are fail-open: an error while writing is swallowed so the result is still
 * returned. An error from marking signals is not swallowed.
 *
 * @param input - Run outcome pieces gathered by the caller.
 * @returns The assembled `OrchestrateResult`.
 */
export function finalizeOrchestrateRun(input: FinalizeOrchestrateRunInput): OrchestrateResult {
  const {
    syncResult,
    statusResult,
    candidates,
    workflowProposals,
    dryRun,
    approvalMode,
    autoGradedCount,
    freshlyWatchedSkills,
    pendingSignals,
    elapsedMs,
  } = input;

  const totals = buildFinalTotals(
    statusResult.skills,
    candidates,
    autoGradedCount,
    freshlyWatchedSkills,
  );

  const result: OrchestrateResult = {
    syncResult,
    statusResult,
    candidates,
    workflowProposals,
    summary: { ...totals, dryRun, approvalMode, elapsedMs },
  };

  const runId = `run_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  if (pendingSignals.length > 0) {
    markSignalsConsumed(pendingSignals, runId);
  }

  // Flattens one candidate into its persisted report row.
  const toRunSkillAction = (candidate: SkillAction): OrchestrateRunSkillAction => ({
    skill: candidate.skill,
    action: candidate.action,
    reason: candidate.reason,
    deployed: candidate.evolveResult?.deployed,
    rolledBack: candidate.watchResult?.rolledBack,
    alert: candidate.watchResult?.alert,
    elapsed_ms: candidate.evolveResult?.elapsedMs,
    llm_calls: candidate.evolveResult?.llmCallCount,
  });

  const runReport: OrchestrateRunReport = {
    run_id: runId,
    timestamp: new Date().toISOString(),
    elapsed_ms: result.summary.elapsedMs,
    dry_run: result.summary.dryRun,
    approval_mode: result.summary.approvalMode,
    total_skills: totals.totalSkills,
    evaluated: totals.evaluated,
    evolved: totals.evolved,
    deployed: totals.deployed,
    watched: totals.watched,
    skipped: totals.skipped,
    auto_graded: totals.autoGraded,
    skill_actions: candidates.map(toRunSkillAction),
  };

  // Persistence must never turn a finished run into a failure.
  const failOpen = (write: () => void): void => {
    try {
      write();
    } catch {
      /* fail-open */
    }
  };

  failOpen(() => writeOrchestrateRunToDb(runReport));

  let totalLlmCalls = 0;
  for (const candidate of candidates) {
    totalLlmCalls += candidate.evolveResult?.llmCallCount ?? 0;
  }

  failOpen(() =>
    writeCronRunToDb(getDb(), {
      jobName: "orchestrate",
      startedAt: runReport.timestamp,
      elapsedMs: runReport.elapsed_ms,
      status: "success",
      metrics: {
        total_skills: totals.totalSkills,
        evaluated: totals.evaluated,
        evolved: totals.evolved,
        deployed: totals.deployed,
        watched: totals.watched,
        skipped: totals.skipped,
        dry_run: result.summary.dryRun,
        total_llm_calls: totalLlmCalls,
        auto_graded: totals.autoGraded,
        workflow_skill_proposals: workflowProposals.length,
      },
    }),
  );

  return result;
}
@@ -0,0 +1,40 @@
1
+ import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
2
+
3
+ import { getOrchestrateLockPath } from "../constants.js";
4
+
5
/** On-disk payload of the orchestrate lock file. */
interface LockInfo {
  /** PID of the process that wrote the lock. */
  pid: number;
  /** ISO-8601 time at which the lock was written. */
  timestamp: string;
}

/** Age after which an existing lock is treated as abandoned and may be replaced. */
const LOCK_STALE_MS = 30 * 60 * 1000; // 30 minutes

/** Returns true when `err` is an object whose `code` property equals `code`. */
function hasErrnoCode(err: unknown, code: string): boolean {
  return typeof err === "object" && err !== null && (err as { code?: unknown }).code === code;
}

/**
 * Reports whether the lock file at `lockPath` is younger than `LOCK_STALE_MS`.
 * An unreadable, unparsable or timestamp-less lock counts as not fresh, so a
 * corrupted file can never block future runs.
 */
function isLockFresh(lockPath: string): boolean {
  try {
    const parsed: unknown = JSON.parse(readFileSync(lockPath, "utf-8"));
    const timestamp =
      typeof parsed === "object" && parsed !== null
        ? (parsed as Partial<LockInfo>).timestamp
        : undefined;
    if (typeof timestamp !== "string") return false;

    const ageMs = Date.now() - Date.parse(timestamp);
    // An unparsable timestamp yields NaN, which is treated as stale.
    return Number.isFinite(ageMs) && ageMs < LOCK_STALE_MS;
  } catch {
    return false;
  }
}

/**
 * Tries to take the orchestrate lock by writing a lock file.
 *
 * The lock file is created with the exclusive `wx` flag, so when two processes
 * race for a free lock only one of them succeeds; the previous
 * check-then-overwrite sequence let both win. A stale or corrupted lock is
 * removed first and then re-created exclusively. A narrow race remains when two
 * processes replace the same stale lock at the same instant.
 *
 * The function is deliberately fail-open: any unexpected filesystem error
 * (for example a missing directory or a permission problem) returns `true` so
 * that locking problems never prevent a run.
 *
 * @param lockPath - Lock file location; defaults to `getOrchestrateLockPath()`.
 * @returns `false` when a fresh lock is held by someone else, `true` otherwise.
 */
export function acquireLock(lockPath: string = getOrchestrateLockPath()): boolean {
  try {
    if (existsSync(lockPath)) {
      if (isLockFresh(lockPath)) {
        return false;
      }
      // Stale or corrupted: remove it so the exclusive create below can succeed.
      try {
        unlinkSync(lockPath);
      } catch (err) {
        // Someone else already removed it; any other failure falls through to fail-open.
        if (!hasErrnoCode(err, "ENOENT")) throw err;
      }
    }

    const lock: LockInfo = { pid: process.pid, timestamp: new Date().toISOString() };
    try {
      writeFileSync(lockPath, JSON.stringify(lock), { flag: "wx" });
    } catch (err) {
      // Another process created the lock between our check and our write.
      if (hasErrnoCode(err, "EEXIST")) return false;
      throw err;
    }
    return true;
  } catch {
    return true;
  }
}
33
+
34
/**
 * Releases the orchestrate lock by deleting its lock file.
 *
 * Best-effort: every error from the delete is ignored, so releasing a lock
 * that does not exist is a no-op.
 *
 * @param lockPath - Lock file location; defaults to `getOrchestrateLockPath()`.
 */
export function releaseLock(lockPath: string = getOrchestrateLockPath()): void {
  try {
    unlinkSync(lockPath);
  } catch {
    // Nothing to do: the file may already be gone.
  }
}
@@ -0,0 +1,131 @@
1
+ import type { CandidateContext, SkillAction } from "../orchestrate.js";
2
+ import type { SkillStatus } from "../status.js";
3
+ import type { EvolutionAuditEntry } from "../types.js";
4
+
5
/** Skill statuses that are considered for action; skills with any other status are skipped. */
const CANDIDATE_STATUSES = new Set<string>(["CRITICAL", "WARNING", "UNGRADED"]);

/** Minimum number of skill_checks required before autonomous evolution is allowed. */
export const MIN_CANDIDATE_EVIDENCE = 3;

/** Default number of hours to wait after a deploy before re-evolving the same skill. */
export const DEFAULT_COOLDOWN_HOURS = 24;
13
+
14
/**
 * Score a skill for ordering; higher scores sort first in selectCandidates.
 *
 * The score is the sum of:
 * - a status weight (CRITICAL 300, WARNING 200, anything else 100),
 * - the missed-query count, capped at 50,
 * - the failure rate as a rounded percentage (0 when passRate is null),
 * - 30 when the trend is "down",
 * - 150 per signal, capped at 450.
 *
 * @param skill Status record of the skill being ranked.
 * @param signalCount Number of signals recorded for the skill (defaults to 0).
 */
function candidatePriority(skill: SkillStatus, signalCount = 0): number {
  let score: number;
  switch (skill.status) {
    case "CRITICAL":
      score = 300;
      break;
    case "WARNING":
      score = 200;
      break;
    default:
      score = 100;
  }

  score += Math.min(skill.missedQueries, 50);

  if (skill.passRate !== null) {
    score += Math.round((1 - skill.passRate) * 100);
  }

  if (skill.trend === "down") {
    score += 30;
  }

  score += Math.min(signalCount * 150, 450);
  return score;
}
22
+
23
/**
 * Collect the names of skills that have a "deployed" audit entry within the
 * last `windowHours` hours, measured from the current time.
 *
 * Entries are ignored when their action is not "deployed", their skill_name is
 * empty, or their timestamp cannot be parsed.
 *
 * @param auditEntries Evolution audit entries to scan.
 * @param windowHours Size of the look-back window in hours.
 * @returns The distinct skill names deployed inside the window.
 */
export function findRecentlyDeployedSkills(
  auditEntries: EvolutionAuditEntry[],
  windowHours: number,
): Set<string> {
  const windowStartMs = Date.now() - windowHours * 60 * 60 * 1000;
  const recent = new Set<string>();

  for (const entry of auditEntries) {
    if (entry.action !== "deployed" || !entry.skill_name) {
      continue;
    }
    const deployedAt = Date.parse(entry.timestamp);
    const withinWindow = Number.isFinite(deployedAt) && deployedAt >= windowStartMs;
    if (withinWindow) {
      recent.add(entry.skill_name);
    }
  }

  return recent;
}
42
+
43
/**
 * Decide, for every skill, whether it should be evolved or skipped.
 *
 * Skills are first ordered by descending candidatePriority (using the signal
 * count looked up by lower-cased skill name). Each skill is then run through
 * the following gates in order; the first one that matches produces a "skip":
 *   1. it does not match `options.skillFilter`,
 *   2. its status is not one of CANDIDATE_STATUSES,
 *   3. it was deployed within the cooldown window,
 *   4. it has fewer than MIN_CANDIDATE_EVIDENCE checks, is not UNGRADED, and has no signals,
 *   5. it is UNGRADED with no missed queries and no signals,
 *   6. it is WARNING with no missed queries and a trend other than "down".
 * Anything that passes all gates becomes an "evolve" action. Finally, evolve
 * actions beyond the first `options.maxSkills` are turned into skips.
 *
 * @param skills Status records for the skills under consideration.
 * @param options Selection context (filter, cap, cooldown, audit entries, signals).
 * @returns One action per input skill, in priority order.
 */
export function selectCandidates(skills: SkillStatus[], options: CandidateContext): SkillAction[] {
  const signalsFor = (skill: SkillStatus): number =>
    options.signaledSkills?.get(skill.name.toLowerCase()) ?? 0;

  const ranked = [...skills].sort(
    (left, right) =>
      candidatePriority(right, signalsFor(right)) - candidatePriority(left, signalsFor(left)),
  );

  const cooldownHours = options.cooldownHours ?? DEFAULT_COOLDOWN_HOURS;
  const recentlyDeployed = findRecentlyDeployedSkills(options.auditEntries ?? [], cooldownHours);

  const skip = (skill: SkillStatus, reason: string): SkillAction => ({
    skill: skill.name,
    action: "skip",
    reason,
  });

  const decide = (skill: SkillStatus): SkillAction => {
    const signalCount = signalsFor(skill);

    if (options.skillFilter && skill.name !== options.skillFilter) {
      return skip(skill, `filtered out (--skill ${options.skillFilter})`);
    }

    if (!CANDIDATE_STATUSES.has(skill.status)) {
      return skip(skill, `status=${skill.status} — no action needed`);
    }

    if (recentlyDeployed.has(skill.name)) {
      return skip(skill, `recently evolved (cooldown ${cooldownHours}h) — let it bake`);
    }

    const hasSignals = signalCount !== 0;
    const isUngraded = skill.status === "UNGRADED";
    const skillChecks = skill.snapshot?.skill_checks ?? 0;

    if (skillChecks < MIN_CANDIDATE_EVIDENCE && !isUngraded && !hasSignals) {
      return skip(
        skill,
        `insufficient evidence (${skillChecks}/${MIN_CANDIDATE_EVIDENCE} checks) — need more data`,
      );
    }

    if (isUngraded && skill.missedQueries === 0 && !hasSignals) {
      return skip(skill, "UNGRADED with 0 missed queries — insufficient signal");
    }

    if (skill.status === "WARNING" && skill.missedQueries === 0 && skill.trend !== "down") {
      return skip(
        skill,
        `WARNING but no missed queries and trend=${skill.trend} — weak signal`,
      );
    }

    const passRateLabel =
      skill.passRate !== null ? `${(skill.passRate * 100).toFixed(0)}%` : "—";
    return {
      skill: skill.name,
      action: "evolve",
      reason: `status=${skill.status}, passRate=${passRateLabel}, missed=${skill.missedQueries}, trend=${skill.trend}`,
    };
  };

  const actions: SkillAction[] = ranked.map(decide);

  // Enforce the cap: only the first maxSkills evolve actions (highest priority) survive.
  let evolveCount = 0;
  for (const action of actions) {
    if (action.action !== "evolve") {
      continue;
    }
    evolveCount += 1;
    if (evolveCount > options.maxSkills) {
      action.action = "skip";
      action.reason = `capped by --max-skills ${options.maxSkills}`;
    }
  }

  return actions;
}
@@ -0,0 +1,59 @@
1
+ import { readConfiguredAgentType, getSelftuneVersion } from "../utils/selftune-meta.js";
2
+ import { getDb } from "../localdb/db.js";
3
+ import type { OrchestrateResult } from "../orchestrate.js";
4
+ import { SELFTUNE_CONFIG_PATH } from "../constants.js";
5
+ import type { AlphaIdentity } from "../types.js";
6
+
7
/**
 * Run the optional network side effects that follow an orchestrate run.
 *
 * - When the alpha identity is enrolled, runs the alpha upload cycle and stores
 *   its summary on `result.uploadSummary`.
 * - When the alpha identity has an API key, flushes creator contribution
 *   signals and, if any were attempted, stores the counts on
 *   `result.contributionRelaySummary`.
 *
 * Both steps are non-blocking: a failure in either is logged to stderr and
 * swallowed, and the other step still runs. Progress is also logged to stderr.
 *
 * @param input.result Orchestrate result, mutated in place with the summaries.
 * @param input.dryRun Forwarded to both the upload cycle and the relay flush.
 * @param input.readAlphaIdentity Supplies the alpha identity, or null if there is none.
 */
export async function runPostOrchestrateSideEffects(input: {
  result: OrchestrateResult;
  dryRun: boolean;
  readAlphaIdentity: () => AlphaIdentity | null;
}): Promise<void> {
  const { result, dryRun, readAlphaIdentity } = input;
  const alphaIdentity = readAlphaIdentity();
  if (!alphaIdentity) {
    // Without an identity neither the upload nor the relay applies.
    return;
  }

  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  if (alphaIdentity.enrolled) {
    try {
      console.error("[orchestrate] Running alpha upload cycle...");
      // Loaded lazily so the upload module is only pulled in for enrolled users.
      const { runUploadCycle } = await import("../alpha-upload/index.js");
      const uploadSummary = await runUploadCycle(getDb(), {
        enrolled: true,
        userId: alphaIdentity.user_id,
        agentType: readConfiguredAgentType(SELFTUNE_CONFIG_PATH, "unknown"),
        selftuneVersion: getSelftuneVersion(),
        dryRun,
        apiKey: alphaIdentity.api_key,
      });
      result.uploadSummary = uploadSummary;
      const { prepared, sent, failed, skipped } = uploadSummary;
      console.error(
        `[orchestrate] Alpha upload: prepared=${prepared}, sent=${sent}, failed=${failed}, skipped=${skipped}`,
      );
    } catch (err) {
      console.error(`[orchestrate] Alpha upload failed (non-blocking): ${describeError(err)}`);
    }
  }

  if (alphaIdentity.api_key) {
    try {
      const { flushCreatorContributionSignals } = await import("../contribution-relay.js");
      const relayResult = await flushCreatorContributionSignals(getDb(), {
        apiKey: alphaIdentity.api_key,
        dryRun,
      });
      const { attempted, sent, failed } = relayResult;
      // Only report when the relay actually had something to send.
      if (attempted > 0) {
        result.contributionRelaySummary = { attempted, sent, failed };
        console.error(
          `[orchestrate] Contribution relay: attempted=${attempted}, sent=${sent}, failed=${failed}`,
        );
      }
    } catch (err) {
      console.error(
        `[orchestrate] Contribution relay failed (non-blocking): ${describeError(err)}`,
      );
    }
  }
}