selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +95 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/adapters/pi/hook.ts +273 -0
  12. package/cli/selftune/adapters/pi/install.ts +207 -0
  13. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  15. package/cli/selftune/auto-update.ts +200 -8
  16. package/cli/selftune/canonical-export.ts +55 -25
  17. package/cli/selftune/command-surface.ts +397 -0
  18. package/cli/selftune/constants.ts +10 -1
  19. package/cli/selftune/contribute/contribute.ts +64 -13
  20. package/cli/selftune/contribution-config.ts +57 -3
  21. package/cli/selftune/contribution-preferences.ts +117 -0
  22. package/cli/selftune/contribution-signals.ts +8 -4
  23. package/cli/selftune/contribution-staging.ts +13 -2
  24. package/cli/selftune/contributions.ts +55 -121
  25. package/cli/selftune/creator-contributions.ts +29 -10
  26. package/cli/selftune/cron/setup.ts +7 -3
  27. package/cli/selftune/dashboard-contract.ts +87 -0
  28. package/cli/selftune/dashboard-server.ts +168 -17
  29. package/cli/selftune/dashboard.ts +350 -17
  30. package/cli/selftune/eval/baseline.ts +21 -5
  31. package/cli/selftune/eval/execution-eval.ts +170 -0
  32. package/cli/selftune/eval/family-overlap.ts +2 -2
  33. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  34. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  35. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  36. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  37. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  38. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  39. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  40. package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
  41. package/cli/selftune/evolution/evidence.ts +2 -6
  42. package/cli/selftune/evolution/evolve-body.ts +152 -38
  43. package/cli/selftune/evolution/evolve.ts +244 -52
  44. package/cli/selftune/evolution/rollback.ts +0 -1
  45. package/cli/selftune/evolution/validate-body.ts +111 -49
  46. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  47. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  48. package/cli/selftune/evolution/validate-routing.ts +51 -108
  49. package/cli/selftune/evolution/validation-contract.ts +91 -0
  50. package/cli/selftune/grading/auto-grade.ts +11 -7
  51. package/cli/selftune/grading/grade-session.ts +10 -16
  52. package/cli/selftune/hooks/skill-eval.ts +2 -1
  53. package/cli/selftune/hooks-shared/types.ts +1 -0
  54. package/cli/selftune/index.ts +58 -15
  55. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  56. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  57. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  58. package/cli/selftune/ingestors/pi-ingest.ts +727 -0
  59. package/cli/selftune/init.ts +38 -4
  60. package/cli/selftune/localdb/direct-write.ts +120 -1
  61. package/cli/selftune/localdb/materialize.ts +6 -7
  62. package/cli/selftune/localdb/queries/cron.ts +34 -0
  63. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  64. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  65. package/cli/selftune/localdb/queries/execution.ts +133 -0
  66. package/cli/selftune/localdb/queries/json.ts +18 -0
  67. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  68. package/cli/selftune/localdb/queries/raw.ts +95 -0
  69. package/cli/selftune/localdb/queries/staging.ts +270 -0
  70. package/cli/selftune/localdb/queries/trust.ts +392 -0
  71. package/cli/selftune/localdb/queries.ts +60 -2162
  72. package/cli/selftune/localdb/schema.ts +59 -0
  73. package/cli/selftune/monitoring/watch.ts +96 -29
  74. package/cli/selftune/normalization.ts +3 -0
  75. package/cli/selftune/observability.ts +12 -3
  76. package/cli/selftune/orchestrate/cli.ts +161 -0
  77. package/cli/selftune/orchestrate/execute.ts +295 -0
  78. package/cli/selftune/orchestrate/finalize.ts +157 -0
  79. package/cli/selftune/orchestrate/locks.ts +40 -0
  80. package/cli/selftune/orchestrate/plan.ts +131 -0
  81. package/cli/selftune/orchestrate/post-run.ts +59 -0
  82. package/cli/selftune/orchestrate/prepare.ts +334 -0
  83. package/cli/selftune/orchestrate/report.ts +182 -0
  84. package/cli/selftune/orchestrate/runtime.ts +120 -0
  85. package/cli/selftune/orchestrate/signals.ts +48 -0
  86. package/cli/selftune/orchestrate.ts +162 -1142
  87. package/cli/selftune/registry/client.ts +74 -0
  88. package/cli/selftune/registry/history.ts +54 -0
  89. package/cli/selftune/registry/index.ts +90 -0
  90. package/cli/selftune/registry/install.ts +141 -0
  91. package/cli/selftune/registry/list.ts +44 -0
  92. package/cli/selftune/registry/push.ts +171 -0
  93. package/cli/selftune/registry/rollback.ts +49 -0
  94. package/cli/selftune/registry/status.ts +62 -0
  95. package/cli/selftune/registry/sync.ts +125 -0
  96. package/cli/selftune/repair/skill-usage.ts +9 -3
  97. package/cli/selftune/routes/overview.ts +5 -2
  98. package/cli/selftune/routes/skill-report.ts +15 -2
  99. package/cli/selftune/schedule.ts +5 -5
  100. package/cli/selftune/status.ts +70 -2
  101. package/cli/selftune/sync.ts +127 -23
  102. package/cli/selftune/testing-readiness.ts +597 -0
  103. package/cli/selftune/types.ts +46 -5
  104. package/cli/selftune/uninstall.ts +2 -1
  105. package/cli/selftune/utils/canonical-log.ts +1 -9
  106. package/cli/selftune/utils/cli-error.ts +9 -0
  107. package/cli/selftune/utils/jsonl.ts +1 -30
  108. package/cli/selftune/utils/llm-call.ts +126 -6
  109. package/cli/selftune/utils/skill-discovery.ts +24 -0
  110. package/cli/selftune/workflows/proposals.ts +184 -0
  111. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  112. package/cli/selftune/workflows/workflows.ts +100 -26
  113. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  114. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  115. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  116. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  117. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  118. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  119. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  120. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
  121. package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
  122. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  123. package/package.json +25 -9
  124. package/packages/dashboard-core/AGENTS.md +18 -0
  125. package/packages/dashboard-core/README.md +30 -0
  126. package/packages/dashboard-core/index.ts +3 -0
  127. package/packages/dashboard-core/package.json +39 -0
  128. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  129. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  130. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  131. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  132. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  133. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  134. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  135. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  136. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  137. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  138. package/packages/dashboard-core/src/gates/index.ts +3 -0
  139. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  140. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  141. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  142. package/packages/dashboard-core/src/host/index.ts +3 -0
  143. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  144. package/packages/dashboard-core/src/models/index.ts +4 -0
  145. package/packages/dashboard-core/src/models/overview.ts +98 -0
  146. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  147. package/packages/dashboard-core/src/models/skills.ts +34 -0
  148. package/packages/dashboard-core/src/routes/index.ts +2 -0
  149. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  150. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  151. package/packages/dashboard-core/src/routes/types.ts +39 -0
  152. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  153. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  154. package/packages/dashboard-core/src/screens/index.ts +37 -0
  155. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  156. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  157. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  158. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  159. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  160. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  161. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  162. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  163. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  164. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  165. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  166. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  167. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  168. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  169. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  170. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  171. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  172. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  173. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  174. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  175. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  176. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  177. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  178. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  179. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  180. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  181. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  182. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  183. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  184. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  185. package/packages/telemetry-contract/package.json +1 -1
  186. package/packages/telemetry-contract/src/index.ts +1 -0
  187. package/packages/telemetry-contract/src/schemas.ts +63 -5
  188. package/packages/telemetry-contract/src/types.ts +97 -7
  189. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  190. package/packages/ui/AGENTS.md +16 -0
  191. package/packages/ui/README.md +1 -1
  192. package/packages/ui/package.json +1 -1
  193. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  194. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  195. package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
  196. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  197. package/packages/ui/src/components/InfoTip.tsx +1 -2
  198. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  199. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  200. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  201. package/packages/ui/src/components/OverviewPanels.tsx +693 -0
  202. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  203. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  204. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  205. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  206. package/packages/ui/src/components/index.ts +56 -1
  207. package/packages/ui/src/components/section-cards.tsx +18 -35
  208. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  209. package/packages/ui/src/lib/constants.tsx +0 -1
  210. package/packages/ui/src/primitives/card.tsx +1 -1
  211. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  212. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  213. package/packages/ui/src/primitives/select.tsx +2 -2
  214. package/packages/ui/src/primitives/tabs.tsx +7 -6
  215. package/packages/ui/src/types.ts +182 -4
  216. package/skill/SKILL.md +130 -318
  217. package/skill/agents/diagnosis-analyst.md +3 -3
  218. package/skill/agents/evolution-reviewer.md +3 -3
  219. package/skill/agents/integration-guide.md +3 -3
  220. package/skill/agents/pattern-analyst.md +2 -2
  221. package/skill/references/cli-quick-reference.md +89 -0
  222. package/skill/references/creator-playbook.md +131 -0
  223. package/skill/references/examples.md +48 -0
  224. package/skill/references/troubleshooting.md +47 -0
  225. package/skill/references/version-history.md +1 -1
  226. package/skill/selftune.contribute.json +11 -0
  227. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  228. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  229. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  230. package/skill/workflows/CreateTestDeploy.md +170 -0
  231. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  232. package/skill/{Workflows → workflows}/Cron.md +1 -1
  233. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  234. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  235. package/skill/{Workflows → workflows}/Evals.md +67 -2
  236. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  237. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  238. package/skill/{Workflows → workflows}/Grade.md +1 -1
  239. package/skill/{Workflows → workflows}/Ingest.md +60 -2
  240. package/skill/{Workflows → workflows}/Initialize.md +16 -9
  241. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  242. package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
  243. package/skill/workflows/Registry.md +99 -0
  244. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  245. package/skill/workflows/SignalsDashboard.md +87 -0
  246. package/skill/{Workflows → workflows}/Sync.md +3 -1
  247. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  248. package/skill/{Workflows → workflows}/Watch.md +42 -2
  249. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  250. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  251. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  252. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  253. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  254. package/cli/selftune/utils/html.ts +0 -27
  255. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
  256. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  257. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  258. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  259. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  260. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  261. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  262. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  263. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  264. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  265. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  266. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  267. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  268. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  269. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  270. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -0,0 +1,295 @@
1
+ import {
2
+ deriveExpectationsFromSkill,
3
+ gradeSession,
4
+ resolveLatestSessionForSkill,
5
+ } from "../grading/grade-session.js";
6
+ import type { readGradingResultsForSkill } from "../grading/results.js";
7
+ import { writeGradingBaseline, writeGradingResultToDb } from "../localdb/direct-write.js";
8
+ import type { watch as watchSkill } from "../monitoring/watch.js";
9
+ import type { EvolveOptions, evolve as evolveSkill } from "../evolution/evolve.js";
10
+ import type { ReplayValidationOptions } from "../evolution/engines/replay-engine.js";
11
+ import { buildRuntimeReplayValidationOptions } from "../evolution/validate-host-replay.js";
12
+ import { findRecentlyDeployedSkills } from "./plan.js";
13
+ import type { OrchestrateOptions, SkillAction } from "../orchestrate.js";
14
+ import type { EvolutionAuditEntry, SessionTelemetryRecord, SkillUsageRecord } from "../types.js";
15
+ import { readExcerpt } from "../utils/transcript.js";
16
+
17
/**
 * Arguments accepted by `buildReplayValidationOptions`.
 */
export interface ReplayOptionBuildInput {
  /** Name of the skill; forwarded unchanged to the runtime replay option builder. */
  skillName: string;
  /** Path associated with the skill; forwarded unchanged to the runtime replay option builder. */
  skillPath: string;
  /** Agent to replay with. A falsy value (e.g. `null`) means no replay options are built. */
  agent: string | null;
}
22
+
23
/**
 * Builds replay-validation options targeting a skill's description.
 *
 * @param input - Skill name, skill path and the agent to replay with.
 * @returns `undefined` when `input.agent` is falsy; otherwise the value produced by
 *   `buildRuntimeReplayValidationOptions` for the `"description"` content target.
 */
export function buildReplayValidationOptions(
  input: ReplayOptionBuildInput,
): ReplayValidationOptions | undefined {
  const replayAgent = input.agent;

  // Without an agent there is nothing to replay against.
  if (!replayAgent) {
    return undefined;
  }

  return buildRuntimeReplayValidationOptions({
    skillName: input.skillName,
    skillPath: input.skillPath,
    agent: replayAgent,
    contentTarget: "description",
  });
}
36
+
37
/**
 * Everything `runEvolutionPhase` needs, with its collaborators passed in
 * as functions rather than imported directly.
 */
export interface RunEvolutionPhaseInput {
  /** Candidates to process. Entries are updated in place (action, reason, evolveResult). */
  evolveCandidates: SkillAction[];
  /** Agent used for evolution; when falsy the phase does nothing and returns an empty list. */
  agent: string | null;
  /** Run flags: a truthy `dryRun` or an `approvalMode` of `"review"` makes every evolve call a dry run. */
  options: Pick<OrchestrateOptions, "approvalMode" | "dryRun">;
  /** Maps a skill name to its path; a falsy result marks the candidate as skipped. */
  resolveSkillPath: (skillName: string) => string | undefined;
  /** Supplies the grading results passed to `evolve` for a skill. */
  readGradingResults: typeof readGradingResultsForSkill;
  /** The evolve implementation to invoke for each candidate. */
  evolve: typeof evolveSkill;
  /** Builds the `replayOptions` value passed to `evolve`. */
  buildReplayOptions: typeof buildReplayValidationOptions;
  /** Extra evolve options spread last into every `evolve` call. */
  evolveDefaults: Pick<
    EvolveOptions,
    | "paretoEnabled"
    | "candidateCount"
    | "tokenEfficiencyEnabled"
    | "withBaseline"
    | "validationModel"
    | "cheapLoop"
    | "gateModel"
    | "adaptiveGate"
    | "proposalModel"
  >;
}
58
+
59
/**
 * Runs `evolve` sequentially for every candidate that is not already skipped.
 *
 * Candidates are updated in place:
 * - no skill path resolved → `action = "skip"` with a "SKILL.md not found" reason;
 * - `evolve` (or one of its inputs) throws → `action = "skip"` with an "evolve error" reason;
 * - otherwise the evolve result is stored on `candidate.evolveResult`.
 *
 * Progress is reported on stderr via `console.error`.
 *
 * @param input - Candidates plus the injected collaborators and evolve defaults.
 * @returns The candidates that still have `action === "evolve"` and whose evolve result
 *   reports `deployed`. Always empty when `input.agent` is falsy.
 */
export async function runEvolutionPhase(input: RunEvolutionPhaseInput): Promise<SkillAction[]> {
  const {
    agent,
    evolveCandidates,
    options,
    resolveSkillPath,
    readGradingResults,
    evolve,
    buildReplayOptions,
    evolveDefaults,
  } = input;

  if (!agent) {
    return [];
  }

  for (const candidate of evolveCandidates) {
    if (candidate.action === "skip") {
      continue;
    }

    const skillPath = resolveSkillPath(candidate.skill);
    if (!skillPath) {
      candidate.action = "skip";
      candidate.reason = `SKILL.md not found for "${candidate.skill}"`;
      console.error(` ⊘ ${candidate.skill}: ${candidate.reason}`);
      continue;
    }

    // Review mode must not deploy anything, so it is handled like an explicit dry run.
    const effectiveDryRun = options.dryRun || options.approvalMode === "review";
    const dryRunSuffix = effectiveDryRun ? " (dry-run)" : "";
    console.error(`[orchestrate] Evolving "${candidate.skill}"${dryRunSuffix}...`);

    try {
      const gradingResults = readGradingResults(candidate.skill);
      const replayOptions = buildReplayOptions({
        skillName: candidate.skill,
        skillPath,
        agent,
      });

      const outcome = await evolve({
        skillName: candidate.skill,
        skillPath,
        agent,
        dryRun: effectiveDryRun,
        confidenceThreshold: 0.6,
        maxIterations: 3,
        gradingResults,
        syncFirst: false,
        replayOptions,
        ...evolveDefaults,
      });

      candidate.evolveResult = outcome;

      console.error(
        outcome.deployed
          ? ` ✓ ${candidate.skill}: deployed (${outcome.reason})`
          : ` ✗ ${candidate.skill}: not deployed (${outcome.reason})`,
      );
    } catch (err) {
      // A failure for one skill must not stop the remaining candidates.
      const message = err instanceof Error ? err.message : String(err);
      candidate.action = "skip";
      candidate.reason = `evolve error: ${message}`;
      console.error(` ✗ ${candidate.skill}: error — ${message}`);
    }
  }

  return evolveCandidates.filter(
    (candidate) => candidate.action === "evolve" && candidate.evolveResult?.deployed,
  );
}
126
+
127
/**
 * Inputs for `autoGradeFreshDeploys`.
 */
export interface AutoGradeFreshDeploysInput {
  /** Candidates to grade; nothing happens when this list is empty. */
  freshlyDeployedCandidates: SkillAction[];
  /** When true, auto-grading is skipped entirely. */
  dryRun: boolean;
  /** Agent to grade with; when `null`, `detectAgent` is consulted instead. */
  agent: string | null;
  /** Fallback agent lookup, used only when `agent` is `null` or `undefined`. */
  detectAgent: () => string | null;
  /** Reads session telemetry; called once per candidate. */
  readTelemetry: () => SessionTelemetryRecord[];
  /** Reads skill usage records; called once per candidate. */
  readSkillRecords: () => SkillUsageRecord[];
}
135
+
136
/**
 * Grades the latest session of each freshly deployed skill and records a grading baseline.
 *
 * For every candidate, telemetry and skill usage are re-read, the latest session for the
 * skill is resolved, graded and written to the database. When the grading result was
 * persisted, a baseline row is written with the pass rate and the proposal id taken from
 * the candidate's "deployed" audit entry, if any.
 *
 * Does nothing on a dry run, when there are no candidates, or when no agent is available.
 * A failure for one candidate is logged to stderr and does not stop the others.
 *
 * @param input - Candidates plus agent selection and data readers.
 */
export async function autoGradeFreshDeploys(input: AutoGradeFreshDeploysInput): Promise<void> {
  const { freshlyDeployedCandidates, dryRun, agent, detectAgent, readTelemetry, readSkillRecords } =
    input;

  if (dryRun || freshlyDeployedCandidates.length === 0) {
    return;
  }

  const gradeAgent = agent ?? detectAgent();
  if (!gradeAgent) {
    return;
  }

  for (const candidate of freshlyDeployedCandidates) {
    try {
      const telemetrySnapshot = readTelemetry();
      const usageSnapshot = readSkillRecords();
      // Pass rates of the sessions whose grading result was persisted.
      const persistedPassRates: number[] = [];

      const session = resolveLatestSessionForSkill(
        telemetrySnapshot,
        usageSnapshot,
        candidate.skill,
      );

      if (session) {
        const derived = deriveExpectationsFromSkill(candidate.skill);

        let transcriptExcerpt = "(no transcript)";
        if (session.transcriptPath) {
          try {
            transcriptExcerpt = readExcerpt(session.transcriptPath);
          } catch {
            // Unreadable transcript: grade with the placeholder instead.
            transcriptExcerpt = "(no transcript)";
          }
        }

        const grading = await gradeSession({
          expectations: derived.expectations,
          telemetry: session.telemetry,
          sessionId: session.sessionId,
          skillName: candidate.skill,
          transcriptExcerpt,
          transcriptPath: session.transcriptPath,
          agent: gradeAgent,
        });

        if (writeGradingResultToDb(grading)) {
          persistedPassRates.push(grading.summary.pass_rate);
        }
      }

      const gradedCount = persistedPassRates.length;
      if (gradedCount === 0) {
        continue;
      }

      const avgPassRate =
        persistedPassRates.reduce((total, rate) => total + rate, 0) / persistedPassRates.length;
      const deployedEntry = candidate.evolveResult?.auditEntries?.find(
        (entry: { action: string }) => entry.action === "deployed",
      );
      const proposalId = deployedEntry?.proposal_id;

      writeGradingBaseline({
        skill_name: candidate.skill,
        proposal_id: proposalId ?? null,
        measured_at: new Date().toISOString(),
        pass_rate: avgPassRate,
        mean_score: null,
        sample_size: gradedCount,
        grading_results_json: JSON.stringify(persistedPassRates),
      });

      console.error(
        ` [post-deploy] ${candidate.skill}: graded ${gradedCount} session(s), baseline pass_rate=${avgPassRate.toFixed(2)}`,
      );
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      console.error(` [post-deploy] ${candidate.skill}: auto-grade error — ${message}`);
    }
  }
}
212
+
213
/**
 * Inputs for `watchRecentDeploys`.
 */
export interface WatchRecentDeploysInput {
  /** All candidates of the run; updated in place with watch results and new "watch" entries. */
  candidates: SkillAction[];
  /** Candidates deployed during this run; always watched in addition to recent deploys. */
  freshlyDeployedCandidates: SkillAction[];
  /** When set (non-empty), only the skill with exactly this name is watched. */
  skillFilter?: string;
  /** Look-back window, in hours, passed to `findRecentlyDeployedSkills`. */
  recentWindowHours: number;
  /** Reads the evolution audit entries; called once. */
  readAuditEntries: () => EvolutionAuditEntry[];
  /** Maps a skill name to its path; skills with a falsy result are not watched. */
  resolveSkillPath: (skillName: string) => string | undefined;
  /** The watch implementation to invoke for each skill. */
  watch: typeof watchSkill;
}
222
+
223
/**
 * Runs `watch` for every skill deployed recently or during this run.
 *
 * The skill set is the result of `findRecentlyDeployedSkills` for the freshly read audit
 * entries, plus every freshly deployed candidate. Skills excluded by `skillFilter` or
 * without a resolvable path are ignored. For freshly deployed skills the watch result is
 * attached to the matching "evolve" candidate (if one exists) and the skill name is
 * reported back; for all other skills a new "watch" candidate is appended to `candidates`.
 *
 * Watch failures are logged to stderr and do not stop the remaining skills.
 *
 * @param input - Candidates, filter, look-back window and injected collaborators.
 * @returns The audit entries that were read and the names of freshly deployed skills
 *   whose watch completed.
 */
export async function watchRecentDeploys(
  input: WatchRecentDeploysInput,
): Promise<{ freshAuditEntries: EvolutionAuditEntry[]; freshlyWatchedSkills: string[] }> {
  const {
    candidates,
    freshlyDeployedCandidates,
    skillFilter,
    recentWindowHours,
    readAuditEntries,
    resolveSkillPath,
    watch,
  } = input;

  const freshAuditEntries = readAuditEntries();
  const skillsToWatch = findRecentlyDeployedSkills(freshAuditEntries, recentWindowHours);

  // Names deployed in this run, kept separately so each skill can be labelled below.
  const freshNames = new Set<string>();
  for (const { skill } of freshlyDeployedCandidates) {
    skillsToWatch.add(skill);
    freshNames.add(skill);
  }

  const freshlyWatchedSkills: string[] = [];

  for (const skillName of skillsToWatch) {
    if (skillFilter && skillName !== skillFilter) {
      continue;
    }

    const skillPath = resolveSkillPath(skillName);
    if (!skillPath) {
      continue;
    }

    const isFreshlyDeployed = freshNames.has(skillName);
    console.error(
      `[orchestrate] Watching "${skillName}" (${isFreshlyDeployed ? "freshly deployed" : "recently evolved"})...`,
    );

    try {
      const watchResult = await watch({
        skillName,
        skillPath,
        windowSessions: 20,
        regressionThreshold: 0.1,
        autoRollback: true,
        enableGradeWatch: true,
        syncFirst: false,
      });

      if (!isFreshlyDeployed) {
        candidates.push({
          skill: skillName,
          action: "watch",
          reason: watchResult.alert ?? "stable",
          watchResult,
        });
      } else {
        const evolvedCandidate = candidates.find(
          (entry) => entry.skill === skillName && entry.action === "evolve",
        );
        if (evolvedCandidate) {
          evolvedCandidate.watchResult = watchResult;
        }
        freshlyWatchedSkills.push(skillName);
      }

      const marker = watchResult.alert ? "⚠" : "✓";
      console.error(` ${marker} ${skillName}: ${watchResult.recommendation}`);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      console.error(` ✗ ${skillName}: watch error — ${message}`);
    }
  }

  return { freshAuditEntries, freshlyWatchedSkills };
}
@@ -0,0 +1,157 @@
1
+ import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "../dashboard-contract.js";
2
+ import { getDb } from "../localdb/db.js";
3
+ import { writeCronRunToDb, writeOrchestrateRunToDb } from "../localdb/direct-write.js";
4
+ import type { OrchestrateResult, SkillAction } from "../orchestrate.js";
5
+ import type { SkillStatus, StatusResult } from "../status.js";
6
+ import type { SyncResult } from "../sync.js";
7
+ import type { ImprovementSignalRecord } from "../types.js";
8
+ import type { WorkflowSkillProposal } from "../workflows/proposals.js";
9
+ import { markSignalsConsumed } from "./signals.js";
10
+
11
/**
 * Aggregate counters for one orchestrate run, as computed by `buildFinalTotals`.
 */
interface OrchestrateFinalTotals {
  /** Number of skills in the status result. */
  totalSkills: number;
  /** Candidates whose action is "evolve". */
  evaluated: number;
  /** "evolve" candidates that have an evolve result attached. */
  evolved: number;
  /** Candidates whose evolve result reports `deployed`. */
  deployed: number;
  /** Candidates whose action is "watch", plus the freshly watched skills. */
  watched: number;
  /** Candidates whose action is "skip". */
  skipped: number;
  /** Auto-graded count, as supplied by the caller. */
  autoGraded: number;
  /** Names of freshly watched skills, as supplied by the caller. */
  freshlyWatchedSkills: string[];
}
21
+
22
/**
 * Inputs for `finalizeOrchestrateRun`.
 */
export interface FinalizeOrchestrateRunInput {
  /** Sync result, returned unchanged on the final result. */
  syncResult: SyncResult;
  /** Status result; its `skills` list provides the total skill count. */
  statusResult: StatusResult;
  /** Candidates of the run, used for the counters and the per-skill run report. */
  candidates: SkillAction[];
  /** Workflow skill proposals; returned on the result and counted in the cron metrics. */
  workflowProposals: WorkflowSkillProposal[];
  /** Whether the run was a dry run. */
  dryRun: boolean;
  /** Approval mode the run was executed with. */
  approvalMode: "auto" | "review";
  /** Number of auto-graded items to report. */
  autoGradedCount: number;
  /** Names of freshly deployed skills that were watched during the run. */
  freshlyWatchedSkills: string[];
  /** Signals to mark as consumed by this run; ignored when empty. */
  pendingSignals: ImprovementSignalRecord[];
  /** Duration of the run in milliseconds. */
  elapsedMs: number;
}
34
+
35
/**
 * Tallies the per-action counters for a finished orchestrate run.
 *
 * @param skills - Skills from the status result; only the count is used.
 * @param candidates - Candidates of the run.
 * @param autoGradedCount - Passed through as `autoGraded`.
 * @param freshlyWatchedSkills - Passed through; its length is added to `watched`.
 * @returns The aggregated totals.
 */
function buildFinalTotals(
  skills: SkillStatus[],
  candidates: SkillAction[],
  autoGradedCount: number,
  freshlyWatchedSkills: string[],
): OrchestrateFinalTotals {
  let evaluated = 0;
  let evolved = 0;
  let deployed = 0;
  let watchActions = 0;
  let skipped = 0;

  for (const candidate of candidates) {
    // Deployment is counted regardless of the candidate's current action.
    if (candidate.evolveResult?.deployed) {
      deployed += 1;
    }

    switch (candidate.action) {
      case "evolve":
        evaluated += 1;
        if (candidate.evolveResult !== undefined) {
          evolved += 1;
        }
        break;
      case "watch":
        watchActions += 1;
        break;
      case "skip":
        skipped += 1;
        break;
      default:
        break;
    }
  }

  return {
    totalSkills: skills.length,
    evaluated,
    evolved,
    deployed,
    watched: watchActions + freshlyWatchedSkills.length,
    skipped,
    autoGraded: autoGradedCount,
    freshlyWatchedSkills,
  };
}
56
+
57
/**
 * Builds the final orchestrate result and records the run.
 *
 * Steps, in order:
 * 1. compute the totals via `buildFinalTotals` and assemble the result;
 * 2. generate a run id and, if there are pending signals, mark them consumed by that run;
 * 3. write the orchestrate run report to the database;
 * 4. write a cron-run row (job name "orchestrate", status "success") with the metrics.
 *
 * Both database writes are fail-open: any error they throw is swallowed so that a
 * persistence problem never changes the returned result.
 *
 * @param input - Results of the earlier phases plus run flags and timing.
 * @returns The assembled orchestrate result.
 */
export function finalizeOrchestrateRun(input: FinalizeOrchestrateRunInput): OrchestrateResult {
  const {
    syncResult,
    statusResult,
    candidates,
    workflowProposals,
    dryRun,
    approvalMode,
    autoGradedCount,
    freshlyWatchedSkills,
    pendingSignals,
    elapsedMs,
  } = input;

  const totals = buildFinalTotals(
    statusResult.skills,
    candidates,
    autoGradedCount,
    freshlyWatchedSkills,
  );

  const result: OrchestrateResult = {
    syncResult,
    statusResult,
    candidates,
    workflowProposals,
    summary: { ...totals, dryRun, approvalMode, elapsedMs },
  };

  const runId = `run_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
  if (pendingSignals.length > 0) {
    markSignalsConsumed(pendingSignals, runId);
  }

  const toRunSkillAction = (candidate: SkillAction): OrchestrateRunSkillAction => {
    const { evolveResult, watchResult } = candidate;
    return {
      skill: candidate.skill,
      action: candidate.action,
      reason: candidate.reason,
      deployed: evolveResult?.deployed,
      rolledBack: watchResult?.rolledBack,
      alert: watchResult?.alert,
      elapsed_ms: evolveResult?.elapsedMs,
      llm_calls: evolveResult?.llmCallCount,
    };
  };

  const runReport: OrchestrateRunReport = {
    run_id: runId,
    timestamp: new Date().toISOString(),
    elapsed_ms: elapsedMs,
    dry_run: dryRun,
    approval_mode: approvalMode,
    total_skills: totals.totalSkills,
    evaluated: totals.evaluated,
    evolved: totals.evolved,
    deployed: totals.deployed,
    watched: totals.watched,
    skipped: totals.skipped,
    auto_graded: totals.autoGraded,
    skill_actions: candidates.map(toRunSkillAction),
  };

  try {
    writeOrchestrateRunToDb(runReport);
  } catch {
    /* fail-open */
  }

  let totalLlmCalls = 0;
  for (const candidate of candidates) {
    totalLlmCalls += candidate.evolveResult?.llmCallCount ?? 0;
  }

  try {
    writeCronRunToDb(getDb(), {
      jobName: "orchestrate",
      startedAt: runReport.timestamp,
      elapsedMs: runReport.elapsed_ms,
      status: "success",
      metrics: {
        total_skills: totals.totalSkills,
        evaluated: totals.evaluated,
        evolved: totals.evolved,
        deployed: totals.deployed,
        watched: totals.watched,
        skipped: totals.skipped,
        dry_run: dryRun,
        total_llm_calls: totalLlmCalls,
        auto_graded: totals.autoGraded,
        workflow_skill_proposals: workflowProposals.length,
      },
    });
  } catch {
    /* fail-open */
  }

  return result;
}
@@ -0,0 +1,40 @@
1
+ import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
2
+
3
+ import { getOrchestrateLockPath } from "../constants.js";
4
+
5
/** Shape of the JSON document stored in the lock file. */
interface LockInfo {
  /** PID of the process that wrote the lock. */
  pid: number;
  /** ISO-8601 timestamp taken when the lock was written. */
  timestamp: string;
}

/** A lock older than this is treated as abandoned and may be taken over. */
const LOCK_STALE_MS = 30 * 60 * 1000; // 30 minutes

/** True when `err` is an object whose `code` property equals `code`. */
function hasErrnoCode(err: unknown, code: string): boolean {
  return typeof err === "object" && err !== null && (err as { code?: unknown }).code === code;
}

/**
 * Age in milliseconds of the lock stored at `lockPath`.
 *
 * Returns `null` when the file cannot be read, is not valid JSON, or does not
 * carry a parseable string `timestamp`; callers treat that as a stale lock.
 */
function readLockAgeMs(lockPath: string): number | null {
  try {
    const parsed: unknown = JSON.parse(readFileSync(lockPath, "utf-8"));
    if (typeof parsed !== "object" || parsed === null) {
      return null;
    }
    const { timestamp } = parsed as Partial<LockInfo>;
    const writtenAtMs = typeof timestamp === "string" ? Date.parse(timestamp) : Number.NaN;
    return Number.isFinite(writtenAtMs) ? Date.now() - writtenAtMs : null;
  } catch {
    return null;
  }
}

/**
 * Try to take the lock by writing a `LockInfo` JSON document to `lockPath`.
 *
 * - No lock file: the file is created with the exclusive `wx` flag, so when two
 *   processes race for a missing lock only one of them gets `true`.
 * - Lock file younger than `LOCK_STALE_MS`: returns `false`.
 * - Lock file stale, unreadable or corrupted: it is overwritten and `true` is returned.
 * - Any unexpected filesystem error: returns `true` (fail-open), so a broken
 *   lock location never blocks the caller.
 *
 * @param lockPath Location of the lock file; defaults to `getOrchestrateLockPath()`.
 * @returns `true` when the caller may proceed, `false` when a fresh lock is held.
 */
export function acquireLock(lockPath: string = getOrchestrateLockPath()): boolean {
  const lock: LockInfo = { pid: process.pid, timestamp: new Date().toISOString() };
  const serialized = JSON.stringify(lock);

  try {
    if (!existsSync(lockPath)) {
      try {
        // Exclusive create closes the window between the existence check and the write.
        writeFileSync(lockPath, serialized, { flag: "wx" });
        return true;
      } catch (err) {
        if (hasErrnoCode(err, "EEXIST")) {
          // Another process created the lock after our existence check: it holds it.
          return false;
        }
        throw err;
      }
    }

    const ageMs = readLockAgeMs(lockPath);
    if (ageMs !== null && ageMs < LOCK_STALE_MS) {
      return false;
    }

    // Stale or corrupted lock: take it over.
    writeFileSync(lockPath, serialized);
    return true;
  } catch {
    return true;
  }
}
33
+
34
/**
 * Drop the lock by deleting the lock file.
 *
 * Best-effort: any error raised while unlinking (for example the file not
 * existing) is swallowed, so this never throws.
 *
 * @param lockPath Location of the lock file; defaults to `getOrchestrateLockPath()`.
 */
export function releaseLock(lockPath: string = getOrchestrateLockPath()): void {
  const removeLockFile = (): void => {
    unlinkSync(lockPath);
  };

  try {
    removeLockFile();
  } catch {
    // Nothing to release, or the file could not be removed; ignored on purpose.
    return;
  }
}
@@ -0,0 +1,131 @@
1
+ import type { CandidateContext, SkillAction } from "../orchestrate.js";
2
+ import type { SkillStatus } from "../status.js";
3
+ import type { EvolutionAuditEntry } from "../types.js";
4
+
5
+ /** Candidate selection criteria. */
6
+ const CANDIDATE_STATUSES = new Set(["CRITICAL", "WARNING", "UNGRADED"]);
7
+
8
+ /** Minimum skill_checks before autonomous evolution is allowed. */
9
+ export const MIN_CANDIDATE_EVIDENCE = 3;
10
+
11
+ /** Default cooldown hours after a deploy before re-evolving the same skill. */
12
+ export const DEFAULT_COOLDOWN_HOURS = 24;
13
+
14
/**
 * Score a skill for ordering; a higher score sorts earlier in `selectCandidates`.
 *
 * The score is the sum of:
 * - a status base (CRITICAL 300, WARNING 200, anything else 100),
 * - the missed-query count, capped at 50,
 * - the failure percentage `round((1 - passRate) * 100)` when a pass rate is present,
 * - 30 when the trend is "down",
 * - 150 per signal, capped at 450.
 */
function candidatePriority(skill: SkillStatus, signalCount = 0): number {
  let score: number;
  switch (skill.status) {
    case "CRITICAL":
      score = 300;
      break;
    case "WARNING":
      score = 200;
      break;
    default:
      score = 100;
  }

  score += Math.min(skill.missedQueries, 50);

  if (skill.passRate !== null) {
    score += Math.round((1 - skill.passRate) * 100);
  }

  if (skill.trend === "down") {
    score += 30;
  }

  score += Math.min(signalCount * 150, 450);
  return score;
}
22
+
23
/**
 * Collect the names of skills that have a "deployed" audit entry inside the
 * trailing window.
 *
 * An entry counts only when its action is "deployed", it has a non-empty
 * `skill_name`, and its timestamp parses to a finite time at or after
 * `now - windowHours`.
 *
 * @param auditEntries Audit log entries to scan.
 * @param windowHours Size of the look-back window, in hours.
 * @returns The set of matching skill names.
 */
export function findRecentlyDeployedSkills(
  auditEntries: EvolutionAuditEntry[],
  windowHours: number,
): Set<string> {
  const windowStartMs = Date.now() - windowHours * 60 * 60 * 1000;

  return auditEntries.reduce((recent, entry) => {
    if (entry.action !== "deployed" || !entry.skill_name) {
      return recent;
    }
    const whenMs = Date.parse(entry.timestamp);
    if (Number.isFinite(whenMs) && whenMs >= windowStartMs) {
      recent.add(entry.skill_name);
    }
    return recent;
  }, new Set<string>());
}
42
+
43
/**
 * Decide, for every skill, whether it should be evolved or skipped.
 *
 * Skills are visited in descending `candidatePriority` order (signal counts are
 * looked up by lower-cased skill name). Each skill is skipped for the first
 * rule that matches, in this order: `--skill` filter mismatch, status outside
 * the candidate statuses, deployed within the cooldown window, too few skill
 * checks (unless UNGRADED or signalled), UNGRADED without missed queries or
 * signals, WARNING without missed queries and not trending down. Remaining
 * skills are marked "evolve" until `options.maxSkills` of them have been
 * chosen; later ones are skipped as capped.
 *
 * @returns One action per input skill, in priority order.
 */
export function selectCandidates(skills: SkillStatus[], options: CandidateContext): SkillAction[] {
  const signalsFor = (name: string): number =>
    options.signaledSkills?.get(name.toLowerCase()) ?? 0;

  // Score each skill once, then order by score (highest first; ties keep input order).
  const ranked = skills
    .map((skill) => ({ skill, priority: candidatePriority(skill, signalsFor(skill.name)) }))
    .sort((left, right) => right.priority - left.priority)
    .map((scored) => scored.skill);

  const cooldownHours = options.cooldownHours ?? DEFAULT_COOLDOWN_HOURS;
  const coolingDown = findRecentlyDeployedSkills(options.auditEntries ?? [], cooldownHours);

  /** Reason this skill must be skipped, or `null` when it is eligible to evolve. */
  const skipReasonFor = (skill: SkillStatus): string | null => {
    if (options.skillFilter && skill.name !== options.skillFilter) {
      return `filtered out (--skill ${options.skillFilter})`;
    }
    if (!CANDIDATE_STATUSES.has(skill.status)) {
      return `status=${skill.status} — no action needed`;
    }
    if (coolingDown.has(skill.name)) {
      return `recently evolved (cooldown ${cooldownHours}h) — let it bake`;
    }

    const unsignaled = signalsFor(skill.name) === 0;
    const skillChecks = skill.snapshot?.skill_checks ?? 0;
    if (skillChecks < MIN_CANDIDATE_EVIDENCE && skill.status !== "UNGRADED" && unsignaled) {
      return `insufficient evidence (${skillChecks}/${MIN_CANDIDATE_EVIDENCE} checks) — need more data`;
    }

    const noMisses = skill.missedQueries === 0;
    if (skill.status === "UNGRADED" && noMisses && unsignaled) {
      return "UNGRADED with 0 missed queries — insufficient signal";
    }
    if (skill.status === "WARNING" && noMisses && skill.trend !== "down") {
      return `WARNING but no missed queries and trend=${skill.trend} — weak signal`;
    }
    return null;
  };

  let evolveCount = 0;
  return ranked.map((skill): SkillAction => {
    const skipReason = skipReasonFor(skill);
    if (skipReason !== null) {
      return { skill: skill.name, action: "skip", reason: skipReason };
    }

    evolveCount += 1;
    if (evolveCount > options.maxSkills) {
      return {
        skill: skill.name,
        action: "skip",
        reason: `capped by --max-skills ${options.maxSkills}`,
      };
    }

    const passRateLabel = skill.passRate !== null ? `${(skill.passRate * 100).toFixed(0)}%` : "—";
    return {
      skill: skill.name,
      action: "evolve",
      reason: `status=${skill.status}, passRate=${passRateLabel}, missed=${skill.missedQueries}, trend=${skill.trend}`,
    };
  });
}
@@ -0,0 +1,59 @@
1
+ import { readConfiguredAgentType, getSelftuneVersion } from "../utils/selftune-meta.js";
2
+ import { getDb } from "../localdb/db.js";
3
+ import type { OrchestrateResult } from "../orchestrate.js";
4
+ import { SELFTUNE_CONFIG_PATH } from "../constants.js";
5
+ import type { AlphaIdentity } from "../types.js";
6
+
7
/**
 * Run the network side effects that follow an orchestrate run.
 *
 * Reads the alpha identity once via `readAlphaIdentity`, then:
 * 1. if the identity is enrolled, runs the alpha upload cycle and stores its
 *    summary on `result.uploadSummary`;
 * 2. if the identity has an API key, flushes creator contribution signals and,
 *    when at least one was attempted, stores the counts on
 *    `result.contributionRelaySummary`.
 *
 * Both steps log to stderr and are non-blocking: a failure in either is caught
 * and logged, never rethrown.
 */
export async function runPostOrchestrateSideEffects(input: {
  result: OrchestrateResult;
  dryRun: boolean;
  readAlphaIdentity: () => AlphaIdentity | null;
}): Promise<void> {
  const { result, dryRun, readAlphaIdentity } = input;
  const identity = readAlphaIdentity();
  if (!identity) {
    return;
  }

  const describe = (failure: unknown): string =>
    failure instanceof Error ? failure.message : String(failure);

  if (identity.enrolled) {
    try {
      console.error("[orchestrate] Running alpha upload cycle...");
      const uploadModule = await import("../alpha-upload/index.js");
      const summary = await uploadModule.runUploadCycle(getDb(), {
        enrolled: true,
        userId: identity.user_id,
        agentType: readConfiguredAgentType(SELFTUNE_CONFIG_PATH, "unknown"),
        selftuneVersion: getSelftuneVersion(),
        dryRun,
        apiKey: identity.api_key,
      });
      result.uploadSummary = summary;
      console.error(
        `[orchestrate] Alpha upload: prepared=${summary.prepared}, sent=${summary.sent}, failed=${summary.failed}, skipped=${summary.skipped}`,
      );
    } catch (uploadFailure) {
      console.error(`[orchestrate] Alpha upload failed (non-blocking): ${describe(uploadFailure)}`);
    }
  }

  if (identity.api_key) {
    try {
      const relayModule = await import("../contribution-relay.js");
      const { attempted, sent, failed } = await relayModule.flushCreatorContributionSignals(
        getDb(),
        { apiKey: identity.api_key, dryRun },
      );
      // Only record and log a summary when there was something to relay.
      if (attempted > 0) {
        result.contributionRelaySummary = { attempted, sent, failed };
        console.error(
          `[orchestrate] Contribution relay: attempted=${attempted}, sent=${sent}, failed=${failed}`,
        );
      }
    } catch (relayFailure) {
      console.error(
        `[orchestrate] Contribution relay failed (non-blocking): ${describe(relayFailure)}`,
      );
    }
  }
}