selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +95 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/adapters/pi/hook.ts +273 -0
  12. package/cli/selftune/adapters/pi/install.ts +207 -0
  13. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  15. package/cli/selftune/auto-update.ts +200 -8
  16. package/cli/selftune/canonical-export.ts +55 -25
  17. package/cli/selftune/command-surface.ts +397 -0
  18. package/cli/selftune/constants.ts +10 -1
  19. package/cli/selftune/contribute/contribute.ts +64 -13
  20. package/cli/selftune/contribution-config.ts +57 -3
  21. package/cli/selftune/contribution-preferences.ts +117 -0
  22. package/cli/selftune/contribution-signals.ts +8 -4
  23. package/cli/selftune/contribution-staging.ts +13 -2
  24. package/cli/selftune/contributions.ts +55 -121
  25. package/cli/selftune/creator-contributions.ts +29 -10
  26. package/cli/selftune/cron/setup.ts +7 -3
  27. package/cli/selftune/dashboard-contract.ts +87 -0
  28. package/cli/selftune/dashboard-server.ts +168 -17
  29. package/cli/selftune/dashboard.ts +350 -17
  30. package/cli/selftune/eval/baseline.ts +21 -5
  31. package/cli/selftune/eval/execution-eval.ts +170 -0
  32. package/cli/selftune/eval/family-overlap.ts +2 -2
  33. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  34. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  35. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  36. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  37. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  38. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  39. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  40. package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
  41. package/cli/selftune/evolution/evidence.ts +2 -6
  42. package/cli/selftune/evolution/evolve-body.ts +152 -38
  43. package/cli/selftune/evolution/evolve.ts +244 -52
  44. package/cli/selftune/evolution/rollback.ts +0 -1
  45. package/cli/selftune/evolution/validate-body.ts +111 -49
  46. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  47. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  48. package/cli/selftune/evolution/validate-routing.ts +51 -108
  49. package/cli/selftune/evolution/validation-contract.ts +91 -0
  50. package/cli/selftune/grading/auto-grade.ts +11 -7
  51. package/cli/selftune/grading/grade-session.ts +10 -16
  52. package/cli/selftune/hooks/skill-eval.ts +2 -1
  53. package/cli/selftune/hooks-shared/types.ts +1 -0
  54. package/cli/selftune/index.ts +58 -15
  55. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  56. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  57. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  58. package/cli/selftune/ingestors/pi-ingest.ts +727 -0
  59. package/cli/selftune/init.ts +38 -4
  60. package/cli/selftune/localdb/direct-write.ts +120 -1
  61. package/cli/selftune/localdb/materialize.ts +6 -7
  62. package/cli/selftune/localdb/queries/cron.ts +34 -0
  63. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  64. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  65. package/cli/selftune/localdb/queries/execution.ts +133 -0
  66. package/cli/selftune/localdb/queries/json.ts +18 -0
  67. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  68. package/cli/selftune/localdb/queries/raw.ts +95 -0
  69. package/cli/selftune/localdb/queries/staging.ts +270 -0
  70. package/cli/selftune/localdb/queries/trust.ts +392 -0
  71. package/cli/selftune/localdb/queries.ts +60 -2162
  72. package/cli/selftune/localdb/schema.ts +59 -0
  73. package/cli/selftune/monitoring/watch.ts +96 -29
  74. package/cli/selftune/normalization.ts +3 -0
  75. package/cli/selftune/observability.ts +12 -3
  76. package/cli/selftune/orchestrate/cli.ts +161 -0
  77. package/cli/selftune/orchestrate/execute.ts +295 -0
  78. package/cli/selftune/orchestrate/finalize.ts +157 -0
  79. package/cli/selftune/orchestrate/locks.ts +40 -0
  80. package/cli/selftune/orchestrate/plan.ts +131 -0
  81. package/cli/selftune/orchestrate/post-run.ts +59 -0
  82. package/cli/selftune/orchestrate/prepare.ts +334 -0
  83. package/cli/selftune/orchestrate/report.ts +182 -0
  84. package/cli/selftune/orchestrate/runtime.ts +120 -0
  85. package/cli/selftune/orchestrate/signals.ts +48 -0
  86. package/cli/selftune/orchestrate.ts +162 -1142
  87. package/cli/selftune/registry/client.ts +74 -0
  88. package/cli/selftune/registry/history.ts +54 -0
  89. package/cli/selftune/registry/index.ts +90 -0
  90. package/cli/selftune/registry/install.ts +141 -0
  91. package/cli/selftune/registry/list.ts +44 -0
  92. package/cli/selftune/registry/push.ts +171 -0
  93. package/cli/selftune/registry/rollback.ts +49 -0
  94. package/cli/selftune/registry/status.ts +62 -0
  95. package/cli/selftune/registry/sync.ts +125 -0
  96. package/cli/selftune/repair/skill-usage.ts +9 -3
  97. package/cli/selftune/routes/overview.ts +5 -2
  98. package/cli/selftune/routes/skill-report.ts +15 -2
  99. package/cli/selftune/schedule.ts +5 -5
  100. package/cli/selftune/status.ts +70 -2
  101. package/cli/selftune/sync.ts +127 -23
  102. package/cli/selftune/testing-readiness.ts +597 -0
  103. package/cli/selftune/types.ts +46 -5
  104. package/cli/selftune/uninstall.ts +2 -1
  105. package/cli/selftune/utils/canonical-log.ts +1 -9
  106. package/cli/selftune/utils/cli-error.ts +9 -0
  107. package/cli/selftune/utils/jsonl.ts +1 -30
  108. package/cli/selftune/utils/llm-call.ts +126 -6
  109. package/cli/selftune/utils/skill-discovery.ts +24 -0
  110. package/cli/selftune/workflows/proposals.ts +184 -0
  111. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  112. package/cli/selftune/workflows/workflows.ts +100 -26
  113. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  114. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  115. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  116. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  117. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  118. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  119. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  120. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
  121. package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
  122. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  123. package/package.json +25 -9
  124. package/packages/dashboard-core/AGENTS.md +18 -0
  125. package/packages/dashboard-core/README.md +30 -0
  126. package/packages/dashboard-core/index.ts +3 -0
  127. package/packages/dashboard-core/package.json +39 -0
  128. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  129. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  130. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  131. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  132. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  133. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  134. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  135. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  136. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  137. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  138. package/packages/dashboard-core/src/gates/index.ts +3 -0
  139. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  140. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  141. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  142. package/packages/dashboard-core/src/host/index.ts +3 -0
  143. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  144. package/packages/dashboard-core/src/models/index.ts +4 -0
  145. package/packages/dashboard-core/src/models/overview.ts +98 -0
  146. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  147. package/packages/dashboard-core/src/models/skills.ts +34 -0
  148. package/packages/dashboard-core/src/routes/index.ts +2 -0
  149. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  150. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  151. package/packages/dashboard-core/src/routes/types.ts +39 -0
  152. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  153. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  154. package/packages/dashboard-core/src/screens/index.ts +37 -0
  155. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  156. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  157. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  158. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  159. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  160. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  161. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  162. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  163. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  164. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  165. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  166. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  167. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  168. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  169. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  170. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  171. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  172. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  173. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  174. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  175. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  176. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  177. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  178. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  179. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  180. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  181. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  182. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  183. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  184. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  185. package/packages/telemetry-contract/package.json +1 -1
  186. package/packages/telemetry-contract/src/index.ts +1 -0
  187. package/packages/telemetry-contract/src/schemas.ts +63 -5
  188. package/packages/telemetry-contract/src/types.ts +97 -7
  189. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  190. package/packages/ui/AGENTS.md +16 -0
  191. package/packages/ui/README.md +1 -1
  192. package/packages/ui/package.json +1 -1
  193. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  194. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  195. package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
  196. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  197. package/packages/ui/src/components/InfoTip.tsx +1 -2
  198. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  199. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  200. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  201. package/packages/ui/src/components/OverviewPanels.tsx +693 -0
  202. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  203. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  204. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  205. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  206. package/packages/ui/src/components/index.ts +56 -1
  207. package/packages/ui/src/components/section-cards.tsx +18 -35
  208. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  209. package/packages/ui/src/lib/constants.tsx +0 -1
  210. package/packages/ui/src/primitives/card.tsx +1 -1
  211. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  212. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  213. package/packages/ui/src/primitives/select.tsx +2 -2
  214. package/packages/ui/src/primitives/tabs.tsx +7 -6
  215. package/packages/ui/src/types.ts +182 -4
  216. package/skill/SKILL.md +130 -318
  217. package/skill/agents/diagnosis-analyst.md +3 -3
  218. package/skill/agents/evolution-reviewer.md +3 -3
  219. package/skill/agents/integration-guide.md +3 -3
  220. package/skill/agents/pattern-analyst.md +2 -2
  221. package/skill/references/cli-quick-reference.md +89 -0
  222. package/skill/references/creator-playbook.md +131 -0
  223. package/skill/references/examples.md +48 -0
  224. package/skill/references/troubleshooting.md +47 -0
  225. package/skill/references/version-history.md +1 -1
  226. package/skill/selftune.contribute.json +11 -0
  227. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  228. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  229. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  230. package/skill/workflows/CreateTestDeploy.md +170 -0
  231. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  232. package/skill/{Workflows → workflows}/Cron.md +1 -1
  233. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  234. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  235. package/skill/{Workflows → workflows}/Evals.md +67 -2
  236. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  237. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  238. package/skill/{Workflows → workflows}/Grade.md +1 -1
  239. package/skill/{Workflows → workflows}/Ingest.md +60 -2
  240. package/skill/{Workflows → workflows}/Initialize.md +16 -9
  241. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  242. package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
  243. package/skill/workflows/Registry.md +99 -0
  244. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  245. package/skill/workflows/SignalsDashboard.md +87 -0
  246. package/skill/{Workflows → workflows}/Sync.md +3 -1
  247. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  248. package/skill/{Workflows → workflows}/Watch.md +42 -2
  249. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  250. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  251. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  252. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  253. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  254. package/cli/selftune/utils/html.ts +0 -27
  255. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
  256. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  257. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  258. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  259. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  260. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  261. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  262. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  263. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  264. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  265. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  266. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  267. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  268. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  269. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  270. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -9,42 +9,43 @@
9
9
  * explicit dry-run and review-required modes for human-in-the-loop operation.
10
10
  */
11
11
 
12
- import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
13
- import { homedir } from "node:os";
14
- import { dirname, join } from "node:path";
15
- import { parseArgs } from "node:util";
16
-
17
- import { readAlphaIdentity } from "./alpha-identity.js";
18
12
  import type { UploadCycleSummary } from "./alpha-upload/index.js";
19
- import { getOrchestrateLockPath, SELFTUNE_CONFIG_PATH } from "./constants.js";
20
- import type { OrchestrateRunReport, OrchestrateRunSkillAction } from "./dashboard-contract.js";
21
13
  import type { EvolveOptions, EvolveResult } from "./evolution/evolve.js";
22
- import {
23
- buildDefaultGradingOutputPath,
24
- deriveExpectationsFromSkill,
25
- gradeSession,
26
- resolveLatestSessionForSkill,
27
- } from "./grading/grade-session.js";
28
14
  import { readGradingResultsForSkill } from "./grading/results.js";
29
15
  import { getDb } from "./localdb/db.js";
16
+ import { writeCronRunToDb } from "./localdb/direct-write.js";
17
+ import type { WatchResult } from "./monitoring/watch.js";
30
18
  import {
31
- updateSignalConsumed,
32
- writeGradingResultToDb,
33
- writeOrchestrateRunToDb,
34
- } from "./localdb/direct-write.js";
19
+ buildOrchestrateJsonOutput,
20
+ parseOrchestrateCliArgs,
21
+ renderOrchestrateHelp,
22
+ } from "./orchestrate/cli.js";
35
23
  import {
36
- queryEvolutionAudit,
37
- queryImprovementSignals,
38
- queryQueryLog,
39
- querySessionTelemetry,
40
- querySkillUsageRecords,
41
- } from "./localdb/queries.js";
42
- import type { WatchResult } from "./monitoring/watch.js";
24
+ autoGradeFreshDeploys,
25
+ buildReplayValidationOptions,
26
+ runEvolutionPhase,
27
+ watchRecentDeploys,
28
+ } from "./orchestrate/execute.js";
29
+ import { finalizeOrchestrateRun } from "./orchestrate/finalize.js";
30
+ import { acquireLock, releaseLock } from "./orchestrate/locks.js";
31
+ import { runPostOrchestrateSideEffects } from "./orchestrate/post-run.js";
32
+ import {
33
+ autoGradeTopUngraded,
34
+ detectCrossSkillOverlap,
35
+ prepareOrchestrateRun,
36
+ } from "./orchestrate/prepare.js";
37
+ import {
38
+ DEFAULT_COOLDOWN_HOURS,
39
+ MIN_CANDIDATE_EVIDENCE,
40
+ selectCandidates,
41
+ } from "./orchestrate/plan.js";
42
+ import { formatOrchestrateReport } from "./orchestrate/report.js";
43
+ import { resolveOrchestrateRuntime } from "./orchestrate/runtime.js";
43
44
  import { doctor } from "./observability.js";
44
- import type { SkillStatus, StatusResult } from "./status.js";
45
+ import type { StatusResult } from "./status.js";
45
46
  import { computeStatus } from "./status.js";
46
47
  import type { SyncResult } from "./sync.js";
47
- import { createDefaultSyncOptions, syncSources } from "./sync.js";
48
+ import { syncSources } from "./sync.js";
48
49
  import type {
49
50
  AlphaIdentity,
50
51
  EvolutionAuditEntry,
@@ -53,103 +54,23 @@ import type {
53
54
  SessionTelemetryRecord,
54
55
  SkillUsageRecord,
55
56
  } from "./types.js";
56
- import { CLIError, handleCLIError } from "./utils/cli-error.js";
57
- import { detectAgent } from "./utils/llm-call.js";
58
- import { getSelftuneVersion, readConfiguredAgentType } from "./utils/selftune-meta.js";
57
+ import { handleCLIError } from "./utils/cli-error.js";
58
+ import { detectLlmAgent } from "./utils/llm-call.js";
59
59
  import {
60
- findInstalledSkillPath,
61
- findRepositoryClaudeSkillDirs,
62
- findRepositorySkillDirs,
63
- } from "./utils/skill-discovery.js";
64
- import { readExcerpt } from "./utils/transcript.js";
65
-
66
- // ---------------------------------------------------------------------------
67
- // Lockfile management
68
- // ---------------------------------------------------------------------------
69
-
70
- interface LockInfo {
71
- pid: number;
72
- timestamp: string;
73
- }
74
-
75
- const LOCK_STALE_MS = 30 * 60 * 1000; // 30 minutes
76
-
77
- export function acquireLock(lockPath: string = getOrchestrateLockPath()): boolean {
78
- try {
79
- if (existsSync(lockPath)) {
80
- try {
81
- const raw = readFileSync(lockPath, "utf-8");
82
- const info: LockInfo = JSON.parse(raw);
83
- const lockAge = Date.now() - Date.parse(info.timestamp);
84
- if (lockAge < LOCK_STALE_MS) {
85
- return false; // lock is fresh, cannot acquire
86
- }
87
- // Lock is stale, fall through to overwrite
88
- } catch {
89
- // Corrupted lock file, treat as stale and overwrite
90
- }
91
- }
92
- const lock: LockInfo = { pid: process.pid, timestamp: new Date().toISOString() };
93
- writeFileSync(lockPath, JSON.stringify(lock));
94
- return true;
95
- } catch {
96
- // Fail-open: if we can't check/write, allow the run
97
- return true;
98
- }
99
- }
100
-
101
- export function releaseLock(lockPath: string = getOrchestrateLockPath()): void {
102
- try {
103
- unlinkSync(lockPath);
104
- } catch {
105
- // Silent on errors (file may not exist)
106
- }
107
- }
108
-
109
- // ---------------------------------------------------------------------------
110
- // Signal reading helpers
111
- // ---------------------------------------------------------------------------
112
-
113
- function readPendingSignals(reader?: () => ImprovementSignalRecord[]): ImprovementSignalRecord[] {
114
- const _read =
115
- reader ??
116
- (() => {
117
- const db = getDb();
118
- return queryImprovementSignals(db, false) as ImprovementSignalRecord[];
119
- });
120
- try {
121
- return _read().filter((s) => !s.consumed);
122
- } catch {
123
- return [];
124
- }
125
- }
126
-
127
- export function groupSignalsBySkill(signals: ImprovementSignalRecord[]): Map<string, number> {
128
- const map = new Map<string, number>();
129
- for (const s of signals) {
130
- if (s.mentioned_skill) {
131
- const key = s.mentioned_skill.toLowerCase();
132
- map.set(key, (map.get(key) ?? 0) + 1);
133
- }
134
- }
135
- return map;
136
- }
137
-
138
- export function markSignalsConsumed(signals: ImprovementSignalRecord[], runId: string): void {
139
- try {
140
- if (signals.length === 0) return;
141
- for (const signal of signals) {
142
- const ok = updateSignalConsumed(signal.session_id, signal.query, signal.signal_type, runId);
143
- if (!ok) {
144
- console.error(
145
- `[orchestrate] failed to mark signal consumed: session_id=${signal.session_id}, signal_type=${signal.signal_type}`,
146
- );
147
- }
148
- }
149
- } catch {
150
- // Silent on errors
151
- }
152
- }
60
+ discoverWorkflowSkillProposals,
61
+ persistWorkflowSkillProposal,
62
+ type WorkflowSkillProposal,
63
+ } from "./workflows/proposals.js";
64
+
65
+ export { acquireLock, releaseLock } from "./orchestrate/locks.js";
66
+ export {
67
+ DEFAULT_COOLDOWN_HOURS,
68
+ MIN_CANDIDATE_EVIDENCE,
69
+ selectCandidates,
70
+ } from "./orchestrate/plan.js";
71
+ export { autoGradeTopUngraded, detectCrossSkillOverlap } from "./orchestrate/prepare.js";
72
+ export { formatOrchestrateReport } from "./orchestrate/report.js";
73
+ export { groupSignalsBySkill, markSignalsConsumed } from "./orchestrate/signals.js";
153
74
 
154
75
  // ---------------------------------------------------------------------------
155
76
  // Types
@@ -180,11 +101,24 @@ export interface SkillAction {
180
101
  watchResult?: WatchResult;
181
102
  }
182
103
 
104
+ /** Context for candidate selection beyond simple status checks. */
105
+ export interface CandidateContext {
106
+ skillFilter?: string;
107
+ maxSkills: number;
108
+ auditEntries?: EvolutionAuditEntry[];
109
+ /** Hours since last deploy before a skill can be re-evolved. */
110
+ cooldownHours?: number;
111
+ /** Skill name (lowercase) to improvement signal count. */
112
+ signaledSkills?: Map<string, number>;
113
+ }
114
+
183
115
  export interface OrchestrateResult {
184
116
  syncResult: SyncResult;
185
117
  statusResult: StatusResult;
186
118
  candidates: SkillAction[];
119
+ workflowProposals: WorkflowSkillProposal[];
187
120
  uploadSummary?: UploadCycleSummary;
121
+ contributionRelaySummary?: { attempted: number; sent: number; failed: number };
188
122
  summary: {
189
123
  totalSkills: number;
190
124
  evaluated: number;
@@ -193,190 +127,13 @@ export interface OrchestrateResult {
193
127
  watched: number;
194
128
  skipped: number;
195
129
  autoGraded: number;
130
+ freshlyWatchedSkills: string[];
196
131
  dryRun: boolean;
197
132
  approvalMode: "auto" | "review";
198
133
  elapsedMs: number;
199
134
  };
200
135
  }
201
136
 
202
- // ---------------------------------------------------------------------------
203
- // Human-readable decision report
204
- // ---------------------------------------------------------------------------
205
-
206
- function formatSyncPhase(syncResult: SyncResult): string[] {
207
- const lines: string[] = ["Phase 1: Sync"];
208
- const sources: [string, keyof SyncResult["sources"]][] = [
209
- ["Claude", "claude"],
210
- ["Codex", "codex"],
211
- ["OpenCode", "opencode"],
212
- ["OpenClaw", "openclaw"],
213
- ];
214
-
215
- for (const [label, key] of sources) {
216
- const s = syncResult.sources[key];
217
- if (!s.available) {
218
- lines.push(` ${label.padEnd(12)}not available`);
219
- } else if (s.synced > 0) {
220
- lines.push(` ${label.padEnd(12)}scanned ${s.scanned}, synced ${s.synced}`);
221
- } else {
222
- lines.push(` ${label.padEnd(12)}scanned ${s.scanned}, up to date`);
223
- }
224
- }
225
-
226
- if (syncResult.repair.ran && syncResult.repair.repaired_records > 0) {
227
- lines.push(
228
- ` Repair ${syncResult.repair.repaired_records} records across ${syncResult.repair.repaired_sessions} sessions`,
229
- );
230
- }
231
-
232
- return lines;
233
- }
234
-
235
- function formatStatusPhase(statusResult: StatusResult): string[] {
236
- const lines: string[] = ["Phase 2: Status"];
237
- const byStatus: Record<string, number> = {};
238
- for (const skill of statusResult.skills) {
239
- byStatus[skill.status] = (byStatus[skill.status] ?? 0) + 1;
240
- }
241
- const healthLabel = statusResult.system.healthy ? "healthy" : "UNHEALTHY";
242
- lines.push(` ${statusResult.skills.length} skills found, system ${healthLabel}`);
243
-
244
- const parts: string[] = [];
245
- for (const s of ["CRITICAL", "WARNING", "HEALTHY", "UNGRADED", "UNKNOWN"]) {
246
- if (byStatus[s]) parts.push(`${byStatus[s]} ${s}`);
247
- }
248
- if (parts.length > 0) lines.push(` ${parts.join(", ")}`);
249
-
250
- return lines;
251
- }
252
-
253
- function formatDecisionPhase(candidates: SkillAction[]): string[] {
254
- const lines: string[] = ["Phase 3: Skill Decisions"];
255
- if (candidates.length === 0) {
256
- lines.push(" (no skills to evaluate)");
257
- return lines;
258
- }
259
-
260
- for (const c of candidates) {
261
- const icon = c.action === "skip" ? "⊘" : c.action === "watch" ? "○" : "→";
262
- const actionLabel = c.action.toUpperCase().padEnd(7);
263
- lines.push(` ${icon} ${c.skill.padEnd(20)} ${actionLabel} ${c.reason}`);
264
- }
265
-
266
- return lines;
267
- }
268
-
269
- function formatEvolutionPhase(candidates: SkillAction[]): string[] {
270
- const evolved = candidates.filter((c) => c.action === "evolve" && c.evolveResult !== undefined);
271
- if (evolved.length === 0) return [];
272
-
273
- const lines: string[] = ["Phase 4: Evolution Results"];
274
- for (const c of evolved) {
275
- const r = c.evolveResult as NonNullable<typeof c.evolveResult>;
276
- const status = r.deployed ? "deployed" : "not deployed";
277
- const detail = r.reason;
278
- const validation = r.validation
279
- ? ` (${(r.validation.before_pass_rate * 100).toFixed(0)}% → ${(r.validation.after_pass_rate * 100).toFixed(0)}%)`
280
- : "";
281
- lines.push(` ${c.skill.padEnd(20)} ${status}${validation}`);
282
- lines.push(` ${"".padEnd(20)} ${detail}`);
283
- }
284
-
285
- return lines;
286
- }
287
-
288
- function formatWatchPhase(candidates: SkillAction[]): string[] {
289
- const watched = candidates.filter((c) => c.action === "watch");
290
- if (watched.length === 0) return [];
291
-
292
- const lines: string[] = ["Phase 5: Watch"];
293
- for (const c of watched) {
294
- const snap = c.watchResult?.snapshot;
295
- const metrics = snap
296
- ? ` (pass_rate=${snap.pass_rate.toFixed(2)}, baseline=${snap.baseline_pass_rate.toFixed(2)})`
297
- : "";
298
- const alertTag = c.watchResult?.alert ? " [ALERT]" : "";
299
- const rollbackTag = c.watchResult?.rolledBack ? " [ROLLED BACK]" : "";
300
- lines.push(` ${c.skill.padEnd(20)} ${c.reason}${alertTag}${rollbackTag}${metrics}`);
301
- }
302
-
303
- return lines;
304
- }
305
-
306
- export function formatOrchestrateReport(result: OrchestrateResult): string {
307
- const sep = "═".repeat(48);
308
- const lines: string[] = [];
309
-
310
- lines.push(sep);
311
- lines.push("selftune orchestrate — decision report");
312
- lines.push(sep);
313
- lines.push("");
314
-
315
- // Mode banner
316
- if (result.summary.dryRun) {
317
- lines.push("Mode: DRY RUN (no mutations applied)");
318
- } else if (result.summary.approvalMode === "review") {
319
- lines.push("Mode: REVIEW (proposals validated but not deployed)");
320
- } else {
321
- lines.push("Mode: AUTONOMOUS (validated changes deployed automatically)");
322
- }
323
- lines.push("");
324
-
325
- // Phase 1: Sync
326
- lines.push(...formatSyncPhase(result.syncResult));
327
- lines.push("");
328
-
329
- // Phase 2: Status
330
- lines.push(...formatStatusPhase(result.statusResult));
331
- lines.push("");
332
-
333
- // Phase 3: Skill decisions
334
- lines.push(...formatDecisionPhase(result.candidates));
335
- lines.push("");
336
-
337
- // Phase 4: Evolution results (only if any evolve ran)
338
- const evoLines = formatEvolutionPhase(result.candidates);
339
- if (evoLines.length > 0) {
340
- lines.push(...evoLines);
341
- lines.push("");
342
- }
343
-
344
- // Phase 5: Watch (only if any watched)
345
- const watchLines = formatWatchPhase(result.candidates);
346
- if (watchLines.length > 0) {
347
- lines.push(...watchLines);
348
- lines.push("");
349
- }
350
-
351
- // Final summary
352
- lines.push("Summary");
353
- lines.push(` Auto-graded: ${result.summary.autoGraded}`);
354
- lines.push(` Evaluated: ${result.summary.evaluated} skills`);
355
- lines.push(` Deployed: ${result.summary.deployed}`);
356
- lines.push(` Watched: ${result.summary.watched}`);
357
- lines.push(` Skipped: ${result.summary.skipped}`);
358
- lines.push(` Elapsed: ${(result.summary.elapsedMs / 1000).toFixed(1)}s`);
359
-
360
- if (result.summary.dryRun && result.summary.evaluated > 0) {
361
- lines.push("");
362
- lines.push(" Rerun without --dry-run to allow validated deployments.");
363
- } else if (result.summary.approvalMode === "review" && result.summary.evaluated > 0) {
364
- lines.push("");
365
- lines.push(" Rerun without --review-required to allow validated deployments.");
366
- }
367
-
368
- return lines.join("\n");
369
- }
370
-
371
- /** Candidate selection criteria. */
372
- const CANDIDATE_STATUSES = new Set(["CRITICAL", "WARNING", "UNGRADED"]);
373
-
374
- /** Minimum skill_checks before autonomous evolution is allowed. */
375
- export const MIN_CANDIDATE_EVIDENCE = 3;
376
-
377
- /** Default cooldown hours after a deploy before re-evolving the same skill. */
378
- export const DEFAULT_COOLDOWN_HOURS = 24;
379
-
380
137
  type AutonomousEvolveDefaults = Pick<
381
138
  EvolveOptions,
382
139
  | "paretoEnabled"
@@ -404,15 +161,6 @@ const AUTONOMOUS_EVOLVE_DEFAULTS: AutonomousEvolveDefaults = {
404
161
  proposalModel: "haiku",
405
162
  };
406
163
 
407
- function candidatePriority(skill: SkillStatus, signalCount = 0): number {
408
- const statusWeight = skill.status === "CRITICAL" ? 300 : skill.status === "WARNING" ? 200 : 100;
409
- const missedWeight = Math.min(skill.missedQueries, 50);
410
- const passPenalty = skill.passRate === null ? 0 : Math.round((1 - skill.passRate) * 100);
411
- const trendBoost = skill.trend === "down" ? 30 : 0;
412
- const signalBoost = Math.min(signalCount * 150, 450);
413
- return statusWeight + missedWeight + passPenalty + trendBoost + signalBoost;
414
- }
415
-
416
164
  /**
417
165
  * Injectable dependencies for orchestrate(). Pass overrides in tests.
418
166
  */
@@ -421,7 +169,7 @@ export interface OrchestrateDeps {
421
169
  computeStatus?: typeof computeStatus;
422
170
  evolve?: typeof import("./evolution/evolve.js").evolve;
423
171
  watch?: typeof import("./monitoring/watch.js").watch;
424
- detectAgent?: typeof detectAgent;
172
+ detectAgent?: typeof detectLlmAgent;
425
173
  doctor?: typeof doctor;
426
174
  readTelemetry?: () => SessionTelemetryRecord[];
427
175
  readSkillRecords?: () => SkillUsageRecord[];
@@ -431,341 +179,9 @@ export interface OrchestrateDeps {
431
179
  readGradingResults?: (skillName: string) => ReturnType<typeof readGradingResultsForSkill>;
432
180
  readSignals?: () => ImprovementSignalRecord[];
433
181
  readAlphaIdentity?: () => AlphaIdentity | null;
434
- }
435
-
436
- // ---------------------------------------------------------------------------
437
- // Skill path resolution
438
- // ---------------------------------------------------------------------------
439
-
440
- function getSkillSearchDirs(): string[] {
441
- const home = homedir();
442
- const cwd = process.cwd();
443
- return [
444
- join(home, ".claude", "skills"),
445
- join(home, ".agents", "skills"),
446
- join(home, ".codex", "skills"),
447
- ...findRepositorySkillDirs(cwd),
448
- ...findRepositoryClaudeSkillDirs(cwd),
449
- ];
450
- }
451
-
452
- function defaultResolveSkillPath(skillName: string): string | undefined {
453
- return findInstalledSkillPath(skillName, getSkillSearchDirs());
454
- }
455
-
456
- // ---------------------------------------------------------------------------
457
- // Cross-skill eval set overlap detection (internal — exported for testing only)
458
- // ---------------------------------------------------------------------------
459
-
460
- /**
461
- * Detects significant overlap between the positive eval sets of evolution
462
- * candidates. When two skills share >30% of their positive queries, it
463
- * suggests a routing boundary problem. Console-only — no persistence.
464
- *
465
- * @internal Exported solely for unit testing.
466
- */
467
- export async function detectCrossSkillOverlap(
468
- candidates: Array<{ skill: string }>,
469
- skillRecords: SkillUsageRecord[],
470
- queryRecords: QueryLogRecord[],
471
- ): Promise<
472
- Array<{ skill_a: string; skill_b: string; overlap_pct: number; shared_queries: string[] }>
473
- > {
474
- if (candidates.length < 2) return [];
475
-
476
- const { buildEvalSet } = await import("./eval/hooks-to-evals.js");
477
-
478
- const evalSets = new Map<string, Set<string>>();
479
-
480
- for (const c of candidates) {
481
- const evalSet = buildEvalSet(skillRecords, queryRecords, c.skill);
482
- const positives = new Set(
483
- evalSet
484
- .filter((e: { should_trigger: boolean }) => e.should_trigger)
485
- .map((e: { query: string }) => e.query.toLowerCase()),
486
- );
487
- evalSets.set(c.skill, positives);
488
- }
489
-
490
- const overlaps: Array<{
491
- skill_a: string;
492
- skill_b: string;
493
- overlap_pct: number;
494
- shared_queries: string[];
495
- }> = [];
496
- const skillNames = [...evalSets.keys()];
497
-
498
- for (let i = 0; i < skillNames.length; i++) {
499
- for (let j = i + 1; j < skillNames.length; j++) {
500
- const setA = evalSets.get(skillNames[i]);
501
- const setB = evalSets.get(skillNames[j]);
502
- if (!setA || !setB) continue;
503
-
504
- if (setA.size === 0 || setB.size === 0) continue;
505
-
506
- const shared: string[] = [];
507
- for (const q of setA) {
508
- if (setB.has(q)) shared.push(q);
509
- }
510
-
511
- const overlapPct = shared.length / Math.min(setA.size, setB.size);
512
-
513
- if (overlapPct > 0.3) {
514
- overlaps.push({
515
- skill_a: skillNames[i],
516
- skill_b: skillNames[j],
517
- overlap_pct: overlapPct,
518
- shared_queries: shared.slice(0, 10),
519
- });
520
- }
521
- }
522
- }
523
-
524
- return overlaps;
525
- }
526
-
527
- // ---------------------------------------------------------------------------
528
- // Candidate selection
529
- // ---------------------------------------------------------------------------
530
-
531
- /** Context for candidate selection beyond simple status checks. */
532
- export interface CandidateContext {
533
- skillFilter?: string;
534
- maxSkills: number;
535
- auditEntries?: EvolutionAuditEntry[];
536
- /** Hours since last deploy before a skill can be re-evolved. */
537
- cooldownHours?: number;
538
- /** Skill name (lowercase) to improvement signal count. */
539
- signaledSkills?: Map<string, number>;
540
- }
541
-
542
- export function selectCandidates(skills: SkillStatus[], options: CandidateContext): SkillAction[] {
543
- const actions: SkillAction[] = [];
544
- const orderedSkills = [...skills].sort((a, b) => {
545
- const aSignals = options.signaledSkills?.get(a.name.toLowerCase()) ?? 0;
546
- const bSignals = options.signaledSkills?.get(b.name.toLowerCase()) ?? 0;
547
- return candidatePriority(b, bSignals) - candidatePriority(a, aSignals);
548
- });
549
-
550
- const cooldownHours = options.cooldownHours ?? DEFAULT_COOLDOWN_HOURS;
551
- const recentlyDeployed = findRecentlyDeployedSkills(options.auditEntries ?? [], cooldownHours);
552
-
553
- for (const skill of orderedSkills) {
554
- const signalCount = options.signaledSkills?.get(skill.name.toLowerCase()) ?? 0;
555
-
556
- // Apply skill filter
557
- if (options.skillFilter && skill.name !== options.skillFilter) {
558
- actions.push({
559
- skill: skill.name,
560
- action: "skip",
561
- reason: `filtered out (--skill ${options.skillFilter})`,
562
- });
563
- continue;
564
- }
565
-
566
- // Check if skill is a candidate
567
- if (!CANDIDATE_STATUSES.has(skill.status)) {
568
- actions.push({
569
- skill: skill.name,
570
- action: "skip",
571
- reason: `status=${skill.status} — no action needed`,
572
- });
573
- continue;
574
- }
575
-
576
- // Gate: cooldown — skip if this skill was deployed recently
577
- if (recentlyDeployed.has(skill.name)) {
578
- actions.push({
579
- skill: skill.name,
580
- action: "skip",
581
- reason: `recently evolved (cooldown ${cooldownHours}h) — let it bake`,
582
- });
583
- continue;
584
- }
585
-
586
- // Gate: insufficient evidence — need enough data points for autonomous action
587
- // Bypass if there are improvement signals for this skill
588
- const skillChecks = skill.snapshot?.skill_checks ?? 0;
589
- if (skillChecks < MIN_CANDIDATE_EVIDENCE && skill.status !== "UNGRADED" && signalCount === 0) {
590
- actions.push({
591
- skill: skill.name,
592
- action: "skip",
593
- reason: `insufficient evidence (${skillChecks}/${MIN_CANDIDATE_EVIDENCE} checks) — need more data`,
594
- });
595
- continue;
596
- }
597
-
598
- // UNGRADED: only evolve if there are missed queries (some signal)
599
- // Bypass if there are improvement signals for this skill
600
- if (skill.status === "UNGRADED" && skill.missedQueries === 0 && signalCount === 0) {
601
- actions.push({
602
- skill: skill.name,
603
- action: "skip",
604
- reason: "UNGRADED with 0 missed queries — insufficient signal",
605
- });
606
- continue;
607
- }
608
-
609
- // Gate: weak WARNING signal — skip if no missed queries and trend isn't declining
610
- if (skill.status === "WARNING" && skill.missedQueries === 0 && skill.trend !== "down") {
611
- actions.push({
612
- skill: skill.name,
613
- action: "skip",
614
- reason: `WARNING but no missed queries and trend=${skill.trend} — weak signal`,
615
- });
616
- continue;
617
- }
618
-
619
- actions.push({
620
- skill: skill.name,
621
- action: "evolve",
622
- reason: `status=${skill.status}, passRate=${skill.passRate !== null ? `${(skill.passRate * 100).toFixed(0)}%` : "—"}, missed=${skill.missedQueries}, trend=${skill.trend}`,
623
- });
624
- }
625
-
626
- // Apply max-skills cap to evolve candidates only
627
- let evolveCount = 0;
628
- for (const action of actions) {
629
- if (action.action === "evolve") {
630
- evolveCount++;
631
- if (evolveCount > options.maxSkills) {
632
- action.action = "skip";
633
- action.reason = `capped by --max-skills ${options.maxSkills}`;
634
- }
635
- }
636
- }
637
-
638
- return actions;
639
- }
640
-
641
- /**
642
- * Find skills deployed within the given window.
643
- * Used for both cooldown gating (don't re-evolve) and watch targeting
644
- * (monitor recently deployed skills for regressions).
645
- */
646
- function findRecentlyDeployedSkills(
647
- auditEntries: EvolutionAuditEntry[],
648
- windowHours: number,
649
- ): Set<string> {
650
- const cutoffMs = Date.now() - windowHours * 60 * 60 * 1000;
651
- const names = new Set<string>();
652
- for (const entry of auditEntries) {
653
- const deployedAtMs = Date.parse(entry.timestamp);
654
- if (
655
- entry.action === "deployed" &&
656
- entry.skill_name &&
657
- Number.isFinite(deployedAtMs) &&
658
- deployedAtMs >= cutoffMs
659
- ) {
660
- names.add(entry.skill_name);
661
- }
662
- }
663
- return names;
664
- }
665
-
666
- // ---------------------------------------------------------------------------
667
- // Auto-grade ungraded skills
668
- // ---------------------------------------------------------------------------
669
-
670
- /**
671
- * Auto-grade the top ungraded skills that have some session data.
672
- * Fail-open: individual grading errors are logged but never propagated.
673
- *
674
- * @returns Number of skills successfully graded.
675
- */
676
- export async function autoGradeTopUngraded(
677
- skills: SkillStatus[],
678
- maxAutoGrade: number,
679
- agent: string,
680
- deps: {
681
- readTelemetry: () => SessionTelemetryRecord[];
682
- readSkillRecords: () => SkillUsageRecord[];
683
- },
684
- ): Promise<number> {
685
- // Filter: UNGRADED skills with some data (skill_checks > 0)
686
- const ungradedWithData = skills
687
- .filter((s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0)
688
- .sort((a, b) => (b.snapshot?.skill_checks ?? 0) - (a.snapshot?.skill_checks ?? 0))
689
- .slice(0, maxAutoGrade);
690
-
691
- if (ungradedWithData.length === 0) return 0;
692
-
693
- let graded = 0;
694
-
695
- for (const skill of ungradedWithData) {
696
- try {
697
- const telemetry = deps.readTelemetry();
698
- const skillUsage = deps.readSkillRecords();
699
-
700
- // Resolve the latest session for this skill
701
- const resolved = resolveLatestSessionForSkill(telemetry, skillUsage, skill.name);
702
- if (!resolved) {
703
- console.error(` [auto-grade] ${skill.name}: no session found, skipping`);
704
- continue;
705
- }
706
-
707
- // Derive expectations from SKILL.md
708
- const derived = deriveExpectationsFromSkill(skill.name);
709
- let transcriptExcerpt = "(no transcript)";
710
- if (resolved.transcriptPath) {
711
- try {
712
- transcriptExcerpt = readExcerpt(resolved.transcriptPath);
713
- } catch {
714
- transcriptExcerpt = "(no transcript)";
715
- }
716
- }
717
-
718
- console.error(` [auto-grade] Grading "${skill.name}" (session ${resolved.sessionId})...`);
719
-
720
- const result = await gradeSession({
721
- expectations: derived.expectations,
722
- telemetry: resolved.telemetry,
723
- sessionId: resolved.sessionId,
724
- skillName: skill.name,
725
- transcriptExcerpt,
726
- transcriptPath: resolved.transcriptPath,
727
- agent,
728
- });
729
-
730
- // Persist to SQLite — only count as graded if DB write succeeds
731
- let persisted = false;
732
- try {
733
- persisted = writeGradingResultToDb(result);
734
- } catch {
735
- persisted = false;
736
- }
737
- if (!persisted) {
738
- console.error(` [auto-grade] ${skill.name}: graded but failed to persist result`);
739
- continue;
740
- }
741
-
742
- // Persist to file (fail-open, supplementary)
743
- try {
744
- const basePath = buildDefaultGradingOutputPath(resolved.sessionId);
745
- const safeName = skill.name.replace(/[^a-zA-Z0-9_-]/g, "_");
746
- const outputPath = basePath.replace(/\.json$/, `_${safeName}.json`);
747
- const outputDir = dirname(outputPath);
748
- mkdirSync(outputDir, { recursive: true });
749
- writeFileSync(outputPath, JSON.stringify(result, null, 2), "utf-8");
750
- } catch {
751
- // fail-open: DB is authoritative, file is supplementary
752
- }
753
-
754
- const passRate = result.summary.pass_rate;
755
- console.error(
756
- ` [auto-grade] ${skill.name}: ${result.summary.passed}/${result.summary.total} passed (${Math.round(passRate * 100)}%)`,
757
- );
758
- graded++;
759
- } catch (err) {
760
- const msg = err instanceof Error ? err.message : String(err);
761
- console.error(
762
- ` [auto-grade] ${skill.name}: error — ${msg}. Retry with: selftune grade ${skill.name}`,
763
- );
764
- // fail-open: continue to next skill
765
- }
766
- }
767
-
768
- return graded;
182
+ discoverWorkflowSkillProposals?: typeof discoverWorkflowSkillProposals;
183
+ persistWorkflowSkillProposal?: typeof persistWorkflowSkillProposal;
184
+ buildReplayOptions?: typeof buildReplayValidationOptions;
769
185
  }
770
186
 
771
187
  // ---------------------------------------------------------------------------
@@ -776,6 +192,8 @@ export async function orchestrate(
776
192
  options: OrchestrateOptions,
777
193
  deps: OrchestrateDeps = {},
778
194
  ): Promise<OrchestrateResult> {
195
+ const startTime = Date.now();
196
+
779
197
  if (!acquireLock()) {
780
198
  // Another orchestrate run is in progress
781
199
  console.error("[orchestrate] Another run is in progress (lock held). Exiting.");
@@ -788,6 +206,7 @@ export async function orchestrate(
788
206
  codex: { available: false, scanned: 0, synced: 0, skipped: 0 },
789
207
  opencode: { available: false, scanned: 0, synced: 0, skipped: 0 },
790
208
  openclaw: { available: false, scanned: 0, synced: 0, skipped: 0 },
209
+ pi: { available: false, scanned: 0, synced: 0, skipped: 0 },
791
210
  },
792
211
  repair: {
793
212
  ran: false,
@@ -795,6 +214,12 @@ export async function orchestrate(
795
214
  repaired_records: 0,
796
215
  codex_repaired_records: 0,
797
216
  },
217
+ creator_contributions: {
218
+ ran: false,
219
+ eligible_skills: 0,
220
+ built_signals: 0,
221
+ staged_signals: 0,
222
+ },
798
223
  timings: [],
799
224
  total_elapsed_ms: 0,
800
225
  },
@@ -806,6 +231,7 @@ export async function orchestrate(
806
231
  system: { healthy: true, pass: 0, fail: 0, warn: 0 },
807
232
  },
808
233
  candidates: [],
234
+ workflowProposals: [],
809
235
  summary: {
810
236
  totalSkills: 0,
811
237
  evaluated: 0,
@@ -814,6 +240,7 @@ export async function orchestrate(
814
240
  watched: 0,
815
241
  skipped: 0,
816
242
  autoGraded: 0,
243
+ freshlyWatchedSkills: [],
817
244
  dryRun: options.dryRun,
818
245
  approvalMode: options.approvalMode,
819
246
  elapsedMs: 0,
@@ -822,395 +249,120 @@ export async function orchestrate(
822
249
  }
823
250
 
824
251
  try {
825
- const startTime = Date.now();
826
-
827
- const _syncSources = deps.syncSources ?? syncSources;
828
- const _computeStatus = deps.computeStatus ?? computeStatus;
829
- const _detectAgent = deps.detectAgent ?? detectAgent;
830
- const _doctor = deps.doctor ?? doctor;
831
- const _readTelemetry =
832
- deps.readTelemetry ??
833
- (() => {
834
- const db = getDb();
835
- return querySessionTelemetry(db) as SessionTelemetryRecord[];
836
- });
837
- const _readSkillRecords =
838
- deps.readSkillRecords ??
839
- (() => {
840
- const db = getDb();
841
- return querySkillUsageRecords(db) as SkillUsageRecord[];
842
- });
843
- const _readQueryRecords =
844
- deps.readQueryRecords ??
845
- (() => {
846
- const db = getDb();
847
- return queryQueryLog(db) as QueryLogRecord[];
848
- });
849
- const _readAuditEntries =
850
- deps.readAuditEntries ??
851
- (() => {
852
- const db = getDb();
853
- return queryEvolutionAudit(db) as EvolutionAuditEntry[];
854
- });
855
- const _resolveSkillPath = deps.resolveSkillPath ?? defaultResolveSkillPath;
856
- const _readGradingResults = deps.readGradingResults ?? readGradingResultsForSkill;
857
- const _readAlphaIdentity =
858
- deps.readAlphaIdentity ?? (() => readAlphaIdentity(SELFTUNE_CONFIG_PATH));
859
-
860
- // Lazy-load evolve and watch to avoid circular imports
861
- const _evolve = deps.evolve ?? (await import("./evolution/evolve.js")).evolve;
862
- const _watch = deps.watch ?? (await import("./monitoring/watch.js")).watch;
863
-
864
- // -------------------------------------------------------------------------
865
- // Step 1: Sync source-truth telemetry (mandatory)
866
- // -------------------------------------------------------------------------
867
- console.error("[orchestrate] Syncing source-truth telemetry...");
868
- const syncResult = _syncSources(createDefaultSyncOptions({ force: options.syncForce }));
869
- const sourceSynced = Object.values(syncResult.sources).reduce((sum, s) => sum + s.synced, 0);
870
- console.error(
871
- `[orchestrate] Sync complete: ${sourceSynced} sessions synced, ${syncResult.repair.repaired_records} repaired`,
872
- );
873
-
874
- // -------------------------------------------------------------------------
875
- // Step 2: Compute status
876
- // -------------------------------------------------------------------------
877
- console.error("[orchestrate] Computing skill status...");
878
- const telemetry = _readTelemetry();
879
- const skillRecords = _readSkillRecords();
880
- const queryRecords = _readQueryRecords();
881
- const auditEntries = _readAuditEntries();
882
- const doctorResult = await _doctor();
883
-
884
- let statusResult = _computeStatus(
252
+ const runtime = await resolveOrchestrateRuntime(deps);
253
+ const {
254
+ syncResult,
255
+ statusResult,
885
256
  telemetry,
886
257
  skillRecords,
887
- queryRecords,
888
- auditEntries,
889
- doctorResult,
890
- );
891
- console.error(
892
- `[orchestrate] Status: ${statusResult.skills.length} skills, system=${statusResult.system.healthy ? "healthy" : "unhealthy"}`,
893
- );
258
+ pendingSignals,
259
+ candidates,
260
+ evolveCandidates,
261
+ agent,
262
+ autoGradedCount,
263
+ } = await prepareOrchestrateRun(options, runtime);
894
264
 
895
265
  // -------------------------------------------------------------------------
896
- // Step 2a: Auto-grade ungraded skills with sufficient data
266
+ // Step 5: Evolve candidates
897
267
  // -------------------------------------------------------------------------
898
- let autoGradedCount = 0;
899
- const scopedSkills = options.skillFilter
900
- ? statusResult.skills.filter((s) => s.name === options.skillFilter)
901
- : statusResult.skills;
902
- const ungradedWithData = scopedSkills.filter(
903
- (s) => s.status === "UNGRADED" && (s.snapshot?.skill_checks ?? 0) > 0,
904
- );
905
-
906
- if (!options.dryRun && options.maxAutoGrade > 0 && ungradedWithData.length > 0) {
907
- const gradeAgent = _detectAgent();
908
- if (gradeAgent) {
909
- console.error(
910
- `[orchestrate] Auto-grading ${Math.min(ungradedWithData.length, options.maxAutoGrade)} ungraded skill(s)...`,
911
- );
912
- autoGradedCount = await autoGradeTopUngraded(
913
- scopedSkills,
914
- options.maxAutoGrade,
915
- gradeAgent,
916
- { readTelemetry: _readTelemetry, readSkillRecords: _readSkillRecords },
917
- );
918
-
919
- if (autoGradedCount > 0) {
920
- // Recompute status so candidate selection sees updated grades
921
- console.error(
922
- `[orchestrate] Recomputing status after grading ${autoGradedCount} skill(s)...`,
923
- );
924
- try {
925
- const freshTelemetry = _readTelemetry();
926
- const freshSkillRecords = _readSkillRecords();
927
- const freshQueryRecords = _readQueryRecords();
928
- const freshAudit = _readAuditEntries();
929
- const freshDoctor = doctorResult; // reuse — environment unchanged during grading
930
- statusResult = _computeStatus(
931
- freshTelemetry,
932
- freshSkillRecords,
933
- freshQueryRecords,
934
- freshAudit,
935
- freshDoctor,
936
- );
937
- } catch (recomputeErr) {
938
- console.error(
939
- `[orchestrate] Warning: failed to recompute status after grading — using pre-grade status. ${recomputeErr instanceof Error ? recomputeErr.message : String(recomputeErr)}`,
940
- );
941
- }
942
- }
943
- } else {
944
- console.error(
945
- "[orchestrate] No agent CLI found — skipping auto-grade. To disable, rerun with: selftune orchestrate --max-auto-grade 0",
946
- );
947
- }
948
- }
268
+ const freshlyDeployedInThisRun = await runEvolutionPhase({
269
+ evolveCandidates,
270
+ agent,
271
+ options,
272
+ resolveSkillPath: runtime.resolveSkillPath,
273
+ readGradingResults: runtime.readGradingResults,
274
+ evolve: runtime.evolve,
275
+ buildReplayOptions: runtime.buildReplayOptions,
276
+ evolveDefaults: AUTONOMOUS_EVOLVE_DEFAULTS,
277
+ });
949
278
 
950
279
  // -------------------------------------------------------------------------
951
- // Step 2b: Read pending improvement signals
280
+ // Step 5b: Auto-grade & write baselines for freshly deployed skills
952
281
  // -------------------------------------------------------------------------
953
- const pendingSignals = readPendingSignals(deps.readSignals);
954
- const signaledSkills = groupSignalsBySkill(pendingSignals);
955
- if (signaledSkills.size > 0) {
956
- console.error(
957
- `[orchestrate] Improvement signals: ${pendingSignals.length} pending for ${signaledSkills.size} skill(s)`,
958
- );
959
- }
282
+ await autoGradeFreshDeploys({
283
+ freshlyDeployedCandidates: freshlyDeployedInThisRun,
284
+ dryRun: options.dryRun,
285
+ agent,
286
+ detectAgent: runtime.detectAgent,
287
+ readTelemetry: runtime.readTelemetry,
288
+ readSkillRecords: runtime.readSkillRecords,
289
+ });
960
290
 
961
291
  // -------------------------------------------------------------------------
962
- // Step 3: Select candidates
292
+ // Step 6: Watch recently evolved skills (including freshly deployed in this run)
963
293
  // -------------------------------------------------------------------------
964
- const candidates = selectCandidates(statusResult.skills, {
294
+ const { freshAuditEntries, freshlyWatchedSkills } = await watchRecentDeploys({
295
+ candidates,
296
+ freshlyDeployedCandidates: freshlyDeployedInThisRun,
965
297
  skillFilter: options.skillFilter,
966
- maxSkills: options.maxSkills,
967
- auditEntries,
968
- signaledSkills,
298
+ recentWindowHours: options.recentWindowHours,
299
+ readAuditEntries: runtime.readAuditEntries,
300
+ resolveSkillPath: runtime.resolveSkillPath,
301
+ watch: runtime.watch,
969
302
  });
970
303
 
971
- const evolveCandidates = candidates.filter((c) => c.action === "evolve");
972
- const skipCount = candidates.filter((c) => c.action === "skip").length;
973
- console.error(
974
- `[orchestrate] Candidates: ${evolveCandidates.length} to evolve, ${skipCount} skipped`,
975
- );
976
-
977
- // Log each decision
978
- for (const c of candidates) {
979
- console.error(` ${c.action === "skip" ? "⊘" : "→"} ${c.skill}: ${c.reason}`);
980
- }
981
-
982
- // Cross-skill overlap detection (console-only, non-critical)
983
- if (evolveCandidates.length >= 2) {
984
- try {
985
- const overlap = await detectCrossSkillOverlap(evolveCandidates, skillRecords, queryRecords);
986
- if (overlap.length > 0) {
987
- console.error("\n[orchestrate] Cross-skill eval overlap detected:");
988
- for (const o of overlap) {
989
- console.error(
990
- ` ⚠ ${o.skill_a} ↔ ${o.skill_b}: ${(o.overlap_pct * 100).toFixed(0)}% shared queries (${o.shared_queries.length} queries)`,
991
- );
992
- }
993
- console.error("");
994
- }
995
- } catch {
996
- // fail-open: overlap detection is non-critical
997
- }
998
- }
999
-
1000
- // -------------------------------------------------------------------------
1001
- // Step 4: Detect agent
1002
- // -------------------------------------------------------------------------
1003
- const agent = _detectAgent();
1004
- if (!agent && evolveCandidates.length > 0) {
1005
- console.error("[orchestrate] WARNING: No agent CLI found in PATH. Evolve will be skipped.");
1006
- for (const c of evolveCandidates) {
1007
- c.action = "skip";
1008
- c.reason = "no agent CLI available";
1009
- }
1010
- }
1011
-
1012
304
  // -------------------------------------------------------------------------
1013
- // Step 5: Evolve candidates
305
+ // Step 6b: Generate workflow-skill proposals from strong telemetry patterns
1014
306
  // -------------------------------------------------------------------------
1015
- for (const candidate of evolveCandidates) {
1016
- // Skip if agent detection marked this candidate as skip
1017
- if (candidate.action === "skip") continue;
1018
-
1019
- const skillPath = _resolveSkillPath(candidate.skill);
1020
- if (!skillPath) {
1021
- candidate.action = "skip";
1022
- candidate.reason = `SKILL.md not found for "${candidate.skill}"`;
1023
- console.error(` ⊘ ${candidate.skill}: ${candidate.reason}`);
1024
- continue;
1025
- }
307
+ const workflowProposals = runtime.discoverWorkflowSkillProposals(telemetry, skillRecords, {
308
+ cwd: process.cwd(),
309
+ skillFilter: options.skillFilter,
310
+ resolveSkillPath: runtime.resolveSkillPath,
311
+ existingAuditEntries: freshAuditEntries,
312
+ });
1026
313
 
1027
- const effectiveDryRun = options.dryRun || options.approvalMode === "review";
314
+ if (workflowProposals.length > 0) {
1028
315
  console.error(
1029
- `[orchestrate] Evolving "${candidate.skill}"${effectiveDryRun ? " (dry-run)" : ""}...`,
316
+ `[orchestrate] Workflow skill proposals: ${workflowProposals.length}${options.dryRun ? " (dry-run)" : ""}`,
1030
317
  );
1031
-
1032
- try {
1033
- const evolveResult = await _evolve({
1034
- skillName: candidate.skill,
1035
- skillPath,
1036
- agent: agent as string,
1037
- dryRun: effectiveDryRun,
1038
- confidenceThreshold: 0.6,
1039
- maxIterations: 3,
1040
- gradingResults: _readGradingResults(candidate.skill),
1041
- syncFirst: false, // We already synced
1042
- ...AUTONOMOUS_EVOLVE_DEFAULTS,
1043
- });
1044
-
1045
- candidate.evolveResult = evolveResult;
1046
-
1047
- if (evolveResult.deployed) {
1048
- console.error(` ✓ ${candidate.skill}: deployed (${evolveResult.reason})`);
1049
- } else {
1050
- console.error(` ✗ ${candidate.skill}: not deployed (${evolveResult.reason})`);
318
+ for (const proposal of workflowProposals) {
319
+ console.error(` + ${proposal.draft.skill_name}: ${proposal.summary}`);
320
+ if (!options.dryRun) {
321
+ runtime.persistWorkflowSkillProposal(proposal, {
322
+ sourceSkillPath: runtime.resolveSkillPath(proposal.source_skill_name),
323
+ });
1051
324
  }
1052
- } catch (err) {
1053
- const msg = err instanceof Error ? err.message : String(err);
1054
- candidate.action = "skip";
1055
- candidate.reason = `evolve error: ${msg}`;
1056
- console.error(` ✗ ${candidate.skill}: error — ${msg}`);
1057
- }
1058
- }
1059
-
1060
- // -------------------------------------------------------------------------
1061
- // Step 6: Watch recently evolved skills
1062
- // -------------------------------------------------------------------------
1063
- // Re-read audit entries to capture any newly-deployed entries from the evolve loop above.
1064
- // evolve() writes audit entries synchronously, so a fresh read is needed.
1065
- const freshAuditEntries = _readAuditEntries();
1066
- const recentlyEvolved = findRecentlyDeployedSkills(
1067
- freshAuditEntries,
1068
- options.recentWindowHours,
1069
- );
1070
-
1071
- // O(1) lookup for skills already processed as evolve candidates
1072
- const evolvedSkillNames = new Set(
1073
- candidates.filter((c) => c.action === "evolve").map((c) => c.skill),
1074
- );
1075
-
1076
- for (const skillName of recentlyEvolved) {
1077
- // Skip if already processed in this run as evolve candidate
1078
- if (evolvedSkillNames.has(skillName)) {
1079
- continue;
1080
- }
1081
-
1082
- // Apply skill filter
1083
- if (options.skillFilter && skillName !== options.skillFilter) continue;
1084
-
1085
- const skillPath = _resolveSkillPath(skillName);
1086
- if (!skillPath) continue;
1087
-
1088
- console.error(`[orchestrate] Watching "${skillName}" (recently evolved)...`);
1089
-
1090
- try {
1091
- const watchResult = await _watch({
1092
- skillName,
1093
- skillPath,
1094
- windowSessions: 20,
1095
- regressionThreshold: 0.1,
1096
- autoRollback: true,
1097
- syncFirst: false,
1098
- });
1099
-
1100
- candidates.push({
1101
- skill: skillName,
1102
- action: "watch",
1103
- reason: watchResult.alert ?? "stable",
1104
- watchResult,
1105
- });
1106
-
1107
- console.error(
1108
- ` ${watchResult.alert ? "⚠" : "✓"} ${skillName}: ${watchResult.recommendation}`,
1109
- );
1110
- } catch (err) {
1111
- const msg = err instanceof Error ? err.message : String(err);
1112
- console.error(` ✗ ${skillName}: watch error — ${msg}`);
1113
325
  }
1114
326
  }
1115
327
 
1116
328
  // -------------------------------------------------------------------------
1117
329
  // Step 7: Build summary (single source of truth for both CLI and dashboard)
1118
330
  // -------------------------------------------------------------------------
1119
- const finalTotals = {
1120
- totalSkills: statusResult.skills.length,
1121
- evaluated: candidates.filter((c) => c.action === "evolve").length,
1122
- evolved: candidates.filter((c) => c.action === "evolve" && c.evolveResult !== undefined)
1123
- .length,
1124
- deployed: candidates.filter((c) => c.evolveResult?.deployed).length,
1125
- watched: candidates.filter((c) => c.action === "watch").length,
1126
- skipped: candidates.filter((c) => c.action === "skip").length,
1127
- autoGraded: autoGradedCount,
1128
- };
1129
-
1130
- const result: OrchestrateResult = {
331
+ const result = finalizeOrchestrateRun({
1131
332
  syncResult,
1132
333
  statusResult,
1133
334
  candidates,
1134
- summary: {
1135
- ...finalTotals,
1136
- dryRun: options.dryRun,
1137
- approvalMode: options.approvalMode,
1138
- elapsedMs: Date.now() - startTime,
1139
- },
1140
- };
1141
-
1142
- // -------------------------------------------------------------------------
1143
- // Step 7b: Mark consumed signals
1144
- // -------------------------------------------------------------------------
1145
- const runId = `run_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
1146
- if (pendingSignals.length > 0) {
1147
- markSignalsConsumed(pendingSignals, runId);
1148
- }
335
+ workflowProposals,
336
+ dryRun: options.dryRun,
337
+ approvalMode: options.approvalMode,
338
+ autoGradedCount,
339
+ freshlyWatchedSkills,
340
+ pendingSignals,
341
+ elapsedMs: Date.now() - startTime,
342
+ });
1149
343
 
1150
- // -------------------------------------------------------------------------
1151
- // Step 8: Persist run report
1152
- // -------------------------------------------------------------------------
1153
- const runReport: OrchestrateRunReport = {
1154
- run_id: runId,
1155
- timestamp: new Date().toISOString(),
1156
- elapsed_ms: result.summary.elapsedMs,
1157
- dry_run: result.summary.dryRun,
1158
- approval_mode: result.summary.approvalMode,
1159
- total_skills: finalTotals.totalSkills,
1160
- evaluated: finalTotals.evaluated,
1161
- evolved: finalTotals.evolved,
1162
- deployed: finalTotals.deployed,
1163
- watched: finalTotals.watched,
1164
- skipped: finalTotals.skipped,
1165
- auto_graded: finalTotals.autoGraded,
1166
- skill_actions: candidates.map(
1167
- (c): OrchestrateRunSkillAction => ({
1168
- skill: c.skill,
1169
- action: c.action,
1170
- reason: c.reason,
1171
- deployed: c.evolveResult?.deployed,
1172
- rolledBack: c.watchResult?.rolledBack,
1173
- alert: c.watchResult?.alert,
1174
- elapsed_ms: c.evolveResult?.elapsedMs,
1175
- llm_calls: c.evolveResult?.llmCallCount,
1176
- }),
1177
- ),
1178
- };
344
+ await runPostOrchestrateSideEffects({
345
+ result,
346
+ dryRun: options.dryRun,
347
+ readAlphaIdentity: runtime.readAlphaIdentity,
348
+ });
1179
349
 
350
+ return result;
351
+ } catch (err) {
352
+ // Log failed orchestrate run to unified cron_runs timeline
353
+ const elapsedMs = Date.now() - startTime;
1180
354
  try {
1181
- writeOrchestrateRunToDb(runReport);
355
+ writeCronRunToDb(getDb(), {
356
+ jobName: "orchestrate",
357
+ startedAt: new Date(startTime).toISOString(),
358
+ elapsedMs,
359
+ status: "error",
360
+ error: err instanceof Error ? err.message : String(err),
361
+ });
1182
362
  } catch {
1183
363
  /* fail-open */
1184
364
  }
1185
-
1186
- // -------------------------------------------------------------------------
1187
- // Step 9: Alpha upload (fail-open — never blocks the orchestrate loop)
1188
- // -------------------------------------------------------------------------
1189
- const alphaIdentity = _readAlphaIdentity();
1190
- if (alphaIdentity?.enrolled) {
1191
- try {
1192
- console.error("[orchestrate] Running alpha upload cycle...");
1193
- const { runUploadCycle } = await import("./alpha-upload/index.js");
1194
- const db = getDb();
1195
- const uploadSummary = await runUploadCycle(db, {
1196
- enrolled: true,
1197
- userId: alphaIdentity.user_id,
1198
- agentType: readConfiguredAgentType(SELFTUNE_CONFIG_PATH, "unknown"),
1199
- selftuneVersion: getSelftuneVersion(),
1200
- dryRun: options.dryRun,
1201
- apiKey: alphaIdentity.api_key,
1202
- });
1203
- result.uploadSummary = uploadSummary;
1204
- console.error(
1205
- `[orchestrate] Alpha upload: prepared=${uploadSummary.prepared}, sent=${uploadSummary.sent}, failed=${uploadSummary.failed}, skipped=${uploadSummary.skipped}`,
1206
- );
1207
- } catch (err) {
1208
- const msg = err instanceof Error ? err.message : String(err);
1209
- console.error(`[orchestrate] Alpha upload failed (non-blocking): ${msg}`);
1210
- }
1211
- }
1212
-
1213
- return result;
365
+ throw err;
1214
366
  } finally {
1215
367
  releaseLock();
1216
368
  }
@@ -1221,113 +373,18 @@ export async function orchestrate(
1221
373
  // ---------------------------------------------------------------------------
1222
374
 
1223
375
  export async function cliMain(): Promise<void> {
1224
- const { values } = parseArgs({
1225
- options: {
1226
- "dry-run": { type: "boolean", default: false },
1227
- "review-required": { type: "boolean", default: false },
1228
- "auto-approve": { type: "boolean", default: false },
1229
- skill: { type: "string" },
1230
- "max-skills": { type: "string", default: "5" },
1231
- "recent-window": { type: "string", default: "48" },
1232
- "sync-force": { type: "boolean", default: false },
1233
- "max-auto-grade": { type: "string", default: "5" },
1234
- loop: { type: "boolean", default: false },
1235
- "loop-interval": { type: "string", default: "3600" },
1236
- help: { type: "boolean", short: "h", default: false },
1237
- },
1238
- strict: true,
1239
- });
1240
-
1241
- if (values.help) {
1242
- console.log(`selftune orchestrate — Autonomous core loop
1243
-
1244
- Runs the full improvement cycle: sync → status → auto-grade → evolve → watch.
1245
-
1246
- Usage:
1247
- selftune orchestrate [options]
1248
-
1249
- Options:
1250
- --dry-run Preview actions without mutations
1251
- --review-required Validate candidates but require human review before deploy
1252
- --auto-approve Deprecated alias; autonomous mode is now the default
1253
- --skill <name> Scope to a single skill
1254
- --max-skills <n> Cap skills processed per run (default: 5)
1255
- --recent-window <hrs> Hours to look back for watch targets (default: 48)
1256
- --sync-force Force full rescan during sync
1257
- --max-auto-grade <n> Max ungraded skills to auto-grade per run (default: 5, 0 to disable)
1258
- --loop Run in continuous loop mode (never stops)
1259
- --loop-interval <s> Seconds between iterations (default: 3600, min: 60)
1260
- -h, --help Show this help message
1261
-
1262
- Safety:
1263
- By default, low-risk description evolution runs autonomously after
1264
- validation. Use --review-required to keep a human in the loop, or
1265
- --dry-run to preview the whole loop without mutations. Every deploy
1266
- still passes validation gates first.
376
+ const cli = parseOrchestrateCliArgs();
1267
377
 
1268
- Examples:
1269
- selftune orchestrate # autonomous description evolution
1270
- selftune orchestrate --review-required # validate but do not deploy
1271
- selftune orchestrate --dry-run # preview only
1272
- selftune orchestrate --skill Research # single skill
1273
- selftune orchestrate --max-skills 3 # limit scope
1274
- selftune orchestrate --loop # continuous loop (hourly)
1275
- selftune orchestrate --loop --loop-interval 600 # every 10 minutes`);
378
+ if (cli.showHelp) {
379
+ console.log(renderOrchestrateHelp());
1276
380
  process.exit(0);
1277
381
  }
1278
382
 
1279
- const maxSkillsRaw = values["max-skills"] ?? "5";
1280
- if (!/^\d+$/.test(maxSkillsRaw) || Number(maxSkillsRaw) < 1) {
1281
- throw new CLIError(
1282
- "--max-skills must be a positive integer",
1283
- "INVALID_FLAG",
1284
- "selftune orchestrate --max-skills 5",
1285
- );
383
+ for (const warning of cli.warnings) {
384
+ console.error(warning);
1286
385
  }
1287
- const maxSkills = Number(maxSkillsRaw);
1288
386
 
1289
- const recentWindowRaw = values["recent-window"] ?? "48";
1290
- if (!/^\d+$/.test(recentWindowRaw) || Number(recentWindowRaw) < 1) {
1291
- throw new CLIError(
1292
- "--recent-window must be a positive integer",
1293
- "INVALID_FLAG",
1294
- "selftune orchestrate --recent-window 48",
1295
- );
1296
- }
1297
- const recentWindow = Number(recentWindowRaw);
1298
-
1299
- const maxAutoGradeRaw = values["max-auto-grade"] ?? "5";
1300
- if (!/^\d+$/.test(maxAutoGradeRaw)) {
1301
- throw new CLIError(
1302
- "--max-auto-grade must be a non-negative integer",
1303
- "INVALID_FLAG",
1304
- "selftune orchestrate --max-auto-grade 5",
1305
- );
1306
- }
1307
- const maxAutoGrade = Number(maxAutoGradeRaw);
1308
-
1309
- const loopIntervalRaw = values["loop-interval"] ?? "3600";
1310
- if (!/^\d+$/.test(loopIntervalRaw) || (values.loop && Number(loopIntervalRaw) < 60)) {
1311
- throw new CLIError(
1312
- "--loop-interval must be an integer >= 60 (seconds)",
1313
- "INVALID_FLAG",
1314
- "selftune orchestrate --loop --loop-interval 3600",
1315
- );
1316
- }
1317
- const loopInterval = Number(loopIntervalRaw);
1318
-
1319
- const autoApprove = values["auto-approve"] ?? false;
1320
- if (autoApprove) {
1321
- console.error(
1322
- "[orchestrate] --auto-approve is deprecated; autonomous mode is now the default.",
1323
- );
1324
- }
1325
-
1326
- const reviewRequired = values["review-required"] ?? false;
1327
- const dryRun = values["dry-run"] ?? false;
1328
- const approvalMode: "auto" | "review" = reviewRequired ? "review" : "auto";
1329
-
1330
- const isLoop = values.loop ?? false;
387
+ const isLoop = cli.loop;
1331
388
  let stopRequested = false;
1332
389
  let sleepTimer: ReturnType<typeof setTimeout> | null = null;
1333
390
  let sleepResolve: (() => void) | null = null;
@@ -1357,54 +414,17 @@ Examples:
1357
414
  }
1358
415
 
1359
416
  const result = await orchestrate({
1360
- dryRun,
1361
- approvalMode,
1362
- skillFilter: values.skill,
1363
- maxSkills,
1364
- recentWindowHours: recentWindow,
1365
- syncForce: values["sync-force"] ?? false,
1366
- maxAutoGrade,
417
+ ...cli.runOptions,
1367
418
  });
1368
419
 
1369
- // JSON output: include per-skill decisions for machine consumption
1370
- const jsonOutput = {
1371
- ...result.summary,
1372
- ...(result.uploadSummary ? { upload: result.uploadSummary } : {}),
1373
- decisions: result.candidates.map((c) => ({
1374
- skill: c.skill,
1375
- action: c.action,
1376
- reason: c.reason,
1377
- ...(c.evolveResult
1378
- ? {
1379
- deployed: c.evolveResult.deployed,
1380
- evolveReason: c.evolveResult.reason,
1381
- validation: c.evolveResult.validation
1382
- ? {
1383
- before: c.evolveResult.validation.before_pass_rate,
1384
- after: c.evolveResult.validation.after_pass_rate,
1385
- improved: c.evolveResult.validation.improved,
1386
- }
1387
- : null,
1388
- }
1389
- : {}),
1390
- ...(c.watchResult
1391
- ? {
1392
- alert: c.watchResult.alert,
1393
- rolledBack: c.watchResult.rolledBack,
1394
- passRate: c.watchResult.snapshot?.pass_rate ?? null,
1395
- recommendation: c.watchResult.recommendation,
1396
- }
1397
- : {}),
1398
- })),
1399
- };
1400
- console.log(JSON.stringify(jsonOutput, null, 2));
420
+ console.log(JSON.stringify(buildOrchestrateJsonOutput(result), null, 2));
1401
421
 
1402
422
  // Print human-readable decision report to stderr
1403
423
  console.error(`\n${formatOrchestrateReport(result)}`);
1404
424
 
1405
425
  if (!isLoop || stopRequested) break;
1406
426
 
1407
- const nextMinutes = Math.round(loopInterval / 60);
427
+ const nextMinutes = Math.round(cli.loopIntervalSeconds / 60);
1408
428
  console.error(`\n[orchestrate] Next cycle in ${nextMinutes} minute(s)... (Ctrl+C to stop)`);
1409
429
  await new Promise<void>((resolve) => {
1410
430
  sleepResolve = resolve;
@@ -1412,7 +432,7 @@ Examples:
1412
432
  sleepTimer = null;
1413
433
  sleepResolve = null;
1414
434
  resolve();
1415
- }, loopInterval * 1000);
435
+ }, cli.loopIntervalSeconds * 1000);
1416
436
  });
1417
437
  } while (isLoop && !stopRequested);
1418
438