selftune 0.2.22 → 0.2.24

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (270)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +95 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/adapters/pi/hook.ts +273 -0
  12. package/cli/selftune/adapters/pi/install.ts +207 -0
  13. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  15. package/cli/selftune/auto-update.ts +200 -8
  16. package/cli/selftune/canonical-export.ts +55 -25
  17. package/cli/selftune/command-surface.ts +397 -0
  18. package/cli/selftune/constants.ts +10 -1
  19. package/cli/selftune/contribute/contribute.ts +64 -13
  20. package/cli/selftune/contribution-config.ts +57 -3
  21. package/cli/selftune/contribution-preferences.ts +117 -0
  22. package/cli/selftune/contribution-signals.ts +8 -4
  23. package/cli/selftune/contribution-staging.ts +13 -2
  24. package/cli/selftune/contributions.ts +55 -121
  25. package/cli/selftune/creator-contributions.ts +29 -10
  26. package/cli/selftune/cron/setup.ts +7 -3
  27. package/cli/selftune/dashboard-contract.ts +87 -0
  28. package/cli/selftune/dashboard-server.ts +168 -17
  29. package/cli/selftune/dashboard.ts +350 -17
  30. package/cli/selftune/eval/baseline.ts +21 -5
  31. package/cli/selftune/eval/execution-eval.ts +170 -0
  32. package/cli/selftune/eval/family-overlap.ts +2 -2
  33. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  34. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  35. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  36. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  37. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  38. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  39. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  40. package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
  41. package/cli/selftune/evolution/evidence.ts +2 -6
  42. package/cli/selftune/evolution/evolve-body.ts +152 -38
  43. package/cli/selftune/evolution/evolve.ts +244 -52
  44. package/cli/selftune/evolution/rollback.ts +0 -1
  45. package/cli/selftune/evolution/validate-body.ts +111 -49
  46. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  47. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  48. package/cli/selftune/evolution/validate-routing.ts +51 -108
  49. package/cli/selftune/evolution/validation-contract.ts +91 -0
  50. package/cli/selftune/grading/auto-grade.ts +11 -7
  51. package/cli/selftune/grading/grade-session.ts +10 -16
  52. package/cli/selftune/hooks/skill-eval.ts +2 -1
  53. package/cli/selftune/hooks-shared/types.ts +1 -0
  54. package/cli/selftune/index.ts +58 -15
  55. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  56. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  57. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  58. package/cli/selftune/ingestors/pi-ingest.ts +727 -0
  59. package/cli/selftune/init.ts +38 -4
  60. package/cli/selftune/localdb/direct-write.ts +120 -1
  61. package/cli/selftune/localdb/materialize.ts +6 -7
  62. package/cli/selftune/localdb/queries/cron.ts +34 -0
  63. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  64. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  65. package/cli/selftune/localdb/queries/execution.ts +133 -0
  66. package/cli/selftune/localdb/queries/json.ts +18 -0
  67. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  68. package/cli/selftune/localdb/queries/raw.ts +95 -0
  69. package/cli/selftune/localdb/queries/staging.ts +270 -0
  70. package/cli/selftune/localdb/queries/trust.ts +392 -0
  71. package/cli/selftune/localdb/queries.ts +60 -2162
  72. package/cli/selftune/localdb/schema.ts +59 -0
  73. package/cli/selftune/monitoring/watch.ts +96 -29
  74. package/cli/selftune/normalization.ts +3 -0
  75. package/cli/selftune/observability.ts +12 -3
  76. package/cli/selftune/orchestrate/cli.ts +161 -0
  77. package/cli/selftune/orchestrate/execute.ts +295 -0
  78. package/cli/selftune/orchestrate/finalize.ts +157 -0
  79. package/cli/selftune/orchestrate/locks.ts +40 -0
  80. package/cli/selftune/orchestrate/plan.ts +131 -0
  81. package/cli/selftune/orchestrate/post-run.ts +59 -0
  82. package/cli/selftune/orchestrate/prepare.ts +334 -0
  83. package/cli/selftune/orchestrate/report.ts +182 -0
  84. package/cli/selftune/orchestrate/runtime.ts +120 -0
  85. package/cli/selftune/orchestrate/signals.ts +48 -0
  86. package/cli/selftune/orchestrate.ts +162 -1142
  87. package/cli/selftune/registry/client.ts +74 -0
  88. package/cli/selftune/registry/history.ts +54 -0
  89. package/cli/selftune/registry/index.ts +90 -0
  90. package/cli/selftune/registry/install.ts +141 -0
  91. package/cli/selftune/registry/list.ts +44 -0
  92. package/cli/selftune/registry/push.ts +171 -0
  93. package/cli/selftune/registry/rollback.ts +49 -0
  94. package/cli/selftune/registry/status.ts +62 -0
  95. package/cli/selftune/registry/sync.ts +125 -0
  96. package/cli/selftune/repair/skill-usage.ts +9 -3
  97. package/cli/selftune/routes/overview.ts +5 -2
  98. package/cli/selftune/routes/skill-report.ts +15 -2
  99. package/cli/selftune/schedule.ts +5 -5
  100. package/cli/selftune/status.ts +70 -2
  101. package/cli/selftune/sync.ts +127 -23
  102. package/cli/selftune/testing-readiness.ts +597 -0
  103. package/cli/selftune/types.ts +46 -5
  104. package/cli/selftune/uninstall.ts +2 -1
  105. package/cli/selftune/utils/canonical-log.ts +1 -9
  106. package/cli/selftune/utils/cli-error.ts +9 -0
  107. package/cli/selftune/utils/jsonl.ts +1 -30
  108. package/cli/selftune/utils/llm-call.ts +126 -6
  109. package/cli/selftune/utils/skill-discovery.ts +24 -0
  110. package/cli/selftune/workflows/proposals.ts +184 -0
  111. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  112. package/cli/selftune/workflows/workflows.ts +100 -26
  113. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  114. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  115. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  116. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  117. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  118. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  119. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  120. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
  121. package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
  122. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  123. package/package.json +25 -9
  124. package/packages/dashboard-core/AGENTS.md +18 -0
  125. package/packages/dashboard-core/README.md +30 -0
  126. package/packages/dashboard-core/index.ts +3 -0
  127. package/packages/dashboard-core/package.json +39 -0
  128. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  129. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  130. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  131. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  132. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  133. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  134. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  135. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  136. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  137. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  138. package/packages/dashboard-core/src/gates/index.ts +3 -0
  139. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  140. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  141. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  142. package/packages/dashboard-core/src/host/index.ts +3 -0
  143. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  144. package/packages/dashboard-core/src/models/index.ts +4 -0
  145. package/packages/dashboard-core/src/models/overview.ts +98 -0
  146. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  147. package/packages/dashboard-core/src/models/skills.ts +34 -0
  148. package/packages/dashboard-core/src/routes/index.ts +2 -0
  149. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  150. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  151. package/packages/dashboard-core/src/routes/types.ts +39 -0
  152. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  153. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  154. package/packages/dashboard-core/src/screens/index.ts +37 -0
  155. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  156. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  157. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  158. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  159. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  160. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  161. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  162. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  163. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  164. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  165. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  166. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  167. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  168. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  169. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  170. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  171. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  172. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  173. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  174. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  175. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  176. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  177. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  178. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  179. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  180. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  181. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  182. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  183. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  184. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  185. package/packages/telemetry-contract/package.json +1 -1
  186. package/packages/telemetry-contract/src/index.ts +1 -0
  187. package/packages/telemetry-contract/src/schemas.ts +63 -5
  188. package/packages/telemetry-contract/src/types.ts +97 -7
  189. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  190. package/packages/ui/AGENTS.md +16 -0
  191. package/packages/ui/README.md +1 -1
  192. package/packages/ui/package.json +1 -1
  193. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  194. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  195. package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
  196. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  197. package/packages/ui/src/components/InfoTip.tsx +1 -2
  198. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  199. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  200. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  201. package/packages/ui/src/components/OverviewPanels.tsx +693 -0
  202. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  203. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  204. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  205. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  206. package/packages/ui/src/components/index.ts +56 -1
  207. package/packages/ui/src/components/section-cards.tsx +18 -35
  208. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  209. package/packages/ui/src/lib/constants.tsx +0 -1
  210. package/packages/ui/src/primitives/card.tsx +1 -1
  211. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  212. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  213. package/packages/ui/src/primitives/select.tsx +2 -2
  214. package/packages/ui/src/primitives/tabs.tsx +7 -6
  215. package/packages/ui/src/types.ts +182 -4
  216. package/skill/SKILL.md +130 -318
  217. package/skill/agents/diagnosis-analyst.md +3 -3
  218. package/skill/agents/evolution-reviewer.md +3 -3
  219. package/skill/agents/integration-guide.md +3 -3
  220. package/skill/agents/pattern-analyst.md +2 -2
  221. package/skill/references/cli-quick-reference.md +89 -0
  222. package/skill/references/creator-playbook.md +131 -0
  223. package/skill/references/examples.md +48 -0
  224. package/skill/references/troubleshooting.md +47 -0
  225. package/skill/references/version-history.md +1 -1
  226. package/skill/selftune.contribute.json +11 -0
  227. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  228. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  229. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  230. package/skill/workflows/CreateTestDeploy.md +170 -0
  231. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  232. package/skill/{Workflows → workflows}/Cron.md +1 -1
  233. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  234. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  235. package/skill/{Workflows → workflows}/Evals.md +67 -2
  236. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  237. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  238. package/skill/{Workflows → workflows}/Grade.md +1 -1
  239. package/skill/{Workflows → workflows}/Ingest.md +60 -2
  240. package/skill/{Workflows → workflows}/Initialize.md +16 -9
  241. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  242. package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
  243. package/skill/workflows/Registry.md +99 -0
  244. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  245. package/skill/workflows/SignalsDashboard.md +87 -0
  246. package/skill/{Workflows → workflows}/Sync.md +3 -1
  247. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  248. package/skill/{Workflows → workflows}/Watch.md +42 -2
  249. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  250. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  251. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  252. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  253. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  254. package/cli/selftune/utils/html.ts +0 -27
  255. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
  256. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  257. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  258. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  259. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  260. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  261. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  262. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  263. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  264. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  265. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  266. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  267. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  268. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  269. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  270. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -129,6 +129,22 @@ CREATE TABLE IF NOT EXISTS evolution_audit (
129
129
  validation_evidence_ref TEXT
130
130
  )`;
131
131
 
132
+ // -- Replay entry results (per-entry validation outcomes) ---------------------
133
+
134
+ export const CREATE_REPLAY_ENTRY_RESULTS = `
135
+ CREATE TABLE IF NOT EXISTS replay_entry_results (
136
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
137
+ proposal_id TEXT NOT NULL,
138
+ skill_name TEXT NOT NULL,
139
+ validation_mode TEXT NOT NULL,
140
+ phase TEXT NOT NULL,
141
+ query TEXT NOT NULL,
142
+ should_trigger INTEGER NOT NULL,
143
+ triggered INTEGER NOT NULL,
144
+ passed INTEGER NOT NULL,
145
+ evidence TEXT
146
+ )`;
147
+
132
148
  // -- Local telemetry tables (from JSONL logs) ---------------------------------
133
149
 
134
150
  export const CREATE_SESSION_TELEMETRY = `
@@ -215,6 +231,20 @@ CREATE TABLE IF NOT EXISTS grading_results (
215
231
  execution_metrics_json TEXT
216
232
  )`;
217
233
 
234
+ // -- Grading baselines table (pre/post deploy grade snapshots) ---------------
235
+
236
+ export const CREATE_GRADING_BASELINES = `
237
+ CREATE TABLE IF NOT EXISTS grading_baselines (
238
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
239
+ skill_name TEXT NOT NULL,
240
+ proposal_id TEXT,
241
+ measured_at TEXT NOT NULL,
242
+ pass_rate REAL NOT NULL,
243
+ mean_score REAL,
244
+ sample_size INTEGER NOT NULL,
245
+ grading_results_json TEXT
246
+ )`;
247
+
218
248
  // -- Improvement signal table (from signal_log.jsonl) ------------------------
219
249
 
220
250
  export const CREATE_IMPROVEMENT_SIGNALS = `
@@ -294,6 +324,20 @@ CREATE TABLE IF NOT EXISTS commit_tracking (
294
324
  created_at TEXT NOT NULL DEFAULT (datetime('now'))
295
325
  )`;
296
326
 
327
+ // -- Cron run audit log -------------------------------------------------------
328
+
329
+ export const CREATE_CRON_RUNS = `
330
+ CREATE TABLE IF NOT EXISTS cron_runs (
331
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
332
+ job_name TEXT NOT NULL,
333
+ started_at TEXT NOT NULL,
334
+ elapsed_ms INTEGER NOT NULL,
335
+ status TEXT NOT NULL,
336
+ metrics_json TEXT,
337
+ error TEXT,
338
+ UNIQUE(job_name, started_at)
339
+ )`;
340
+
297
341
  // -- Metadata table -----------------------------------------------------------
298
342
 
299
343
  export const CREATE_META = `
@@ -339,6 +383,11 @@ export const CREATE_INDEXES = [
339
383
  `CREATE INDEX IF NOT EXISTS idx_grading_skill ON grading_results(skill_name)`,
340
384
  `CREATE INDEX IF NOT EXISTS idx_grading_ts ON grading_results(graded_at)`,
341
385
  `CREATE UNIQUE INDEX IF NOT EXISTS idx_grading_dedup ON grading_results(session_id, skill_name, graded_at)`,
386
+ // -- Grading baseline indexes ------------------------------------------------
387
+ `CREATE INDEX IF NOT EXISTS idx_grading_bl_skill ON grading_baselines(skill_name)`,
388
+ `CREATE INDEX IF NOT EXISTS idx_grading_bl_proposal ON grading_baselines(proposal_id)`,
389
+ `CREATE INDEX IF NOT EXISTS idx_grading_bl_ts ON grading_baselines(measured_at)`,
390
+ `CREATE INDEX IF NOT EXISTS idx_grading_bl_skill_proposal ON grading_baselines(skill_name, proposal_id, measured_at)`,
342
391
  // -- Improvement signal indexes ---------------------------------------------
343
392
  `CREATE INDEX IF NOT EXISTS idx_signals_session ON improvement_signals(session_id)`,
344
393
  `CREATE INDEX IF NOT EXISTS idx_signals_consumed ON improvement_signals(consumed)`,
@@ -355,11 +404,18 @@ export const CREATE_INDEXES = [
355
404
  `CREATE INDEX IF NOT EXISTS idx_staging_kind ON canonical_upload_staging(record_kind)`,
356
405
  `CREATE INDEX IF NOT EXISTS idx_staging_session ON canonical_upload_staging(session_id)`,
357
406
  `CREATE UNIQUE INDEX IF NOT EXISTS idx_staging_dedup ON canonical_upload_staging(record_kind, record_id)`,
407
+ // -- Replay entry result indexes ---------------------------------------------
408
+ `CREATE INDEX IF NOT EXISTS idx_replay_entry_proposal ON replay_entry_results(proposal_id)`,
409
+ `CREATE INDEX IF NOT EXISTS idx_replay_entry_skill ON replay_entry_results(skill_name)`,
410
+ `CREATE INDEX IF NOT EXISTS idx_replay_entry_passed ON replay_entry_results(passed)`,
411
+ `CREATE INDEX IF NOT EXISTS idx_replay_entry_proposal_phase ON replay_entry_results(proposal_id, phase)`,
358
412
  // -- Commit tracking indexes ------------------------------------------------
359
413
  `CREATE INDEX IF NOT EXISTS idx_commit_sha ON commit_tracking(commit_sha)`,
360
414
  `CREATE INDEX IF NOT EXISTS idx_commit_session ON commit_tracking(session_id)`,
361
415
  `CREATE INDEX IF NOT EXISTS idx_commit_ts ON commit_tracking(timestamp)`,
362
416
  `CREATE UNIQUE INDEX IF NOT EXISTS idx_commit_dedup ON commit_tracking(session_id, commit_sha)`,
417
+ // -- Cron run indexes -------------------------------------------------------
418
+ `CREATE INDEX IF NOT EXISTS idx_cron_runs_job_ts ON cron_runs(job_name, started_at)`,
363
419
  ];
364
420
 
365
421
  /**
@@ -443,17 +499,20 @@ export const ALL_DDL = [
443
499
  CREATE_EXECUTION_FACTS,
444
500
  CREATE_EVOLUTION_EVIDENCE,
445
501
  CREATE_EVOLUTION_AUDIT,
502
+ CREATE_REPLAY_ENTRY_RESULTS,
446
503
  CREATE_SESSION_TELEMETRY,
447
504
  CREATE_SKILL_USAGE,
448
505
  CREATE_ORCHESTRATE_RUNS,
449
506
  CREATE_QUERIES,
450
507
  CREATE_GRADING_RESULTS,
508
+ CREATE_GRADING_BASELINES,
451
509
  CREATE_IMPROVEMENT_SIGNALS,
452
510
  CREATE_UPLOAD_QUEUE,
453
511
  CREATE_CREATOR_CONTRIBUTION_STAGING,
454
512
  CREATE_UPLOAD_WATERMARKS,
455
513
  CREATE_CANONICAL_UPLOAD_STAGING,
456
514
  CREATE_COMMIT_TRACKING,
515
+ CREATE_CRON_RUNS,
457
516
  CREATE_META,
458
517
  ...CREATE_INDEXES,
459
518
  ];
@@ -8,12 +8,15 @@
8
8
 
9
9
  import { parseArgs } from "node:util";
10
10
 
11
+ import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "../command-surface.js";
11
12
  import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
12
13
  import { classifyInvocation } from "../eval/hooks-to-evals.js";
13
14
  import { getLastDeployedProposal } from "../evolution/audit.js";
14
15
  import { getDb } from "../localdb/db.js";
15
16
  import {
17
+ queryGradingBaseline,
16
18
  queryQueryLog,
19
+ queryRecentGradingResults,
17
20
  querySessionTelemetry,
18
21
  querySkillUsageRecords,
19
22
  } from "../localdb/queries.js";
@@ -42,6 +45,10 @@ export interface WatchOptions {
42
45
  windowSessions: number;
43
46
  regressionThreshold: number;
44
47
  autoRollback: boolean;
48
+ /** Grade regression threshold (default 0.15). */
49
+ gradeRegressionThreshold?: number;
50
+ /** Enable grade-based regression watch (default true). */
51
+ enableGradeWatch?: boolean;
45
52
  /** Injected log paths for testing (override defaults). */
46
53
  _telemetryLogPath?: string;
47
54
  _skillLogPath?: string;
@@ -65,6 +72,8 @@ export interface WatchResult {
65
72
  rolledBack: boolean;
66
73
  recommendation: string;
67
74
  sync_result?: SyncResult;
75
+ gradeAlert?: string | null;
76
+ gradeRegression?: { before: number; after: number; delta: number } | null;
68
77
  }
69
78
 
70
79
  // ---------------------------------------------------------------------------
@@ -73,6 +82,7 @@ export interface WatchResult {
73
82
 
74
83
  const DEFAULT_BASELINE_PASS_RATE = 0.5;
75
84
  const DEFAULT_REGRESSION_THRESHOLD = 0.1;
85
+ const DEFAULT_GRADE_REGRESSION_THRESHOLD = 0.15;
76
86
  export const MIN_MONITORING_SKILL_CHECKS = 3;
77
87
 
78
88
  // ---------------------------------------------------------------------------
@@ -190,6 +200,8 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
190
200
  skillPath,
191
201
  windowSessions = 20,
192
202
  regressionThreshold = DEFAULT_REGRESSION_THRESHOLD,
203
+ gradeRegressionThreshold = DEFAULT_GRADE_REGRESSION_THRESHOLD,
204
+ enableGradeWatch = true,
193
205
  autoRollback = false,
194
206
  _telemetryLogPath = TELEMETRY_LOG,
195
207
  _skillLogPath = SKILL_LOG,
@@ -235,26 +247,71 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
235
247
  regressionThreshold,
236
248
  );
237
249
 
238
- // 4. Build alert and recommendation
239
- let alert: string | null = null;
250
+ // 4. Build trigger alert. Grade alerts are added below before rollback
251
+ // decisions so either signal can drive automated rollback.
252
+ let triggerAlert: string | null = null;
240
253
  let rolledBack = false;
241
- let recommendation: string;
242
254
 
243
255
  if (snapshot.regression_detected) {
244
- alert = `regression detected for "${skillName}": pass_rate=${snapshot.pass_rate.toFixed(2)} below baseline=${baselinePassRate.toFixed(2)} minus threshold=${regressionThreshold.toFixed(2)}`;
245
-
246
- // 5. Auto-rollback if enabled
247
- if (autoRollback) {
248
- const rollbackFn = _rollbackFn ?? (await loadRollbackFn());
249
- const proposalId = lastDeployed?.proposal_id;
250
- const rollbackResult = await rollbackFn({
251
- skillName,
252
- skillPath,
253
- proposalId,
254
- });
255
- rolledBack = rollbackResult.rolledBack;
256
+ triggerAlert = `regression detected for "${skillName}": pass_rate=${snapshot.pass_rate.toFixed(2)} below baseline=${baselinePassRate.toFixed(2)} minus threshold=${regressionThreshold.toFixed(2)}`;
257
+ }
258
+
259
+ // 5. Grade regression detection (fail-open)
260
+ let gradeAlert: string | null = null;
261
+ let gradeRegression: { before: number; after: number; delta: number } | null = null;
262
+
263
+ if (enableGradeWatch) {
264
+ try {
265
+ const baseline = queryGradingBaseline(db, skillName, lastDeployed?.proposal_id);
266
+ const recentResults = queryRecentGradingResults(db, skillName, 10);
267
+
268
+ if (baseline && recentResults.length > 0) {
269
+ // Compute the average pass rate from recent grading results
270
+ const validResults = recentResults.filter((r) => r.pass_rate != null);
271
+ if (validResults.length > 0) {
272
+ const recentAvgPassRate =
273
+ validResults.reduce((sum, r) => sum + (r.pass_rate ?? 0), 0) / validResults.length;
274
+ const baselinePassRateGrade = baseline.pass_rate;
275
+ const delta = baselinePassRateGrade - recentAvgPassRate;
276
+
277
+ if (delta > gradeRegressionThreshold) {
278
+ gradeAlert = `grade regression detected for "${skillName}": baseline_grade_pass_rate=${baselinePassRateGrade.toFixed(2)}, recent_avg=${recentAvgPassRate.toFixed(2)}, delta=${delta.toFixed(2)} exceeds threshold=${gradeRegressionThreshold.toFixed(2)}`;
279
+ gradeRegression = {
280
+ before: baselinePassRateGrade,
281
+ after: recentAvgPassRate,
282
+ delta,
283
+ };
284
+ }
285
+ }
286
+ }
287
+ } catch (err) {
288
+ // Fail-open: grade watch should never block trigger monitoring
289
+ console.error(
290
+ JSON.stringify({
291
+ level: "debug",
292
+ code: "grade_watch_failed",
293
+ message: `Grade watch failed for "${skillName}": ${err instanceof Error ? err.message : String(err)}`,
294
+ }),
295
+ );
256
296
  }
297
+ }
257
298
 
299
+ const alerts = [triggerAlert, gradeAlert].filter((value): value is string => Boolean(value));
300
+ const alert = alerts.length > 0 ? alerts.join("\n") : null;
301
+
302
+ if (alert && autoRollback) {
303
+ const rollbackFn = _rollbackFn ?? (await loadRollbackFn());
304
+ const proposalId = lastDeployed?.proposal_id;
305
+ const rollbackResult = await rollbackFn({
306
+ skillName,
307
+ skillPath,
308
+ proposalId,
309
+ });
310
+ rolledBack = rollbackResult.rolledBack;
311
+ }
312
+
313
+ let recommendation: string;
314
+ if (alert) {
258
315
  recommendation = rolledBack
259
316
  ? `Rolled back "${skillName}" to previous version. Monitor to confirm recovery.`
260
317
  : `Consider running: selftune rollback --skill "${skillName}" --skill-path "${skillPath}"`;
@@ -285,6 +342,8 @@ export async function watch(options: WatchOptions): Promise<WatchResult> {
285
342
  alert,
286
343
  rolledBack,
287
344
  recommendation,
345
+ gradeAlert,
346
+ gradeRegression,
288
347
  ...(syncResult ? { sync_result: syncResult } : {}),
289
348
  };
290
349
  }
@@ -329,6 +388,8 @@ export async function cliMain(): Promise<void> {
329
388
  window: { type: "string", default: "20" },
330
389
  threshold: { type: "string", default: "0.1" },
331
390
  "auto-rollback": { type: "boolean", default: false },
391
+ "grade-threshold": { type: "string", default: "0.15" },
392
+ "no-grade-watch": { type: "boolean", default: false },
332
393
  "sync-first": { type: "boolean", default: false },
333
394
  "sync-force": { type: "boolean", default: false },
334
395
  help: { type: "boolean", default: false },
@@ -337,20 +398,7 @@ export async function cliMain(): Promise<void> {
337
398
  });
338
399
 
339
400
  if (values.help) {
340
- console.log(`selftune watch — Monitor post-deploy skill health
341
-
342
- Usage:
343
- selftune watch --skill <name> --skill-path <path> [options]
344
-
345
- Options:
346
- --skill Skill name (required)
347
- --skill-path Path to SKILL.md (required)
348
- --window Number of recent sessions to consider (default: 20)
349
- --threshold Regression threshold below baseline (default: 0.1)
350
- --auto-rollback Automatically rollback on regression detection
351
- --sync-first Refresh source-truth telemetry before reading watch inputs
352
- --sync-force Force a full rescan during --sync-first
353
- --help Show this help message`);
401
+ console.log(renderCommandHelp(PUBLIC_COMMAND_SURFACES.watch));
354
402
  process.exit(0);
355
403
  }
356
404
 
@@ -403,11 +451,30 @@ Options:
403
451
  );
404
452
  }
405
453
 
454
+ const rawGradeThreshold = values["grade-threshold"] ?? "0.15";
455
+ if (!/^\d+(\.\d+)?$/.test(rawGradeThreshold)) {
456
+ throw new CLIError(
457
+ "--grade-threshold must be a finite number between 0 and 1.",
458
+ "INVALID_FLAG",
459
+ "selftune watch --grade-threshold 0.15",
460
+ );
461
+ }
462
+ const gradeRegressionThreshold = Number.parseFloat(rawGradeThreshold);
463
+ if (gradeRegressionThreshold < 0 || gradeRegressionThreshold > 1) {
464
+ throw new CLIError(
465
+ "--grade-threshold must be a finite number between 0 and 1.",
466
+ "INVALID_FLAG",
467
+ "selftune watch --grade-threshold 0.15",
468
+ );
469
+ }
470
+
406
471
  const result = await watch({
407
472
  skillName: values.skill,
408
473
  skillPath: values["skill-path"],
409
474
  windowSessions,
410
475
  regressionThreshold,
476
+ gradeRegressionThreshold,
477
+ enableGradeWatch: !(values["no-grade-watch"] ?? false),
411
478
  autoRollback: values["auto-rollback"] ?? false,
412
479
  syncFirst: values["sync-first"] ?? false,
413
480
  syncForce: values["sync-force"] ?? false,
@@ -694,6 +694,7 @@ export function buildCanonicalSkillInvocation(
694
694
  }
695
695
 
696
696
  export interface BuildExecutionFactInput extends CanonicalBaseInput {
697
+ execution_fact_id?: string;
697
698
  occurred_at: string;
698
699
  prompt_id?: string;
699
700
  tool_calls_json: Record<string, number>;
@@ -716,6 +717,8 @@ export function buildCanonicalExecutionFact(
716
717
  const record: CanonicalExecutionFactRecord = {
717
718
  ...base,
718
719
  record_kind: "execution_fact",
720
+ execution_fact_id:
721
+ input.execution_fact_id ?? `${input.session_id}:${input.occurred_at}:execution_fact`,
719
722
  occurred_at: input.occurred_at,
720
723
  tool_calls_json: input.tool_calls_json,
721
724
  total_tool_calls: input.total_tool_calls,
@@ -14,6 +14,7 @@ import { join } from "node:path";
14
14
 
15
15
  import { getAlphaGuidance } from "./agent-guidance.js";
16
16
  import { getAlphaLinkState, readAlphaIdentity } from "./alpha-identity.js";
17
+ import { getSelftuneUpdateHint } from "./auto-update.js";
17
18
  import { LOG_DIR, REQUIRED_FIELDS, SELFTUNE_CONFIG_PATH } from "./constants.js";
18
19
  import { DB_PATH, getDb } from "./localdb/db.js";
19
20
  import type {
@@ -26,7 +27,14 @@ import type {
26
27
  } from "./types.js";
27
28
  import { missingClaudeCodeHookKeys } from "./utils/hooks.js";
28
29
 
29
- const VALID_AGENT_TYPES = new Set(["claude_code", "codex", "opencode", "openclaw", "unknown"]);
30
+ const VALID_AGENT_TYPES = new Set([
31
+ "claude_code",
32
+ "codex",
33
+ "opencode",
34
+ "openclaw",
35
+ "pi",
36
+ "unknown",
37
+ ]);
30
38
  const VALID_LLM_MODES = new Set(["agent"]);
31
39
 
32
40
  const LOG_FILES: Record<string, string> = {
@@ -311,12 +319,13 @@ export async function checkVersionHealth(): Promise<HealthCheck[]> {
311
319
  if (cmp >= 0) {
312
320
  check.message = `v${currentVersion} (latest)`;
313
321
  } else {
322
+ const updateCommand = getSelftuneUpdateHint("latest");
314
323
  check.status = "warn";
315
- check.message = `v${currentVersion} installed, v${latestVersion} available. Run: npx skills add selftune-dev/selftune`;
324
+ check.message = `v${currentVersion} installed, v${latestVersion} available. Run: ${updateCommand}`;
316
325
  check.guidance = {
317
326
  code: "version_update_available",
318
327
  message: "A newer selftune release is available.",
319
- next_command: "npx skills add selftune-dev/selftune",
328
+ next_command: updateCommand,
320
329
  suggested_commands: ["selftune doctor"],
321
330
  blocking: false,
322
331
  };
@@ -0,0 +1,161 @@
1
+ import { parseArgs } from "node:util";
2
+
3
+ import { PUBLIC_COMMAND_SURFACES, renderCommandHelp } from "../command-surface.js";
4
+ import type { OrchestrateOptions, OrchestrateResult } from "../orchestrate.js";
5
+ import { CLIError } from "../utils/cli-error.js";
6
+
7
/**
 * Normalized result of parsing the `selftune orchestrate` command line.
 */
export interface ParsedOrchestrateCliArgs {
  /** True when `--help` / `-h` was passed; the other fields then hold defaults. */
  showHelp: boolean;
  /** Non-fatal notices collected while parsing (e.g. the `--auto-approve` deprecation). */
  warnings: Array<string>;
  /** Whether `--loop` was requested. */
  loop: boolean;
  /** Value of `--loop-interval` in seconds (default 3600). */
  loopIntervalSeconds: number;
  /** Run options assembled from the remaining flags. */
  runOptions: OrchestrateOptions;
}
14
+
15
/**
 * Parses a CLI flag value that must be a positive (>= 1) base-10 integer.
 *
 * @param value - Raw string supplied for the flag.
 * @param message - Error message used when validation fails.
 * @param command - Example invocation forwarded as the third `CLIError` argument.
 * @returns The parsed integer.
 * @throws CLIError with code `"INVALID_FLAG"` when `value` is not made up solely
 *   of ASCII digits, is below 1, or is too large to be represented exactly.
 */
function parsePositiveIntegerFlag(value: string, message: string, command: string): number {
  const parsed = /^\d+$/.test(value) ? Number(value) : Number.NaN;
  // isSafeInteger rejects NaN as well as digit strings that would overflow to an
  // imprecise value or Infinity (e.g. hundreds of digits).
  if (!Number.isSafeInteger(parsed) || parsed < 1) {
    throw new CLIError(message, "INVALID_FLAG", command);
  }
  return parsed;
}
21
+
22
/**
 * Parses a CLI flag value that must be a non-negative (>= 0) base-10 integer.
 *
 * @param value - Raw string supplied for the flag.
 * @param message - Error message used when validation fails.
 * @param command - Example invocation forwarded as the third `CLIError` argument.
 * @returns The parsed integer.
 * @throws CLIError with code `"INVALID_FLAG"` when `value` is not made up solely
 *   of ASCII digits or is too large to be represented exactly.
 */
function parseNonNegativeIntegerFlag(value: string, message: string, command: string): number {
  const parsed = /^\d+$/.test(value) ? Number(value) : Number.NaN;
  // isSafeInteger rejects NaN as well as digit strings that would overflow to an
  // imprecise value or Infinity.
  if (!Number.isSafeInteger(parsed)) {
    throw new CLIError(message, "INVALID_FLAG", command);
  }
  return parsed;
}
28
+
29
/**
 * Builds the help text for `selftune orchestrate` by rendering the
 * `orchestrate` entry of the shared public command surfaces.
 */
export function renderOrchestrateHelp(): string {
  const { orchestrate: orchestrateSurface } = PUBLIC_COMMAND_SURFACES;
  return renderCommandHelp(orchestrateSurface);
}
32
+
33
/**
 * Parses the `selftune orchestrate` command line into a normalized structure.
 *
 * When `--help` / `-h` is present the function returns immediately with
 * `showHelp: true` and default values, without validating the other flags.
 * Otherwise the numeric flags are validated in the order `--max-skills`,
 * `--recent-window`, `--max-auto-grade`, `--loop-interval`.
 *
 * @param argv - Arguments to parse; defaults to `process.argv.slice(2)`.
 * @returns The parsed flags, any deprecation warnings, and the run options.
 * @throws CLIError with code `"INVALID_FLAG"` when a numeric flag is malformed,
 *   or when `--loop` is set and `--loop-interval` is below 60.
 */
export function parseOrchestrateCliArgs(
  argv: string[] = process.argv.slice(2),
): ParsedOrchestrateCliArgs {
  const { values: flags } = parseArgs({
    args: argv,
    strict: true,
    options: {
      "dry-run": { type: "boolean", default: false },
      "review-required": { type: "boolean", default: false },
      "auto-approve": { type: "boolean", default: false },
      skill: { type: "string" },
      "max-skills": { type: "string", default: "5" },
      "recent-window": { type: "string", default: "48" },
      "sync-force": { type: "boolean", default: false },
      "max-auto-grade": { type: "string", default: "5" },
      loop: { type: "boolean", default: false },
      "loop-interval": { type: "string", default: "3600" },
      help: { type: "boolean", short: "h", default: false },
    },
  });

  // Help short-circuits everything else: hand back defaults untouched.
  if (flags.help) {
    const defaultRunOptions: OrchestrateOptions = {
      dryRun: false,
      approvalMode: "auto",
      maxSkills: 5,
      recentWindowHours: 48,
      syncForce: false,
      maxAutoGrade: 5,
    };
    return {
      showHelp: true,
      warnings: [],
      loop: false,
      loopIntervalSeconds: 3600,
      runOptions: defaultRunOptions,
    };
  }

  const loopEnabled = flags.loop ?? false;

  const maxSkills = parsePositiveIntegerFlag(
    flags["max-skills"] ?? "5",
    "--max-skills must be a positive integer",
    "selftune orchestrate --max-skills 5",
  );
  const recentWindowHours = parsePositiveIntegerFlag(
    flags["recent-window"] ?? "48",
    "--recent-window must be a positive integer",
    "selftune orchestrate --recent-window 48",
  );
  const maxAutoGrade = parseNonNegativeIntegerFlag(
    flags["max-auto-grade"] ?? "5",
    "--max-auto-grade must be a non-negative integer",
    "selftune orchestrate --max-auto-grade 5",
  );

  // The 60-second floor is only enforced when looping is actually requested.
  const intervalText = flags["loop-interval"] ?? "3600";
  const intervalIsDigits = /^\d+$/.test(intervalText);
  const intervalTooShort = loopEnabled && Number(intervalText) < 60;
  if (!intervalIsDigits || intervalTooShort) {
    throw new CLIError(
      "--loop-interval must be an integer >= 60 (seconds)",
      "INVALID_FLAG",
      "selftune orchestrate --loop --loop-interval 3600",
    );
  }

  const warnings: string[] = flags["auto-approve"]
    ? ["[orchestrate] --auto-approve is deprecated; autonomous mode is now the default."]
    : [];

  const runOptions: OrchestrateOptions = {
    dryRun: flags["dry-run"] ?? false,
    approvalMode: flags["review-required"] ? "review" : "auto",
    skillFilter: flags.skill,
    maxSkills,
    recentWindowHours,
    syncForce: flags["sync-force"] ?? false,
    maxAutoGrade,
  };

  return {
    showHelp: false,
    warnings,
    loop: loopEnabled,
    loopIntervalSeconds: Number(intervalText),
    runOptions,
  };
}
120
+
121
/**
 * Flattens an orchestrate result into the plain object emitted as JSON.
 *
 * The output starts with the fields of `result.summary`, then an optional
 * `upload` entry (only when `result.uploadSummary` is truthy), then
 * `workflow_proposals` and `decisions`. Each decision always carries
 * `skill`, `action` and `reason`; evolve and watch details are merged in only
 * when the candidate has the corresponding result.
 *
 * @param result - Result of an orchestrate run.
 * @returns A JSON-serializable summary object.
 */
export function buildOrchestrateJsonOutput(result: OrchestrateResult) {
  const uploadFields = result.uploadSummary ? { upload: result.uploadSummary } : {};

  const workflowProposals = result.workflowProposals.map((proposal) => {
    const { workflow, draft } = proposal;
    return {
      proposal_id: proposal.proposal_id,
      source_skill_name: proposal.source_skill_name,
      workflow_id: workflow.workflow_id,
      generated_skill_name: draft.skill_name,
      output_path: draft.skill_path,
      confidence: proposal.confidence,
      reason: proposal.rationale,
    };
  });

  const decisions = result.candidates.map((candidate) => {
    const { evolveResult, watchResult } = candidate;

    // Evolve details; validation collapses to null when it was not produced.
    const evolveFields = evolveResult
      ? {
          deployed: evolveResult.deployed,
          evolveReason: evolveResult.reason,
          validation: evolveResult.validation
            ? {
                before: evolveResult.validation.before_pass_rate,
                after: evolveResult.validation.after_pass_rate,
                improved: evolveResult.validation.improved,
              }
            : null,
        }
      : {};

    // Watch details; passRate is null when no snapshot is available.
    const watchFields = watchResult
      ? {
          alert: watchResult.alert,
          rolledBack: watchResult.rolledBack,
          passRate: watchResult.snapshot?.pass_rate ?? null,
          recommendation: watchResult.recommendation,
        }
      : {};

    return {
      skill: candidate.skill,
      action: candidate.action,
      reason: candidate.reason,
      ...evolveFields,
      ...watchFields,
    };
  });

  return {
    ...result.summary,
    ...uploadFields,
    workflow_proposals: workflowProposals,
    decisions,
  };
}