selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +95 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/adapters/pi/hook.ts +273 -0
  12. package/cli/selftune/adapters/pi/install.ts +207 -0
  13. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  15. package/cli/selftune/auto-update.ts +200 -8
  16. package/cli/selftune/canonical-export.ts +55 -25
  17. package/cli/selftune/command-surface.ts +397 -0
  18. package/cli/selftune/constants.ts +10 -1
  19. package/cli/selftune/contribute/contribute.ts +64 -13
  20. package/cli/selftune/contribution-config.ts +57 -3
  21. package/cli/selftune/contribution-preferences.ts +117 -0
  22. package/cli/selftune/contribution-signals.ts +8 -4
  23. package/cli/selftune/contribution-staging.ts +13 -2
  24. package/cli/selftune/contributions.ts +55 -121
  25. package/cli/selftune/creator-contributions.ts +29 -10
  26. package/cli/selftune/cron/setup.ts +7 -3
  27. package/cli/selftune/dashboard-contract.ts +87 -0
  28. package/cli/selftune/dashboard-server.ts +168 -17
  29. package/cli/selftune/dashboard.ts +350 -17
  30. package/cli/selftune/eval/baseline.ts +21 -5
  31. package/cli/selftune/eval/execution-eval.ts +170 -0
  32. package/cli/selftune/eval/family-overlap.ts +2 -2
  33. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  34. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  35. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  36. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  37. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  38. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  39. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  40. package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
  41. package/cli/selftune/evolution/evidence.ts +2 -6
  42. package/cli/selftune/evolution/evolve-body.ts +152 -38
  43. package/cli/selftune/evolution/evolve.ts +244 -52
  44. package/cli/selftune/evolution/rollback.ts +0 -1
  45. package/cli/selftune/evolution/validate-body.ts +111 -49
  46. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  47. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  48. package/cli/selftune/evolution/validate-routing.ts +51 -108
  49. package/cli/selftune/evolution/validation-contract.ts +91 -0
  50. package/cli/selftune/grading/auto-grade.ts +11 -7
  51. package/cli/selftune/grading/grade-session.ts +10 -16
  52. package/cli/selftune/hooks/skill-eval.ts +2 -1
  53. package/cli/selftune/hooks-shared/types.ts +1 -0
  54. package/cli/selftune/index.ts +58 -15
  55. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  56. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  57. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  58. package/cli/selftune/ingestors/pi-ingest.ts +727 -0
  59. package/cli/selftune/init.ts +38 -4
  60. package/cli/selftune/localdb/direct-write.ts +120 -1
  61. package/cli/selftune/localdb/materialize.ts +6 -7
  62. package/cli/selftune/localdb/queries/cron.ts +34 -0
  63. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  64. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  65. package/cli/selftune/localdb/queries/execution.ts +133 -0
  66. package/cli/selftune/localdb/queries/json.ts +18 -0
  67. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  68. package/cli/selftune/localdb/queries/raw.ts +95 -0
  69. package/cli/selftune/localdb/queries/staging.ts +270 -0
  70. package/cli/selftune/localdb/queries/trust.ts +392 -0
  71. package/cli/selftune/localdb/queries.ts +60 -2162
  72. package/cli/selftune/localdb/schema.ts +59 -0
  73. package/cli/selftune/monitoring/watch.ts +96 -29
  74. package/cli/selftune/normalization.ts +3 -0
  75. package/cli/selftune/observability.ts +12 -3
  76. package/cli/selftune/orchestrate/cli.ts +161 -0
  77. package/cli/selftune/orchestrate/execute.ts +295 -0
  78. package/cli/selftune/orchestrate/finalize.ts +157 -0
  79. package/cli/selftune/orchestrate/locks.ts +40 -0
  80. package/cli/selftune/orchestrate/plan.ts +131 -0
  81. package/cli/selftune/orchestrate/post-run.ts +59 -0
  82. package/cli/selftune/orchestrate/prepare.ts +334 -0
  83. package/cli/selftune/orchestrate/report.ts +182 -0
  84. package/cli/selftune/orchestrate/runtime.ts +120 -0
  85. package/cli/selftune/orchestrate/signals.ts +48 -0
  86. package/cli/selftune/orchestrate.ts +162 -1142
  87. package/cli/selftune/registry/client.ts +74 -0
  88. package/cli/selftune/registry/history.ts +54 -0
  89. package/cli/selftune/registry/index.ts +90 -0
  90. package/cli/selftune/registry/install.ts +141 -0
  91. package/cli/selftune/registry/list.ts +44 -0
  92. package/cli/selftune/registry/push.ts +171 -0
  93. package/cli/selftune/registry/rollback.ts +49 -0
  94. package/cli/selftune/registry/status.ts +62 -0
  95. package/cli/selftune/registry/sync.ts +125 -0
  96. package/cli/selftune/repair/skill-usage.ts +9 -3
  97. package/cli/selftune/routes/overview.ts +5 -2
  98. package/cli/selftune/routes/skill-report.ts +15 -2
  99. package/cli/selftune/schedule.ts +5 -5
  100. package/cli/selftune/status.ts +70 -2
  101. package/cli/selftune/sync.ts +127 -23
  102. package/cli/selftune/testing-readiness.ts +597 -0
  103. package/cli/selftune/types.ts +46 -5
  104. package/cli/selftune/uninstall.ts +2 -1
  105. package/cli/selftune/utils/canonical-log.ts +1 -9
  106. package/cli/selftune/utils/cli-error.ts +9 -0
  107. package/cli/selftune/utils/jsonl.ts +1 -30
  108. package/cli/selftune/utils/llm-call.ts +126 -6
  109. package/cli/selftune/utils/skill-discovery.ts +24 -0
  110. package/cli/selftune/workflows/proposals.ts +184 -0
  111. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  112. package/cli/selftune/workflows/workflows.ts +100 -26
  113. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  114. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  115. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  116. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  117. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  118. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  119. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  120. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
  121. package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
  122. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  123. package/package.json +25 -9
  124. package/packages/dashboard-core/AGENTS.md +18 -0
  125. package/packages/dashboard-core/README.md +30 -0
  126. package/packages/dashboard-core/index.ts +3 -0
  127. package/packages/dashboard-core/package.json +39 -0
  128. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  129. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  130. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  131. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  132. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  133. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  134. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  135. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  136. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  137. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  138. package/packages/dashboard-core/src/gates/index.ts +3 -0
  139. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  140. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  141. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  142. package/packages/dashboard-core/src/host/index.ts +3 -0
  143. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  144. package/packages/dashboard-core/src/models/index.ts +4 -0
  145. package/packages/dashboard-core/src/models/overview.ts +98 -0
  146. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  147. package/packages/dashboard-core/src/models/skills.ts +34 -0
  148. package/packages/dashboard-core/src/routes/index.ts +2 -0
  149. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  150. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  151. package/packages/dashboard-core/src/routes/types.ts +39 -0
  152. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  153. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  154. package/packages/dashboard-core/src/screens/index.ts +37 -0
  155. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  156. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  157. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  158. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  159. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  160. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  161. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  162. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  163. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  164. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  165. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  166. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  167. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  168. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  169. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  170. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  171. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  172. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  173. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  174. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  175. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  176. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  177. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  178. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  179. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  180. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  181. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
  182. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  183. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  184. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
  185. package/packages/telemetry-contract/package.json +1 -1
  186. package/packages/telemetry-contract/src/index.ts +1 -0
  187. package/packages/telemetry-contract/src/schemas.ts +63 -5
  188. package/packages/telemetry-contract/src/types.ts +97 -7
  189. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  190. package/packages/ui/AGENTS.md +16 -0
  191. package/packages/ui/README.md +1 -1
  192. package/packages/ui/package.json +1 -1
  193. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  194. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  195. package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
  196. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  197. package/packages/ui/src/components/InfoTip.tsx +1 -2
  198. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  199. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  200. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  201. package/packages/ui/src/components/OverviewPanels.tsx +693 -0
  202. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  203. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  204. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  205. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  206. package/packages/ui/src/components/index.ts +56 -1
  207. package/packages/ui/src/components/section-cards.tsx +18 -35
  208. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  209. package/packages/ui/src/lib/constants.tsx +0 -1
  210. package/packages/ui/src/primitives/card.tsx +1 -1
  211. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  212. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  213. package/packages/ui/src/primitives/select.tsx +2 -2
  214. package/packages/ui/src/primitives/tabs.tsx +7 -6
  215. package/packages/ui/src/types.ts +182 -4
  216. package/skill/SKILL.md +130 -318
  217. package/skill/agents/diagnosis-analyst.md +3 -3
  218. package/skill/agents/evolution-reviewer.md +3 -3
  219. package/skill/agents/integration-guide.md +3 -3
  220. package/skill/agents/pattern-analyst.md +2 -2
  221. package/skill/references/cli-quick-reference.md +89 -0
  222. package/skill/references/creator-playbook.md +131 -0
  223. package/skill/references/examples.md +48 -0
  224. package/skill/references/troubleshooting.md +47 -0
  225. package/skill/references/version-history.md +1 -1
  226. package/skill/selftune.contribute.json +11 -0
  227. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  228. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  229. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  230. package/skill/workflows/CreateTestDeploy.md +170 -0
  231. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  232. package/skill/{Workflows → workflows}/Cron.md +1 -1
  233. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  234. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  235. package/skill/{Workflows → workflows}/Evals.md +67 -2
  236. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  237. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  238. package/skill/{Workflows → workflows}/Grade.md +1 -1
  239. package/skill/{Workflows → workflows}/Ingest.md +60 -2
  240. package/skill/{Workflows → workflows}/Initialize.md +16 -9
  241. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  242. package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
  243. package/skill/workflows/Registry.md +99 -0
  244. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  245. package/skill/workflows/SignalsDashboard.md +87 -0
  246. package/skill/{Workflows → workflows}/Sync.md +3 -1
  247. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  248. package/skill/{Workflows → workflows}/Watch.md +42 -2
  249. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  250. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  251. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  252. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  253. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  254. package/cli/selftune/utils/html.ts +0 -27
  255. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
  256. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  257. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  258. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  259. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  260. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  261. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  262. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  263. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  264. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  265. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  266. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  267. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  268. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  269. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  270. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -6,7 +6,7 @@ Bootstrap selftune for first-time use or after changing environments.
6
6
 
7
7
  - The user asks to set up selftune, configure selftune, or initialize selftune
8
8
  - The agent detects `~/.selftune/config.json` does not exist
9
- - The user has switched agent platforms (Claude Code, Codex, OpenCode)
9
+ - The user has switched agent platforms (Claude Code, Codex, OpenCode, Pi)
10
10
  - The user wants to add hooks for additional platforms (multi-agent setup)
11
11
 
12
12
  ## Default Command
@@ -21,7 +21,7 @@ selftune init --no-alpha [--force]
21
21
 
22
22
  | Flag | Description | Default |
23
23
  | ------------------------- | ------------------------------------------------------------------------- | ------------- |
24
- | `--agent <type>` | Agent platform: `claude_code`, `codex`, `opencode`, `openclaw` | Auto-detected |
24
+ | `--agent <type>` | Agent platform: `claude_code`, `codex`, `opencode`, `openclaw`, `pi` | Auto-detected |
25
25
  | `--cli-path <path>` | Override auto-detected CLI entry-point path | Auto-detected |
26
26
  | `--force` | Reinitialize even if config already exists | Off |
27
27
  | `--no-sync` | Skip historical transcript backfill during init | Sync on |
@@ -89,9 +89,12 @@ which selftune
89
89
  If `selftune` is not on PATH, install it:
90
90
 
91
91
  ```bash
92
- npm install -g selftune
92
+ npx skills add selftune-dev/selftune
93
93
  ```
94
94
 
95
+ If you manage the CLI directly instead of using the skill installer, use
96
+ `npm install -g selftune` or `bun add -g selftune`.
97
+
95
98
  ### 2. Check Existing Config
96
99
 
97
100
  ```bash
@@ -146,6 +149,7 @@ CLIs available. Run these checks:
146
149
  which codex 2>/dev/null && echo "codex available"
147
150
  which opencode 2>/dev/null && echo "opencode available"
148
151
  ls ~/Documents/Cline/Hooks/ 2>/dev/null && echo "cline available"
152
+ ls ~/.pi/agent/ 2>/dev/null && echo "pi available"
149
153
  ```
150
154
 
151
155
  If **any** additional platforms are detected, use `AskUserQuestion` listing only
@@ -168,9 +172,10 @@ For each platform the user selects, run the install command:
168
172
  selftune codex install # writes hooks.json entries
169
173
  selftune opencode install # writes shell shim + config entries
170
174
  selftune cline install # creates hook scripts
175
+ selftune pi install # creates extension hook scripts
171
176
  ```
172
177
 
173
- Use `--dry-run` first if the user wants to preview. See `Workflows/PlatformHooks.md`
178
+ Use `--dry-run` first if the user wants to preview. See `workflows/PlatformHooks.md`
174
179
  for platform-specific details.
175
180
 
176
181
  **Batch ingest** fallback for platforms without real-time hooks or to backfill history:
@@ -179,6 +184,7 @@ for platform-specific details.
179
184
  selftune ingest codex # import Codex rollout sessions
180
185
  selftune ingest opencode # import OpenCode sessions from SQLite
181
186
  selftune ingest openclaw # import OpenClaw sessions
187
+ selftune ingest pi # import Pi sessions
182
188
  ```
183
189
 
184
190
  ### 5. Initialize Memory Directory
@@ -221,7 +227,7 @@ reported issues before proceeding.
221
227
 
222
228
  Init automatically runs `selftune sync` to backfill existing session
223
229
  transcripts into the SQLite database. This replays Claude Code transcripts,
224
- Codex rollouts, OpenCode sessions, and OpenClaw sessions so the eval set
230
+ Codex rollouts, OpenCode sessions, OpenClaw sessions, and Pi sessions so the eval set
225
231
  and evolution pipeline have data to work with immediately.
226
232
 
227
233
  The sync step is fail-open — if it encounters errors, init continues.
@@ -412,8 +418,9 @@ retrying with `selftune init --alpha --alpha-email <email> --force`.
412
418
 
413
419
  **User asks to set up or initialize selftune**
414
420
 
415
- > Run `which selftune` to check installation. If missing, install with
416
- > `npm install -g selftune`. Run `selftune init`, then verify with
421
+ > Run `which selftune` to check installation. If missing, install or refresh with
422
+ > `npx skills add selftune-dev/selftune`. If the user manages the CLI directly,
423
+ > use `npm install -g selftune` or `bun add -g selftune`. Run `selftune init`, then verify with
417
424
  > `selftune doctor`. Report results to the user.
418
425
 
419
426
  **User wants alpha enrollment**
@@ -426,8 +433,8 @@ retrying with `selftune init --alpha --alpha-email <email> --force`.
426
433
 
427
434
  > Run `selftune init` for the primary agent, then offer to install hooks for
428
435
  > additional detected platforms. Run `selftune codex install`, `selftune opencode install`,
429
- > or `selftune cline install` as needed. All platforms write to the same shared
430
- > log schema — no extra config required.
436
+ > `selftune cline install`, or `selftune pi install` as needed. All platforms
437
+ > write to the same shared log schema — no extra config required.
431
438
 
432
439
  **Hooks not capturing data**
433
440
 
@@ -50,6 +50,7 @@ proposalModel = haiku
50
50
  | `--max-auto-grade <n>` | Max ungraded skills to auto-grade per run (0 to disable) | `5` |
51
51
  | `--loop` | Run as a long-lived process that cycles continuously | Off |
52
52
  | `--loop-interval <seconds>` | Pause between cycles (minimum 60) | `3600` |
53
+ | `--help` | Show command help | Off |
53
54
 
54
55
  ## Default Behavior
55
56
 
@@ -57,7 +58,12 @@ proposalModel = haiku
57
58
  - Auto-grade up to 5 ungraded skills that have session data (enables evolution on first run after ingest)
58
59
  - Prioritize critical/warning/ungraded skills with real missed-query signal
59
60
  - Deploy validated low-risk description changes automatically
60
- - Watch recent deployments and roll back regressions automatically
61
+ - Auto-grade and write grading baselines for freshly deployed skills
62
+ - Generate review-first new skill proposals from strong workflow patterns
63
+ - Watch recent deployments (including freshly deployed skills in the same run) and roll back regressions automatically
64
+ - Monitor grade regression alongside trigger regression during watch
65
+ - Upload personal telemetry to cloud (alpha users)
66
+ - Flush staged creator-directed contribution signals for opted-in skills
61
67
 
62
68
  Use `--review-required` only when you want a stricter policy for a specific run.
63
69
 
@@ -111,6 +117,7 @@ Machine-readable JSON with the summary fields plus a `decisions` array containin
111
117
  - `skill`, `action`, `reason`
112
118
  - `deployed`, `evolveReason`, `validation` (before/after pass rates, improved flag) — when evolved
113
119
  - `alert`, `rolledBack`, `passRate`, `recommendation` — when watched
120
+ - `freshlyWatchedSkills` — array of skill names that were deployed and watched in the same run
114
121
 
115
122
  This is the recommended runtime for recurring autonomous scheduling.
116
123
 
@@ -162,8 +169,11 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
162
169
  2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
163
170
  3. **Auto-grade** — grade up to `--max-auto-grade` (default 5) ungraded skills that have session data but no grades yet. Skipped during `--dry-run` (grading makes LLM calls). After grading, status is recomputed so candidate selection sees updated grades. Fail-open: individual grading errors are logged but never block the loop.
164
171
  4. **Evolve** — run evolution on selected candidates (pre-flight is skipped; Pareto mode uses 3 candidates; cheap-loop uses `haiku` for proposal + validation and `sonnet` for the final gate; adaptive gate escalation promotes risky proposals to `opus` + `high` effort; baseline and token-efficiency stay off)
165
- 5. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
166
- 6. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
172
+ 5. **Post-deploy grade + baseline** — for each freshly deployed skill, grade the most recent session and write a grading baseline to SQLite (`grading_baselines` table). The baseline records the measured pass rate and sample size, anchoring future grade regression detection. Fail-open: individual grading errors are logged but never block the loop.
173
+ 6. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback). Skills freshly deployed in this run are included in the watch set immediately, so they are monitored in the same orchestrate cycle rather than waiting for the next run. These appear in `freshlyWatchedSkills` in the output. Grade watch (`enableGradeWatch: true`) runs alongside trigger regression for all watched skills.
174
+ 7. **Workflow proposals** — discover repeated multi-skill patterns and create review-first `new_skill` proposals when a workflow is strong enough to merit codification. These are never auto-deployed; they are surfaced as proposals for review.
175
+ 8. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
176
+ 9. **Contribution relay flush** — if an API key is configured, flush any staged creator-directed contribution signals for opted-in skills. Fail-open: relay errors never block the orchestrate loop. Respects `--dry-run`.
167
177
 
168
178
  When orchestrate invokes evolve for a selected candidate, it always passes
169
179
  `confidenceThreshold: 0.6` and `maxIterations: 3`, plus the autonomous evolve
@@ -2,11 +2,11 @@
2
2
 
3
3
  ## Purpose
4
4
 
5
- Install and configure selftune hooks for non-Claude-Code platforms (Codex, OpenCode, Cline).
5
+ Install and configure selftune hooks for non-Claude-Code platforms (Codex, OpenCode, Cline, Pi).
6
6
 
7
7
  ## When to Use
8
8
 
9
- - User wants selftune on Codex, OpenCode, or Cline
9
+ - User wants selftune on Codex, OpenCode, Cline, or Pi
10
10
  - User asks about multi-platform support
11
11
  - User wants real-time skill tracking on a non-Claude-Code agent
12
12
 
@@ -18,7 +18,7 @@ Install and configure selftune hooks for non-Claude-Code platforms (Codex, OpenC
18
18
  selftune <platform> install [--dry-run] [--uninstall]
19
19
  ```
20
20
 
21
- Supported platforms: `codex`, `opencode`, `cline`
21
+ Supported platforms: `codex`, `opencode`, `cline`, `pi`
22
22
 
23
23
  | Flag | Description |
24
24
  | ------------- | ---------------------------------------------- |
@@ -56,6 +56,13 @@ This is called automatically by the agent's hook system. Users don't run this di
56
56
  - Events: PostToolUse, TaskComplete, TaskCancel
57
57
  - Install creates executable shell scripts in the hooks directory
58
58
 
59
+ ### Pi
60
+
61
+ - Config: `~/.pi/extensions/selftune/`
62
+ - Sessions: `~/.pi/agent/sessions/`
63
+ - Events: tool_call, tool_result, message, session_shutdown
64
+ - Install creates executable hook scripts in the extensions directory
65
+
59
66
  ## Examples
60
67
 
61
68
  ### Codex
@@ -82,6 +89,14 @@ selftune cline install --dry-run # Preview what would be created
82
89
  selftune cline install --uninstall # Remove selftune hook scripts
83
90
  ```
84
91
 
92
+ ### Pi
93
+
94
+ ```bash
95
+ selftune pi install # Install hooks into ~/.pi/extensions/selftune/
96
+ selftune pi install --dry-run # Preview changes without writing
97
+ selftune pi install --uninstall # Remove selftune hooks
98
+ ```
99
+
85
100
  ### Hook handler (agent-only, not user-facing)
86
101
 
87
102
  The hook subcommand is called automatically by the agent. Users do not run it directly:
@@ -90,4 +105,5 @@ The hook subcommand is called automatically by the agent. Users do not run it di
90
105
  printf '%s\n' "$PAYLOAD" | selftune codex hook
91
106
  printf '%s\n' "$PAYLOAD" | selftune opencode hook
92
107
  printf '%s\n' "$PAYLOAD" | selftune cline hook
108
+ printf '%s\n' "$PAYLOAD" | selftune pi hook
93
109
  ```
@@ -0,0 +1,99 @@
1
+ # Registry — Team Skill Distribution
2
+
3
+ Manage versioned skill distribution across your team. Push skill folders to the cloud, install from the registry, sync to latest versions, and rollback when needed.
4
+
5
+ ## Commands
6
+
7
+ | Command | Flags | What It Does |
8
+ |---------|-------|-------------|
9
+ | `selftune registry push [name]` | `--version=<semver>` `--summary=<text>` | Archive current skill folder and push as a new version |
10
+ | `selftune registry install <name>` | `--global` | Download and extract a skill from the registry |
11
+ | `selftune registry sync` | | Check all installed entries for updates, pull latest |
12
+ | `selftune registry status` | | Show installed entries with version drift |
13
+ | `selftune registry rollback <name>` | `--to=<version>` `--reason=<text>` | Rollback a skill to a previous version |
14
+ | `selftune registry history <name>` | | Show version timeline with quality data |
15
+ | `selftune registry list` | | Show all published entries in the org |
16
+
17
+ ## When to Use
18
+
19
+ - User says "push this skill to the team" → `selftune registry push`
20
+ - User says "install the deploy skill" → `selftune registry install deploy`
21
+ - User says "update my skills" or "sync registry" → `selftune registry sync`
22
+ - User says "check for updates" → `selftune registry status`
23
+ - User says "rollback the deploy skill" → `selftune registry rollback deploy`
24
+ - User says "show version history" → `selftune registry history <name>`
25
+ - User says "what's in the registry" → `selftune registry list`
26
+
27
+ ## Push Workflow
28
+
29
+ 1. Navigate to the skill directory (must contain `SKILL.md`)
30
+ 2. Run `selftune registry push` — archives the entire folder (SKILL.md + scripts/ + assets/)
31
+ 3. The skill name and description are extracted from SKILL.md frontmatter
32
+ 4. Use `--version=1.0.0` to set an explicit semver; otherwise the version is auto-generated
33
+ 5. Use `--summary="Added new trigger keywords"` for change notes
34
+
35
+ ## Install Workflow
36
+
37
+ 1. Run `selftune registry install <name>` to pull from the registry
38
+ 2. By default, installs to `.claude/skills/<name>/` in the current project
39
+ 3. Use `--global` to install to `~/.claude/skills/<name>/` (available everywhere)
40
+ 4. Installation is tracked — `selftune registry status` shows what's installed
41
+
42
+ ## Sync Workflow
43
+
44
+ 1. Run `selftune registry sync` to check all installations for updates
45
+ 2. Only downloads archives when the version hash differs (lightweight check)
46
+ 3. Local state is stored at `~/.selftune/registry-state.json`
47
+
48
+ ## Rollback Workflow
49
+
50
+ 1. Run `selftune registry rollback <name>` to revert to the previous version
51
+ 2. Use `--to=1.0.0` to target a specific version
52
+ 3. After rollback, tell team members to run `selftune registry sync`
53
+ 4. Rollback is recorded with timestamp and reason
54
+
55
+ ## Prerequisites
56
+
57
+ - Must be authenticated (run `selftune alpha upload` to set up an API key)
58
+ - Push and rollback require Team plan and admin role
59
+ - Install requires Pro plan or higher
60
+
61
+ ## Output Format
62
+
63
+ All commands output JSON for agent consumption:
64
+
65
+ ```jsonc
66
+ // push
67
+ {"success": true, "name": "deploy", "version": "1.2.0", "files": 8, "size": 4096, "hash": "abc123"}
68
+
69
+ // sync
70
+ {"synced": 2, "failed": 0, "total": 5}
71
+
72
+ // status
73
+ {"installations": [{"name": "deploy", "installed": "1.1.0", "latest": "1.2.0", "status": "behind"}]}
74
+ ```
75
+
76
+ ## Common Patterns
77
+
78
+ **User wants to share a skill with the team**
79
+
80
+ > Run `selftune registry push` from the skill directory. Report the version
81
+ > and file count from the JSON output.
82
+
83
+ **User wants to install a shared skill**
84
+
85
+ > Run `selftune registry install <name>`. Use `--global` if they want it
86
+ > available across all projects.
87
+
88
+ **User wants to check what's outdated**
89
+
90
+ > Run `selftune registry status`. Report entries where `status` is `"behind"`.
91
+
92
+ **User wants to update everything**
93
+
94
+ > Run `selftune registry sync`. Report `synced` and `failed` counts.
95
+
96
+ **User wants to undo a bad version**
97
+
98
+ > Run `selftune registry rollback <name> --reason="regression in trigger accuracy"`.
99
+ > Remind them to have team members run `selftune registry sync` afterward.
@@ -4,7 +4,7 @@ Generate ready-to-use scheduling examples for automating selftune with
4
4
  standard system tools. This is the **primary automation path** — it works
5
5
  on any machine without requiring a specific agent runtime.
6
6
 
7
- For OpenClaw-specific scheduling, see `Workflows/Cron.md`.
7
+ For OpenClaw-specific scheduling, see `workflows/Cron.md`.
8
8
 
9
9
  ## When to Use
10
10
 
@@ -51,7 +51,7 @@ Outputs examples for all three scheduling systems (cron, launchd, systemd).
51
51
 
52
52
  ## Alias
53
53
 
54
- `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
54
+ `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `workflows/Cron.md` for the full cron workflow reference.
55
55
 
56
56
  ## PATH Resolution (All Platforms)
57
57
 
@@ -69,4 +69,4 @@ environments that don't include homebrew, bun, or node binary locations.
69
69
  - **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.
70
70
  - **User wants setup on macOS** -- Run `selftune schedule --install --format launchd`.
71
71
  - **User wants setup on a systemd-based server** -- Run `selftune schedule --install --format systemd`.
72
- - **User mentions OpenClaw** -- Use `selftune cron setup --platform openclaw` for the OpenClaw scheduler adapter. The default product path is still `selftune schedule --install`. See `Workflows/Cron.md`.
72
+ - **User mentions OpenClaw** -- Use `selftune cron setup --platform openclaw` for the OpenClaw scheduler adapter. The default product path is still `selftune schedule --install`. See `workflows/Cron.md`.
@@ -0,0 +1,87 @@
1
+ # selftune Signals Dashboard Workflow
2
+
3
+ View contributor signals, contributor statistics, and skill signal strength
4
+ from the hosted selftune cloud dashboard.
5
+
6
+ This is **not** the same as:
7
+ - `selftune dashboard` — the **local** SPA that reads your own SQLite telemetry
8
+ - `selftune contribute` — exporting an anonymized **export bundle** for the community
9
+ - `selftune contributions` — managing your **sharing preferences** for creator-directed signals
10
+ - `selftune creator-contributions` — managing the **creator sharing setup** file (`selftune.contribute.json`)
11
+
12
+ ## When to Use
13
+
14
+ - The user asks about contributor signals, contributor stats, or aggregated skill health
15
+ - The user wants to see how many people are contributing signals for a skill
16
+ - The user asks about signal performance, signal strength, or cohort counts
17
+ - The user says "show me signals", "show me contributor signals", or "how are signals doing?"
18
+
19
+ ## Where to Find It
20
+
21
+ The signals dashboard is the hosted web application at the selftune cloud
22
+ URL (e.g. `https://selftune.dev/signals` or the locally-running Next.js
23
+ dev server at `http://localhost:3000/signals`). The old `/community` path is a
24
+ legacy alias.
25
+
26
+ ## What It Shows
27
+
28
+ | Section | Description |
29
+ | --- | --- |
30
+ | Overview cards | Total contributors, total signals, active skills |
31
+ | Skill list | Per-skill signal counts, distinct cohorts, trigger rates |
32
+ | Signal strength | Whether a skill meets the actionable threshold (>=10 signals, >=3 cohorts) |
33
+ | Time buckets | Signal volume over time |
34
+ | Pending proposals | Skills eligible for contributor-signal-driven evolution proposals |
35
+ | Below-threshold skills | Skills that need more data before proposals can be generated |
36
+
37
+ ## Signal Strength Thresholds
38
+
39
+ A skill is considered **actionable** when it meets both of these thresholds:
40
+ - At least **10 total signals** from contributors
41
+ - At least **3 distinct contributor cohorts**
42
+
43
+ Skills below these thresholds appear in the "needs more data" section.
44
+ These same thresholds gate proposal generation on the API side.
45
+
46
+ ## Steps
47
+
48
+ 1. Direct the user to the signals dashboard URL
49
+ 2. If asked about a specific skill, describe its signal strength and contributor count
50
+ 3. If a skill is below threshold, explain how many more signals or cohorts are needed
51
+ 4. If the user wants to help a skill reach threshold, route to the **Contribute** workflow
52
+ 5. If the user is the skill creator, use the signals dashboard as the handoff into proposals and watch
53
+
54
+ ## Creator Loop
55
+
56
+ For a creator, the after-ship loop is:
57
+
58
+ 1. check whether the skill is low-signal or actionable
59
+ 2. inspect missed categories and grade distribution
60
+ 3. create a contributor proposal only when the signal is coherent
61
+ 4. review/apply the proposal through the normal proposal flow
62
+ 5. watch outcomes after apply
63
+
64
+ Read `references/creator-playbook.md` for the full before-ship and after-ship playbook.
65
+
66
+ ## Common Patterns
67
+
68
+ **User asks "how are contributor signals doing?"**
69
+
70
+ > Direct them to the signals dashboard. Summarize the overview stats
71
+ > (total contributors, total signals, number of actionable skills).
72
+
73
+ **User asks about a specific skill's contributor signals**
74
+
75
+ > Look up the skill on the signals dashboard. Report its total signals,
76
+ > distinct cohorts, and whether it meets the actionable threshold.
77
+
78
+ **User wants to help a skill that's below threshold**
79
+
80
+ > Route to the Contribute workflow (`selftune contribute --skill <name>`)
81
+ > to export an anonymized bundle and submit it.
82
+
83
+ **User confuses signals dashboard with local dashboard**
84
+
85
+ > Clarify: `selftune dashboard` shows **local** telemetry from your own
86
+ > SQLite database. The signals dashboard shows **aggregated** data from
87
+ > all contributors across the selftune cloud.
@@ -10,7 +10,7 @@ also writes the compatibility repaired overlay JSONL.
10
10
  ## When to Use
11
11
 
12
12
  - Before running `status`, `dashboard`, `watch`, or `evolve` when data may be stale
13
- - The user has run many Claude Code, Codex, OpenCode, or OpenClaw sessions since last sync
13
+ - The user has run many Claude Code, Codex, OpenCode, OpenClaw, or Pi sessions since last sync
14
14
  - The agent detects host logs may be polluted and needs the repaired/source-first view
15
15
  - Before inspecting alpha-upload readiness or pushing fresh cloud data
16
16
 
@@ -31,6 +31,8 @@ selftune sync
31
31
  | `--no-codex` | Skip Codex rollout ingest |
32
32
  | `--no-opencode` | Skip OpenCode ingest |
33
33
  | `--no-openclaw` | Skip OpenClaw ingest |
34
+ | `--no-pi` | Skip Pi ingest |
35
+ | `--pi-sessions-dir <dir>` | Pi sessions directory (default: `~/.pi/agent/sessions`) |
34
36
  | `--no-repair` | Skip rebuilding repaired skill-usage data |
35
37
  | `--json` | Output results as JSON |
36
38
 
@@ -9,6 +9,14 @@ accuracy, output content, and tool usage with deterministic assertions.
9
9
  selftune eval unit-test --skill <name> --tests <path> [options]
10
10
  ```
11
11
 
12
+ ## Where selftune stores the result
13
+
14
+ - Test definitions live in `~/.selftune/unit-tests/<skill>.json`
15
+ - The latest run summary is mirrored into `~/.selftune/unit-tests/<skill>.last-run.json`
16
+
17
+ The dashboard and `selftune status` read those files to decide whether a skill still needs test
18
+ generation or already has a passing suite.
19
+
12
20
  ## Options
13
21
 
14
22
  | Flag | Description | Default |
@@ -138,6 +146,17 @@ selftune eval unit-test --skill Research
138
146
  Compare the new `pass_rate` against the previous run. Report whether
139
147
  the evolution improved trigger accuracy.
140
148
 
149
+ ### 5. Continue the creator loop
150
+
151
+ After unit tests exist, the next creator step is usually:
152
+
153
+ ```bash
154
+ selftune evolve --skill <name> --skill-path <path> --dry-run --validation-mode replay
155
+ ```
156
+
157
+ That keeps the sequence aligned with the dashboard readiness surface:
158
+ evals -> unit tests -> replay dry-run -> baseline -> deploy -> watch.
159
+
141
160
  ## Common Patterns
142
161
 
143
162
  **User asks to generate tests for a skill**
@@ -20,6 +20,9 @@ selftune watch --skill <name> --skill-path <path> [options]
20
20
  | `--auto-rollback` | Automatically rollback on detected regression | Off |
21
21
  | `--sync-first` | Refresh source-truth telemetry before evaluating | Off |
22
22
  | `--sync-force` | Force a full source rescan during `--sync-first` | Off |
23
+ | `--grade-threshold <n>` | Grade regression threshold (drop from baseline) | 0.15 |
24
+ | `--no-grade-watch` | Disable grade-based regression monitoring (enabled by default) | Off |
25
+ | `--help` | Show command help | Off |
23
26
 
24
27
  ## Output Format
25
28
 
@@ -34,7 +37,22 @@ selftune watch --skill <name> --skill-path <path> [options]
34
37
  "regression_detected": false,
35
38
  "delta": -0.03,
36
39
  "status": "healthy",
37
- "evaluated_at": "2026-02-28T14:00:00Z"
40
+ "evaluated_at": "2026-02-28T14:00:00Z",
41
+ "gradeAlert": null,
42
+ "gradeRegression": null
43
+ }
44
+ ```
45
+
46
+ When grade regression is detected, the additional fields are populated:
47
+
48
+ ```json
49
+ {
50
+ "gradeAlert": "grade regression detected for \"pptx\": baseline_grade_pass_rate=0.85, recent_avg=0.65, delta=0.20 exceeds threshold=0.15",
51
+ "gradeRegression": {
52
+ "before": 0.85,
53
+ "after": 0.65,
54
+ "delta": 0.20
55
+ }
38
56
  }
39
57
  ```
40
58
 
@@ -47,6 +65,28 @@ selftune watch --skill <name> --skill-path <path> [options]
47
65
  | `regression` | Pass rate dropped below baseline minus threshold |
48
66
  | `insufficient_data` | Not enough sessions in the window to evaluate |
49
67
 
68
+ ## Grade Regression Monitoring
69
+
70
+ In addition to trigger-based regression (pass rate from eval sets), watch now
71
+ monitors **grade regression** using grading baselines stored in SQLite.
72
+
73
+ Grade regression compares the baseline grade pass rate (written when a skill is
74
+ deployed) against the average pass rate of recent grading results. If the delta
75
+ exceeds `gradeRegressionThreshold` (set with `--grade-threshold`; default 0.15), a `gradeAlert` is raised.
76
+
77
+ This runs alongside trigger regression:
78
+
79
+ | Check | Source | Threshold | Field |
80
+ | ------------------ | --------------------------- | --------- | ------------------- |
81
+ | Trigger regression | Eval set pass rates | 0.10 | `regression_detected` |
82
+ | Grade regression | Grading baseline vs recent | 0.15 | `gradeRegression` |
83
+
84
+ Both checks contribute to the overall `alert` field. A grade regression alert
85
+ is appended to the watch alert string alongside any trigger regression alert.
86
+
87
+ Grade watch is enabled by default. Disable it by passing `--no-grade-watch`
88
+ if you only want trigger-based monitoring.
89
+
50
90
  ## Parsing Instructions
51
91
 
52
92
  ### Check Regression Status
@@ -105,7 +145,7 @@ If regression is detected:
105
145
 
106
146
  - Review recent session transcripts to understand what changed
107
147
  - Check if the eval set is still representative
108
- - Run `evolve rollback` if the regression is confirmed (see `Workflows/Rollback.md`)
148
+ - Run `evolve rollback` if the regression is confirmed (see `workflows/Rollback.md`)
109
149
 
110
150
  If `--auto-rollback` was set, the command automatically restores the
111
151
  previous description and logs a `rolled_back` entry.
@@ -6,14 +6,16 @@ When the user asks about multi-skill workflows, workflow discovery, or skill com
6
6
 
7
7
  ## Overview
8
8
 
9
- Discover repeated multi-skill sequences from telemetry and optionally save a
10
- discovered workflow into a skill's `## Workflows` section.
9
+ Discover repeated multi-skill sequences from telemetry, save a discovered
10
+ workflow into a skill's `## Workflows` section, or scaffold a new local skill
11
+ from an observed workflow pattern.
11
12
 
12
13
  ## Default Commands
13
14
 
14
15
  ```bash
15
16
  selftune workflows [options]
16
17
  selftune workflows save <workflow-id|index> [--skill-path <path>]
18
+ selftune workflows scaffold <workflow-id|index> [--output-dir <path>] [--skill-name <name>] [--description <text>] [--write] [--force]
17
19
  ```
18
20
 
19
21
  ## Options
@@ -29,6 +31,13 @@ selftune workflows save <workflow-id|index> [--skill-path <path>]
29
31
  auto-detect the first skill's SKILL.md path across contributing sessions. If
30
32
  that skill maps to multiple SKILL.md files in those sessions, the command
31
33
  errors and you must pass `--skill-path` explicitly.
34
+ - `--output-dir <path>`: Target registry directory for `scaffold`. Default:
35
+ the repo-root `.agents/skills` directory.
36
+ - `--skill-name <name>`: Override the generated scaffolded skill name.
37
+ - `--description <text>`: Override the generated scaffolded skill description.
38
+ - `--write`: Persist the scaffolded draft skill to disk. Without this flag,
39
+ `scaffold` previews the draft only.
40
+ - `--force`: Overwrite an existing draft skill path when combined with `--write`.
32
41
 
33
42
  ## Save Semantics
34
43
 
@@ -49,6 +58,30 @@ SKILL.md. The subsection name is derived from the skill chain
49
58
  (`Copywriting-MarketingAutomation-SelfTuneBlog`) and includes
50
59
  discovered-source metadata with occurrence count and synergy score.
51
60
 
61
+ ## Scaffold Semantics
62
+
63
+ `scaffold` turns an observed workflow into a draft local skill.
64
+
65
+ - Default behavior is preview-first: the command prints the proposed skill name,
66
+ output path, provenance, and full `SKILL.md` content.
67
+ - Add `--write` to create `<output-dir>/<skill-name>/SKILL.md`.
68
+ - The generated skill is intentionally conservative: it includes provenance,
69
+ a description derived from the workflow trigger, an execution plan, and the
70
+ discovered workflow section. It does not silently publish or distribute the
71
+ new skill.
72
+
73
+ When `selftune orchestrate` sees a strong workflow pattern, it now creates a
74
+ review-first `new_skill` proposal automatically. The manual `scaffold` command
75
+ still exists for explicit previewing and local draft writes.
76
+
77
+ Examples:
78
+
79
+ ```bash
80
+ selftune workflows scaffold 1
81
+ selftune workflows scaffold "Copywriting→MarketingAutomation→SelfTuneBlog" --skill-name "blog publisher"
82
+ selftune workflows scaffold 1 --output-dir .agents/skills --write
83
+ ```
84
+
52
85
  ## Output Format
53
86
 
54
87
  ### Human-readable output
@@ -127,3 +160,7 @@ Discovered Workflows (from 450 sessions):
127
160
  `selftune workflows save 1 --skill-path /path/to/SKILL.md`
128
161
  - "Save a specific discovered workflow by ID"
129
162
  `selftune workflows save "Copywriting→MarketingAutomation→SelfTuneBlog"`
163
+ - "Preview a new skill scaffold from the top workflow"
164
+ `selftune workflows scaffold 1`
165
+ - "Write the scaffolded draft skill into the repo registry"
166
+ `selftune workflows scaffold 1 --output-dir .agents/skills --write`