selftune 0.2.23 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +93 -15
  3. package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
  4. package/apps/local-dashboard/dist/assets/index-Dhgv5BQO.js +15 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
  6. package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
  7. package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
  8. package/apps/local-dashboard/dist/index.html +5 -5
  9. package/cli/selftune/adapters/codex/install.ts +310 -78
  10. package/cli/selftune/adapters/opencode/install.ts +3 -4
  11. package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
  13. package/cli/selftune/auto-update.ts +200 -8
  14. package/cli/selftune/canonical-export.ts +55 -25
  15. package/cli/selftune/command-surface.ts +397 -0
  16. package/cli/selftune/contribute/contribute.ts +64 -13
  17. package/cli/selftune/contribution-config.ts +57 -3
  18. package/cli/selftune/contribution-preferences.ts +117 -0
  19. package/cli/selftune/contribution-signals.ts +8 -4
  20. package/cli/selftune/contribution-staging.ts +13 -2
  21. package/cli/selftune/contributions.ts +55 -121
  22. package/cli/selftune/creator-contributions.ts +29 -10
  23. package/cli/selftune/cron/setup.ts +7 -3
  24. package/cli/selftune/dashboard-contract.ts +73 -0
  25. package/cli/selftune/dashboard-server.ts +168 -17
  26. package/cli/selftune/dashboard.ts +350 -17
  27. package/cli/selftune/eval/baseline.ts +21 -5
  28. package/cli/selftune/eval/execution-eval.ts +170 -0
  29. package/cli/selftune/eval/family-overlap.ts +2 -2
  30. package/cli/selftune/eval/hooks-to-evals.ts +228 -82
  31. package/cli/selftune/eval/import-skillsbench.ts +2 -2
  32. package/cli/selftune/eval/invocation-classifier.ts +56 -0
  33. package/cli/selftune/eval/synthetic-evals.ts +5 -3
  34. package/cli/selftune/eval/unit-test-cli.ts +7 -4
  35. package/cli/selftune/evolution/apply-proposal.ts +295 -0
  36. package/cli/selftune/evolution/engines/replay-engine.ts +79 -57
  37. package/cli/selftune/evolution/evolve-body.ts +100 -39
  38. package/cli/selftune/evolution/evolve.ts +244 -52
  39. package/cli/selftune/evolution/rollback.ts +0 -1
  40. package/cli/selftune/evolution/validate-body.ts +68 -42
  41. package/cli/selftune/evolution/validate-host-replay.ts +510 -60
  42. package/cli/selftune/evolution/validate-proposal.ts +11 -150
  43. package/cli/selftune/evolution/validate-routing.ts +43 -41
  44. package/cli/selftune/evolution/validation-contract.ts +91 -0
  45. package/cli/selftune/grading/auto-grade.ts +11 -7
  46. package/cli/selftune/grading/grade-session.ts +10 -16
  47. package/cli/selftune/index.ts +35 -10
  48. package/cli/selftune/ingestors/claude-replay.ts +15 -10
  49. package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
  50. package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
  51. package/cli/selftune/ingestors/pi-ingest.ts +3 -2
  52. package/cli/selftune/init.ts +27 -3
  53. package/cli/selftune/localdb/direct-write.ts +35 -1
  54. package/cli/selftune/localdb/queries/cron.ts +34 -0
  55. package/cli/selftune/localdb/queries/dashboard.ts +834 -0
  56. package/cli/selftune/localdb/queries/evolution.ts +158 -0
  57. package/cli/selftune/localdb/queries/execution.ts +133 -0
  58. package/cli/selftune/localdb/queries/json.ts +18 -0
  59. package/cli/selftune/localdb/queries/monitoring.ts +263 -0
  60. package/cli/selftune/localdb/queries/raw.ts +95 -0
  61. package/cli/selftune/localdb/queries/staging.ts +270 -0
  62. package/cli/selftune/localdb/queries/trust.ts +392 -0
  63. package/cli/selftune/localdb/queries.ts +60 -2288
  64. package/cli/selftune/localdb/schema.ts +21 -0
  65. package/cli/selftune/monitoring/watch.ts +96 -29
  66. package/cli/selftune/normalization.ts +3 -0
  67. package/cli/selftune/observability.ts +4 -2
  68. package/cli/selftune/orchestrate/cli.ts +161 -0
  69. package/cli/selftune/orchestrate/execute.ts +295 -0
  70. package/cli/selftune/orchestrate/finalize.ts +157 -0
  71. package/cli/selftune/orchestrate/locks.ts +40 -0
  72. package/cli/selftune/orchestrate/plan.ts +131 -0
  73. package/cli/selftune/orchestrate/post-run.ts +59 -0
  74. package/cli/selftune/orchestrate/prepare.ts +334 -0
  75. package/cli/selftune/orchestrate/report.ts +182 -0
  76. package/cli/selftune/orchestrate/runtime.ts +120 -0
  77. package/cli/selftune/orchestrate/signals.ts +48 -0
  78. package/cli/selftune/orchestrate.ts +150 -1173
  79. package/cli/selftune/repair/skill-usage.ts +5 -2
  80. package/cli/selftune/routes/overview.ts +5 -2
  81. package/cli/selftune/routes/skill-report.ts +15 -2
  82. package/cli/selftune/schedule.ts +5 -5
  83. package/cli/selftune/status.ts +39 -2
  84. package/cli/selftune/testing-readiness.ts +597 -0
  85. package/cli/selftune/types.ts +44 -4
  86. package/cli/selftune/uninstall.ts +2 -1
  87. package/cli/selftune/utils/canonical-log.ts +1 -9
  88. package/cli/selftune/utils/cli-error.ts +9 -0
  89. package/cli/selftune/utils/llm-call.ts +126 -6
  90. package/cli/selftune/utils/skill-discovery.ts +2 -0
  91. package/cli/selftune/workflows/proposals.ts +184 -0
  92. package/cli/selftune/workflows/skill-scaffold.ts +241 -0
  93. package/cli/selftune/workflows/workflows.ts +100 -26
  94. package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
  95. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  96. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  97. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  98. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +41 -1
  99. package/node_modules/@selftune/telemetry-contract/src/types.ts +103 -2
  100. package/package.json +25 -9
  101. package/packages/dashboard-core/AGENTS.md +18 -0
  102. package/packages/dashboard-core/README.md +30 -0
  103. package/packages/dashboard-core/index.ts +3 -0
  104. package/packages/dashboard-core/package.json +39 -0
  105. package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
  106. package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
  107. package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
  108. package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
  109. package/packages/dashboard-core/src/chrome/index.ts +14 -0
  110. package/packages/dashboard-core/src/chrome/types.ts +81 -0
  111. package/packages/dashboard-core/src/chrome/utils.ts +23 -0
  112. package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
  113. package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
  114. package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
  115. package/packages/dashboard-core/src/gates/index.ts +3 -0
  116. package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
  117. package/packages/dashboard-core/src/host/adapter.ts +47 -0
  118. package/packages/dashboard-core/src/host/capabilities.ts +55 -0
  119. package/packages/dashboard-core/src/host/index.ts +3 -0
  120. package/packages/dashboard-core/src/models/analytics.ts +39 -0
  121. package/packages/dashboard-core/src/models/index.ts +4 -0
  122. package/packages/dashboard-core/src/models/overview.ts +98 -0
  123. package/packages/dashboard-core/src/models/runtime.ts +7 -0
  124. package/packages/dashboard-core/src/models/skills.ts +34 -0
  125. package/packages/dashboard-core/src/routes/index.ts +2 -0
  126. package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
  127. package/packages/dashboard-core/src/routes/manifest.ts +451 -0
  128. package/packages/dashboard-core/src/routes/types.ts +39 -0
  129. package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
  130. package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
  131. package/packages/dashboard-core/src/screens/index.ts +37 -0
  132. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
  133. package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
  134. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
  135. package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
  136. package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
  137. package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
  138. package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
  139. package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
  140. package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
  141. package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
  142. package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
  143. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
  144. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
  145. package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
  146. package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
  147. package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
  148. package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
  149. package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
  150. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
  151. package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
  152. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
  153. package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
  154. package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
  155. package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
  156. package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
  157. package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
  158. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  159. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
  160. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  161. package/packages/telemetry-contract/src/schemas.ts +41 -1
  162. package/packages/telemetry-contract/src/types.ts +103 -2
  163. package/packages/ui/src/components/EvidenceViewer.tsx +80 -25
  164. package/packages/ui/src/components/OverviewPanels.tsx +67 -26
  165. package/packages/ui/src/primitives/tabs.tsx +7 -6
  166. package/packages/ui/src/types.ts +10 -0
  167. package/skill/SKILL.md +130 -332
  168. package/skill/agents/diagnosis-analyst.md +3 -3
  169. package/skill/agents/evolution-reviewer.md +3 -3
  170. package/skill/agents/integration-guide.md +3 -3
  171. package/skill/agents/pattern-analyst.md +2 -2
  172. package/skill/references/cli-quick-reference.md +89 -0
  173. package/skill/references/creator-playbook.md +131 -0
  174. package/skill/references/examples.md +48 -0
  175. package/skill/references/troubleshooting.md +47 -0
  176. package/skill/references/version-history.md +1 -1
  177. package/skill/selftune.contribute.json +11 -0
  178. package/skill/{Workflows → workflows}/Baseline.md +20 -1
  179. package/skill/{Workflows → workflows}/Contribute.md +23 -10
  180. package/skill/{Workflows → workflows}/Contributions.md +13 -5
  181. package/skill/workflows/CreateTestDeploy.md +170 -0
  182. package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
  183. package/skill/{Workflows → workflows}/Cron.md +1 -1
  184. package/skill/{Workflows → workflows}/Dashboard.md +20 -0
  185. package/skill/{Workflows → workflows}/Doctor.md +1 -1
  186. package/skill/{Workflows → workflows}/Evals.md +67 -2
  187. package/skill/{Workflows → workflows}/Evolve.md +119 -30
  188. package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
  189. package/skill/{Workflows → workflows}/Grade.md +1 -1
  190. package/skill/{Workflows → workflows}/Initialize.md +8 -4
  191. package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
  192. package/skill/{Workflows → workflows}/Schedule.md +3 -3
  193. package/skill/workflows/SignalsDashboard.md +87 -0
  194. package/skill/{Workflows → workflows}/UnitTest.md +19 -0
  195. package/skill/{Workflows → workflows}/Watch.md +42 -2
  196. package/skill/{Workflows → workflows}/Workflows.md +39 -2
  197. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +0 -1
  198. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +0 -59
  199. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
  200. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +0 -12
  201. /package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
  202. /package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
  203. /package/skill/{Workflows → workflows}/Badge.md +0 -0
  204. /package/skill/{Workflows → workflows}/Composability.md +0 -0
  205. /package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
  206. /package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
  207. /package/skill/{Workflows → workflows}/Hook.md +0 -0
  208. /package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
  209. /package/skill/{Workflows → workflows}/Ingest.md +0 -0
  210. /package/skill/{Workflows → workflows}/PlatformHooks.md +0 -0
  211. /package/skill/{Workflows → workflows}/Quickstart.md +0 -0
  212. /package/skill/{Workflows → workflows}/Recover.md +0 -0
  213. /package/skill/{Workflows → workflows}/Registry.md +0 -0
  214. /package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
  215. /package/skill/{Workflows → workflows}/Replay.md +0 -0
  216. /package/skill/{Workflows → workflows}/Rollback.md +0 -0
  217. /package/skill/{Workflows → workflows}/Sync.md +0 -0
  218. /package/skill/{Workflows → workflows}/Telemetry.md +0 -0
  219. /package/skill/{Workflows → workflows}/Uninstall.md +0 -0
@@ -1,10 +1,12 @@
1
1
  # selftune Creator-Contributions Workflow
2
2
 
3
- Manage the creator-side `selftune.contribute.json` file bundled with a skill.
3
+ Manage the **creator sharing setup** — the `selftune.contribute.json` file
4
+ bundled with a skill package.
4
5
 
5
6
  This is **not** the same as:
6
- - `selftune contributions` — end-user opt-in / opt-out preferences
7
- - `selftune contribute` — community export bundle
7
+ - `selftune contributions` — end-user **sharing preferences** (opt-in / opt-out)
8
+ - `selftune contribute` — community **export bundle** (anonymized data export)
9
+ - The signals dashboard — viewing aggregated **contributor signal data** from all contributors
8
10
 
9
11
  ## When to Use
10
12
 
@@ -45,8 +47,17 @@ selftune creator-contributions disable --skill <name> [--skill-path <path>]
45
47
  ## Notes
46
48
 
47
49
  - This is local packaging/setup only. It does **not** upload creator-directed signals yet.
48
- - The creator ID is currently sourced from `--creator-id` or the local alpha identity's `cloud_user_id`.
50
+ - The `creator_id` field must be the creator's cloud user UUID (the `cloud_user_id` from alpha enrollment). This is the canonical identifier used to route signals back to the correct creator account.
51
+ - The creator ID is sourced from `--creator-id` or the local alpha identity's `cloud_user_id`.
49
52
  - Use this workflow when the user is preparing a skill package.
53
+ - For the full creator lifecycle, read `references/creator-playbook.md` before shipping.
54
+
55
+ ## Selftune Dogfood Config
56
+
57
+ The selftune skill itself ships a bundled `selftune.contribute.json` at
58
+ `oss/selftune/skill/selftune.contribute.json`. This is the selftune project
59
+ dogfooding its own creator-directed relay flow. The `creator_id` field is
60
+ set to the production selftune creator's cloud user UUID.
50
61
 
51
62
  ## Common Patterns
52
63
 
@@ -60,13 +71,14 @@ selftune creator-contributions disable --skill <name> [--skill-path <path>]
60
71
  > Run `selftune creator-contributions enable --skill <name>`.
61
72
  > If auto-discovery fails, rerun with `--skill-path /path/to/SKILL.md`.
62
73
  > If no creator identity is available locally, rerun with `--creator-id <id>`.
63
- > Example: `selftune creator-contributions enable --skill sc-search --skill-path ./skills/sc-search/SKILL.md --creator-id cr_state_change --signals trigger,grade,miss_category --message "Share privacy-safe usage signals with the skill creator." --privacy-url https://statechange.ai/privacy`
74
+ > The command rejects non-UUID creator IDs and unsupported signal names.
75
+ > Example: `selftune creator-contributions enable --skill sc-search --skill-path ./skills/sc-search/SKILL.md --creator-id 550e8400-e29b-41d4-a716-446655440000 --signals trigger,grade,miss_category --message "Share privacy-safe usage signals with the skill creator." --privacy-url https://statechange.ai/privacy`
64
76
 
65
77
  **User wants to enable creator contributions for a whole installed skill suite**
66
78
 
67
79
  > Run `selftune creator-contributions enable --all --prefix sc-`.
68
80
  > This is the fastest path when preparing a whole family of skills like State Change skills.
69
- > Example: `selftune creator-contributions enable --all --prefix sc- --creator-id cr_state_change`
81
+ > Example: `selftune creator-contributions enable --all --prefix sc- --creator-id 550e8400-e29b-41d4-a716-446655440000`
70
82
 
71
83
  **User wants to stop bundling creator contribution config**
72
84
 
@@ -130,4 +130,4 @@ interactive mode is for user-directed improvements.
130
130
  - **User needs a specific timezone (OpenClaw)** -- Run `selftune cron setup --platform openclaw --tz America/New_York`.
131
131
  - **User asks what jobs are registered** -- Run `selftune cron list`. Shows a table of all selftune cron jobs with their schedules and descriptions.
132
132
  - **User wants to remove cron automation** -- Run `selftune cron remove`. Preview first with `selftune cron remove --dry-run`.
133
- - **Skill regressed after cron evolution** -- The watch job should catch this automatically. If not, run `selftune evolve rollback --skill <name> --skill-path <path>` manually. See `Workflows/Rollback.md`.
133
+ - **Skill regressed after cron evolution** -- The watch job should catch this automatically. If not, run `selftune evolve rollback --skill <name> --skill-path <path>` manually. See `workflows/Rollback.md`.
@@ -22,6 +22,7 @@ generate JSONL from SQLite for debugging or offline analysis.
22
22
  | Flag | Description | Default |
23
23
  | --------------- | ----------------------------------------- | ------- |
24
24
  | `--port <port>` | Custom port for the server | 3141 |
25
+ | `--restart` | Force-restart an existing dashboard on the target port | Off |
25
26
  | `--no-open` | Start server without opening browser | Off |
26
27
  | `--serve` | _(Deprecated)_ Alias for default behavior | — |
27
28
 
@@ -35,6 +36,16 @@ suggesting `selftune dashboard` instead.
35
36
  The live server binds to `localhost:3141` by default. Use `--port` to
36
37
  override.
37
38
 
39
+ If a healthy selftune dashboard is already running on the requested port,
40
+ `selftune dashboard` reuses it instead of failing. If the running standalone
41
+ dashboard version is older than the installed CLI, the command restarts it
42
+ automatically to pick up the update. Use `--restart` to force a restart
43
+ even when the versions match.
44
+
45
+ The dashboard client also polls `/api/health` for `spa_build_id`. If the server
46
+ is newer than the loaded client, the UI shows a reload prompt instead of silently
47
+ staying stale.
48
+
38
49
  ### Endpoints
39
50
 
40
51
  | Method | Path | Description |
@@ -162,6 +173,7 @@ checked file paths.
162
173
  ```bash
163
174
  selftune dashboard
164
175
  selftune dashboard --port 8080
176
+ selftune dashboard --restart
165
177
  selftune dashboard --no-open
166
178
  ```
167
179
 
@@ -182,6 +194,14 @@ to trigger watch, evolve, or rollback directly from the dashboard.
182
194
  > Run `selftune dashboard`. The server provides real-time updates via SSE
183
195
  > (~1 second latency).
184
196
 
197
+ **User just updated selftune and wants the dashboard to pick up the new UI**
198
+
199
+ > Run `selftune dashboard`. It reuses a healthy instance when possible and
200
+ > automatically restarts an older standalone dashboard version on the same port.
201
+ > If the user explicitly wants a restart, run `selftune dashboard --restart`.
202
+ > If the browser still has an older client loaded, the dashboard shows a reload
203
+ > prompt based on `/api/health` build metadata.
204
+
185
205
  **Dashboard shows no data**
186
206
 
187
207
  > Run `selftune doctor` to verify hooks are installed. If hooks are missing,
@@ -163,7 +163,7 @@ For each failed check, take the appropriate action:
163
163
  | `evolution_audit` | Remove corrupted entries. Future operations will append clean entries. |
164
164
  | `dashboard_freshness_mode` | This is an operator warning, not a broken install. Expect possible freshness gaps for SQLite-only writes and export before destructive recovery. |
165
165
  | `skill_version_sync` | Run `bun run sync-version` to stamp SKILL.md from package.json. |
166
- | `version_up_to_date` | Run `npm install -g selftune` to update. |
166
+ | `version_up_to_date` | Follow `.checks[].guidance.next_command` for the active install source. Common fixes are `npm install -g selftune@latest`, `bun add -g selftune@latest`, or `npx skills add selftune-dev/selftune`. |
167
167
 
168
168
  ### 4. Re-run Doctor
169
169
 
@@ -20,6 +20,24 @@ Invoke this workflow when the user requests any of the following:
20
20
  selftune eval generate --skill <name> [options]
21
21
  ```
22
22
 
23
+ ## Recommended Creator Loop
24
+
25
+ Use eval generation as step 1 of the default creator loop:
26
+
27
+ ```bash
28
+ selftune eval generate --skill <name>
29
+ selftune eval unit-test --skill <name> --generate --skill-path <path>
30
+ selftune evolve --skill <name> --skill-path <path> --dry-run --validation-mode replay
31
+ selftune grade baseline --skill <name> --skill-path <path>
32
+ selftune evolve --skill <name> --skill-path <path> --with-baseline
33
+ selftune watch --skill <name>
34
+ ```
35
+
36
+ The command still writes the requested output path, and it now also mirrors a canonical copy into
37
+ `~/.selftune/eval-sets/<skill>.json` so the dashboard and `selftune status` can track whether eval
38
+ coverage exists. Once the earlier steps are complete, the creator-loop surfaces flip from
39
+ "needs testing" to "ready to deploy" and then "watching" after shipping.
40
+
23
41
  ## Options
24
42
 
25
43
  | Flag | Description | Default |
@@ -39,6 +57,8 @@ selftune eval generate --skill <name> [options]
39
57
  | `--auto-synthetic` | Fall back to SKILL.md-based cold-start evals when no trusted triggers exist | Off |
40
58
  | `--skill-path <path>` | Path to SKILL.md (required with `--synthetic`) | — |
41
59
  | `--model <model>` | LLM model to use for synthetic generation | Agent default |
60
+ | `--blend` | Blend log-based and synthetic evals into one set | Off |
61
+ | `--help` | Show command help | Off |
42
62
 
43
63
  ## Output Format
44
64
 
@@ -49,11 +69,14 @@ selftune eval generate --skill <name> [options]
49
69
  {
50
70
  "query": "Make me a slide deck for the Q3 board meeting",
51
71
  "should_trigger": true,
52
- "invocation_type": "contextual"
72
+ "invocation_type": "contextual",
73
+ "source": "log",
74
+ "created_at": "2026-04-01T12:00:00Z"
53
75
  },
54
76
  {
55
77
  "query": "What format should I use for a presentation?",
56
- "should_trigger": false
78
+ "should_trigger": false,
79
+ "source": "synthetic"
57
80
  }
58
81
  ]
59
82
  ```
@@ -61,6 +84,24 @@ selftune eval generate --skill <name> [options]
61
84
  Each entry has `query` (string, max 500 chars), `should_trigger` (boolean),
62
85
  and optional `invocation_type` (omitted when `--no-taxonomy` is set).
63
86
 
87
+ Entries also carry optional provenance fields:
88
+
89
+ - `source` — `"log"` (from real usage logs), `"synthetic"` (LLM-generated from SKILL.md), or `"blended"` (synthetic entry that survived dedup in a blended set)
90
+ - `created_at` — ISO timestamp of when the entry was created
91
+
92
+ Use `computeEvalSourceStats(entries)` to get aggregate provenance statistics:
93
+
94
+ ```json
95
+ {
96
+ "total": 80,
97
+ "synthetic": 10,
98
+ "log": 50,
99
+ "blended": 20,
100
+ "oldest": "2026-03-01T00:00:00Z",
101
+ "newest": "2026-04-01T12:00:00Z"
102
+ }
103
+ ```
104
+
64
105
  ### List Skills
65
106
 
66
107
  ```json
@@ -181,6 +222,30 @@ Use `--model` to override the default LLM model:
181
222
  selftune eval generate --skill pptx --synthetic --skill-path ./skills/pptx/SKILL.md --model claude-sonnet-4-5-20250514
182
223
  ```
183
224
 
225
+ ### Generate Blended Evals
226
+
227
+ When a skill has real log data but you want to fill coverage gaps with synthetic
228
+ entries, use `--blend` to combine both sources into one eval set.
229
+
230
+ ```bash
231
+ selftune eval generate --skill pptx --blend --skill-path /path/to/skills/pptx/SKILL.md
232
+ ```
233
+
234
+ The blending policy:
235
+
236
+ 1. Keep ALL log-based entries (marked `source: "log"`)
237
+ 2. Generate synthetic entries from SKILL.md
238
+ 3. Deduplicate: drop any synthetic entry whose normalized Levenshtein distance to any log entry is < 0.3 (i.e., a near-duplicate of a real query)
239
+ 4. Mark surviving synthetic entries as `source: "blended"`
240
+ 5. Cap total entries at 2x the log-based count
241
+
242
+ This preserves real-world boundary cases from logs while filling underrepresented
243
+ invocation types with synthetic entries. The 2x cap prevents synthetic entries from
244
+ overwhelming log signal.
245
+
246
+ `--blend` requires a resolvable SKILL.md path. Use `--skill-path` or install the
247
+ skill locally so selftune can find it.
248
+
184
249
  ### Generate Evals (Log-Based)
185
250
 
186
251
  Cross-reference `skill_usage_log.jsonl` (positive triggers) against
@@ -19,6 +19,23 @@ Invoke this workflow when the user requests any of the following:
19
19
  selftune evolve --skill <name> --skill-path <path> [options]
20
20
  ```
21
21
 
22
+ ## Recommended Creator Loop
23
+
24
+ Do not treat `evolve` as the first step when a creator asks whether a skill is
25
+ ready. The default loop is:
26
+
27
+ ```bash
28
+ selftune eval generate --skill <name> --skill-path <path>
29
+ selftune eval unit-test --skill <name> --generate --skill-path <path>
30
+ selftune evolve --skill <name> --skill-path <path> --dry-run --validation-mode replay
31
+ selftune grade baseline --skill <name> --skill-path <path>
32
+ ```
33
+
34
+ Then move to a live `selftune evolve ...` or `selftune watch ...` run.
35
+
36
+ If canonical evals or stored unit-test results already exist, reuse them rather
37
+ than regenerating everything.
38
+
22
39
  ## Options
23
40
 
24
41
  | Flag | Description | Default |
@@ -26,7 +43,7 @@ selftune evolve --skill <name> --skill-path <path> [options]
26
43
  | `--skill <name>` | Skill name | Required |
27
44
  | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
28
45
  | `--eval-set <path>` | Pre-built eval set JSON | Auto-generated from logs |
29
- | `--agent <name>` | Agent CLI to use (claude, codex, opencode) | Auto-detected |
46
+ | `--agent <name>` | Agent CLI to use (claude, codex, opencode, pi) | Auto-detected |
30
47
  | `--dry-run` | Propose and validate without deploying | Off |
31
48
  | `--confidence <n>` | Minimum confidence threshold (0-1) | 0.6 |
32
49
  | `--max-iterations <n>` | Maximum retry iterations | 3 |
@@ -42,8 +59,10 @@ selftune evolve --skill <name> --skill-path <path> [options]
42
59
  | `--gate-effort <level>` | Thinking effort for the final gate (`low|medium|high|max`) | None |
43
60
  | `--adaptive-gate` | Escalate risky gate checks to `opus` + `high` effort | Off |
44
61
  | `--proposal-model <model>` | Model for proposal generation LLM calls | None |
62
+ | `--validation-mode <mode>` | Validation strategy: `auto`, `replay`, or `judge` | `auto` |
45
63
  | `--sync-first` | Refresh source-truth telemetry before generating evals/failure patterns | Off |
46
64
  | `--sync-force` | Force a full source rescan during `--sync-first` | Off |
65
+ | `--help` | Show command help | Off |
47
66
 
48
67
  ## Output Format
49
68
 
@@ -83,37 +102,42 @@ Routing/body validation may also carry provenance fields such as:
83
102
  - `validation_fixture_id` — fixture identifier when replay-backed validation is used
84
103
  - `before_pass_rate` / `after_pass_rate` — only present when trigger validation actually ran; structural-guard exits do not emit synthetic pass rates
85
104
 
86
- Most evolve runs today still validate through `llm_judge`. Routing evolution now
87
- auto-builds a replay fixture from the target skill plus installed sibling
88
- skills in the same registry, so replay-backed validation is preferred whenever
89
- that local fixture can be constructed because it captures host-style routing
90
- behavior instead of model judgment.
91
-
92
- For Claude Code, the replay path now stages a temporary project-local
93
- `.claude/skills` registry, swaps in the candidate routing table, and runs a
94
- one-turn Claude print-mode session with project/local settings only. Validation
95
- records whether Claude actually invoked the target skill, invoked a competing
96
- skill, invoked an unrelated skill, or made no routing decision at all.
97
- Unrelated skill use is treated as a replay failure even on negative evals,
98
- because it still indicates the runtime routed somewhere unexpected. If that
99
- runtime path is unavailable or fails to reach a runtime decision, selftune
100
- falls back to the existing fixture-backed surface simulation and notes the
101
- fallback in the replay evidence instead of pretending it was a runtime result.
102
-
103
- For non-Claude platforms today, replay remains fixture-backed: it evaluates the
104
- target routing table against the installed target/competing skill surfaces in a
105
- controlled replay fixture and records per-entry evidence. That is still a
106
- stronger signal than a free-form judge prompt, but you should describe it as
107
- replay-backed validation, not as live operator telemetry.
105
+ Most evolve runs today still validate through `llm_judge`. Replay-backed
106
+ validation is only considered available when selftune can run a real
107
+ host/runtime replay for the target host. Today that means the Claude Code,
108
+ Codex, and OpenCode paths can stage a temporary local registry, apply the
109
+ candidate skill content, and observe the runtime's actual routing decision;
110
+ when that runtime path is unavailable, `auto` falls back to `llm_judge` and
111
+ `replay` errors explicitly instead of silently downgrading to fixture
112
+ simulation.
113
+
114
+ Description, routing, and full-body evolution now share the same public
115
+ validation contract: `auto` prefers replay and falls back to judge, `replay`
116
+ requires a replay path, and `judge` bypasses replay entirely. Audit and
117
+ evidence records may also include `validation_fallback_reason` when `auto`
118
+ had to fall back from replay to judge.
119
+
120
+ Replay stages the candidate into the target host's project-local registry:
121
+ Claude Code uses `.claude/skills`, Codex uses `.agents/skills`, and OpenCode
122
+ uses `.opencode/skills`. Validation records whether the runtime selected the
123
+ target skill, selected a competing skill, selected an unrelated skill, or made
124
+ no routing decision at all. Reads outside the staged skill set are treated as
125
+ replay failures even on negative evals, because they indicate the runtime left
126
+ the controlled evaluation surface.
127
+
128
+ For hosts without runtime replay support today, replay is not available. In
129
+ `auto` mode selftune falls back to `llm_judge`; in `replay` mode it exits with
130
+ `REPLAY_UNAVAILABLE`. Do not describe fixture-only surface matching as replay
131
+ validation in user-facing summaries.
108
132
 
109
133
  Replay parsing is intentionally conservative: unreadable skill files degrade to
110
134
  empty surfaces instead of throwing, and malformed routing rows with empty
111
- trigger cells are ignored rather than treated as valid triggers. Claude replay
112
- also normalizes observed `Read` paths against the staged workspace, so relative
113
- skill reads still count as read-only evidence for the target or competing
114
- skill. Reads outside the staged skill set are treated as replay failures rather
115
- than benign negatives, because they indicate the runtime left the controlled
116
- evaluation surface.
135
+ trigger cells are ignored rather than treated as valid triggers. Replay also
136
+ normalizes observed skill reads against the staged workspace, so relative skill
137
+ paths from Claude, Codex, or OpenCode still count as evidence for the target or
138
+ competing skill. Reads outside the staged skill set are treated as replay
139
+ failures rather than benign negatives, because they indicate the runtime left
140
+ the controlled evaluation surface.
117
141
 
118
142
  ## Parsing Instructions
119
143
 
@@ -281,6 +305,40 @@ The candidate is tested against the full eval set:
281
305
  If validation fails, the command retries up to `--max-iterations` times
282
306
  with adjusted proposals.
283
307
 
308
+ ### Validation Mode (`--validation-mode`)
309
+
310
+ The `--validation-mode` flag controls which validation engine is used for
311
+ description proposals. Three modes are available:
312
+
313
+ | Mode | Behavior |
314
+ | -------- | ------------------------------------------------------------------------ |
315
+ | `auto` | Try replay-based validation first; fall back to LLM judge if unavailable |
316
+ | `replay` | Replay engine only; error if no replay fixture or runner is available |
317
+ | `judge` | LLM judge only (legacy path via `validateProposal`) |
318
+
319
+ The default is `auto`, which provides the strongest available signal without
320
+ requiring manual fixture configuration. When replay is available, it stages the
321
+ candidate skill content into a temporary local registry and records the
322
+ runtime's actual routing decision per eval entry. For description evolution,
323
+ that means the proposed description is applied to the target skill before
324
+ replay. When replay is not available, `auto` falls back to the LLM judge and
325
+ logs the fallback.
326
+
327
+ The actual mode used is recorded as `validation_mode` in audit entries
328
+ (`llm_judge`, `host_replay`, or `structural_guard`), along with
329
+ `validation_agent` and `validation_fixture_id` when applicable.
330
+
331
+ ```bash
332
+ # Default: auto (replay-first, judge fallback)
333
+ selftune evolve --skill pptx --skill-path ./skills/pptx/SKILL.md
334
+
335
+ # Force replay only (error if unavailable)
336
+ selftune evolve --skill pptx --skill-path ./skills/pptx/SKILL.md --validation-mode replay
337
+
338
+ # Force judge only (legacy behavior)
339
+ selftune evolve --skill pptx --skill-path ./skills/pptx/SKILL.md --validation-mode judge
340
+ ```
341
+
284
342
  ### Aggregate Metrics To Report
285
343
 
286
344
  When summarizing an evolution run, include these aggregate metrics rather
@@ -378,6 +436,37 @@ selftune evolve --skill X --skill-path Y --cheap-loop --gate-model opus --gate-e
378
436
  selftune evolve --skill X --skill-path Y --proposal-model haiku --validation-model sonnet
379
437
  ```
380
438
 
439
+ ## Apply Contributor Proposal
440
+
441
+ The `apply-proposal` subcommand fetches an approved contributor aggregate
442
+ proposal from the cloud dashboard and applies it to the local SKILL.md.
443
+
444
+ ```bash
445
+ selftune evolve apply-proposal --id <proposal-id> --skill-path <path> [--dry-run]
446
+ ```
447
+
448
+ ### Apply-Proposal Options
449
+
450
+ | Flag | Description | Default |
451
+ | ----------------- | ----------------------------------------------- | -------- |
452
+ | `--id <uuid>` | Proposal UUID from the dashboard | Required |
453
+ | `--skill-path` | Path to the target SKILL.md | Required |
454
+ | `--dry-run` | Preview the proposal without writing to disk | Off |
455
+
456
+ ### Apply-Proposal Flow
457
+
458
+ 1. Fetch the proposal via `GET /api/v1/proposals/:id`
459
+ 2. Verify `proposed_by` is `contributor_aggregate` and status is `approved`
460
+ 3. Display a summary (type, reason, pass rate change, diff preview)
461
+ 4. If not `--dry-run`: back up SKILL.md, apply the proposed value, and
462
+ `PATCH /api/v1/proposals/:id` with status `applied`
463
+
464
+ ### When to Use
465
+
466
+ - After reviewing and approving a contributor proposal in the cloud dashboard
467
+ - When community signal suggests a description or body improvement
468
+ - As the final step in the contributor-driven evolution workflow
469
+
381
470
  ## Common Patterns
382
471
 
383
472
  **User asks to evolve a specific skill (e.g., "evolve the pptx skill"):**
@@ -398,7 +487,7 @@ Also check if the eval set has contradictory expectations.
398
487
 
399
488
  **Agent CLI override needed:**
400
489
  The evolve command auto-detects the installed agent CLI.
401
- Use `--agent <name>` to override (claude, codex, opencode).
490
+ Use `--agent <name>` to override (claude, codex, opencode, pi).
402
491
 
403
492
  ## Subagent Escalation
404
493
 
@@ -10,6 +10,22 @@ LLM validates them through a 3-gate pipeline.
10
10
  selftune evolve body --skill <name> --skill-path <path> --target <target> [options]
11
11
  ```
12
12
 
13
+ ## Recommended Creator Loop
14
+
15
+ Before mutating routing or the full body, make sure the creator trust loop is in
16
+ place:
17
+
18
+ ```bash
19
+ selftune eval generate --skill <name> --skill-path <path>
20
+ selftune eval unit-test --skill <name> --generate --skill-path <path>
21
+ selftune evolve body --skill <name> --skill-path <path> --target <target> --dry-run --validation-mode replay
22
+ selftune grade baseline --skill <name> --skill-path <path>
23
+ ```
24
+
25
+ If replay validation or the baseline is still missing, prefer filling that gap
26
+ before live deployment. Body and routing evolution are much harder to trust than
27
+ description-only changes when the creator loop is incomplete.
28
+
13
29
  ## Options
14
30
 
15
31
  | Flag | Description | Default |
@@ -26,6 +42,7 @@ selftune evolve body --skill <name> --skill-path <path> --target <target> [optio
26
42
  | `--max-iterations <n>` | Maximum refinement iterations | 3 |
27
43
  | `--task-description <text>` | Context for the evolution goal | None |
28
44
  | `--validation-model <model>` | Model for trigger-check validation calls (overrides `--student-model` for validation) | None |
45
+ | `--validation-mode <mode>` | Validation strategy: `auto`, `replay`, or `judge` | `auto` |
29
46
  | `--teacher-effort <level>` | Effort level for teacher LLM: `low`, `medium`, `high`, `max` | `high` |
30
47
  | `--review` | Run `evolution-reviewer` subagent as Gate 4 before deployment | Off |
31
48
  | `--few-shot <paths>` | Comma-separated paths to example SKILL.md files | None |
@@ -51,7 +68,7 @@ Every proposal passes through three sequential gates:
51
68
  | Gate | Type | What it checks | Cost |
52
69
  | ----------------------------- | ----------- | ----------------------------------------------------------------------------------------------- | -------- |
53
70
  | **Gate 1: Structural** | Pure code | YAML frontmatter present, `# Title` exists, `## Workflow Routing` preserved if original had one | Free |
54
- | **Gate 2: Trigger Accuracy** | Student LLM | YES/NO trigger check per eval entry on the extracted description | Cheap |
71
+ | **Gate 2: Trigger Accuracy** | Replay or student LLM | Runtime replay when available; otherwise YES/NO trigger check per eval entry | Cheap |
55
72
  | **Gate 3: Quality** | Student LLM | Body clarity and completeness score (0.0-1.0) | Cheap |
56
73
  | **Gate 4: Reviewer** (opt-in) | Subagent | `evolution-reviewer` multi-turn review — reads files, checks evidence, APPROVE/REJECT verdict | Moderate |
57
74
 
@@ -141,6 +158,25 @@ Few-shot examples from `--few-shot` paths provide structural guidance.
141
158
  Each gate runs in sequence. If a gate fails, the teacher receives the
142
159
  failure details and generates a refined proposal.
143
160
 
161
+ ### Validation Mode (`--validation-mode`)
162
+
163
+ `evolve body` uses the same validation contract as `evolve`:
164
+
165
+ | Mode | Behavior |
166
+ | -------- | ------------------------------------------------------------------------ |
167
+ | `auto` | Try replay-based validation first; fall back to LLM judge if unavailable |
168
+ | `replay` | Replay engine only; error if no replay fixture or runner is available |
169
+ | `judge` | LLM judge only |
170
+
171
+ When replay is available, selftune stages the candidate skill content into a
172
+ temporary local registry before running the real host/runtime replay. Claude
173
+ Code uses `.claude/skills`, Codex uses `.agents/skills`, and OpenCode uses
174
+ `.opencode/skills`. Routing targets stage the candidate `## Workflow Routing`
175
+ section; body targets stage the full candidate body while preserving
176
+ frontmatter and title. When replay is not available, `auto` falls back to the
177
+ LLM judge and records the `validation_fallback_reason` in audit/evidence
178
+ output.
179
+
144
180
  ### 6. Deploy or Preview
145
181
 
146
182
  If `--dry-run`, prints the proposal without deploying. Otherwise:
@@ -164,6 +200,10 @@ If `--dry-run`, prints the proposal without deploying. Otherwise:
164
200
 
165
201
  > `selftune evolve body --skill pptx --skill-path /path/SKILL.md --target body --teacher-model opus --student-model haiku`
166
202
 
203
+ **"Force replay-only validation for a routing change"**
204
+
205
+ > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target routing --validation-mode replay`
206
+
167
207
  **"Preview what would change"**
168
208
 
169
209
  > Always start with `--dry-run` to review the proposal before deploying.
@@ -17,7 +17,7 @@ selftune grade --skill <name> [options]
17
17
  | `--expectations "..."` | Explicit expectations (semicolon-separated) | Auto-derived |
18
18
  | `--evals-json <path>` | Pre-built eval set JSON file | None |
19
19
  | `--eval-id <n>` | Specific eval ID to grade from the eval set | None |
20
- | `--agent <name>` | Agent CLI to use (claude, codex, opencode) | Auto-detected |
20
+ | `--agent <name>` | Agent CLI to use (claude, codex, opencode, pi) | Auto-detected |
21
21
 
22
22
  ## Output Format
23
23
 
@@ -89,9 +89,12 @@ which selftune
89
89
  If `selftune` is not on PATH, install it:
90
90
 
91
91
  ```bash
92
- npm install -g selftune
92
+ npx skills add selftune-dev/selftune
93
93
  ```
94
94
 
95
+ If you manage the CLI directly instead of using the skill installer, use
96
+ `npm install -g selftune` or `bun add -g selftune`.
97
+
95
98
  ### 2. Check Existing Config
96
99
 
97
100
  ```bash
@@ -172,7 +175,7 @@ selftune cline install # creates hook scripts
172
175
  selftune pi install # creates extension hook scripts
173
176
  ```
174
177
 
175
- Use `--dry-run` first if the user wants to preview. See `Workflows/PlatformHooks.md`
178
+ Use `--dry-run` first if the user wants to preview. See `workflows/PlatformHooks.md`
176
179
  for platform-specific details.
177
180
 
178
181
  **Batch ingest** fallback for platforms without real-time hooks or to backfill history:
@@ -415,8 +418,9 @@ retrying with `selftune init --alpha --alpha-email <email> --force`.
415
418
 
416
419
  **User asks to set up or initialize selftune**
417
420
 
418
- > Run `which selftune` to check installation. If missing, install with
419
- > `npm install -g selftune`. Run `selftune init`, then verify with
421
+ > Run `which selftune` to check installation. If missing, install or refresh with
422
+ > `npx skills add selftune-dev/selftune`. If the user manages the CLI directly,
423
+ > use `npm install -g selftune` or `bun add -g selftune`. Run `selftune init`, then verify with
420
424
  > `selftune doctor`. Report results to the user.
421
425
 
422
426
  **User wants alpha enrollment**
@@ -50,6 +50,7 @@ proposalModel = haiku
50
50
  | `--max-auto-grade <n>` | Max ungraded skills to auto-grade per run (0 to disable) | `5` |
51
51
  | `--loop` | Run as a long-lived process that cycles continuously | Off |
52
52
  | `--loop-interval <seconds>` | Pause between cycles (minimum 60) | `3600` |
53
+ | `--help` | Show command help | Off |
53
54
 
54
55
  ## Default Behavior
55
56
 
@@ -57,7 +58,12 @@ proposalModel = haiku
57
58
  - Auto-grade up to 5 ungraded skills that have session data (enables evolution on first run after ingest)
58
59
  - Prioritize critical/warning/ungraded skills with real missed-query signal
59
60
  - Deploy validated low-risk description changes automatically
60
- - Watch recent deployments and roll back regressions automatically
61
+ - Auto-grade and write grading baselines for freshly deployed skills
62
+ - Generate review-first new skill proposals from strong workflow patterns
63
+ - Watch recent deployments (including freshly deployed skills in the same run) and roll back regressions automatically
64
+ - Monitor grade regression alongside trigger regression during watch
65
+ - Upload personal telemetry to the cloud (alpha users)
66
+ - Flush staged creator-directed contribution signals for opted-in skills
61
67
 
62
68
  Use `--review-required` only when you want a stricter policy for a specific run.
63
69
 
@@ -111,6 +117,7 @@ Machine-readable JSON with the summary fields plus a `decisions` array containin
111
117
  - `skill`, `action`, `reason`
112
118
  - `deployed`, `evolveReason`, `validation` (before/after pass rates, improved flag) — when evolved
113
119
  - `alert`, `rolledBack`, `passRate`, `recommendation` — when watched
120
+ - `freshlyWatchedSkills` — array of skill names that were deployed and watched in the same run
114
121
 
115
122
  This is the recommended runtime for recurring autonomous scheduling.
116
123
 
@@ -162,8 +169,11 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
162
169
  2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
163
170
  3. **Auto-grade** — grade up to `--max-auto-grade` (default 5) ungraded skills that have session data but no grades yet. Skipped during `--dry-run` (grading makes LLM calls). After grading, status is recomputed so candidate selection sees updated grades. Fail-open: individual grading errors are logged but never block the loop.
164
171
  4. **Evolve** — run evolution on selected candidates (pre-flight is skipped; Pareto mode uses 3 candidates; cheap-loop uses `haiku` for proposal + validation and `sonnet` for the final gate; adaptive gate escalation promotes risky proposals to `opus` + `high` effort; baseline and token-efficiency stay off)
165
- 5. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
166
- 6. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
172
+ 5. **Post-deploy grade + baseline** — for each freshly deployed skill, grade the most recent session and write a grading baseline to SQLite (`grading_baselines` table). The baseline records the measured pass rate and sample size, anchoring future grade regression detection. Fail-open: individual grading errors are logged but never block the loop.
173
+ 6. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback). Skills freshly deployed in this run are included in the watch set immediately, so they are monitored in the same orchestrate cycle rather than waiting for the next run. These appear in `freshlyWatchedSkills` in the output. Grade watch (`enableGradeWatch: true`) runs alongside trigger regression for all watched skills.
174
+ 7. **Workflow proposals** — discover repeated multi-skill patterns and create review-first `new_skill` proposals when a workflow is strong enough to merit codification. These are never auto-deployed; they are surfaced as proposals for review.
175
+ 8. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
176
+ 9. **Contribution relay flush** — if an API key is configured, flush any staged creator-directed contribution signals for opted-in skills. Fail-open: relay errors never block the orchestrate loop. Respects `--dry-run`.
167
177
 
168
178
  When orchestrate invokes evolve for a selected candidate, it always passes
169
179
  `confidenceThreshold: 0.6` and `maxIterations: 3`, plus the autonomous evolve
@@ -4,7 +4,7 @@ Generate ready-to-use scheduling examples for automating selftune with
4
4
  standard system tools. This is the **primary automation path** — it works
5
5
  on any machine without requiring a specific agent runtime.
6
6
 
7
- For OpenClaw-specific scheduling, see `Workflows/Cron.md`.
7
+ For OpenClaw-specific scheduling, see `workflows/Cron.md`.
8
8
 
9
9
  ## When to Use
10
10
 
@@ -51,7 +51,7 @@ Outputs examples for all three scheduling systems (cron, launchd, systemd).
51
51
 
52
52
  ## Alias
53
53
 
54
- `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
54
+ `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `workflows/Cron.md` for the full cron workflow reference.
55
55
 
56
56
  ## PATH Resolution (All Platforms)
57
57
 
@@ -69,4 +69,4 @@ environments that don't include homebrew, bun, or node binary locations.
69
69
  - **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.
70
70
  - **User wants setup on macOS** -- Run `selftune schedule --install --format launchd`.
71
71
  - **User wants setup on a systemd-based server** -- Run `selftune schedule --install --format systemd`.
72
- - **User mentions OpenClaw** -- Use `selftune cron setup --platform openclaw` for the OpenClaw scheduler adapter. The default product path is still `selftune schedule --install`. See `Workflows/Cron.md`.
72
+ - **User mentions OpenClaw** -- Use `selftune cron setup --platform openclaw` for the OpenClaw scheduler adapter. The default product path is still `selftune schedule --install`. See `workflows/Cron.md`.