selftune 0.2.31 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +83 -56
  2. package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
  3. package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
  5. package/apps/local-dashboard/dist/index.html +3 -3
  6. package/cli/selftune/command-surface.ts +613 -2
  7. package/cli/selftune/create/baseline.ts +429 -0
  8. package/cli/selftune/create/check.ts +35 -0
  9. package/cli/selftune/create/init.ts +115 -0
  10. package/cli/selftune/create/package-candidate-state.ts +771 -0
  11. package/cli/selftune/create/package-evaluator.ts +710 -0
  12. package/cli/selftune/create/package-fingerprint.ts +142 -0
  13. package/cli/selftune/create/package-search.ts +377 -0
  14. package/cli/selftune/create/publish.ts +431 -0
  15. package/cli/selftune/create/readiness.ts +495 -0
  16. package/cli/selftune/create/replay.ts +330 -0
  17. package/cli/selftune/create/report.ts +74 -0
  18. package/cli/selftune/create/scaffold.ts +121 -0
  19. package/cli/selftune/create/skills-ref-adapter.ts +177 -0
  20. package/cli/selftune/create/status.ts +33 -0
  21. package/cli/selftune/create/templates.ts +249 -0
  22. package/cli/selftune/cron/setup.ts +1 -1
  23. package/cli/selftune/dashboard-action-events.ts +4 -1
  24. package/cli/selftune/dashboard-action-result.ts +789 -24
  25. package/cli/selftune/dashboard-action-stream.ts +80 -0
  26. package/cli/selftune/dashboard-contract.ts +146 -3
  27. package/cli/selftune/dashboard-server.ts +5 -4
  28. package/cli/selftune/eval/hooks-to-evals.ts +58 -35
  29. package/cli/selftune/eval/synthetic-evals.ts +145 -17
  30. package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
  31. package/cli/selftune/evolution/evolve-body.ts +9 -36
  32. package/cli/selftune/evolution/evolve.ts +8 -72
  33. package/cli/selftune/evolution/stopping-criteria.ts +5 -13
  34. package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
  35. package/cli/selftune/evolution/validate-host-replay.ts +115 -15
  36. package/cli/selftune/improve.ts +206 -0
  37. package/cli/selftune/index.ts +123 -6
  38. package/cli/selftune/init.ts +1 -1
  39. package/cli/selftune/localdb/queries/dashboard.ts +30 -0
  40. package/cli/selftune/localdb/schema.ts +52 -0
  41. package/cli/selftune/monitoring/watch.ts +257 -23
  42. package/cli/selftune/orchestrate/execute.ts +300 -1
  43. package/cli/selftune/orchestrate/finalize.ts +14 -0
  44. package/cli/selftune/orchestrate/plan.ts +22 -5
  45. package/cli/selftune/orchestrate/prepare.ts +59 -4
  46. package/cli/selftune/orchestrate/report.ts +1 -1
  47. package/cli/selftune/orchestrate.ts +34 -1
  48. package/cli/selftune/publish.ts +35 -0
  49. package/cli/selftune/routes/actions.ts +81 -15
  50. package/cli/selftune/routes/overview.ts +1 -1
  51. package/cli/selftune/routes/skill-report.ts +147 -2
  52. package/cli/selftune/run.ts +18 -0
  53. package/cli/selftune/schedule.ts +3 -3
  54. package/cli/selftune/search-run.ts +703 -0
  55. package/cli/selftune/status.ts +35 -11
  56. package/cli/selftune/testing-readiness.ts +431 -40
  57. package/cli/selftune/types.ts +316 -0
  58. package/cli/selftune/utils/eval-readiness.ts +1 -0
  59. package/cli/selftune/utils/json-output.ts +11 -0
  60. package/cli/selftune/utils/lifecycle-surface.ts +48 -0
  61. package/cli/selftune/utils/query-filter.ts +82 -1
  62. package/cli/selftune/utils/tui.ts +85 -2
  63. package/cli/selftune/verify.ts +205 -0
  64. package/cli/selftune/workflows/proposals.ts +1 -1
  65. package/cli/selftune/workflows/skill-scaffold.ts +141 -63
  66. package/cli/selftune/workflows/workflows.ts +4 -4
  67. package/package.json +1 -1
  68. package/skill/SKILL.md +148 -85
  69. package/skill/references/cli-quick-reference.md +16 -1
  70. package/skill/references/creator-playbook.md +31 -10
  71. package/skill/workflows/Baseline.md +8 -9
  72. package/skill/workflows/Contributions.md +4 -4
  73. package/skill/workflows/Create.md +173 -0
  74. package/skill/workflows/CreateTestDeploy.md +34 -30
  75. package/skill/workflows/Cron.md +2 -2
  76. package/skill/workflows/Dashboard.md +3 -3
  77. package/skill/workflows/Evals.md +13 -7
  78. package/skill/workflows/Evolve.md +75 -32
  79. package/skill/workflows/EvolveBody.md +22 -15
  80. package/skill/workflows/Hook.md +1 -1
  81. package/skill/workflows/Improve.md +168 -0
  82. package/skill/workflows/Initialize.md +3 -3
  83. package/skill/workflows/Orchestrate.md +49 -12
  84. package/skill/workflows/Publish.md +100 -0
  85. package/skill/workflows/Run.md +72 -0
  86. package/skill/workflows/Schedule.md +2 -2
  87. package/skill/workflows/SearchRun.md +89 -0
  88. package/skill/workflows/SignalsDashboard.md +2 -2
  89. package/skill/workflows/UnitTest.md +13 -4
  90. package/skill/workflows/Verify.md +136 -0
  91. package/skill/workflows/Watch.md +114 -47
  92. package/skill/workflows/Workflows.md +13 -8
  93. package/apps/local-dashboard/dist/assets/index-B7v_o1WC.js +0 -15
  94. package/apps/local-dashboard/dist/assets/index-CrO77SVi.css +0 -1
  95. package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
package/skill/SKILL.md CHANGED
@@ -2,26 +2,28 @@
2
2
  name: selftune
3
3
  description: >
4
4
  Self-improving skills toolkit that watches real agent sessions, detects missed
5
- triggers, grades execution quality, and evolves skill descriptions to match how
6
- users actually talk. Use when grading sessions, generating evals, evolving skill
7
- descriptions or routing tables, discovering reusable workflows, scaffolding new
8
- workflow skills, checking skill health, viewing the dashboard, ingesting sessions
9
- from other platforms, or running autonomous improvement loops.
5
+ triggers, grades execution quality, and evolves skills through a package
6
+ evaluation pipeline (replay, baseline, grading, unit tests, and post-deploy
7
+ watch). Use when verifying skill packages, publishing improvements, evolving
8
+ skill descriptions or routing tables, discovering reusable workflows, scaffolding
9
+ new workflow skills, checking skill health, viewing the dashboard, ingesting
10
+ sessions from other platforms, or running autonomous improvement loops.
10
11
  Make sure to use this skill whenever the user mentions skill improvement, skill
11
12
  performance, skill triggers, skill evolution, skill health, undertriggering,
12
13
  overtriggering, session grading, or wants to know how their skills are doing —
13
14
  even if they don't say "selftune" explicitly.
14
15
  metadata:
15
16
  author: selftune-dev
16
- version: 0.2.31
17
+ version: 0.2.32
17
18
  category: developer-tools
18
19
  ---
19
20
 
20
21
  # selftune
21
22
 
22
23
  Observe real agent sessions, detect missed triggers, grade execution quality,
23
- evolve skill descriptions toward the language real users actually use, and
24
- scaffold workflow skills from repeated telemetry patterns.
24
+ evolve skills through package evaluation (replay, baseline, grading, body,
25
+ unit tests, and post-deploy watch), and scaffold workflow skills from
26
+ repeated telemetry patterns.
25
27
 
26
28
  **You are the operator.** The user installed this skill so YOU can manage their
27
29
  skill health autonomously. They will say things like "set up selftune",
@@ -34,6 +36,43 @@ If `~/.selftune/config.json` does not exist, read `workflows/Initialize.md`
34
36
  first. The CLI must be installed (`selftune` on PATH) before other commands
35
37
  will work. Do not proceed with other commands until initialization is complete.
36
38
 
39
+ ## Primary Lifecycle
40
+
41
+ Default to this lifecycle unless the user explicitly asks for a low-level
42
+ workflow:
43
+
44
+ 1. `status`
45
+ - use `selftune status`
46
+ - for draft packages, use `selftune create status --skill-path <path>`
47
+
48
+ 2. `verify`
49
+ - use `selftune verify --skill-path <path>`
50
+ - if verify reports missing readiness or evidence, follow the returned next
51
+ low-level command instead of rerunning the full chain
52
+
53
+ 3. `publish`
54
+ - for draft packages, use `selftune publish --skill-path <path>`
55
+ - for already-live skills, `publish` usually means a validated `Improve`
56
+ action plus `Watch`
57
+
58
+ 4. `improve`
59
+ - use `selftune improve --skill <name> --skill-path <path>`
60
+ - let `--scope auto` choose bounded package search automatically when the
61
+ skill already has package evidence or a draft package manifest
62
+ - set `--scope description|routing|body|package` when the measured gap is
63
+ already clear and you want to force the mutation surface
64
+ - use `--scope package` when the problem spans routing and body together or
65
+ you want measured frontier comparison before deciding what to publish
66
+ - omit `--dry-run` when you want the winning package candidate promoted back
67
+ into the draft automatically
68
+
69
+ 5. `run`
70
+ - use `selftune run`
71
+
72
+ Treat `eval generate`, `unit-test`, `replay`, `baseline`, `watch`, and
73
+ body-specific evolution as advanced supporting workflows unless the user asks
74
+ for them directly or the default lifecycle fails.
75
+
37
76
  ## Command Execution Policy
38
77
 
39
78
  ```bash
@@ -43,7 +82,8 @@ selftune <command> [options]
43
82
  Commands vary in output format:
44
83
 
45
84
  - **JSON by default:** `selftune doctor` and `selftune watch` emit structured JSON on stdout.
46
- - **Text by default:** `selftune status`, `selftune last`, `selftune orchestrate`, and `selftune evolve` print human-readable text.
85
+ - **Text by default:** `selftune status`, `selftune last`, `selftune verify`, `selftune publish`, and `selftune improve` print human-readable text when stdout is a TTY.
86
+ - **Mixed runtime output:** `selftune run` / `selftune orchestrate` emit JSON on stdout and a human report on stderr.
47
87
  - **JSON opt-in:** `selftune sync --json` enables structured JSON output.
48
88
  - **Server:** `selftune dashboard` starts a local SPA server — it does not emit data.
49
89
 
@@ -54,70 +94,78 @@ next step from prose.
54
94
  Run `selftune <command> --help` for exact flags. Read
55
95
  `references/cli-quick-reference.md` when you need the full flag reference.
56
96
 
57
- ## Creator Trust Loop
97
+ ## Package Evaluation Pipeline (Creator Trust Loop)
58
98
 
59
- When the user wants to improve a skill, default to this creator loop before
60
- jumping straight to mutation:
99
+ When the user wants to improve a skill, default to this package evaluation
100
+ pipeline before jumping straight to mutation. Each step builds measured
101
+ evidence that the package is ready to publish. At any time the package is in one of these states:
61
102
 
62
- 1. `selftune eval generate --skill <name> --skill-path <path>`
63
- 2. `selftune eval unit-test --skill <name> --generate --skill-path <path>`
64
- 3. `selftune evolve --skill <name> --skill-path <path> --dry-run --validation-mode replay`
65
- 4. `selftune grade baseline --skill <name> --skill-path <path>`
66
- 5. `selftune evolve --skill <name> --skill-path <path> --with-baseline`
67
- 6. then `selftune watch --skill <name>`
103
+ - `draft` — the package exists but is still incomplete
104
+ - `verify_blocked` — the draft is still in one of the concrete readiness states: `needs_spec_validation`, `needs_package_resources`, `needs_evals`, `needs_unit_tests`, `needs_routing_replay`, or `needs_baseline`
105
+ - `verified` — the trust gates pass and the skill is ready to ship
106
+ - `published` — the skill was shipped successfully
107
+ - `watching` — post-deploy monitoring is active
108
+ - `needs_improvement` — measured evidence shows trigger, routing, body, or value gaps
109
+ - `unhealthy` — hooks, telemetry, config, or selftune itself is broken
68
110
 
69
111
  If the user asks "how do I know this skill works?" or "can I trust this skill
70
- yet?", start with this loop, then use `selftune status`, the dashboard, or the
71
- skill report to explain what is still missing, whether the skill is ready to
72
- deploy, or whether it is already being watched live.
112
+ yet?", start with this pipeline, then use `selftune status`, the dashboard, or
113
+ the skill report to explain what is still missing, whether the package is ready
114
+ to publish, or whether it is already being watched live.
73
115
 
74
116
  ## Workflow Routing
75
117
 
76
- | Trigger keywords | Workflow | File |
77
- | --- | --- | --- |
78
- | create test deploy, creator loop, ship skill, ready to deploy, can I trust this skill, how do I know this skill works | CreateTestDeploy | workflows/CreateTestDeploy.md |
79
- | grade, score, evaluate, assess session, auto-grade | Grade | workflows/Grade.md |
80
- | evals, eval set, undertriggering, skill stats, eval generate | Evals | workflows/Evals.md |
81
- | evolve, improve, optimize skills, make skills better, triggers, catch more queries, apply proposal, apply contributor proposal | Evolve | workflows/Evolve.md |
82
- | evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | workflows/EvolveBody.md |
83
- | evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | workflows/Rollback.md |
84
- | watch, monitor, regression, post-deploy, keep an eye on | Watch | workflows/Watch.md |
85
- | doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | workflows/Doctor.md |
86
- | ingest, import, codex logs, opencode, openclaw, pi, wrap codex | Ingest | workflows/Ingest.md |
87
- | replay, backfill, claude transcripts, historical sessions | Replay | workflows/Replay.md |
88
- | contributions, sharing preferences, opt in/out creator sharing, approve/revoke contributions | Contributions | workflows/Contributions.md |
89
- | creator contributions, selftune.contribute.json, enable/disable creator contribution | CreatorContributions | workflows/CreatorContributions.md |
90
- | signals dashboard, contributor signals, signals page, community dashboard, community data, contributor stats, signal health, how are signals, how is community | SignalsDashboard | workflows/SignalsDashboard.md |
91
- | contribute, share, export bundle, export data, anonymized, give back | Contribute | workflows/Contribute.md |
92
- | init, setup, set up, bootstrap, first time, install, configure selftune, alpha, enroll | Initialize | workflows/Initialize.md |
93
- | cron, schedule, automate evolution, run automatically | Cron | workflows/Cron.md |
94
- | schedule, selftune schedule, launchd, systemd, crontab, automation setup | Schedule | workflows/Schedule.md |
95
- | auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | workflows/AutoActivation.md |
96
- | dashboard, visual, open dashboard, show dashboard, serve dashboard | Dashboard | workflows/Dashboard.md |
97
- | evolution memory, session continuity, what happened last | EvolutionMemory | workflows/EvolutionMemory.md |
98
- | grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | workflows/Baseline.md |
99
- | eval unit-test, skill test, test skill, generate tests, run tests | UnitTest | workflows/UnitTest.md |
100
- | eval composability, co-occurrence, skill conflicts, family overlap, sibling confusion | Composability | workflows/Composability.md |
101
- | eval import, skillsbench, external evals, benchmark tasks | ImportSkillsBench | workflows/ImportSkillsBench.md |
102
- | telemetry, analytics, disable analytics, opt out, tracking, privacy | Telemetry | workflows/Telemetry.md |
103
- | orchestrate, autonomous, full loop, improve all skills, run selftune loop | Orchestrate | workflows/Orchestrate.md |
104
- | sync, refresh, source truth, rescan sessions | Sync | workflows/Sync.md |
105
- | badge, readme badge, skill badge, health badge | Badge | workflows/Badge.md |
106
- | workflows, discover workflows, scaffold workflow skill, build skill from logs | Workflows | workflows/Workflows.md |
107
- | alpha upload, upload data, send alpha data, manual upload | AlphaUpload | workflows/AlphaUpload.md |
108
- | recover, rebuild sqlite, recover db, legacy backfill | Recover | workflows/Recover.md |
109
- | quickstart, getting started, onboard, first time setup, new user | Quickstart | workflows/Quickstart.md |
110
- | uninstall, remove selftune, clean up, teardown | Uninstall | workflows/Uninstall.md |
111
- | repair, rebuild usage, fix skill usage, trustworthy usage | RepairSkillUsage | workflows/RepairSkillUsage.md |
112
- | export canonical, canonical export, canonical telemetry, push payload | ExportCanonical | workflows/ExportCanonical.md |
113
- | hook, run hook, invoke hook, manual hook, debug hook | Hook | workflows/Hook.md |
114
- | codex/opencode/cline/pi hooks, platform hooks, non-claude hooks, multi-agent | PlatformHooks | workflows/PlatformHooks.md |
115
- | registry, distribute, push/install/sync/rollback skill, team skills | Registry | workflows/Registry.md |
116
- | export, dump, jsonl, export sqlite, debug export | Export | _(direct: `selftune export`)_ |
117
- | status, health summary, skill health, how are skills, run selftune | Status | _(direct: `selftune status`)_ |
118
- | last, last session, recent session, what happened | Last | _(direct: `selftune last`)_ |
119
-
120
- Workflows Grade, Evolve, Watch, and Ingest also run autonomously via `selftune orchestrate`.
118
+ | Trigger keywords | Workflow | File |
119
+ | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------- | --------------------------------- |
120
+ | create skill, new skill package, author skill, bootstrap skill, scaffold package, benchmark report, package report, publish report | Create | workflows/Create.md |
121
+ | verify skill, creator loop, can I trust this skill, how do I know this skill works, test this skill, ready to ship, ready to deploy | Verify | workflows/Verify.md |
122
+ | publish skill, ship skill, deploy skill, go live, release skill | Publish | workflows/Publish.md |
123
+ | search run, package frontier, candidate search, bounded package evolution, compare package candidates, optimize package, improve routing and body together, bounded evolution | SearchRun | workflows/SearchRun.md |
124
+ | grade, score, evaluate, assess session, auto-grade | Grade | workflows/Grade.md |
125
+ | evals, eval set, undertriggering, skill stats, eval generate | Evals | workflows/Evals.md |
126
+ | improve, optimize skills, make skills better, triggers, catch more queries, apply proposal, apply contributor proposal | Improve | workflows/Improve.md |
127
+ | evolve description, description-only evolution, improve trigger wording | Evolve | workflows/Evolve.md |
128
+ | evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | workflows/EvolveBody.md |
129
+ | evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | workflows/Rollback.md |
130
+ | watch, monitor, regression, post-deploy, keep an eye on | Watch | workflows/Watch.md |
131
+ | doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | workflows/Doctor.md |
132
+ | ingest, import, codex logs, opencode, openclaw, pi, wrap codex | Ingest | workflows/Ingest.md |
133
+ | replay, backfill, claude transcripts, historical sessions | Replay | workflows/Replay.md |
134
+ | contributions, sharing preferences, opt in/out creator sharing, approve/revoke contributions | Contributions | workflows/Contributions.md |
135
+ | creator contributions, selftune.contribute.json, enable/disable creator contribution | CreatorContributions | workflows/CreatorContributions.md |
136
+ | signals dashboard, contributor signals, signals page, community dashboard, community data, contributor stats, signal health, how are signals, how is community | SignalsDashboard | workflows/SignalsDashboard.md |
137
+ | contribute, share, export bundle, export data, anonymized, give back | Contribute | workflows/Contribute.md |
138
+ | init, setup, set up, bootstrap, first time, install, configure selftune, alpha, enroll | Initialize | workflows/Initialize.md |
139
+ | cron, schedule, automate evolution, run automatically | Cron | workflows/Cron.md |
140
+ | schedule, selftune schedule, launchd, systemd, crontab, automation setup | Schedule | workflows/Schedule.md |
141
+ | auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | workflows/AutoActivation.md |
142
+ | dashboard, visual, open dashboard, show dashboard, serve dashboard | Dashboard | workflows/Dashboard.md |
143
+ | evolution memory, session continuity, what happened last | EvolutionMemory | workflows/EvolutionMemory.md |
144
+ | grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | workflows/Baseline.md |
145
+ | eval unit-test, skill test, test skill, generate tests, run tests | UnitTest | workflows/UnitTest.md |
146
+ | eval composability, co-occurrence, skill conflicts, family overlap, sibling confusion | Composability | workflows/Composability.md |
147
+ | eval import, skillsbench, external evals, benchmark tasks | ImportSkillsBench | workflows/ImportSkillsBench.md |
148
+ | telemetry, analytics, disable analytics, opt out, tracking, privacy | Telemetry | workflows/Telemetry.md |
149
+ | orchestrate, autonomous, full loop, improve all skills, run selftune, run selftune loop, run with package search, automatic package improvement | Run | workflows/Run.md |
150
+ | sync, refresh, source truth, rescan sessions | Sync | workflows/Sync.md |
151
+ | badge, readme badge, skill badge, health badge | Badge | workflows/Badge.md |
152
+ | workflows, discover workflows, scaffold workflow skill, build skill from logs | Workflows | workflows/Workflows.md |
153
+ | alpha upload, upload data, send alpha data, manual upload | AlphaUpload | workflows/AlphaUpload.md |
154
+ | recover, rebuild sqlite, recover db, legacy backfill | Recover | workflows/Recover.md |
155
+ | quickstart, getting started, onboard, first time setup, new user | Quickstart | workflows/Quickstart.md |
156
+ | uninstall, remove selftune, clean up, teardown | Uninstall | workflows/Uninstall.md |
157
+ | repair, rebuild usage, fix skill usage, trustworthy usage | RepairSkillUsage | workflows/RepairSkillUsage.md |
158
+ | export canonical, canonical export, canonical telemetry, push payload | ExportCanonical | workflows/ExportCanonical.md |
159
+ | hook, run hook, invoke hook, manual hook, debug hook | Hook | workflows/Hook.md |
160
+ | codex/opencode/cline/pi hooks, platform hooks, non-claude hooks, multi-agent | PlatformHooks | workflows/PlatformHooks.md |
161
+ | registry, distribute, push/install/sync/rollback skill, team skills | Registry | workflows/Registry.md |
162
+ | export, dump, jsonl, export sqlite, debug export | Export | _(direct: `selftune export`)_ |
163
+ | status, health summary, skill health, how are skills | Status | _(direct: `selftune status`)_ |
164
+ | last, last session, recent session, what happened | Last | _(direct: `selftune last`)_ |
165
+
166
+ Workflows Grade, Improve, Watch, and Ingest also run autonomously via `selftune orchestrate`.
167
+ When package evaluation evidence exists, `selftune orchestrate` (aliased as `selftune run`)
168
+ can automatically select package-level bounded search instead of description-level evolve.
121
169
 
122
170
  ## Interactive Configuration
123
171
 
@@ -130,12 +178,27 @@ tier reference, and quick-path rules.
130
178
  selftune bundles focused agents in `agents/`. Read the relevant agent file and
131
179
  follow its instructions — either inline or by spawning a subagent.
132
180
 
133
- | Trigger keywords | Agent file | When to use |
134
- | --- | --- | --- |
135
- | diagnose, root cause, why failing, debug performance | `agents/diagnosis-analyst.md` | Recurring low grades or unclear failures after doctor/status |
136
- | patterns, conflicts, cross-skill, overlap | `agents/pattern-analyst.md` | Skills overlap, misroute, or interfere |
137
- | review evolution, check proposal, safe to deploy | `agents/evolution-reviewer.md` | Before deploying high-stakes or marginal evolutions |
138
- | set up selftune, integrate, configure project | `agents/integration-guide.md` | Complex setup: monorepos, multi-skill, mixed-platform |
181
+ | Trigger keywords | Agent file | When to use |
182
+ | ---------------------------------------------------- | ------------------------------ | ------------------------------------------------------------ |
183
+ | diagnose, root cause, why failing, debug performance | `agents/diagnosis-analyst.md` | Recurring low grades or unclear failures after doctor/status |
184
+ | patterns, conflicts, cross-skill, overlap | `agents/pattern-analyst.md` | Skills overlap, misroute, or interfere |
185
+ | review evolution, check proposal, safe to deploy | `agents/evolution-reviewer.md` | Before deploying high-stakes or marginal evolutions |
186
+ | set up selftune, integrate, configure project | `agents/integration-guide.md` | Complex setup: monorepos, multi-skill, mixed-platform |
187
+
188
+ ## Advanced Workflows
189
+
190
+ Load these when the user explicitly asks for a low-level step, when the primary
191
+ lifecycle fails, or when debugging needs deeper evidence:
192
+
193
+ - `workflows/Evals.md`
194
+ - `workflows/UnitTest.md`
195
+ - `workflows/Baseline.md`
196
+ - `workflows/Replay.md`
197
+ - `workflows/Watch.md`
198
+ - `workflows/Evolve.md`
199
+ - `workflows/EvolveBody.md`
200
+ - `workflows/Composability.md`
201
+ - `workflows/ImportSkillsBench.md`
139
202
 
140
203
  ## Negative Examples
141
204
 
@@ -173,16 +236,16 @@ community contribution, signal sharing, opt in creator, creator UUID.
173
236
 
174
237
  Load these on demand — do not read unless needed for the current task:
175
238
 
176
- | Reference | When to read |
177
- | --- | --- |
178
- | `references/cli-quick-reference.md` | Need exact CLI flags beyond `--help` |
179
- | `references/troubleshooting.md` | Diagnosing common errors |
180
- | `references/examples.md` | Need step-by-step scenario walkthroughs |
181
- | `references/creator-playbook.md` | Publishing skills others install; before-ship vs after-ship creator loop |
182
- | `references/interactive-config.md` | Before mutating workflows |
183
- | `references/grading-methodology.md` | Grading sessions or interpreting grades |
184
- | `references/invocation-taxonomy.md` | Analyzing trigger coverage |
185
- | `references/logs.md` | Parsing or debugging log files |
186
- | `references/setup-patterns.md` | Complex platform-specific setup |
187
- | `references/version-history.md` | Checking what changed between versions |
188
- | `settings_snippet.json` | During initialization |
239
+ | Reference | When to read |
240
+ | ----------------------------------- | -------------------------------------------------------------------- |
241
+ | `references/cli-quick-reference.md` | Need exact CLI flags beyond `--help` |
242
+ | `references/troubleshooting.md` | Diagnosing common errors |
243
+ | `references/examples.md` | Need step-by-step scenario walkthroughs |
244
+ | `references/creator-playbook.md` | Publishing skills others install; before-ship vs after-ship pipeline |
245
+ | `references/interactive-config.md` | Before mutating workflows |
246
+ | `references/grading-methodology.md` | Grading sessions or interpreting grades |
247
+ | `references/invocation-taxonomy.md` | Analyzing trigger coverage |
248
+ | `references/logs.md` | Parsing or debugging log files |
249
+ | `references/setup-patterns.md` | Complex platform-specific setup |
250
+ | `references/version-history.md` | Checking what changed between versions |
251
+ | `settings_snippet.json` | During initialization |
@@ -20,9 +20,23 @@ selftune grade baseline --skill <name> --skill-path <path> [--eval-set <path>]
20
20
  selftune evolve --skill <name> --skill-path <path> [--dry-run] [--validation-mode auto|replay|judge]
21
21
  selftune evolve body --skill <name> --skill-path <path> --target <body|routing> [--dry-run]
22
22
  selftune evolve rollback --skill <name> --skill-path <path> [--proposal-id <id>]
23
+ selftune improve --skill <name> --skill-path <path> [--scope auto|description|routing|body|package] [--dry-run] [--validation-mode auto|replay|judge]
24
+
25
+ # Create group
26
+ selftune verify --skill-path <path> [--agent AGENT] [--eval-set PATH] [--no-auto-fix] [--json]
27
+ selftune publish --skill-path <path> [--no-watch] [--ignore-watch-alerts] [--json]
28
+ selftune search-run --skill-path <path> [--skill NAME] [--surface routing|body|both] [--max-candidates N] [--agent AGENT] [--eval-set PATH] [--apply-winner] [--json]
29
+ selftune create status --skill-path <path> [--json]
30
+ selftune create init --name <name> --description <text> [--output-dir PATH] [--force] [--json]
31
+ selftune create scaffold --from-workflow <id|index> [--output-dir PATH] [--skill-name NAME] [--description TEXT] [--write] [--force] [--json]
32
+ selftune create check --skill-path <path> [--json]
33
+ selftune create replay --skill-path <path> [--mode routing|package] [--agent AGENT] [--json]
34
+ selftune create baseline --skill-path <path> [--mode routing|package] [--agent AGENT] [--json]
35
+ selftune create report --skill-path <path> [--agent AGENT] [--eval-set PATH] [--json]
36
+ selftune create publish --skill-path <path> [--watch] [--ignore-watch-alerts] [--json]
23
37
 
24
38
  # Eval group
25
- selftune eval generate --skill <name> [--list-skills] [--stats] [--max N] [--seed N] [--output PATH] [--blend]
39
+ selftune eval generate --skill <name> [--list-skills] [--stats] [--max N] [--seed N] [--output PATH] [--agent AGENT] [--blend]
26
40
  selftune eval unit-test --skill <name> --tests <path> [--run-agent] [--generate]
27
41
  selftune eval import --dir <path> --skill <name> --output <path> [--match-strategy exact|fuzzy]
28
42
  selftune eval composability --skill <name> [--window N] [--telemetry-log <path>]
@@ -45,6 +59,7 @@ selftune telemetry [status|enable|disable]
45
59
  selftune export [TABLE...] [--output/-o DIR] [--since DATE]
46
60
 
47
61
  # Autonomous loop
62
+ selftune run [--dry-run] [--review-required] [--auto-approve] [--skill NAME] [--max-skills N] [--recent-window HOURS] [--sync-force] [--max-auto-grade N] [--loop] [--loop-interval SECS]
48
63
  selftune orchestrate [--dry-run] [--review-required] [--auto-approve] [--skill NAME] [--max-skills N] [--recent-window HOURS] [--sync-force] [--max-auto-grade N] [--loop] [--loop-interval SECS]
49
64
  selftune sync [--since DATE] [--dry-run] [--force] [--no-claude] [--no-codex] [--no-opencode] [--no-openclaw] [--no-pi] [--no-repair] [--json]
50
65
 
@@ -3,8 +3,9 @@
3
3
  Use this when you are publishing a skill other people will install.
4
4
 
5
5
  If the user wants the operational step-by-step loop from cold start to deploy,
6
- route first to `workflows/CreateTestDeploy.md`. Use this reference for the
7
- packaging and after-ship interpretation layer around that loop.
6
+ route first to `workflows/Verify.md` and `workflows/Publish.md`. Use this
7
+ reference for the packaging and after-ship interpretation layer around that
8
+ loop.
8
9
 
9
10
  The goal is simple:
10
11
 
@@ -39,20 +40,23 @@ Rule of thumb:
39
40
 
40
41
  ### Cold-start test and deploy the skill before publishing
41
42
 
42
- The default creator loop is now:
43
+ The default package evaluation pipeline is:
43
44
 
44
45
  ```bash
46
+ selftune verify --skill-path path/to/my-skill
45
47
  selftune eval generate --skill my-skill
48
+ selftune verify --skill-path path/to/my-skill
46
49
  selftune eval unit-test --skill my-skill --generate --skill-path path/to/SKILL.md
47
- selftune evolve --skill my-skill --skill-path path/to/SKILL.md --dry-run --validation-mode replay
48
- selftune grade baseline --skill my-skill --skill-path path/to/SKILL.md
49
- selftune evolve --skill my-skill --skill-path path/to/SKILL.md --with-baseline
50
- selftune watch --skill my-skill
50
+ selftune verify --skill-path path/to/my-skill
51
+ selftune create replay --skill-path path/to/my-skill --mode package
52
+ selftune create baseline --skill-path path/to/my-skill --mode package
53
+ selftune verify --skill-path path/to/my-skill
54
+ selftune publish --skill-path path/to/my-skill
51
55
  ```
52
56
 
53
- That same sequence is now packaged as the dedicated `CreateTestDeploy`
54
- workflow in the shipped selftune skill, while `Evals`, `UnitTest`, `Baseline`,
55
- `Evolve`, and `Watch` remain the atomic workflow docs for each individual step.
57
+ `verify` is the front door in that sequence. Evals, unit tests, replay, and
58
+ baseline remain the atomic supporting steps when the draft is still missing
59
+ evidence.
56
60
 
57
61
  The dashboard overview, per-skill report, and `selftune status` all read from that loop and show
58
62
  the next missing step directly, then flip to deploy-ready and watching states once the skill is shipped.
@@ -106,11 +110,28 @@ Actionable threshold today:
106
110
  - at least `10` total signals
107
111
  - at least `3` distinct contributor cohorts
108
112
 
113
+ ### Package-level improvement
114
+
115
+ When a skill has enough package evaluation evidence (accepted frontier
116
+ candidates, canonical package evaluations), `selftune orchestrate` can
117
+ automatically select package-level bounded search instead of description-only
118
+ evolve. You can also trigger this manually:
119
+
120
+ ```bash
121
+ selftune improve --skill my-skill --skill-path path/to/SKILL.md --scope package
122
+ ```
123
+
124
+ Package search generates bounded mutations on routing and body surfaces,
125
+ evaluates them against the accepted frontier parent through the package
126
+ evaluator, and applies the winning candidate. Watch evidence feeds back into
127
+ frontier selection, so post-deploy regressions inform future search runs.
128
+
109
129
  ### Interpret signal correctly
110
130
 
111
131
  - High missed counts with concentrated categories usually mean the **description/router** is wrong.
112
132
  - Low grades with decent trigger rate usually mean the **body/workflow/reference/tool split** is wrong.
113
133
  - Low-signal skills need more contributors before you trust a proposal.
134
+ - When both routing and body surfaces show weakness, `selftune improve --scope package` or the automatic scope selection in `selftune orchestrate` can address them together.
114
135
 
115
136
  ## Fast Checklist
116
137
 
@@ -138,20 +138,19 @@ Report the interpretation to the user based on the lift value.
138
138
  Add `--with-baseline` to evolve commands to prevent wasting evolution
139
139
  cycles on skills that don't add value.
140
140
 
141
- ### 4. Canonical creator loop position
141
+ ### 4. Canonical pipeline position
142
142
 
143
- Baseline is the last pre-deploy check in the default creator loop:
143
+ Baseline is the last pre-deploy check in the package evaluation pipeline:
144
144
 
145
145
  ```bash
146
- selftune eval generate --skill <name>
147
- selftune eval unit-test --skill <name> --generate --skill-path <path>
148
- selftune evolve --skill <name> --skill-path <path> --dry-run --validation-mode replay
149
- selftune grade baseline --skill <name> --skill-path <path>
150
- selftune evolve --skill <name> --skill-path <path> --with-baseline
151
- selftune watch --skill <name>
146
+ selftune verify --skill-path <path>
147
+ selftune create baseline --skill-path <path> --mode package
148
+ selftune verify --skill-path <path>
149
+ selftune publish --skill-path <path>
152
150
  ```
153
151
 
154
- After that, the skill is ready for live deploy and then watch with much clearer trust evidence.
152
+ For already-published skills, `grade baseline` remains the explicit value gate
153
+ behind `evolve --with-baseline`.
155
154
 
156
155
  ## Common Patterns
157
156
 
@@ -56,16 +56,16 @@ selftune contributions upload [--dry-run] [--retry-failed] [--limit <n>]
56
56
 
57
57
  ## Automatic Flush via Orchestrate
58
58
 
59
- When `selftune orchestrate` runs, it automatically flushes any staged
59
+ When `selftune run` executes, it automatically flushes any staged
60
60
  creator-directed relay signals as Step 10 (after alpha upload). This means
61
61
  users who have opted in don't need to run `selftune contributions upload`
62
- manually — orchestrate handles it. The flush is fail-open and never blocks
63
- the orchestrate loop. An API key is required (alpha enrolled).
62
+ manually — the runtime handles it. The flush is fail-open and never blocks
63
+ the autonomous loop. An API key is required (alpha enrolled).
64
64
 
65
65
  ## Notes
66
66
 
67
67
  - This workflow now shows which installed skills are requesting creator-directed sharing via `selftune.contribute.json`.
68
- - Once approved, creator-directed contribution signals are staged locally during `selftune sync` / `selftune orchestrate`.
68
+ - Once approved, creator-directed contribution signals are staged locally during `selftune sync` / `selftune run`.
69
69
  - Use `selftune contributions upload` to flush staged rows to the creator-directed relay endpoint.
70
70
  - Relay upload is separate from `selftune alpha upload` and currently reuses the local cloud API key when available.
71
71
  - Use `selftune contribute` when the user explicitly wants to export/share an anonymized community bundle.