selftune 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/README.md +35 -35
  2. package/apps/local-dashboard/dist/assets/index-BZVLv70T.js +16 -0
  3. package/apps/local-dashboard/dist/assets/{index-CRtLkBTi.css → index-Bs3Y4ixf.css} +1 -1
  4. package/apps/local-dashboard/dist/assets/{vendor-react-BQH_6WrG.js → vendor-react-BXP54cYo.js} +4 -4
  5. package/apps/local-dashboard/dist/assets/{vendor-table-dK1QMLq9.js → vendor-table-DTF_SXoy.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-CO2mrx6e.js → vendor-ui-CWU0d1wd.js} +66 -66
  7. package/apps/local-dashboard/dist/index.html +15 -15
  8. package/bin/selftune.cjs +1 -1
  9. package/cli/selftune/activation-rules.ts +37 -18
  10. package/cli/selftune/agent-guidance.ts +16 -16
  11. package/cli/selftune/alpha-identity.ts +1 -2
  12. package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
  13. package/cli/selftune/alpha-upload/flush.ts +2 -2
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +106 -3
  15. package/cli/selftune/auth/device-code.ts +32 -0
  16. package/cli/selftune/auto-update.ts +12 -0
  17. package/cli/selftune/badge/badge.ts +1 -0
  18. package/cli/selftune/canonical-export.ts +5 -0
  19. package/cli/selftune/claude-agents.ts +154 -0
  20. package/cli/selftune/contribute/bundle.ts +2 -0
  21. package/cli/selftune/contribute/contribute.ts +1 -0
  22. package/cli/selftune/cron/setup.ts +2 -2
  23. package/cli/selftune/dashboard-contract.ts +1 -1
  24. package/cli/selftune/dashboard-server.ts +11 -52
  25. package/cli/selftune/eval/hooks-to-evals.ts +13 -6
  26. package/cli/selftune/eval/import-skillsbench.ts +1 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +2 -3
  28. package/cli/selftune/eval/unit-test.ts +1 -0
  29. package/cli/selftune/evolution/deploy-proposal.ts +1 -0
  30. package/cli/selftune/evolution/evolve-body.ts +93 -6
  31. package/cli/selftune/evolution/evolve.ts +0 -1
  32. package/cli/selftune/evolution/propose-body.ts +3 -2
  33. package/cli/selftune/evolution/propose-routing.ts +3 -2
  34. package/cli/selftune/evolution/refine-body.ts +3 -2
  35. package/cli/selftune/export.ts +1 -0
  36. package/cli/selftune/grading/auto-grade.ts +1 -0
  37. package/cli/selftune/grading/grade-session.ts +9 -0
  38. package/cli/selftune/hooks/auto-activate.ts +6 -0
  39. package/cli/selftune/hooks/evolution-guard.ts +12 -15
  40. package/cli/selftune/hooks/prompt-log.ts +1 -0
  41. package/cli/selftune/hooks/session-stop.ts +34 -40
  42. package/cli/selftune/hooks/skill-change-guard.ts +1 -0
  43. package/cli/selftune/hooks/skill-eval.ts +1 -1
  44. package/cli/selftune/index.ts +23 -14
  45. package/cli/selftune/ingestors/claude-replay.ts +1 -0
  46. package/cli/selftune/ingestors/codex-rollout.ts +1 -0
  47. package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
  48. package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
  49. package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
  50. package/cli/selftune/init.ts +197 -96
  51. package/cli/selftune/localdb/db.ts +1 -0
  52. package/cli/selftune/localdb/direct-write.ts +93 -12
  53. package/cli/selftune/localdb/materialize.ts +2 -0
  54. package/cli/selftune/localdb/queries.ts +210 -0
  55. package/cli/selftune/localdb/schema.ts +72 -1
  56. package/cli/selftune/monitoring/watch.ts +1 -0
  57. package/cli/selftune/normalization.ts +4 -0
  58. package/cli/selftune/observability.ts +14 -7
  59. package/cli/selftune/orchestrate.ts +15 -37
  60. package/cli/selftune/repair/skill-usage.ts +7 -3
  61. package/cli/selftune/routes/orchestrate-runs.ts +1 -0
  62. package/cli/selftune/routes/overview.ts +1 -0
  63. package/cli/selftune/routes/skill-report.ts +1 -0
  64. package/cli/selftune/sync.ts +31 -1
  65. package/cli/selftune/types.ts +2 -2
  66. package/cli/selftune/uninstall.ts +412 -0
  67. package/cli/selftune/utils/canonical-log.ts +2 -0
  68. package/cli/selftune/utils/jsonl.ts +1 -0
  69. package/cli/selftune/utils/llm-call.ts +131 -3
  70. package/cli/selftune/utils/skill-log.ts +1 -0
  71. package/cli/selftune/utils/transcript.ts +1 -0
  72. package/cli/selftune/utils/trigger-check.ts +1 -1
  73. package/cli/selftune/workflows/skill-md-writer.ts +5 -5
  74. package/cli/selftune/workflows/workflows.ts +1 -0
  75. package/package.json +38 -33
  76. package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
  77. package/packages/telemetry-contract/package.json +3 -3
  78. package/packages/telemetry-contract/src/index.ts +0 -1
  79. package/packages/telemetry-contract/src/schemas.ts +6 -24
  80. package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
  81. package/packages/ui/README.md +35 -34
  82. package/packages/ui/package.json +3 -3
  83. package/packages/ui/src/components/ActivityTimeline.tsx +49 -42
  84. package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
  85. package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
  86. package/packages/ui/src/components/InfoTip.tsx +4 -3
  87. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
  88. package/packages/ui/src/components/section-cards.tsx +19 -24
  89. package/packages/ui/src/components/skill-health-grid.tsx +213 -193
  90. package/packages/ui/src/lib/constants.tsx +1 -0
  91. package/packages/ui/src/primitives/badge.tsx +12 -15
  92. package/packages/ui/src/primitives/button.tsx +7 -7
  93. package/packages/ui/src/primitives/card.tsx +15 -26
  94. package/packages/ui/src/primitives/checkbox.tsx +7 -8
  95. package/packages/ui/src/primitives/collapsible.tsx +5 -5
  96. package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
  97. package/packages/ui/src/primitives/label.tsx +6 -6
  98. package/packages/ui/src/primitives/select.tsx +28 -37
  99. package/packages/ui/src/primitives/table.tsx +17 -44
  100. package/packages/ui/src/primitives/tabs.tsx +14 -21
  101. package/packages/ui/src/primitives/tooltip.tsx +10 -22
  102. package/skill/SKILL.md +72 -59
  103. package/skill/Workflows/AlphaUpload.md +4 -4
  104. package/skill/Workflows/AutoActivation.md +11 -6
  105. package/skill/Workflows/Badge.md +22 -16
  106. package/skill/Workflows/Baseline.md +34 -36
  107. package/skill/Workflows/Composability.md +16 -11
  108. package/skill/Workflows/Contribute.md +26 -21
  109. package/skill/Workflows/Cron.md +23 -22
  110. package/skill/Workflows/Dashboard.md +40 -40
  111. package/skill/Workflows/Doctor.md +40 -34
  112. package/skill/Workflows/Evals.md +48 -47
  113. package/skill/Workflows/EvolutionMemory.md +31 -21
  114. package/skill/Workflows/Evolve.md +84 -82
  115. package/skill/Workflows/EvolveBody.md +58 -47
  116. package/skill/Workflows/Grade.md +16 -13
  117. package/skill/Workflows/ImportSkillsBench.md +9 -6
  118. package/skill/Workflows/Ingest.md +36 -21
  119. package/skill/Workflows/Initialize.md +138 -97
  120. package/skill/Workflows/Orchestrate.md +22 -16
  121. package/skill/Workflows/Replay.md +12 -7
  122. package/skill/Workflows/Rollback.md +13 -6
  123. package/skill/Workflows/Schedule.md +6 -6
  124. package/skill/Workflows/Sync.md +18 -11
  125. package/skill/Workflows/UnitTest.md +28 -17
  126. package/skill/Workflows/Watch.md +28 -21
  127. package/skill/agents/diagnosis-analyst.md +11 -0
  128. package/skill/agents/evolution-reviewer.md +15 -1
  129. package/skill/agents/integration-guide.md +10 -0
  130. package/skill/agents/pattern-analyst.md +12 -1
  131. package/skill/references/grading-methodology.md +23 -24
  132. package/skill/references/interactive-config.md +7 -7
  133. package/skill/references/invocation-taxonomy.md +22 -20
  134. package/skill/references/logs.md +20 -6
  135. package/skill/references/setup-patterns.md +4 -2
  136. package/.claude/agents/diagnosis-analyst.md +0 -156
  137. package/.claude/agents/evolution-reviewer.md +0 -180
  138. package/.claude/agents/integration-guide.md +0 -212
  139. package/.claude/agents/pattern-analyst.md +0 -160
  140. package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +0 -15
package/skill/SKILL.md CHANGED
@@ -12,7 +12,7 @@ description: >
12
12
  even if they don't say "selftune" explicitly.
13
13
  metadata:
14
14
  author: selftune-dev
15
- version: 0.2.8
15
+ version: 0.2.10
16
16
  category: developer-tools
17
17
  ---
18
18
 
@@ -104,44 +104,44 @@ selftune cron remove [--dry-run]
104
104
  selftune telemetry [status|enable|disable]
105
105
  selftune export [TABLE...] [--output/-o DIR] [--since DATE]
106
106
 
107
- # Alpha enrollment (cloud app is control-plane only, not the main UX)
108
- selftune init --alpha --alpha-email <email> --alpha-key <st_live_key>
107
+ # Alpha enrollment (device-code flow; browser opens automatically)
108
+ selftune init --alpha --alpha-email <email>
109
109
  selftune alpha upload [--dry-run]
110
110
  selftune status # shows cloud link state + upload readiness
111
111
  ```
112
112
 
113
113
  ## Workflow Routing
114
114
 
115
- | Trigger keywords | Workflow | File |
116
- |------------------|----------|------|
117
- | grade, score, evaluate, assess session, auto-grade | Grade | Workflows/Grade.md |
118
- | evals, eval set, undertriggering, skill stats, eval generate | Evals | Workflows/Evals.md |
119
- | evolve, improve, optimize skills, make skills better, triggers, catch more queries | Evolve | Workflows/Evolve.md |
120
- | evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | Workflows/EvolveBody.md |
121
- | evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | Workflows/Rollback.md |
122
- | watch, monitor, regression, post-deploy, keep an eye on | Watch | Workflows/Watch.md |
123
- | doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | Workflows/Doctor.md |
124
- | ingest, import, codex logs, opencode, openclaw, wrap codex | Ingest | Workflows/Ingest.md |
125
- | replay, backfill, claude transcripts, historical sessions | Replay | Workflows/Replay.md |
126
- | contribute, share, community, export data, anonymized, give back | Contribute | Workflows/Contribute.md |
127
- | init, setup, set up, bootstrap, first time, install, configure selftune, alpha, enroll, alpha enrollment, cloud link, upload credential | Initialize | Workflows/Initialize.md |
128
- | cron, schedule, automate evolution, run automatically | Cron | Workflows/Cron.md |
129
- | auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | Workflows/AutoActivation.md |
130
- | dashboard, visual, open dashboard, show dashboard, serve dashboard, live dashboard | Dashboard | Workflows/Dashboard.md |
131
- | evolution memory, session continuity, what happened last | EvolutionMemory | Workflows/EvolutionMemory.md |
132
- | grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | Workflows/Baseline.md |
133
- | eval unit-test, skill test, test skill, generate tests, run tests | UnitTest | Workflows/UnitTest.md |
134
- | eval composability, co-occurrence, skill conflicts, skills together | Composability | Workflows/Composability.md |
135
- | eval import, skillsbench, external evals, benchmark tasks | ImportSkillsBench | Workflows/ImportSkillsBench.md |
136
- | telemetry, analytics, disable analytics, opt out, tracking, privacy | Telemetry | Workflows/Telemetry.md |
137
- | orchestrate, autonomous, full loop, improve all skills, run selftune loop | Orchestrate | Workflows/Orchestrate.md |
138
- | sync, refresh, source truth, rescan sessions | Sync | Workflows/Sync.md |
139
- | badge, readme badge, skill badge, health badge | Badge | Workflows/Badge.md |
140
- | workflows, discover workflows, list workflows, multi-skill workflows | Workflows | Workflows/Workflows.md |
141
- | alpha upload, upload data, send alpha data, manual upload, dry run upload | AlphaUpload | Workflows/AlphaUpload.md |
142
- | export, dump, jsonl, export sqlite, debug export | Export | *(direct command — no workflow file)* |
143
- | status, health summary, skill health, how are skills, skills doing, run selftune | Status | *(direct command — no workflow file)* |
144
- | last, last session, recent session, what happened, what changed | Last | *(direct command — no workflow file)* |
115
+ | Trigger keywords | Workflow | File |
116
+ | --------------------------------------------------------------------------------------------------------------------------------------- | ----------------- | ------------------------------------- |
117
+ | grade, score, evaluate, assess session, auto-grade | Grade | Workflows/Grade.md |
118
+ | evals, eval set, undertriggering, skill stats, eval generate | Evals | Workflows/Evals.md |
119
+ | evolve, improve, optimize skills, make skills better, triggers, catch more queries | Evolve | Workflows/Evolve.md |
120
+ | evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | Workflows/EvolveBody.md |
121
+ | evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | Workflows/Rollback.md |
122
+ | watch, monitor, regression, post-deploy, keep an eye on | Watch | Workflows/Watch.md |
123
+ | doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | Workflows/Doctor.md |
124
+ | ingest, import, codex logs, opencode, openclaw, wrap codex | Ingest | Workflows/Ingest.md |
125
+ | replay, backfill, claude transcripts, historical sessions | Replay | Workflows/Replay.md |
126
+ | contribute, share, community, export data, anonymized, give back | Contribute | Workflows/Contribute.md |
127
+ | init, setup, set up, bootstrap, first time, install, configure selftune, alpha, enroll, alpha enrollment, cloud link, upload credential | Initialize | Workflows/Initialize.md |
128
+ | cron, schedule, automate evolution, run automatically | Cron | Workflows/Cron.md |
129
+ | auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | Workflows/AutoActivation.md |
130
+ | dashboard, visual, open dashboard, show dashboard, serve dashboard, live dashboard | Dashboard | Workflows/Dashboard.md |
131
+ | evolution memory, session continuity, what happened last | EvolutionMemory | Workflows/EvolutionMemory.md |
132
+ | grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | Workflows/Baseline.md |
133
+ | eval unit-test, skill test, test skill, generate tests, run tests | UnitTest | Workflows/UnitTest.md |
134
+ | eval composability, co-occurrence, skill conflicts, skills together | Composability | Workflows/Composability.md |
135
+ | eval import, skillsbench, external evals, benchmark tasks | ImportSkillsBench | Workflows/ImportSkillsBench.md |
136
+ | telemetry, analytics, disable analytics, opt out, tracking, privacy | Telemetry | Workflows/Telemetry.md |
137
+ | orchestrate, autonomous, full loop, improve all skills, run selftune loop | Orchestrate | Workflows/Orchestrate.md |
138
+ | sync, refresh, source truth, rescan sessions | Sync | Workflows/Sync.md |
139
+ | badge, readme badge, skill badge, health badge | Badge | Workflows/Badge.md |
140
+ | workflows, discover workflows, list workflows, multi-skill workflows | Workflows | Workflows/Workflows.md |
141
+ | alpha upload, upload data, send alpha data, manual upload, dry run upload | AlphaUpload | Workflows/AlphaUpload.md |
142
+ | export, dump, jsonl, export sqlite, debug export | Export | _(direct command — no workflow file)_ |
143
+ | status, health summary, skill health, how are skills, skills doing, run selftune | Status | _(direct command — no workflow file)_ |
144
+ | last, last session, recent session, what happened, what changed | Last | _(direct command — no workflow file)_ |
145
145
 
146
146
  Workflows Grade, Evolve, Watch, and Ingest also run autonomously via `selftune orchestrate`.
147
147
 
@@ -155,7 +155,7 @@ tier reference, and quick-path rules.
155
155
 
156
156
  The core idea: observe how users actually talk, find where skills miss, propose
157
157
  better descriptions, validate them, and deploy — with automatic rollback if things
158
- get worse. Every step produces evidence so you can explain *why* a change was made.
158
+ get worse. Every step produces evidence so you can explain _why_ a change was made.
159
159
 
160
160
  ```text
161
161
  Observe --> Detect --> Diagnose --> Propose --> Validate --> Audit --> Deploy --> Watch --> Rollback
@@ -179,17 +179,22 @@ selftune bundles focused agents in `agents/`. When you need deeper analysis,
179
179
  read the relevant agent file and follow its instructions — either inline or
180
180
  by spawning a subagent with those instructions as its prompt.
181
181
 
182
+ On Claude Code, `selftune init` also syncs compatibility copies into
183
+ `~/.claude/agents/` so native `--agent <name>` calls keep matching these
184
+ bundled definitions.
185
+
182
186
  Treat these as worker-style subagents:
187
+
183
188
  - pass the required inputs from the parent agent
184
189
  - expect a structured report back
185
190
  - do not have them question the user directly unless you explicitly want that
186
191
 
187
- | Trigger keywords | Agent file | When to use |
188
- |------------------|-----------|-------------|
189
- | diagnose, root cause, why failing, debug performance | `agents/diagnosis-analyst.md` | When one skill has recurring low grades, regressions, or unclear failures after basic doctor/status review |
190
- | patterns, conflicts, cross-skill, overlap, optimize skills | `agents/pattern-analyst.md` | When multiple skills may overlap, misroute, or interfere, especially after composability flags conflict |
191
- | review evolution, check proposal, safe to deploy | `agents/evolution-reviewer.md` | Before deploying a dry-run or pending proposal, especially for high-stakes skills or marginal improvements |
192
- | set up selftune, integrate, configure project | `agents/integration-guide.md` | For complex setup and verification work in monorepos, multi-skill repos, or mixed-platform environments |
192
+ | Trigger keywords | Agent file | When to use |
193
+ | ---------------------------------------------------------- | ------------------------------ | ---------------------------------------------------------------------------------------------------------- |
194
+ | diagnose, root cause, why failing, debug performance | `agents/diagnosis-analyst.md` | When one skill has recurring low grades, regressions, or unclear failures after basic doctor/status review |
195
+ | patterns, conflicts, cross-skill, overlap, optimize skills | `agents/pattern-analyst.md` | When multiple skills may overlap, misroute, or interfere, especially after composability flags conflict |
196
+ | review evolution, check proposal, safe to deploy | `agents/evolution-reviewer.md` | Before deploying a dry-run or pending proposal, especially for high-stakes skills or marginal improvements |
197
+ | set up selftune, integrate, configure project | `agents/integration-guide.md` | For complex setup and verification work in monorepos, multi-skill repos, or mixed-platform environments |
193
198
 
194
199
  ## Examples
195
200
 
@@ -198,6 +203,7 @@ Treat these as worker-style subagents:
198
203
  User says: "Set up selftune" or "Install selftune"
199
204
 
200
205
  Actions:
206
+
201
207
  1. Read `Workflows/Initialize.md`
202
208
  2. Run `selftune init` to bootstrap config (hooks are installed automatically)
203
209
  3. Run `selftune doctor` to verify
@@ -209,6 +215,7 @@ Result: Config at `~/.selftune/config.json`, hooks active, ready for session cap
209
215
  User says: "Make the pptx skill catch more queries" or "Evolve the Research skill"
210
216
 
211
217
  Actions:
218
+
212
219
  1. `selftune eval generate --skill pptx` to find missed triggers
213
220
  2. `selftune evolve --skill pptx --skill-path <path>` to propose changes
214
221
  3. `selftune watch --skill pptx --skill-path <path>` to monitor post-deploy
@@ -220,6 +227,7 @@ Result: Skill description updated to match real user language, with rollback ava
220
227
  User says: "How are my skills doing?" or "Run selftune"
221
228
 
222
229
  Actions:
230
+
223
231
  1. `selftune status` for overall health summary
224
232
  2. `selftune last` for most recent session insight
225
233
  3. `selftune doctor` if issues detected
@@ -231,6 +239,7 @@ Result: Pass rates, trend data, and actionable recommendations.
231
239
  User says: "Set up cron jobs" or "Run selftune automatically"
232
240
 
233
241
  Actions:
242
+
234
243
  1. `selftune cron setup` to install OS-level scheduling
235
244
  2. Orchestrate loop runs: ingest → grade → evolve → watch
236
245
 
@@ -245,6 +254,7 @@ Error: `command not found: selftune`
245
254
  Cause: CLI not installed or not on PATH.
246
255
 
247
256
  Solution:
257
+
248
258
  1. Run `npm install -g selftune` or check `bin/selftune.cjs` exists
249
259
  2. Verify with `which selftune`
250
260
  3. If using bun: `bun link` in the repo root
@@ -256,6 +266,7 @@ Error: `selftune grade` returns empty results.
256
266
  Cause: Hooks not capturing sessions, or no sessions since last ingest.
257
267
 
258
268
  Solution:
269
+
259
270
  1. Run `selftune doctor` to verify hook installation
260
271
  2. Run `selftune ingest claude --force` to re-ingest
261
272
  3. Check `~/.claude/` for telemetry JSONL files
@@ -265,6 +276,7 @@ Solution:
265
276
  Cause: Eval set too small or skill already well-tuned.
266
277
 
267
278
  Solution:
279
+
268
280
  1. Run `selftune eval generate --skill <name> --max 50` for a larger eval set
269
281
  2. Check `selftune status` — if pass rate is >90%, evolution may not be needed
270
282
  3. Try `selftune evolve body` for deeper structural changes
@@ -274,6 +286,7 @@ Solution:
274
286
  Error: Port already in use or blank page.
275
287
 
276
288
  Solution:
289
+
277
290
  1. Try a different port: `selftune dashboard --port 3142`
278
291
  2. Check if another process holds the port: `lsof -i :3141`
279
292
  3. Use `--no-open` to start the server without opening a browser
@@ -285,31 +298,31 @@ share keywords but need different solutions:
285
298
 
286
299
  - "Fix this React hydration bug" — general debugging, not skill improvement
287
300
  - "Create a PowerPoint about Q3 results" — this is pptx skill, not selftune
288
- - "Run my unit tests" — project tests, not skill eval tests (even though selftune has "eval unit-test", this is about *project* tests)
289
- - "How do I use the Research skill?" — skill *usage*, not skill *improvement* (route to the Research skill itself)
301
+ - "Run my unit tests" — project tests, not skill eval tests (even though selftune has "eval unit-test", this is about _project_ tests)
302
+ - "How do I use the Research skill?" — skill _usage_, not skill _improvement_ (route to the Research skill itself)
290
303
  - "Generate a report from this data" — content generation, not skill evolution
291
304
  - "My build is failing" — project issue, not selftune health issue (even though "failing" overlaps with skill diagnostics language)
292
305
  - "Evaluate this code for security issues" — "evaluate" here means code review, not session grading
293
306
  - "Improve this function's performance" — code optimization, not skill optimization (even though "improve" and "performance" are selftune keywords)
294
307
 
295
- The key distinction: selftune is about improving *skills themselves* (their
308
+ The key distinction: selftune is about improving _skills themselves_ (their
296
309
  descriptions, triggers, and execution quality). If the user is trying to
297
- accomplish a task *using* a skill, route to that skill instead.
310
+ accomplish a task _using_ a skill, route to that skill instead.
298
311
 
299
312
  ## Resource Index
300
313
 
301
- | Resource | Purpose | When to read |
302
- |----------|---------|--------------|
303
- | `SKILL.md` | This file — routing, triggers, quick reference | Always loaded |
304
- | `Workflows/*.md` | Step-by-step instructions for each workflow | When routing to a workflow |
305
- | `agents/diagnosis-analyst.md` | Deep-dive skill failure analysis | Spawn when doctor/grades show persistent issues |
306
- | `agents/pattern-analyst.md` | Cross-skill conflict detection | Spawn when composability flags conflicts |
307
- | `agents/evolution-reviewer.md` | Safety gate for evolution proposals | Spawn before deploying high-stakes evolutions |
308
- | `agents/integration-guide.md` | Guided setup for complex projects | Spawn for monorepos, multi-skill setups |
309
- | `references/logs.md` | Log file formats (telemetry, usage, queries, audit) | When parsing or debugging log files |
310
- | `references/grading-methodology.md` | 3-tier grading model, evidence standards | When grading sessions or interpreting grades |
311
- | `references/invocation-taxonomy.md` | 4 invocation types, coverage analysis | When analyzing trigger coverage |
312
- | `references/interactive-config.md` | Pre-flight config pattern, model tiers | Before running mutating workflows |
313
- | `references/setup-patterns.md` | Platform-specific setup patterns | During complex setup scenarios |
314
- | `settings_snippet.json` | Claude Code hook configuration template | During initialization |
315
- | `assets/*.json` | Config templates (activation rules, settings) | During initialization |
314
+ | Resource | Purpose | When to read |
315
+ | ----------------------------------- | --------------------------------------------------- | ----------------------------------------------- |
316
+ | `SKILL.md` | This file — routing, triggers, quick reference | Always loaded |
317
+ | `Workflows/*.md` | Step-by-step instructions for each workflow | When routing to a workflow |
318
+ | `agents/diagnosis-analyst.md` | Deep-dive skill failure analysis | Spawn when doctor/grades show persistent issues |
319
+ | `agents/pattern-analyst.md` | Cross-skill conflict detection | Spawn when composability flags conflicts |
320
+ | `agents/evolution-reviewer.md` | Safety gate for evolution proposals | Spawn before deploying high-stakes evolutions |
321
+ | `agents/integration-guide.md` | Guided setup for complex projects | Spawn for monorepos, multi-skill setups |
322
+ | `references/logs.md` | Log file formats (telemetry, usage, queries, audit) | When parsing or debugging log files |
323
+ | `references/grading-methodology.md` | 3-tier grading model, evidence standards | When grading sessions or interpreting grades |
324
+ | `references/invocation-taxonomy.md` | 4 invocation types, coverage analysis | When analyzing trigger coverage |
325
+ | `references/interactive-config.md` | Pre-flight config pattern, model tiers | Before running mutating workflows |
326
+ | `references/setup-patterns.md` | Platform-specific setup patterns | During complex setup scenarios |
327
+ | `settings_snippet.json` | Claude Code hook configuration template | During initialization |
328
+ | `assets/*.json` | Config templates (activation rules, settings) | During initialization |
package/skill/Workflows/AlphaUpload.md CHANGED
@@ -11,10 +11,10 @@ selftune alpha upload [--dry-run]
11
11
 
12
12
  ## Flags
13
13
 
14
- | Flag | Meaning | Default |
15
- |------|---------|---------|
16
- | `--dry-run` | Stage and summarize the upload without sending the HTTP request | Off |
17
- | `-h`, `--help` | Show command help | Off |
14
+ | Flag | Meaning | Default |
15
+ | -------------- | --------------------------------------------------------------- | ------- |
16
+ | `--dry-run` | Stage and summarize the upload without sending the HTTP request | Off |
17
+ | `-h`, `--help` | Show command help | Off |
18
18
 
19
19
  ## Behavior
20
20
 
package/skill/Workflows/AutoActivation.md CHANGED
@@ -35,12 +35,12 @@ Detection scans all hook entries in settings for any command containing
35
35
 
36
36
  ## Default Rules
37
37
 
38
- | Rule ID | Description | Trigger Condition | Suggestion |
39
- |---------|-------------|-------------------|------------|
40
- | `post-session-diagnostic` | Suggest diagnostic review | >2 unmatched queries in current session | `selftune last` |
41
- | `grading-threshold-breach` | Suggest evolution | Session pass rate < 0.6 (60%) | `selftune evolve` |
42
- | `stale-evolution` | Suggest evolution | >7 days since last evolution AND pending false negatives exist | `selftune evolve` |
43
- | `regression-detected` | Suggest rollback | Watch snapshot shows `regression_detected: true` | `selftune evolve rollback` |
38
+ | Rule ID | Description | Trigger Condition | Suggestion |
39
+ | -------------------------- | ------------------------- | -------------------------------------------------------------- | -------------------------- |
40
+ | `post-session-diagnostic` | Suggest diagnostic review | >2 unmatched queries in current session | `selftune last` |
41
+ | `grading-threshold-breach` | Suggest evolution | Session pass rate < 0.6 (60%) | `selftune evolve` |
42
+ | `stale-evolution` | Suggest evolution | >7 days since last evolution AND pending false negatives exist | `selftune evolve` |
43
+ | `regression-detected` | Suggest rollback | Watch snapshot shows `regression_detected: true` | `selftune evolve rollback` |
44
44
 
45
45
  ### Rule Details
46
46
 
@@ -122,24 +122,29 @@ Delete or comment out the entry to disable all auto-activation suggestions.
122
122
  ## Common Patterns
123
123
 
124
124
  **User wants to disable auto-suggestions**
125
+
125
126
  > Remove the auto-activate hook entry from `~/.claude/settings.json`
126
127
  > (see Disabling section above). Each rule fires at most once per session.
127
128
 
128
129
  **User asks why selftune suggestions appear**
130
+
129
131
  > Explain that the auto-activate hook detected an actionable condition.
130
132
  > Parse the suggestion text to identify which rule fired and report the
131
133
  > recommended action.
132
134
 
133
135
  **Suggestions are not appearing when expected**
136
+
134
137
  > Run `selftune doctor` to verify the hook is installed. Check that
135
138
  > `UserPromptSubmit` includes the auto-activate hook in settings.
136
139
 
137
140
  **PAI coexistence conflict**
141
+
138
142
  > Verify PAI's `skill-activation-prompt` hook is in `~/.claude/settings.json`.
139
143
  > If present, selftune skips all suggestions automatically. If the user
140
144
  > sees duplicates, one of the two hooks is misconfigured.
141
145
 
142
146
  **User wants custom activation rules**
147
+
143
148
  > Direct the user to `cli/selftune/activation-rules.ts`. New rules must
144
149
  > conform to the `ActivationRule` interface: pure filesystem readers with
145
150
  > no network calls or heavy imports.
package/skill/Workflows/Badge.md CHANGED
@@ -16,32 +16,37 @@ selftune badge --skill <name> [--format svg|markdown|url] [--output <path>]
16
16
 
17
17
  ## Options
18
18
 
19
- | Option | Required | Default | Description |
20
- |--------|----------|---------|-------------|
21
- | `--skill` | Yes | -- | Skill name to generate badge for |
22
- | `--format` | No | `svg` | Output format: `svg`, `markdown`, or `url` |
23
- | `--output` | No | stdout | Write output to file |
24
- | `--help` | No | -- | Show usage information |
19
+ | Option | Required | Default | Description |
20
+ | ---------- | -------- | ------- | ------------------------------------------ |
21
+ | `--skill` | Yes | -- | Skill name to generate badge for |
22
+ | `--format` | No | `svg` | Output format: `svg`, `markdown`, or `url` |
23
+ | `--output` | No | stdout | Write output to file |
24
+ | `--help` | No | -- | Show usage information |
25
25
 
26
26
  ## Examples
27
27
 
28
28
  ### Generate SVG badge
29
+
29
30
  ```bash
30
31
  selftune badge --skill my-skill --format svg > badge.svg
31
32
  ```
32
33
 
33
34
  ### Get markdown for README
35
+
34
36
  ```bash
35
37
  selftune badge --skill my-skill --format markdown
36
38
  ```
39
+
37
40
  Output: `![Skill Health: my-skill](https://img.shields.io/badge/Skill%20Health-87%25%20%E2%86%91-4c1)`
38
41
 
39
42
  ### Get shields.io URL
43
+
40
44
  ```bash
41
45
  selftune badge --skill my-skill --format url
42
46
  ```
43
47
 
44
48
  ### Write badge to file
49
+
45
50
  ```bash
46
51
  selftune badge --skill my-skill --output badge.svg
47
52
  ```
@@ -59,16 +64,17 @@ Markdown and URL formats use shields.io, which renders its own badge — the log
59
64
 
60
65
  ## Badge Colors
61
66
 
62
- | Pass Rate | Color | Hex |
63
- |-----------|-------|-----|
64
- | > 80% | Green | `#4c1` |
65
- | 60-80% | Yellow | `#dfb317` |
66
- | < 60% | Red | `#e05d44` |
67
- | No data | Gray | `#9f9f9f` |
67
+ | Pass Rate | Color | Hex |
68
+ | --------- | ------ | --------- |
69
+ | > 80% | Green | `#4c1` |
70
+ | 60-80% | Yellow | `#dfb317` |
71
+ | < 60% | Red | `#e05d44` |
72
+ | No data | Gray | `#9f9f9f` |
68
73
 
69
74
  ## Embedding in README
70
75
 
71
76
  Add to your skill's README.md:
77
+
72
78
  ```markdown
73
79
  ![Skill Health: my-skill](https://img.shields.io/badge/Skill%20Health-87%25%20%E2%86%91-4c1)
74
80
  ```
@@ -104,10 +110,10 @@ The hosted badge service at `badge.selftune.dev` aggregates community contributi
104
110
 
105
111
  ### Endpoints
106
112
 
107
- | Route | Method | Description |
108
- |-------|--------|-------------|
109
- | `/badge/:skill` | GET | SVG badge from aggregated community data |
110
- | `/badge/:org/:skill` | GET | Organization-scoped SVG badge |
113
+ | Route | Method | Description |
114
+ | -------------------- | ------ | ---------------------------------------- |
115
+ | `/badge/:skill` | GET | SVG badge from aggregated community data |
116
+ | `/badge/:org/:skill` | GET | Organization-scoped SVG badge |
111
117
 
112
118
  ### Embedding from hosted service
113
119
 
@@ -7,6 +7,7 @@ improvement in pass rate that the skill provides.
7
7
  ## When to Invoke
8
8
 
9
9
  Invoke this workflow when the user requests any of the following:
10
+
10
11
  - Measuring whether a skill adds value or is worth keeping
11
12
  - Comparing skill performance against a no-skill baseline
12
13
  - Deciding whether to evolve or rework a skill
@@ -20,12 +21,12 @@ selftune grade baseline --skill <name> --skill-path <path> [options]
20
21
 
21
22
  ## Options
22
23
 
23
- | Flag | Description | Default |
24
- |------|-------------|---------|
25
- | `--skill <name>` | Skill name | Required |
26
- | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
27
- | `--eval-set <path>` | Pre-built eval set JSON | Auto-generated from logs |
28
- | `--agent <name>` | Agent CLI to use | Auto-detected |
24
+ | Flag | Description | Default |
25
+ | --------------------- | ---------------------------- | ------------------------ |
26
+ | `--skill <name>` | Skill name | Required |
27
+ | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
28
+ | `--eval-set <path>` | Pre-built eval set JSON | Auto-generated from logs |
29
+ | `--agent <name>` | Agent CLI to use | Auto-detected |
29
30
 
30
31
  ## Output Format
31
32
 
@@ -69,36 +70,33 @@ skipped — the skill needs fundamental rework, not description tweaks.
69
70
 
70
71
  Before running baseline measurement, use the `AskUserQuestion` tool to present structured configuration options.
71
72
 
72
- If the user responds with "use defaults", cancels, or similar shorthand, skip to step 1 using the recommended defaults.
73
+ If the user responds with "use defaults" or similar shorthand, skip to step 1 using the recommended defaults. If the user cancels, stop — do not proceed with defaults.
73
74
 
74
- Use `AskUserQuestion` with these questions:
75
+ Ask one question at a time with `AskUserQuestion`, in this order:
75
76
 
76
- ```json
77
- {
78
- "questions": [
79
- {
80
- "question": "Eval Set Source",
81
- "options": ["Auto-generate from logs (recommended if logs exist)", "Use existing eval set file", "Generate synthetic evals first (for new skills)"]
82
- },
83
- {
84
- "question": "Agent CLI",
85
- "options": ["Auto-detect (recommended)", "claude", "codex", "opencode"]
86
- }
87
- ]
88
- }
89
- ```
77
+ 1. `Eval Set Source`
78
+ Options:
79
+ - `Auto-generate from logs (recommended if logs exist)`
80
+ - `Use existing eval set file`
81
+ - `Generate synthetic evals first (for new skills)`
82
+ 2. `Agent CLI`
83
+ Options:
84
+ - `Auto-detect (recommended)`
85
+ - `claude`
86
+ - `codex`
87
+ - `opencode`
90
88
 
91
- If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
89
+ If `AskUserQuestion` is not available or Claude does not invoke it, fall back to presenting the same choices as inline numbered options.
92
90
 
93
91
  After the user responds, parse their selections and map each choice to the corresponding CLI flags:
94
92
 
95
- | Selection | CLI Flag |
96
- |-----------|----------|
97
- | 1a (auto-generate) | _(no flag, default)_ |
98
- | 1b (existing eval set) | `--eval-set <path>` |
99
- | 1c (synthetic first) | Run Evals workflow with `--synthetic` first, then use output |
100
- | 2a (auto-detect) | _(no flag, default)_ |
101
- | 2b (specify agent) | `--agent <name>` |
93
+ | Selection | CLI Flag |
94
+ | ---------------------- | ------------------------------------------------------------ |
95
+ | 1a (auto-generate) | _(no flag, default)_ |
96
+ | 1b (existing eval set) | `--eval-set <path>` |
97
+ | 1c (synthetic first) | Run Evals workflow with `--synthetic` first, then use output |
98
+ | 2a (auto-detect) | _(no flag, default)_ |
99
+ | 2b–2d (specific agent: claude, codex, or opencode) | `--agent <name>` |
102
100
 
103
101
  Show a confirmation summary to the user:
104
102
 
@@ -122,12 +120,12 @@ Parse the JSON output and extract `lift` and `adds_value` fields.
122
120
 
123
121
  ### 2. Interpret Results
124
122
 
125
- | Lift | Interpretation | Action |
126
- |------|---------------|--------|
127
- | >= 0.20 | Strong value | Skill is working well |
128
- | 0.05–0.20 | Moderate value | Consider evolving to improve |
129
- | < 0.05 | Minimal value | Skill may need rework, not just evolution |
130
- | < 0 | Negative value | Skill is hurting — investigate or disable |
123
+ | Lift | Interpretation | Action |
124
+ | --------- | -------------- | ----------------------------------------- |
125
+ | >= 0.20 | Strong value | Skill is working well |
126
+ | 0.05–0.20 | Moderate value | Consider evolving to improve |
127
+ | 0–0.05 | Minimal value | Skill may need rework, not just evolution |
128
+ | < 0 | Negative value | Skill is hurting — investigate or disable |
131
129
 
132
130
  Report the interpretation to the user based on the lift value.
133
131
 
@@ -12,11 +12,11 @@ selftune eval composability --skill <name> [options]
12
12
 
13
13
  ## Options
14
14
 
15
- | Flag | Description | Default |
16
- |------|-------------|---------|
17
- | `--skill <name>` | Skill to analyze | Required |
18
- | `--window <n>` | Only analyze sessions from last N days | All sessions |
19
- | `--telemetry-log <path>` | Path to telemetry log | `~/.claude/session_telemetry_log.jsonl` |
15
+ | Flag | Description | Default |
16
+ | ------------------------ | -------------------------------------- | --------------------------------------- |
17
+ | `--skill <name>` | Skill to analyze | Required |
18
+ | `--window <n>` | Only analyze sessions from last N days | All sessions |
19
+ | `--telemetry-log <path>` | Path to telemetry log | `~/.claude/session_telemetry_log.jsonl` |
20
20
 
21
21
  ## Output Format
22
22
 
@@ -70,16 +70,17 @@ selftune eval composability --skill Research
70
70
 
71
71
  ### 2. Interpret Results
72
72
 
73
- | Conflict Score | Interpretation |
74
- |---------------|---------------|
75
- | 0.0–0.1 | No conflict — skills work well together |
76
- | 0.1–0.3 | Minor friction — monitor but no action needed |
77
- | 0.3–0.6 | Moderate conflict — investigate trigger overlap |
78
- | 0.6–1.0 | Severe conflict — skills likely interfere with each other |
73
+ | Conflict Score | Interpretation |
74
+ | -------------- | --------------------------------------------------------- |
75
+ | 0.0–0.1 | No conflict — skills work well together |
76
+ | 0.1–0.3 | Minor friction — monitor but no action needed |
77
+ | 0.3–0.6 | Moderate conflict — investigate trigger overlap |
78
+ | 0.6–1.0 | Severe conflict — skills likely interfere with each other |
79
79
 
80
80
  ### 3. Address Conflicts
81
81
 
82
82
  When conflict candidates are identified, present them to the user with recommended actions:
83
+
83
84
  - Check for trigger keyword overlap between the skills
84
85
  - Check if one skill's workflow interferes with the other's
85
86
  - Consider evolving descriptions to reduce false triggers
@@ -95,13 +96,17 @@ resolution plan with trigger ownership recommendations.
95
96
  ## Common Patterns
96
97
 
97
98
  **"Are there conflicts between my skills?"**
99
+
98
100
  > `selftune eval composability --skill Research`
99
101
 
100
102
  **"Check composability for recent sessions only"**
103
+
101
104
  > `selftune eval composability --skill pptx --window 7`
102
105
 
103
106
  **"Which skills conflict with Research?"**
107
+
104
108
  > Run composability and check the `conflict_candidates` array.
105
109
 
106
110
  **"Why are sessions with multiple skills failing?"**
111
+
107
112
  > Run composability for each skill involved, look for high conflict scores.