selftune 0.2.9 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/README.md +35 -35
  2. package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +16 -0
  3. package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +2 -0
  4. package/apps/local-dashboard/dist/assets/rolldown-runtime-Dw2cE7zH.js +1 -0
  5. package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +11 -0
  6. package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +8 -0
  7. package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +12 -0
  8. package/apps/local-dashboard/dist/index.html +16 -15
  9. package/bin/selftune.cjs +1 -1
  10. package/cli/selftune/activation-rules.ts +1 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
  12. package/cli/selftune/alpha-upload/stage-canonical.ts +94 -0
  13. package/cli/selftune/auth/device-code.ts +32 -0
  14. package/cli/selftune/auto-update.ts +12 -0
  15. package/cli/selftune/badge/badge.ts +1 -0
  16. package/cli/selftune/canonical-export.ts +5 -0
  17. package/cli/selftune/claude-agents.ts +154 -0
  18. package/cli/selftune/contribute/bundle.ts +1 -0
  19. package/cli/selftune/contribute/contribute.ts +1 -0
  20. package/cli/selftune/cron/setup.ts +2 -2
  21. package/cli/selftune/dashboard-server.ts +1 -0
  22. package/cli/selftune/eval/hooks-to-evals.ts +1 -0
  23. package/cli/selftune/eval/import-skillsbench.ts +1 -0
  24. package/cli/selftune/eval/synthetic-evals.ts +2 -3
  25. package/cli/selftune/eval/unit-test.ts +1 -0
  26. package/cli/selftune/evolution/deploy-proposal.ts +9 -238
  27. package/cli/selftune/evolution/evolve-body.ts +93 -6
  28. package/cli/selftune/evolution/evolve.ts +3 -7
  29. package/cli/selftune/evolution/propose-body.ts +3 -2
  30. package/cli/selftune/evolution/propose-routing.ts +3 -2
  31. package/cli/selftune/evolution/refine-body.ts +3 -2
  32. package/cli/selftune/evolution/rollback.ts +1 -1
  33. package/cli/selftune/export.ts +1 -0
  34. package/cli/selftune/grading/grade-session.ts +8 -0
  35. package/cli/selftune/hooks/auto-activate.ts +1 -0
  36. package/cli/selftune/hooks/evolution-guard.ts +1 -1
  37. package/cli/selftune/hooks/prompt-log.ts +1 -0
  38. package/cli/selftune/hooks/session-stop.ts +34 -40
  39. package/cli/selftune/hooks/skill-change-guard.ts +1 -0
  40. package/cli/selftune/hooks/skill-eval.ts +1 -1
  41. package/cli/selftune/index.ts +23 -14
  42. package/cli/selftune/ingestors/claude-replay.ts +1 -0
  43. package/cli/selftune/ingestors/codex-rollout.ts +1 -0
  44. package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
  45. package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
  46. package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
  47. package/cli/selftune/init.ts +121 -29
  48. package/cli/selftune/localdb/db.ts +1 -0
  49. package/cli/selftune/localdb/direct-write.ts +39 -0
  50. package/cli/selftune/localdb/materialize.ts +2 -0
  51. package/cli/selftune/localdb/queries.ts +53 -0
  52. package/cli/selftune/localdb/schema.ts +28 -0
  53. package/cli/selftune/normalization.ts +1 -0
  54. package/cli/selftune/observability.ts +1 -0
  55. package/cli/selftune/repair/skill-usage.ts +1 -0
  56. package/cli/selftune/routes/orchestrate-runs.ts +1 -0
  57. package/cli/selftune/routes/overview.ts +1 -0
  58. package/cli/selftune/routes/report.ts +1 -1
  59. package/cli/selftune/routes/skill-report.ts +2 -1
  60. package/cli/selftune/status.ts +1 -1
  61. package/cli/selftune/sync.ts +30 -1
  62. package/cli/selftune/uninstall.ts +412 -0
  63. package/cli/selftune/utils/canonical-log.ts +2 -0
  64. package/cli/selftune/utils/frontmatter.ts +50 -7
  65. package/cli/selftune/utils/jsonl.ts +1 -0
  66. package/cli/selftune/utils/llm-call.ts +131 -3
  67. package/cli/selftune/utils/skill-log.ts +1 -0
  68. package/cli/selftune/utils/transcript.ts +1 -0
  69. package/cli/selftune/utils/trigger-check.ts +1 -1
  70. package/cli/selftune/workflows/skill-md-writer.ts +5 -5
  71. package/cli/selftune/workflows/workflows.ts +1 -0
  72. package/package.json +37 -33
  73. package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
  74. package/packages/telemetry-contract/package.json +1 -1
  75. package/packages/telemetry-contract/src/schemas.ts +1 -0
  76. package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
  77. package/packages/ui/README.md +35 -34
  78. package/packages/ui/package.json +3 -3
  79. package/packages/ui/src/components/ActivityTimeline.tsx +50 -43
  80. package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
  81. package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
  82. package/packages/ui/src/components/InfoTip.tsx +4 -3
  83. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
  84. package/packages/ui/src/components/section-cards.tsx +20 -25
  85. package/packages/ui/src/components/skill-health-grid.tsx +213 -193
  86. package/packages/ui/src/lib/constants.tsx +1 -0
  87. package/packages/ui/src/primitives/badge.tsx +12 -15
  88. package/packages/ui/src/primitives/button.tsx +7 -7
  89. package/packages/ui/src/primitives/card.tsx +15 -26
  90. package/packages/ui/src/primitives/checkbox.tsx +7 -8
  91. package/packages/ui/src/primitives/collapsible.tsx +5 -5
  92. package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
  93. package/packages/ui/src/primitives/label.tsx +6 -6
  94. package/packages/ui/src/primitives/select.tsx +28 -37
  95. package/packages/ui/src/primitives/table.tsx +17 -44
  96. package/packages/ui/src/primitives/tabs.tsx +14 -21
  97. package/packages/ui/src/primitives/tooltip.tsx +10 -22
  98. package/skill/SKILL.md +70 -57
  99. package/skill/Workflows/AlphaUpload.md +4 -4
  100. package/skill/Workflows/AutoActivation.md +11 -6
  101. package/skill/Workflows/Badge.md +22 -16
  102. package/skill/Workflows/Baseline.md +34 -36
  103. package/skill/Workflows/Composability.md +16 -11
  104. package/skill/Workflows/Contribute.md +26 -21
  105. package/skill/Workflows/Cron.md +23 -22
  106. package/skill/Workflows/Dashboard.md +32 -27
  107. package/skill/Workflows/Doctor.md +33 -27
  108. package/skill/Workflows/Evals.md +48 -47
  109. package/skill/Workflows/EvolutionMemory.md +31 -21
  110. package/skill/Workflows/Evolve.md +84 -82
  111. package/skill/Workflows/EvolveBody.md +58 -47
  112. package/skill/Workflows/Grade.md +16 -13
  113. package/skill/Workflows/ImportSkillsBench.md +9 -6
  114. package/skill/Workflows/Ingest.md +36 -21
  115. package/skill/Workflows/Initialize.md +108 -40
  116. package/skill/Workflows/Orchestrate.md +22 -16
  117. package/skill/Workflows/Replay.md +12 -7
  118. package/skill/Workflows/Rollback.md +13 -6
  119. package/skill/Workflows/Schedule.md +6 -6
  120. package/skill/Workflows/Sync.md +18 -11
  121. package/skill/Workflows/UnitTest.md +28 -17
  122. package/skill/Workflows/Watch.md +28 -21
  123. package/skill/agents/diagnosis-analyst.md +11 -0
  124. package/skill/agents/evolution-reviewer.md +15 -1
  125. package/skill/agents/integration-guide.md +10 -0
  126. package/skill/agents/pattern-analyst.md +12 -1
  127. package/skill/references/grading-methodology.md +23 -24
  128. package/skill/references/interactive-config.md +7 -7
  129. package/skill/references/invocation-taxonomy.md +22 -20
  130. package/skill/references/logs.md +14 -6
  131. package/skill/references/setup-patterns.md +4 -2
  132. package/.claude/agents/diagnosis-analyst.md +0 -156
  133. package/.claude/agents/evolution-reviewer.md +0 -180
  134. package/.claude/agents/integration-guide.md +0 -212
  135. package/.claude/agents/pattern-analyst.md +0 -160
  136. package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +0 -1
  137. package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +0 -15
  138. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +0 -60
  139. package/apps/local-dashboard/dist/assets/vendor-table-dK1QMLq9.js +0 -26
  140. package/apps/local-dashboard/dist/assets/vendor-ui-CO2mrx6e.js +0 -341
@@ -17,11 +17,11 @@ OpenClaw-specific cron integration.
17
17
 
18
18
  Auto-detect the current platform and install scheduled jobs.
19
19
 
20
- | Flag | Description | Default |
21
- |------|-------------|---------|
22
- | `--platform <name>` | Force a specific platform (`openclaw`, `cron`, `launchd`, `systemd`) | Auto-detect |
23
- | `--dry-run` | Preview without installing | Off |
24
- | `--tz <timezone>` | IANA timezone for job schedules (OpenClaw only) | Flag > `TZ` env > system timezone |
20
+ | Flag | Description | Default |
21
+ | ------------------- | -------------------------------------------------------------------- | --------------------------------- |
22
+ | `--platform <name>` | Force a specific platform (`openclaw`, `cron`, `launchd`, `systemd`) | Auto-detect |
23
+ | `--dry-run` | Preview without installing | Off |
24
+ | `--tz <timezone>` | IANA timezone for job schedules (OpenClaw only) | Flag > `TZ` env > system timezone |
25
25
 
26
26
  Platform auto-detection: macOS → launchd, Linux → systemd, other → cron.
27
27
 
@@ -43,9 +43,9 @@ No flags.
43
43
 
44
44
  Remove all selftune cron jobs from OpenClaw.
45
45
 
46
- | Flag | Description | Default |
47
- |------|-------------|---------|
48
- | `--dry-run` | Preview which jobs would be removed without deleting | Off |
46
+ | Flag | Description | Default |
47
+ | ----------- | ---------------------------------------------------- | ------- |
48
+ | `--dry-run` | Preview which jobs would be removed without deleting | Off |
49
49
 
50
50
  ## Aliases
51
51
 
@@ -56,11 +56,11 @@ invocations with flags (e.g. `selftune schedule --platform launchd`) continue to
56
56
 
57
57
  Setup registers these jobs:
58
58
 
59
- | Name | Cron Expression | Schedule | Description |
60
- |------|----------------|----------|-------------|
61
- | `selftune-sync` | `*/30 * * * *` | Every 30 minutes | Sync source-truth telemetry |
62
- | `selftune-status` | `0 8 * * *` | Daily at 8am | Health check — report skills with pass rate below 80% |
63
- | `selftune-orchestrate` | `0 */6 * * *` | Every 6 hours | Full autonomous loop: sync → candidate selection → evolve → watch |
59
+ | Name | Cron Expression | Schedule | Description |
60
+ | ---------------------- | --------------- | ---------------- | ----------------------------------------------------------------- |
61
+ | `selftune-sync` | `*/30 * * * *` | Every 30 minutes | Sync source-truth telemetry |
62
+ | `selftune-status` | `0 8 * * *` | Daily at 8am | Health check — report skills with pass rate below 80% |
63
+ | `selftune-orchestrate` | `0 */6 * * *` | Every 6 hours | Full autonomous loop: sync → candidate selection → evolve → watch |
64
64
 
65
65
  All jobs run in **isolated session** mode — each execution gets a clean
66
66
  session with no context accumulation from previous runs.
@@ -79,6 +79,7 @@ session with no context accumulation from previous runs.
79
79
  3. Verify with `selftune status` after the first scheduled run fires
80
80
 
81
81
  For OpenClaw specifically:
82
+
82
83
  1. Run `selftune cron setup --platform openclaw --dry-run` to preview
83
84
  2. Run `selftune cron setup --platform openclaw` to register jobs
84
85
  3. Run `selftune cron list` to verify jobs are registered
@@ -111,15 +112,15 @@ interactive mode is for user-directed improvements.
111
112
 
112
113
  ## Safety Controls
113
114
 
114
- | Control | How It Works |
115
- |---------|-------------|
116
- | Dry-run first | `selftune cron setup --dry-run` previews commands before installing |
117
- | Regression threshold | Evolution only deploys if improvement exceeds 5% on existing triggers |
118
- | Auto-rollback | `selftune watch` automatically rolls back if pass rate drops below baseline minus threshold |
119
- | Audit trail | Every evolution recorded in `evolution_audit_log.jsonl` with full history |
120
- | SKILL.md backup | `.bak` file created before every deploy — primary rollback path exists via .bak; fallback depends on audit metadata integrity |
121
- | Human override | `selftune evolve rollback --skill <name> --skill-path <path>` available anytime to manually revert |
122
- | Pin descriptions | Config flag to freeze specific skills and prevent evolution on sensitive skills |
115
+ | Control | How It Works |
116
+ | -------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
117
+ | Dry-run first | `selftune cron setup --dry-run` previews commands before installing |
118
+ | Regression threshold | Evolution only deploys if improvement exceeds 5% on existing triggers |
119
+ | Auto-rollback | `selftune watch` automatically rolls back if pass rate drops below baseline minus threshold |
120
+ | Audit trail | Every evolution recorded in `evolution_audit_log.jsonl` with full history |
121
+ | SKILL.md backup | `.bak` file created before every deploy — primary rollback path exists via .bak; fallback depends on audit metadata integrity |
122
+ | Human override | `selftune evolve rollback --skill <name> --skill-path <path>` available anytime to manually revert |
123
+ | Pin descriptions | Config flag to freeze specific skills and prevent evolution on sensitive skills |
123
124
 
124
125
  ## Common Patterns
125
126
 
@@ -19,11 +19,11 @@ generate JSONL from SQLite for debugging or offline analysis.
19
19
 
20
20
  ## Options
21
21
 
22
- | Flag | Description | Default |
23
- |------|-------------|---------|
24
- | `--port <port>` | Custom port for the server | 3141 |
25
- | `--no-open` | Start server without opening browser | Off |
26
- | `--serve` | *(Deprecated)* Alias for default behavior | — |
22
+ | Flag | Description | Default |
23
+ | --------------- | ----------------------------------------- | ------- |
24
+ | `--port <port>` | Custom port for the server | 3141 |
25
+ | `--no-open` | Start server without opening browser | Off |
26
+ | `--serve` | _(Deprecated)_ Alias for default behavior | — |
27
27
 
28
28
  Note: `--export` and `--out` were removed. The CLI will error if used,
29
29
  suggesting `selftune dashboard` instead.
@@ -37,18 +37,18 @@ override.
37
37
 
38
38
  ### Endpoints
39
39
 
40
- | Method | Path | Description |
41
- |--------|------|-------------|
42
- | `GET` | `/` | Serve dashboard SPA shell |
43
- | `GET` | `/api/v2/overview` | SQLite-backed overview payload |
44
- | `GET` | `/api/v2/skills/:name` | SQLite-backed per-skill report |
45
- | `GET` | `/api/v2/orchestrate-runs` | Recent orchestrate run reports |
46
- | `GET` | `/api/v2/doctor` | System health diagnostics (config, logs, hooks, evolution) |
47
- | `GET` | `/api/v2/events` | SSE stream for live dashboard updates |
48
- | `GET` | `/api/health` | Dashboard server health probe |
49
- | `POST` | `/api/actions/watch` | Trigger `selftune watch` for a skill |
50
- | `POST` | `/api/actions/evolve` | Trigger `selftune evolve` for a skill |
51
- | `POST` | `/api/actions/rollback` | Trigger `selftune evolve rollback` for a skill |
40
+ | Method | Path | Description |
41
+ | ------ | -------------------------- | ---------------------------------------------------------- |
42
+ | `GET` | `/` | Serve dashboard SPA shell |
43
+ | `GET` | `/api/v2/overview` | SQLite-backed overview payload |
44
+ | `GET` | `/api/v2/skills/:name` | SQLite-backed per-skill report |
45
+ | `GET` | `/api/v2/orchestrate-runs` | Recent orchestrate run reports |
46
+ | `GET` | `/api/v2/doctor` | System health diagnostics (config, logs, hooks, evolution) |
47
+ | `GET` | `/api/v2/events` | SSE stream for live dashboard updates |
48
+ | `GET` | `/api/health` | Dashboard server health probe |
49
+ | `POST` | `/api/actions/watch` | Trigger `selftune watch` for a skill |
50
+ | `POST` | `/api/actions/evolve` | Trigger `selftune evolve` for a skill |
51
+ | `POST` | `/api/actions/rollback` | Trigger `selftune evolve rollback` for a skill |
52
52
 
53
53
  ### Live Updates (SSE)
54
54
 
@@ -110,16 +110,16 @@ database and stops the server.
110
110
 
111
111
  The dashboard displays data from these sources:
112
112
 
113
- | Data | Source | Description |
114
- |------|--------|-------------|
115
- | Telemetry | SQLite (`~/.selftune/selftune.db`) | Session-level telemetry records |
116
- | Skills | SQLite (`~/.selftune/selftune.db`) | Skill activation and usage events |
117
- | Queries | SQLite (`~/.selftune/selftune.db`) | All user queries across sessions |
118
- | Evolution | SQLite (`~/.selftune/selftune.db`) | Evolution audit trail (create, deploy, rollback) |
119
- | Decisions | `~/.selftune/memory/` | Evolution decision records |
120
- | Snapshots | Computed | Per-skill monitoring snapshots (pass rate, regression status) |
121
- | Unmatched | Computed | Queries that did not trigger any skill |
122
- | Pending | Computed | Evolution proposals not yet deployed, rejected, or rolled back |
113
+ | Data | Source | Description |
114
+ | --------- | ---------------------------------- | -------------------------------------------------------------- |
115
+ | Telemetry | SQLite (`~/.selftune/selftune.db`) | Session-level telemetry records |
116
+ | Skills | SQLite (`~/.selftune/selftune.db`) | Skill activation and usage events |
117
+ | Queries | SQLite (`~/.selftune/selftune.db`) | All user queries across sessions |
118
+ | Evolution | SQLite (`~/.selftune/selftune.db`) | Evolution audit trail (create, deploy, rollback) |
119
+ | Decisions | `~/.selftune/memory/` | Evolution decision records |
120
+ | Snapshots | Computed | Per-skill monitoring snapshots (pass rate, regression status) |
121
+ | Unmatched | Computed | Queries that did not trigger any skill |
122
+ | Pending | Computed | Evolution proposals not yet deployed, rejected, or rolled back |
123
123
 
124
124
  If no log data is found, the server reports an error listing the
125
125
  checked file paths.
@@ -142,21 +142,26 @@ to trigger watch, evolve, or rollback directly from the dashboard.
142
142
  ## Common Patterns
143
143
 
144
144
  **User wants to see skill performance visually**
145
+
145
146
  > Run `selftune dashboard`. This opens a browser with a point-in-time snapshot.
146
147
  > Report to the user that the dashboard is open.
147
148
 
148
149
  **User wants live monitoring**
150
+
149
151
  > Run `selftune dashboard`. The server provides real-time updates via SSE
150
152
  > (~1 second latency).
151
153
 
152
154
  **Dashboard shows no data**
155
+
153
156
  > Run `selftune doctor` to verify hooks are installed. If hooks are missing,
154
157
  > route to the Initialize workflow. If hooks are present but no sessions
155
158
  > have run, inform the user that sessions must generate telemetry first.
156
159
 
157
160
  **User wants a different port**
161
+
158
162
  > Run `selftune dashboard --port <port>`. Port must be 1-65535.
159
163
 
160
164
  **User wants to trigger actions from the dashboard**
165
+
161
166
  > Run `selftune dashboard`. The dashboard provides action buttons for
162
167
  > watch, evolve, and rollback per skill via POST endpoints.
@@ -96,47 +96,47 @@ or queue checks when alpha is configured:
96
96
 
97
97
  ### Config Check
98
98
 
99
- | Check name | What it validates |
100
- |------------|-------------------|
101
- | `config` | `~/.selftune/config.json` exists, is valid JSON, contains `agent_type` and `llm_mode` fields |
99
+ | Check name | What it validates |
100
+ | ---------- | -------------------------------------------------------------------------------------------- |
101
+ | `config` | `~/.selftune/config.json` exists, is valid JSON, contains `agent_type` and `llm_mode` fields |
102
102
 
103
103
  ### Log Checks (4 checks)
104
104
 
105
- | Check name | What it validates |
106
- |------------|-------------------|
105
+ | Check name | What it validates |
106
+ | ----------------------- | ----------------------------------------------------- |
107
107
  | `log_session_telemetry` | `session_telemetry_log.jsonl` exists and is parseable |
108
- | `log_skill_usage` | `skill_usage_log.jsonl` exists and is parseable |
109
- | `log_all_queries` | `all_queries_log.jsonl` exists and is parseable |
110
- | `log_evolution_audit` | `evolution_audit_log.jsonl` exists and is parseable |
108
+ | `log_skill_usage` | `skill_usage_log.jsonl` exists and is parseable |
109
+ | `log_all_queries` | `all_queries_log.jsonl` exists and is parseable |
110
+ | `log_evolution_audit` | `evolution_audit_log.jsonl` exists and is parseable |
111
111
 
112
112
  ### Hook Check
113
113
 
114
- | Check name | What it validates |
115
- |------------|-------------------|
114
+ | Check name | What it validates |
115
+ | --------------- | ------------------------------------------------------- |
116
116
  | `hook_settings` | `~/.claude/settings.json` has selftune hooks configured |
117
117
 
118
118
  ### Evolution Check
119
119
 
120
- | Check name | What it validates |
121
- |------------|-------------------|
120
+ | Check name | What it validates |
121
+ | ----------------- | ------------------------------------------------ |
122
122
  | `evolution_audit` | Evolution audit log entries have valid structure |
123
123
 
124
124
  ### Integrity Check
125
125
 
126
- | Check name | What it validates |
127
- |------------|-------------------|
126
+ | Check name | What it validates |
127
+ | -------------------------- | ------------------------------------------------------------------------------------------------------------- |
128
128
  | `dashboard_freshness_mode` | Warns when the dashboard still relies on legacy JSONL watcher invalidation instead of SQLite WAL live refresh |
129
129
 
130
130
  ### Skill Version Sync Check
131
131
 
132
- | Check name | What it validates |
133
- |------------|-------------------|
132
+ | Check name | What it validates |
133
+ | -------------------- | --------------------------------------------------------- |
134
134
  | `skill_version_sync` | SKILL.md frontmatter version matches package.json version |
135
135
 
136
136
  ### Version Check
137
137
 
138
- | Check name | What it validates |
139
- |------------|-------------------|
138
+ | Check name | What it validates |
139
+ | -------------------- | ------------------------------------------------ |
140
140
  | `version_up_to_date` | Installed version matches latest on npm registry |
141
141
 
142
142
  ## Steps
@@ -155,15 +155,15 @@ Parse the JSON output. If `healthy: true`, selftune is fully operational.
155
155
 
156
156
  For each failed check, take the appropriate action:
157
157
 
158
- | Failed check | Fix |
159
- |-------------|-----|
160
- | `config` | Run `selftune init` (or `selftune init --force` to regenerate). |
161
- | `log_*` | Run a session to generate initial log entries. Check hook installation with `selftune init`. |
162
- | `hook_settings` | Run `selftune init` to install hooks into `~/.claude/settings.json`. |
163
- | `evolution_audit` | Remove corrupted entries. Future operations will append clean entries. |
158
+ | Failed check | Fix |
159
+ | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
160
+ | `config` | Run `selftune init` (or `selftune init --force` to regenerate). |
161
+ | `log_*` | Run a session to generate initial log entries. Check hook installation with `selftune init`. |
162
+ | `hook_settings` | Run `selftune init` to install hooks into `~/.claude/settings.json`. |
163
+ | `evolution_audit` | Remove corrupted entries. Future operations will append clean entries. |
164
164
  | `dashboard_freshness_mode` | This is an operator warning, not a broken install. Expect possible freshness gaps for SQLite-only writes and export before destructive recovery. |
165
- | `skill_version_sync` | Run `bun run sync-version` to stamp SKILL.md from package.json. |
166
- | `version_up_to_date` | Run `npm install -g selftune` to update. |
165
+ | `skill_version_sync` | Run `bun run sync-version` to stamp SKILL.md from package.json. |
166
+ | `version_up_to_date` | Run `npm install -g selftune` to update. |
167
167
 
168
168
  ### 4. Re-run Doctor
169
169
 
@@ -181,6 +181,7 @@ for root cause analysis.
181
181
  **Symptoms:** `selftune status` shows alpha upload as "not enrolled" or "enrolled (missing credential)"
182
182
 
183
183
  **Diagnostic steps:**
184
+
184
185
  1. Check `selftune status` — look at "Alpha Upload" and "Cloud link" lines
185
186
  2. If `doctor` includes a `cloud_link` or alpha queue warning, prefer `.checks[].guidance.next_command`
186
187
  3. If "not enrolled" or "not linked": run `selftune init --alpha --alpha-email <email>` (opens browser for device-code auth)
@@ -192,23 +193,28 @@ for root cause analysis.
192
193
  ## Common Patterns
193
194
 
194
195
  **User reports something seems broken**
196
+
195
197
  > Run `selftune doctor`. Parse the JSON output for failed checks. Report
196
198
  > each failure's `name` and `message` to the user with the recommended fix.
197
199
 
198
200
  **User asks if hooks are working**
201
+
199
202
  > Run `selftune doctor`. Parse `.checks[]` for hook-related entries. If
200
203
  > hooks pass but no data appears, verify hook script paths in
201
204
  > `~/.claude/settings.json` point to actual files.
202
205
 
203
206
  **No telemetry data available**
207
+
204
208
  > Run `selftune doctor`. Route fixes by platform:
209
+ >
205
210
  > - **Claude Code** — route to the Initialize workflow to install hooks
206
211
  > - **Codex** — run `selftune ingest codex` or `selftune ingest wrap-codex`
207
212
  > - **OpenCode** — run `selftune ingest opencode`
208
213
  > - **OpenClaw** — run `selftune ingest openclaw`
209
- > At least one session must complete after setup to generate telemetry.
214
+ > At least one session must complete after setup to generate telemetry.
210
215
 
211
216
  **User asks to check selftune health**
217
+
212
218
  > Run `selftune doctor`. Parse `.healthy` and `.summary`. If `healthy: true`,
213
219
  > report that selftune is fully operational. If false, report failed checks
214
220
  > and recommended fixes.
@@ -7,6 +7,7 @@ its invocation type.
7
7
  ## When to Invoke
8
8
 
9
9
  Invoke this workflow when the user requests any of the following:
10
+
10
11
  - Generating eval sets or test data for a skill
11
12
  - Checking which skills are undertriggering
12
13
  - Viewing skill telemetry or usage stats
@@ -21,22 +22,22 @@ selftune eval generate --skill <name> [options]
21
22
 
22
23
  ## Options
23
24
 
24
- | Flag | Description | Default |
25
- |------|-------------|---------|
26
- | `--skill <name>` | Skill to generate evals for | Required (unless `--list-skills`) |
27
- | `--list-skills` | List all logged skills with query counts | Off |
28
- | `--stats` | Show aggregate telemetry stats for the skill | Off |
29
- | `--max <n>` | Maximum eval entries per side | 50 |
30
- | `--seed <n>` | Seed for deterministic shuffling | 42 |
31
- | `--output <path>` / `--out <path>` | Output file path | `{skillName}_trigger_eval.json` |
32
- | `--no-negatives` | Exclude negative examples from output | Off |
33
- | `--no-taxonomy` | Skip invocation_type classification | Off |
34
- | `--skill-log <path>` | Path to skill_usage_log.jsonl | Default log path |
35
- | `--query-log <path>` | Path to all_queries_log.jsonl | Default log path |
36
- | `--telemetry-log <path>` | Path to session_telemetry_log.jsonl | Default log path |
37
- | `--synthetic` | Generate evals from SKILL.md via LLM (no logs needed) | Off |
38
- | `--skill-path <path>` | Path to SKILL.md (required with `--synthetic`) | — |
39
- | `--model <model>` | LLM model to use for synthetic generation | Agent default |
25
+ | Flag | Description | Default |
26
+ | ---------------------------------- | ----------------------------------------------------- | --------------------------------- |
27
+ | `--skill <name>` | Skill to generate evals for | Required (unless `--list-skills`) |
28
+ | `--list-skills` | List all logged skills with query counts | Off |
29
+ | `--stats` | Show aggregate telemetry stats for the skill | Off |
30
+ | `--max <n>` | Maximum eval entries per side | 50 |
31
+ | `--seed <n>` | Seed for deterministic shuffling | 42 |
32
+ | `--output <path>` / `--out <path>` | Output file path | `{skillName}_trigger_eval.json` |
33
+ | `--no-negatives` | Exclude negative examples from output | Off |
34
+ | `--no-taxonomy` | Skip invocation_type classification | Off |
35
+ | `--skill-log <path>` | Path to skill_usage_log.jsonl | Default log path |
36
+ | `--query-log <path>` | Path to all_queries_log.jsonl | Default log path |
37
+ | `--telemetry-log <path>` | Path to session_telemetry_log.jsonl | Default log path |
38
+ | `--synthetic` | Generate evals from SKILL.md via LLM (no logs needed) | Off |
39
+ | `--skill-path <path>` | Path to SKILL.md (required with `--synthetic`) | — |
40
+ | `--model <model>` | LLM model to use for synthetic generation | Agent default |
40
41
 
41
42
  ## Output Format
42
43
 
@@ -126,6 +127,7 @@ selftune eval generate --skill pptx --synthetic --skill-path /path/to/skills/ppt
126
127
  ```
127
128
 
128
129
  The command:
130
+
129
131
  1. Reads the SKILL.md file content
130
132
  2. Loads real user queries from the database (if available) as few-shot style examples so synthetic queries match real phrasing patterns
131
133
  3. Sends skill content and real examples to an LLM with a prompt requesting realistic test queries
@@ -155,6 +157,7 @@ selftune eval generate --skill pptx --max 50 --output evals-pptx.json
155
157
  ```
156
158
 
157
159
  The command:
160
+
158
161
  1. Reads positive triggers from `skill_usage_log.jsonl`
159
162
  2. Reads all queries from `all_queries_log.jsonl`
160
163
  3. Identifies queries that should have triggered but did not
@@ -181,40 +184,36 @@ If the user responds with "use defaults" or similar shorthand, skip to step 1 us
181
184
 
182
185
  For `--list-skills` or `--stats` requests, skip pre-flight entirely — these are read-only operations.
183
186
 
184
- Use `AskUserQuestion` with these questions:
185
-
186
- ```json
187
- {
188
- "questions": [
189
- {
190
- "question": "Generation Mode",
191
- "options": ["Log-based — build from real usage logs (recommended if logs exist)", "Synthetic — generate from SKILL.md via LLM (for new skills)"]
192
- },
193
- {
194
- "question": "Model (for synthetic mode)",
195
- "options": ["Fast (haiku) — quick generation", "Balanced (sonnet) — better diversity (recommended)", "Best (opus) — highest quality"]
196
- },
197
- {
198
- "question": "Max Entries",
199
- "options": ["50 (default)", "25 (quick)", "100 (comprehensive)"]
200
- }
201
- ]
202
- }
203
- ```
204
-
205
- If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
187
+ Ask one `AskUserQuestion` at a time in this order:
188
+
189
+ 1. `Generation Mode`
190
+ Options:
191
+ - `Log-based — build from real usage logs (recommended if logs exist)`
192
+ - `Synthetic — generate from SKILL.md via LLM (for new skills)`
193
+ 2. If the user chose synthetic, ask `Model (for synthetic mode)`
194
+ Options:
195
+ - `Fast (haiku) — quick generation`
196
+ - `Balanced (sonnet) — better diversity (recommended)`
197
+ - `Best (opus) — highest quality`
198
+ 3. Ask `Max Entries`
199
+ Options:
200
+ - `50 (default)`
201
+ - `25 (quick)`
202
+ - `100 (comprehensive)`
203
+
204
+ If `AskUserQuestion` is not available or Claude does not invoke it, fall back to presenting the same choices as inline numbered options.
206
205
 
207
206
  After the user responds, parse their selections and map each choice to the corresponding CLI flags:
208
207
 
209
- | Selection | CLI Flag |
210
- |-----------|----------|
211
- | 1a (log-based) | _(no flag, default)_ |
212
- | 1b (synthetic) | `--synthetic --skill-path <path>` |
213
- | Custom max entries | `--max <value>` |
214
- | 4a (haiku) | `--model haiku` (resolved internally by selftune) |
215
- | 4b (sonnet) | `--model sonnet` |
216
- | 4c (opus) | `--model opus` |
217
- | Custom output path | `--out <path>` |
208
+ | Selection | CLI Flag |
209
+ | ------------------ | ------------------------------------------------- |
210
+ | 1a (log-based) | _(no flag, default)_ |
211
+ | 1b (synthetic) | `--synthetic --skill-path <path>` |
212
+ | Custom max entries | `--max <value>` |
213
+ | 4a (haiku) | `--model haiku` (resolved internally by selftune) |
214
+ | 4b (sonnet) | `--model sonnet` |
215
+ | 4c (opus) | `--model opus` |
216
+ | Custom output path | `--out <path>` |
218
217
 
219
218
  Show a confirmation summary to the user:
220
219
 
@@ -238,6 +237,7 @@ eval generation is useful.
238
237
  ### 2. Generate the Eval Set
239
238
 
240
239
  Run with `--skill <name>`. Parse the JSON output and review for:
240
+
241
241
  - Balance between positive and negative entries
242
242
  - Coverage of all three positive invocation types (explicit, implicit, contextual)
243
243
  - Reasonable negative examples (keyword overlap but wrong intent)
@@ -245,6 +245,7 @@ Run with `--skill <name>`. Parse the JSON output and review for:
245
245
  ### 3. Review Invocation Type Distribution
246
246
 
247
247
  A healthy eval set has:
248
+
248
249
  - Some explicit queries (easy baseline)
249
250
  - Many implicit queries (natural usage)
250
251
  - Several contextual queries (real-world usage)
@@ -35,26 +35,29 @@ rolled back.
35
35
  # Selftune Context
36
36
 
37
37
  ## Active Evolutions
38
+
38
39
  - pptx: deployed -- Added implicit triggers for slide deck queries
39
40
  - csv-parser: regression -- pass_rate=0.65, baseline=0.88
40
41
 
41
42
  ## Known Issues
43
+
42
44
  - Regression detected for csv-parser: pass_rate=0.65 below baseline=0.88
43
45
 
44
46
  ## Last Updated
47
+
45
48
  2026-03-01T14:00:00.000Z
46
49
  ```
47
50
 
48
51
  **Status values:**
49
52
 
50
- | Status | Meaning |
51
- |--------|---------|
52
- | `deployed` | Evolution was deployed successfully |
53
- | `failed` | Evolution attempted but did not deploy |
54
- | `regression` | Watch detected a regression in pass rate |
55
- | `healthy` | Watch confirmed pass rate is within threshold |
56
- | `rolled-back` | Rollback completed successfully |
57
- | `rollback-failed` | Rollback was attempted but failed |
53
+ | Status | Meaning |
54
+ | ----------------- | --------------------------------------------- |
55
+ | `deployed` | Evolution was deployed successfully |
56
+ | `failed` | Evolution attempted but did not deploy |
57
+ | `regression` | Watch detected a regression in pass rate |
58
+ | `healthy` | Watch confirmed pass rate is within threshold |
59
+ | `rolled-back` | Rollback completed successfully |
60
+ | `rollback-failed` | Rollback was attempted but failed |
58
61
 
59
62
  ### 2. plan.md -- Current Priorities
60
63
 
@@ -66,13 +69,16 @@ Records evolution priorities and strategy.
66
69
  # Evolution Plan
67
70
 
68
71
  ## Current Priorities
72
+
69
73
  1. Improve csv-parser implicit trigger coverage
70
74
  2. Re-evolve pptx after eval set expansion
71
75
 
72
76
  ## Strategy
77
+
73
78
  Focus on skills with highest session volume first.
74
79
 
75
80
  ## Last Updated
81
+
76
82
  2026-03-01T14:00:00.000Z
77
83
  ```
78
84
 
@@ -85,6 +91,7 @@ only appended.
85
91
 
86
92
  ```markdown
87
93
  ## 2026-03-01T14:00:00.000Z -- evolve
94
+
88
95
  - **Skill:** pptx
89
96
  - **Action:** evolved
90
97
  - **Rationale:** Missed implicit triggers for slide deck queries
@@ -95,14 +102,14 @@ only appended.
95
102
 
96
103
  Each entry contains:
97
104
 
98
- | Field | Description |
99
- |-------|-------------|
100
- | Timestamp | ISO 8601 timestamp in the `##` heading |
101
- | Action type | `evolve`, `rollback`, or `watch` in the heading |
102
- | Skill | The skill name |
103
- | Action | Past-tense result: `evolved`, `rolled-back`, or `watched` |
104
- | Rationale | Why the action was taken |
105
- | Result | What happened |
105
+ | Field | Description |
106
+ | ----------- | --------------------------------------------------------- |
107
+ | Timestamp | ISO 8601 timestamp in the `##` heading |
108
+ | Action type | `evolve`, `rollback`, or `watch` in the heading |
109
+ | Skill | The skill name |
110
+ | Action | Past-tense result: `evolved`, `rolled-back`, or `watched` |
111
+ | Rationale | Why the action was taken |
112
+ | Result | What happened |
106
113
 
107
114
  Entries are separated by `---` markers.
108
115
 
@@ -111,11 +118,11 @@ Entries are separated by `---` markers.
111
118
  Memory is updated automatically by the memory writer (`cli/selftune/memory/writer.ts`).
112
119
  No manual editing is required during normal operation.
113
120
 
114
- | Trigger | Function | Updates |
115
- |---------|----------|---------|
116
- | After evolve completes | `updateContextAfterEvolve` | context.md + decisions.md |
117
- | After rollback completes | `updateContextAfterRollback` | context.md + decisions.md |
118
- | After watch completes | `updateContextAfterWatch` | context.md + decisions.md, adds known issues on regression |
121
+ | Trigger | Function | Updates |
122
+ | ------------------------ | ---------------------------- | ---------------------------------------------------------- |
123
+ | After evolve completes | `updateContextAfterEvolve` | context.md + decisions.md |
124
+ | After rollback completes | `updateContextAfterRollback` | context.md + decisions.md |
125
+ | After watch completes | `updateContextAfterWatch` | context.md + decisions.md, adds known issues on regression |
119
126
 
120
127
  ## Reading Memory
121
128
 
@@ -142,13 +149,16 @@ They will be recreated automatically on the next evolve, watch, or rollback run.
142
149
  ## Common Patterns
143
150
 
144
151
  **"What happened in the last evolution?"**
152
+
145
153
  > Read `~/.selftune/memory/decisions.md`. The most recent entry at the bottom
146
154
  > of the file contains the last action, skill, rationale, and result.
147
155
 
148
156
  **"What's the current state?"**
157
+
149
158
  > Read `~/.selftune/memory/context.md`. The Active Evolutions section lists
150
159
  > every tracked skill and its current status.
151
160
 
152
161
  **"Memory seems stale"**
162
+
153
163
  > Delete the files in `~/.selftune/memory/` and run `selftune evolve` or
154
164
  > `selftune watch` to recreate them with fresh data.