selftune 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/README.md +1 -0
  2. package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +15 -0
  4. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
  5. package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
  7. package/apps/local-dashboard/dist/index.html +5 -5
  8. package/cli/selftune/activation-rules.ts +57 -18
  9. package/cli/selftune/agent-guidance.ts +96 -0
  10. package/cli/selftune/alpha-identity.ts +156 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
  12. package/cli/selftune/alpha-upload/client.ts +113 -0
  13. package/cli/selftune/alpha-upload/flush.ts +191 -0
  14. package/cli/selftune/alpha-upload/index.ts +194 -0
  15. package/cli/selftune/alpha-upload/queue.ts +252 -0
  16. package/cli/selftune/alpha-upload/stage-canonical.ts +251 -0
  17. package/cli/selftune/alpha-upload-contract.ts +52 -0
  18. package/cli/selftune/auth/device-code.ts +110 -0
  19. package/cli/selftune/auto-update.ts +130 -0
  20. package/cli/selftune/badge/badge.ts +19 -9
  21. package/cli/selftune/canonical-export.ts +16 -3
  22. package/cli/selftune/constants.ts +28 -8
  23. package/cli/selftune/contribute/bundle.ts +33 -5
  24. package/cli/selftune/dashboard-contract.ts +32 -1
  25. package/cli/selftune/dashboard-server.ts +215 -693
  26. package/cli/selftune/dashboard.ts +1 -1
  27. package/cli/selftune/eval/baseline.ts +11 -7
  28. package/cli/selftune/eval/hooks-to-evals.ts +39 -15
  29. package/cli/selftune/eval/synthetic-evals.ts +54 -1
  30. package/cli/selftune/evolution/audit.ts +24 -19
  31. package/cli/selftune/evolution/constitutional.ts +176 -0
  32. package/cli/selftune/evolution/evidence.ts +18 -13
  33. package/cli/selftune/evolution/evolve-body.ts +104 -7
  34. package/cli/selftune/evolution/evolve.ts +195 -22
  35. package/cli/selftune/evolution/propose-body.ts +18 -1
  36. package/cli/selftune/evolution/propose-description.ts +27 -2
  37. package/cli/selftune/evolution/rollback.ts +11 -15
  38. package/cli/selftune/export.ts +84 -0
  39. package/cli/selftune/grading/auto-grade.ts +14 -4
  40. package/cli/selftune/grading/grade-session.ts +17 -6
  41. package/cli/selftune/hooks/auto-activate.ts +5 -0
  42. package/cli/selftune/hooks/evolution-guard.ts +25 -11
  43. package/cli/selftune/hooks/prompt-log.ts +23 -9
  44. package/cli/selftune/hooks/session-stop.ts +78 -15
  45. package/cli/selftune/hooks/skill-eval.ts +189 -10
  46. package/cli/selftune/index.ts +274 -2
  47. package/cli/selftune/ingestors/claude-replay.ts +48 -21
  48. package/cli/selftune/init.ts +260 -49
  49. package/cli/selftune/last.ts +7 -7
  50. package/cli/selftune/localdb/db.ts +90 -10
  51. package/cli/selftune/localdb/direct-write.ts +573 -0
  52. package/cli/selftune/localdb/materialize.ts +296 -42
  53. package/cli/selftune/localdb/queries.ts +482 -32
  54. package/cli/selftune/localdb/schema.ts +153 -1
  55. package/cli/selftune/monitoring/watch.ts +27 -8
  56. package/cli/selftune/normalization.ts +88 -15
  57. package/cli/selftune/observability.ts +257 -5
  58. package/cli/selftune/orchestrate.ts +176 -53
  59. package/cli/selftune/quickstart.ts +34 -10
  60. package/cli/selftune/repair/skill-usage.ts +15 -2
  61. package/cli/selftune/routes/actions.ts +77 -0
  62. package/cli/selftune/routes/badge.ts +66 -0
  63. package/cli/selftune/routes/doctor.ts +12 -0
  64. package/cli/selftune/routes/index.ts +14 -0
  65. package/cli/selftune/routes/orchestrate-runs.ts +13 -0
  66. package/cli/selftune/routes/overview.ts +14 -0
  67. package/cli/selftune/routes/report.ts +293 -0
  68. package/cli/selftune/routes/skill-report.ts +230 -0
  69. package/cli/selftune/status.ts +203 -7
  70. package/cli/selftune/sync.ts +14 -1
  71. package/cli/selftune/types.ts +52 -2
  72. package/cli/selftune/utils/jsonl.ts +58 -1
  73. package/cli/selftune/utils/selftune-meta.ts +38 -0
  74. package/cli/selftune/utils/skill-log.ts +30 -4
  75. package/cli/selftune/utils/transcript.ts +15 -0
  76. package/cli/selftune/workflows/workflows.ts +7 -6
  77. package/package.json +11 -6
  78. package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
  79. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
  80. package/packages/telemetry-contract/fixtures/golden.json +1 -0
  81. package/packages/telemetry-contract/fixtures/index.ts +4 -0
  82. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
  83. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
  84. package/packages/telemetry-contract/package.json +6 -1
  85. package/packages/telemetry-contract/src/schemas.ts +196 -0
  86. package/packages/telemetry-contract/src/types.ts +3 -1
  87. package/packages/telemetry-contract/src/validators.ts +3 -1
  88. package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
  89. package/packages/ui/package.json +4 -0
  90. package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
  91. package/packages/ui/src/components/section-cards.tsx +31 -14
  92. package/packages/ui/src/types.ts +1 -0
  93. package/skill/SKILL.md +214 -174
  94. package/skill/Workflows/AlphaUpload.md +45 -0
  95. package/skill/Workflows/Baseline.md +18 -12
  96. package/skill/Workflows/Composability.md +3 -3
  97. package/skill/Workflows/Dashboard.md +39 -91
  98. package/skill/Workflows/Doctor.md +93 -66
  99. package/skill/Workflows/Evals.md +49 -40
  100. package/skill/Workflows/Evolve.md +76 -28
  101. package/skill/Workflows/EvolveBody.md +37 -38
  102. package/skill/Workflows/Initialize.md +145 -26
  103. package/skill/Workflows/Orchestrate.md +11 -2
  104. package/skill/Workflows/Sync.md +23 -0
  105. package/skill/Workflows/Watch.md +2 -5
  106. package/skill/agents/diagnosis-analyst.md +163 -0
  107. package/skill/agents/evolution-reviewer.md +149 -0
  108. package/skill/agents/integration-guide.md +154 -0
  109. package/skill/agents/pattern-analyst.md +149 -0
  110. package/skill/assets/multi-skill-settings.json +1 -1
  111. package/skill/assets/single-skill-settings.json +1 -1
  112. package/skill/references/interactive-config.md +39 -0
  113. package/skill/references/invocation-taxonomy.md +34 -0
  114. package/skill/references/logs.md +15 -1
  115. package/skill/references/setup-patterns.md +3 -3
  116. package/skill/settings_snippet.json +1 -1
  117. package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
  118. package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
  119. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
@@ -1,8 +1,8 @@
1
1
  # selftune Dashboard Workflow
2
2
 
3
3
  Visual dashboard for selftune telemetry, skill performance, evolution
4
- audit, and monitoring data. Supports static HTML export, file output,
5
- and a live server with polling-based auto-refresh and action buttons.
4
+ audit, and monitoring data. Starts a local SPA server with SSE-based
5
+ real-time updates and action buttons.
6
6
 
7
7
  ## Default Command
8
8
 
@@ -10,58 +10,23 @@ and a live server with polling-based auto-refresh and action buttons.
10
10
  selftune dashboard
11
11
  ```
12
12
 
13
- Opens a standalone HTML dashboard in the default browser with embedded
14
- data from all selftune log files.
13
+ Starts a Bun HTTP server with a React SPA dashboard and opens it in the
14
+ default browser. The dashboard reads SQLite directly and uses WAL-based
15
+ invalidation to push live updates via Server-Sent Events (SSE).
16
+ TanStack Query polling (60s) acts as a fallback. Action buttons trigger
17
+ selftune commands directly from the dashboard. Use `selftune export` to
18
+ generate JSONL from SQLite for debugging or offline analysis.
15
19
 
16
20
  ## Options
17
21
 
18
22
  | Flag | Description | Default |
19
23
  |------|-------------|---------|
20
- | `--export` | Export data-embedded HTML to stdout | Off |
21
- | `--out FILE` | Write data-embedded HTML to FILE | None |
22
- | `--serve` | Start live dashboard server | Off |
23
- | `--port <port>` | Custom port for live server (requires `--serve`) | 3141 |
24
+ | `--port <port>` | Custom port for the server | 3141 |
25
+ | `--no-open` | Start server without opening browser | Off |
26
+ | `--serve` | *(Deprecated)* Alias for default behavior | |
24
27
 
25
- ## Modes
26
-
27
- ### Static (Default)
28
-
29
- Builds an HTML file with all telemetry data embedded as JSON, saves it
30
- to `~/.selftune/dashboard.html`, and opens it in the default browser.
31
- The data is a point-in-time snapshot -- refresh by re-running the command.
32
-
33
- ```bash
34
- selftune dashboard
35
- ```
36
-
37
- ### Export
38
-
39
- Writes the same data-embedded HTML to stdout. Useful for piping to other
40
- tools or capturing output programmatically.
41
-
42
- ```bash
43
- selftune dashboard --export > dashboard.html
44
- ```
45
-
46
- ### File
47
-
48
- Writes the data-embedded HTML to a specific file path.
49
-
50
- ```bash
51
- selftune dashboard --out /tmp/report.html
52
- ```
53
-
54
- ### Live Server
55
-
56
- Starts a Bun HTTP server with a React SPA dashboard. The SPA uses
57
- TanStack Query polling to auto-refresh data (overview every 15s,
58
- orchestrate runs every 30s, doctor every 30s) and provides action
59
- buttons to trigger selftune commands.
60
-
61
- ```bash
62
- selftune dashboard --serve
63
- selftune dashboard --serve --port 8080
64
- ```
28
+ Note: `--export` and `--out` were removed. The CLI exits with an error if
29
+ either flag is used, suggesting `selftune dashboard` instead.
65
30
 
66
31
  ## Live Server
67
32
 
@@ -79,23 +44,23 @@ override.
79
44
  | `GET` | `/api/v2/skills/:name` | SQLite-backed per-skill report |
80
45
  | `GET` | `/api/v2/orchestrate-runs` | Recent orchestrate run reports |
81
46
  | `GET` | `/api/v2/doctor` | System health diagnostics (config, logs, hooks, evolution) |
47
+ | `GET` | `/api/v2/events` | SSE stream for live dashboard updates |
82
48
  | `GET` | `/api/health` | Dashboard server health probe |
83
49
  | `POST` | `/api/actions/watch` | Trigger `selftune watch` for a skill |
84
50
  | `POST` | `/api/actions/evolve` | Trigger `selftune evolve` for a skill |
85
51
  | `POST` | `/api/actions/rollback` | Trigger `selftune evolve rollback` for a skill |
86
52
 
87
- ### Auto-Refresh
53
+ ### Live Updates (SSE)
88
54
 
89
- The dashboard SPA uses TanStack Query with `refetchInterval` to poll
90
- the v2 API endpoints automatically:
55
+ The dashboard connects to `/api/v2/events` via Server-Sent Events.
56
+ The server watches the SQLite WAL file for changes and broadcasts an
57
+ `update` event when new data is written. The SPA invalidates all cached
58
+ queries, triggering immediate refetches (~1s latency).
91
59
 
92
- - `/api/v2/overview` every 15 seconds
93
- - `/api/v2/orchestrate-runs` every 30 seconds
94
- - `/api/v2/doctor` — every 30 seconds
95
- - `/api/v2/skills/:name` — every 30 seconds (when viewing a skill)
60
+ TanStack Query polling (60s) acts as a fallback safety net in case the
61
+ SSE connection drops. Data also refreshes on window focus.
96
62
 
97
- Data also refreshes on window focus. No SSE or websocket connection
98
- is required.
63
+ See [docs/design-docs/live-dashboard-sse.md](../../docs/design-docs/live-dashboard-sse.md) for the full design.
99
64
 
100
65
  ### Action Endpoints
101
66
 
@@ -147,45 +112,32 @@ The dashboard displays data from these sources:
147
112
 
148
113
  | Data | Source | Description |
149
114
  |------|--------|-------------|
150
- | Telemetry | `session_telemetry_log.jsonl` | Session-level telemetry records |
151
- | Skills | `skill_usage_log.jsonl` | Skill activation and usage events |
152
- | Queries | `all_queries_log.jsonl` | All user queries across sessions |
153
- | Evolution | `evolution_audit_log.jsonl` | Evolution audit trail (create, deploy, rollback) |
115
+ | Telemetry | SQLite (`~/.selftune/selftune.db`) | Session-level telemetry records |
116
+ | Skills | SQLite (`~/.selftune/selftune.db`) | Skill activation and usage events |
117
+ | Queries | SQLite (`~/.selftune/selftune.db`) | All user queries across sessions |
118
+ | Evolution | SQLite (`~/.selftune/selftune.db`) | Evolution audit trail (create, deploy, rollback) |
154
119
  | Decisions | `~/.selftune/memory/` | Evolution decision records |
155
120
  | Snapshots | Computed | Per-skill monitoring snapshots (pass rate, regression status) |
156
121
  | Unmatched | Computed | Queries that did not trigger any skill |
157
122
  | Pending | Computed | Evolution proposals not yet deployed, rejected, or rolled back |
158
123
 
159
- If no log data is found, the static modes exit with an error message
160
- listing the checked file paths.
124
+ If no log data is found, the server reports an error listing the
125
+ checked file paths.
161
126
 
162
127
  ## Steps
163
128
 
164
- ### 1. Choose Mode
165
-
166
- | Goal | Command |
167
- |------|---------|
168
- | Quick visual check | `selftune dashboard` |
169
- | Save report to file | `selftune dashboard --out report.html` |
170
- | Pipe to another tool | `selftune dashboard --export` |
171
- | Live monitoring | `selftune dashboard --serve` |
172
-
173
- ### 2. Run Command
129
+ ### 1. Run Dashboard
174
130
 
175
131
  ```bash
176
- # Static (opens browser)
177
132
  selftune dashboard
178
-
179
- # Live server
180
- selftune dashboard --serve
133
+ selftune dashboard --port 8080
134
+ selftune dashboard --no-open
181
135
  ```
182
136
 
183
- ### 3. Interact with Dashboard
137
+ ### 2. Interact with Dashboard
184
138
 
185
- - **Static mode**: View the snapshot. Re-run to refresh.
186
- - **Live mode**: Data refreshes automatically via polling (15-30s intervals).
187
- Use action buttons to trigger watch, evolve, or rollback directly from
188
- the dashboard.
139
+ Data refreshes in real time via SSE (~1s latency). Use action buttons
140
+ to trigger watch, evolve, or rollback directly from the dashboard.
189
141
 
190
142
  ## Common Patterns
191
143
 
@@ -194,12 +146,8 @@ selftune dashboard --serve
194
146
  > Report to the user that the dashboard is open.
195
147
 
196
148
  **User wants live monitoring**
197
- > Run `selftune dashboard --serve`. Inform the user that data refreshes
198
- > automatically every 15-30 seconds via polling.
199
-
200
- **User wants a shareable report**
201
- > Run `selftune dashboard --out report.html`. Report the file path to the
202
- > user. The HTML file is self-contained with all data embedded.
149
+ > Run `selftune dashboard`. The server provides real-time updates via SSE
150
+ > (~1 second latency).
203
151
 
204
152
  **Dashboard shows no data**
205
153
  > Run `selftune doctor` to verify hooks are installed. If hooks are missing,
@@ -207,8 +155,8 @@ selftune dashboard --serve
207
155
  > have run, inform the user that sessions must generate telemetry first.
208
156
 
209
157
  **User wants a different port**
210
- > Run `selftune dashboard --serve --port <port>`. Port must be 1-65535.
158
+ > Run `selftune dashboard --port <port>`. Port must be 1-65535.
211
159
 
212
160
  **User wants to trigger actions from the dashboard**
213
- > Run `selftune dashboard --serve` for live mode. The dashboard provides
214
- > action buttons for watch, evolve, and rollback per skill via POST endpoints.
161
+ > Run `selftune dashboard`. The dashboard provides action buttons for
162
+ > watch, evolve, and rollback per skill via POST endpoints.
@@ -17,34 +17,57 @@ None. Doctor runs all checks unconditionally.
17
17
 
18
18
  ```json
19
19
  {
20
- "healthy": true,
20
+ "command": "doctor",
21
+ "timestamp": "2026-02-28T10:00:00Z",
21
22
  "checks": [
22
23
  {
23
- "name": "session_telemetry_log exists",
24
+ "name": "config",
25
+ "path": "/Users/you/.selftune/config.json",
24
26
  "status": "pass",
25
- "detail": "Found 142 entries"
27
+ "message": "Valid config with agent_type and llm_mode"
26
28
  },
27
29
  {
28
- "name": "skill_usage_log parseable",
30
+ "name": "log_session_telemetry",
31
+ "path": "/Users/you/.claude/session_telemetry_log.jsonl",
29
32
  "status": "pass",
30
- "detail": "All 89 entries valid JSON"
33
+ "message": "Found 142 entries"
31
34
  },
32
35
  {
33
- "name": "hooks installed",
36
+ "name": "hook_settings",
37
+ "path": "/Users/you/.claude/settings.json",
34
38
  "status": "fail",
35
- "detail": "PostToolUse hook not found in ~/.claude/settings.json"
39
+ "message": "PostToolUse hook not found in ~/.claude/settings.json"
40
+ },
41
+ {
42
+ "name": "dashboard_freshness_mode",
43
+ "status": "pass",
44
+ "message": "Dashboard reads SQLite and watches WAL for live updates"
36
45
  }
37
46
  ],
38
47
  "summary": {
39
- "passed": 5,
40
- "failed": 1,
41
- "total": 6
42
- }
48
+ "pass": 9,
49
+ "fail": 1,
50
+ "warn": 0,
51
+ "total": 10
52
+ },
53
+ "healthy": false
43
54
  }
44
55
  ```
45
56
 
46
57
  The process exits with code 0 if `healthy: true`, code 1 otherwise.
47
58
 
59
+ Failed or warning checks may include a machine-readable `guidance` object:
60
+
61
+ ```json
62
+ {
63
+ "code": "config_missing",
64
+ "message": "selftune is not initialized yet.",
65
+ "next_command": "selftune init",
66
+ "suggested_commands": ["selftune doctor"],
67
+ "blocking": true
68
+ }
69
+ ```
70
+
48
71
  ## Parsing Instructions
49
72
 
50
73
  ### Check Overall Health
@@ -57,69 +80,64 @@ The process exits with code 0 if `healthy: true`, code 1 otherwise.
57
80
  ### Find Failed Checks
58
81
 
59
82
  ```bash
60
- # Parse: .checks[] | select(.status == "fail") | { name, detail }
83
+ # Parse: .checks[] | select(.status == "fail") | { name, message }
61
84
  ```
62
85
 
63
86
  ### Get Summary Counts
64
87
 
65
88
  ```bash
66
- # Parse: .summary.passed, .summary.failed, .summary.total
89
+ # Parse: .summary.pass, .summary.fail, .summary.warn, .summary.total
67
90
  ```
68
91
 
69
92
  ## Health Checks
70
93
 
71
- Doctor validates these areas:
94
+ Doctor validates these baseline areas (10 checks total), and adds alpha cloud-link
95
+ or queue checks when alpha is configured:
72
96
 
73
- ### Log File Checks
97
+ ### Config Check
74
98
 
75
- | Check | What it validates |
76
- |-------|-------------------|
77
- | Log files exist | `session_telemetry_log.jsonl`, `skill_usage_log.jsonl`, `all_queries_log.jsonl` exist in `~/.claude/` |
78
- | Logs are parseable | Every line in each log file is valid JSON |
79
- | Schema conformance | Required fields present per log type (see `references/logs.md`) |
99
+ | Check name | What it validates |
100
+ |------------|-------------------|
101
+ | `config` | `~/.selftune/config.json` exists, is valid JSON, contains `agent_type` and `llm_mode` fields |
80
102
 
81
- ### Hook Checks
103
+ ### Log Checks (4 checks)
82
104
 
83
- | Check | What it validates |
84
- |-------|-------------------|
85
- | Hooks installed | `UserPromptSubmit`, `PreToolUse`, `PostToolUse`, and `Stop` hooks are configured in `~/.claude/settings.json` |
86
- | Hook scripts exist | The script files referenced by hooks exist on disk |
87
- | Auto-activate hook | `hooks/auto-activate.ts` is registered in `UserPromptSubmit` and the file is executable |
88
- | Evolution guard hook | `hooks/evolution-guard.ts` is registered in `PreToolUse` and the file exists |
105
+ | Check name | What it validates |
106
+ |------------|-------------------|
107
+ | `log_session_telemetry` | `session_telemetry_log.jsonl` exists and is parseable |
108
+ | `log_skill_usage` | `skill_usage_log.jsonl` exists and is parseable |
109
+ | `log_all_queries` | `all_queries_log.jsonl` exists and is parseable |
110
+ | `log_evolution_audit` | `evolution_audit_log.jsonl` exists and is parseable |
89
111
 
90
- ### Memory Checks
112
+ ### Hook Check
91
113
 
92
- | Check | What it validates |
93
- |-------|-------------------|
94
- | Memory directory exists | `~/.selftune/memory/` directory is present |
95
- | Memory files valid | `context.md`, `decisions.md`, `plan.md` exist and are non-empty (if previously written) |
114
+ | Check name | What it validates |
115
+ |------------|-------------------|
116
+ | `hook_settings` | `~/.claude/settings.json` has selftune hooks configured |
96
117
 
97
- ### Activation Rules Checks
118
+ ### Evolution Check
98
119
 
99
- | Check | What it validates |
100
- |-------|-------------------|
101
- | Rules file exists | `~/.selftune/activation-rules.json` is present |
102
- | Rules file valid | The file contains valid JSON conforming to the activation rules schema |
120
+ | Check name | What it validates |
121
+ |------------|-------------------|
122
+ | `evolution_audit` | Evolution audit log entries have valid structure |
103
123
 
104
- ### Agent Checks
124
+ ### Integrity Check
105
125
 
106
- | Check | What it validates |
107
- |-------|-------------------|
108
- | Optional agent directory exists | If `.claude/agents/` is present, it is readable |
109
- | Optional agent files present | If the repo bundles helper agents, the expected files are present |
126
+ | Check name | What it validates |
127
+ |------------|-------------------|
128
+ | `dashboard_freshness_mode` | Warns when the dashboard still relies on legacy JSONL watcher invalidation instead of SQLite WAL live refresh |
110
129
 
111
- ### Dashboard Checks (optional)
130
+ ### Skill Version Sync Check
112
131
 
113
- | Check | What it validates |
114
- |-------|-------------------|
115
- | Dashboard server accessible | `dashboard-server.ts` exists in the CLI directory |
132
+ | Check name | What it validates |
133
+ |------------|-------------------|
134
+ | `skill_version_sync` | SKILL.md frontmatter version matches package.json version |
116
135
 
117
- ### Evolution Audit Checks
136
+ ### Version Check
118
137
 
119
- | Check | What it validates |
120
- |-------|-------------------|
121
- | Audit log integrity | `evolution_audit_log.jsonl` entries have required fields (`timestamp`, `proposal_id`, `action`) |
122
- | Valid action values | All entries use known action types: `created`, `validated`, `deployed`, `rolled_back` |
138
+ | Check name | What it validates |
139
+ |------------|-------------------|
140
+ | `version_up_to_date` | Installed version matches latest on npm registry |
123
141
 
124
142
  ## Steps
125
143
 
@@ -139,18 +157,13 @@ For each failed check, take the appropriate action:
139
157
 
140
158
  | Failed check | Fix |
141
159
  |-------------|-----|
142
- | Log files missing | Run a session to generate initial log entries. Check hook installation. |
143
- | Logs not parseable | Inspect the corrupted log file. Remove or fix invalid lines. |
144
- | Hooks not installed | Merge `skill/settings_snippet.json` into `~/.claude/settings.json`. Update paths. |
145
- | Hook scripts missing | Verify the selftune repo path. Re-run `init` if the repo was moved. |
146
- | Auto-activate missing | Add `hooks/auto-activate.ts` to `UserPromptSubmit` in settings. |
147
- | Evolution guard missing | Add `hooks/evolution-guard.ts` to `PreToolUse` in settings. |
148
- | Memory directory missing | Run `mkdir -p ~/.selftune/memory`. |
149
- | Memory files invalid | Delete and let the memory writer recreate them on next evolve/watch. |
150
- | Activation rules missing | Copy `assets/activation-rules-default.json` to `~/.selftune/activation-rules.json`. |
151
- | Activation rules invalid | Validate JSON syntax. Re-copy from template if corrupted. |
152
- | Agent files missing | If your repo uses optional helper agents, restore them in `.claude/agents/`. Otherwise ignore this advisory. |
153
- | Audit log invalid | Remove corrupted entries. Future operations will append clean entries. |
160
+ | `config` | Run `selftune init` (or `selftune init --force` to regenerate). |
161
+ | `log_*` | Run a session to generate initial log entries. Check hook installation with `selftune init`. |
162
+ | `hook_settings` | Run `selftune init` to install hooks into `~/.claude/settings.json`. |
163
+ | `evolution_audit` | Remove corrupted entries. Future operations will append clean entries. |
164
+ | `dashboard_freshness_mode` | This is an operator warning, not a broken install. Expect possible freshness gaps for SQLite-only writes, and run `selftune export` before attempting any destructive recovery. |
165
+ | `skill_version_sync` | Run `bun run sync-version` to stamp SKILL.md from package.json. |
166
+ | `version_up_to_date` | Run `npm install -g selftune` to update. |
154
167
 
155
168
  ### 4. Re-run Doctor
156
169
 
@@ -159,14 +172,28 @@ After fixes, run doctor again to verify all checks pass.
159
172
  ## Subagent Escalation
160
173
 
161
174
  If doctor reveals persistent issues with a specific skill — especially
162
- recurring failures that basic fixes do not resolve — spawn the
163
- `diagnosis-analyst` agent as a subagent for root cause analysis.
175
+ recurring failures that basic fixes do not resolve — read
176
+ `skill/agents/diagnosis-analyst.md` and spawn a subagent with those instructions
177
+ for root cause analysis.
178
+
179
+ ### Alpha Upload Not Active
180
+
181
+ **Symptoms:** `selftune status` shows alpha upload as "not enrolled" or "enrolled (missing credential)"
182
+
183
+ **Diagnostic steps:**
184
+ 1. Check `selftune status` — look at "Alpha Upload" and "Cloud link" lines
185
+ 2. If `doctor` includes a `cloud_link` or alpha queue warning, prefer `.checks[].guidance.next_command`
186
+ 3. If "not enrolled" or "not linked": run `selftune init --alpha --alpha-email <email>` (opens browser for device-code auth)
187
+ 4. If "enrolled (missing credential)": re-run `selftune init --alpha --alpha-email <email> --force` (re-authenticates via browser)
188
+ 5. If "api_key has invalid format": re-run `selftune init --alpha --force` to re-authenticate
189
+
190
+ **Resolution:** Follow the setup sequence in Initialize workflow → Alpha Enrollment section.
164
191
 
165
192
  ## Common Patterns
166
193
 
167
194
  **User reports something seems broken**
168
195
  > Run `selftune doctor`. Parse the JSON output for failed checks. Report
169
- > each failure's `name` and `detail` to the user with the recommended fix.
196
+ > each failure's `name` and `message` to the user with the recommended fix.
170
197
 
171
198
  **User asks if hooks are working**
172
199
  > Run `selftune doctor`. Parse `.checks[]` for hook-related entries. If
@@ -26,9 +26,14 @@ selftune eval generate --skill <name> [options]
26
26
  | `--skill <name>` | Skill to generate evals for | Required (unless `--list-skills`) |
27
27
  | `--list-skills` | List all logged skills with query counts | Off |
28
28
  | `--stats` | Show aggregate telemetry stats for the skill | Off |
29
- | `--max <n>` | Maximum eval entries to generate | 50 |
30
- | `--seed <n>` | Random seed for negative sampling | Random |
31
- | `--out <path>` | Output file path | `evals-<skill>.json` |
29
+ | `--max <n>` | Maximum eval entries per side | 50 |
30
+ | `--seed <n>` | Seed for deterministic shuffling | 42 |
31
+ | `--output <path>` / `--out <path>` | Output file path | `{skillName}_trigger_eval.json` |
32
+ | `--no-negatives` | Exclude negative examples from output | Off |
33
+ | `--no-taxonomy` | Skip invocation_type classification | Off |
34
+ | `--skill-log <path>` | Path to skill_usage_log.jsonl | Default log path |
35
+ | `--query-log <path>` | Path to all_queries_log.jsonl | Default log path |
36
+ | `--telemetry-log <path>` | Path to session_telemetry_log.jsonl | Default log path |
32
37
  | `--synthetic` | Generate evals from SKILL.md via LLM (no logs needed) | Off |
33
38
  | `--skill-path <path>` | Path to SKILL.md (required with `--synthetic`) | — |
34
39
  | `--model <model>` | LLM model to use for synthetic generation | Agent default |
@@ -40,24 +45,20 @@ selftune eval generate --skill <name> [options]
40
45
  ```json
41
46
  [
42
47
  {
43
- "id": 1,
44
48
  "query": "Make me a slide deck for the Q3 board meeting",
45
- "expected": true,
46
- "invocation_type": "contextual",
47
- "skill_name": "pptx",
48
- "source_session": "abc123"
49
+ "should_trigger": true,
50
+ "invocation_type": "contextual"
49
51
  },
50
52
  {
51
- "id": 2,
52
53
  "query": "What format should I use for a presentation?",
53
- "expected": false,
54
- "invocation_type": "negative",
55
- "skill_name": "pptx",
56
- "source_session": null
54
+ "should_trigger": false
57
55
  }
58
56
  ]
59
57
  ```
60
58
 
59
+ Each entry has `query` (string, max 500 chars), `should_trigger` (boolean),
60
+ and optional `invocation_type` (omitted when `--no-taxonomy` is set).
61
+
61
62
  ### List Skills
62
63
 
63
64
  ```json
@@ -93,14 +94,14 @@ selftune eval generate --skill <name> [options]
93
94
  ### Find Missed Queries (False Negatives)
94
95
 
95
96
  ```bash
96
- # Parse: .[] | select(.expected == true and .invocation_type != "explicit")
97
+ # Parse: .[] | select(.should_trigger == true and .invocation_type != "explicit")
97
98
  # These are queries that should trigger but might be missed
98
99
  ```
99
100
 
100
101
  ### Get Negative Examples
101
102
 
102
103
  ```bash
103
- # Parse: .[] | select(.expected == false)
104
+ # Parse: .[] | select(.should_trigger == false)
104
105
  ```
105
106
 
106
107
  ## Sub-Workflows
@@ -126,10 +127,16 @@ selftune eval generate --skill pptx --synthetic --skill-path /path/to/skills/ppt
126
127
 
127
128
  The command:
128
129
  1. Reads the SKILL.md file content
129
- 2. Sends it to an LLM with a prompt requesting realistic test queries
130
- 3. Parses the response into eval entries with invocation type annotations
131
- 4. Classifies each positive query using the deterministic `classifyInvocation()` heuristic
132
- 5. Writes the eval set to the output file
130
+ 2. Loads real user queries from the database (if available) as few-shot style examples so synthetic queries match real phrasing patterns
131
+ 3. Sends skill content and real examples to an LLM with a prompt requesting realistic test queries
132
+ 4. Parses the response into eval entries with invocation type annotations
133
+ 5. Classifies each positive query using the deterministic `classifyInvocation()` heuristic
134
+ 6. Writes the eval set to the output file
135
+
136
+ **Note:** When real query data exists in the database, synthetic generation
137
+ automatically includes high-confidence positive triggers and general queries as
138
+ phrasing references. This produces more natural-sounding eval queries. If no
139
+ database is available, generation proceeds without real examples (fail-open).
133
140
 
134
141
  Use `--model` to override the default LLM model:
135
142
 
@@ -144,7 +151,7 @@ Cross-reference `skill_usage_log.jsonl` (positive triggers) against
144
151
  an eval set annotated with invocation types.
145
152
 
146
153
  ```bash
147
- selftune eval generate --skill pptx --max 50 --out evals-pptx.json
154
+ selftune eval generate --skill pptx --max 50 --output evals-pptx.json
148
155
  ```
149
156
 
150
157
  The command:
@@ -168,32 +175,34 @@ selftune eval generate --skill pptx --stats
168
175
 
169
176
  ### 0. Pre-Flight Configuration
170
177
 
171
- Before generating evals, present numbered configuration options to the user inline in your response, then wait for the user's answer before proceeding.
178
+ Before generating evals, use the `AskUserQuestion` tool to present structured configuration options.
172
179
 
173
- If the user responds with "use defaults", "just do it", or similar shorthand, skip to step 1 using the recommended defaults.
180
+ If the user responds with "use defaults" or similar shorthand, skip to step 1 using the recommended defaults. If the user cancels, stop — do not proceed with defaults.
174
181
 
175
182
  For `--list-skills` or `--stats` requests, skip pre-flight entirely — these are read-only operations.
176
183
 
177
- Present the following options inline in your response:
178
-
179
- 1. **Generation Mode**
180
- - a) Log-based — build evals from real usage logs (recommended if logs exist)
181
- - b) Synthetic — generate evals from SKILL.md via LLM (for new skills with no data)
182
-
183
- 2. **Skill Path** (synthetic mode only)
184
- - Provide absolute or relative path to the target SKILL.md
185
- - Example: `./skills/pptx/SKILL.md`
186
-
187
- 3. **Max Entries:** 50 (default — how many eval entries to generate)
184
+ Use `AskUserQuestion` with these questions:
188
185
 
189
- 4. **Model** (synthetic mode only)
190
- - a) Fast (haiku) — quick generation
191
- - b) Balanced (sonnet) — better query diversity (recommended)
192
- - c) Best (opus) — highest quality synthetic queries
193
-
194
- 5. **Output Path:** `evals-<skill>.json` (default)
186
+ ```json
187
+ {
188
+ "questions": [
189
+ {
190
+ "question": "Generation Mode",
191
+ "options": ["Log-based — build from real usage logs (recommended if logs exist)", "Synthetic — generate from SKILL.md via LLM (for new skills)"]
192
+ },
193
+ {
194
+ "question": "Model (for synthetic mode)",
195
+ "options": ["Fast (haiku) — quick generation", "Balanced (sonnet) — better diversity (recommended)", "Best (opus) — highest quality"]
196
+ },
197
+ {
198
+ "question": "Max Entries",
199
+ "options": ["50 (default)", "25 (quick)", "100 (comprehensive)"]
200
+ }
201
+ ]
202
+ }
203
+ ```
195
204
 
196
- Ask: "Reply with your choices or 'use defaults' for recommended settings."
205
+ If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
197
206
 
198
207
  After the user responds, parse their selections and map each choice to the corresponding CLI flags:
199
208