selftune 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/README.md +35 -35
  2. package/apps/local-dashboard/dist/assets/index-BZVLv70T.js +16 -0
  3. package/apps/local-dashboard/dist/assets/{index-CRtLkBTi.css → index-Bs3Y4ixf.css} +1 -1
  4. package/apps/local-dashboard/dist/assets/{vendor-react-BQH_6WrG.js → vendor-react-BXP54cYo.js} +4 -4
  5. package/apps/local-dashboard/dist/assets/{vendor-table-dK1QMLq9.js → vendor-table-DTF_SXoy.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-CO2mrx6e.js → vendor-ui-CWU0d1wd.js} +66 -66
  7. package/apps/local-dashboard/dist/index.html +15 -15
  8. package/bin/selftune.cjs +1 -1
  9. package/cli/selftune/activation-rules.ts +37 -18
  10. package/cli/selftune/agent-guidance.ts +16 -16
  11. package/cli/selftune/alpha-identity.ts +1 -2
  12. package/cli/selftune/alpha-upload/build-payloads.ts +18 -2
  13. package/cli/selftune/alpha-upload/flush.ts +2 -2
  14. package/cli/selftune/alpha-upload/stage-canonical.ts +106 -3
  15. package/cli/selftune/auth/device-code.ts +32 -0
  16. package/cli/selftune/auto-update.ts +12 -0
  17. package/cli/selftune/badge/badge.ts +1 -0
  18. package/cli/selftune/canonical-export.ts +5 -0
  19. package/cli/selftune/claude-agents.ts +154 -0
  20. package/cli/selftune/contribute/bundle.ts +2 -0
  21. package/cli/selftune/contribute/contribute.ts +1 -0
  22. package/cli/selftune/cron/setup.ts +2 -2
  23. package/cli/selftune/dashboard-contract.ts +1 -1
  24. package/cli/selftune/dashboard-server.ts +11 -52
  25. package/cli/selftune/eval/hooks-to-evals.ts +13 -6
  26. package/cli/selftune/eval/import-skillsbench.ts +1 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +2 -3
  28. package/cli/selftune/eval/unit-test.ts +1 -0
  29. package/cli/selftune/evolution/deploy-proposal.ts +1 -0
  30. package/cli/selftune/evolution/evolve-body.ts +93 -6
  31. package/cli/selftune/evolution/evolve.ts +0 -1
  32. package/cli/selftune/evolution/propose-body.ts +3 -2
  33. package/cli/selftune/evolution/propose-routing.ts +3 -2
  34. package/cli/selftune/evolution/refine-body.ts +3 -2
  35. package/cli/selftune/export.ts +1 -0
  36. package/cli/selftune/grading/auto-grade.ts +1 -0
  37. package/cli/selftune/grading/grade-session.ts +9 -0
  38. package/cli/selftune/hooks/auto-activate.ts +6 -0
  39. package/cli/selftune/hooks/evolution-guard.ts +12 -15
  40. package/cli/selftune/hooks/prompt-log.ts +1 -0
  41. package/cli/selftune/hooks/session-stop.ts +34 -40
  42. package/cli/selftune/hooks/skill-change-guard.ts +1 -0
  43. package/cli/selftune/hooks/skill-eval.ts +1 -1
  44. package/cli/selftune/index.ts +23 -14
  45. package/cli/selftune/ingestors/claude-replay.ts +1 -0
  46. package/cli/selftune/ingestors/codex-rollout.ts +1 -0
  47. package/cli/selftune/ingestors/codex-wrapper.ts +1 -0
  48. package/cli/selftune/ingestors/openclaw-ingest.ts +1 -0
  49. package/cli/selftune/ingestors/opencode-ingest.ts +1 -0
  50. package/cli/selftune/init.ts +197 -96
  51. package/cli/selftune/localdb/db.ts +1 -0
  52. package/cli/selftune/localdb/direct-write.ts +93 -12
  53. package/cli/selftune/localdb/materialize.ts +2 -0
  54. package/cli/selftune/localdb/queries.ts +210 -0
  55. package/cli/selftune/localdb/schema.ts +72 -1
  56. package/cli/selftune/monitoring/watch.ts +1 -0
  57. package/cli/selftune/normalization.ts +4 -0
  58. package/cli/selftune/observability.ts +14 -7
  59. package/cli/selftune/orchestrate.ts +15 -37
  60. package/cli/selftune/repair/skill-usage.ts +7 -3
  61. package/cli/selftune/routes/orchestrate-runs.ts +1 -0
  62. package/cli/selftune/routes/overview.ts +1 -0
  63. package/cli/selftune/routes/skill-report.ts +1 -0
  64. package/cli/selftune/sync.ts +31 -1
  65. package/cli/selftune/types.ts +2 -2
  66. package/cli/selftune/uninstall.ts +412 -0
  67. package/cli/selftune/utils/canonical-log.ts +2 -0
  68. package/cli/selftune/utils/jsonl.ts +1 -0
  69. package/cli/selftune/utils/llm-call.ts +131 -3
  70. package/cli/selftune/utils/skill-log.ts +1 -0
  71. package/cli/selftune/utils/transcript.ts +1 -0
  72. package/cli/selftune/utils/trigger-check.ts +1 -1
  73. package/cli/selftune/workflows/skill-md-writer.ts +5 -5
  74. package/cli/selftune/workflows/workflows.ts +1 -0
  75. package/package.json +38 -33
  76. package/packages/telemetry-contract/fixtures/golden.test.ts +1 -0
  77. package/packages/telemetry-contract/package.json +3 -3
  78. package/packages/telemetry-contract/src/index.ts +0 -1
  79. package/packages/telemetry-contract/src/schemas.ts +6 -24
  80. package/packages/telemetry-contract/tests/compatibility.test.ts +1 -0
  81. package/packages/ui/README.md +35 -34
  82. package/packages/ui/package.json +3 -3
  83. package/packages/ui/src/components/ActivityTimeline.tsx +49 -42
  84. package/packages/ui/src/components/EvidenceViewer.tsx +306 -182
  85. package/packages/ui/src/components/EvolutionTimeline.tsx +83 -72
  86. package/packages/ui/src/components/InfoTip.tsx +4 -3
  87. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +60 -53
  88. package/packages/ui/src/components/section-cards.tsx +19 -24
  89. package/packages/ui/src/components/skill-health-grid.tsx +213 -193
  90. package/packages/ui/src/lib/constants.tsx +1 -0
  91. package/packages/ui/src/primitives/badge.tsx +12 -15
  92. package/packages/ui/src/primitives/button.tsx +7 -7
  93. package/packages/ui/src/primitives/card.tsx +15 -26
  94. package/packages/ui/src/primitives/checkbox.tsx +7 -8
  95. package/packages/ui/src/primitives/collapsible.tsx +5 -5
  96. package/packages/ui/src/primitives/dropdown-menu.tsx +45 -55
  97. package/packages/ui/src/primitives/label.tsx +6 -6
  98. package/packages/ui/src/primitives/select.tsx +28 -37
  99. package/packages/ui/src/primitives/table.tsx +17 -44
  100. package/packages/ui/src/primitives/tabs.tsx +14 -21
  101. package/packages/ui/src/primitives/tooltip.tsx +10 -22
  102. package/skill/SKILL.md +72 -59
  103. package/skill/Workflows/AlphaUpload.md +4 -4
  104. package/skill/Workflows/AutoActivation.md +11 -6
  105. package/skill/Workflows/Badge.md +22 -16
  106. package/skill/Workflows/Baseline.md +34 -36
  107. package/skill/Workflows/Composability.md +16 -11
  108. package/skill/Workflows/Contribute.md +26 -21
  109. package/skill/Workflows/Cron.md +23 -22
  110. package/skill/Workflows/Dashboard.md +40 -40
  111. package/skill/Workflows/Doctor.md +40 -34
  112. package/skill/Workflows/Evals.md +48 -47
  113. package/skill/Workflows/EvolutionMemory.md +31 -21
  114. package/skill/Workflows/Evolve.md +84 -82
  115. package/skill/Workflows/EvolveBody.md +58 -47
  116. package/skill/Workflows/Grade.md +16 -13
  117. package/skill/Workflows/ImportSkillsBench.md +9 -6
  118. package/skill/Workflows/Ingest.md +36 -21
  119. package/skill/Workflows/Initialize.md +138 -97
  120. package/skill/Workflows/Orchestrate.md +22 -16
  121. package/skill/Workflows/Replay.md +12 -7
  122. package/skill/Workflows/Rollback.md +13 -6
  123. package/skill/Workflows/Schedule.md +6 -6
  124. package/skill/Workflows/Sync.md +18 -11
  125. package/skill/Workflows/UnitTest.md +28 -17
  126. package/skill/Workflows/Watch.md +28 -21
  127. package/skill/agents/diagnosis-analyst.md +11 -0
  128. package/skill/agents/evolution-reviewer.md +15 -1
  129. package/skill/agents/integration-guide.md +10 -0
  130. package/skill/agents/pattern-analyst.md +12 -1
  131. package/skill/references/grading-methodology.md +23 -24
  132. package/skill/references/interactive-config.md +7 -7
  133. package/skill/references/invocation-taxonomy.md +22 -20
  134. package/skill/references/logs.md +20 -6
  135. package/skill/references/setup-patterns.md +4 -2
  136. package/.claude/agents/diagnosis-analyst.md +0 -156
  137. package/.claude/agents/evolution-reviewer.md +0 -180
  138. package/.claude/agents/integration-guide.md +0 -212
  139. package/.claude/agents/pattern-analyst.md +0 -160
  140. package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +0 -15
@@ -18,42 +18,43 @@ selftune contribute --skill selftune
18
18
 
19
19
  ## Options
20
20
 
21
- | Flag | Description |
22
- |------|-------------|
23
- | `--skill <name>` | Skill to contribute data for (default: "selftune") |
24
- | `--output <path>` | Output file path (default: auto-generated in ~/.selftune/contributions/) |
25
- | `--preview` | Show what would be shared without writing |
26
- | `--sanitize <level>` | `conservative` (default) or `aggressive` |
27
- | `--since <date>` | Only include data from this date onward |
28
- | `--submit` | Auto-create GitHub Issue via `gh` CLI |
21
+ | Flag | Description |
22
+ | -------------------- | ------------------------------------------------------------------------ |
23
+ | `--skill <name>` | Skill to contribute data for (default: "selftune") |
24
+ | `--output <path>` | Output file path (default: auto-generated in ~/.selftune/contributions/) |
25
+ | `--preview` | Show what would be shared without writing |
26
+ | `--sanitize <level>` | `conservative` (default) or `aggressive` |
27
+ | `--since <date>` | Only include data from this date onward |
28
+ | `--submit` | Auto-create GitHub Issue via `gh` CLI |
29
29
 
30
30
  ## Sanitization Levels
31
31
 
32
32
  ### Conservative (default)
33
33
 
34
- | Pattern | Replacement |
35
- |---------|-------------|
36
- | File paths | `[PATH]` |
37
- | Email addresses | `[EMAIL]` |
38
- | API keys, tokens, JWTs | `[SECRET]` |
39
- | IP addresses | `[IP]` |
40
- | Project name from cwd | `[PROJECT]` |
41
- | Session IDs | `[SESSION]` |
34
+ | Pattern | Replacement |
35
+ | ---------------------- | ----------- |
36
+ | File paths | `[PATH]` |
37
+ | Email addresses | `[EMAIL]` |
38
+ | API keys, tokens, JWTs | `[SECRET]` |
39
+ | IP addresses | `[IP]` |
40
+ | Project name from cwd | `[PROJECT]` |
41
+ | Session IDs | `[SESSION]` |
42
42
 
43
43
  ### Aggressive
44
44
 
45
45
  Extends conservative with:
46
46
 
47
- | Pattern | Replacement |
48
- |---------|-------------|
47
+ | Pattern | Replacement |
48
+ | ------------------------------------------ | -------------- |
49
49
  | camelCase/PascalCase identifiers > 8 chars | `[IDENTIFIER]` |
50
- | Quoted strings | `[STRING]` |
51
- | Import/require module paths | `[MODULE]` |
52
- | Queries > 200 chars | Truncated |
50
+ | Quoted strings | `[STRING]` |
51
+ | Import/require module paths | `[MODULE]` |
52
+ | Queries > 200 chars | Truncated |
53
53
 
54
54
  ## Bundle Contents
55
55
 
56
56
  The contribution bundle includes:
57
+
57
58
  - **Positive queries** -- queries that triggered the skill (sanitized)
58
59
  - **Eval entries** -- trigger eval set for the skill
59
60
  - **Grading summary** -- aggregate pass rates (no raw transcripts)
@@ -79,16 +80,20 @@ No raw transcripts, file contents, or identifiable information is included.
79
80
  ## Common Patterns
80
81
 
81
82
  **User wants to see what would be shared**
83
+
82
84
  > Run `selftune contribute --preview`. Parse the output and report the
83
85
  > sanitized data summary to the user before proceeding.
84
86
 
85
87
  **User requests stronger anonymization**
88
+
86
89
  > Run `selftune contribute --sanitize aggressive`. This replaces identifiers,
87
90
  > quoted strings, and module paths in addition to standard PII scrubbing.
88
91
 
89
92
  **User wants to submit directly**
93
+
90
94
  > Run `selftune contribute --submit`. This creates a GitHub Issue via `gh`
91
95
  > CLI with the bundle inlined or uploaded as a gist.
92
96
 
93
97
  **User wants to limit to recent data**
98
+
94
99
  > Run `selftune contribute --since <date>` with the user's specified date.
@@ -17,11 +17,11 @@ OpenClaw-specific cron integration.
17
17
 
18
18
  Auto-detect the current platform and install scheduled jobs.
19
19
 
20
- | Flag | Description | Default |
21
- |------|-------------|---------|
22
- | `--platform <name>` | Force a specific platform (`openclaw`, `cron`, `launchd`, `systemd`) | Auto-detect |
23
- | `--dry-run` | Preview without installing | Off |
24
- | `--tz <timezone>` | IANA timezone for job schedules (OpenClaw only) | Flag > `TZ` env > system timezone |
20
+ | Flag | Description | Default |
21
+ | ------------------- | -------------------------------------------------------------------- | --------------------------------- |
22
+ | `--platform <name>` | Force a specific platform (`openclaw`, `cron`, `launchd`, `systemd`) | Auto-detect |
23
+ | `--dry-run` | Preview without installing | Off |
24
+ | `--tz <timezone>` | IANA timezone for job schedules (OpenClaw only) | Flag > `TZ` env > system timezone |
25
25
 
26
26
  Platform auto-detection: macOS → launchd, Linux → systemd, other → cron.
27
27
 
@@ -43,9 +43,9 @@ No flags.
43
43
 
44
44
  Remove all selftune cron jobs from OpenClaw.
45
45
 
46
- | Flag | Description | Default |
47
- |------|-------------|---------|
48
- | `--dry-run` | Preview which jobs would be removed without deleting | Off |
46
+ | Flag | Description | Default |
47
+ | ----------- | ---------------------------------------------------- | ------- |
48
+ | `--dry-run` | Preview which jobs would be removed without deleting | Off |
49
49
 
50
50
  ## Aliases
51
51
 
@@ -56,11 +56,11 @@ invocations with flags (e.g. `selftune schedule --platform launchd`) continue to
56
56
 
57
57
  Setup registers these jobs:
58
58
 
59
- | Name | Cron Expression | Schedule | Description |
60
- |------|----------------|----------|-------------|
61
- | `selftune-sync` | `*/30 * * * *` | Every 30 minutes | Sync source-truth telemetry |
62
- | `selftune-status` | `0 8 * * *` | Daily at 8am | Health check — report skills with pass rate below 80% |
63
- | `selftune-orchestrate` | `0 */6 * * *` | Every 6 hours | Full autonomous loop: sync → candidate selection → evolve → watch |
59
+ | Name | Cron Expression | Schedule | Description |
60
+ | ---------------------- | --------------- | ---------------- | ----------------------------------------------------------------- |
61
+ | `selftune-sync` | `*/30 * * * *` | Every 30 minutes | Sync source-truth telemetry |
62
+ | `selftune-status` | `0 8 * * *` | Daily at 8am | Health check — report skills with pass rate below 80% |
63
+ | `selftune-orchestrate` | `0 */6 * * *` | Every 6 hours | Full autonomous loop: sync → candidate selection → evolve → watch |
64
64
 
65
65
  All jobs run in **isolated session** mode — each execution gets a clean
66
66
  session with no context accumulation from previous runs.
@@ -79,6 +79,7 @@ session with no context accumulation from previous runs.
79
79
  3. Verify with `selftune status` after the first scheduled run fires
80
80
 
81
81
  For OpenClaw specifically:
82
+
82
83
  1. Run `selftune cron setup --platform openclaw --dry-run` to preview
83
84
  2. Run `selftune cron setup --platform openclaw` to register jobs
84
85
  3. Run `selftune cron list` to verify jobs are registered
@@ -111,15 +112,15 @@ interactive mode is for user-directed improvements.
111
112
 
112
113
  ## Safety Controls
113
114
 
114
- | Control | How It Works |
115
- |---------|-------------|
116
- | Dry-run first | `selftune cron setup --dry-run` previews commands before installing |
117
- | Regression threshold | Evolution only deploys if improvement exceeds 5% on existing triggers |
118
- | Auto-rollback | `selftune watch` automatically rolls back if pass rate drops below baseline minus threshold |
119
- | Audit trail | Every evolution recorded in `evolution_audit_log.jsonl` with full history |
120
- | SKILL.md backup | `.bak` file created before every deploy — primary rollback path exists via .bak; fallback depends on audit metadata integrity |
121
- | Human override | `selftune evolve rollback --skill <name> --skill-path <path>` available anytime to manually revert |
122
- | Pin descriptions | Config flag to freeze specific skills and prevent evolution on sensitive skills |
115
+ | Control | How It Works |
116
+ | -------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
117
+ | Dry-run first | `selftune cron setup --dry-run` previews commands before installing |
118
+ | Regression threshold | Evolution only deploys if improvement exceeds 5% on existing triggers |
119
+ | Auto-rollback | `selftune watch` automatically rolls back if pass rate drops below baseline minus threshold |
120
+ | Audit trail | Every evolution recorded in `evolution_audit_log.jsonl` with full history |
121
+ | SKILL.md backup | `.bak` file created before every deploy — primary rollback path exists via .bak; fallback depends on audit metadata integrity |
122
+ | Human override | `selftune evolve rollback --skill <name> --skill-path <path>` available anytime to manually revert |
123
+ | Pin descriptions | Config flag to freeze specific skills and prevent evolution on sensitive skills |
123
124
 
124
125
  ## Common Patterns
125
126
 
@@ -11,22 +11,19 @@ selftune dashboard
11
11
  ```
12
12
 
13
13
  Starts a Bun HTTP server with a React SPA dashboard and opens it in the
14
- default browser. The dashboard reads SQLite directly, but the current
15
- live-update invalidation path still watches JSONL logs and pushes
16
- updates via Server-Sent Events (SSE). That means the dashboard usually
17
- refreshes quickly, but SQLite-only writes can still lag until the WAL
18
- cutover lands. TanStack Query polling (60s) acts as a fallback. Action
19
- buttons trigger selftune commands directly from the dashboard. Use
20
- `selftune export` to generate JSONL from SQLite for debugging or
21
- offline analysis.
14
+ default browser. The dashboard reads SQLite directly and uses WAL-based
15
+ invalidation to push live updates via Server-Sent Events (SSE).
16
+ TanStack Query polling (60s) acts as a fallback. Action buttons trigger
17
+ selftune commands directly from the dashboard. Use `selftune export` to
18
+ generate JSONL from SQLite for debugging or offline analysis.
22
19
 
23
20
  ## Options
24
21
 
25
- | Flag | Description | Default |
26
- |------|-------------|---------|
27
- | `--port <port>` | Custom port for the server | 3141 |
28
- | `--no-open` | Start server without opening browser | Off |
29
- | `--serve` | *(Deprecated)* Alias for default behavior | — |
22
+ | Flag | Description | Default |
23
+ | --------------- | ----------------------------------------- | ------- |
24
+ | `--port <port>` | Custom port for the server | 3141 |
25
+ | `--no-open` | Start server without opening browser | Off |
26
+ | `--serve` | _(Deprecated)_ Alias for default behavior | — |
30
27
 
31
28
  Note: `--export` and `--out` were removed. The CLI will error if used,
32
29
  suggesting `selftune dashboard` instead.
@@ -40,27 +37,25 @@ override.
40
37
 
41
38
  ### Endpoints
42
39
 
43
- | Method | Path | Description |
44
- |--------|------|-------------|
45
- | `GET` | `/` | Serve dashboard SPA shell |
46
- | `GET` | `/api/v2/overview` | SQLite-backed overview payload |
47
- | `GET` | `/api/v2/skills/:name` | SQLite-backed per-skill report |
48
- | `GET` | `/api/v2/orchestrate-runs` | Recent orchestrate run reports |
49
- | `GET` | `/api/v2/doctor` | System health diagnostics (config, logs, hooks, evolution) |
50
- | `GET` | `/api/v2/events` | SSE stream for live dashboard updates |
51
- | `GET` | `/api/health` | Dashboard server health probe |
52
- | `POST` | `/api/actions/watch` | Trigger `selftune watch` for a skill |
53
- | `POST` | `/api/actions/evolve` | Trigger `selftune evolve` for a skill |
54
- | `POST` | `/api/actions/rollback` | Trigger `selftune evolve rollback` for a skill |
40
+ | Method | Path | Description |
41
+ | ------ | -------------------------- | ---------------------------------------------------------- |
42
+ | `GET` | `/` | Serve dashboard SPA shell |
43
+ | `GET` | `/api/v2/overview` | SQLite-backed overview payload |
44
+ | `GET` | `/api/v2/skills/:name` | SQLite-backed per-skill report |
45
+ | `GET` | `/api/v2/orchestrate-runs` | Recent orchestrate run reports |
46
+ | `GET` | `/api/v2/doctor` | System health diagnostics (config, logs, hooks, evolution) |
47
+ | `GET` | `/api/v2/events` | SSE stream for live dashboard updates |
48
+ | `GET` | `/api/health` | Dashboard server health probe |
49
+ | `POST` | `/api/actions/watch` | Trigger `selftune watch` for a skill |
50
+ | `POST` | `/api/actions/evolve` | Trigger `selftune evolve` for a skill |
51
+ | `POST` | `/api/actions/rollback` | Trigger `selftune evolve rollback` for a skill |
55
52
 
56
53
  ### Live Updates (SSE)
57
54
 
58
55
  The dashboard connects to `/api/v2/events` via Server-Sent Events.
59
- When watched JSONL log files change on disk, the server broadcasts an
60
- `update` event. The SPA invalidates all cached queries, triggering
61
- immediate refetches. New data usually appears quickly, but the runtime
62
- footer and Status page will warn when the server is still in this
63
- legacy JSONL watcher mode.
56
+ The server watches the SQLite WAL file for changes and broadcasts an
57
+ `update` event when new data is written. The SPA invalidates all cached
58
+ queries, triggering immediate refetches (~1s latency).
64
59
 
65
60
  TanStack Query polling (60s) acts as a fallback safety net in case the
66
61
  SSE connection drops. Data also refreshes on window focus.
@@ -115,16 +110,16 @@ database and stops the server.
115
110
 
116
111
  The dashboard displays data from these sources:
117
112
 
118
- | Data | Source | Description |
119
- |------|--------|-------------|
120
- | Telemetry | SQLite (`~/.selftune/selftune.db`) | Session-level telemetry records |
121
- | Skills | SQLite (`~/.selftune/selftune.db`) | Skill activation and usage events |
122
- | Queries | SQLite (`~/.selftune/selftune.db`) | All user queries across sessions |
123
- | Evolution | SQLite (`~/.selftune/selftune.db`) | Evolution audit trail (create, deploy, rollback) |
124
- | Decisions | `~/.selftune/memory/` | Evolution decision records |
125
- | Snapshots | Computed | Per-skill monitoring snapshots (pass rate, regression status) |
126
- | Unmatched | Computed | Queries that did not trigger any skill |
127
- | Pending | Computed | Evolution proposals not yet deployed, rejected, or rolled back |
113
+ | Data | Source | Description |
114
+ | --------- | ---------------------------------- | -------------------------------------------------------------- |
115
+ | Telemetry | SQLite (`~/.selftune/selftune.db`) | Session-level telemetry records |
116
+ | Skills | SQLite (`~/.selftune/selftune.db`) | Skill activation and usage events |
117
+ | Queries | SQLite (`~/.selftune/selftune.db`) | All user queries across sessions |
118
+ | Evolution | SQLite (`~/.selftune/selftune.db`) | Evolution audit trail (create, deploy, rollback) |
119
+ | Decisions | `~/.selftune/memory/` | Evolution decision records |
120
+ | Snapshots | Computed | Per-skill monitoring snapshots (pass rate, regression status) |
121
+ | Unmatched | Computed | Queries that did not trigger any skill |
122
+ | Pending | Computed | Evolution proposals not yet deployed, rejected, or rolled back |
128
123
 
129
124
  If no log data is found, the server reports an error listing the
130
125
  checked file paths.
@@ -147,21 +142,26 @@ to trigger watch, evolve, or rollback directly from the dashboard.
147
142
  ## Common Patterns
148
143
 
149
144
  **User wants to see skill performance visually**
145
+
150
146
  > Run `selftune dashboard`. This opens a browser with a point-in-time snapshot.
151
147
  > Report to the user that the dashboard is open.
152
148
 
153
149
  **User wants live monitoring**
150
+
154
151
  > Run `selftune dashboard`. The server provides real-time updates via SSE
155
152
  > (~1 second latency).
156
153
 
157
154
  **Dashboard shows no data**
155
+
158
156
  > Run `selftune doctor` to verify hooks are installed. If hooks are missing,
159
157
  > route to the Initialize workflow. If hooks are present but no sessions
160
158
  > have run, inform the user that sessions must generate telemetry first.
161
159
 
162
160
  **User wants a different port**
161
+
163
162
  > Run `selftune dashboard --port <port>`. Port must be 1-65535.
164
163
 
165
164
  **User wants to trigger actions from the dashboard**
165
+
166
166
  > Run `selftune dashboard`. The dashboard provides action buttons for
167
167
  > watch, evolve, and rollback per skill via POST endpoints.
@@ -40,14 +40,14 @@ None. Doctor runs all checks unconditionally.
40
40
  },
41
41
  {
42
42
  "name": "dashboard_freshness_mode",
43
- "status": "warn",
44
- "message": "Dashboard still uses legacy JSONL watcher invalidation"
43
+ "status": "pass",
44
+ "message": "Dashboard reads SQLite and watches WAL for live updates"
45
45
  }
46
46
  ],
47
47
  "summary": {
48
- "pass": 8,
48
+ "pass": 9,
49
49
  "fail": 1,
50
- "warn": 1,
50
+ "warn": 0,
51
51
  "total": 10
52
52
  },
53
53
  "healthy": false
@@ -96,47 +96,47 @@ or queue checks when alpha is configured:
96
96
 
97
97
  ### Config Check
98
98
 
99
- | Check name | What it validates |
100
- |------------|-------------------|
101
- | `config` | `~/.selftune/config.json` exists, is valid JSON, contains `agent_type` and `llm_mode` fields |
99
+ | Check name | What it validates |
100
+ | ---------- | -------------------------------------------------------------------------------------------- |
101
+ | `config` | `~/.selftune/config.json` exists, is valid JSON, contains `agent_type` and `llm_mode` fields |
102
102
 
103
103
  ### Log Checks (4 checks)
104
104
 
105
- | Check name | What it validates |
106
- |------------|-------------------|
105
+ | Check name | What it validates |
106
+ | ----------------------- | ----------------------------------------------------- |
107
107
  | `log_session_telemetry` | `session_telemetry_log.jsonl` exists and is parseable |
108
- | `log_skill_usage` | `skill_usage_log.jsonl` exists and is parseable |
109
- | `log_all_queries` | `all_queries_log.jsonl` exists and is parseable |
110
- | `log_evolution_audit` | `evolution_audit_log.jsonl` exists and is parseable |
108
+ | `log_skill_usage` | `skill_usage_log.jsonl` exists and is parseable |
109
+ | `log_all_queries` | `all_queries_log.jsonl` exists and is parseable |
110
+ | `log_evolution_audit` | `evolution_audit_log.jsonl` exists and is parseable |
111
111
 
112
112
  ### Hook Check
113
113
 
114
- | Check name | What it validates |
115
- |------------|-------------------|
114
+ | Check name | What it validates |
115
+ | --------------- | ------------------------------------------------------- |
116
116
  | `hook_settings` | `~/.claude/settings.json` has selftune hooks configured |
117
117
 
118
118
  ### Evolution Check
119
119
 
120
- | Check name | What it validates |
121
- |------------|-------------------|
120
+ | Check name | What it validates |
121
+ | ----------------- | ------------------------------------------------ |
122
122
  | `evolution_audit` | Evolution audit log entries have valid structure |
123
123
 
124
124
  ### Integrity Check
125
125
 
126
- | Check name | What it validates |
127
- |------------|-------------------|
126
+ | Check name | What it validates |
127
+ | -------------------------- | ------------------------------------------------------------------------------------------------------------- |
128
128
  | `dashboard_freshness_mode` | Warns when the dashboard still relies on legacy JSONL watcher invalidation instead of SQLite WAL live refresh |
129
129
 
130
130
  ### Skill Version Sync Check
131
131
 
132
- | Check name | What it validates |
133
- |------------|-------------------|
132
+ | Check name | What it validates |
133
+ | -------------------- | --------------------------------------------------------- |
134
134
  | `skill_version_sync` | SKILL.md frontmatter version matches package.json version |
135
135
 
136
136
  ### Version Check
137
137
 
138
- | Check name | What it validates |
139
- |------------|-------------------|
138
+ | Check name | What it validates |
139
+ | -------------------- | ------------------------------------------------ |
140
140
  | `version_up_to_date` | Installed version matches latest on npm registry |
141
141
 
142
142
  ## Steps
@@ -155,15 +155,15 @@ Parse the JSON output. If `healthy: true`, selftune is fully operational.
155
155
 
156
156
  For each failed check, take the appropriate action:
157
157
 
158
- | Failed check | Fix |
159
- |-------------|-----|
160
- | `config` | Run `selftune init` (or `selftune init --force` to regenerate). |
161
- | `log_*` | Run a session to generate initial log entries. Check hook installation with `selftune init`. |
162
- | `hook_settings` | Run `selftune init` to install hooks into `~/.claude/settings.json`. |
163
- | `evolution_audit` | Remove corrupted entries. Future operations will append clean entries. |
158
+ | Failed check | Fix |
159
+ | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
160
+ | `config` | Run `selftune init` (or `selftune init --force` to regenerate). |
161
+ | `log_*` | Run a session to generate initial log entries. Check hook installation with `selftune init`. |
162
+ | `hook_settings` | Run `selftune init` to install hooks into `~/.claude/settings.json`. |
163
+ | `evolution_audit` | Remove corrupted entries. Future operations will append clean entries. |
164
164
  | `dashboard_freshness_mode` | This is an operator warning, not a broken install. Expect possible freshness gaps for SQLite-only writes and export before destructive recovery. |
165
- | `skill_version_sync` | Run `bun run sync-version` to stamp SKILL.md from package.json. |
166
- | `version_up_to_date` | Run `npm install -g selftune` to update. |
165
+ | `skill_version_sync` | Run `bun run sync-version` to stamp SKILL.md from package.json. |
166
+ | `version_up_to_date` | Run `npm install -g selftune` to update. |
167
167
 
168
168
  ### 4. Re-run Doctor
169
169
 
@@ -181,34 +181,40 @@ for root cause analysis.
181
181
  **Symptoms:** `selftune status` shows alpha upload as "not enrolled" or "enrolled (missing credential)"
182
182
 
183
183
  **Diagnostic steps:**
184
+
184
185
  1. Check `selftune status` — look at "Alpha Upload" and "Cloud link" lines
185
186
  2. If `doctor` includes a `cloud_link` or alpha queue warning, prefer `.checks[].guidance.next_command`
186
- 3. If "not enrolled" or "not linked": run `selftune init --alpha --alpha-email <email> --alpha-key <key>`
187
- 4. If "enrolled (missing credential)": re-run `selftune init --alpha --alpha-email <email> --alpha-key <credential> --force`
188
- 5. If "api_key has invalid format": credential must start with `st_live_` or `st_test_`
187
+ 3. If "not enrolled" or "not linked": run `selftune init --alpha --alpha-email <email>` (opens browser for device-code auth)
188
+ 4. If "enrolled (missing credential)": re-run `selftune init --alpha --alpha-email <email> --force` (re-authenticates via browser)
189
+ 5. If "api_key has invalid format": re-run init with `--alpha --force` to re-authenticate
189
190
 
190
191
  **Resolution:** Follow the setup sequence in Initialize workflow → Alpha Enrollment section.
191
192
 
192
193
  ## Common Patterns
193
194
 
194
195
  **User reports something seems broken**
196
+
195
197
  > Run `selftune doctor`. Parse the JSON output for failed checks. Report
196
198
  > each failure's `name` and `message` to the user with the recommended fix.
197
199
 
198
200
  **User asks if hooks are working**
201
+
199
202
  > Run `selftune doctor`. Parse `.checks[]` for hook-related entries. If
200
203
  > hooks pass but no data appears, verify hook script paths in
201
204
  > `~/.claude/settings.json` point to actual files.
202
205
 
203
206
  **No telemetry data available**
207
+
204
208
  > Run `selftune doctor`. Route fixes by platform:
209
+ >
205
210
  > - **Claude Code** — route to the Initialize workflow to install hooks
206
211
  > - **Codex** — run `selftune ingest codex` or `selftune ingest wrap-codex`
207
212
  > - **OpenCode** — run `selftune ingest opencode`
208
213
  > - **OpenClaw** — run `selftune ingest openclaw`
209
- > At least one session must complete after setup to generate telemetry.
214
+ > At least one session must complete after setup to generate telemetry.
210
215
 
211
216
  **User asks to check selftune health**
217
+
212
218
  > Run `selftune doctor`. Parse `.healthy` and `.summary`. If `healthy: true`,
213
219
  > report that selftune is fully operational. If false, report failed checks
214
220
  > and recommended fixes.
@@ -7,6 +7,7 @@ its invocation type.
7
7
  ## When to Invoke
8
8
 
9
9
  Invoke this workflow when the user requests any of the following:
10
+
10
11
  - Generating eval sets or test data for a skill
11
12
  - Checking which skills are undertriggering
12
13
  - Viewing skill telemetry or usage stats
@@ -21,22 +22,22 @@ selftune eval generate --skill <name> [options]
21
22
 
22
23
  ## Options
23
24
 
24
- | Flag | Description | Default |
25
- |------|-------------|---------|
26
- | `--skill <name>` | Skill to generate evals for | Required (unless `--list-skills`) |
27
- | `--list-skills` | List all logged skills with query counts | Off |
28
- | `--stats` | Show aggregate telemetry stats for the skill | Off |
29
- | `--max <n>` | Maximum eval entries per side | 50 |
30
- | `--seed <n>` | Seed for deterministic shuffling | 42 |
31
- | `--output <path>` / `--out <path>` | Output file path | `{skillName}_trigger_eval.json` |
32
- | `--no-negatives` | Exclude negative examples from output | Off |
33
- | `--no-taxonomy` | Skip invocation_type classification | Off |
34
- | `--skill-log <path>` | Path to skill_usage_log.jsonl | Default log path |
35
- | `--query-log <path>` | Path to all_queries_log.jsonl | Default log path |
36
- | `--telemetry-log <path>` | Path to session_telemetry_log.jsonl | Default log path |
37
- | `--synthetic` | Generate evals from SKILL.md via LLM (no logs needed) | Off |
38
- | `--skill-path <path>` | Path to SKILL.md (required with `--synthetic`) | — |
39
- | `--model <model>` | LLM model to use for synthetic generation | Agent default |
25
+ | Flag | Description | Default |
26
+ | ---------------------------------- | ----------------------------------------------------- | --------------------------------- |
27
+ | `--skill <name>` | Skill to generate evals for | Required (unless `--list-skills`) |
28
+ | `--list-skills` | List all logged skills with query counts | Off |
29
+ | `--stats` | Show aggregate telemetry stats for the skill | Off |
30
+ | `--max <n>` | Maximum eval entries per side | 50 |
31
+ | `--seed <n>` | Seed for deterministic shuffling | 42 |
32
+ | `--output <path>` / `--out <path>` | Output file path | `{skillName}_trigger_eval.json` |
33
+ | `--no-negatives` | Exclude negative examples from output | Off |
34
+ | `--no-taxonomy` | Skip invocation_type classification | Off |
35
+ | `--skill-log <path>` | Path to skill_usage_log.jsonl | Default log path |
36
+ | `--query-log <path>` | Path to all_queries_log.jsonl | Default log path |
37
+ | `--telemetry-log <path>` | Path to session_telemetry_log.jsonl | Default log path |
38
+ | `--synthetic` | Generate evals from SKILL.md via LLM (no logs needed) | Off |
39
+ | `--skill-path <path>` | Path to SKILL.md (required with `--synthetic`) | — |
40
+ | `--model <model>` | LLM model to use for synthetic generation | Agent default |
40
41
 
41
42
  ## Output Format
42
43
 
@@ -126,6 +127,7 @@ selftune eval generate --skill pptx --synthetic --skill-path /path/to/skills/ppt
126
127
  ```
127
128
 
128
129
  The command:
130
+
129
131
  1. Reads the SKILL.md file content
130
132
  2. Loads real user queries from the database (if available) as few-shot style examples so synthetic queries match real phrasing patterns
131
133
  3. Sends skill content and real examples to an LLM with a prompt requesting realistic test queries
@@ -155,6 +157,7 @@ selftune eval generate --skill pptx --max 50 --output evals-pptx.json
155
157
  ```
156
158
 
157
159
  The command:
160
+
158
161
  1. Reads positive triggers from `skill_usage_log.jsonl`
159
162
  2. Reads all queries from `all_queries_log.jsonl`
160
163
  3. Identifies queries that should have triggered but did not
@@ -181,40 +184,36 @@ If the user responds with "use defaults" or similar shorthand, skip to step 1 us
181
184
 
182
185
  For `--list-skills` or `--stats` requests, skip pre-flight entirely — these are read-only operations.
183
186
 
184
- Use `AskUserQuestion` with these questions:
185
-
186
- ```json
187
- {
188
- "questions": [
189
- {
190
- "question": "Generation Mode",
191
- "options": ["Log-based — build from real usage logs (recommended if logs exist)", "Synthetic — generate from SKILL.md via LLM (for new skills)"]
192
- },
193
- {
194
- "question": "Model (for synthetic mode)",
195
- "options": ["Fast (haiku) — quick generation", "Balanced (sonnet) — better diversity (recommended)", "Best (opus) — highest quality"]
196
- },
197
- {
198
- "question": "Max Entries",
199
- "options": ["50 (default)", "25 (quick)", "100 (comprehensive)"]
200
- }
201
- ]
202
- }
203
- ```
204
-
205
- If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
187
+ Ask one question at a time via `AskUserQuestion`, in this order:
188
+
189
+ 1. `Generation Mode`
190
+ Options:
191
+ - `Log-based — build from real usage logs (recommended if logs exist)`
192
+ - `Synthetic — generate from SKILL.md via LLM (for new skills)`
193
+ 2. If the user chose synthetic, ask `Model (for synthetic mode)`
194
+ Options:
195
+ - `Fast (haiku) — quick generation`
196
+ - `Balanced (sonnet) — better diversity (recommended)`
197
+ - `Best (opus) — highest quality`
198
+ 3. Ask `Max Entries`
199
+ Options:
200
+ - `50 (default)`
201
+ - `25 (quick)`
202
+ - `100 (comprehensive)`
203
+
204
+ If `AskUserQuestion` is not available or Claude does not invoke it, fall back to presenting the same choices as inline numbered options.
206
205
 
207
206
  After the user responds, parse their selections and map each choice to the corresponding CLI flags:
208
207
 
209
- | Selection | CLI Flag |
210
- |-----------|----------|
211
- | 1a (log-based) | _(no flag, default)_ |
212
- | 1b (synthetic) | `--synthetic --skill-path <path>` |
213
- | Custom max entries | `--max <value>` |
214
- | 4a (haiku) | `--model haiku` (resolved internally by selftune) |
215
- | 4b (sonnet) | `--model sonnet` |
216
- | 4c (opus) | `--model opus` |
217
- | Custom output path | `--out <path>` |
208
+ | Selection | CLI Flag |
209
+ | ------------------ | ------------------------------------------------- |
210
+ | 1a (log-based) | _(no flag, default)_ |
211
+ | 1b (synthetic) | `--synthetic --skill-path <path>` |
212
+ | Custom max entries | `--max <value>` |
213
+ | 2a (haiku) | `--model haiku` (resolved internally by selftune) |
214
+ | 2b (sonnet) | `--model sonnet` |
215
+ | 2c (opus) | `--model opus` |
216
+ | Custom output path | `--out <path>` |
218
217
 
219
218
  Show a confirmation summary to the user:
220
219
 
@@ -238,6 +237,7 @@ eval generation is useful.
238
237
  ### 2. Generate the Eval Set
239
238
 
240
239
  Run with `--skill <name>`. Parse the JSON output and review for:
240
+
241
241
  - Balance between positive and negative entries
242
242
  - Coverage of all three positive invocation types (explicit, implicit, contextual)
243
243
  - Reasonable negative examples (keyword overlap but wrong intent)
@@ -245,6 +245,7 @@ Run with `--skill <name>`. Parse the JSON output and review for:
245
245
  ### 3. Review Invocation Type Distribution
246
246
 
247
247
  A healthy eval set has:
248
+
248
249
  - Some explicit queries (easy baseline)
249
250
  - Many implicit queries (natural usage)
250
251
  - Several contextual queries (real-world usage)