selftune 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/.claude/agents/diagnosis-analyst.md +20 -10
  2. package/.claude/agents/evolution-reviewer.md +14 -1
  3. package/.claude/agents/integration-guide.md +18 -6
  4. package/.claude/agents/pattern-analyst.md +18 -5
  5. package/CHANGELOG.md +12 -4
  6. package/README.md +43 -35
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/cli/selftune/badge/badge-data.ts +1 -1
  20. package/cli/selftune/badge/badge.ts +4 -8
  21. package/cli/selftune/canonical-export.ts +183 -0
  22. package/cli/selftune/constants.ts +28 -0
  23. package/cli/selftune/contribute/contribute.ts +1 -1
  24. package/cli/selftune/cron/setup.ts +17 -17
  25. package/cli/selftune/dashboard-contract.ts +202 -0
  26. package/cli/selftune/dashboard-server.ts +653 -186
  27. package/cli/selftune/dashboard.ts +41 -176
  28. package/cli/selftune/eval/baseline.ts +5 -4
  29. package/cli/selftune/eval/composability-v2.ts +273 -0
  30. package/cli/selftune/eval/hooks-to-evals.ts +34 -15
  31. package/cli/selftune/eval/unit-test-cli.ts +1 -1
  32. package/cli/selftune/evolution/evidence.ts +26 -0
  33. package/cli/selftune/evolution/evolve-body.ts +105 -11
  34. package/cli/selftune/evolution/evolve.ts +371 -25
  35. package/cli/selftune/evolution/extract-patterns.ts +87 -29
  36. package/cli/selftune/evolution/rollback.ts +2 -2
  37. package/cli/selftune/grading/auto-grade.ts +200 -0
  38. package/cli/selftune/grading/grade-session.ts +448 -97
  39. package/cli/selftune/grading/results.ts +42 -0
  40. package/cli/selftune/hooks/prompt-log.ts +172 -2
  41. package/cli/selftune/hooks/session-stop.ts +123 -3
  42. package/cli/selftune/hooks/skill-eval.ts +119 -3
  43. package/cli/selftune/index.ts +395 -116
  44. package/cli/selftune/ingestors/claude-replay.ts +140 -114
  45. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  46. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  47. package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
  48. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  49. package/cli/selftune/init.ts +227 -14
  50. package/cli/selftune/last.ts +14 -5
  51. package/cli/selftune/localdb/db.ts +63 -0
  52. package/cli/selftune/localdb/materialize.ts +428 -0
  53. package/cli/selftune/localdb/queries.ts +376 -0
  54. package/cli/selftune/localdb/schema.ts +204 -0
  55. package/cli/selftune/monitoring/watch.ts +66 -15
  56. package/cli/selftune/normalization.ts +682 -0
  57. package/cli/selftune/observability.ts +19 -44
  58. package/cli/selftune/orchestrate.ts +1073 -0
  59. package/cli/selftune/quickstart.ts +203 -0
  60. package/cli/selftune/repair/skill-usage.ts +576 -0
  61. package/cli/selftune/schedule.ts +561 -0
  62. package/cli/selftune/status.ts +48 -26
  63. package/cli/selftune/sync.ts +627 -0
  64. package/cli/selftune/types.ts +148 -0
  65. package/cli/selftune/utils/canonical-log.ts +45 -0
  66. package/cli/selftune/utils/hooks.ts +41 -0
  67. package/cli/selftune/utils/html.ts +27 -0
  68. package/cli/selftune/utils/llm-call.ts +78 -20
  69. package/cli/selftune/utils/math.ts +10 -0
  70. package/cli/selftune/utils/query-filter.ts +139 -0
  71. package/cli/selftune/utils/skill-discovery.ts +340 -0
  72. package/cli/selftune/utils/skill-log.ts +68 -0
  73. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  74. package/cli/selftune/utils/transcript.ts +272 -26
  75. package/cli/selftune/workflows/discover.ts +254 -0
  76. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  77. package/cli/selftune/workflows/workflows.ts +188 -0
  78. package/package.json +21 -8
  79. package/packages/telemetry-contract/README.md +11 -0
  80. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  81. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  82. package/packages/telemetry-contract/index.ts +1 -0
  83. package/packages/telemetry-contract/package.json +19 -0
  84. package/packages/telemetry-contract/src/index.ts +2 -0
  85. package/packages/telemetry-contract/src/types.ts +163 -0
  86. package/packages/telemetry-contract/src/validators.ts +109 -0
  87. package/skill/SKILL.md +84 -53
  88. package/skill/Workflows/AutoActivation.md +17 -16
  89. package/skill/Workflows/Badge.md +6 -0
  90. package/skill/Workflows/Baseline.md +46 -23
  91. package/skill/Workflows/Composability.md +12 -5
  92. package/skill/Workflows/Contribute.md +17 -14
  93. package/skill/Workflows/Cron.md +56 -79
  94. package/skill/Workflows/Dashboard.md +45 -34
  95. package/skill/Workflows/Doctor.md +30 -17
  96. package/skill/Workflows/Evals.md +64 -40
  97. package/skill/Workflows/EvolutionMemory.md +2 -0
  98. package/skill/Workflows/Evolve.md +102 -47
  99. package/skill/Workflows/EvolveBody.md +6 -6
  100. package/skill/Workflows/Grade.md +36 -31
  101. package/skill/Workflows/ImportSkillsBench.md +11 -5
  102. package/skill/Workflows/Ingest.md +43 -36
  103. package/skill/Workflows/Initialize.md +44 -30
  104. package/skill/Workflows/Orchestrate.md +139 -0
  105. package/skill/Workflows/Replay.md +39 -18
  106. package/skill/Workflows/Rollback.md +3 -3
  107. package/skill/Workflows/Schedule.md +61 -0
  108. package/skill/Workflows/Sync.md +88 -0
  109. package/skill/Workflows/UnitTest.md +34 -22
  110. package/skill/Workflows/Watch.md +14 -4
  111. package/skill/Workflows/Workflows.md +129 -0
  112. package/skill/assets/activation-rules-default.json +26 -0
  113. package/skill/assets/multi-skill-settings.json +63 -0
  114. package/skill/assets/single-skill-settings.json +57 -0
  115. package/skill/references/invocation-taxonomy.md +2 -2
  116. package/skill/references/logs.md +164 -2
  117. package/skill/references/setup-patterns.md +65 -0
  118. package/skill/references/version-history.md +40 -0
  119. package/skill/settings_snippet.json +1 -1
  120. package/templates/multi-skill-settings.json +7 -7
  121. package/templates/single-skill-settings.json +6 -6
  122. package/dashboard/index.html +0 -1680
@@ -4,10 +4,18 @@ Measure whether a skill adds value over a no-skill baseline. Runs trigger
4
4
  checks with and without the skill description to compute lift — the
5
5
  improvement in pass rate that the skill provides.
6
6
 
7
+ ## When to Invoke
8
+
9
+ Invoke this workflow when the user requests any of the following:
10
+ - Measuring whether a skill adds value or is worth keeping
11
+ - Comparing skill performance against a no-skill baseline
12
+ - Deciding whether to evolve or rework a skill
13
+ - Any request containing "baseline", "does this skill help", or "skill value"
14
+
7
15
  ## Default Command
8
16
 
9
17
  ```bash
10
- selftune baseline --skill <name> --skill-path <path> [options]
18
+ selftune grade baseline --skill <name> --skill-path <path> [options]
11
19
  ```
12
20
 
13
21
  ## Options
@@ -59,27 +67,34 @@ skipped — the skill needs fundamental rework, not description tweaks.
59
67
 
60
68
  ### 0. Pre-Flight Configuration
61
69
 
62
- Before running baseline measurement, present configuration options to the user.
63
- If the user says "use defaults" or similar, skip to step 1 with recommended defaults.
70
+ Before running baseline measurement, present numbered configuration options to the user inline in your response, then wait for the user's answer before proceeding.
64
71
 
65
- Present these options:
72
+ If the user responds with "use defaults", "just do it", or similar shorthand, skip to step 1 using the recommended defaults.
66
73
 
67
- ```
68
- selftune baseline — Pre-Flight Configuration
74
+ Present the following options inline in your response:
69
75
 
70
- 1. Eval Set Source
71
- a) Auto-generate from logs (recommended if logs exist)
72
- b) Use existing eval set file — provide path
73
- c) Generate synthetic evals first (for new skills with no data)
76
+ 1. **Eval Set Source**
77
+ - a) Auto-generate from logs (recommended if logs exist)
78
+ - b) Use existing eval set file — provide path
79
+ - c) Generate synthetic evals first (for new skills with no data)
74
80
 
75
- 2. Agent CLI
76
- a) Auto-detect (recommended)
77
- b) Specify: claude / codex / opencode
81
+ 2. **Agent CLI**
82
+ - a) Auto-detect (recommended)
83
+ - b) Specify: claude / codex / opencode
78
84
 
79
- Reply with your choices or "use defaults" for recommended settings.
80
- ```
85
+ Ask: "Reply with your choices or 'use defaults' for recommended settings."
86
+
87
+ After the user responds, parse their selections and map each choice to the corresponding CLI flags:
81
88
 
82
- After the user responds, show a confirmation summary:
89
+ | Selection | CLI Flag |
90
+ |-----------|----------|
91
+ | 1a (auto-generate) | _(no flag, default)_ |
92
+ | 1b (existing eval set) | `--eval-set <path>` |
93
+ | 1c (synthetic first) | Run Evals workflow with `--synthetic` first, then use output |
94
+ | 2a (auto-detect) | _(no flag, default)_ |
95
+ | 2b (specify agent) | `--agent <name>` |
96
+
97
+ Show a confirmation summary to the user:
83
98
 
84
99
  ```
85
100
  Configuration Summary:
@@ -89,12 +104,16 @@ Configuration Summary:
89
104
  Proceeding...
90
105
  ```
91
106
 
107
+ Build the CLI command string with all selected flags and continue to step 1.
108
+
92
109
  ### 1. Run Baseline Measurement
93
110
 
94
111
  ```bash
95
- selftune baseline --skill Research --skill-path ~/.claude/skills/Research/SKILL.md
112
+ selftune grade baseline --skill Research --skill-path ~/.claude/skills/Research/SKILL.md
96
113
  ```
97
114
 
115
+ Parse the JSON output and extract `lift` and `adds_value` fields.
116
+
98
117
  ### 2. Interpret Results
99
118
 
100
119
  | Lift | Interpretation | Action |
@@ -104,6 +123,8 @@ selftune baseline --skill Research --skill-path ~/.claude/skills/Research/SKILL.
104
123
  | < 0.05 | Minimal value | Skill may need rework, not just evolution |
105
124
  | < 0 | Negative value | Skill is hurting — investigate or disable |
106
125
 
126
+ Report the interpretation to the user based on the lift value.
127
+
107
128
  ### 3. Use as Evolution Gate
108
129
 
109
130
  Add `--with-baseline` to evolve commands to prevent wasting evolution
@@ -111,11 +132,13 @@ cycles on skills that don't add value.
111
132
 
112
133
  ## Common Patterns
113
134
 
114
- **"Does the Research skill add value?"**
115
- > `selftune baseline --skill Research --skill-path ~/.claude/skills/Research/SKILL.md`
135
+ **User asks whether a skill adds value (e.g., "does the Research skill help?"):**
136
+ Run `selftune grade baseline --skill Research --skill-path ~/.claude/skills/Research/SKILL.md`.
137
+ Parse the JSON output and report the lift value with interpretation.
116
138
 
117
- **"Only evolve if the skill is actually useful"**
118
- > `selftune evolve --skill Research --skill-path /path/SKILL.md --with-baseline`
139
+ **User wants to gate evolution on baseline value:**
140
+ Run `selftune evolve --skill Research --skill-path /path/SKILL.md --with-baseline`.
141
+ This measures baseline lift before deploying and skips evolution if lift is below 5%.
119
142
 
120
- **"Check baseline with a custom eval set"**
121
- > `selftune baseline --skill pptx --skill-path /path/SKILL.md --eval-set evals-pptx.json`
143
+ **User wants to test with a custom eval set:**
144
+ Run `selftune grade baseline --skill pptx --skill-path /path/SKILL.md --eval-set evals-pptx.json`.
@@ -7,7 +7,7 @@ co-occurring than when used alone.
7
7
  ## Default Command
8
8
 
9
9
  ```bash
10
- selftune composability --skill <name> [options]
10
+ selftune eval composability --skill <name> [options]
11
11
  ```
12
12
 
13
13
  ## Options
@@ -65,7 +65,7 @@ The analyzer is a pure function that computes conflict scores from telemetry:
65
65
  ### 1. Run Analysis
66
66
 
67
67
  ```bash
68
- selftune composability --skill Research
68
+ selftune eval composability --skill Research
69
69
  ```
70
70
 
71
71
  ### 2. Interpret Results
@@ -79,19 +79,26 @@ selftune composability --skill Research
79
79
 
80
80
  ### 3. Address Conflicts
81
81
 
82
- For conflict candidates:
82
+ When conflict candidates are identified, present them to the user with recommended actions:
83
83
  - Check for trigger keyword overlap between the skills
84
84
  - Check if one skill's workflow interferes with the other's
85
85
  - Consider evolving descriptions to reduce false triggers
86
86
  - Use the `pattern-analyst` agent for deeper cross-skill analysis
87
87
 
88
+ ## Subagent Escalation
89
+
90
+ For deep cross-skill analysis beyond what the composability command provides,
91
+ spawn the `pattern-analyst` agent as a subagent. This is useful when conflict
92
+ scores are high (> 0.3) and you need a full resolution plan with trigger
93
+ ownership recommendations.
94
+
88
95
  ## Common Patterns
89
96
 
90
97
  **"Are there conflicts between my skills?"**
91
- > `selftune composability --skill Research`
98
+ > `selftune eval composability --skill Research`
92
99
 
93
100
  **"Check composability for recent sessions only"**
94
- > `selftune composability --skill pptx --window 7`
101
+ > `selftune eval composability --skill pptx --window 7`
95
102
 
96
103
  **"Which skills conflict with Research?"**
97
104
  > Run composability and check the `conflict_candidates` array.
@@ -6,9 +6,9 @@ private data.
6
6
 
7
7
  ## When to Use
8
8
 
9
- - Want to help improve selftune's skill routing
10
- - Sharing anonymized usage patterns with the community
11
- - Contributing eval data for skill evolution
9
+ - The user asks to contribute data, share usage patterns, or help improve selftune
10
+ - The user wants to export anonymized skill observability data
11
+ - The agent needs to submit eval data for community skill evolution
12
12
 
13
13
  ## Default Command
14
14
 
@@ -71,21 +71,24 @@ No raw transcripts, file contents, or identifiable information is included.
71
71
 
72
72
  ## Steps
73
73
 
74
- 1. Run `selftune contribute --preview --skill selftune` to see what would be shared
75
- 2. Review the sanitized output
74
+ 1. Run `selftune contribute --preview --skill selftune` to preview the contribution bundle
75
+ 2. Parse the output and report the sanitized data summary to the user for review
76
76
  3. Run `selftune contribute --skill selftune` to write the bundle
77
- 4. Optionally: `selftune contribute --skill selftune --submit` to create GitHub issue
77
+ 4. If the user wants to submit directly, run `selftune contribute --skill selftune --submit`
78
78
 
79
79
  ## Common Patterns
80
80
 
81
- **"Preview what I'd share"**
82
- > `selftune contribute --preview`
81
+ **User wants to see what would be shared**
82
+ > Run `selftune contribute --preview`. Parse the output and report the
83
+ > sanitized data summary to the user before proceeding.
83
84
 
84
- **"Use aggressive sanitization"**
85
- > `selftune contribute --sanitize aggressive`
85
+ **User requests stronger anonymization**
86
+ > Run `selftune contribute --sanitize aggressive`. This replaces identifiers,
87
+ > quoted strings, and module paths in addition to standard PII scrubbing.
86
88
 
87
- **"Submit directly to GitHub"**
88
- > `selftune contribute --submit`
89
+ **User wants to submit directly**
90
+ > Run `selftune contribute --submit`. This creates a GitHub Issue via `gh`
91
+ > CLI with the bundle inlined or uploaded as a gist.
89
92
 
90
- **"Only contribute recent data"**
91
- > `selftune contribute --since 2026-02-01`
93
+ **User wants to limit to recent data**
94
+ > Run `selftune contribute --since <date>` with the user's specified date.
@@ -1,40 +1,37 @@
1
1
  # selftune Cron Workflow
2
2
 
3
- Manage OpenClaw cron jobs that run the selftune pipeline on a schedule.
4
- Enables fully autonomous skill evolution — skills improve while you sleep.
3
+ Set up scheduled automation for the selftune pipeline. Auto-detects the
4
+ platform (system cron, macOS launchd, Linux systemd) or can target
5
+ OpenClaw-specific cron integration.
5
6
 
6
7
  ## When to Use
7
8
 
8
- - Setting up selftune automation for the first time on OpenClaw
9
+ - Setting up selftune automation for the first time
9
10
  - Checking which cron jobs are registered
10
11
  - Removing selftune cron jobs (cleanup or reconfiguration)
11
12
  - Enabling the autonomous observe-grade-evolve-deploy loop
12
13
 
13
- ## Prerequisites
14
+ ## Commands
14
15
 
15
- OpenClaw must be installed and in your PATH. The setup command will check
16
- for this and exit with instructions if OpenClaw is not found.
17
-
18
- ```bash
19
- which openclaw # Must resolve
20
- ```
16
+ ### `selftune cron setup`
21
17
 
22
- ## Default Command
18
+ Auto-detect the current platform and install scheduled jobs.
23
19
 
24
- ```bash
25
- selftune cron setup
26
- ```
20
+ | Flag | Description | Default |
21
+ |------|-------------|---------|
22
+ | `--platform <name>` | Force a specific platform (`openclaw`, `cron`, `launchd`, `systemd`) | Auto-detect |
23
+ | `--dry-run` | Preview without installing | Off |
24
+ | `--tz <timezone>` | IANA timezone for job schedules (OpenClaw only) | Flag > `TZ` env > system timezone |
27
25
 
28
- ## Subcommands
26
+ Platform auto-detection: macOS → launchd, Linux → systemd, other → cron.
29
27
 
30
- ### `selftune cron setup`
28
+ ### `selftune cron setup --platform openclaw`
31
29
 
32
- Register the default selftune cron jobs with OpenClaw.
30
+ Register selftune cron jobs with OpenClaw. Requires OpenClaw installed and on PATH.
33
31
 
34
- | Flag | Description | Default |
35
- |------|-------------|---------|
36
- | `--dry-run` | Preview commands without registering jobs | Off |
37
- | `--tz <timezone>` | IANA timezone for job schedules | Flag > `TZ` env > system timezone |
32
+ ```bash
33
+ which openclaw # Must resolve
34
+ ```
38
35
 
39
36
  ### `selftune cron list`
40
37
 
@@ -50,106 +47,86 @@ Remove all selftune cron jobs from OpenClaw.
50
47
  |------|-------------|---------|
51
48
  | `--dry-run` | Preview which jobs would be removed without deleting | Off |
52
49
 
50
+ ## Aliases
51
+
52
+ `selftune schedule` is an alias for `selftune cron`. Existing `selftune schedule`
53
+ invocations with flags (e.g. `selftune schedule --platform launchd`) continue to work.
54
+
53
55
  ## Default Job Schedule
54
56
 
55
- Setup registers these four jobs:
57
+ Setup registers these jobs:
56
58
 
57
59
  | Name | Cron Expression | Schedule | Description |
58
60
  |------|----------------|----------|-------------|
59
- | `selftune-ingest` | `*/30 * * * *` | Every 30 minutes | Ingest new sessions from OpenClaw transcripts |
61
+ | `selftune-sync` | `*/30 * * * *` | Every 30 minutes | Sync source-truth telemetry |
60
62
  | `selftune-status` | `0 8 * * *` | Daily at 8am | Health check — report skills with pass rate below 80% |
61
- | `selftune-evolve` | `0 3 * * 0` | Weekly at 3am Sunday | Full evolution pipeline for undertriggering skills |
62
- | `selftune-watch` | `0 */6 * * *` | Every 6 hours | Monitor recently evolved skills for regressions |
63
+ | `selftune-orchestrate` | `0 */6 * * *` | Every 6 hours | Full autonomous loop: sync → candidate selection → evolve → watch |
63
64
 
64
65
  All jobs run in **isolated session** mode — each execution gets a clean
65
66
  session with no context accumulation from previous runs.
66
67
 
67
68
  ## Output
68
69
 
69
- - **setup:** Registers jobs via `openclaw cron add` and confirms each registration
70
+ - **setup:** Installs platform-appropriate schedule artifacts and activates them
71
+ - **setup --platform openclaw:** Registers jobs via `openclaw cron add` and confirms each
70
72
  - **list:** Prints a formatted table of registered selftune cron jobs (name, schedule, description)
71
73
  - **remove:** Deletes each selftune cron job via `openclaw cron remove` and confirms
72
74
 
73
- Jobs persist at `~/.openclaw/cron/jobs.json` and survive OpenClaw restarts.
74
-
75
75
  ## Steps
76
76
 
77
- 1. Run `selftune cron setup --dry-run` to preview what would be registered
78
- 2. Run `selftune cron setup` to register the default jobs
77
+ 1. Run `selftune cron setup --dry-run` to preview what would be installed
78
+ 2. Run `selftune cron setup` to install scheduled jobs for your platform
79
+ 3. Verify with `selftune status` after the first scheduled run fires
80
+
81
+ For OpenClaw specifically:
82
+ 1. Run `selftune cron setup --platform openclaw --dry-run` to preview
83
+ 2. Run `selftune cron setup --platform openclaw` to register jobs
79
84
  3. Run `selftune cron list` to verify jobs are registered
80
- 4. Wait for the first cron cycle to fire (ingest runs every 30 minutes)
81
- 5. Check results with `selftune status` after the first daily health check
82
85
 
83
86
  ## The Autonomous Evolution Loop
84
87
 
85
- When cron jobs are active, selftune operates as a self-correcting system:
88
+ When scheduled jobs are active, selftune operates as a self-correcting system.
89
+ The OS scheduler calls the CLI binary directly — no agent session is needed,
90
+ no token cost for routine runs.
86
91
 
87
- ```
88
- Cron fires (isolated session)
92
+ ```text
93
+ OS scheduler fires (cron/launchd/systemd)
89
94
  |
90
95
  v
91
- Agent runs selftune pipeline (ingest -> status -> evolve -> watch)
96
+ selftune orchestrate --max-skills 3 (CLI runs directly, no agent)
92
97
  |
93
98
  v
94
- Improved SKILL.md written to disk
95
- |
96
- v
97
- OpenClaw file watcher detects change (250ms debounce)
99
+ sync → candidate selection → evolve → validate → deploy → watch
98
100
  |
99
101
  v
100
- Skill snapshot version bumped — next agent turn uses updated description
102
+ Improved SKILL.md written to disk
101
103
  |
102
104
  v
103
- Better triggering in real-time, no restart needed
105
+ Next interactive agent session uses updated description
104
106
  ```
105
107
 
106
- The four jobs form a continuous loop:
107
- - **ingest** captures raw session data every 30 minutes
108
- - **status** identifies undertriggering skills daily
109
- - **evolve** proposes and deploys improvements weekly
110
- - **watch** monitors for regressions every 6 hours and auto-rolls back if needed
111
-
112
- Skills improve and take effect within seconds of the cron job completing.
113
- No deployment step, no restart, no manual intervention.
108
+ This is distinct from interactive mode where the user says "improve my skills"
109
+ and the agent runs orchestrate. Automated mode is for routine maintenance;
110
+ interactive mode is for user-directed improvements.
114
111
 
115
112
  ## Safety Controls
116
113
 
117
114
  | Control | How It Works |
118
115
  |---------|-------------|
119
- | Dry-run first | `selftune cron setup --dry-run` previews commands before registering |
116
+ | Dry-run first | `selftune cron setup --dry-run` previews commands before installing |
120
117
  | Regression threshold | Evolution only deploys if improvement exceeds 5% on existing triggers |
121
118
  | Auto-rollback | `selftune watch` automatically rolls back if pass rate drops below baseline minus threshold |
122
119
  | Audit trail | Every evolution recorded in `evolution_audit_log.jsonl` with full history |
123
120
  | SKILL.md backup | `.bak` file created before every deploy — primary rollback path exists via .bak; fallback depends on audit metadata integrity |
124
- | Isolated sessions | Each cron run gets a clean session (no context pollution between runs) |
125
- | Human override | `selftune rollback --skill <name> --skill-path <path>` available anytime to manually revert |
121
+ | Human override | `selftune evolve rollback --skill <name> --skill-path <path>` available anytime to manually revert |
126
122
  | Pin descriptions | Config flag to freeze specific skills and prevent evolution on sensitive skills |
127
123
 
128
124
  ## Common Patterns
129
125
 
130
- **"Set up autonomous skill evolution"**
131
- > Run `selftune cron setup`. The four default jobs handle ingestion,
132
- > health checks, evolution, and regression monitoring.
133
-
134
- **"Preview before registering"**
135
- > Run `selftune cron setup --dry-run` to see exactly what commands
136
- > would be executed without registering anything.
137
-
138
- **"Use a specific timezone"**
139
- > Run `selftune cron setup --tz America/New_York`. Without the flag,
140
- > timezone resolution is: `--tz` flag > `TZ` environment variable > system timezone.
141
-
142
- **"What jobs are registered?"**
143
- > Run `selftune cron list`. Shows a table of all selftune cron jobs
144
- > with their schedules and descriptions.
145
-
146
- **"Remove all cron automation"**
147
- > Run `selftune cron remove`. Preview first with `selftune cron remove --dry-run`.
148
-
149
- **"A skill regressed after cron evolution"**
150
- > The watch job should catch this automatically. If not, run
151
- > `selftune rollback --skill <name>` manually. See `Workflows/Rollback.md`.
152
-
153
- **"How do I know the cron loop is working?"**
154
- > Run `selftune status` after the first daily health check fires (8am).
155
- > Check `evolution_audit_log.jsonl` for entries with recent timestamps.
126
+ - **User wants autonomous skill evolution** -- Run `selftune cron setup`. Auto-detects the platform and installs appropriate scheduled jobs.
127
+ - **User specifies OpenClaw** -- Run `selftune cron setup --platform openclaw`.
128
+ - **User wants to preview before installing** -- Run `selftune cron setup --dry-run` to show exactly what would be installed without changing anything.
129
+ - **User needs a specific timezone (OpenClaw)** -- Run `selftune cron setup --platform openclaw --tz America/New_York`.
130
+ - **User asks what jobs are registered** -- Run `selftune cron list`. Shows a table of all selftune cron jobs with their schedules and descriptions.
131
+ - **User wants to remove cron automation** -- Run `selftune cron remove`. Preview first with `selftune cron remove --dry-run`.
132
+ - **Skill regressed after cron evolution** -- The watch step of the `selftune-orchestrate` job should catch this automatically. If not, run `selftune evolve rollback --skill <name> --skill-path <path>` manually. See `Workflows/Rollback.md`.
@@ -2,7 +2,7 @@
2
2
 
3
3
  Visual dashboard for selftune telemetry, skill performance, evolution
4
4
  audit, and monitoring data. Supports static HTML export, file output,
5
- and a live server with SSE auto-refresh and action buttons.
5
+ and a live server with polling-based auto-refresh and action buttons.
6
6
 
7
7
  ## Default Command
8
8
 
@@ -53,9 +53,10 @@ selftune dashboard --out /tmp/report.html
53
53
 
54
54
  ### Live Server
55
55
 
56
- Starts a Bun HTTP server with real-time data updates via Server-Sent
57
- Events (SSE). The dashboard auto-refreshes every 5 seconds and provides
58
- action buttons to trigger selftune commands.
56
+ Starts a Bun HTTP server with a React SPA dashboard. The SPA uses
57
+ TanStack Query polling to auto-refresh data (overview every 15s,
58
+ orchestrate runs every 30s, doctor every 30s) and provides action
59
+ buttons to trigger selftune commands.
59
60
 
60
61
  ```bash
61
62
  selftune dashboard --serve
@@ -73,19 +74,28 @@ override.
73
74
 
74
75
  | Method | Path | Description |
75
76
  |--------|------|-------------|
76
- | `GET` | `/` | Serve dashboard HTML with embedded data and live mode flag |
77
- | `GET` | `/api/data` | JSON endpoint returning current telemetry data |
78
- | `GET` | `/api/events` | SSE stream sending data updates every 5 seconds |
77
+ | `GET` | `/` | Serve dashboard SPA shell |
78
+ | `GET` | `/api/v2/overview` | SQLite-backed overview payload |
79
+ | `GET` | `/api/v2/skills/:name` | SQLite-backed per-skill report |
80
+ | `GET` | `/api/v2/orchestrate-runs` | Recent orchestrate run reports |
81
+ | `GET` | `/api/v2/doctor` | System health diagnostics (config, logs, hooks, evolution) |
82
+ | `GET` | `/api/health` | Dashboard server health probe |
79
83
  | `POST` | `/api/actions/watch` | Trigger `selftune watch` for a skill |
80
84
  | `POST` | `/api/actions/evolve` | Trigger `selftune evolve` for a skill |
81
- | `POST` | `/api/actions/rollback` | Trigger `selftune rollback` for a skill |
85
+ | `POST` | `/api/actions/rollback` | Trigger `selftune evolve rollback` for a skill |
82
86
 
83
- ### SSE Auto-Refresh
87
+ ### Auto-Refresh
84
88
 
85
- The `/api/events` endpoint opens an SSE connection that pushes fresh
86
- data every 5 seconds. The dashboard client listens for `data` events
87
- and re-renders automatically. When `window.__SELFTUNE_LIVE__` is set
88
- (injected by the live server), the dashboard enables SSE polling.
89
+ The dashboard SPA uses TanStack Query with `refetchInterval` to poll
90
+ the v2 API endpoints automatically:
91
+
92
+ - `/api/v2/overview` — every 15 seconds
93
+ - `/api/v2/orchestrate-runs` — every 30 seconds
94
+ - `/api/v2/doctor` — every 30 seconds
95
+ - `/api/v2/skills/:name` — every 30 seconds (when viewing a skill)
96
+
97
+ Data also refreshes on window focus. No SSE or websocket connection
98
+ is required.
89
99
 
90
100
  ### Action Endpoints
91
101
 
@@ -128,8 +138,8 @@ On failure, `success` is `false` and `error` contains the error message.
128
138
  The live server auto-opens the dashboard URL in the default browser on
129
139
  macOS (`open`) and Linux (`xdg-open`).
130
140
 
131
- Graceful shutdown on `SIGINT` (Ctrl+C) and `SIGTERM`: closes all SSE
132
- client connections and stops the server.
141
+ Graceful shutdown on `SIGINT` (Ctrl+C) and `SIGTERM`: closes the SQLite
142
+ database and stops the server.
133
143
 
134
144
  ## Data Contents
135
145
 
@@ -173,31 +183,32 @@ selftune dashboard --serve
173
183
  ### 3. Interact with Dashboard
174
184
 
175
185
  - **Static mode**: View the snapshot. Re-run to refresh.
176
- - **Live mode**: Data refreshes automatically every 5 seconds. Use
177
- action buttons to trigger watch, evolve, or rollback directly from
186
+ - **Live mode**: Data refreshes automatically via polling (15-30s intervals).
187
+ Use action buttons to trigger watch, evolve, or rollback directly from
178
188
  the dashboard.
179
189
 
180
190
  ## Common Patterns
181
191
 
182
- **"Show me the dashboard"**
183
- > Run `selftune dashboard`. Opens a browser with current data.
192
+ **User wants to see skill performance visually**
193
+ > Run `selftune dashboard`. This opens a browser with a point-in-time snapshot.
194
+ > Report to the user that the dashboard is open.
184
195
 
185
- **"I want live updates"**
186
- > Run `selftune dashboard --serve`. The SSE stream refreshes every 5
187
- > seconds without manual intervention.
196
+ **User wants live monitoring**
197
+ > Run `selftune dashboard --serve`. Inform the user that data refreshes
198
+ > automatically every 15-30 seconds via polling.
188
199
 
189
- **"Export a report"**
190
- > Use `selftune dashboard --out report.html` to save a self-contained
191
- > HTML file. Share it -- no server needed, all data is embedded.
200
+ **User wants a shareable report**
201
+ > Run `selftune dashboard --out report.html`. Report the file path to the
202
+ > user. The HTML file is self-contained with all data embedded.
192
203
 
193
- **"The dashboard shows no data"**
194
- > No log files found. Run some sessions first so hooks generate
195
- > telemetry. Check `selftune doctor` to verify hooks are installed.
204
+ **Dashboard shows no data**
205
+ > Run `selftune doctor` to verify hooks are installed. If hooks are missing,
206
+ > route to the Initialize workflow. If hooks are present but no sessions
207
+ > have run, inform the user that sessions must generate telemetry first.
196
208
 
197
- **"Use a different port"**
198
- > `selftune dashboard --serve --port 8080`. Port must be 1-65535.
209
+ **User wants a different port**
210
+ > Run `selftune dashboard --serve --port <port>`. Port must be 1-65535.
199
211
 
200
- **"Trigger actions from the dashboard"**
201
- > In live server mode, the dashboard provides buttons to trigger watch,
202
- > evolve, and rollback for each skill. These call the action endpoints
203
- > which spawn selftune subprocesses.
212
+ **User wants to trigger actions from the dashboard**
213
+ > Run `selftune dashboard --serve` for live mode. The dashboard provides
214
+ > action buttons for watch, evolve, and rollback per skill via POST endpoints.
@@ -105,8 +105,8 @@ Doctor validates these areas:
105
105
 
106
106
  | Check | What it validates |
107
107
  |-------|-------------------|
108
- | Agent directory exists | `.claude/agents/` directory is present |
109
- | Agent files present | Expected agent files exist: `diagnosis-analyst.md`, `pattern-analyst.md`, `evolution-reviewer.md`, `integration-guide.md` |
108
+ | Optional agent directory exists | If `.claude/agents/` is present, it is readable |
109
+ | Optional agent files present | If the repo bundles helper agents, the expected files are present |
110
110
 
111
111
  ### Dashboard Checks (optional)
112
112
 
@@ -147,28 +147,41 @@ For each failed check, take the appropriate action:
147
147
  | Evolution guard missing | Add `hooks/evolution-guard.ts` to `PreToolUse` in settings. |
148
148
  | Memory directory missing | Run `mkdir -p ~/.selftune/memory`. |
149
149
  | Memory files invalid | Delete and let the memory writer recreate them on next evolve/watch. |
150
- | Activation rules missing | Copy `templates/activation-rules-default.json` to `~/.selftune/activation-rules.json`. |
150
+ | Activation rules missing | Copy `assets/activation-rules-default.json` to `~/.selftune/activation-rules.json`. |
151
151
  | Activation rules invalid | Validate JSON syntax. Re-copy from template if corrupted. |
152
- | Agent files missing | Copy agents from the selftune repo `.claude/agents/` directory. |
152
+ | Agent files missing | If your repo uses optional helper agents, restore them in `.claude/agents/`. Otherwise ignore this advisory. |
153
153
  | Audit log invalid | Remove corrupted entries. Future operations will append clean entries. |
154
154
 
155
155
  ### 4. Re-run Doctor
156
156
 
157
157
  After fixes, run doctor again to verify all checks pass.
158
158
 
159
- ## Common Patterns
160
-
161
- **"Something seems broken"**
162
- > Run doctor first. Report any failing checks with their detail messages.
159
+ ## Subagent Escalation
163
160
 
164
- **"Are my hooks working?"**
165
- > Doctor checks hook installation. If hooks pass but no data appears,
166
- > verify the hook script paths point to actual files.
161
+ If doctor reveals persistent issues with a specific skill — especially
162
+ recurring failures that basic fixes do not resolve — spawn the
163
+ `diagnosis-analyst` agent as a subagent for root cause analysis.
167
164
 
168
- **"No telemetry available"**
169
- > Doctor will report missing log files. Install hooks using the
170
- > `settings_snippet.json` in the skill directory, then run a session.
165
+ ## Common Patterns
171
166
 
172
- **"Check selftune health"**
173
- > Run doctor and report the summary. A clean bill of health means
174
- > all checks pass and selftune is ready to grade/evolve/watch.
167
+ **User reports something seems broken**
168
+ > Run `selftune doctor`. Parse the JSON output for failed checks. Report
169
+ > each failure's `name` and `detail` to the user with the recommended fix.
170
+
171
+ **User asks if hooks are working**
172
+ > Run `selftune doctor`. Parse `.checks[]` for hook-related entries. If
173
+ > hooks pass but no data appears, verify hook script paths in
174
+ > `~/.claude/settings.json` point to actual files.
175
+
176
+ **No telemetry data available**
177
+ > Run `selftune doctor`. Route fixes by platform:
178
+ > - **Claude Code** — route to the Initialize workflow to install hooks
179
+ > - **Codex** — run `selftune ingest codex` or `selftune ingest wrap-codex`
180
+ > - **OpenCode** — run `selftune ingest opencode`
181
+ > - **OpenClaw** — run `selftune ingest openclaw`
182
+ > At least one session must complete after setup to generate telemetry.
183
+
184
+ **User asks to check selftune health**
185
+ > Run `selftune doctor`. Parse `.healthy` and `.summary`. If `healthy: true`,
186
+ > report that selftune is fully operational. If false, report failed checks
187
+ > and recommended fixes.