selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -0,0 +1,144 @@
1
+ # selftune Baseline Workflow
2
+
3
+ Measure whether a skill adds value over a no-skill baseline. Runs trigger
4
+ checks with and without the skill description to compute lift — the
5
+ improvement in pass rate that the skill provides.
6
+
7
+ ## When to Invoke
8
+
9
+ Invoke this workflow when the user requests any of the following:
10
+ - Measuring whether a skill adds value or is worth keeping
11
+ - Comparing skill performance against a no-skill baseline
12
+ - Deciding whether to evolve or rework a skill
13
+ - Any request containing "baseline", "does this skill help", or "skill value"
14
+
15
+ ## Default Command
16
+
17
+ ```bash
18
+ selftune grade baseline --skill <name> --skill-path <path> [options]
19
+ ```
20
+
21
+ ## Options
22
+
23
+ | Flag | Description | Default |
24
+ |------|-------------|---------|
25
+ | `--skill <name>` | Skill name | Required |
26
+ | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
27
+ | `--eval-set <path>` | Pre-built eval set JSON | Auto-generated from logs |
28
+ | `--agent <name>` | Agent CLI to use | Auto-detected |
29
+
30
+ ## Output Format
31
+
32
+ ```json
33
+ {
34
+ "skill_name": "Research",
35
+ "eval_set_size": 25,
36
+ "baseline_pass_rate": 0.32,
37
+ "with_skill_pass_rate": 0.88,
38
+ "lift": 0.56,
39
+ "adds_value": true,
40
+ "measured_at": "2026-03-04T12:00:00.000Z"
41
+ }
42
+ ```
43
+
44
+ ## How It Works
45
+
46
+ 1. Loads the eval set (from `--eval-set` or auto-generated from logs)
47
+ 2. Reads the skill's current description from SKILL.md
48
+ 3. Runs trigger checks against an **empty description** (no-skill baseline)
49
+ 4. Runs trigger checks against the **actual description** (with-skill)
50
+ 5. Computes pass rates for both conditions
51
+ 6. Calculates `lift = with_skill_pass_rate - baseline_pass_rate`
52
+ 7. Sets `adds_value = lift >= 0.05`
53
+
54
+ ## Integration with Evolve
55
+
56
+ The `selftune evolve` command supports a `--with-baseline` flag:
57
+
58
+ ```bash
59
+ selftune evolve --skill Research --skill-path /path/SKILL.md --with-baseline
60
+ ```
61
+
62
+ When enabled, the evolve command measures baseline lift before deploying.
63
+ If the skill doesn't add at least 0.05 lift (5 percentage points) over the no-skill baseline, the evolution is
64
+ skipped — the skill needs fundamental rework, not description tweaks.
65
+
66
+ ## Steps
67
+
68
+ ### 0. Pre-Flight Configuration
69
+
70
+ Before running baseline measurement, present numbered configuration options to the user inline in your response, then wait for the user's answer before proceeding.
71
+
72
+ If the user responds with "use defaults", "just do it", or similar shorthand, skip to step 1 using the recommended defaults.
73
+
74
+ Present the following options inline in your response:
75
+
76
+ 1. **Eval Set Source**
77
+ - a) Auto-generate from logs (recommended if logs exist)
78
+ - b) Use existing eval set file — provide path
79
+ - c) Generate synthetic evals first (for new skills with no data)
80
+
81
+ 2. **Agent CLI**
82
+ - a) Auto-detect (recommended)
83
+ - b) Specify: claude / codex / opencode
84
+
85
+ Ask: "Reply with your choices or 'use defaults' for recommended settings."
86
+
87
+ After the user responds, parse their selections and map each choice to the corresponding CLI flags:
88
+
89
+ | Selection | CLI Flag |
90
+ |-----------|----------|
91
+ | 1a (auto-generate) | _(no flag, default)_ |
92
+ | 1b (existing eval set) | `--eval-set <path>` |
93
+ | 1c (synthetic first) | Run the Evals workflow with `--synthetic` first, then pass the generated eval set via `--eval-set <path>` |
94
+ | 2a (auto-detect) | _(no flag, default)_ |
95
+ | 2b (specify agent) | `--agent <name>` |
96
+
97
+ Show a confirmation summary to the user:
98
+
99
+ ```
100
+ Configuration Summary:
101
+ Eval source: auto-generate from logs
102
+ Agent: auto-detect
103
+
104
+ Proceeding...
105
+ ```
106
+
107
+ Build the CLI command string with all selected flags and continue to step 1.
108
+
109
+ ### 1. Run Baseline Measurement
110
+
111
+ ```bash
112
+ selftune grade baseline --skill Research --skill-path ~/.claude/skills/Research/SKILL.md
113
+ ```
114
+
115
+ Parse the JSON output and extract `lift` and `adds_value` fields.
116
+
117
+ ### 2. Interpret Results
118
+
119
+ | Lift | Interpretation | Action |
120
+ |------|---------------|--------|
121
+ | >= 0.20 | Strong value | Skill is working well |
122
+ | 0.05–0.20 | Moderate value | Consider evolving to improve |
123
+ | 0 to < 0.05 | Minimal value | Skill may need rework, not just evolution |
124
+ | < 0 | Negative value | Skill is hurting — investigate or disable |
125
+
126
+ Report the interpretation to the user based on the lift value.
127
+
128
+ ### 3. Use as Evolution Gate
129
+
130
+ Add `--with-baseline` to evolve commands to prevent wasting evolution
131
+ cycles on skills that don't add value.
132
+
133
+ ## Common Patterns
134
+
135
+ **User asks whether a skill adds value (e.g., "does the Research skill help?"):**
136
+ Run `selftune grade baseline --skill Research --skill-path ~/.claude/skills/Research/SKILL.md`.
137
+ Parse the JSON output and report the lift value with interpretation.
138
+
139
+ **User wants to gate evolution on baseline value:**
140
+ Run `selftune evolve --skill Research --skill-path /path/SKILL.md --with-baseline`.
141
+ This measures baseline lift before deploying and skips evolution if lift is below 5%.
142
+
143
+ **User wants to test with a custom eval set:**
144
+ Run `selftune grade baseline --skill pptx --skill-path /path/SKILL.md --eval-set evals-pptx.json`.
@@ -0,0 +1,107 @@
1
+ # selftune Composability Workflow
2
+
3
+ Analyze how skills interact when triggered together in the same session.
4
+ Detects conflict candidates — skill pairs that produce more errors when
5
+ co-occurring than when used alone.
6
+
7
+ ## Default Command
8
+
9
+ ```bash
10
+ selftune eval composability --skill <name> [options]
11
+ ```
12
+
13
+ ## Options
14
+
15
+ | Flag | Description | Default |
16
+ |------|-------------|---------|
17
+ | `--skill <name>` | Skill to analyze | Required |
18
+ | `--window <n>` | Only analyze sessions from last N days | All sessions |
19
+ | `--telemetry-log <path>` | Path to telemetry log | `~/.claude/session_telemetry_log.jsonl` |
20
+
21
+ ## Output Format
22
+
23
+ ```json
24
+ {
25
+ "skill_name": "Research",
26
+ "analyzed_sessions": 150,
27
+ "co_occurring_skills": [
28
+ {
29
+ "skill_a": "Research",
30
+ "skill_b": "Browser",
31
+ "co_occurrence_count": 42,
32
+ "conflict_score": 0.12,
33
+ "avg_errors_together": 1.5,
34
+ "avg_errors_alone": 1.3
35
+ }
36
+ ],
37
+ "conflict_candidates": [
38
+ {
39
+ "skill_a": "Research",
40
+ "skill_b": "Content",
41
+ "co_occurrence_count": 15,
42
+ "conflict_score": 0.45,
43
+ "avg_errors_together": 3.2,
44
+ "avg_errors_alone": 1.9
45
+ }
46
+ ],
47
+ "generated_at": "2026-03-04T12:00:00.000Z"
48
+ }
49
+ ```
50
+
51
+ ## How It Works
52
+
53
+ The analyzer is a pure function that computes conflict scores from telemetry:
54
+
55
+ 1. Filters sessions where `skills_triggered` includes the target skill
56
+ 2. For each co-occurring skill, computes:
57
+ - Average errors when both skills are triggered together
58
+ - Average errors when each skill is triggered alone
59
+ - `conflict_score = clamp((errors_together - errors_alone) / (errors_alone + 1), 0, 1)`
60
+ 3. Pairs with `conflict_score > 0.3` are flagged as conflict candidates
61
+ 4. Results sorted by co-occurrence count (most common first)
62
+
63
+ ## Steps
64
+
65
+ ### 1. Run Analysis
66
+
67
+ ```bash
68
+ selftune eval composability --skill Research
69
+ ```
70
+
71
+ ### 2. Interpret Results
72
+
73
+ | Conflict Score | Interpretation |
74
+ |---------------|---------------|
75
+ | 0.0–0.1 | No conflict — skills work well together |
76
+ | 0.1–0.3 | Minor friction — monitor but no action needed |
77
+ | 0.3–0.6 | Moderate conflict — investigate trigger overlap |
78
+ | 0.6–1.0 | Severe conflict — skills likely interfere with each other |
79
+
80
+ ### 3. Address Conflicts
81
+
82
+ When conflict candidates are identified, present them to the user with recommended actions:
83
+ - Check for trigger keyword overlap between the skills
84
+ - Check if one skill's workflow interferes with the other's
85
+ - Consider evolving descriptions to reduce false triggers
86
+ - Use the `pattern-analyst` agent for deeper cross-skill analysis
87
+
88
+ ## Subagent Escalation
89
+
90
+ For deep cross-skill analysis beyond what the composability command provides,
91
+ spawn the `pattern-analyst` agent as a subagent. This is useful when conflict
92
+ scores are high (> 0.3) and you need a full resolution plan with trigger
93
+ ownership recommendations.
94
+
95
+ ## Common Patterns
96
+
97
+ **"Are there conflicts between my skills?"**
98
+ > `selftune eval composability --skill Research`
99
+
100
+ **"Check composability for recent sessions only"**
101
+ > `selftune eval composability --skill pptx --window 7`
102
+
103
+ **"Which skills conflict with Research?"**
104
+ > Run composability and check the `conflict_candidates` array.
105
+
106
+ **"Why are sessions with multiple skills failing?"**
107
+ > Run composability for each skill involved, look for high conflict scores.
@@ -0,0 +1,94 @@
1
+ # selftune Contribute Workflow
2
+
3
+ Export anonymized skill observability data as a JSON bundle for community
4
+ contribution. Helps improve selftune's skill routing without exposing
5
+ private data.
6
+
7
+ ## When to Use
8
+
9
+ - The user asks to contribute data, share usage patterns, or help improve selftune
10
+ - The user wants to export anonymized skill observability data
11
+ - The agent needs to submit eval data for community skill evolution
12
+
13
+ ## Default Command
14
+
15
+ ```bash
16
+ selftune contribute --skill selftune
17
+ ```
18
+
19
+ ## Options
20
+
21
+ | Flag | Description |
22
+ |------|-------------|
23
+ | `--skill <name>` | Skill to contribute data for (default: "selftune") |
24
+ | `--output <path>` | Output file path (default: auto-generated in ~/.selftune/contributions/) |
25
+ | `--preview` | Show what would be shared without writing |
26
+ | `--sanitize <level>` | `conservative` (default) or `aggressive` |
27
+ | `--since <date>` | Only include data from this date onward |
28
+ | `--submit` | Auto-create GitHub Issue via `gh` CLI |
29
+
30
+ ## Sanitization Levels
31
+
32
+ ### Conservative (default)
33
+
34
+ | Pattern | Replacement |
35
+ |---------|-------------|
36
+ | File paths | `[PATH]` |
37
+ | Email addresses | `[EMAIL]` |
38
+ | API keys, tokens, JWTs | `[SECRET]` |
39
+ | IP addresses | `[IP]` |
40
+ | Project name from cwd | `[PROJECT]` |
41
+ | Session IDs | `[SESSION]` |
42
+
43
+ ### Aggressive
44
+
45
+ Extends conservative with:
46
+
47
+ | Pattern | Replacement |
48
+ |---------|-------------|
49
+ | camelCase/PascalCase identifiers > 8 chars | `[IDENTIFIER]` |
50
+ | Quoted strings | `[STRING]` |
51
+ | Import/require module paths | `[MODULE]` |
52
+ | Queries > 200 chars | Truncated |
53
+
54
+ ## Bundle Contents
55
+
56
+ The contribution bundle includes:
57
+ - **Positive queries** -- queries that triggered the skill (sanitized)
58
+ - **Eval entries** -- trigger eval set for the skill
59
+ - **Grading summary** -- aggregate pass rates (no raw transcripts)
60
+ - **Evolution summary** -- proposal counts and outcomes
61
+ - **Session metrics** -- average turns, tool usage, error rates
62
+
63
+ No raw transcripts, file contents, or identifiable information is included.
64
+
65
+ ## Submission
66
+
67
+ - Default: writes JSON file to `~/.selftune/contributions/`
68
+ - `--submit`: creates a GitHub Issue with the bundle
69
+ - Small bundles (< 50KB): inlined in issue body
70
+ - Large bundles (>= 50KB): uploaded as a gist
71
+
72
+ ## Steps
73
+
74
+ 1. Run `selftune contribute --preview --skill selftune` to preview the contribution bundle
75
+ 2. Parse the output and report the sanitized data summary to the user for review
76
+ 3. Run `selftune contribute --skill selftune` to write the bundle
77
+ 4. If the user wants to submit directly, run `selftune contribute --skill selftune --submit`
78
+
79
+ ## Common Patterns
80
+
81
+ **User wants to see what would be shared**
82
+ > Run `selftune contribute --preview`. Parse the output and report the
83
+ > sanitized data summary to the user before proceeding.
84
+
85
+ **User requests stronger anonymization**
86
+ > Run `selftune contribute --sanitize aggressive`. This replaces identifiers,
87
+ > quoted strings, and module paths in addition to standard PII scrubbing.
88
+
89
+ **User wants to submit directly**
90
+ > Run `selftune contribute --submit`. This creates a GitHub Issue via `gh`
91
+ > CLI with the bundle inlined or uploaded as a gist.
92
+
93
+ **User wants to limit to recent data**
94
+ > Run `selftune contribute --since <date>` with the user's specified date.
@@ -0,0 +1,132 @@
1
+ # selftune Cron Workflow
2
+
3
+ Set up scheduled automation for the selftune pipeline. Auto-detects the
4
+ platform (system cron, macOS launchd, Linux systemd) or can target
5
+ OpenClaw-specific cron integration.
6
+
7
+ ## When to Use
8
+
9
+ - Setting up selftune automation for the first time
10
+ - Checking which cron jobs are registered
11
+ - Removing selftune cron jobs (cleanup or reconfiguration)
12
+ - Enabling the autonomous observe-grade-evolve-deploy loop
13
+
14
+ ## Commands
15
+
16
+ ### `selftune cron setup`
17
+
18
+ Auto-detect the current platform and install scheduled jobs.
19
+
20
+ | Flag | Description | Default |
21
+ |------|-------------|---------|
22
+ | `--platform <name>` | Force a specific platform (`openclaw`, `cron`, `launchd`, `systemd`) | Auto-detect |
23
+ | `--dry-run` | Preview without installing | Off |
24
+ | `--tz <timezone>` | IANA timezone for job schedules (OpenClaw only) | `TZ` env var if set, otherwise system timezone |
25
+
26
+ Platform auto-detection: macOS → launchd, Linux → systemd, other → cron.
27
+
28
+ ### `selftune cron setup --platform openclaw`
29
+
30
+ Register selftune cron jobs with OpenClaw. Requires OpenClaw installed and on PATH.
31
+
32
+ ```bash
33
+ which openclaw # Must resolve
34
+ ```
35
+
36
+ ### `selftune cron list`
37
+
38
+ Show all registered selftune cron jobs. Reads from
39
+ `~/.openclaw/cron/jobs.json` and filters for `selftune-*` entries.
40
+ No flags.
41
+
42
+ ### `selftune cron remove`
43
+
44
+ Remove all selftune cron jobs from OpenClaw.
45
+
46
+ | Flag | Description | Default |
47
+ |------|-------------|---------|
48
+ | `--dry-run` | Preview which jobs would be removed without deleting | Off |
49
+
50
+ ## Aliases
51
+
52
+ `selftune schedule` is an alias for `selftune cron`. Existing `selftune schedule`
53
+ invocations with flags (e.g. `selftune schedule --platform launchd`) continue to work.
54
+
55
+ ## Default Job Schedule
56
+
57
+ Setup registers these jobs:
58
+
59
+ | Name | Cron Expression | Schedule | Description |
60
+ |------|----------------|----------|-------------|
61
+ | `selftune-sync` | `*/30 * * * *` | Every 30 minutes | Sync source-truth telemetry |
62
+ | `selftune-status` | `0 8 * * *` | Daily at 8am | Health check — report skills with pass rate below 80% |
63
+ | `selftune-orchestrate` | `0 */6 * * *` | Every 6 hours | Full autonomous loop: sync → candidate selection → evolve → watch |
64
+
65
+ All jobs run in **isolated session** mode — each execution gets a clean
66
+ session with no context accumulation from previous runs.
67
+
68
+ ## Output
69
+
70
+ - **setup:** Installs platform-appropriate schedule artifacts and activates them
71
+ - **setup --platform openclaw:** Registers jobs via `openclaw cron add` and confirms each
72
+ - **list:** Prints a formatted table of registered selftune cron jobs (name, schedule, description)
73
+ - **remove:** Deletes each selftune cron job via `openclaw cron remove` and confirms
74
+
75
+ ## Steps
76
+
77
+ 1. Run `selftune cron setup --dry-run` to preview what would be installed
78
+ 2. Run `selftune cron setup` to install scheduled jobs for your platform
79
+ 3. Verify with `selftune status` after the first scheduled run fires
80
+
81
+ For OpenClaw specifically:
82
+ 1. Run `selftune cron setup --platform openclaw --dry-run` to preview
83
+ 2. Run `selftune cron setup --platform openclaw` to register jobs
84
+ 3. Run `selftune cron list` to verify jobs are registered
85
+
86
+ ## The Autonomous Evolution Loop
87
+
88
+ When scheduled jobs are active, selftune operates as a self-correcting system.
89
+ The OS scheduler calls the CLI binary directly — no agent session is needed,
90
+ no token cost for routine runs.
91
+
92
+ ```text
93
+ OS scheduler fires (cron/launchd/systemd)
94
+ |
95
+ v
96
+ selftune orchestrate --max-skills 3 (CLI runs directly, no agent)
97
+ |
98
+ v
99
+ sync → candidate selection → evolve → validate → deploy → watch
100
+ |
101
+ v
102
+ Improved SKILL.md written to disk
103
+ |
104
+ v
105
+ Next interactive agent session uses updated description
106
+ ```
107
+
108
+ This is distinct from interactive mode where the user says "improve my skills"
109
+ and the agent runs orchestrate. Automated mode is for routine maintenance;
110
+ interactive mode is for user-directed improvements.
111
+
112
+ ## Safety Controls
113
+
114
+ | Control | How It Works |
115
+ |---------|-------------|
116
+ | Dry-run first | `selftune cron setup --dry-run` previews commands before installing |
117
+ | Regression threshold | Evolution only deploys if improvement exceeds 5% on existing triggers |
118
+ | Auto-rollback | `selftune watch` automatically rolls back if pass rate drops below baseline minus threshold |
119
+ | Audit trail | Every evolution recorded in `evolution_audit_log.jsonl` with full history |
120
+ | SKILL.md backup | `.bak` file created before every deploy — rollback restores from the `.bak` file first; the fallback path depends on audit metadata integrity |
121
+ | Human override | `selftune evolve rollback --skill <name> --skill-path <path>` available anytime to manually revert |
122
+ | Pin descriptions | Config flag to freeze specific skills and prevent evolution on sensitive skills |
123
+
124
+ ## Common Patterns
125
+
126
+ - **User wants autonomous skill evolution** -- Run `selftune cron setup`. Auto-detects the platform and installs appropriate scheduled jobs.
127
+ - **User specifies OpenClaw** -- Run `selftune cron setup --platform openclaw`.
128
+ - **User wants to preview before installing** -- Run `selftune cron setup --dry-run` to show exactly what would be installed without changing anything.
129
+ - **User needs a specific timezone (OpenClaw)** -- Run `selftune cron setup --platform openclaw --tz America/New_York`.
130
+ - **User asks what jobs are registered** -- Run `selftune cron list`. Shows a table of all selftune cron jobs with their schedules and descriptions.
131
+ - **User wants to remove cron automation** -- Run `selftune cron remove`. Preview first with `selftune cron remove --dry-run`.
132
+ - **Skill regressed after cron evolution** -- The watch step of the `selftune-orchestrate` job should catch this automatically. If not, run `selftune evolve rollback --skill <name> --skill-path <path>` manually. See `Workflows/Rollback.md`.
@@ -0,0 +1,214 @@
1
+ # selftune Dashboard Workflow
2
+
3
+ Visual dashboard for selftune telemetry, skill performance, evolution
4
+ audit, and monitoring data. Supports static HTML export, file output,
5
+ and a live server with polling-based auto-refresh and action buttons.
6
+
7
+ ## Default Command
8
+
9
+ ```bash
10
+ selftune dashboard
11
+ ```
12
+
13
+ Opens a standalone HTML dashboard in the default browser with embedded
14
+ data from all selftune log files.
15
+
16
+ ## Options
17
+
18
+ | Flag | Description | Default |
19
+ |------|-------------|---------|
20
+ | `--export` | Export data-embedded HTML to stdout | Off |
21
+ | `--out FILE` | Write data-embedded HTML to FILE | None |
22
+ | `--serve` | Start live dashboard server | Off |
23
+ | `--port <port>` | Custom port for live server (requires `--serve`) | 3141 |
24
+
25
+ ## Modes
26
+
27
+ ### Static (Default)
28
+
29
+ Builds an HTML file with all telemetry data embedded as JSON, saves it
30
+ to `~/.selftune/dashboard.html`, and opens it in the default browser.
31
+ The data is a point-in-time snapshot -- refresh by re-running the command.
32
+
33
+ ```bash
34
+ selftune dashboard
35
+ ```
36
+
37
+ ### Export
38
+
39
+ Writes the same data-embedded HTML to stdout. Useful for piping to other
40
+ tools or capturing output programmatically.
41
+
42
+ ```bash
43
+ selftune dashboard --export > dashboard.html
44
+ ```
45
+
46
+ ### File
47
+
48
+ Writes the data-embedded HTML to a specific file path.
49
+
50
+ ```bash
51
+ selftune dashboard --out /tmp/report.html
52
+ ```
53
+
54
+ ### Live Server
55
+
56
+ Starts a Bun HTTP server with a React SPA dashboard. The SPA uses
57
+ TanStack Query polling to auto-refresh data (overview every 15s,
58
+ orchestrate runs every 30s, doctor every 30s) and provides action
59
+ buttons to trigger selftune commands.
60
+
61
+ ```bash
62
+ selftune dashboard --serve
63
+ selftune dashboard --serve --port 8080
64
+ ```
65
+
66
+ ## Live Server
67
+
68
+ ### Default Port
69
+
70
+ The live server binds to `localhost:3141` by default. Use `--port` to
71
+ override.
72
+
73
+ ### Endpoints
74
+
75
+ | Method | Path | Description |
76
+ |--------|------|-------------|
77
+ | `GET` | `/` | Serve dashboard SPA shell |
78
+ | `GET` | `/api/v2/overview` | SQLite-backed overview payload |
79
+ | `GET` | `/api/v2/skills/:name` | SQLite-backed per-skill report |
80
+ | `GET` | `/api/v2/orchestrate-runs` | Recent orchestrate run reports |
81
+ | `GET` | `/api/v2/doctor` | System health diagnostics (config, logs, hooks, evolution) |
82
+ | `GET` | `/api/health` | Dashboard server health probe |
83
+ | `POST` | `/api/actions/watch` | Trigger `selftune watch` for a skill |
84
+ | `POST` | `/api/actions/evolve` | Trigger `selftune evolve` for a skill |
85
+ | `POST` | `/api/actions/rollback` | Trigger `selftune evolve rollback` for a skill |
86
+
87
+ ### Auto-Refresh
88
+
89
+ The dashboard SPA uses TanStack Query with `refetchInterval` to poll
90
+ the v2 API endpoints automatically:
91
+
92
+ - `/api/v2/overview` — every 15 seconds
93
+ - `/api/v2/orchestrate-runs` — every 30 seconds
94
+ - `/api/v2/doctor` — every 30 seconds
95
+ - `/api/v2/skills/:name` — every 30 seconds (when viewing a skill)
96
+
97
+ Data also refreshes on window focus. No SSE or websocket connection
98
+ is required.
99
+
100
+ ### Action Endpoints
101
+
102
+ Action buttons in the dashboard trigger selftune commands via POST
103
+ requests. Each endpoint spawns a `bun run` subprocess.
104
+
105
+ **Watch and Evolve** request body:
106
+
107
+ ```json
108
+ {
109
+ "skill": "skill-name",
110
+ "skillPath": "/path/to/SKILL.md"
111
+ }
112
+ ```
113
+
114
+ **Rollback** request body:
115
+
116
+ ```json
117
+ {
118
+ "skill": "skill-name",
119
+ "skillPath": "/path/to/SKILL.md",
120
+ "proposalId": "proposal-uuid"
121
+ }
122
+ ```
123
+
124
+ All action endpoints return:
125
+
126
+ ```json
127
+ {
128
+ "success": true,
129
+ "output": "command stdout",
130
+ "error": null
131
+ }
132
+ ```
133
+
134
+ On failure, `success` is `false` and `error` contains the error message.
135
+
136
+ ### Browser and Shutdown
137
+
138
+ The live server auto-opens the dashboard URL in the default browser on
139
+ macOS (`open`) and Linux (`xdg-open`).
140
+
141
+ Graceful shutdown on `SIGINT` (Ctrl+C) and `SIGTERM`: closes the SQLite
142
+ database and stops the server.
143
+
144
+ ## Data Contents
145
+
146
+ The dashboard displays data from these sources:
147
+
148
+ | Data | Source | Description |
149
+ |------|--------|-------------|
150
+ | Telemetry | `session_telemetry_log.jsonl` | Session-level telemetry records |
151
+ | Skills | `skill_usage_log.jsonl` | Skill activation and usage events |
152
+ | Queries | `all_queries_log.jsonl` | All user queries across sessions |
153
+ | Evolution | `evolution_audit_log.jsonl` | Evolution audit trail (create, deploy, rollback) |
154
+ | Decisions | `~/.selftune/memory/` | Evolution decision records |
155
+ | Snapshots | Computed | Per-skill monitoring snapshots (pass rate, regression status) |
156
+ | Unmatched | Computed | Queries that did not trigger any skill |
157
+ | Pending | Computed | Evolution proposals not yet deployed, rejected, or rolled back |
158
+
159
+ If no log data is found, the static modes exit with an error message
160
+ listing the checked file paths.
161
+
162
+ ## Steps
163
+
164
+ ### 1. Choose Mode
165
+
166
+ | Goal | Command |
167
+ |------|---------|
168
+ | Quick visual check | `selftune dashboard` |
169
+ | Save report to file | `selftune dashboard --out report.html` |
170
+ | Pipe to another tool | `selftune dashboard --export` |
171
+ | Live monitoring | `selftune dashboard --serve` |
172
+
173
+ ### 2. Run Command
174
+
175
+ ```bash
176
+ # Static (opens browser)
177
+ selftune dashboard
178
+
179
+ # Live server
180
+ selftune dashboard --serve
181
+ ```
182
+
183
+ ### 3. Interact with Dashboard
184
+
185
+ - **Static mode**: View the snapshot. Re-run to refresh.
186
+ - **Live mode**: Data refreshes automatically via polling (15-30s intervals).
187
+ Use action buttons to trigger watch, evolve, or rollback directly from
188
+ the dashboard.
189
+
190
+ ## Common Patterns
191
+
192
+ **User wants to see skill performance visually**
193
+ > Run `selftune dashboard`. This opens a browser with a point-in-time snapshot.
194
+ > Report to the user that the dashboard is open.
195
+
196
+ **User wants live monitoring**
197
+ > Run `selftune dashboard --serve`. Inform the user that data refreshes
198
+ > automatically every 15-30 seconds via polling.
199
+
200
+ **User wants a shareable report**
201
+ > Run `selftune dashboard --out report.html`. Report the file path to the
202
+ > user. The HTML file is self-contained with all data embedded.
203
+
204
+ **Dashboard shows no data**
205
+ > Run `selftune doctor` to verify hooks are installed. If hooks are missing,
206
+ > route to the Initialize workflow. If hooks are present but no sessions
207
+ > have run, inform the user that sessions must generate telemetry first.
208
+
209
+ **User wants a different port**
210
+ > Run `selftune dashboard --serve --port <port>`. Port must be 1-65535.
211
+
212
+ **User wants to trigger actions from the dashboard**
213
+ > Run `selftune dashboard --serve` for live mode. The dashboard provides
214
+ > action buttons for watch, evolve, and rollback per skill via POST endpoints.