selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -0,0 +1,109 @@
1
+ import {
2
+ CANONICAL_CAPTURE_MODES,
3
+ CANONICAL_COMPLETION_STATUSES,
4
+ CANONICAL_INVOCATION_MODES,
5
+ CANONICAL_PLATFORMS,
6
+ CANONICAL_PROMPT_KINDS,
7
+ CANONICAL_RECORD_KINDS,
8
+ CANONICAL_SCHEMA_VERSION,
9
+ CANONICAL_SOURCE_SESSION_KINDS,
10
+ type CanonicalRawSourceRef,
11
+ type CanonicalRecord,
12
+ } from "./types.js";
13
+
14
/**
 * Narrows an unknown value to a plain key/value object.
 *
 * `null` and arrays both report `typeof === "object"`, so they are rejected
 * explicitly before the `typeof` test.
 */
function isObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
17
+
18
/**
 * Reports whether `value[key]` holds a non-empty string.
 *
 * Missing keys, non-string values, and the empty string all yield `false`.
 */
function hasString(value: Record<string, unknown>, key: string): boolean {
  // Read the property once into a local so the typeof narrowing applies to it.
  const candidate = value[key];
  if (typeof candidate !== "string") {
    return false;
  }
  return candidate !== "";
}
21
+
22
/**
 * Type guard: is `value` one of the string literals listed in `values`?
 *
 * Non-string inputs are rejected up front; strings are matched by strict equality.
 */
function includesValue<T extends readonly string[]>(values: T, value: unknown): value is T[number] {
  if (typeof value !== "string") {
    return false;
  }
  return values.some((allowed) => allowed === value);
}
25
+
26
/**
 * Type guard for numbers that are neither `NaN` nor `±Infinity`.
 *
 * Numeric strings and other non-number values are rejected.
 */
function isFiniteNumber(value: unknown): value is number {
  if (typeof value !== "number") {
    return false;
  }
  return Number.isFinite(value);
}
29
+
30
/**
 * Type guard for arrays whose elements are all strings.
 *
 * An empty array passes, since there is no offending element.
 */
function isStringArray(value: unknown): value is string[] {
  if (!Array.isArray(value)) {
    return false;
  }
  // Valid when no element fails the string test.
  const hasNonString = value.some((item) => typeof item !== "string");
  return !hasNonString;
}
33
+
34
/**
 * Type guard for a plain object whose own enumerable values are all finite numbers.
 *
 * An object with no values passes.
 */
function isNumberRecord(value: unknown): value is Record<string, number> {
  if (!isObject(value)) {
    return false;
  }
  const entries = Object.values(value);
  return entries.every((entry) => isFiniteNumber(entry));
}
37
+
38
/**
 * Checks the two fields that tie a record to a session: a recognised
 * `source_session_kind` and a non-empty string `session_id`.
 */
function hasSessionScope(value: Record<string, unknown>): boolean {
  const sessionKindIsKnown = includesValue(
    CANONICAL_SOURCE_SESSION_KINDS,
    value.source_session_kind,
  );
  if (!sessionKindIsKnown) {
    return false;
  }
  return hasString(value, "session_id");
}
44
+
45
/**
 * Type guard for `CanonicalRawSourceRef`.
 *
 * Only the overall shape is verified: any non-null, non-array object is
 * accepted. No individual fields of the reference are inspected here.
 */
export function isCanonicalRawSourceRef(value: unknown): value is CanonicalRawSourceRef {
  if (!isObject(value)) {
    return false;
  }
  return true;
}
48
+
49
/**
 * Type guard for `CanonicalRecord`.
 *
 * Validation happens in two stages:
 *  1. Envelope fields required on every record: `schema_version`, `record_kind`,
 *     `platform`, `capture_mode`, `normalizer_version`, `normalized_at`, and
 *     `raw_source_ref`.
 *  2. Fields specific to the record's `record_kind`.
 *
 * String fields must be non-empty strings; their contents (for example the
 * format of timestamps) are not checked. Numeric fields must be finite numbers.
 *
 * @param value - Candidate value of unknown shape.
 * @returns `true` when every check for the record's kind passes, otherwise `false`.
 */
export function isCanonicalRecord(value: unknown): value is CanonicalRecord {
  if (!isObject(value)) return false;

  // Stage 1: shared envelope. The first failing check short-circuits the rest.
  if (
    value.schema_version !== CANONICAL_SCHEMA_VERSION ||
    !includesValue(CANONICAL_RECORD_KINDS, value.record_kind) ||
    !includesValue(CANONICAL_PLATFORMS, value.platform) ||
    !includesValue(CANONICAL_CAPTURE_MODES, value.capture_mode) ||
    !hasString(value, "normalizer_version") ||
    !hasString(value, "normalized_at") ||
    !isCanonicalRawSourceRef(value.raw_source_ref)
  ) {
    return false;
  }

  // `completion_status` is optional, but must be a known status when present.
  const optionalCompletionStatusOk = (status: unknown): boolean =>
    status === undefined || includesValue(CANONICAL_COMPLETION_STATUSES, status);

  // Stage 2: kind-specific fields.
  const kind = value.record_kind;

  if (kind === "session") {
    return hasSessionScope(value) && optionalCompletionStatusOk(value.completion_status);
  }

  if (kind === "prompt") {
    return (
      hasSessionScope(value) &&
      hasString(value, "prompt_id") &&
      hasString(value, "occurred_at") &&
      hasString(value, "prompt_text") &&
      includesValue(CANONICAL_PROMPT_KINDS, value.prompt_kind) &&
      typeof value.is_actionable === "boolean"
    );
  }

  if (kind === "skill_invocation") {
    // `matched_prompt_id` may be absent; when present it must be a non-empty string.
    const matchedPromptOk = (): boolean =>
      value.matched_prompt_id === undefined || hasString(value, "matched_prompt_id");
    return (
      hasSessionScope(value) &&
      hasString(value, "skill_invocation_id") &&
      hasString(value, "occurred_at") &&
      matchedPromptOk() &&
      hasString(value, "skill_name") &&
      includesValue(CANONICAL_INVOCATION_MODES, value.invocation_mode) &&
      typeof value.triggered === "boolean" &&
      isFiniteNumber(value.confidence)
    );
  }

  if (kind === "execution_fact") {
    return (
      hasSessionScope(value) &&
      hasString(value, "occurred_at") &&
      isNumberRecord(value.tool_calls_json) &&
      isFiniteNumber(value.total_tool_calls) &&
      isStringArray(value.bash_commands_redacted) &&
      isFiniteNumber(value.assistant_turns) &&
      isFiniteNumber(value.errors_encountered) &&
      optionalCompletionStatusOk(value.completion_status)
    );
  }

  if (kind === "normalization_run") {
    // Unlike the other kinds, this branch does not call hasSessionScope.
    return (
      hasString(value, "run_id") &&
      hasString(value, "run_at") &&
      isFiniteNumber(value.raw_records_seen) &&
      isFiniteNumber(value.canonical_records_written) &&
      typeof value.repair_applied === "boolean"
    );
  }

  // A kind listed in CANONICAL_RECORD_KINDS but not handled above is rejected.
  return false;
}
package/skill/SKILL.md CHANGED
@@ -1,11 +1,17 @@
1
1
  ---
2
2
  name: selftune
3
3
  description: >
4
- Skill observability and continuous improvement. Use when the user wants to:
4
+ Self-improving skills toolkit. Use when the user wants to:
5
5
  grade a session, generate evals, check undertriggering, evolve a skill
6
- description, rollback an evolution, monitor post-deploy performance, check
7
- skill health status, view last session insight, open the dashboard, run
8
- health checks, or ingest sessions from Codex/OpenCode.
6
+ description or full body, evolve routing tables, rollback an evolution,
7
+ monitor post-deploy performance, check skill health status, view last
8
+ session insight, open the dashboard, serve the live dashboard, run health
9
+ checks, manage activation rules, ingest sessions from Codex/OpenCode/OpenClaw,
10
+ replay Claude Code transcripts, contribute anonymized data to the community,
11
+ set up autonomous cron jobs, manage evolution memory, configure auto-activation
12
+ suggestions, diagnose underperforming skills, analyze cross-skill patterns,
13
+ review evolution proposals, measure baseline lift, run skill unit tests,
14
+ analyze skill composability, or import SkillsBench evaluation corpora.
9
15
  ---
10
16
 
11
17
  # selftune
@@ -13,6 +19,11 @@ description: >
13
19
  Observe real agent sessions, detect missed triggers, grade execution quality,
14
20
  and evolve skill descriptions toward the language real users actually use.
15
21
 
22
+ **You are the operator.** The user installed this skill so YOU can manage their
23
+ skill health autonomously. They will say things like "set up selftune",
24
+ "improve my skills", or "how are my skills doing?" — and you route to the
25
+ correct workflow below. The user does not run CLI commands directly; you do.
26
+
16
27
  ## Bootstrap
17
28
 
18
29
  If `~/.selftune/config.json` does not exist, read `Workflows/Initialize.md`
@@ -26,57 +37,131 @@ selftune <command> [options]
26
37
  ```
27
38
 
28
39
  Most commands output deterministic JSON. Parse JSON output for machine-readable commands.
29
- `selftune dashboard` is an exception: it generates an HTML artifact and may print
30
- informational progress lines.
40
+ `selftune dashboard` is an exception: `--export` generates an HTML artifact, while
41
+ `--serve` starts a local server; both may print informational progress lines.
31
42
 
32
43
  ## Quick Reference
33
44
 
34
45
  ```bash
35
- selftune grade --skill <name> [--expectations "..."] [--agent <name>]
36
- selftune evals --skill <name> [--list-skills] [--stats] [--max N]
37
- selftune evolve --skill <name> --skill-path <path> [--dry-run]
38
- selftune rollback --skill <name> --skill-path <path> [--proposal-id <id>]
46
+ # Ingest group
47
+ selftune ingest claude [--since DATE] [--dry-run] [--force] [--verbose]
48
+ selftune ingest codex # (experimental)
49
+ selftune ingest opencode # (experimental)
50
+ selftune ingest openclaw [--agents-dir PATH] [--since DATE] [--dry-run] [--force] [--verbose] # (experimental)
51
+ selftune ingest wrap-codex -- <codex args> # (experimental)
52
+
53
+ # Grade group
54
+ selftune grade auto --skill <name> [--expectations "..."] [--agent <name>]
55
+ selftune grade baseline --skill <name> --skill-path <path> [--eval-set <path>] [--agent <name>]
56
+
57
+ # Evolve group
58
+ selftune evolve --skill <name> --skill-path <path> [--dry-run]
59
+ selftune evolve body --skill <name> --skill-path <path> --target <routing_table|full_body> [--dry-run]
60
+ selftune evolve rollback --skill <name> --skill-path <path> [--proposal-id <id>]
61
+
62
+ # Eval group
63
+ selftune eval generate --skill <name> [--list-skills] [--stats] [--max N]
64
+ selftune eval unit-test --skill <name> --tests <path> [--run-agent] [--generate]
65
+ selftune eval import --dir <path> --skill <name> --output <path> [--match-strategy exact|fuzzy]
66
+ selftune eval composability --skill <name> [--window N] [--telemetry-log <path>]
67
+
68
+ # Other commands
39
69
  selftune watch --skill <name> --skill-path <path> [--auto-rollback]
40
70
  selftune status
41
71
  selftune last
42
72
  selftune doctor
43
- selftune dashboard [--export] [--out FILE]
44
- selftune ingest-codex
45
- selftune ingest-opencode
46
- selftune wrap-codex -- <codex args>
73
+ selftune dashboard [--export] [--out FILE] [--serve]
74
+ selftune dashboard --serve [--port <port>]
75
+ selftune contribute [--skill NAME] [--preview] [--sanitize LEVEL] [--submit]
76
+ selftune cron setup [--dry-run] # auto-detect platform (cron/launchd/systemd)
77
+ selftune cron setup --platform openclaw [--dry-run] [--tz <timezone>] # OpenClaw-specific
78
+ selftune cron list
79
+ selftune cron remove [--dry-run]
47
80
  ```
48
81
 
49
82
  ## Workflow Routing
50
83
 
51
84
  | Trigger keywords | Workflow | File |
52
85
  |------------------|----------|------|
53
- | grade, score, evaluate, assess session | Grade | Workflows/Grade.md |
54
- | evals, eval set, undertriggering, skill stats | Evals | Workflows/Evals.md |
55
- | evolve, improve, triggers, catch more queries | Evolve | Workflows/Evolve.md |
56
- | rollback, undo, restore, revert evolution | Rollback | Workflows/Rollback.md |
57
- | watch, monitor, regression, post-deploy, performing | Watch | Workflows/Watch.md |
58
- | doctor, health, hooks, broken, diagnose | Doctor | Workflows/Doctor.md |
59
- | ingest, import, codex logs, opencode, wrap codex | Ingest | Workflows/Ingest.md |
60
- | init, setup, bootstrap, first time | Initialize | Workflows/Initialize.md |
61
- | status, health summary, skill health, pass rates, how are skills | Status | *(direct command — no workflow file)* |
62
- | last, last session, recent session, what happened | Last | *(direct command no workflow file)* |
63
- | dashboard, visual, open dashboard, skill grid | Dashboard | *(direct command no workflow file)* |
86
+ | grade, score, evaluate, assess session, auto-grade | Grade | Workflows/Grade.md |
87
+ | evals, eval set, undertriggering, skill stats, eval generate | Evals | Workflows/Evals.md |
88
+ | evolve, improve, optimize skills, make skills better, triggers, catch more queries | Evolve | Workflows/Evolve.md |
89
+ | evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | Workflows/Rollback.md |
90
+ | watch, monitor, regression, post-deploy, performing, keep an eye on | Watch | Workflows/Watch.md |
91
+ | doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | Workflows/Doctor.md |
92
+ | ingest, import, codex logs, opencode, openclaw, wrap codex | Ingest | Workflows/Ingest.md |
93
+ | ingest claude, backfill, claude transcripts, historical sessions | Replay | Workflows/Replay.md |
94
+ | contribute, share, community, export data, anonymized, give back, help others | Contribute | Workflows/Contribute.md |
95
+ | init, setup, set up, bootstrap, first time, install, configure selftune | Initialize | Workflows/Initialize.md |
96
+ | cron, schedule, autonomous, automate evolution, run automatically, run on its own | Cron | Workflows/Cron.md |
97
+ | auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | Workflows/AutoActivation.md |
98
+ | dashboard, visual, open dashboard, show dashboard, skill grid, serve dashboard, live dashboard | Dashboard | Workflows/Dashboard.md |
99
+ | evolution memory, context memory, session continuity, what happened last | EvolutionMemory | Workflows/EvolutionMemory.md |
100
+ | evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | Workflows/EvolveBody.md |
101
+ | grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | Workflows/Baseline.md |
102
+ | eval unit-test, skill test, test skill, generate tests, run tests, assertions | UnitTest | Workflows/UnitTest.md |
103
+ | eval composability, co-occurrence, skill conflicts, skills together, conflict score | Composability | Workflows/Composability.md |
104
+ | eval import, skillsbench, external evals, benchmark tasks, import corpus | ImportSkillsBench | Workflows/ImportSkillsBench.md |
105
+ | status, health summary, skill health, pass rates, how are skills, skills working, skills doing, run selftune, start selftune | Status | *(direct command — no workflow file)* |
106
+ | last, last session, recent session, what happened, what changed, what did selftune do | Last | *(direct command — no workflow file)* |
107
+
108
+ Workflows marked with † also run autonomously via `selftune orchestrate` without user interaction.
109
+
110
+ ## Interactive Configuration
111
+
112
+ Before running mutating workflows (`evolve`, `evolve body`, `eval generate`, `grade baseline`), present
113
+ a pre-flight configuration prompt to the user. This gives them control over
114
+ execution mode, model selection, and key parameters.
115
+
116
+ ### Pre-Flight Pattern
117
+
118
+ Each mutating workflow has a **Pre-Flight Configuration** step. Follow this pattern:
119
+
120
+ 1. Present a summary of what the command will do
121
+ 2. Show numbered options with `(recommended)` markers for suggested defaults
122
+ 3. Ask the user to pick options or say "use defaults" / "go with defaults"
123
+ 4. Show a confirmation summary of selected options before executing
124
+
125
+ ### Model Tier Reference
126
+
127
+ When presenting model choices, use this table:
128
+
129
+ | Tier | Model | Speed | Cost | Quality | Best for |
130
+ |------|-------|-------|------|---------|----------|
131
+ | Fast | `haiku` | ~2s/call | $ | Good | Iteration loops, bulk validation |
132
+ | Balanced | `sonnet` | ~5s/call | $$ | Great | Single-pass proposals, gate checks |
133
+ | Best | `opus` | ~10s/call | $$$ | Excellent | High-stakes final validation |
134
+
135
+ ### Quick Path
136
+
137
+ If the user says "use defaults", "just do it", or similar — skip the pre-flight
138
+ and run with recommended defaults. The pre-flight is for users who want control,
139
+ not a mandatory gate.
140
+
141
+ ### Workflows That Skip Pre-Flight
142
+
143
+ These read-only or simple workflows run immediately without prompting:
144
+ `status`, `last`, `doctor`, `dashboard`, `watch`, `evolve rollback`,
145
+ `grade auto`, `ingest *`, `contribute`, `cron`, `eval composability`,
146
+ `eval unit-test`, `eval import`.
64
147
 
65
148
  ## The Feedback Loop
66
149
 
67
- ```
68
- Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
150
+ ```text
151
+ Observe --> Detect --> Diagnose --> Propose --> Validate --> Audit --> Deploy --> Watch --> Rollback
69
152
  | |
70
153
  +--------------------------------------------------------------------+
71
154
  ```
72
155
 
73
156
  1. **Observe** -- Hooks capture every session (queries, triggers, metrics)
74
- 2. **Detect** -- `evals` finds missed triggers across invocation types
75
- 3. **Diagnose** -- `grade` evaluates session quality with evidence
76
- 4. **Propose** -- `evolve` generates description improvements
157
+ 2. **Detect** -- `selftune eval generate` extracts missed-trigger patterns across invocation types
158
+ 3. **Diagnose** -- `selftune grade` evaluates session quality with evidence
159
+ 4. **Propose** -- `selftune evolve` generates description improvements
77
160
  5. **Validate** -- Evolution is tested against the eval set
78
- 6. **Deploy** -- Updated description replaces the original (with backup)
79
- 7. **Watch** -- `watch` monitors for regressions post-deploy
161
+ 6. **Audit** -- Persist proposal, evidence, and decision metadata for traceability
162
+ 7. **Deploy** -- Updated description replaces the original (with backup)
163
+ 8. **Watch** -- `selftune watch` monitors for regressions post-deploy
164
+ 9. **Rollback** -- `selftune evolve rollback` restores the previous version when regressions are detected
80
165
 
81
166
  ## Resource Index
82
167
 
@@ -94,7 +179,30 @@ Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
94
179
  | `Workflows/Rollback.md` | Undo an evolution, restore previous description |
95
180
  | `Workflows/Watch.md` | Post-deploy regression monitoring |
96
181
  | `Workflows/Doctor.md` | Health checks on logs, hooks, schema |
97
- | `Workflows/Ingest.md` | Import sessions from Codex and OpenCode |
182
+ | `Workflows/Ingest.md` | Import sessions from Codex, OpenCode, and OpenClaw |
183
+ | `Workflows/Replay.md` | Backfill logs from Claude Code transcripts |
184
+ | `Workflows/Contribute.md` | Export anonymized data for community contribution |
185
+ | `Workflows/Cron.md` | Scheduling & automation (cron/launchd/systemd/OpenClaw) |
186
+ | `Workflows/AutoActivation.md` | Auto-activation hook behavior and rules |
187
+ | `Workflows/Dashboard.md` | Dashboard modes: static, export, live server |
188
+ | `Workflows/EvolutionMemory.md` | Evolution memory system for session continuity |
189
+ | `Workflows/EvolveBody.md` | Full body and routing table evolution |
190
+ | `Workflows/Baseline.md` | No-skill baseline comparison and lift measurement |
191
+ | `Workflows/UnitTest.md` | Skill-level unit test runner and generator |
192
+ | `Workflows/Composability.md` | Multi-skill co-occurrence conflict analysis |
193
+ | `Workflows/ImportSkillsBench.md` | SkillsBench task corpus importer |
194
+
195
+ ## Specialized Agents
196
+
197
+ selftune provides focused agents for deeper analysis. These live in
198
+ `.claude/agents/` and can be spawned as subagents for specialized tasks.
199
+
200
+ | Trigger keywords | Agent | Purpose | When to spawn |
201
+ |------------------|-------|---------|---------------|
202
+ | diagnose, root cause, why failing, skill failure, debug performance | diagnosis-analyst | Deep-dive analysis of underperforming skills | After doctor finds persistent issues, grades are consistently low, or status shows CRITICAL/WARNING |
203
+ | patterns, conflicts, cross-skill, overlap, trigger conflicts, optimize skills | pattern-analyst | Cross-skill pattern analysis and conflict detection | When user asks about cross-skill conflicts or composability scores indicate moderate-to-severe conflicts |
204
+ | review evolution, check proposal, safe to deploy, approve evolution | evolution-reviewer | Safety gate review of pending evolution proposals | Before deploying an evolution in interactive mode, especially for high-stakes or low-confidence proposals |
205
+ | set up selftune, integrate, configure project, install selftune | integration-guide | Guided interactive setup for specific project types | For complex project structures (monorepo, multi-skill, mixed agent platforms) |
98
206
 
99
207
  ## Examples
100
208
 
@@ -110,7 +218,46 @@ Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
110
218
  - "How are my skills performing?"
111
219
  - "What happened in my last session?"
112
220
  - "Open the selftune dashboard"
221
+ - "Serve the dashboard at http://localhost:3141"
113
222
  - "Show skill health status"
223
+ - "Replay my Claude Code transcripts"
224
+ - "Backfill logs from historical sessions"
225
+ - "Contribute my selftune data to the community"
226
+ - "Share anonymized skill data"
227
+ - "Set up cron jobs for autonomous evolution"
228
+ - "Schedule selftune to run automatically"
229
+ - "Ingest my OpenClaw sessions"
230
+ - "Why is selftune suggesting things?"
231
+ - "Customize activation rules"
232
+ - "Start the live dashboard"
233
+ - "Serve the dashboard on port 8080"
234
+ - "What happened in the last evolution?"
235
+ - "Read the evolution memory"
236
+ - "Why is this skill underperforming?"
237
+ - "Are there conflicts between my skills?"
238
+ - "Review this evolution before deploying"
239
+ - "Set up selftune for my project"
240
+ - "Evolve the full body of the Research skill"
241
+ - "Rewrite the routing table for pptx"
242
+ - "Does this skill add value over no-skill baseline?"
243
+ - "Measure baseline lift for the Research skill"
244
+ - "Generate unit tests for the pptx skill"
245
+ - "Run skill unit tests"
246
+ - "Which skills conflict with each other?"
247
+ - "Analyze composability for the Research skill"
248
+ - "Import SkillsBench tasks for my skill"
249
+ - "Install selftune"
250
+ - "Configure selftune for this project"
251
+ - "Make my skills better"
252
+ - "Optimize my skills"
253
+ - "Are my skills working?"
254
+ - "Show me the dashboard"
255
+ - "What changed since last time?"
256
+ - "What did selftune do?"
257
+ - "Run selftune"
258
+ - "Start selftune"
259
+ - "Go back to the previous version"
260
+ - "Undo the last change"
114
261
 
115
262
  ## Negative Examples
116
263
 
@@ -0,0 +1,145 @@
1
+ # selftune Auto-Activation Workflow
2
+
3
+ Automatically suggests selftune commands during a session based on
4
+ activation rules. Runs as a `UserPromptSubmit` hook, evaluates rules
5
+ against session context, and outputs advisory suggestions to stderr.
6
+
7
+ ## How It Works
8
+
9
+ The `hooks/auto-activate.ts` script runs on every `UserPromptSubmit` event.
10
+ It reads session telemetry, query logs, and evolution audit data, then
11
+ evaluates a set of activation rules against the current context. When a
12
+ rule fires, the suggestion is written to stderr (shown to Claude as a
13
+ system message). The hook always exits 0 -- suggestions are advisory and
14
+ never block the user.
15
+
16
+ Flow:
17
+
18
+ 1. Claude Code triggers `UserPromptSubmit` hook
19
+ 2. Hook receives `{ session_id }` payload on stdin
20
+ 3. Checks PAI coexistence (see below)
21
+ 4. Loads default activation rules
22
+ 5. Evaluates each rule against session context
23
+ 6. Outputs suggestions to stderr (if any)
24
+ 7. Exits 0
25
+
26
+ ## PAI Coexistence
27
+
28
+ If PAI's `skill-activation-prompt` hook is detected in
29
+ `~/.claude/settings.json`, selftune skips all suggestions. PAI handles
30
+ skill-level activation; selftune handles observability. This prevents
31
+ duplicate or conflicting suggestions.
32
+
33
+ Detection scans all hook entries in settings for any command containing
34
+ `skill-activation-prompt`. If found, the hook exits silently.
35
+
36
+ ## Default Rules
37
+
38
+ | Rule ID | Description | Trigger Condition | Suggestion |
39
+ |---------|-------------|-------------------|------------|
40
+ | `post-session-diagnostic` | Suggest diagnostic review | >2 unmatched queries in current session | `selftune last` |
41
+ | `grading-threshold-breach` | Suggest evolution | Session pass rate < 0.6 (60%) | `selftune evolve` |
42
+ | `stale-evolution` | Suggest evolution | >7 days since last evolution AND pending false negatives exist | `selftune evolve` |
43
+ | `regression-detected` | Suggest rollback | Watch snapshot shows `regression_detected: true` | `selftune evolve rollback` |
44
+
45
+ ### Rule Details
46
+
47
+ **post-session-diagnostic**: Compares query count against skill usage count
48
+ for the current session. If the difference exceeds 2, unmatched queries
49
+ likely indicate gaps in skill coverage.
50
+
51
+ **grading-threshold-breach**: Reads grading result files from
52
+ `~/.selftune/grading/result-*.json`. If the current session's pass rate
53
+ is below 0.6, the skill description may need evolution.
54
+
55
+ **stale-evolution**: Reads the evolution audit log to find the last
56
+ evolution timestamp. If older than 7 days, checks
57
+ `~/.selftune/false-negatives/pending.json` for pending false negatives.
58
+ Both conditions must be true.
59
+
60
+ **regression-detected**: Reads the latest monitoring snapshot from
61
+ `~/.selftune/monitoring/latest-snapshot.json`. If `regression_detected`
62
+ is true, suggests rollback with the skill name if available.
63
+
64
+ ## Session State Tracking
65
+
66
+ Each rule fires at most once per session. After a suggestion is shown,
67
+ the rule ID is recorded in session state to prevent repeated nags.
68
+
69
+ Session state is stored at `~/.selftune/session-state-<session_id>.json`:
70
+
71
+ ```json
72
+ {
73
+ "session_id": "abc-123",
74
+ "suggestions_shown": ["post-session-diagnostic", "grading-threshold-breach"],
75
+ "updated_at": "2026-03-02T10:00:00Z"
76
+ }
77
+ ```
78
+
79
+ State is keyed by `session_id`. If the session ID changes (new session),
80
+ state resets automatically.
81
+
82
+ ## Customizing Rules
83
+
84
+ Rules are defined in `cli/selftune/activation-rules.ts` as the
85
+ `DEFAULT_RULES` array. To customize rule behavior, edit that TypeScript
86
+ file directly. There is no runtime JSON config — the hook imports
87
+ `DEFAULT_RULES` at evaluation time.
88
+
89
+ Each rule conforms to the `ActivationRule` interface:
90
+
91
+ ```typescript
92
+ interface ActivationRule {
93
+ id: string;
94
+ description: string;
95
+ evaluate(ctx: ActivationContext): string | null;
96
+ }
97
+ ```
98
+
99
+ The `ActivationContext` provides paths to all log files and the selftune
100
+ config directory. Return a suggestion string when the rule fires, or
101
+ `null` to skip.
102
+
103
+ ## Disabling Auto-Activation
104
+
105
+ Remove the `auto-activate.ts` hook entry from `~/.claude/settings.json`.
106
+ The hook is registered under `UserPromptSubmit`:
107
+
108
+ ```json
109
+ {
110
+ "hooks": {
111
+ "UserPromptSubmit": [
112
+ {
113
+ "command": "bun run /path/to/cli/selftune/hooks/auto-activate.ts"
114
+ }
115
+ ]
116
+ }
117
+ }
118
+ ```
119
+
120
+ Delete the entry to disable all auto-activation suggestions (JSON has no comment syntax, so the entry cannot simply be commented out).
121
+
122
+ ## Common Patterns
123
+
124
+ **User wants to disable auto-suggestions**
125
+ > Remove the auto-activate hook entry from `~/.claude/settings.json`
126
+ > (see "Disabling Auto-Activation" above). Note that, even when enabled, each rule fires at most once per session.
127
+
128
+ **User asks why selftune suggestions appear**
129
+ > Explain that the auto-activate hook detected an actionable condition.
130
+ > Parse the suggestion text to identify which rule fired and report the
131
+ > recommended action.
132
+
133
+ **Suggestions are not appearing when expected**
134
+ > Run `selftune doctor` to verify the hook is installed. Check that
135
+ > `UserPromptSubmit` includes the auto-activate hook in settings.
136
+
137
+ **PAI coexistence conflict**
138
+ > Verify PAI's `skill-activation-prompt` hook is in `~/.claude/settings.json`.
139
+ > If present, selftune skips all suggestions automatically. If the user
140
+ > sees duplicates, one of the two hooks is misconfigured.
141
+
142
+ **User wants custom activation rules**
143
+ > Direct the user to `cli/selftune/activation-rules.ts`. New rules must
144
+ > conform to the `ActivationRule` interface and, by convention, be pure filesystem readers with
145
+ > no network calls or heavy imports.
@@ -0,0 +1,124 @@
1
+ # Badge Command
2
+
3
+ ## When to Use
4
+
5
+ When the user asks for a skill health badge for their README.
6
+
7
+ ## Overview
8
+
9
+ Generate skill health badges for embedding in READMEs and documentation.
10
+
11
+ ## Usage
12
+
13
+ ```bash
14
+ selftune badge --skill <name> [--format svg|markdown|url] [--output <path>]
15
+ ```
16
+
17
+ ## Options
18
+
19
+ | Option | Required | Default | Description |
20
+ |--------|----------|---------|-------------|
21
+ | `--skill` | Yes | -- | Skill name to generate badge for |
22
+ | `--format` | No | `svg` | Output format: `svg`, `markdown`, or `url` |
23
+ | `--output` | No | stdout | Write output to file |
24
+ | `--help` | No | -- | Show usage information |
25
+
26
+ ## Examples
27
+
28
+ ### Generate SVG badge
29
+ ```bash
30
+ selftune badge --skill my-skill --format svg > badge.svg
31
+ ```
32
+
33
+ ### Get markdown for README
34
+ ```bash
35
+ selftune badge --skill my-skill --format markdown
36
+ ```
37
+ Output: `![Skill Health: my-skill](https://img.shields.io/badge/Skill%20Health-87%25%20%E2%86%91-4c1)`
38
+
39
+ ### Get shields.io URL
40
+ ```bash
41
+ selftune badge --skill my-skill --format url
42
+ ```
43
+
44
+ ### Write badge to file
45
+ ```bash
46
+ selftune badge --skill my-skill --output badge.svg
47
+ ```
48
+
49
+ ## Badge Branding
50
+
51
+ SVG badges (both `--format svg` and dashboard routes) include the selftune logo as an inline 14px icon in the label section. The logo is embedded as a base64 data URI — no external requests needed.
52
+
53
+ ```
54
+ [ 🔵 Skill Health (gray) ] [ 85% ↑ (green) ]
55
+ ^14px logo + 3px gap
56
+ ```
57
+
58
+ Markdown and URL formats use shields.io, which renders its own badge — the logo only appears in locally generated SVGs.
59
+
60
+ ## Badge Colors
61
+
62
+ | Pass Rate | Color | Hex |
63
+ |-----------|-------|-----|
64
+ | > 80% | Green | `#4c1` |
65
+ | 60-80% | Yellow | `#dfb317` |
66
+ | < 60% | Red | `#e05d44` |
67
+ | No data | Gray | `#9f9f9f` |
68
+
69
+ ## Embedding in README
70
+
71
+ Add to your skill's README.md:
72
+ ```markdown
73
+ ![Skill Health: my-skill](https://img.shields.io/badge/Skill%20Health-87%25%20%E2%86%91-4c1)
74
+ ```
75
+
76
+ Or use the generated SVG directly for offline rendering.
77
+
78
+ ## Dashboard Routes (Phase 2)
79
+
80
+ The local dashboard server exposes badge and report routes:
81
+
82
+ ### GET /badge/:skillName
83
+
84
+ Returns a live SVG badge computed from local telemetry logs.
85
+
86
+ ```
87
+ http://localhost:<port>/badge/my-skill
88
+ ```
89
+
90
+ - Returns `image/svg+xml` with `Cache-Control: no-cache, no-store`
91
+ - Returns a gray "not found" badge (not JSON 404) for unknown skills
92
+
93
+ ### GET /report/:skillName
94
+
95
+ Returns an HTML report page with pass rate, trend, session count, and embed code.
96
+
97
+ ```
98
+ http://localhost:<port>/report/my-skill
99
+ ```
100
+
101
+ ## Hosted Service (Phase 3)
102
+
103
+ The hosted badge service at `badge.selftune.dev` aggregates community contributions and serves badges publicly.
104
+
105
+ ### Endpoints
106
+
107
+ | Route | Method | Description |
108
+ |-------|--------|-------------|
109
+ | `/badge/:skill` | GET | SVG badge from aggregated community data |
110
+ | `/badge/:org/:skill` | GET | Organization-scoped SVG badge |
111
+
112
+ ### Embedding from hosted service
113
+
114
+ ```markdown
115
+ ![Skill Health: my-skill](https://badge.selftune.dev/badge/my-skill)
116
+ ```
117
+
118
+ ### Contributing data
119
+
120
+ ```bash
121
+ selftune contribute --submit --skill my-skill
122
+ ```
123
+
124
+ Use `--endpoint` to target a custom service URL, with `--github` as a fallback.