selftune 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/.claude/agents/diagnosis-analyst.md +146 -0
  2. package/.claude/agents/evolution-reviewer.md +167 -0
  3. package/.claude/agents/integration-guide.md +200 -0
  4. package/.claude/agents/pattern-analyst.md +147 -0
  5. package/CHANGELOG.md +37 -0
  6. package/README.md +96 -256
  7. package/assets/BeforeAfter.gif +0 -0
  8. package/assets/FeedbackLoop.gif +0 -0
  9. package/assets/logo.svg +9 -0
  10. package/assets/skill-health-badge.svg +20 -0
  11. package/cli/selftune/activation-rules.ts +171 -0
  12. package/cli/selftune/badge/badge-data.ts +108 -0
  13. package/cli/selftune/badge/badge-svg.ts +212 -0
  14. package/cli/selftune/badge/badge.ts +103 -0
  15. package/cli/selftune/constants.ts +75 -1
  16. package/cli/selftune/contribute/bundle.ts +314 -0
  17. package/cli/selftune/contribute/contribute.ts +214 -0
  18. package/cli/selftune/contribute/sanitize.ts +162 -0
  19. package/cli/selftune/cron/setup.ts +266 -0
  20. package/cli/selftune/dashboard-server.ts +582 -0
  21. package/cli/selftune/dashboard.ts +25 -3
  22. package/cli/selftune/eval/baseline.ts +247 -0
  23. package/cli/selftune/eval/composability.ts +117 -0
  24. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  25. package/cli/selftune/eval/hooks-to-evals.ts +68 -2
  26. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  27. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  28. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  29. package/cli/selftune/eval/unit-test.ts +196 -0
  30. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  31. package/cli/selftune/evolution/evolve-body.ts +492 -0
  32. package/cli/selftune/evolution/evolve.ts +466 -103
  33. package/cli/selftune/evolution/extract-patterns.ts +32 -1
  34. package/cli/selftune/evolution/pareto.ts +314 -0
  35. package/cli/selftune/evolution/propose-body.ts +171 -0
  36. package/cli/selftune/evolution/propose-description.ts +100 -2
  37. package/cli/selftune/evolution/propose-routing.ts +166 -0
  38. package/cli/selftune/evolution/refine-body.ts +141 -0
  39. package/cli/selftune/evolution/rollback.ts +19 -2
  40. package/cli/selftune/evolution/validate-body.ts +254 -0
  41. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  42. package/cli/selftune/evolution/validate-routing.ts +177 -0
  43. package/cli/selftune/grading/grade-session.ts +138 -18
  44. package/cli/selftune/grading/pre-gates.ts +104 -0
  45. package/cli/selftune/hooks/auto-activate.ts +185 -0
  46. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  47. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  48. package/cli/selftune/index.ts +88 -0
  49. package/cli/selftune/ingestors/claude-replay.ts +351 -0
  50. package/cli/selftune/ingestors/openclaw-ingest.ts +440 -0
  51. package/cli/selftune/init.ts +150 -3
  52. package/cli/selftune/memory/writer.ts +447 -0
  53. package/cli/selftune/monitoring/watch.ts +25 -2
  54. package/cli/selftune/status.ts +17 -13
  55. package/cli/selftune/types.ts +377 -5
  56. package/cli/selftune/utils/frontmatter.ts +217 -0
  57. package/cli/selftune/utils/llm-call.ts +29 -3
  58. package/cli/selftune/utils/transcript.ts +35 -0
  59. package/cli/selftune/utils/trigger-check.ts +89 -0
  60. package/cli/selftune/utils/tui.ts +156 -0
  61. package/dashboard/index.html +569 -8
  62. package/package.json +8 -4
  63. package/skill/SKILL.md +124 -8
  64. package/skill/Workflows/AutoActivation.md +144 -0
  65. package/skill/Workflows/Badge.md +118 -0
  66. package/skill/Workflows/Baseline.md +121 -0
  67. package/skill/Workflows/Composability.md +100 -0
  68. package/skill/Workflows/Contribute.md +91 -0
  69. package/skill/Workflows/Cron.md +155 -0
  70. package/skill/Workflows/Dashboard.md +203 -0
  71. package/skill/Workflows/Doctor.md +37 -1
  72. package/skill/Workflows/Evals.md +69 -1
  73. package/skill/Workflows/EvolutionMemory.md +152 -0
  74. package/skill/Workflows/Evolve.md +111 -6
  75. package/skill/Workflows/EvolveBody.md +159 -0
  76. package/skill/Workflows/ImportSkillsBench.md +111 -0
  77. package/skill/Workflows/Ingest.md +117 -3
  78. package/skill/Workflows/Initialize.md +57 -3
  79. package/skill/Workflows/Replay.md +70 -0
  80. package/skill/Workflows/Rollback.md +20 -1
  81. package/skill/Workflows/UnitTest.md +138 -0
  82. package/skill/Workflows/Watch.md +22 -0
  83. package/skill/settings_snippet.json +23 -0
  84. package/templates/activation-rules-default.json +27 -0
  85. package/templates/multi-skill-settings.json +64 -0
  86. package/templates/single-skill-settings.json +58 -0
@@ -1,19 +1,69 @@
1
1
  # selftune Ingest Workflow
2
2
 
3
- Import sessions from non-Claude-Code agent platforms into the shared
4
- selftune log format. Covers three sub-commands: `ingest-codex`,
5
- `ingest-opencode`, and `wrap-codex`.
3
+ Import sessions from agent platforms into the shared selftune log format.
4
+ Covers five sub-commands: `replay`, `ingest-codex`, `ingest-opencode`,
5
+ `ingest-openclaw`, and `wrap-codex`.
6
6
 
7
7
  ## When to Use Each
8
8
 
9
9
  | Sub-command | Platform | Mode | When |
10
10
  |-------------|----------|------|------|
11
+ | `replay` | Claude Code | Batch | Backfill logs from existing Claude Code transcripts |
11
12
  | `ingest-codex` | Codex | Batch | Import existing Codex rollout logs |
12
13
  | `ingest-opencode` | OpenCode | Batch | Import existing OpenCode sessions |
14
+ | `ingest-openclaw` | OpenClaw | Batch | Import existing OpenClaw agent sessions |
13
15
  | `wrap-codex` | Codex | Real-time | Wrap `codex exec` to capture telemetry live |
14
16
 
15
17
  ---
16
18
 
19
+ ## replay
20
+
21
+ Batch ingest existing Claude Code session transcripts into the shared JSONL schema.
22
+
23
+ ### Default Command
24
+
25
+ ```bash
26
+ selftune replay
27
+ ```
28
+
29
+ ### Options
30
+
31
+ | Flag | Description |
32
+ |------|-------------|
33
+ | `--since <date>` | Only ingest sessions modified after this date (e.g., `2026-01-01`) |
34
+ | `--dry-run` | Show what would be ingested without writing to logs |
35
+ | `--force` | Re-ingest all sessions, ignoring the marker file |
36
+ | `--verbose` | Show per-file progress during ingestion |
37
+ | `--projects-dir <path>` | Override default `~/.claude/projects/` directory |
38
+
39
+ ### Source
40
+
41
+ Reads from `~/.claude/projects/<hash>/<session-id>.jsonl`. These are the
42
+ transcript files Claude Code automatically saves for every session.
43
+
44
+ ### Output
45
+
46
+ Writes to:
47
+ - `~/.claude/all_queries_log.jsonl` -- extracted user queries (one record per query, not just the last query of each session)
48
+ - `~/.claude/session_telemetry_log.jsonl` -- per-session metrics with `source: "claude_code_replay"`
49
+ - `~/.claude/skill_usage_log.jsonl` -- skill triggers with `source: "claude_code_replay"`
50
+
51
+ ### Steps
52
+
53
+ 1. Run `selftune replay --dry-run` to preview what would be ingested
54
+ 2. Run `selftune replay` to ingest all sessions
55
+ 3. Run `selftune doctor` to confirm logs are healthy
56
+ 4. Run `selftune evals --list-skills` to see if the ingested sessions appear
57
+
58
+ ### Notes
59
+
60
+ - Idempotent: uses a marker file (`~/.claude/claude_code_ingested_sessions.json`) to track
61
+ which transcripts have already been ingested. Safe to run repeatedly.
62
+ - Extracts ALL user queries per session, not just the last one.
63
+ - Filters out system messages, short queries (<4 chars), and queries matching `SKIP_PREFIXES`.
64
+
65
+ ---
66
+
17
67
  ## ingest-codex
18
68
 
19
69
  Batch ingest Codex rollout logs into the shared JSONL schema.
@@ -84,6 +134,58 @@ Writes to:
84
134
 
85
135
  ---
86
136
 
137
+ ## ingest-openclaw
138
+
139
+ Batch ingest OpenClaw agent session histories into the shared JSONL schema.
140
+ Supports multiple agents and auto-discovers session files across all agent directories.
141
+
142
+ ### Default Command
143
+
144
+ ```bash
145
+ selftune ingest-openclaw
146
+ ```
147
+
148
+ ### Options
149
+
150
+ | Flag | Description |
151
+ |------|-------------|
152
+ | `--agents-dir <path>` | Override default `~/.openclaw/agents/` directory |
153
+ | `--since <date>` | Only ingest sessions modified after this date (e.g., `2026-01-01`) |
154
+ | `--dry-run` | Show what would be ingested without writing to logs |
155
+ | `--force` | Re-ingest all sessions, ignoring the marker file |
156
+ | `--verbose` / `-v` | Show per-session progress during ingestion |
157
+
158
+ ### Source
159
+
160
+ Reads from `~/.openclaw/agents/<agentId>/sessions/*.jsonl`. Each JSONL file contains:
161
+ - Line 1 (session header): `{"type":"session","version":5,"id":"<uuid>","timestamp":"<iso>","cwd":"<path>"}`
162
+ - Line 2+ (messages): `{"role":"user|assistant|toolResult","content":[...],"timestamp":<ms>}`
163
+
164
+ ### Output
165
+
166
+ Writes to:
167
+ - `~/.claude/all_queries_log.jsonl` -- extracted user queries
168
+ - `~/.claude/session_telemetry_log.jsonl` -- per-session metrics with `source: "openclaw"`
169
+ - `~/.claude/skill_usage_log.jsonl` -- skill triggers with `source: "openclaw"`
170
+
171
+ ### Steps
172
+
173
+ 1. Run `selftune ingest-openclaw --dry-run` to preview what would be ingested
174
+ 2. Run `selftune ingest-openclaw` to ingest all sessions
175
+ 3. Run `selftune doctor` to confirm logs are healthy
176
+ 4. Run `selftune evals --list-skills` to see if the ingested sessions appear
177
+
178
+ ### Notes
179
+
180
+ - Idempotent: uses a marker file to track which sessions have already been ingested.
181
+ Safe to run repeatedly. Use `--force` to re-ingest everything.
182
+ - Skill detection heuristic: identifies skills by checking for `SKILL.md` file reads in
183
+ tool calls and by matching known skill names in assistant text content.
184
+ - Multi-agent support: scans all agent directories under the agents root, ingesting
185
+ sessions from every agent found.
186
+
187
+ ---
188
+
87
189
  ## wrap-codex
88
190
 
89
191
  Wrap `codex exec` with real-time telemetry capture. Drop-in replacement
@@ -123,12 +225,24 @@ stream for telemetry; it does not modify Codex behavior.
123
225
 
124
226
  ## Common Patterns
125
227
 
228
+ **"Backfill Claude Code sessions"**
229
+ > Run `selftune replay`. No options needed. Reads from `~/.claude/projects/`.
230
+
231
+ **"Replay only recent Claude Code sessions"**
232
+ > Run `selftune replay --since 2026-02-01` with an appropriate date.
233
+
126
234
  **"Ingest codex logs"**
127
235
  > Run `selftune ingest-codex`. No options needed. Reads from `$CODEX_HOME/sessions/`.
128
236
 
129
237
  **"Import opencode sessions"**
130
238
  > Run `selftune ingest-opencode`. Reads from the SQLite database automatically.
131
239
 
240
+ **"Ingest OpenClaw sessions"**
241
+ > Run `selftune ingest-openclaw`. Reads from `~/.openclaw/agents/` automatically.
242
+
243
+ **"Import only recent OpenClaw sessions"**
244
+ > Run `selftune ingest-openclaw --since 2026-02-01` with an appropriate date.
245
+
132
246
  **"Run codex through selftune"**
133
247
  > Use `selftune wrap-codex -- <codex args>` instead of `codex exec <args>` directly.
134
248
 
@@ -69,7 +69,7 @@ cat ~/.selftune/config.json 2>/dev/null
69
69
  ```
70
70
 
71
71
  If the file exists and is valid JSON, selftune is already initialized.
72
- Skip to Step 5 (verify with doctor) unless the user wants to reinitialize.
72
+ Skip to Step 8 (verify with doctor) unless the user wants to reinitialize.
73
73
 
74
74
  ### 3. Run Init
75
75
 
@@ -80,12 +80,15 @@ selftune init
80
80
  ### 4. Install Hooks (Claude Code)
81
81
 
82
82
  If `init` reports hooks are not installed, merge the entries from
83
- `skill/settings_snippet.json` into `~/.claude/settings.json`. Three hooks
83
+ `skill/settings_snippet.json` into `~/.claude/settings.json`. Six hooks
84
84
  are required:
85
85
 
86
86
  | Hook | Script | Purpose |
87
87
  |------|--------|---------|
88
88
  | `UserPromptSubmit` | `hooks/prompt-log.ts` | Log every user query |
89
+ | `UserPromptSubmit` | `hooks/auto-activate.ts` | Suggest skills before prompt processing |
90
+ | `PreToolUse` (Write/Edit) | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits |
91
+ | `PreToolUse` (Write/Edit) | `hooks/evolution-guard.ts` | Block SKILL.md edits on monitored skills |
89
92
  | `PostToolUse` (Read) | `hooks/skill-eval.ts` | Track skill triggers |
90
93
  | `Stop` | `hooks/session-stop.ts` | Capture session telemetry |
91
94
 
@@ -100,7 +103,48 @@ The hooks directory is at `dirname(cli_path)/hooks/`.
100
103
  - Use `selftune ingest-opencode` to import sessions from the SQLite database
101
104
  - See `Workflows/Ingest.md` for details
102
105
 
103
- ### 5. Verify with Doctor
106
+ ### 5. Initialize Memory Directory
107
+
108
+ Create the memory directory if it does not exist:
109
+
110
+ ```bash
111
+ mkdir -p ~/.selftune/memory
112
+ ```
113
+
114
+ The memory system stores three files at `~/.selftune/memory/`:
115
+ - `context.md` -- active evolution state and session context
116
+ - `decisions.md` -- evolution decisions and rollback history
117
+ - `plan.md` -- current priorities and evolution strategy
118
+
119
+ These files are created automatically by the memory writer during evolve,
120
+ watch, and rollback workflows. The directory just needs to exist.
121
+
122
+ ### 6. Set Up Activation Rules
123
+
124
+ Copy the default activation rules template:
125
+
126
+ ```bash
127
+ cp templates/activation-rules-default.json ~/.selftune/activation-rules.json
128
+ ```
129
+
130
+ The activation rules file configures auto-activation behavior -- which skills
131
+ get suggested and under what conditions. Edit `~/.selftune/activation-rules.json`
132
+ to customize thresholds and skill mappings for your project.
133
+
134
+ ### 7. Verify Agent Availability
135
+
136
+ Check that the specialized agent files are present:
137
+
138
+ ```bash
139
+ ls .claude/agents/
140
+ ```
141
+
142
+ Expected agents: `diagnosis-analyst.md`, `pattern-analyst.md`,
143
+ `evolution-reviewer.md`, `integration-guide.md`. These are used by evolve
144
+ and doctor workflows for deeper analysis. If missing, copy them from the
145
+ selftune repository's `.claude/agents/` directory.
146
+
147
+ ### 8. Verify with Doctor
104
148
 
105
149
  ```bash
106
150
  selftune doctor
@@ -109,6 +153,16 @@ selftune doctor
109
153
  Parse the JSON output. All checks should pass. If any fail, address the
110
154
  reported issues before proceeding.
111
155
 
156
+ ## Integration Guide
157
+
158
+ For project-type-specific setup (single-skill, multi-skill, monorepo, Codex,
159
+ OpenCode, mixed agents), see [docs/integration-guide.md](../../docs/integration-guide.md).
160
+
161
+ Templates for each project type are in the `templates/` directory:
162
+ - `templates/single-skill-settings.json` — hooks for single-skill projects
163
+ - `templates/multi-skill-settings.json` — hooks for multi-skill projects with activation rules
164
+ - `templates/activation-rules-default.json` — default auto-activation rule configuration
165
+
112
166
  ## Common Patterns
113
167
 
114
168
  **"Initialize selftune"**
@@ -0,0 +1,70 @@
1
+ # selftune Replay Workflow
2
+
3
+ Backfill the shared JSONL logs from existing Claude Code conversation
4
+ transcripts. Useful for bootstrapping selftune with historical session data.
5
+
6
+ ## When to Use
7
+
8
+ - New selftune installation with months of Claude Code history
9
+ - After re-initializing logs and wanting to recover data
10
+ - To populate eval data without waiting for new sessions
11
+
12
+ ## Key Difference from Hooks
13
+
14
+ Real-time hooks capture only the **last** user query per session. Replay
15
+ extracts **all** user queries, writing one `QueryLogRecord` per message.
16
+ This produces much richer eval data from historical sessions.
17
+
18
+ ## Default Command
19
+
20
+ ```bash
21
+ selftune replay
22
+ ```
23
+
24
+ ## Options
25
+
26
+ | Flag | Description |
27
+ |------|-------------|
28
+ | `--since <date>` | Only include transcripts modified after this date |
29
+ | `--dry-run` | Preview what would be ingested without writing |
30
+ | `--force` | Re-ingest all transcripts (ignore marker file) |
31
+ | `--verbose` | Show detailed progress per file |
32
+ | `--projects-dir <path>` | Override default `~/.claude/projects/` path |
33
+
34
+ ## Source
35
+
36
+ Reads Claude Code transcripts from `~/.claude/projects/<hash>/<session>.jsonl`.
37
+ Each transcript is a JSONL file containing user and assistant messages.
38
+
39
+ ## Output
40
+
41
+ Writes to:
42
+ - `~/.claude/all_queries_log.jsonl` -- one record per user query (all messages, not just the last one)
43
+ - `~/.claude/session_telemetry_log.jsonl` -- per-session metrics with `source: "claude_code_replay"`
44
+ - `~/.claude/skill_usage_log.jsonl` -- skill triggers detected in transcripts
45
+
46
+ ## Idempotency
47
+
48
+ Uses a marker file at `~/.claude/claude_code_ingested_sessions.json` to track
49
+ which transcripts have already been ingested. Use `--force` to re-ingest all.
50
+
51
+ ## Steps
52
+
53
+ 1. Run `selftune replay --dry-run` to preview what would be ingested
54
+ 2. Run `selftune replay` to perform the ingestion
55
+ 3. Run `selftune doctor` to verify logs are healthy
56
+ 4. Run `selftune evals --list-skills` to see if replayed sessions appear
57
+
58
+ ## Common Patterns
59
+
60
+ **"Backfill my logs"**
61
+ > Run `selftune replay`. No options needed.
62
+
63
+ **"Only replay recent sessions"**
64
+ > Run `selftune replay --since 2026-02-01`
65
+
66
+ **"Re-ingest everything"**
67
+ > Run `selftune replay --force`
68
+
69
+ **"How do I know it worked?"**
70
+ > Run `selftune doctor` after replay, then check that the log file line counts have increased.
@@ -75,6 +75,16 @@ Manual restoration from version control is required.
75
75
 
76
76
  ## Steps
77
77
 
78
+ ### 0. Read Evolution Context
79
+
80
+ Before starting, read `~/.selftune/memory/context.md` for session context:
81
+ - Active evolutions and their current status
82
+ - Previous rollback history
83
+ - Last update timestamp
84
+
85
+ This provides continuity across context resets. If the file doesn't exist,
86
+ proceed normally -- it will be created after the first rollback.
87
+
78
88
  ### 1. Find the Last Evolution
79
89
 
80
90
  Read `~/.claude/evolution_audit_log.jsonl` and find the most recent
@@ -101,7 +111,16 @@ After rollback, verify the SKILL.md content is restored:
101
111
  - Check the audit log for the `rolled_back` entry
102
112
  - Optionally re-run evals to confirm the original pass rate
103
113
 
104
- ### 4. Post-Rollback Audit
114
+ ### 4. Update Memory
115
+
116
+ After rollback completes, the memory writer updates:
117
+ - `~/.selftune/memory/decisions.md` -- records the rollback decision and reason
118
+ - `~/.selftune/memory/context.md` -- clears the active evolution state and notes the rollback
119
+
120
+ This ensures future evolve and watch workflows have context about why the
121
+ rollback occurred, even across context window resets.
122
+
123
+ ### 5. Post-Rollback Audit
105
124
 
106
125
  The rollback is logged. Future `evolve` runs will see the rollback in the
107
126
  audit trail and can use it to avoid repeating failed evolution patterns.
@@ -0,0 +1,138 @@
1
+ # selftune Unit Test Workflow
2
+
3
+ Run or generate unit tests for individual skills. Tests verify trigger
4
+ accuracy, output content, and tool usage with deterministic assertions.
5
+
6
+ ## Default Command
7
+
8
+ ```bash
9
+ selftune unit-test --skill <name> [--tests <path>] [options]
10
+ ```
11
+
12
+ ## Options
13
+
14
+ | Flag | Description | Default |
15
+ |------|-------------|---------|
16
+ | `--skill <name>` | Skill name | Required |
17
+ | `--tests <path>` | Path to unit test JSON file | `~/.selftune/unit-tests/<skill>.json` |
18
+ | `--run-agent` | Run agent-based assertions (not just trigger checks) | Off |
19
+ | `--generate` | Generate tests from skill content instead of running | Off |
20
+ | `--skill-path <path>` | Path to SKILL.md (required for `--generate`) | None |
21
+ | `--eval-set <path>` | Eval set for failure context (used with `--generate`) | None |
22
+ | `--model <flag>` | Model flag for LLM calls | Agent default |
23
+
24
+ ## Test Format
25
+
26
+ Tests are stored as JSON arrays in `~/.selftune/unit-tests/<skill>.json`:
27
+
28
+ ```json
29
+ [
30
+ {
31
+ "test_id": "research-trigger-1",
32
+ "skill_name": "Research",
33
+ "description": "Should trigger on explicit research request",
34
+ "query": "Research the latest trends in AI safety",
35
+ "expected_trigger": true,
36
+ "assertions": [
37
+ {
38
+ "type": "trigger_check",
39
+ "value": "true",
40
+ "description": "Skill should trigger for this query"
41
+ }
42
+ ],
43
+ "tags": ["explicit", "core"],
44
+ "source": "manual"
45
+ }
46
+ ]
47
+ ```
48
+
49
+ ## Assertion Types
50
+
51
+ | Type | What it checks | Requires agent? |
52
+ |------|---------------|-----------------|
53
+ | `trigger_check` | Whether the query triggers the skill, judged against its description | No (LLM only) |
54
+ | `output_contains` | Agent output contains expected text | Yes |
55
+ | `output_matches_regex` | Agent output matches regex pattern | Yes |
56
+ | `tool_called` | Agent used a specific tool | Yes |
57
+
58
+ Trigger check assertions are cheap (single LLM call). Agent-based assertions
59
+ require `--run-agent` and run the query through the full agent.
60
+
61
+ ## Output Format
62
+
63
+ ```json
64
+ {
65
+ "skill_name": "Research",
66
+ "total": 10,
67
+ "passed": 8,
68
+ "failed": 2,
69
+ "pass_rate": 0.80,
70
+ "results": [
71
+ {
72
+ "test_id": "research-trigger-1",
73
+ "overall_passed": true,
74
+ "trigger_passed": true,
75
+ "assertion_results": [
76
+ { "type": "trigger_check", "value": "true", "passed": true, "evidence": "LLM responded YES" }
77
+ ],
78
+ "duration_ms": 450
79
+ }
80
+ ],
81
+ "ran_at": "2026-03-04T12:00:00.000Z"
82
+ }
83
+ ```
84
+
85
+ ## Steps
86
+
87
+ ### 1. Generate Tests (First Time)
88
+
89
+ For a new skill, generate initial tests from the skill content:
90
+
91
+ ```bash
92
+ selftune unit-test --skill Research --generate --skill-path ~/.claude/skills/Research/SKILL.md
93
+ ```
94
+
95
+ This uses an LLM to create test cases covering:
96
+ - Explicit trigger queries
97
+ - Implicit trigger queries
98
+ - Contextual trigger queries
99
+ - Negative examples (should NOT trigger)
100
+
101
+ Tests are saved to `~/.selftune/unit-tests/Research.json`.
102
+
103
+ ### 2. Run Tests
104
+
105
+ ```bash
106
+ selftune unit-test --skill Research --tests ~/.selftune/unit-tests/Research.json
107
+ ```
108
+
109
+ By default, only `trigger_check` assertions run (fast, no agent needed).
110
+ Add `--run-agent` for full agent-based assertions.
111
+
112
+ ### 3. Review Results
113
+
114
+ Check `pass_rate` and investigate failures:
115
+ - Failed trigger checks → description needs improvement
116
+ - Failed output assertions → skill workflow needs fixes
117
+ - Failed tool assertions → skill routing is broken
118
+
119
+ ### 4. Iterate
120
+
121
+ After evolving a skill, re-run unit tests to verify improvements:
122
+ 1. Evolve: `selftune evolve --skill Research --skill-path /path/SKILL.md`
123
+ 2. Test: `selftune unit-test --skill Research`
124
+ 3. Check that the pass rate improved
125
+
126
+ ## Common Patterns
127
+
128
+ **"Generate tests for the pptx skill"**
129
+ > `selftune unit-test --skill pptx --generate --skill-path /path/SKILL.md`
130
+
131
+ **"Run existing tests"**
132
+ > `selftune unit-test --skill pptx --tests ~/.selftune/unit-tests/pptx.json`
133
+
134
+ **"Run full agent tests"**
135
+ > `selftune unit-test --skill pptx --tests /path/tests.json --run-agent`
136
+
137
+ **"Test after evolution"**
138
+ > Run `selftune unit-test` after each `selftune evolve` to verify improvements.
@@ -65,6 +65,21 @@ selftune watch --skill <name> --skill-path <path> [options]
65
65
 
66
66
  ## Steps
67
67
 
68
+ ### 0. Read Evolution Context
69
+
70
+ Before starting, read `~/.selftune/memory/context.md` for session context:
71
+ - Active evolutions and their current status
72
+ - Known issues and regression history
73
+ - Last update timestamp
74
+
75
+ This provides continuity across context resets. If the file doesn't exist,
76
+ proceed normally -- it will be created after the first watch.
77
+
78
+ The evolution-guard hook prevents conflicting SKILL.md edits while watch is
79
+ evaluating the skill. The auto-activation system uses watch results to
80
+ adjust suggestion confidence -- skills showing regressions get flagged for
81
+ attention in subsequent prompts.
82
+
68
83
  ### 1. Run Watch
69
84
 
70
85
  ```bash
@@ -100,6 +115,13 @@ Summarize the snapshot for the user:
100
115
  - Whether regression was detected
101
116
  - Recommended action
102
117
 
118
+ ### 5. Update Memory
119
+
120
+ After watch completes, the memory writer updates
121
+ `~/.selftune/memory/context.md` with the current regression status,
122
+ pass rates, and recommended next action. This ensures continuity if the
123
+ context window resets before the user acts on the results.
124
+
103
125
  ## Common Patterns
104
126
 
105
127
  **"Is the skill performing well after the change?"**
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "_readme": "Merge the 'hooks' block below into your ~/.claude/settings.json",
3
3
  "_readme2": "Replace /PATH/TO/ with the actual directory where you saved the scripts",
4
+ "_readme3": "This is the comprehensive template. Simpler templates are in templates/",
4
5
 
5
6
  "hooks": {
6
7
  "UserPromptSubmit": [
@@ -10,6 +11,28 @@
10
11
  "type": "command",
11
12
  "command": "bun run /PATH/TO/cli/selftune/hooks/prompt-log.ts",
12
13
  "timeout": 5
14
+ },
15
+ {
16
+ "type": "command",
17
+ "command": "bun run /PATH/TO/cli/selftune/hooks/auto-activate.ts",
18
+ "timeout": 5
19
+ }
20
+ ]
21
+ }
22
+ ],
23
+ "PreToolUse": [
24
+ {
25
+ "matcher": "Write|Edit",
26
+ "hooks": [
27
+ {
28
+ "type": "command",
29
+ "command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
30
+ "timeout": 5
31
+ },
32
+ {
33
+ "type": "command",
34
+ "command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
35
+ "timeout": 5
13
36
  }
14
37
  ]
15
38
  }
@@ -0,0 +1,27 @@
1
+ {
2
+ "_readme": "Default activation rules for selftune auto-activation. Copy to ~/.selftune/activation-rules.json to customize.",
3
+ "_docs": "See docs/integration-guide.md for details on each rule.",
4
+
5
+ "rules": [
6
+ {
7
+ "id": "post-session-diagnostic",
8
+ "enabled": true,
9
+ "description": "Suggest `selftune last` when session has >2 unmatched queries"
10
+ },
11
+ {
12
+ "id": "grading-threshold-breach",
13
+ "enabled": true,
14
+ "description": "Suggest `selftune evolve` when session pass rate < 60%"
15
+ },
16
+ {
17
+ "id": "stale-evolution",
18
+ "enabled": true,
19
+ "description": "Suggest `selftune evolve` when no evolution in >7 days and pending false negatives exist"
20
+ },
21
+ {
22
+ "id": "regression-detected",
23
+ "enabled": true,
24
+ "description": "Suggest `selftune rollback` when monitoring detects a regression"
25
+ }
26
+ ]
27
+ }
@@ -0,0 +1,64 @@
1
+ {
2
+ "_readme": "Settings template for multi-skill selftune projects. Merge into ~/.claude/settings.json.",
3
+ "_usage": "Replace /PATH/TO with the absolute path to your selftune installation.",
4
+ "_note": "Multi-skill projects use activation rules to route queries to the correct skill. See templates/activation-rules-default.json.",
5
+
6
+ "hooks": {
7
+ "UserPromptSubmit": [
8
+ {
9
+ "hooks": [
10
+ {
11
+ "type": "command",
12
+ "command": "bun run /PATH/TO/cli/selftune/hooks/prompt-log.ts",
13
+ "timeout": 5
14
+ },
15
+ {
16
+ "type": "command",
17
+ "command": "bun run /PATH/TO/cli/selftune/hooks/auto-activate.ts",
18
+ "timeout": 5
19
+ }
20
+ ]
21
+ }
22
+ ],
23
+ "PreToolUse": [
24
+ {
25
+ "matcher": "Write|Edit",
26
+ "hooks": [
27
+ {
28
+ "type": "command",
29
+ "command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
30
+ "timeout": 5
31
+ },
32
+ {
33
+ "type": "command",
34
+ "command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
35
+ "timeout": 5
36
+ }
37
+ ]
38
+ }
39
+ ],
40
+ "PostToolUse": [
41
+ {
42
+ "matcher": "Read",
43
+ "hooks": [
44
+ {
45
+ "type": "command",
46
+ "command": "bun run /PATH/TO/cli/selftune/hooks/skill-eval.ts",
47
+ "timeout": 5
48
+ }
49
+ ]
50
+ }
51
+ ],
52
+ "Stop": [
53
+ {
54
+ "hooks": [
55
+ {
56
+ "type": "command",
57
+ "command": "bun run /PATH/TO/cli/selftune/hooks/session-stop.ts",
58
+ "timeout": 15
59
+ }
60
+ ]
61
+ }
62
+ ]
63
+ }
64
+ }