selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -82,8 +82,37 @@ Doctor validates these areas:
82
82
 
83
83
  | Check | What it validates |
84
84
  |-------|-------------------|
85
- | Hooks installed | `UserPromptSubmit`, `PostToolUse`, and `Stop` hooks are configured in `~/.claude/settings.json` |
85
+ | Hooks installed | `UserPromptSubmit`, `PreToolUse`, `PostToolUse`, and `Stop` hooks are configured in `~/.claude/settings.json` |
86
86
  | Hook scripts exist | The script files referenced by hooks exist on disk |
87
+ | Auto-activate hook | `hooks/auto-activate.ts` is registered in `UserPromptSubmit` and the file is executable |
88
+ | Evolution guard hook | `hooks/evolution-guard.ts` is registered in `PreToolUse` and the file exists |
89
+
90
+ ### Memory Checks
91
+
92
+ | Check | What it validates |
93
+ |-------|-------------------|
94
+ | Memory directory exists | `~/.selftune/memory/` directory is present |
95
+ | Memory files valid | `context.md`, `decisions.md`, `plan.md` exist and are non-empty (if previously written) |
96
+
97
+ ### Activation Rules Checks
98
+
99
+ | Check | What it validates |
100
+ |-------|-------------------|
101
+ | Rules file exists | `~/.selftune/activation-rules.json` is present |
102
+ | Rules file valid | The file contains valid JSON conforming to the activation rules schema |
103
+
104
+ ### Agent Checks
105
+
106
+ | Check | What it validates |
107
+ |-------|-------------------|
108
+ | Optional agent directory exists | If `.claude/agents/` is present, it is readable |
109
+ | Optional agent files present | If the repo bundles helper agents, the expected files are present |
110
+
111
+ ### Dashboard Checks (optional)
112
+
113
+ | Check | What it validates |
114
+ |-------|-------------------|
115
+ | Dashboard server accessible | `dashboard-server.ts` exists in the CLI directory |
87
116
 
88
117
  ### Evolution Audit Checks
89
118
 
@@ -114,25 +143,45 @@ For each failed check, take the appropriate action:
114
143
  | Logs not parseable | Inspect the corrupted log file. Remove or fix invalid lines. |
115
144
  | Hooks not installed | Merge `skill/settings_snippet.json` into `~/.claude/settings.json`. Update paths. |
116
145
  | Hook scripts missing | Verify the selftune repo path. Re-run `init` if the repo was moved. |
146
+ | Auto-activate missing | Add `hooks/auto-activate.ts` to `UserPromptSubmit` in settings. |
147
+ | Evolution guard missing | Add `hooks/evolution-guard.ts` to `PreToolUse` in settings. |
148
+ | Memory directory missing | Run `mkdir -p ~/.selftune/memory`. |
149
+ | Memory files invalid | Delete and let the memory writer recreate them on next evolve/watch. |
150
+ | Activation rules missing | Copy `assets/activation-rules-default.json` to `~/.selftune/activation-rules.json`. |
151
+ | Activation rules invalid | Validate JSON syntax. Re-copy from template if corrupted. |
152
+ | Agent files missing | If your repo uses optional helper agents, restore them in `.claude/agents/`. Otherwise ignore this advisory. |
117
153
  | Audit log invalid | Remove corrupted entries. Future operations will append clean entries. |
118
154
 
119
155
  ### 4. Re-run Doctor
120
156
 
121
157
  After fixes, run doctor again to verify all checks pass.
122
158
 
123
- ## Common Patterns
124
-
125
- **"Something seems broken"**
126
- > Run doctor first. Report any failing checks with their detail messages.
159
+ ## Subagent Escalation
127
160
 
128
- **"Are my hooks working?"**
129
- > Doctor checks hook installation. If hooks pass but no data appears,
130
- > verify the hook script paths point to actual files.
161
+ If doctor reveals persistent issues with a specific skill — especially
162
+ recurring failures that basic fixes do not resolve — spawn the
163
+ `diagnosis-analyst` agent as a subagent for root cause analysis.
131
164
 
132
- **"No telemetry available"**
133
- > Doctor will report missing log files. Install hooks using the
134
- > `settings_snippet.json` in the skill directory, then run a session.
165
+ ## Common Patterns
135
166
 
136
- **"Check selftune health"**
137
- > Run doctor and report the summary. A clean bill of health means
138
- > all checks pass and selftune is ready to grade/evolve/watch.
167
+ **User reports something seems broken**
168
+ > Run `selftune doctor`. Parse the JSON output for failed checks. Report
169
+ > each failure's `name` and `detail` to the user with the recommended fix.
170
+
171
+ **User asks if hooks are working**
172
+ > Run `selftune doctor`. Parse `.checks[]` for hook-related entries. If
173
+ > hooks pass but no data appears, verify hook script paths in
174
+ > `~/.claude/settings.json` point to actual files.
175
+
176
+ **No telemetry data available**
177
+ > Run `selftune doctor`. Route fixes by platform:
178
+ > - **Claude Code** — route to the Initialize workflow to install hooks
179
+ > - **Codex** — run `selftune ingest codex` or `selftune ingest wrap-codex`
180
+ > - **OpenCode** — run `selftune ingest opencode`
181
+ > - **OpenClaw** — run `selftune ingest openclaw`
182
+ > At least one session must complete after setup to generate telemetry.
183
+
184
+ **User asks to check selftune health**
185
+ > Run `selftune doctor`. Parse `.healthy` and `.summary`. If `healthy: true`,
186
+ > report that selftune is fully operational. If false, report failed checks
187
+ > and recommended fixes.
@@ -4,10 +4,19 @@ Generate eval sets from hook logs. Detects false negatives (queries that
4
4
  should have triggered a skill but did not) and annotates each entry with
5
5
  its invocation type.
6
6
 
7
+ ## When to Invoke
8
+
9
+ Invoke this workflow when the user requests any of the following:
10
+ - Generating eval sets or test data for a skill
11
+ - Checking which skills are undertriggering
12
+ - Viewing skill telemetry or usage stats
13
+ - Preparing data before running the Evolve workflow
14
+ - Any request containing "evals", "eval set", "test queries", or "skill stats"
15
+
7
16
  ## Default Command
8
17
 
9
18
  ```bash
10
- selftune evals --skill <name> [options]
19
+ selftune eval generate --skill <name> [options]
11
20
  ```
12
21
 
13
22
  ## Options
@@ -20,6 +29,9 @@ selftune evals --skill <name> [options]
20
29
  | `--max <n>` | Maximum eval entries to generate | 50 |
21
30
  | `--seed <n>` | Random seed for negative sampling | Random |
22
31
  | `--out <path>` | Output file path | `evals-<skill>.json` |
32
+ | `--synthetic` | Generate evals from SKILL.md via LLM (no logs needed) | Off |
33
+ | `--skill-path <path>` | Path to SKILL.md (required with `--synthetic`) | — |
34
+ | `--model <model>` | LLM model to use for synthetic generation | Agent default |
23
35
 
24
36
  ## Output Format
25
37
 
@@ -98,19 +110,41 @@ selftune evals --skill <name> [options]
98
110
  Discover which skills have telemetry data and how many queries each has.
99
111
 
100
112
  ```bash
101
- selftune evals --list-skills
113
+ selftune eval generate --list-skills
114
+ ```
115
+
116
+ Run this first to identify which skills have enough data for eval generation.
117
+
118
+ ### Generate Synthetic Evals (Cold Start)
119
+
120
+ When a skill has no telemetry data yet, use `--synthetic` to generate eval
121
+ queries directly from the SKILL.md content via an LLM.
122
+
123
+ ```bash
124
+ selftune eval generate --skill pptx --synthetic --skill-path /path/to/skills/pptx/SKILL.md
102
125
  ```
103
126
 
104
- Use this first to identify which skills have enough data for eval generation.
127
+ The command:
128
+ 1. Reads the SKILL.md file content
129
+ 2. Sends it to an LLM with a prompt requesting realistic test queries
130
+ 3. Parses the response into eval entries with invocation type annotations
131
+ 4. Classifies each positive query using the deterministic `classifyInvocation()` heuristic
132
+ 5. Writes the eval set to the output file
133
+
134
+ Use `--model` to override the default LLM model:
135
+
136
+ ```bash
137
+ selftune eval generate --skill pptx --synthetic --skill-path ./skills/pptx/SKILL.md --model claude-sonnet-4-5-20250514
138
+ ```
105
139
 
106
- ### Generate Evals
140
+ ### Generate Evals (Log-Based)
107
141
 
108
142
  Cross-reference `skill_usage_log.jsonl` (positive triggers) against
109
143
  `all_queries_log.jsonl` (all queries, including non-triggers) to produce
110
144
  an eval set annotated with invocation types.
111
145
 
112
146
  ```bash
113
- selftune evals --skill pptx --max 50 --out evals-pptx.json
147
+ selftune eval generate --skill pptx --max 50 --out evals-pptx.json
114
148
  ```
115
149
 
116
150
  The command:
@@ -127,20 +161,74 @@ View aggregate telemetry for a skill: average turns, tool call breakdown,
127
161
  error rates, and common bash command patterns.
128
162
 
129
163
  ```bash
130
- selftune evals --skill pptx --stats
164
+ selftune eval generate --skill pptx --stats
131
165
  ```
132
166
 
133
167
  ## Steps
134
168
 
169
+ ### 0. Pre-Flight Configuration
170
+
171
+ Before generating evals, present numbered configuration options to the user inline in your response, then wait for the user's answer before proceeding.
172
+
173
+ If the user responds with "use defaults", "just do it", or similar shorthand, skip to step 1 using the recommended defaults.
174
+
175
+ For `--list-skills` or `--stats` requests, skip pre-flight entirely — these are read-only operations.
176
+
177
+ Present the following options inline in your response:
178
+
179
+ 1. **Generation Mode**
180
+ - a) Log-based — build evals from real usage logs (recommended if logs exist)
181
+ - b) Synthetic — generate evals from SKILL.md via LLM (for new skills with no data)
182
+
183
+ 2. **Skill Path** (synthetic mode only)
184
+ - Provide absolute or relative path to the target SKILL.md
185
+ - Example: `./skills/pptx/SKILL.md`
186
+
187
+ 3. **Max Entries:** 50 (default — how many eval entries to generate)
188
+
189
+ 4. **Model** (synthetic mode only)
190
+ - a) Fast (haiku) — quick generation
191
+ - b) Balanced (sonnet) — better query diversity (recommended)
192
+ - c) Best (opus) — highest quality synthetic queries
193
+
194
+ 5. **Output Path:** `evals-<skill>.json` (default)
195
+
196
+ Ask: "Reply with your choices or 'use defaults' for recommended settings."
197
+
198
+ After the user responds, parse their selections and map each choice to the corresponding CLI flags:
199
+
200
+ | Selection | CLI Flag |
201
+ |-----------|----------|
202
+ | 1a (log-based) | _(no flag, default)_ |
203
+ | 1b (synthetic) | `--synthetic --skill-path <path>` |
204
+ | Custom max entries | `--max <value>` |
205
+ | 4a (haiku) | `--model haiku` (resolved internally by selftune) |
206
+ | 4b (sonnet) | `--model sonnet` |
207
+ | 4c (opus) | `--model opus` |
208
+ | Custom output path | `--out <path>` |
209
+
210
+ Show a confirmation summary to the user:
211
+
212
+ ```text
213
+ Configuration Summary:
214
+ Mode: log-based
215
+ Max entries: 50
216
+ Output: evals-pptx.json
217
+
218
+ Proceeding...
219
+ ```
220
+
221
+ Build the CLI command string with all selected flags and continue to step 1.
222
+
135
223
  ### 1. List Available Skills
136
224
 
137
- Run `selftune evals --list-skills` to see what skills have telemetry data. If the target
225
+ Run `selftune eval generate --list-skills` to see what skills have telemetry data. If the target
138
226
  skill has zero or very few queries, more sessions are needed before
139
227
  eval generation is useful.
140
228
 
141
229
  ### 2. Generate the Eval Set
142
230
 
143
- Run with `--skill <name>`. Review the output file for:
231
+ Run with `--skill <name>`. Parse the JSON output and review for:
144
232
  - Balance between positive and negative entries
145
233
  - Coverage of all three positive invocation types (explicit, implicit, contextual)
146
234
  - Reasonable negative examples (keyword overlap but wrong intent)
@@ -169,16 +257,20 @@ beyond trigger coverage.
169
257
 
170
258
  ## Common Patterns
171
259
 
172
- **"What skills are undertriggering?"**
173
- > Run `selftune evals --list-skills`, then for each skill with significant query counts,
174
- > generate evals and check for missed implicit/contextual queries.
260
+ **User asks which skills are undertriggering:**
261
+ Run `selftune eval generate --list-skills`, then for each skill with significant query counts,
262
+ generate evals and check for missed implicit/contextual queries.
263
+
264
+ **User asks to generate evals for a specific skill:**
265
+ Run `selftune eval generate --skill <name>`. Parse the JSON output and review the invocation type distribution.
266
+ Feed the output to the Evolve workflow if coverage gaps exist.
175
267
 
176
- **"Generate evals for pptx"**
177
- > Run `selftune evals --skill pptx`. Review the invocation type distribution.
178
- > Feed the output to `evolve` if coverage gaps exist.
268
+ **User asks for skill telemetry or stats:**
269
+ Run `selftune eval generate --skill <name> --stats` for aggregate telemetry.
179
270
 
180
- **"Show me skill stats"**
181
- > Run `selftune evals --skill <name> --stats` for aggregate telemetry.
271
+ **User has a new skill with no usage data:**
272
+ Use `selftune eval generate --skill <name> --synthetic --skill-path /path/to/SKILL.md`.
273
+ This generates eval queries from the skill description without needing session logs.
182
274
 
183
- **"I want reproducible evals"**
184
- > Use `--seed <n>` to fix the random sampling of negative examples.
275
+ **User wants reproducible evals:**
276
+ Add `--seed <n>` to fix the random sampling of negative examples.
@@ -0,0 +1,154 @@
1
+ # selftune Evolution Memory
2
+
3
+ This reference documents the evolution memory system. The agent reads these files automatically during evolve, watch, and rollback workflows for session continuity.
4
+
5
+ Memory is human-readable session context that survives context window resets. It provides
6
+ continuity across evolve, watch, and rollback workflows by recording outcomes,
7
+ decisions, and known issues in plain markdown files.
8
+
9
+ ## When to Use
10
+
11
+ - **Reading evolution context for continuity** -- Step 0 in Evolve, Watch, and
12
+ Rollback workflows reads memory before starting.
13
+ - **Diagnosing what happened in previous sessions** -- the decision log provides
14
+ a chronological record of every evolution action and its outcome.
15
+
16
+ ## Location
17
+
18
+ ```text
19
+ ~/.selftune/memory/
20
+ ```
21
+
22
+ All memory files live in this directory. The directory is created automatically
23
+ on the first write.
24
+
25
+ ## The Three Files
26
+
27
+ ### 1. context.md -- Active Evolutions
28
+
29
+ Tracks the current state of every skill that has been evolved, watched, or
30
+ rolled back.
31
+
32
+ **Format:** Markdown with `##` sections.
33
+
34
+ ```markdown
35
+ # Selftune Context
36
+
37
+ ## Active Evolutions
38
+ - pptx: deployed -- Added implicit triggers for slide deck queries
39
+ - csv-parser: regression -- pass_rate=0.65, baseline=0.88
40
+
41
+ ## Known Issues
42
+ - Regression detected for csv-parser: pass_rate=0.65 below baseline=0.88
43
+
44
+ ## Last Updated
45
+ 2026-03-01T14:00:00.000Z
46
+ ```
47
+
48
+ **Status values:**
49
+
50
+ | Status | Meaning |
51
+ |--------|---------|
52
+ | `deployed` | Evolution was deployed successfully |
53
+ | `failed` | Evolution attempted but did not deploy |
54
+ | `regression` | Watch detected a regression in pass rate |
55
+ | `healthy` | Watch confirmed pass rate is within threshold |
56
+ | `rolled-back` | Rollback completed successfully |
57
+ | `rollback-failed` | Rollback was attempted but failed |
58
+
59
+ ### 2. plan.md -- Current Priorities
60
+
61
+ Records evolution priorities and strategy.
62
+
63
+ **Format:** Markdown with `##` sections.
64
+
65
+ ```markdown
66
+ # Evolution Plan
67
+
68
+ ## Current Priorities
69
+ 1. Improve csv-parser implicit trigger coverage
70
+ 2. Re-evolve pptx after eval set expansion
71
+
72
+ ## Strategy
73
+ Focus on skills with highest session volume first.
74
+
75
+ ## Last Updated
76
+ 2026-03-01T14:00:00.000Z
77
+ ```
78
+
79
+ ### 3. decisions.md -- Append-Only Decision Log
80
+
81
+ Chronological record of every evolution action. Entries are never removed,
82
+ only appended.
83
+
84
+ **Entry format:**
85
+
86
+ ```markdown
87
+ ## 2026-03-01T14:00:00.000Z -- evolve
88
+ - **Skill:** pptx
89
+ - **Action:** evolved
90
+ - **Rationale:** Missed implicit triggers for slide deck queries
91
+ - **Result:** Deployed with pass_rate improvement 0.70 -> 0.92
92
+
93
+ ---
94
+ ```
95
+
96
+ Each entry contains:
97
+
98
+ | Field | Description |
99
+ |-------|-------------|
100
+ | Timestamp | ISO 8601 timestamp in the `##` heading |
101
+ | Action type | `evolve`, `rollback`, or `watch` in the heading |
102
+ | Skill | The skill name |
103
+ | Action | Past-tense result: `evolved`, `rolled-back`, or `watched` |
104
+ | Rationale | Why the action was taken |
105
+ | Result | What happened |
106
+
107
+ Entries are separated by `---` markers.
108
+
109
+ ## Auto-Update Triggers
110
+
111
+ Memory is updated automatically by the memory writer (`cli/selftune/memory/writer.ts`).
112
+ No manual editing is required during normal operation.
113
+
114
+ | Trigger | Function | Updates |
115
+ |---------|----------|---------|
116
+ | After evolve completes | `updateContextAfterEvolve` | context.md + decisions.md |
117
+ | After rollback completes | `updateContextAfterRollback` | context.md + decisions.md |
118
+ | After watch completes | `updateContextAfterWatch` | context.md + decisions.md, adds known issues on regression |
119
+
120
+ ## Reading Memory
121
+
122
+ Step 0 in the Evolve, Watch, and Rollback workflows reads `~/.selftune/memory/context.md`
123
+ before starting any operation. This provides:
124
+
125
+ - Active evolutions and their current status
126
+ - Known issues from previous runs
127
+ - Last update timestamp
128
+
129
+ If the file does not exist, the workflow proceeds normally. Memory files are
130
+ created automatically after the first evolve, watch, or rollback operation.
131
+
132
+ ## Resetting Memory
133
+
134
+ Delete the `~/.selftune/memory/` directory and the files in it to start fresh:
135
+
136
+ ```bash
137
+ rm -rf ~/.selftune/memory/
138
+ ```
139
+
140
+ They will be recreated automatically on the next evolve, watch, or rollback run.
141
+
142
+ ## Common Patterns
143
+
144
+ **"What happened in the last evolution?"**
145
+ > Read `~/.selftune/memory/decisions.md`. The most recent entry at the bottom
146
+ > of the file contains the last action, skill, rationale, and result.
147
+
148
+ **"What's the current state?"**
149
+ > Read `~/.selftune/memory/context.md`. The Active Evolutions section lists
150
+ > every tracked skill and its current status.
151
+
152
+ **"Memory seems stale"**
153
+ > Delete the files in `~/.selftune/memory/` and run `selftune evolve` or
154
+ > `selftune watch` to recreate them with fresh data.