npm - selftune - Versions diffs - 0.2.0 → 0.2.2 - Mend

selftune 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

package/.claude/agents/diagnosis-analyst.md +20 -10
package/.claude/agents/evolution-reviewer.md +14 -1
package/.claude/agents/integration-guide.md +18 -6
package/.claude/agents/pattern-analyst.md +18 -5
package/CHANGELOG.md +12 -4
package/README.md +43 -35
package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
package/apps/local-dashboard/dist/favicon.png +0 -0
package/apps/local-dashboard/dist/index.html +17 -0
package/apps/local-dashboard/dist/logo.png +0 -0
package/apps/local-dashboard/dist/logo.svg +9 -0
package/cli/selftune/badge/badge-data.ts +1 -1
package/cli/selftune/badge/badge.ts +4 -8
package/cli/selftune/canonical-export.ts +183 -0
package/cli/selftune/constants.ts +28 -0
package/cli/selftune/contribute/contribute.ts +1 -1
package/cli/selftune/cron/setup.ts +17 -17
package/cli/selftune/dashboard-contract.ts +202 -0
package/cli/selftune/dashboard-server.ts +653 -186
package/cli/selftune/dashboard.ts +41 -176
package/cli/selftune/eval/baseline.ts +5 -4
package/cli/selftune/eval/composability-v2.ts +273 -0
package/cli/selftune/eval/hooks-to-evals.ts +34 -15
package/cli/selftune/eval/unit-test-cli.ts +1 -1
package/cli/selftune/evolution/evidence.ts +26 -0
package/cli/selftune/evolution/evolve-body.ts +105 -11
package/cli/selftune/evolution/evolve.ts +371 -25
package/cli/selftune/evolution/extract-patterns.ts +87 -29
package/cli/selftune/evolution/rollback.ts +2 -2
package/cli/selftune/grading/auto-grade.ts +200 -0
package/cli/selftune/grading/grade-session.ts +448 -97
package/cli/selftune/grading/results.ts +42 -0
package/cli/selftune/hooks/prompt-log.ts +172 -2
package/cli/selftune/hooks/session-stop.ts +123 -3
package/cli/selftune/hooks/skill-eval.ts +119 -3
package/cli/selftune/index.ts +395 -116
package/cli/selftune/ingestors/claude-replay.ts +140 -114
package/cli/selftune/ingestors/codex-rollout.ts +345 -46
package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
package/cli/selftune/init.ts +227 -14
package/cli/selftune/last.ts +14 -5
package/cli/selftune/localdb/db.ts +63 -0
package/cli/selftune/localdb/materialize.ts +428 -0
package/cli/selftune/localdb/queries.ts +376 -0
package/cli/selftune/localdb/schema.ts +204 -0
package/cli/selftune/monitoring/watch.ts +66 -15
package/cli/selftune/normalization.ts +682 -0
package/cli/selftune/observability.ts +19 -44
package/cli/selftune/orchestrate.ts +1073 -0
package/cli/selftune/quickstart.ts +203 -0
package/cli/selftune/repair/skill-usage.ts +576 -0
package/cli/selftune/schedule.ts +561 -0
package/cli/selftune/status.ts +48 -26
package/cli/selftune/sync.ts +627 -0
package/cli/selftune/types.ts +148 -0
package/cli/selftune/utils/canonical-log.ts +45 -0
package/cli/selftune/utils/hooks.ts +41 -0
package/cli/selftune/utils/html.ts +27 -0
package/cli/selftune/utils/llm-call.ts +78 -20
package/cli/selftune/utils/math.ts +10 -0
package/cli/selftune/utils/query-filter.ts +139 -0
package/cli/selftune/utils/skill-discovery.ts +340 -0
package/cli/selftune/utils/skill-log.ts +68 -0
package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
package/cli/selftune/utils/transcript.ts +272 -26
package/cli/selftune/workflows/discover.ts +254 -0
package/cli/selftune/workflows/skill-md-writer.ts +288 -0
package/cli/selftune/workflows/workflows.ts +188 -0
package/package.json +21 -8
package/packages/telemetry-contract/README.md +11 -0
package/packages/telemetry-contract/fixtures/golden.json +87 -0
package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
package/packages/telemetry-contract/index.ts +1 -0
package/packages/telemetry-contract/package.json +19 -0
package/packages/telemetry-contract/src/index.ts +2 -0
package/packages/telemetry-contract/src/types.ts +163 -0
package/packages/telemetry-contract/src/validators.ts +109 -0
package/skill/SKILL.md +84 -53
package/skill/Workflows/AutoActivation.md +17 -16
package/skill/Workflows/Badge.md +6 -0
package/skill/Workflows/Baseline.md +46 -23
package/skill/Workflows/Composability.md +12 -5
package/skill/Workflows/Contribute.md +17 -14
package/skill/Workflows/Cron.md +56 -79
package/skill/Workflows/Dashboard.md +45 -34
package/skill/Workflows/Doctor.md +30 -17
package/skill/Workflows/Evals.md +64 -40
package/skill/Workflows/EvolutionMemory.md +2 -0
package/skill/Workflows/Evolve.md +102 -47
package/skill/Workflows/EvolveBody.md +6 -6
package/skill/Workflows/Grade.md +36 -31
package/skill/Workflows/ImportSkillsBench.md +11 -5
package/skill/Workflows/Ingest.md +43 -36
package/skill/Workflows/Initialize.md +44 -30
package/skill/Workflows/Orchestrate.md +139 -0
package/skill/Workflows/Replay.md +39 -18
package/skill/Workflows/Rollback.md +3 -3
package/skill/Workflows/Schedule.md +61 -0
package/skill/Workflows/Sync.md +88 -0
package/skill/Workflows/UnitTest.md +34 -22
package/skill/Workflows/Watch.md +14 -4
package/skill/Workflows/Workflows.md +129 -0
package/skill/assets/activation-rules-default.json +26 -0
package/skill/assets/multi-skill-settings.json +63 -0
package/skill/assets/single-skill-settings.json +57 -0
package/skill/references/invocation-taxonomy.md +2 -2
package/skill/references/logs.md +164 -2
package/skill/references/setup-patterns.md +65 -0
package/skill/references/version-history.md +40 -0
package/skill/settings_snippet.json +1 -1
package/templates/multi-skill-settings.json +7 -7
package/templates/single-skill-settings.json +6 -6
package/dashboard/index.html +0 -1680

package/skill/Workflows/Sync.md ADDED Viewed

@@ -0,0 +1,88 @@
+# selftune Sync Workflow
+Refresh source-truth telemetry across supported agent CLIs, then rebuild the
+repaired skill-usage overlay so status, dashboard, grading, and evolution work
+from real transcripts/rollouts instead of stale hook data.
+## When to Use
+- Before running `status`, `dashboard`, `watch`, or `evolve` when data may be stale
+- The user has run many Claude Code, Codex, OpenCode, or OpenClaw sessions since last sync
+- The agent detects host logs may be polluted and needs the repaired/source-first view
+- Before exporting data to cloud ingest
+## Default Command
+```bash
+selftune sync
+```
+## Options
+| Flag | Description |
+|------|-------------|
+| `--since <date>` | Only sync sessions modified on/after this date |
+| `--dry-run` | Show summary without writing files |
+| `--force` | Ignore per-source markers and rescan everything |
+| `--no-claude` | Skip Claude transcript replay |
+| `--no-codex` | Skip Codex rollout ingest |
+| `--no-opencode` | Skip OpenCode ingest |
+| `--no-openclaw` | Skip OpenClaw ingest |
+| `--no-repair` | Skip rebuilding `skill_usage_repaired.jsonl` |
+## Output
+Writes/refreshes data:
+- `~/.claude/session_telemetry_log.jsonl`
+- `~/.claude/all_queries_log.jsonl`
+- `~/.claude/skill_usage_log.jsonl`
+- `~/.claude/skill_usage_repaired.jsonl`
+- per-source marker files
+## Steps
+### 1. Preview Sync
+Run `selftune sync --dry-run`. The output includes per-source `scanned`
+counts. Report the preview summary to the user.
+### 2. Run Sync
+Run `selftune sync`. The output includes:
+- Per-source `scanned`, `synced`, and `skipped` counts
+- Repaired overlay totals
+- Any errors or warnings
+### 3. Verify Results
+Verify there are no sync errors and that per-source counters are internally
+consistent (`scanned`, `synced`, `skipped`). `synced=0` is valid when no
+new sessions exist since the last sync. Run `selftune doctor` only when
+sync reports source/hook failures or expected active sources are missing.
+### 4. Continue to Next Workflow
+After sync completes, proceed with the user's intended workflow:
+`selftune status`, `selftune dashboard`, `selftune watch --sync-first`,
+or `selftune evolve --sync-first`.
+## Common Patterns
+**User wants to refresh telemetry data**
+> Run `selftune sync`. Report per-source `scanned`, `synced`, and `skipped` counts.
+**User wants to sync only recent sessions**
+> Run `selftune sync --since <date>` with the user's specified date.
+**User wants a full rescan from scratch**
+> Run `selftune sync --force`. This ignores per-source markers and rescans
+> all sessions.
+**Agent needs to verify sync worked**
+> Check per-source `scanned`, `synced`, and `skipped` counts. `synced=0`
+> is normal when data is already up-to-date. Verify `scanned > 0` for
+> expected sources to confirm sync ran successfully.
+**Agent is chaining into monitoring or evolution**
+> Use `selftune watch --sync-first` or `selftune evolve --sync-first` to
+> refresh source truth automatically before making decisions.

package/skill/Workflows/UnitTest.md CHANGED Viewed

@@ -6,7 +6,7 @@ accuracy, output content, and tool usage with deterministic assertions.
 ## Default Command
 ```bash
-selftune unit-test --skill <name> --tests <path> [options]
+selftune eval unit-test --skill <name> --tests <path> [options]
 ```
 ## Options
@@ -86,13 +86,13 @@ require `--run-agent` and run the query through the full agent.
 ### 1. Generate Tests (First Time)
-For a new skill, generate initial tests from the skill content:
+If no test file exists for the skill, generate initial tests:
 ```bash
-selftune unit-test --skill Research --generate --skill-path ~/.claude/skills/Research/SKILL.md
+selftune eval unit-test --skill Research --generate --skill-path ~/.claude/skills/Research/SKILL.md
 ```
-This uses an LLM to create test cases covering:
+Parse the output. The LLM creates test cases covering:
 - Explicit trigger queries
 - Implicit trigger queries
 - Contextual trigger queries
@@ -102,37 +102,49 @@ Tests are saved to `~/.selftune/unit-tests/Research.json`.
 ### 2. Run Tests
+Run the test suite:
 ```bash
-selftune unit-test --skill Research --tests ~/.selftune/unit-tests/Research.json
+selftune eval unit-test --skill Research --tests ~/.selftune/unit-tests/Research.json
 ```
 By default, only `trigger_check` assertions run (fast, no agent needed).
 Add `--run-agent` for full agent-based assertions.
-### 3. Review Results
+### 3. Parse Results
+Parse the JSON output. Check `pass_rate` and investigate failures:
+- Failed trigger checks -- description needs improvement (route to Evolve)
+- Failed output assertions -- skill workflow needs fixes
+- Failed tool assertions -- skill routing is broken
-Check `pass_rate` and investigate failures:
-- Failed trigger checks → description needs improvement
-- Failed output assertions → skill workflow needs fixes
-- Failed tool assertions → skill routing is broken
+Report the pass rate and any failures to the user.
-### 4. Iterate
+### 4. Post-Evolution Verification
 After evolving a skill, re-run unit tests to verify improvements:
-1. Evolve: `selftune evolve --skill Research --skill-path /path/SKILL.md`
-2. Test: `selftune unit-test --skill Research`
-3. Check pass rate improved
+```bash
+selftune eval unit-test --skill Research
+```
+Compare the new `pass_rate` against the previous run. Report whether
+the evolution improved trigger accuracy.
 ## Common Patterns
-**"Generate tests for the pptx skill"**
-> `selftune unit-test --skill pptx --generate --skill-path /path/SKILL.md`
+**User asks to generate tests for a skill**
+> Run `selftune eval unit-test --skill <name> --generate --skill-path <path>`.
+> Parse the output and report how many tests were generated.
-**"Run existing tests"**
-> `selftune unit-test --skill pptx --tests ~/.selftune/unit-tests/pptx.json`
+**User asks to run existing tests**
+> Run `selftune eval unit-test --skill <name>`. Parse the JSON output and
+> report pass rate and any failures.
-**"Run full agent tests"**
-> `selftune unit-test --skill pptx --tests /path/tests.json --run-agent`
+**User asks for full agent-based testing**
+> Run `selftune eval unit-test --skill <name> --run-agent`. This runs queries
+> through the full agent, so inform the user it will take longer.
-**"Test after evolution"**
-> Run `selftune unit-test` after each `selftune evolve` to verify improvements.
+**After an evolution completes**
+> Run unit tests to verify the evolution improved trigger accuracy. Compare
+> the new pass rate against the pre-evolution baseline.

package/skill/Workflows/Watch.md CHANGED Viewed

@@ -67,13 +67,13 @@ selftune watch --skill <name> --skill-path <path> [options]
 ### 0. Read Evolution Context
-Before starting, read `~/.selftune/memory/context.md` for session context:
+Read `~/.selftune/memory/context.md` for session context:
 - Active evolutions and their current status
 - Known issues and regression history
 - Last update timestamp
-This provides continuity across context resets. If the file doesn't exist,
-proceed normally -- it will be created after the first watch.
+If the file does not exist, proceed normally -- it will be created after
+the first watch.
 The evolution-guard hook prevents conflicting SKILL.md edits while watch is
 evaluating the skill. The auto-activation system uses watch results to
@@ -102,7 +102,7 @@ Parse the JSON output. Key decision points:
 If regression is detected:
 - Review recent session transcripts to understand what changed
 - Check if the eval set is still representative
-- Run `rollback` if the regression is confirmed (see `Workflows/Rollback.md`)
+- Run `evolve rollback` if the regression is confirmed (see `Workflows/Rollback.md`)
 If `--auto-rollback` was set, the command automatically restores the
 previous description and logs a `rolled_back` entry.
@@ -141,3 +141,13 @@ context window resets before the user acts on the results.
 **"Set a custom baseline"**
 > Use `--baseline 0.85` to override auto-detection. Useful when the
 > auto-detected baseline is from an older evolution.
+## Autonomous Mode
+When called by `selftune orchestrate`, watch runs automatically on recently
+evolved skills:
+- Checks all skills evolved in the last `--recent-window` hours (default: 24)
+- Auto-rollback is enabled by default
+- Results are included in the orchestrate run report
+- No user notification — regressions are handled silently via rollback

package/skill/Workflows/Workflows.md ADDED Viewed

@@ -0,0 +1,129 @@
+# selftune Workflows Workflow
+## When to Use
+When the user asks about multi-skill workflows, workflow discovery, or skill composition.
+## Overview
+Discover repeated multi-skill sequences from telemetry and optionally save a
+discovered workflow into a skill's `## Workflows` section.
+## Default Commands
+```bash
+selftune workflows [options]
+selftune workflows save <workflow-id|index> [--skill-path <path>]
+```
+## Options
+- `--min-occurrences <n>`: Minimum times a workflow must appear before it is
+  shown. Default: `3`.
+- `--window <n>`: Only analyze the last `n` sessions. Default: all sessions.
+- `--skill <name>`: Only show workflows containing this skill. Default: all
+  skills.
+- `--json`: Emit machine-readable `WorkflowDiscoveryReport` JSON. Default:
+  human-readable text.
+- `--skill-path <path>`: Target SKILL.md when using `save`. Default:
+  auto-detect the first skill's SKILL.md path across contributing sessions. If
+  that skill maps to multiple SKILL.md files in those sessions, the command
+  errors and you must pass `--skill-path` explicitly.
+## Save Semantics
+`save` accepts either:
+- A workflow ID, which is the ordered skill chain joined with `→`
+- A 1-based index from the `selftune workflows` output
+Examples:
+```bash
+selftune workflows save "Copywriting→MarketingAutomation→SelfTuneBlog"
+selftune workflows save 1
+```
+When saved, selftune appends a subsection to `## Workflows` in the target
+SKILL.md. The subsection name is derived from the skill chain
+(`Copywriting-MarketingAutomation-SelfTuneBlog`) and includes
+discovered-source metadata with occurrence count and synergy score.
+## Output Format
+### Human-readable output
+The number prefix (for example, `1.`) is the 1-based index you can pass to
+`selftune workflows save <index>`.
+```text
+Discovered Workflows (from 450 sessions):
+  1. Copywriting → MarketingAutomation → SelfTuneBlog
+     Occurrences: 12 | Synergy: 0.72 | Consistency: 92% | Completion: 83%
+     Common trigger: "write and publish a blog post"
+```
+### JSON output
+```json
+{
+  "workflows": [
+    {
+      "workflow_id": "Copywriting→MarketingAutomation→SelfTuneBlog",
+      "skills": ["Copywriting", "MarketingAutomation", "SelfTuneBlog"],
+      "occurrence_count": 12,
+      "avg_errors": 0.5,
+      "avg_errors_individual": 1.8,
+      "synergy_score": 0.72,
+      "representative_query": "write and publish a blog post",
+      "sequence_consistency": 0.92,
+      "completion_rate": 0.83,
+      "first_seen": "2026-03-01T10:00:00Z",
+      "last_seen": "2026-03-08T16:30:00Z",
+      "session_ids": ["s1", "s2"]
+    }
+  ],
+  "total_sessions_analyzed": 450,
+  "generated_at": "2026-03-09T12:00:00.000Z"
+}
+```
+## How It Works
+1. Reads `session_telemetry_log.jsonl` and `skill_usage_log.jsonl`
+2. Orders skill usage inside each session by timestamp
+3. Deduplicates consecutive same-skill entries
+4. Keeps only sequences with 2+ skills
+5. Counts repeated ordered sequences across sessions
+6. Computes workflow metrics:
+   - `synergy_score` — whether the sequence performs better together than solo
+     baselines, where each skill's solo baseline is its average error rate from
+     single-skill sessions and the workflow uses the max of those solo rates
+   - `sequence_consistency` — how stable the ordering is for the same skill
+     set
+   - `completion_rate` — how often all skills in the sequence fire
+7. Filters by `--min-occurrences` and optional `--skill`
+8. Optionally appends the chosen workflow to SKILL.md via `save`
+## Interpreting Results
+- `synergy_score > 0.3`: Strong candidate for codifying as a workflow.
+- `synergy_score < -0.3`: The sequence adds friction or conflicts.
+- Low `sequence_consistency`: Same skills appear in multiple orders; the
+  pattern may still be unstable.
+- Low `completion_rate`: One or more skills in the sequence are often not
+  invoked, so the full workflow does not complete.
+## Common Patterns
+- "Which skills always get used together?"
+  `selftune workflows`
+- "Only show workflows involving Deploy"
+  `selftune workflows --skill Deploy`
+- "Focus on recent behavior"
+  `selftune workflows --window 20`
+- "Save the top workflow into SKILL.md"
+  `selftune workflows save 1 --skill-path /path/to/SKILL.md`
+- "Save a specific discovered workflow by ID"
+  `selftune workflows save "Copywriting→MarketingAutomation→SelfTuneBlog"`

package/skill/assets/activation-rules-default.json ADDED Viewed

@@ -0,0 +1,26 @@
+{
+  "_readme": "Default activation rules for selftune auto-activation. Copy to ~/.selftune/activation-rules.json to customize.",
+  "_note": "These defaults are bundled inside the installed skill so setup does not depend on repository-level templates.",
+  "rules": [
+    {
+      "id": "post-session-diagnostic",
+      "enabled": true,
+      "description": "Suggest `selftune last` when session has >2 unmatched queries"
+    },
+    {
+      "id": "grading-threshold-breach",
+      "enabled": true,
+      "description": "Suggest `selftune evolve` when session pass rate < 60%"
+    },
+    {
+      "id": "stale-evolution",
+      "enabled": true,
+      "description": "Suggest `selftune evolve` when no evolution in >7 days and pending false negatives exist"
+    },
+    {
+      "id": "regression-detected",
+      "enabled": true,
+      "description": "Suggest `selftune evolve rollback` when monitoring detects a regression"
+    }
+  ]
+}

package/skill/assets/multi-skill-settings.json ADDED Viewed

@@ -0,0 +1,63 @@
+{
+  "_readme": "Claude settings template for multi-skill selftune projects. Merge into ~/.claude/settings.json.",
+  "_usage": "These hooks use npx selftune, which works regardless of installation path.",
+  "_note": "Multi-skill projects use activation rules to route queries to the correct skill. See assets/activation-rules-default.json.",
+  "hooks": {
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook prompt-log",
+            "timeout": 5
+          },
+          {
+            "type": "command",
+            "command": "npx selftune hook auto-activate",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": "Write|Edit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook skill-change-guard",
+            "timeout": 5
+          },
+          {
+            "type": "command",
+            "command": "npx selftune hook evolution-guard",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": "Read",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook skill-eval",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook session-stop",
+            "timeout": 15
+          }
+        ]
+      }
+    ]
+  }
+}

package/skill/assets/single-skill-settings.json ADDED Viewed

@@ -0,0 +1,57 @@
+{
+  "_readme": "Claude settings template for single-skill selftune projects. Merge into ~/.claude/settings.json.",
+  "_usage": "These hooks use npx selftune, which works regardless of installation path.",
+  "hooks": {
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook prompt-log",
+            "timeout": 5
+          },
+          {
+            "type": "command",
+            "command": "npx selftune hook auto-activate",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "PreToolUse": [
+      {
+        "matcher": "Write|Edit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook skill-change-guard",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "PostToolUse": [
+      {
+        "matcher": "Read",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook skill-eval",
+            "timeout": 5
+          }
+        ]
+      }
+    ],
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "npx selftune hook session-stop",
+            "timeout": 15
+          }
+        ]
+      }
+    ]
+  }
+}

package/skill/references/invocation-taxonomy.md CHANGED Viewed

@@ -1,7 +1,7 @@
 # Invocation Taxonomy Reference
 How selftune classifies the ways users trigger (or should trigger) a skill.
-Used by the `evals` command and referenced by evolution workflows to understand
+Used by the `selftune eval generate` command and referenced by evolution workflows to understand
 coverage gaps.
 ---
@@ -92,7 +92,7 @@ The invocation taxonomy directly drives the evolution feedback loop:
 ### Missed Implicit = Undertriggering
-When `evals` shows implicit queries that don't trigger the skill, the
+When `selftune eval generate` shows implicit queries that don't trigger the skill, the
 description is too narrow. The `evolve` command will:
 1. Extract the missed implicit patterns
 2. Propose description changes that cover them