npm - selftune - Versions diffs - 0.1.4 → 0.2.1 - Mend

selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

package/.claude/agents/diagnosis-analyst.md +156 -0
package/.claude/agents/evolution-reviewer.md +180 -0
package/.claude/agents/integration-guide.md +212 -0
package/.claude/agents/pattern-analyst.md +160 -0
package/CHANGELOG.md +46 -1
package/README.md +105 -257
package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
package/apps/local-dashboard/dist/favicon.png +0 -0
package/apps/local-dashboard/dist/index.html +17 -0
package/apps/local-dashboard/dist/logo.png +0 -0
package/apps/local-dashboard/dist/logo.svg +9 -0
package/assets/BeforeAfter.gif +0 -0
package/assets/FeedbackLoop.gif +0 -0
package/assets/logo.svg +9 -0
package/assets/skill-health-badge.svg +20 -0
package/cli/selftune/activation-rules.ts +171 -0
package/cli/selftune/badge/badge-data.ts +108 -0
package/cli/selftune/badge/badge-svg.ts +212 -0
package/cli/selftune/badge/badge.ts +99 -0
package/cli/selftune/canonical-export.ts +183 -0
package/cli/selftune/constants.ts +103 -1
package/cli/selftune/contribute/bundle.ts +314 -0
package/cli/selftune/contribute/contribute.ts +214 -0
package/cli/selftune/contribute/sanitize.ts +162 -0
package/cli/selftune/cron/setup.ts +266 -0
package/cli/selftune/dashboard-contract.ts +202 -0
package/cli/selftune/dashboard-server.ts +1049 -0
package/cli/selftune/dashboard.ts +43 -156
package/cli/selftune/eval/baseline.ts +248 -0
package/cli/selftune/eval/composability-v2.ts +273 -0
package/cli/selftune/eval/composability.ts +117 -0
package/cli/selftune/eval/generate-unit-tests.ts +143 -0
package/cli/selftune/eval/hooks-to-evals.ts +101 -16
package/cli/selftune/eval/import-skillsbench.ts +221 -0
package/cli/selftune/eval/synthetic-evals.ts +172 -0
package/cli/selftune/eval/unit-test-cli.ts +152 -0
package/cli/selftune/eval/unit-test.ts +196 -0
package/cli/selftune/evolution/deploy-proposal.ts +142 -1
package/cli/selftune/evolution/evidence.ts +26 -0
package/cli/selftune/evolution/evolve-body.ts +586 -0
package/cli/selftune/evolution/evolve.ts +825 -116
package/cli/selftune/evolution/extract-patterns.ts +105 -16
package/cli/selftune/evolution/pareto.ts +314 -0
package/cli/selftune/evolution/propose-body.ts +171 -0
package/cli/selftune/evolution/propose-description.ts +100 -2
package/cli/selftune/evolution/propose-routing.ts +166 -0
package/cli/selftune/evolution/refine-body.ts +141 -0
package/cli/selftune/evolution/rollback.ts +21 -4
package/cli/selftune/evolution/validate-body.ts +254 -0
package/cli/selftune/evolution/validate-proposal.ts +257 -35
package/cli/selftune/evolution/validate-routing.ts +177 -0
package/cli/selftune/grading/auto-grade.ts +200 -0
package/cli/selftune/grading/grade-session.ts +513 -42
package/cli/selftune/grading/pre-gates.ts +104 -0
package/cli/selftune/grading/results.ts +42 -0
package/cli/selftune/hooks/auto-activate.ts +185 -0
package/cli/selftune/hooks/evolution-guard.ts +165 -0
package/cli/selftune/hooks/prompt-log.ts +172 -2
package/cli/selftune/hooks/session-stop.ts +123 -3
package/cli/selftune/hooks/skill-change-guard.ts +112 -0
package/cli/selftune/hooks/skill-eval.ts +119 -3
package/cli/selftune/index.ts +415 -48
package/cli/selftune/ingestors/claude-replay.ts +377 -0
package/cli/selftune/ingestors/codex-rollout.ts +345 -46
package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
package/cli/selftune/init.ts +376 -16
package/cli/selftune/last.ts +14 -5
package/cli/selftune/localdb/db.ts +63 -0
package/cli/selftune/localdb/materialize.ts +428 -0
package/cli/selftune/localdb/queries.ts +376 -0
package/cli/selftune/localdb/schema.ts +204 -0
package/cli/selftune/memory/writer.ts +447 -0
package/cli/selftune/monitoring/watch.ts +90 -16
package/cli/selftune/normalization.ts +682 -0
package/cli/selftune/observability.ts +19 -44
package/cli/selftune/orchestrate.ts +1073 -0
package/cli/selftune/quickstart.ts +203 -0
package/cli/selftune/repair/skill-usage.ts +576 -0
package/cli/selftune/schedule.ts +561 -0
package/cli/selftune/status.ts +59 -33
package/cli/selftune/sync.ts +627 -0
package/cli/selftune/types.ts +525 -5
package/cli/selftune/utils/canonical-log.ts +45 -0
package/cli/selftune/utils/frontmatter.ts +217 -0
package/cli/selftune/utils/hooks.ts +41 -0
package/cli/selftune/utils/html.ts +27 -0
package/cli/selftune/utils/llm-call.ts +103 -19
package/cli/selftune/utils/math.ts +10 -0
package/cli/selftune/utils/query-filter.ts +139 -0
package/cli/selftune/utils/skill-discovery.ts +340 -0
package/cli/selftune/utils/skill-log.ts +68 -0
package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
package/cli/selftune/utils/transcript.ts +307 -26
package/cli/selftune/utils/trigger-check.ts +89 -0
package/cli/selftune/utils/tui.ts +156 -0
package/cli/selftune/workflows/discover.ts +254 -0
package/cli/selftune/workflows/skill-md-writer.ts +288 -0
package/cli/selftune/workflows/workflows.ts +188 -0
package/package.json +28 -11
package/packages/telemetry-contract/README.md +11 -0
package/packages/telemetry-contract/fixtures/golden.json +87 -0
package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
package/packages/telemetry-contract/index.ts +1 -0
package/packages/telemetry-contract/package.json +19 -0
package/packages/telemetry-contract/src/index.ts +2 -0
package/packages/telemetry-contract/src/types.ts +163 -0
package/packages/telemetry-contract/src/validators.ts +109 -0
package/skill/SKILL.md +180 -33
package/skill/Workflows/AutoActivation.md +145 -0
package/skill/Workflows/Badge.md +124 -0
package/skill/Workflows/Baseline.md +144 -0
package/skill/Workflows/Composability.md +107 -0
package/skill/Workflows/Contribute.md +94 -0
package/skill/Workflows/Cron.md +132 -0
package/skill/Workflows/Dashboard.md +214 -0
package/skill/Workflows/Doctor.md +63 -14
package/skill/Workflows/Evals.md +110 -18
package/skill/Workflows/EvolutionMemory.md +154 -0
package/skill/Workflows/Evolve.md +181 -21
package/skill/Workflows/EvolveBody.md +159 -0
package/skill/Workflows/Grade.md +36 -31
package/skill/Workflows/ImportSkillsBench.md +117 -0
package/skill/Workflows/Ingest.md +142 -21
package/skill/Workflows/Initialize.md +91 -23
package/skill/Workflows/Orchestrate.md +139 -0
package/skill/Workflows/Replay.md +91 -0
package/skill/Workflows/Rollback.md +23 -4
package/skill/Workflows/Schedule.md +61 -0
package/skill/Workflows/Sync.md +88 -0
package/skill/Workflows/UnitTest.md +150 -0
package/skill/Workflows/Watch.md +33 -1
package/skill/Workflows/Workflows.md +129 -0
package/skill/assets/activation-rules-default.json +26 -0
package/skill/assets/multi-skill-settings.json +63 -0
package/skill/assets/single-skill-settings.json +57 -0
package/skill/references/invocation-taxonomy.md +2 -2
package/skill/references/logs.md +164 -2
package/skill/references/setup-patterns.md +65 -0
package/skill/references/version-history.md +40 -0
package/skill/settings_snippet.json +23 -0
package/templates/activation-rules-default.json +27 -0
package/templates/multi-skill-settings.json +64 -0
package/templates/single-skill-settings.json +58 -0
package/dashboard/index.html +0 -1119

package/packages/telemetry-contract/src/validators.ts ADDED Viewed

@@ -0,0 +1,109 @@
+import {
+  CANONICAL_CAPTURE_MODES,
+  CANONICAL_COMPLETION_STATUSES,
+  CANONICAL_INVOCATION_MODES,
+  CANONICAL_PLATFORMS,
+  CANONICAL_PROMPT_KINDS,
+  CANONICAL_RECORD_KINDS,
+  CANONICAL_SCHEMA_VERSION,
+  CANONICAL_SOURCE_SESSION_KINDS,
+  type CanonicalRawSourceRef,
+  type CanonicalRecord,
+} from "./types.js";
+function isObject(value: unknown): value is Record<string, unknown> { // Type guard: true only for non-null, non-array values whose typeof is "object".
+  return typeof value === "object" && value !== null && !Array.isArray(value); // null and arrays both report typeof "object", so they are excluded explicitly.
+}
+function hasString(value: Record<string, unknown>, key: string): boolean { // True when value[key] is a non-empty string.
+  return typeof value[key] === "string" && value[key].length > 0; // The empty string is rejected, as are missing and non-string values.
+}
+function includesValue<T extends readonly string[]>(values: T, value: unknown): value is T[number] { // Type guard: value is a string that is a member of the given list of allowed strings.
+  return typeof value === "string" && values.includes(value); // The typeof check runs first, so non-strings never reach includes().
+}
+function isFiniteNumber(value: unknown): value is number { // Type guard: a number that is not NaN and not positive or negative Infinity.
+  return typeof value === "number" && Number.isFinite(value); // No range or integer check is applied here; negative and fractional values pass.
+}
+function isStringArray(value: unknown): value is string[] { // Type guard: an array whose every element is a string.
+  return Array.isArray(value) && value.every((item) => typeof item === "string"); // An empty array passes because every() is vacuously true; empty-string elements are allowed.
+}
+function isNumberRecord(value: unknown): value is Record<string, number> { // Type guard: a non-null, non-array object whose values are all finite numbers.
+  return isObject(value) && Object.values(value).every(isFiniteNumber); // An object with no enumerable values (e.g. {}) passes.
+}
+function hasSessionScope(value: Record<string, unknown>): boolean { // True when the record carries the session-scoping fields checked below.
+  return (
+    includesValue(CANONICAL_SOURCE_SESSION_KINDS, value.source_session_kind) && // source_session_kind must be one of the listed kinds.
+    hasString(value, "session_id") // session_id must be a non-empty string.
+  );
+}
+export function isCanonicalRawSourceRef(value: unknown): value is CanonicalRawSourceRef { // Type guard for CanonicalRawSourceRef.
+  return isObject(value); // Only checks for a non-null, non-array object; no individual fields are validated here.
+}
+export function isCanonicalRecord(value: unknown): value is CanonicalRecord { // Type guard for CanonicalRecord: checks the shared envelope fields first, then the fields specific to record_kind.
+  if (!isObject(value)) return false; // Must be a non-null, non-array object.
+  if (value.schema_version !== CANONICAL_SCHEMA_VERSION) return false; // Strict equality: any other schema version is rejected.
+  if (!includesValue(CANONICAL_RECORD_KINDS, value.record_kind)) return false;
+  if (!includesValue(CANONICAL_PLATFORMS, value.platform)) return false;
+  if (!includesValue(CANONICAL_CAPTURE_MODES, value.capture_mode)) return false;
+  if (!hasString(value, "normalizer_version")) return false;
+  if (!hasString(value, "normalized_at")) return false; // Only checked to be a non-empty string; the timestamp format is not validated.
+  if (!isCanonicalRawSourceRef(value.raw_source_ref)) return false;
+  switch (value.record_kind) { // Per-kind field checks; each case returns the final verdict.
+    case "session":
+      return (
+        hasSessionScope(value) &&
+        (value.completion_status === undefined || // completion_status is optional, but when present it must be a listed status.
+          includesValue(CANONICAL_COMPLETION_STATUSES, value.completion_status))
+      );
+    case "prompt":
+      return (
+        hasSessionScope(value) &&
+        hasString(value, "prompt_id") &&
+        hasString(value, "occurred_at") &&
+        hasString(value, "prompt_text") && // hasString rejects "", so an empty prompt_text fails validation.
+        includesValue(CANONICAL_PROMPT_KINDS, value.prompt_kind) &&
+        typeof value.is_actionable === "boolean"
+      );
+    case "skill_invocation":
+      return (
+        hasSessionScope(value) &&
+        hasString(value, "skill_invocation_id") &&
+        hasString(value, "occurred_at") &&
+        (value.matched_prompt_id === undefined || hasString(value, "matched_prompt_id")) && // Optional; when present it must be a non-empty string.
+        hasString(value, "skill_name") &&
+        includesValue(CANONICAL_INVOCATION_MODES, value.invocation_mode) &&
+        typeof value.triggered === "boolean" &&
+        isFiniteNumber(value.confidence) // Any finite number passes; no range is enforced here.
+      );
+    case "execution_fact":
+      return (
+        hasSessionScope(value) &&
+        hasString(value, "occurred_at") &&
+        isNumberRecord(value.tool_calls_json) && // An object whose values are all finite numbers.
+        isFiniteNumber(value.total_tool_calls) &&
+        isStringArray(value.bash_commands_redacted) &&
+        isFiniteNumber(value.assistant_turns) &&
+        isFiniteNumber(value.errors_encountered) &&
+        (value.completion_status === undefined || // Optional here as well, with the same rule as the "session" case.
+          includesValue(CANONICAL_COMPLETION_STATUSES, value.completion_status))
+      );
+    case "normalization_run": // Unlike the other kinds, this case does not call hasSessionScope.
+      return (
+        hasString(value, "run_id") &&
+        hasString(value, "run_at") &&
+        isFiniteNumber(value.raw_records_seen) &&
+        isFiniteNumber(value.canonical_records_written) &&
+        typeof value.repair_applied === "boolean"
+      );
+    default: // NOTE(review): presumably a safety net for a kind listed in CANONICAL_RECORD_KINDS but not handled above; confirm against types.ts.
+      return false;
+  }
+}

package/skill/SKILL.md CHANGED Viewed

@@ -1,11 +1,17 @@
 ---
 name: selftune
 description: >
-  Skill observability and continuous improvement. Use when the user wants to:
+  Self-improving skills toolkit. Use when the user wants to:
   grade a session, generate evals, check undertriggering, evolve a skill
-  description, rollback an evolution, monitor post-deploy performance, check
-  skill health status, view last session insight, open the dashboard, run
-  health checks, or ingest sessions from Codex/OpenCode.
+  description or full body, evolve routing tables, rollback an evolution,
+  monitor post-deploy performance, check skill health status, view last
+  session insight, open the dashboard, serve the live dashboard, run health
+  checks, manage activation rules, ingest sessions from Codex/OpenCode/OpenClaw,
+  replay Claude Code transcripts, contribute anonymized data to the community,
+  set up autonomous cron jobs, manage evolution memory, configure auto-activation
+  suggestions, diagnose underperforming skills, analyze cross-skill patterns,
+  review evolution proposals, measure baseline lift, run skill unit tests,
+  analyze skill composability, or import SkillsBench evaluation corpora.
 ---
 # selftune
@@ -13,6 +19,11 @@ description: >
 Observe real agent sessions, detect missed triggers, grade execution quality,
 and evolve skill descriptions toward the language real users actually use.
+**You are the operator.** The user installed this skill so YOU can manage their
+skill health autonomously. They will say things like "set up selftune",
+"improve my skills", or "how are my skills doing?" — and you route to the
+correct workflow below. The user does not run CLI commands directly; you do.
 ## Bootstrap
 If `~/.selftune/config.json` does not exist, read `Workflows/Initialize.md`
@@ -26,57 +37,131 @@ selftune <command> [options]
 ```
 Most commands output deterministic JSON. Parse JSON output for machine-readable commands.
-`selftune dashboard` is an exception: it generates an HTML artifact and may print
-informational progress lines.
+`selftune dashboard` is an exception: `--export` generates an HTML artifact, while
+`--serve` starts a local server; both may print informational progress lines.
 ## Quick Reference
 ```bash
-selftune grade    --skill <name> [--expectations "..."] [--agent <name>]
-selftune evals    --skill <name> [--list-skills] [--stats] [--max N]
-selftune evolve   --skill <name> --skill-path <path> [--dry-run]
-selftune rollback --skill <name> --skill-path <path> [--proposal-id <id>]
+# Ingest group
+selftune ingest claude   [--since DATE] [--dry-run] [--force] [--verbose]
+selftune ingest codex                                                          # (experimental)
+selftune ingest opencode                                                       # (experimental)
+selftune ingest openclaw [--agents-dir PATH] [--since DATE] [--dry-run] [--force] [--verbose]  # (experimental)
+selftune ingest wrap-codex -- <codex args>                                     # (experimental)
+# Grade group
+selftune grade auto      --skill <name> [--expectations "..."] [--agent <name>]
+selftune grade baseline  --skill <name> --skill-path <path> [--eval-set <path>] [--agent <name>]
+# Evolve group
+selftune evolve          --skill <name> --skill-path <path> [--dry-run]
+selftune evolve body     --skill <name> --skill-path <path> --target <routing_table|full_body> [--dry-run]
+selftune evolve rollback --skill <name> --skill-path <path> [--proposal-id <id>]
+# Eval group
+selftune eval generate      --skill <name> [--list-skills] [--stats] [--max N]
+selftune eval unit-test      --skill <name> --tests <path> [--run-agent] [--generate]
+selftune eval import         --dir <path> --skill <name> --output <path> [--match-strategy exact|fuzzy]
+selftune eval composability  --skill <name> [--window N] [--telemetry-log <path>]
+# Other commands
 selftune watch    --skill <name> --skill-path <path> [--auto-rollback]
 selftune status
 selftune last
 selftune doctor
-selftune dashboard [--export] [--out FILE]
-selftune ingest-codex
-selftune ingest-opencode
-selftune wrap-codex -- <codex args>
+selftune dashboard [--export] [--out FILE] [--serve]
+selftune dashboard --serve [--port <port>]
+selftune contribute [--skill NAME] [--preview] [--sanitize LEVEL] [--submit]
+selftune cron setup [--dry-run]                         # auto-detect platform (cron/launchd/systemd)
+selftune cron setup --platform openclaw [--dry-run] [--tz <timezone>]  # OpenClaw-specific
+selftune cron list
+selftune cron remove [--dry-run]
 ```
 ## Workflow Routing
 | Trigger keywords | Workflow | File |
 |------------------|----------|------|
-| grade, score, evaluate, assess session | Grade | Workflows/Grade.md |
-| evals, eval set, undertriggering, skill stats | Evals | Workflows/Evals.md |
-| evolve, improve, triggers, catch more queries | Evolve | Workflows/Evolve.md |
-| rollback, undo, restore, revert evolution | Rollback | Workflows/Rollback.md |
-| watch, monitor, regression, post-deploy, performing | Watch | Workflows/Watch.md |
-| doctor, health, hooks, broken, diagnose | Doctor | Workflows/Doctor.md |
-| ingest, import, codex logs, opencode, wrap codex | Ingest | Workflows/Ingest.md |
-| init, setup, bootstrap, first time | Initialize | Workflows/Initialize.md |
-| status, health summary, skill health, pass rates, how are skills | Status | *(direct command — no workflow file)* |
-| last, last session, recent session, what happened | Last | *(direct command — no workflow file)* |
-| dashboard, visual, open dashboard, skill grid | Dashboard | *(direct command — no workflow file)* |
+| grade, score, evaluate, assess session, auto-grade | Grade † | Workflows/Grade.md |
+| evals, eval set, undertriggering, skill stats, eval generate | Evals | Workflows/Evals.md |
+| evolve, improve, optimize skills, make skills better, triggers, catch more queries | Evolve † | Workflows/Evolve.md |
+| evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | Workflows/Rollback.md |
+| watch, monitor, regression, post-deploy, performing, keep an eye on | Watch † | Workflows/Watch.md |
+| doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | Workflows/Doctor.md |
+| ingest, import, codex logs, opencode, openclaw, wrap codex, ingest claude | Ingest † | Workflows/Ingest.md |
+| ingest claude, backfill, claude transcripts, historical sessions | Replay | Workflows/Replay.md |
+| contribute, share, community, export data, anonymized, give back, help others | Contribute | Workflows/Contribute.md |
+| init, setup, set up, bootstrap, first time, install, configure selftune | Initialize | Workflows/Initialize.md |
+| cron, schedule, autonomous, automate evolution, run automatically, run on its own | Cron | Workflows/Cron.md |
+| auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | Workflows/AutoActivation.md |
+| dashboard, visual, open dashboard, show dashboard, skill grid, serve dashboard, live dashboard | Dashboard | Workflows/Dashboard.md |
+| evolution memory, context memory, session continuity, what happened last | EvolutionMemory | Workflows/EvolutionMemory.md |
+| evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | Workflows/EvolveBody.md |
+| grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | Workflows/Baseline.md |
+| eval unit-test, skill test, test skill, generate tests, run tests, assertions | UnitTest | Workflows/UnitTest.md |
+| eval composability, co-occurrence, skill conflicts, skills together, conflict score | Composability | Workflows/Composability.md |
+| eval import, skillsbench, external evals, benchmark tasks, import corpus | ImportSkillsBench | Workflows/ImportSkillsBench.md |
+| status, health summary, skill health, pass rates, how are skills, skills working, skills doing, run selftune, start selftune | Status | *(direct command — no workflow file)* |
+| last, last session, recent session, what happened, what changed, what did selftune do | Last | *(direct command — no workflow file)* |
+Workflows marked with † also run autonomously via `selftune orchestrate` without user interaction.
+## Interactive Configuration
+Before running mutating workflows (evolve, evolve-body, evals, baseline), present
+a pre-flight configuration prompt to the user. This gives them control over
+execution mode, model selection, and key parameters.
+### Pre-Flight Pattern
+Each mutating workflow has a **Pre-Flight Configuration** step. Follow this pattern:
+1. Present a summary of what the command will do
+2. Show numbered options with `(recommended)` markers for suggested defaults
+3. Ask the user to pick options or say "use defaults" / "go with defaults"
+4. Show a confirmation summary of selected options before executing
+### Model Tier Reference
+When presenting model choices, use this table:
+| Tier | Model | Speed | Cost | Quality | Best for |
+|------|-------|-------|------|---------|----------|
+| Fast | `haiku` | ~2s/call | $ | Good | Iteration loops, bulk validation |
+| Balanced | `sonnet` | ~5s/call | $$ | Great | Single-pass proposals, gate checks |
+| Best | `opus` | ~10s/call | $$$ | Excellent | High-stakes final validation |
+### Quick Path
+If the user says "use defaults", "just do it", or similar — skip the pre-flight
+and run with recommended defaults. The pre-flight is for users who want control,
+not a mandatory gate.
+### Workflows That Skip Pre-Flight
+These read-only or simple workflows run immediately without prompting:
+`status`, `last`, `doctor`, `dashboard`, `watch`, `evolve rollback`,
+`grade auto`, `ingest *`, `contribute`, `cron`, `eval composability`,
+`eval unit-test`, `eval import`.
 ## The Feedback Loop
-```
-Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
+```text
+Observe --> Detect --> Diagnose --> Propose --> Validate --> Audit --> Deploy --> Watch --> Rollback
    |                                                                    |
    +--------------------------------------------------------------------+
 ```
 1. **Observe** -- Hooks capture every session (queries, triggers, metrics)
-2. **Detect** -- `evals` finds missed triggers across invocation types
-3. **Diagnose** -- `grade` evaluates session quality with evidence
-4. **Propose** -- `evolve` generates description improvements
+2. **Detect** -- `selftune eval generate` extracts missed-trigger patterns across invocation types
+3. **Diagnose** -- `selftune grade` evaluates session quality with evidence
+4. **Propose** -- `selftune evolve` generates description improvements
 5. **Validate** -- Evolution is tested against the eval set
-6. **Deploy** -- Updated description replaces the original (with backup)
-7. **Watch** -- `watch` monitors for regressions post-deploy
+6. **Audit** -- Persist proposal, evidence, and decision metadata for traceability
+7. **Deploy** -- Updated description replaces the original (with backup)
+8. **Watch** -- `selftune watch` monitors for regressions post-deploy
+9. **Rollback** -- `selftune evolve rollback` restores the previous version when regressions are detected
 ## Resource Index
@@ -94,7 +179,30 @@ Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
 | `Workflows/Rollback.md` | Undo an evolution, restore previous description |
 | `Workflows/Watch.md` | Post-deploy regression monitoring |
 | `Workflows/Doctor.md` | Health checks on logs, hooks, schema |
-| `Workflows/Ingest.md` | Import sessions from Codex and OpenCode |
+| `Workflows/Ingest.md` | Import sessions from Codex, OpenCode, and OpenClaw |
+| `Workflows/Replay.md` | Backfill logs from Claude Code transcripts |
+| `Workflows/Contribute.md` | Export anonymized data for community contribution |
+| `Workflows/Cron.md` | Scheduling & automation (cron/launchd/systemd/OpenClaw) |
+| `Workflows/AutoActivation.md` | Auto-activation hook behavior and rules |
+| `Workflows/Dashboard.md` | Dashboard modes: static, export, live server |
+| `Workflows/EvolutionMemory.md` | Evolution memory system for session continuity |
+| `Workflows/EvolveBody.md` | Full body and routing table evolution |
+| `Workflows/Baseline.md` | No-skill baseline comparison and lift measurement |
+| `Workflows/UnitTest.md` | Skill-level unit test runner and generator |
+| `Workflows/Composability.md` | Multi-skill co-occurrence conflict analysis |
+| `Workflows/ImportSkillsBench.md` | SkillsBench task corpus importer |
+## Specialized Agents
+selftune provides focused agents for deeper analysis. These live in
+`.claude/agents/` and can be spawned as subagents for specialized tasks.
+| Trigger keywords | Agent | Purpose | When to spawn |
+|------------------|-------|---------|---------------|
+| diagnose, root cause, why failing, skill failure, debug performance | diagnosis-analyst | Deep-dive analysis of underperforming skills | After doctor finds persistent issues, grades are consistently low, or status shows CRITICAL/WARNING |
+| patterns, conflicts, cross-skill, overlap, trigger conflicts, optimize skills | pattern-analyst | Cross-skill pattern analysis and conflict detection | When user asks about cross-skill conflicts or composability scores indicate moderate-to-severe conflicts |
+| review evolution, check proposal, safe to deploy, approve evolution | evolution-reviewer | Safety gate review of pending evolution proposals | Before deploying an evolution in interactive mode, especially for high-stakes or low-confidence proposals |
+| set up selftune, integrate, configure project, install selftune | integration-guide | Guided interactive setup for specific project types | For complex project structures (monorepo, multi-skill, mixed agent platforms) |
 ## Examples
@@ -110,7 +218,46 @@ Observe --> Detect --> Diagnose --> Propose --> Validate --> Deploy --> Watch
 - "How are my skills performing?"
 - "What happened in my last session?"
 - "Open the selftune dashboard"
+- "Serve the dashboard at http://localhost:3141"
 - "Show skill health status"
+- "Replay my Claude Code transcripts"
+- "Backfill logs from historical sessions"
+- "Contribute my selftune data to the community"
+- "Share anonymized skill data"
+- "Set up cron jobs for autonomous evolution"
+- "Schedule selftune to run automatically"
+- "Ingest my OpenClaw sessions"
+- "Why is selftune suggesting things?"
+- "Customize activation rules"
+- "Start the live dashboard"
+- "Serve the dashboard on port 8080"
+- "What happened in the last evolution?"
+- "Read the evolution memory"
+- "Why is this skill underperforming?"
+- "Are there conflicts between my skills?"
+- "Review this evolution before deploying"
+- "Set up selftune for my project"
+- "Evolve the full body of the Research skill"
+- "Rewrite the routing table for pptx"
+- "Does this skill add value over no-skill baseline?"
+- "Measure baseline lift for the Research skill"
+- "Generate unit tests for the pptx skill"
+- "Run skill unit tests"
+- "Which skills conflict with each other?"
+- "Analyze composability for the Research skill"
+- "Import SkillsBench tasks for my skill"
+- "Install selftune"
+- "Configure selftune for this project"
+- "Make my skills better"
+- "Optimize my skills"
+- "Are my skills working?"
+- "Show me the dashboard"
+- "What changed since last time?"
+- "What did selftune do?"
+- "Run selftune"
+- "Start selftune"
+- "Go back to the previous version"
+- "Undo the last change"
 ## Negative Examples

package/skill/Workflows/AutoActivation.md ADDED Viewed

@@ -0,0 +1,145 @@
+# selftune Auto-Activation Workflow
+Automatically suggests selftune commands during a session based on
+activation rules. Runs as a `UserPromptSubmit` hook, evaluates rules
+against session context, and outputs advisory suggestions to stderr.
+## How It Works
+The `hooks/auto-activate.ts` script runs on every `UserPromptSubmit` event.
+It reads session telemetry, query logs, and evolution audit data, then
+evaluates a set of activation rules against the current context. When a
+rule fires, the suggestion is written to stderr (shown to Claude as a
+system message). The hook always exits 0 -- suggestions are advisory and
+never block the user.
+Flow:
+1. Claude Code triggers `UserPromptSubmit` hook
+2. Hook receives `{ session_id }` payload on stdin
+3. Checks PAI coexistence (see below)
+4. Loads default activation rules
+5. Evaluates each rule against session context
+6. Outputs suggestions to stderr (if any)
+7. Exits 0
+## PAI Coexistence
+If PAI's `skill-activation-prompt` hook is detected in
+`~/.claude/settings.json`, selftune skips all suggestions. PAI handles
+skill-level activation; selftune handles observability. This prevents
+duplicate or conflicting suggestions.
+Detection scans all hook entries in settings for any command containing
+`skill-activation-prompt`. If found, the hook exits silently.
+## Default Rules
+| Rule ID | Description | Trigger Condition | Suggestion |
+|---------|-------------|-------------------|------------|
+| `post-session-diagnostic` | Suggest diagnostic review | >2 unmatched queries in current session | `selftune last` |
+| `grading-threshold-breach` | Suggest evolution | Session pass rate < 0.6 (60%) | `selftune evolve` |
+| `stale-evolution` | Suggest evolution | >7 days since last evolution AND pending false negatives exist | `selftune evolve` |
+| `regression-detected` | Suggest rollback | Watch snapshot shows `regression_detected: true` | `selftune evolve rollback` |
+### Rule Details
+**post-session-diagnostic**: Compares query count against skill usage count
+for the current session. If the difference exceeds 2, unmatched queries
+likely indicate gaps in skill coverage.
+**grading-threshold-breach**: Reads grading result files from
+`~/.selftune/grading/result-*.json`. If the current session's pass rate
+is below 0.6, the skill description may need evolution.
+**stale-evolution**: Reads the evolution audit log to find the last
+evolution timestamp. If older than 7 days, checks
+`~/.selftune/false-negatives/pending.json` for pending false negatives.
+Both conditions must be true.
+**regression-detected**: Reads the latest monitoring snapshot from
+`~/.selftune/monitoring/latest-snapshot.json`. If `regression_detected`
+is true, suggests rollback with the skill name if available.
+## Session State Tracking
+Each rule fires at most once per session. After a suggestion is shown,
+the rule ID is recorded in session state to prevent repeated nags.
+Session state is stored at `~/.selftune/session-state-<session_id>.json`:
+```json
+{
+  "session_id": "abc-123",
+  "suggestions_shown": ["post-session-diagnostic", "grading-threshold-breach"],
+  "updated_at": "2026-03-02T10:00:00Z"
+}
+```
+State is keyed by `session_id`. If the session ID changes (new session),
+state resets automatically.
+## Customizing Rules
+Rules are defined in `cli/selftune/activation-rules.ts` as the
+`DEFAULT_RULES` array. To customize rule behavior, edit that TypeScript
+file directly. There is no runtime JSON config — the hook imports
+`DEFAULT_RULES` at evaluation time.
+Each rule conforms to the `ActivationRule` interface:
+```typescript
+interface ActivationRule {
+  id: string;
+  description: string;
+  evaluate(ctx: ActivationContext): string | null;
+}
+```
+The `ActivationContext` provides paths to all log files and the selftune
+config directory. Return a suggestion string when the rule fires, or
+`null` to skip.
+## Disabling Auto-Activation
+Remove the `auto-activate.ts` hook entry from `~/.claude/settings.json`.
+The hook is registered under `UserPromptSubmit`:
+```json
+{
+  "hooks": {
+    "UserPromptSubmit": [
+      {
+        "command": "bun run /path/to/cli/selftune/hooks/auto-activate.ts"
+      }
+    ]
+  }
+}
+```
+Delete the entry to disable all auto-activation suggestions. JSON has no comment syntax, so remove the entry rather than commenting it out.
+## Common Patterns
+**User wants to disable auto-suggestions**
+> Remove the auto-activate hook entry from `~/.claude/settings.json`
+> (see Disabling section above). If the concern is repeated nagging, note
+> that each rule already fires at most once per session.
+**User asks why selftune suggestions appear**
+> Explain that the auto-activate hook detected an actionable condition.
+> Parse the suggestion text to identify which rule fired and report the
+> recommended action.
+**Suggestions are not appearing when expected**
+> Run `selftune doctor` to verify the hook is installed. Check that
+> `UserPromptSubmit` includes the auto-activate hook in settings.
+**PAI coexistence conflict**
+> Verify PAI's `skill-activation-prompt` hook is in `~/.claude/settings.json`.
+> If present, selftune skips all suggestions automatically. If the user
+> sees duplicates, one of the two hooks is misconfigured.
+**User wants custom activation rules**
+> Direct the user to `cli/selftune/activation-rules.ts`. New rules must
+> conform to the `ActivationRule` interface and should be pure filesystem
+> readers with no network calls or heavy imports.

package/skill/Workflows/Badge.md ADDED Viewed

@@ -0,0 +1,124 @@
+# Badge Command
+## When to Use
+When the user asks for a skill health badge for their README.
+## Overview
+Generate skill health badges for embedding in READMEs and documentation.
+## Usage
+```bash
+selftune badge --skill <name> [--format svg|markdown|url] [--output <path>]
+```
+## Options
+| Option | Required | Default | Description |
+|--------|----------|---------|-------------|
+| `--skill` | Yes | -- | Skill name to generate badge for |
+| `--format` | No | `svg` | Output format: `svg`, `markdown`, or `url` |
+| `--output` | No | stdout | Write output to file |
+| `--help` | No | -- | Show usage information |
+## Examples
+### Generate SVG badge
+```bash
+selftune badge --skill my-skill --format svg > badge.svg
+```
+### Get markdown for README
+```bash
+selftune badge --skill my-skill --format markdown
+```
+Output: `![Skill Health: my-skill](https://img.shields.io/badge/Skill%20Health-87%25%20%E2%86%91-4c1)`
+### Get shields.io URL
+```bash
+selftune badge --skill my-skill --format url
+```
+### Write badge to file
+```bash
+selftune badge --skill my-skill --output badge.svg
+```
+## Badge Branding
+SVG badges (both `--format svg` and dashboard routes) include the selftune logo as an inline 14px icon in the label section. The logo is embedded as a base64 data URI — no external requests needed.
+```
+[ 🔵 Skill Health (gray) ] [ 85% ↑ (green) ]
+  ^14px logo + 3px gap
+```
+Markdown and URL formats use shields.io, which renders its own badge — the logo only appears in locally-generated SVGs.
+## Badge Colors
+| Pass Rate | Color | Hex |
+|-----------|-------|-----|
+| > 80% | Green | `#4c1` |
+| 60-80% | Yellow | `#dfb317` |
+| < 60% | Red | `#e05d44` |
+| No data | Gray | `#9f9f9f` |
+## Embedding in README
+Add to your skill's README.md:
+```markdown
+![Skill Health: my-skill](https://img.shields.io/badge/Skill%20Health-87%25%20%E2%86%91-4c1)
+```
+Or use the generated SVG directly for offline rendering.
+## Dashboard Routes (Phase 2)
+The local dashboard server exposes badge and report routes:
+### GET /badge/:skillName
+Returns a live SVG badge computed from local telemetry logs.
+```
+http://localhost:<port>/badge/my-skill
+```
+- Returns `image/svg+xml` with `Cache-Control: no-cache, no-store`
+- Returns a gray "not found" badge (not JSON 404) for unknown skills
+### GET /report/:skillName
+Returns an HTML report page with pass rate, trend, session count, and embed code.
+```
+http://localhost:<port>/report/my-skill
+```
+## Hosted Service (Phase 3)
+The hosted badge service at `badge.selftune.dev` aggregates community contributions and serves badges publicly.
+### Endpoints
+| Route | Method | Description |
+|-------|--------|-------------|
+| `/badge/:skill` | GET | SVG badge from aggregated community data |
+| `/badge/:org/:skill` | GET | Organization-scoped SVG badge |
+### Embedding from hosted service
+```markdown
+![Skill Health: my-skill](https://badge.selftune.dev/badge/my-skill)
+```
+### Contributing data
+```bash
+selftune contribute --submit --skill my-skill
+```
+Use `--endpoint` to target a custom service URL, with `--github` as the fallback.