selftune 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/.claude/agents/diagnosis-analyst.md +156 -0
  2. package/.claude/agents/evolution-reviewer.md +180 -0
  3. package/.claude/agents/integration-guide.md +212 -0
  4. package/.claude/agents/pattern-analyst.md +160 -0
  5. package/CHANGELOG.md +46 -1
  6. package/README.md +105 -257
  7. package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
  8. package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
  9. package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
  10. package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
  11. package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
  12. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
  13. package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
  14. package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
  15. package/apps/local-dashboard/dist/favicon.png +0 -0
  16. package/apps/local-dashboard/dist/index.html +17 -0
  17. package/apps/local-dashboard/dist/logo.png +0 -0
  18. package/apps/local-dashboard/dist/logo.svg +9 -0
  19. package/assets/BeforeAfter.gif +0 -0
  20. package/assets/FeedbackLoop.gif +0 -0
  21. package/assets/logo.svg +9 -0
  22. package/assets/skill-health-badge.svg +20 -0
  23. package/cli/selftune/activation-rules.ts +171 -0
  24. package/cli/selftune/badge/badge-data.ts +108 -0
  25. package/cli/selftune/badge/badge-svg.ts +212 -0
  26. package/cli/selftune/badge/badge.ts +99 -0
  27. package/cli/selftune/canonical-export.ts +183 -0
  28. package/cli/selftune/constants.ts +103 -1
  29. package/cli/selftune/contribute/bundle.ts +314 -0
  30. package/cli/selftune/contribute/contribute.ts +214 -0
  31. package/cli/selftune/contribute/sanitize.ts +162 -0
  32. package/cli/selftune/cron/setup.ts +266 -0
  33. package/cli/selftune/dashboard-contract.ts +202 -0
  34. package/cli/selftune/dashboard-server.ts +1049 -0
  35. package/cli/selftune/dashboard.ts +43 -156
  36. package/cli/selftune/eval/baseline.ts +248 -0
  37. package/cli/selftune/eval/composability-v2.ts +273 -0
  38. package/cli/selftune/eval/composability.ts +117 -0
  39. package/cli/selftune/eval/generate-unit-tests.ts +143 -0
  40. package/cli/selftune/eval/hooks-to-evals.ts +101 -16
  41. package/cli/selftune/eval/import-skillsbench.ts +221 -0
  42. package/cli/selftune/eval/synthetic-evals.ts +172 -0
  43. package/cli/selftune/eval/unit-test-cli.ts +152 -0
  44. package/cli/selftune/eval/unit-test.ts +196 -0
  45. package/cli/selftune/evolution/deploy-proposal.ts +142 -1
  46. package/cli/selftune/evolution/evidence.ts +26 -0
  47. package/cli/selftune/evolution/evolve-body.ts +586 -0
  48. package/cli/selftune/evolution/evolve.ts +825 -116
  49. package/cli/selftune/evolution/extract-patterns.ts +105 -16
  50. package/cli/selftune/evolution/pareto.ts +314 -0
  51. package/cli/selftune/evolution/propose-body.ts +171 -0
  52. package/cli/selftune/evolution/propose-description.ts +100 -2
  53. package/cli/selftune/evolution/propose-routing.ts +166 -0
  54. package/cli/selftune/evolution/refine-body.ts +141 -0
  55. package/cli/selftune/evolution/rollback.ts +21 -4
  56. package/cli/selftune/evolution/validate-body.ts +254 -0
  57. package/cli/selftune/evolution/validate-proposal.ts +257 -35
  58. package/cli/selftune/evolution/validate-routing.ts +177 -0
  59. package/cli/selftune/grading/auto-grade.ts +200 -0
  60. package/cli/selftune/grading/grade-session.ts +513 -42
  61. package/cli/selftune/grading/pre-gates.ts +104 -0
  62. package/cli/selftune/grading/results.ts +42 -0
  63. package/cli/selftune/hooks/auto-activate.ts +185 -0
  64. package/cli/selftune/hooks/evolution-guard.ts +165 -0
  65. package/cli/selftune/hooks/prompt-log.ts +172 -2
  66. package/cli/selftune/hooks/session-stop.ts +123 -3
  67. package/cli/selftune/hooks/skill-change-guard.ts +112 -0
  68. package/cli/selftune/hooks/skill-eval.ts +119 -3
  69. package/cli/selftune/index.ts +415 -48
  70. package/cli/selftune/ingestors/claude-replay.ts +377 -0
  71. package/cli/selftune/ingestors/codex-rollout.ts +345 -46
  72. package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
  73. package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
  74. package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
  75. package/cli/selftune/init.ts +376 -16
  76. package/cli/selftune/last.ts +14 -5
  77. package/cli/selftune/localdb/db.ts +63 -0
  78. package/cli/selftune/localdb/materialize.ts +428 -0
  79. package/cli/selftune/localdb/queries.ts +376 -0
  80. package/cli/selftune/localdb/schema.ts +204 -0
  81. package/cli/selftune/memory/writer.ts +447 -0
  82. package/cli/selftune/monitoring/watch.ts +90 -16
  83. package/cli/selftune/normalization.ts +682 -0
  84. package/cli/selftune/observability.ts +19 -44
  85. package/cli/selftune/orchestrate.ts +1073 -0
  86. package/cli/selftune/quickstart.ts +203 -0
  87. package/cli/selftune/repair/skill-usage.ts +576 -0
  88. package/cli/selftune/schedule.ts +561 -0
  89. package/cli/selftune/status.ts +59 -33
  90. package/cli/selftune/sync.ts +627 -0
  91. package/cli/selftune/types.ts +525 -5
  92. package/cli/selftune/utils/canonical-log.ts +45 -0
  93. package/cli/selftune/utils/frontmatter.ts +217 -0
  94. package/cli/selftune/utils/hooks.ts +41 -0
  95. package/cli/selftune/utils/html.ts +27 -0
  96. package/cli/selftune/utils/llm-call.ts +103 -19
  97. package/cli/selftune/utils/math.ts +10 -0
  98. package/cli/selftune/utils/query-filter.ts +139 -0
  99. package/cli/selftune/utils/skill-discovery.ts +340 -0
  100. package/cli/selftune/utils/skill-log.ts +68 -0
  101. package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
  102. package/cli/selftune/utils/transcript.ts +307 -26
  103. package/cli/selftune/utils/trigger-check.ts +89 -0
  104. package/cli/selftune/utils/tui.ts +156 -0
  105. package/cli/selftune/workflows/discover.ts +254 -0
  106. package/cli/selftune/workflows/skill-md-writer.ts +288 -0
  107. package/cli/selftune/workflows/workflows.ts +188 -0
  108. package/package.json +28 -11
  109. package/packages/telemetry-contract/README.md +11 -0
  110. package/packages/telemetry-contract/fixtures/golden.json +87 -0
  111. package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
  112. package/packages/telemetry-contract/index.ts +1 -0
  113. package/packages/telemetry-contract/package.json +19 -0
  114. package/packages/telemetry-contract/src/index.ts +2 -0
  115. package/packages/telemetry-contract/src/types.ts +163 -0
  116. package/packages/telemetry-contract/src/validators.ts +109 -0
  117. package/skill/SKILL.md +180 -33
  118. package/skill/Workflows/AutoActivation.md +145 -0
  119. package/skill/Workflows/Badge.md +124 -0
  120. package/skill/Workflows/Baseline.md +144 -0
  121. package/skill/Workflows/Composability.md +107 -0
  122. package/skill/Workflows/Contribute.md +94 -0
  123. package/skill/Workflows/Cron.md +132 -0
  124. package/skill/Workflows/Dashboard.md +214 -0
  125. package/skill/Workflows/Doctor.md +63 -14
  126. package/skill/Workflows/Evals.md +110 -18
  127. package/skill/Workflows/EvolutionMemory.md +154 -0
  128. package/skill/Workflows/Evolve.md +181 -21
  129. package/skill/Workflows/EvolveBody.md +159 -0
  130. package/skill/Workflows/Grade.md +36 -31
  131. package/skill/Workflows/ImportSkillsBench.md +117 -0
  132. package/skill/Workflows/Ingest.md +142 -21
  133. package/skill/Workflows/Initialize.md +91 -23
  134. package/skill/Workflows/Orchestrate.md +139 -0
  135. package/skill/Workflows/Replay.md +91 -0
  136. package/skill/Workflows/Rollback.md +23 -4
  137. package/skill/Workflows/Schedule.md +61 -0
  138. package/skill/Workflows/Sync.md +88 -0
  139. package/skill/Workflows/UnitTest.md +150 -0
  140. package/skill/Workflows/Watch.md +33 -1
  141. package/skill/Workflows/Workflows.md +129 -0
  142. package/skill/assets/activation-rules-default.json +26 -0
  143. package/skill/assets/multi-skill-settings.json +63 -0
  144. package/skill/assets/single-skill-settings.json +57 -0
  145. package/skill/references/invocation-taxonomy.md +2 -2
  146. package/skill/references/logs.md +164 -2
  147. package/skill/references/setup-patterns.md +65 -0
  148. package/skill/references/version-history.md +40 -0
  149. package/skill/settings_snippet.json +23 -0
  150. package/templates/activation-rules-default.json +27 -0
  151. package/templates/multi-skill-settings.json +64 -0
  152. package/templates/single-skill-settings.json +58 -0
  153. package/dashboard/index.html +0 -1119
@@ -6,7 +6,7 @@ Records the rollback in the evolution audit log for traceability.
6
6
  ## Default Command
7
7
 
8
8
  ```bash
9
- selftune rollback --skill <name> --skill-path <path> [options]
9
+ selftune evolve rollback --skill <name> --skill-path <path> [options]
10
10
  ```
11
11
 
12
12
  ## Options
@@ -75,6 +75,16 @@ Manual restoration from version control is required.
75
75
 
76
76
  ## Steps
77
77
 
78
+ ### 0. Read Evolution Context
79
+
80
+ Before starting, read `~/.selftune/memory/context.md` for session context:
81
+ - Active evolutions and their current status
82
+ - Previous rollback history
83
+ - Last update timestamp
84
+
85
+ This provides continuity across context resets. If the file doesn't exist,
86
+ proceed normally — it will be created after the first rollback.
87
+
78
88
  ### 1. Find the Last Evolution
79
89
 
80
90
  Read `~/.claude/evolution_audit_log.jsonl` and find the most recent
@@ -85,13 +95,13 @@ If `--proposal-id` is specified, use that instead.
85
95
  ### 2. Run Rollback
86
96
 
87
97
  ```bash
88
- selftune rollback --skill pptx --skill-path /path/to/SKILL.md
98
+ selftune evolve rollback --skill pptx --skill-path /path/to/SKILL.md
89
99
  ```
90
100
 
91
101
  Or to rollback a specific proposal:
92
102
 
93
103
  ```bash
94
- selftune rollback --skill pptx --skill-path /path/to/SKILL.md --proposal-id evolve-pptx-1709125200000
104
+ selftune evolve rollback --skill pptx --skill-path /path/to/SKILL.md --proposal-id evolve-pptx-1709125200000
95
105
  ```
96
106
 
97
107
  ### 3. Verify Restoration
@@ -101,7 +111,16 @@ After rollback, verify the SKILL.md content is restored:
101
111
  - Check the audit log for the `rolled_back` entry
102
112
  - Optionally re-run evals to confirm the original pass rate
103
113
 
104
- ### 4. Post-Rollback Audit
114
+ ### 4. Update Memory
115
+
116
+ After rollback completes, the memory writer updates:
117
+ - `~/.selftune/memory/decisions.md` -- records the rollback decision and reason
118
+ - `~/.selftune/memory/context.md` -- clears the active evolution state and notes the rollback
119
+
120
+ This ensures future evolve and watch workflows have context about why the
121
+ rollback occurred, even across context window resets.
122
+
123
+ ### 5. Post-Rollback Audit
105
124
 
106
125
  The rollback is logged. Future `evolve` runs will see the rollback in the
107
126
  audit trail and can use it to avoid repeating failed evolution patterns.
@@ -0,0 +1,61 @@
1
+ # selftune Schedule Workflow
2
+
3
+ Generate ready-to-use scheduling examples for automating selftune with
4
+ standard system tools. This is the **primary automation path** — it works
5
+ on any machine without requiring a specific agent runtime.
6
+
7
+ For OpenClaw-specific scheduling, see `Workflows/Cron.md`.
8
+
9
+ ## When to Use
10
+
11
+ - Setting up selftune automation for the first time
12
+ - Generating crontab entries for a Linux/macOS server
13
+ - Creating a launchd plist for a macOS machine
14
+ - Creating a systemd timer for a Linux server
15
+ - Understanding the selftune automation loop
16
+
17
+ ## The Automation Loop
18
+
19
+ The core selftune automation loop is one command:
20
+
21
+ ```bash
22
+ selftune orchestrate
23
+ ```
24
+
25
+ `selftune orchestrate` runs source-truth sync first, selects candidate skills,
26
+ deploys validated low-risk description changes autonomously, and watches recent
27
+ deployments with auto-rollback enabled.
28
+
29
+ ## Default Command
30
+
31
+ ```bash
32
+ selftune schedule
33
+ ```
34
+
35
+ Outputs examples for all three scheduling systems (cron, launchd, systemd).
36
+
37
+ ## Flags
38
+
39
+ | Flag | Description | Default |
40
+ |------|-------------|---------|
41
+ | `--format <type>` | Output only one format: `cron`, `launchd`, or `systemd` | All formats |
42
+ | `--install` | Write and activate scheduler artifacts for the selected/default platform | Off |
43
+ | `--dry-run` | Preview installed files and activation commands without writing | Off |
44
+ | `--help` | Show help message | — |
45
+
46
+ ## Steps
47
+
48
+ 1. Run `selftune schedule` to see all examples
49
+ 2. Pick the scheduling system for your platform
50
+ 3. Install the scheduler artifacts directly with `--install`, or inspect and customize the raw snippets first
51
+
52
+ ## Alias
53
+
54
+ `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
55
+
56
+ ## Common Patterns
57
+
58
+ - **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.
59
+ - **User wants setup on macOS** -- Run `selftune schedule --install --format launchd`.
60
+ - **User wants setup on a systemd-based server** -- Run `selftune schedule --install --format systemd`.
61
+ - **User mentions OpenClaw** -- Use `selftune cron setup --platform openclaw` for the OpenClaw scheduler adapter. The default product path is still `selftune schedule --install`. See `Workflows/Cron.md`.
@@ -0,0 +1,88 @@
1
+ # selftune Sync Workflow
2
+
3
+ Refresh source-truth telemetry across supported agent CLIs, then rebuild the
4
+ repaired skill-usage overlay so status, dashboard, grading, and evolution work
5
+ from real transcripts/rollouts instead of stale hook data.
6
+
7
+ ## When to Use
8
+
9
+ - Before running `status`, `dashboard`, `watch`, or `evolve` when data may be stale
10
+ - The user has run many Claude Code, Codex, OpenCode, or OpenClaw sessions since last sync
11
+ - The agent detects host logs may be polluted and needs the repaired/source-first view
12
+ - Before exporting data to cloud ingest
13
+
14
+ ## Default Command
15
+
16
+ ```bash
17
+ selftune sync
18
+ ```
19
+
20
+ ## Options
21
+
22
+ | Flag | Description |
23
+ |------|-------------|
24
+ | `--since <date>` | Only sync sessions modified on/after this date |
25
+ | `--dry-run` | Show summary without writing files |
26
+ | `--force` | Ignore per-source markers and rescan everything |
27
+ | `--no-claude` | Skip Claude transcript replay |
28
+ | `--no-codex` | Skip Codex rollout ingest |
29
+ | `--no-opencode` | Skip OpenCode ingest |
30
+ | `--no-openclaw` | Skip OpenClaw ingest |
31
+ | `--no-repair` | Skip rebuilding `skill_usage_repaired.jsonl` |
32
+
33
+ ## Output
34
+
35
+ Writes or refreshes the following files:
36
+ - `~/.claude/session_telemetry_log.jsonl`
37
+ - `~/.claude/all_queries_log.jsonl`
38
+ - `~/.claude/skill_usage_log.jsonl`
39
+ - `~/.claude/skill_usage_repaired.jsonl`
40
+ - per-source marker files
41
+
42
+ ## Steps
43
+
44
+ ### 1. Preview Sync
45
+
46
+ Run `selftune sync --dry-run`. The output includes per-source `scanned`
47
+ counts. Report the preview summary to the user.
48
+
49
+ ### 2. Run Sync
50
+
51
+ Run `selftune sync`. The output includes:
52
+ - Per-source `scanned`, `synced`, and `skipped` counts
53
+ - Repaired overlay totals
54
+ - Any errors or warnings
55
+
56
+ ### 3. Verify Results
57
+
58
+ Verify there are no sync errors and that per-source counters are internally
59
+ consistent (`scanned`, `synced`, `skipped`). `synced=0` is valid when no
60
+ new sessions exist since the last sync. Run `selftune doctor` only when
61
+ sync reports source/hook failures or expected active sources are missing.
62
+
63
+ ### 4. Continue to Next Workflow
64
+
65
+ After sync completes, proceed with the user's intended workflow:
66
+ `selftune status`, `selftune dashboard`, `selftune watch --sync-first`,
67
+ or `selftune evolve --sync-first`.
68
+
69
+ ## Common Patterns
70
+
71
+ **User wants to refresh telemetry data**
72
+ > Run `selftune sync`. Report per-source `scanned`, `synced`, and `skipped` counts.
73
+
74
+ **User wants to sync only recent sessions**
75
+ > Run `selftune sync --since <date>` with the user's specified date.
76
+
77
+ **User wants a full rescan from scratch**
78
+ > Run `selftune sync --force`. This ignores per-source markers and rescans
79
+ > all sessions.
80
+
81
+ **Agent needs to verify sync worked**
82
+ > Check per-source `scanned`, `synced`, and `skipped` counts. `synced=0`
83
+ > is normal when data is already up-to-date. Verify `scanned > 0` for
84
+ > expected sources to confirm sync ran successfully.
85
+
86
+ **Agent is chaining into monitoring or evolution**
87
+ > Use `selftune watch --sync-first` or `selftune evolve --sync-first` to
88
+ > refresh source truth automatically before making decisions.
@@ -0,0 +1,150 @@
1
+ # selftune Unit Test Workflow
2
+
3
+ Run or generate unit tests for individual skills. Tests verify trigger
4
+ accuracy, output content, and tool usage with deterministic assertions.
5
+
6
+ ## Default Command
7
+
8
+ ```bash
9
+ selftune eval unit-test --skill <name> --tests <path> [options]
10
+ ```
11
+
12
+ ## Options
13
+
14
+ | Flag | Description | Default |
15
+ |------|-------------|---------|
16
+ | `--skill <name>` | Skill name | Required |
17
+ | `--tests <path>` | Path to unit test JSON file | `~/.selftune/unit-tests/<skill>.json` |
18
+ | `--run-agent` | Run agent-based assertions (not just trigger checks) | Off |
19
+ | `--generate` | Generate tests from skill content instead of running | Off |
20
+ | `--skill-path <path>` | Path to SKILL.md (required for `--generate`) | None |
21
+ | `--eval-set <path>` | Eval set for failure context (used with `--generate`) | None |
22
+ | `--model <flag>` | Model flag for LLM calls | Agent default |
23
+
24
+ ## Test Format
25
+
26
+ Tests are stored as JSON arrays in `~/.selftune/unit-tests/<skill>.json`:
27
+
28
+ ```json
29
+ [
30
+ {
31
+ "test_id": "research-trigger-1",
32
+ "skill_name": "Research",
33
+ "description": "Should trigger on explicit research request",
34
+ "query": "Research the latest trends in AI safety",
35
+ "expected_trigger": true,
36
+ "assertions": [
37
+ {
38
+ "type": "trigger_check",
39
+ "value": "true",
40
+ "description": "Skill should trigger for this query"
41
+ }
42
+ ],
43
+ "tags": ["explicit", "core"],
44
+ "source": "manual"
45
+ }
46
+ ]
47
+ ```
48
+
49
+ ## Assertion Types
50
+
51
+ | Type | What it checks | Requires agent? |
52
+ |------|---------------|-----------------|
53
+ | `trigger_check` | Query triggers the skill description | No (LLM only) |
54
+ | `output_contains` | Agent output contains expected text | Yes |
55
+ | `output_matches_regex` | Agent output matches regex pattern | Yes |
56
+ | `tool_called` | Agent used a specific tool | Yes |
57
+
58
+ Trigger check assertions are cheap (single LLM call). Agent-based assertions
59
+ require `--run-agent` and run the query through the full agent.
60
+
61
+ ## Output Format
62
+
63
+ ```json
64
+ {
65
+ "skill_name": "Research",
66
+ "total": 10,
67
+ "passed": 8,
68
+ "failed": 2,
69
+ "pass_rate": 0.80,
70
+ "results": [
71
+ {
72
+ "test_id": "research-trigger-1",
73
+ "overall_passed": true,
74
+ "trigger_passed": true,
75
+ "assertion_results": [
76
+ { "type": "trigger_check", "value": "true", "passed": true, "evidence": "LLM responded YES" }
77
+ ],
78
+ "duration_ms": 450
79
+ }
80
+ ],
81
+ "ran_at": "2026-03-04T12:00:00.000Z"
82
+ }
83
+ ```
84
+
85
+ ## Steps
86
+
87
+ ### 1. Generate Tests (First Time)
88
+
89
+ If no test file exists for the skill, generate initial tests:
90
+
91
+ ```bash
92
+ selftune eval unit-test --skill Research --generate --skill-path ~/.claude/skills/Research/SKILL.md
93
+ ```
94
+
95
+ Parse the output. The LLM creates test cases covering:
96
+ - Explicit trigger queries
97
+ - Implicit trigger queries
98
+ - Contextual trigger queries
99
+ - Negative examples (should NOT trigger)
100
+
101
+ Tests are saved to `~/.selftune/unit-tests/Research.json`.
102
+
103
+ ### 2. Run Tests
104
+
105
+ Run the test suite:
106
+
107
+ ```bash
108
+ selftune eval unit-test --skill Research --tests ~/.selftune/unit-tests/Research.json
109
+ ```
110
+
111
+ By default, only `trigger_check` assertions run (fast, no agent needed).
112
+ Add `--run-agent` for full agent-based assertions.
113
+
114
+ ### 3. Parse Results
115
+
116
+ Parse the JSON output. Check `pass_rate` and investigate failures:
117
+ - Failed trigger checks -- description needs improvement (route to Evolve)
118
+ - Failed output assertions -- skill workflow needs fixes
119
+ - Failed tool assertions -- skill routing is broken
120
+
121
+ Report the pass rate and any failures to the user.
122
+
123
+ ### 4. Post-Evolution Verification
124
+
125
+ After evolving a skill, re-run unit tests to verify improvements:
126
+
127
+ ```bash
128
+ selftune eval unit-test --skill Research
129
+ ```
130
+
131
+ Compare the new `pass_rate` against the previous run. Report whether
132
+ the evolution improved trigger accuracy.
133
+
134
+ ## Common Patterns
135
+
136
+ **User asks to generate tests for a skill**
137
+ > Run `selftune eval unit-test --skill <name> --generate --skill-path <path>`.
138
+ > Parse the output and report how many tests were generated.
139
+
140
+ **User asks to run existing tests**
141
+ > Run `selftune eval unit-test --skill <name>`. Parse the JSON output and
142
+ > report pass rate and any failures.
143
+
144
+ **User asks for full agent-based testing**
145
+ > Run `selftune eval unit-test --skill <name> --run-agent`. This runs queries
146
+ > through the full agent, so inform the user it will take longer.
147
+
148
+ **After an evolution completes**
149
+ > Run unit tests to verify the evolution improved trigger accuracy. Compare
150
+ > the new pass rate against the pre-evolution baseline.
@@ -65,6 +65,21 @@ selftune watch --skill <name> --skill-path <path> [options]
65
65
 
66
66
  ## Steps
67
67
 
68
+ ### 0. Read Evolution Context
69
+
70
+ Read `~/.selftune/memory/context.md` for session context:
71
+ - Active evolutions and their current status
72
+ - Known issues and regression history
73
+ - Last update timestamp
74
+
75
+ If the file does not exist, proceed normally -- it will be created after
76
+ the first watch.
77
+
78
+ The evolution-guard hook prevents conflicting SKILL.md edits while watch is
79
+ evaluating the skill. The auto-activation system uses watch results to
80
+ adjust suggestion confidence -- skills showing regressions get flagged for
81
+ attention in subsequent prompts.
82
+
68
83
  ### 1. Run Watch
69
84
 
70
85
  ```bash
@@ -87,7 +102,7 @@ Parse the JSON output. Key decision points:
87
102
  If regression is detected:
88
103
  - Review recent session transcripts to understand what changed
89
104
  - Check if the eval set is still representative
90
- - Run `rollback` if the regression is confirmed (see `Workflows/Rollback.md`)
105
+ - Run `evolve rollback` if the regression is confirmed (see `Workflows/Rollback.md`)
91
106
 
92
107
  If `--auto-rollback` was set, the command automatically restores the
93
108
  previous description and logs a `rolled_back` entry.
@@ -100,6 +115,13 @@ Summarize the snapshot for the user:
100
115
  - Whether regression was detected
101
116
  - Recommended action
102
117
 
118
+ ### 5. Update Memory
119
+
120
+ After watch completes, the memory writer updates
121
+ `~/.selftune/memory/context.md` with the current regression status,
122
+ pass rates, and recommended next action. This ensures continuity if the
123
+ context window resets before the user acts on the results.
124
+
103
125
  ## Common Patterns
104
126
 
105
127
  **"Is the skill performing well after the change?"**
@@ -119,3 +141,13 @@ Summarize the snapshot for the user:
119
141
  **"Set a custom baseline"**
120
142
  > Use `--baseline 0.85` to override auto-detection. Useful when the
121
143
  > auto-detected baseline is from an older evolution.
144
+
145
+ ## Autonomous Mode
146
+
147
+ When called by `selftune orchestrate`, watch runs automatically on recently
148
+ evolved skills:
149
+
150
+ - Checks all skills evolved in the last `--recent-window` hours (default: 24)
151
+ - Auto-rollback is enabled by default
152
+ - Results are included in the orchestrate run report
153
+ - No user notification — regressions are handled silently via rollback
@@ -0,0 +1,129 @@
1
+ # selftune Workflows Workflow
2
+
3
+ ## When to Use
4
+
5
+ When the user asks about multi-skill workflows, workflow discovery, or skill composition.
6
+
7
+ ## Overview
8
+
9
+ Discover repeated multi-skill sequences from telemetry and optionally save a
10
+ discovered workflow into a skill's `## Workflows` section.
11
+
12
+ ## Default Commands
13
+
14
+ ```bash
15
+ selftune workflows [options]
16
+ selftune workflows save <workflow-id|index> [--skill-path <path>]
17
+ ```
18
+
19
+ ## Options
20
+
21
+ - `--min-occurrences <n>`: Minimum times a workflow must appear before it is
22
+ shown. Default: `3`.
23
+ - `--window <n>`: Only analyze the last `n` sessions. Default: all sessions.
24
+ - `--skill <name>`: Only show workflows containing this skill. Default: all
25
+ skills.
26
+ - `--json`: Emit machine-readable `WorkflowDiscoveryReport` JSON. Default:
27
+ human-readable text.
28
+ - `--skill-path <path>`: Target SKILL.md when using `save`. Default:
29
+ auto-detect the first skill's SKILL.md path across contributing sessions. If
30
+ that skill maps to multiple SKILL.md files in those sessions, the command
31
+ errors and you must pass `--skill-path` explicitly.
32
+
33
+ ## Save Semantics
34
+
35
+ `save` accepts either:
36
+
37
+ - A workflow ID, which is the ordered skill chain joined with `→`
38
+ - A 1-based index from the `selftune workflows` output
39
+
40
+ Examples:
41
+
42
+ ```bash
43
+ selftune workflows save "Copywriting→MarketingAutomation→SelfTuneBlog"
44
+ selftune workflows save 1
45
+ ```
46
+
47
+ When saved, selftune appends a subsection to `## Workflows` in the target
48
+ SKILL.md. The subsection name is derived from the skill chain
49
+ (`Copywriting-MarketingAutomation-SelfTuneBlog`) and includes
50
+ discovered-source metadata with occurrence count and synergy score.
51
+
52
+ ## Output Format
53
+
54
+ ### Human-readable output
55
+
56
+ The number prefix (for example, `1.`) is the 1-based index you can pass to
57
+ `selftune workflows save <index>`.
58
+
59
+ ```text
60
+ Discovered Workflows (from 450 sessions):
61
+
62
+ 1. Copywriting → MarketingAutomation → SelfTuneBlog
63
+ Occurrences: 12 | Synergy: 0.72 | Consistency: 92% | Completion: 83%
64
+ Common trigger: "write and publish a blog post"
65
+ ```
66
+
67
+ ### JSON output
68
+
69
+ ```json
70
+ {
71
+ "workflows": [
72
+ {
73
+ "workflow_id": "Copywriting→MarketingAutomation→SelfTuneBlog",
74
+ "skills": ["Copywriting", "MarketingAutomation", "SelfTuneBlog"],
75
+ "occurrence_count": 12,
76
+ "avg_errors": 0.5,
77
+ "avg_errors_individual": 1.8,
78
+ "synergy_score": 0.72,
79
+ "representative_query": "write and publish a blog post",
80
+ "sequence_consistency": 0.92,
81
+ "completion_rate": 0.83,
82
+ "first_seen": "2026-03-01T10:00:00Z",
83
+ "last_seen": "2026-03-08T16:30:00Z",
84
+ "session_ids": ["s1", "s2"]
85
+ }
86
+ ],
87
+ "total_sessions_analyzed": 450,
88
+ "generated_at": "2026-03-09T12:00:00.000Z"
89
+ }
90
+ ```
91
+
92
+ ## How It Works
93
+
94
+ 1. Reads `session_telemetry_log.jsonl` and `skill_usage_log.jsonl`
95
+ 2. Orders skill usage inside each session by timestamp
96
+ 3. Deduplicates consecutive same-skill entries
97
+ 4. Keeps only sequences with 2+ skills
98
+ 5. Counts repeated ordered sequences across sessions
99
+ 6. Computes workflow metrics:
100
+ - `synergy_score` — whether the sequence performs better together than solo
101
+ baselines, where each skill's solo baseline is its average error rate from
102
+ single-skill sessions and the workflow uses the max of those solo rates
103
+ - `sequence_consistency` — how stable the ordering is for the same skill
104
+ set
105
+ - `completion_rate` — how often all skills in the sequence fire
106
+ 7. Filters by `--min-occurrences` and optional `--skill`
107
+ 8. Optionally appends the chosen workflow to SKILL.md via `save`
108
+
109
+ ## Interpreting Results
110
+
111
+ - `synergy_score > 0.3`: Strong candidate for codifying as a workflow.
112
+ - `synergy_score < -0.3`: The sequence adds friction or conflicts.
113
+ - Low `sequence_consistency`: Same skills appear in multiple orders; the
114
+ pattern may still be unstable.
115
+ - Low `completion_rate`: One or more skills in the sequence often are not
116
+ invoked, so the full workflow does not complete.
117
+
118
+ ## Common Patterns
119
+
120
+ - "Which skills always get used together?"
121
+ `selftune workflows`
122
+ - "Only show workflows involving Deploy"
123
+ `selftune workflows --skill Deploy`
124
+ - "Focus on recent behavior"
125
+ `selftune workflows --window 20`
126
+ - "Save the top workflow into SKILL.md"
127
+ `selftune workflows save 1 --skill-path /path/to/SKILL.md`
128
+ - "Save a specific discovered workflow by ID"
129
+ `selftune workflows save "Copywriting→MarketingAutomation→SelfTuneBlog"`
@@ -0,0 +1,26 @@
1
+ {
2
+ "_readme": "Default activation rules for selftune auto-activation. Copy to ~/.selftune/activation-rules.json to customize.",
3
+ "_note": "These defaults are bundled inside the installed skill so setup does not depend on repository-level templates.",
4
+ "rules": [
5
+ {
6
+ "id": "post-session-diagnostic",
7
+ "enabled": true,
8
+ "description": "Suggest `selftune last` when session has >2 unmatched queries"
9
+ },
10
+ {
11
+ "id": "grading-threshold-breach",
12
+ "enabled": true,
13
+ "description": "Suggest `selftune evolve` when session pass rate < 60%"
14
+ },
15
+ {
16
+ "id": "stale-evolution",
17
+ "enabled": true,
18
+ "description": "Suggest `selftune evolve` when no evolution in >7 days and pending false negatives exist"
19
+ },
20
+ {
21
+ "id": "regression-detected",
22
+ "enabled": true,
23
+ "description": "Suggest `selftune evolve rollback` when monitoring detects a regression"
24
+ }
25
+ ]
26
+ }
@@ -0,0 +1,63 @@
1
+ {
2
+ "_readme": "Claude settings template for multi-skill selftune projects. Merge into ~/.claude/settings.json.",
3
+ "_usage": "These hooks use npx selftune, which works regardless of installation path.",
4
+ "_note": "Multi-skill projects use activation rules to route queries to the correct skill. See assets/activation-rules-default.json.",
5
+ "hooks": {
6
+ "UserPromptSubmit": [
7
+ {
8
+ "hooks": [
9
+ {
10
+ "type": "command",
11
+ "command": "npx selftune hook prompt-log",
12
+ "timeout": 5
13
+ },
14
+ {
15
+ "type": "command",
16
+ "command": "npx selftune hook auto-activate",
17
+ "timeout": 5
18
+ }
19
+ ]
20
+ }
21
+ ],
22
+ "PreToolUse": [
23
+ {
24
+ "matcher": "Write|Edit",
25
+ "hooks": [
26
+ {
27
+ "type": "command",
28
+ "command": "npx selftune hook skill-change-guard",
29
+ "timeout": 5
30
+ },
31
+ {
32
+ "type": "command",
33
+ "command": "npx selftune hook evolution-guard",
34
+ "timeout": 5
35
+ }
36
+ ]
37
+ }
38
+ ],
39
+ "PostToolUse": [
40
+ {
41
+ "matcher": "Read",
42
+ "hooks": [
43
+ {
44
+ "type": "command",
45
+ "command": "npx selftune hook skill-eval",
46
+ "timeout": 5
47
+ }
48
+ ]
49
+ }
50
+ ],
51
+ "Stop": [
52
+ {
53
+ "hooks": [
54
+ {
55
+ "type": "command",
56
+ "command": "npx selftune hook session-stop",
57
+ "timeout": 15
58
+ }
59
+ ]
60
+ }
61
+ ]
62
+ }
63
+ }
@@ -0,0 +1,57 @@
1
+ {
2
+ "_readme": "Claude settings template for single-skill selftune projects. Merge into ~/.claude/settings.json.",
3
+ "_usage": "These hooks use npx selftune, which works regardless of installation path.",
4
+ "hooks": {
5
+ "UserPromptSubmit": [
6
+ {
7
+ "hooks": [
8
+ {
9
+ "type": "command",
10
+ "command": "npx selftune hook prompt-log",
11
+ "timeout": 5
12
+ },
13
+ {
14
+ "type": "command",
15
+ "command": "npx selftune hook auto-activate",
16
+ "timeout": 5
17
+ }
18
+ ]
19
+ }
20
+ ],
21
+ "PreToolUse": [
22
+ {
23
+ "matcher": "Write|Edit",
24
+ "hooks": [
25
+ {
26
+ "type": "command",
27
+ "command": "npx selftune hook skill-change-guard",
28
+ "timeout": 5
29
+ }
30
+ ]
31
+ }
32
+ ],
33
+ "PostToolUse": [
34
+ {
35
+ "matcher": "Read",
36
+ "hooks": [
37
+ {
38
+ "type": "command",
39
+ "command": "npx selftune hook skill-eval",
40
+ "timeout": 5
41
+ }
42
+ ]
43
+ }
44
+ ],
45
+ "Stop": [
46
+ {
47
+ "hooks": [
48
+ {
49
+ "type": "command",
50
+ "command": "npx selftune hook session-stop",
51
+ "timeout": 15
52
+ }
53
+ ]
54
+ }
55
+ ]
56
+ }
57
+ }