selftune 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +20 -10
- package/.claude/agents/evolution-reviewer.md +14 -1
- package/.claude/agents/integration-guide.md +18 -6
- package/.claude/agents/pattern-analyst.md +18 -5
- package/CHANGELOG.md +12 -4
- package/README.md +43 -35
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/cli/selftune/badge/badge-data.ts +1 -1
- package/cli/selftune/badge/badge.ts +4 -8
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +28 -0
- package/cli/selftune/contribute/contribute.ts +1 -1
- package/cli/selftune/cron/setup.ts +17 -17
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +653 -186
- package/cli/selftune/dashboard.ts +41 -176
- package/cli/selftune/eval/baseline.ts +5 -4
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/hooks-to-evals.ts +34 -15
- package/cli/selftune/eval/unit-test-cli.ts +1 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +105 -11
- package/cli/selftune/evolution/evolve.ts +371 -25
- package/cli/selftune/evolution/extract-patterns.ts +87 -29
- package/cli/selftune/evolution/rollback.ts +2 -2
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +448 -97
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +395 -116
- package/cli/selftune/ingestors/claude-replay.ts +140 -114
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +227 -14
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/monitoring/watch.ts +66 -15
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +48 -26
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +148 -0
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +78 -20
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +272 -26
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +21 -8
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +84 -53
- package/skill/Workflows/AutoActivation.md +17 -16
- package/skill/Workflows/Badge.md +6 -0
- package/skill/Workflows/Baseline.md +46 -23
- package/skill/Workflows/Composability.md +12 -5
- package/skill/Workflows/Contribute.md +17 -14
- package/skill/Workflows/Cron.md +56 -79
- package/skill/Workflows/Dashboard.md +45 -34
- package/skill/Workflows/Doctor.md +30 -17
- package/skill/Workflows/Evals.md +64 -40
- package/skill/Workflows/EvolutionMemory.md +2 -0
- package/skill/Workflows/Evolve.md +102 -47
- package/skill/Workflows/EvolveBody.md +6 -6
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +11 -5
- package/skill/Workflows/Ingest.md +43 -36
- package/skill/Workflows/Initialize.md +44 -30
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +39 -18
- package/skill/Workflows/Rollback.md +3 -3
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +34 -22
- package/skill/Workflows/Watch.md +14 -4
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +1 -1
- package/templates/multi-skill-settings.json +7 -7
- package/templates/single-skill-settings.json +6 -6
- package/dashboard/index.html +0 -1680
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# selftune Sync Workflow
|
|
2
|
+
|
|
3
|
+
Refresh source-truth telemetry across supported agent CLIs, then rebuild the
|
|
4
|
+
repaired skill-usage overlay so status, dashboard, grading, and evolution work
|
|
5
|
+
from real transcripts/rollouts instead of stale hook data.
|
|
6
|
+
|
|
7
|
+
## When to Use
|
|
8
|
+
|
|
9
|
+
- Before running `status`, `dashboard`, `watch`, or `evolve` when data may be stale
|
|
10
|
+
- The user has run many Claude Code, Codex, OpenCode, or OpenClaw sessions since last sync
|
|
11
|
+
- The agent detects host logs may be polluted and needs the repaired/source-first view
|
|
12
|
+
- Before exporting data to cloud ingest
|
|
13
|
+
|
|
14
|
+
## Default Command
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
selftune sync
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Options
|
|
21
|
+
|
|
22
|
+
| Flag | Description |
|
|
23
|
+
|------|-------------|
|
|
24
|
+
| `--since <date>` | Only sync sessions modified on/after this date |
|
|
25
|
+
| `--dry-run` | Show summary without writing files |
|
|
26
|
+
| `--force` | Ignore per-source markers and rescan everything |
|
|
27
|
+
| `--no-claude` | Skip Claude transcript replay |
|
|
28
|
+
| `--no-codex` | Skip Codex rollout ingest |
|
|
29
|
+
| `--no-opencode` | Skip OpenCode ingest |
|
|
30
|
+
| `--no-openclaw` | Skip OpenClaw ingest |
|
|
31
|
+
| `--no-repair` | Skip rebuilding `skill_usage_repaired.jsonl` |
|
|
32
|
+
|
|
33
|
+
## Output
|
|
34
|
+
|
|
35
|
+
Written/refreshed data:
|
|
36
|
+
- `~/.claude/session_telemetry_log.jsonl`
|
|
37
|
+
- `~/.claude/all_queries_log.jsonl`
|
|
38
|
+
- `~/.claude/skill_usage_log.jsonl`
|
|
39
|
+
- `~/.claude/skill_usage_repaired.jsonl`
|
|
40
|
+
- per-source marker files
|
|
41
|
+
|
|
42
|
+
## Steps
|
|
43
|
+
|
|
44
|
+
### 1. Preview Sync
|
|
45
|
+
|
|
46
|
+
Run `selftune sync --dry-run`. The output includes per-source `scanned`
|
|
47
|
+
counts. Report the preview summary to the user.
|
|
48
|
+
|
|
49
|
+
### 2. Run Sync
|
|
50
|
+
|
|
51
|
+
Run `selftune sync`. The output includes:
|
|
52
|
+
- Per-source `scanned`, `synced`, and `skipped` counts
|
|
53
|
+
- Repaired overlay totals
|
|
54
|
+
- Any errors or warnings
|
|
55
|
+
|
|
56
|
+
### 3. Verify Results
|
|
57
|
+
|
|
58
|
+
Verify there are no sync errors and that per-source counters are internally
|
|
59
|
+
consistent (`scanned`, `synced`, `skipped`). `synced=0` is valid when no
|
|
60
|
+
new sessions exist since the last sync. Run `selftune doctor` only when
|
|
61
|
+
sync reports source/hook failures or expected active sources are missing.
|
|
62
|
+
|
|
63
|
+
### 4. Continue to Next Workflow
|
|
64
|
+
|
|
65
|
+
After sync completes, proceed with the user's intended workflow:
|
|
66
|
+
`selftune status`, `selftune dashboard`, `selftune watch --sync-first`,
|
|
67
|
+
or `selftune evolve --sync-first`.
|
|
68
|
+
|
|
69
|
+
## Common Patterns
|
|
70
|
+
|
|
71
|
+
**User wants to refresh telemetry data**
|
|
72
|
+
> Run `selftune sync`. Report per-source `scanned`, `synced`, and `skipped` counts.
|
|
73
|
+
|
|
74
|
+
**User wants to sync only recent sessions**
|
|
75
|
+
> Run `selftune sync --since <date>` with the user's specified date.
|
|
76
|
+
|
|
77
|
+
**User wants a full rescan from scratch**
|
|
78
|
+
> Run `selftune sync --force`. This ignores per-source markers and rescans
|
|
79
|
+
> all sessions.
|
|
80
|
+
|
|
81
|
+
**Agent needs to verify sync worked**
|
|
82
|
+
> Check per-source `scanned`, `synced`, and `skipped` counts. `synced=0`
|
|
83
|
+
> is normal when data is already up-to-date. Verify `scanned > 0` for
|
|
84
|
+
> expected sources to confirm sync ran successfully.
|
|
85
|
+
|
|
86
|
+
**Agent is chaining into monitoring or evolution**
|
|
87
|
+
> Use `selftune watch --sync-first` or `selftune evolve --sync-first` to
|
|
88
|
+
> refresh source truth automatically before making decisions.
|
|
@@ -6,7 +6,7 @@ accuracy, output content, and tool usage with deterministic assertions.
|
|
|
6
6
|
## Default Command
|
|
7
7
|
|
|
8
8
|
```bash
|
|
9
|
-
selftune unit-test --skill <name> --tests <path> [options]
|
|
9
|
+
selftune eval unit-test --skill <name> --tests <path> [options]
|
|
10
10
|
```
|
|
11
11
|
|
|
12
12
|
## Options
|
|
@@ -86,13 +86,13 @@ require `--run-agent` and run the query through the full agent.
|
|
|
86
86
|
|
|
87
87
|
### 1. Generate Tests (First Time)
|
|
88
88
|
|
|
89
|
-
|
|
89
|
+
If no test file exists for the skill, generate initial tests:
|
|
90
90
|
|
|
91
91
|
```bash
|
|
92
|
-
selftune unit-test --skill Research --generate --skill-path ~/.claude/skills/Research/SKILL.md
|
|
92
|
+
selftune eval unit-test --skill Research --generate --skill-path ~/.claude/skills/Research/SKILL.md
|
|
93
93
|
```
|
|
94
94
|
|
|
95
|
-
|
|
95
|
+
Parse the output. The LLM creates test cases covering:
|
|
96
96
|
- Explicit trigger queries
|
|
97
97
|
- Implicit trigger queries
|
|
98
98
|
- Contextual trigger queries
|
|
@@ -102,37 +102,49 @@ Tests are saved to `~/.selftune/unit-tests/Research.json`.
|
|
|
102
102
|
|
|
103
103
|
### 2. Run Tests
|
|
104
104
|
|
|
105
|
+
Run the test suite:
|
|
106
|
+
|
|
105
107
|
```bash
|
|
106
|
-
selftune unit-test --skill Research --tests ~/.selftune/unit-tests/Research.json
|
|
108
|
+
selftune eval unit-test --skill Research --tests ~/.selftune/unit-tests/Research.json
|
|
107
109
|
```
|
|
108
110
|
|
|
109
111
|
By default, only `trigger_check` assertions run (fast, no agent needed).
|
|
110
112
|
Add `--run-agent` for full agent-based assertions.
|
|
111
113
|
|
|
112
|
-
### 3.
|
|
114
|
+
### 3. Parse Results
|
|
115
|
+
|
|
116
|
+
Parse the JSON output. Check `pass_rate` and investigate failures:
|
|
117
|
+
- Failed trigger checks -- description needs improvement (route to Evolve)
|
|
118
|
+
- Failed output assertions -- skill workflow needs fixes
|
|
119
|
+
- Failed tool assertions -- skill routing is broken
|
|
113
120
|
|
|
114
|
-
|
|
115
|
-
- Failed trigger checks → description needs improvement
|
|
116
|
-
- Failed output assertions → skill workflow needs fixes
|
|
117
|
-
- Failed tool assertions → skill routing is broken
|
|
121
|
+
Report the pass rate and any failures to the user.
|
|
118
122
|
|
|
119
|
-
### 4.
|
|
123
|
+
### 4. Post-Evolution Verification
|
|
120
124
|
|
|
121
125
|
After evolving a skill, re-run unit tests to verify improvements:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
selftune eval unit-test --skill Research
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Compare the new `pass_rate` against the previous run. Report whether
|
|
132
|
+
the evolution improved trigger accuracy.
|
|
125
133
|
|
|
126
134
|
## Common Patterns
|
|
127
135
|
|
|
128
|
-
**
|
|
129
|
-
> `selftune unit-test --skill
|
|
136
|
+
**User asks to generate tests for a skill**
|
|
137
|
+
> Run `selftune eval unit-test --skill <name> --generate --skill-path <path>`.
|
|
138
|
+
> Parse the output and report how many tests were generated.
|
|
130
139
|
|
|
131
|
-
**
|
|
132
|
-
> `selftune unit-test --skill
|
|
140
|
+
**User asks to run existing tests**
|
|
141
|
+
> Run `selftune eval unit-test --skill <name>`. Parse the JSON output and
|
|
142
|
+
> report pass rate and any failures.
|
|
133
143
|
|
|
134
|
-
**
|
|
135
|
-
> `selftune unit-test --skill
|
|
144
|
+
**User asks for full agent-based testing**
|
|
145
|
+
> Run `selftune eval unit-test --skill <name> --run-agent`. This runs queries
|
|
146
|
+
> through the full agent, so inform the user it will take longer.
|
|
136
147
|
|
|
137
|
-
**
|
|
138
|
-
> Run
|
|
148
|
+
**After an evolution completes**
|
|
149
|
+
> Run unit tests to verify the evolution improved trigger accuracy. Compare
|
|
150
|
+
> the new pass rate against the pre-evolution baseline.
|
package/skill/Workflows/Watch.md
CHANGED
|
@@ -67,13 +67,13 @@ selftune watch --skill <name> --skill-path <path> [options]
|
|
|
67
67
|
|
|
68
68
|
### 0. Read Evolution Context
|
|
69
69
|
|
|
70
|
-
|
|
70
|
+
Read `~/.selftune/memory/context.md` for session context:
|
|
71
71
|
- Active evolutions and their current status
|
|
72
72
|
- Known issues and regression history
|
|
73
73
|
- Last update timestamp
|
|
74
74
|
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
If the file does not exist, proceed normally -- it will be created after
|
|
76
|
+
the first watch.
|
|
77
77
|
|
|
78
78
|
The evolution-guard hook prevents conflicting SKILL.md edits while watch is
|
|
79
79
|
evaluating the skill. The auto-activation system uses watch results to
|
|
@@ -102,7 +102,7 @@ Parse the JSON output. Key decision points:
|
|
|
102
102
|
If regression is detected:
|
|
103
103
|
- Review recent session transcripts to understand what changed
|
|
104
104
|
- Check if the eval set is still representative
|
|
105
|
-
- Run `rollback` if the regression is confirmed (see `Workflows/Rollback.md`)
|
|
105
|
+
- Run `evolve rollback` if the regression is confirmed (see `Workflows/Rollback.md`)
|
|
106
106
|
|
|
107
107
|
If `--auto-rollback` was set, the command automatically restores the
|
|
108
108
|
previous description and logs a `rolled_back` entry.
|
|
@@ -141,3 +141,13 @@ context window resets before the user acts on the results.
|
|
|
141
141
|
**"Set a custom baseline"**
|
|
142
142
|
> Use `--baseline 0.85` to override auto-detection. Useful when the
|
|
143
143
|
> auto-detected baseline is from an older evolution.
|
|
144
|
+
|
|
145
|
+
## Autonomous Mode
|
|
146
|
+
|
|
147
|
+
When called by `selftune orchestrate`, watch runs automatically on recently
|
|
148
|
+
evolved skills:
|
|
149
|
+
|
|
150
|
+
- Checks all skills evolved in the last `--recent-window` hours (default: 24)
|
|
151
|
+
- Auto-rollback is enabled by default
|
|
152
|
+
- Results are included in the orchestrate run report
|
|
153
|
+
- No user notification — regressions are handled silently via rollback
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# selftune Workflows Workflow
|
|
2
|
+
|
|
3
|
+
## When to Use
|
|
4
|
+
|
|
5
|
+
When the user asks about multi-skill workflows, workflow discovery, or skill composition.
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Discover repeated multi-skill sequences from telemetry and optionally save a
|
|
10
|
+
discovered workflow into a skill's `## Workflows` section.
|
|
11
|
+
|
|
12
|
+
## Default Commands
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
selftune workflows [options]
|
|
16
|
+
selftune workflows save <workflow-id|index> [--skill-path <path>]
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Options
|
|
20
|
+
|
|
21
|
+
- `--min-occurrences <n>`: Minimum times a workflow must appear before it is
|
|
22
|
+
shown. Default: `3`.
|
|
23
|
+
- `--window <n>`: Only analyze the last `n` sessions. Default: all sessions.
|
|
24
|
+
- `--skill <name>`: Only show workflows containing this skill. Default: all
|
|
25
|
+
skills.
|
|
26
|
+
- `--json`: Emit machine-readable `WorkflowDiscoveryReport` JSON. Default:
|
|
27
|
+
human-readable text.
|
|
28
|
+
- `--skill-path <path>`: Target SKILL.md when using `save`. Default:
|
|
29
|
+
auto-detect the first skill's SKILL.md path across contributing sessions. If
|
|
30
|
+
that skill maps to multiple SKILL.md files in those sessions, the command
|
|
31
|
+
errors and you must pass `--skill-path` explicitly.
|
|
32
|
+
|
|
33
|
+
## Save Semantics
|
|
34
|
+
|
|
35
|
+
`save` accepts either:
|
|
36
|
+
|
|
37
|
+
- A workflow ID, which is the ordered skill chain joined with `→`
|
|
38
|
+
- A 1-based index from the `selftune workflows` output
|
|
39
|
+
|
|
40
|
+
Examples:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
selftune workflows save "Copywriting→MarketingAutomation→SelfTuneBlog"
|
|
44
|
+
selftune workflows save 1
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
When saved, selftune appends a subsection to `## Workflows` in the target
|
|
48
|
+
SKILL.md. The subsection name is derived from the skill chain
|
|
49
|
+
(`Copywriting-MarketingAutomation-SelfTuneBlog`) and includes
|
|
50
|
+
discovered-source metadata with occurrence count and synergy score.
|
|
51
|
+
|
|
52
|
+
## Output Format
|
|
53
|
+
|
|
54
|
+
### Human-readable output
|
|
55
|
+
|
|
56
|
+
The number prefix (for example, `1.`) is the 1-based index you can pass to
|
|
57
|
+
`selftune workflows save <index>`.
|
|
58
|
+
|
|
59
|
+
```text
|
|
60
|
+
Discovered Workflows (from 450 sessions):
|
|
61
|
+
|
|
62
|
+
1. Copywriting → MarketingAutomation → SelfTuneBlog
|
|
63
|
+
Occurrences: 12 | Synergy: 0.72 | Consistency: 92% | Completion: 83%
|
|
64
|
+
Common trigger: "write and publish a blog post"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### JSON output
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{
|
|
71
|
+
"workflows": [
|
|
72
|
+
{
|
|
73
|
+
"workflow_id": "Copywriting→MarketingAutomation→SelfTuneBlog",
|
|
74
|
+
"skills": ["Copywriting", "MarketingAutomation", "SelfTuneBlog"],
|
|
75
|
+
"occurrence_count": 12,
|
|
76
|
+
"avg_errors": 0.5,
|
|
77
|
+
"avg_errors_individual": 1.8,
|
|
78
|
+
"synergy_score": 0.72,
|
|
79
|
+
"representative_query": "write and publish a blog post",
|
|
80
|
+
"sequence_consistency": 0.92,
|
|
81
|
+
"completion_rate": 0.83,
|
|
82
|
+
"first_seen": "2026-03-01T10:00:00Z",
|
|
83
|
+
"last_seen": "2026-03-08T16:30:00Z",
|
|
84
|
+
"session_ids": ["s1", "s2"]
|
|
85
|
+
}
|
|
86
|
+
],
|
|
87
|
+
"total_sessions_analyzed": 450,
|
|
88
|
+
"generated_at": "2026-03-09T12:00:00.000Z"
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## How It Works
|
|
93
|
+
|
|
94
|
+
1. Reads `session_telemetry_log.jsonl` and `skill_usage_log.jsonl`
|
|
95
|
+
2. Orders skill usage inside each session by timestamp
|
|
96
|
+
3. Deduplicates consecutive same-skill entries
|
|
97
|
+
4. Keeps only sequences with 2+ skills
|
|
98
|
+
5. Counts repeated ordered sequences across sessions
|
|
99
|
+
6. Computes workflow metrics:
|
|
100
|
+
- `synergy_score` — whether the sequence performs better together than solo
|
|
101
|
+
baselines, where each skill's solo baseline is its average error rate from
|
|
102
|
+
single-skill sessions and the workflow uses the max of those solo rates
|
|
103
|
+
- `sequence_consistency` — how stable the ordering is for the same skill
|
|
104
|
+
set
|
|
105
|
+
- `completion_rate` — how often all skills in the sequence fire
|
|
106
|
+
7. Filters by `--min-occurrences` and optional `--skill`
|
|
107
|
+
8. Optionally appends the chosen workflow to SKILL.md via `save`
|
|
108
|
+
|
|
109
|
+
## Interpreting Results
|
|
110
|
+
|
|
111
|
+
- `synergy_score > 0.3`: Strong candidate for codifying as a workflow.
|
|
112
|
+
- `synergy_score < -0.3`: The sequence adds friction or conflicts.
|
|
113
|
+
- Low `sequence_consistency`: Same skills appear in multiple orders; the
|
|
114
|
+
pattern may still be unstable.
|
|
115
|
+
- Low `completion_rate`: One or more skills in the sequence often are not
|
|
116
|
+
invoked, so the full workflow does not complete.
|
|
117
|
+
|
|
118
|
+
## Common Patterns
|
|
119
|
+
|
|
120
|
+
- "Which skills always get used together?"
|
|
121
|
+
`selftune workflows`
|
|
122
|
+
- "Only show workflows involving Deploy"
|
|
123
|
+
`selftune workflows --skill Deploy`
|
|
124
|
+
- "Focus on recent behavior"
|
|
125
|
+
`selftune workflows --window 20`
|
|
126
|
+
- "Save the top workflow into SKILL.md"
|
|
127
|
+
`selftune workflows save 1 --skill-path /path/to/SKILL.md`
|
|
128
|
+
- "Save a specific discovered workflow by ID"
|
|
129
|
+
`selftune workflows save "Copywriting→MarketingAutomation→SelfTuneBlog"`
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_readme": "Default activation rules for selftune auto-activation. Copy to ~/.selftune/activation-rules.json to customize.",
|
|
3
|
+
"_note": "These defaults are bundled inside the installed skill so setup does not depend on repository-level templates.",
|
|
4
|
+
"rules": [
|
|
5
|
+
{
|
|
6
|
+
"id": "post-session-diagnostic",
|
|
7
|
+
"enabled": true,
|
|
8
|
+
"description": "Suggest `selftune last` when session has >2 unmatched queries"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"id": "grading-threshold-breach",
|
|
12
|
+
"enabled": true,
|
|
13
|
+
"description": "Suggest `selftune evolve` when session pass rate < 60%"
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"id": "stale-evolution",
|
|
17
|
+
"enabled": true,
|
|
18
|
+
"description": "Suggest `selftune evolve` when no evolution in >7 days and pending false negatives exist"
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "regression-detected",
|
|
22
|
+
"enabled": true,
|
|
23
|
+
"description": "Suggest `selftune evolve rollback` when monitoring detects a regression"
|
|
24
|
+
}
|
|
25
|
+
]
|
|
26
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_readme": "Claude settings template for multi-skill selftune projects. Merge into ~/.claude/settings.json.",
|
|
3
|
+
"_usage": "These hooks use npx selftune, which works regardless of installation path.",
|
|
4
|
+
"_note": "Multi-skill projects use activation rules to route queries to the correct skill. See assets/activation-rules-default.json.",
|
|
5
|
+
"hooks": {
|
|
6
|
+
"UserPromptSubmit": [
|
|
7
|
+
{
|
|
8
|
+
"hooks": [
|
|
9
|
+
{
|
|
10
|
+
"type": "command",
|
|
11
|
+
"command": "npx selftune hook prompt-log",
|
|
12
|
+
"timeout": 5
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"type": "command",
|
|
16
|
+
"command": "npx selftune hook auto-activate",
|
|
17
|
+
"timeout": 5
|
|
18
|
+
}
|
|
19
|
+
]
|
|
20
|
+
}
|
|
21
|
+
],
|
|
22
|
+
"PreToolUse": [
|
|
23
|
+
{
|
|
24
|
+
"matcher": "Write|Edit",
|
|
25
|
+
"hooks": [
|
|
26
|
+
{
|
|
27
|
+
"type": "command",
|
|
28
|
+
"command": "npx selftune hook skill-change-guard",
|
|
29
|
+
"timeout": 5
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"type": "command",
|
|
33
|
+
"command": "npx selftune hook evolution-guard",
|
|
34
|
+
"timeout": 5
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
38
|
+
],
|
|
39
|
+
"PostToolUse": [
|
|
40
|
+
{
|
|
41
|
+
"matcher": "Read",
|
|
42
|
+
"hooks": [
|
|
43
|
+
{
|
|
44
|
+
"type": "command",
|
|
45
|
+
"command": "npx selftune hook skill-eval",
|
|
46
|
+
"timeout": 5
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
}
|
|
50
|
+
],
|
|
51
|
+
"Stop": [
|
|
52
|
+
{
|
|
53
|
+
"hooks": [
|
|
54
|
+
{
|
|
55
|
+
"type": "command",
|
|
56
|
+
"command": "npx selftune hook session-stop",
|
|
57
|
+
"timeout": 15
|
|
58
|
+
}
|
|
59
|
+
]
|
|
60
|
+
}
|
|
61
|
+
]
|
|
62
|
+
}
|
|
63
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_readme": "Claude settings template for single-skill selftune projects. Merge into ~/.claude/settings.json.",
|
|
3
|
+
"_usage": "These hooks use npx selftune, which works regardless of installation path.",
|
|
4
|
+
"hooks": {
|
|
5
|
+
"UserPromptSubmit": [
|
|
6
|
+
{
|
|
7
|
+
"hooks": [
|
|
8
|
+
{
|
|
9
|
+
"type": "command",
|
|
10
|
+
"command": "npx selftune hook prompt-log",
|
|
11
|
+
"timeout": 5
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"type": "command",
|
|
15
|
+
"command": "npx selftune hook auto-activate",
|
|
16
|
+
"timeout": 5
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
}
|
|
20
|
+
],
|
|
21
|
+
"PreToolUse": [
|
|
22
|
+
{
|
|
23
|
+
"matcher": "Write|Edit",
|
|
24
|
+
"hooks": [
|
|
25
|
+
{
|
|
26
|
+
"type": "command",
|
|
27
|
+
"command": "npx selftune hook skill-change-guard",
|
|
28
|
+
"timeout": 5
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
],
|
|
33
|
+
"PostToolUse": [
|
|
34
|
+
{
|
|
35
|
+
"matcher": "Read",
|
|
36
|
+
"hooks": [
|
|
37
|
+
{
|
|
38
|
+
"type": "command",
|
|
39
|
+
"command": "npx selftune hook skill-eval",
|
|
40
|
+
"timeout": 5
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
}
|
|
44
|
+
],
|
|
45
|
+
"Stop": [
|
|
46
|
+
{
|
|
47
|
+
"hooks": [
|
|
48
|
+
{
|
|
49
|
+
"type": "command",
|
|
50
|
+
"command": "npx selftune hook session-stop",
|
|
51
|
+
"timeout": 15
|
|
52
|
+
}
|
|
53
|
+
]
|
|
54
|
+
}
|
|
55
|
+
]
|
|
56
|
+
}
|
|
57
|
+
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Invocation Taxonomy Reference
|
|
2
2
|
|
|
3
3
|
How selftune classifies the ways users trigger (or should trigger) a skill.
|
|
4
|
-
Used by the `
|
|
4
|
+
Used by the `selftune eval generate` command and referenced by evolution workflows to understand
|
|
5
5
|
coverage gaps.
|
|
6
6
|
|
|
7
7
|
---
|
|
@@ -92,7 +92,7 @@ The invocation taxonomy directly drives the evolution feedback loop:
|
|
|
92
92
|
|
|
93
93
|
### Missed Implicit = Undertriggering
|
|
94
94
|
|
|
95
|
-
When `
|
|
95
|
+
When `selftune eval generate` shows implicit queries that don't trigger the skill, the
|
|
96
96
|
description is too narrow. The `evolve` command will:
|
|
97
97
|
1. Extract the missed implicit patterns
|
|
98
98
|
2. Propose description changes that cover them
|