selftune 0.2.30 → 0.2.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -56
- package/apps/local-dashboard/dist/assets/index-B-ut4w0B.js +15 -0
- package/apps/local-dashboard/dist/assets/index-BFGfCVrL.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DfowE3Hu.js +1 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/command-surface.ts +613 -2
- package/cli/selftune/create/baseline.ts +429 -0
- package/cli/selftune/create/check.ts +35 -0
- package/cli/selftune/create/init.ts +115 -0
- package/cli/selftune/create/package-candidate-state.ts +771 -0
- package/cli/selftune/create/package-evaluator.ts +710 -0
- package/cli/selftune/create/package-fingerprint.ts +142 -0
- package/cli/selftune/create/package-search.ts +377 -0
- package/cli/selftune/create/publish.ts +431 -0
- package/cli/selftune/create/readiness.ts +495 -0
- package/cli/selftune/create/replay.ts +330 -0
- package/cli/selftune/create/report.ts +74 -0
- package/cli/selftune/create/scaffold.ts +121 -0
- package/cli/selftune/create/skills-ref-adapter.ts +177 -0
- package/cli/selftune/create/status.ts +33 -0
- package/cli/selftune/create/templates.ts +249 -0
- package/cli/selftune/cron/setup.ts +1 -1
- package/cli/selftune/dashboard-action-events.ts +4 -1
- package/cli/selftune/dashboard-action-result.ts +789 -24
- package/cli/selftune/dashboard-action-stream.ts +80 -0
- package/cli/selftune/dashboard-contract.ts +146 -3
- package/cli/selftune/dashboard-server.ts +5 -4
- package/cli/selftune/eval/hooks-to-evals.ts +58 -35
- package/cli/selftune/eval/synthetic-evals.ts +145 -17
- package/cli/selftune/evolution/bounded-mutations.ts +1045 -0
- package/cli/selftune/evolution/evolve-body.ts +9 -36
- package/cli/selftune/evolution/evolve.ts +8 -72
- package/cli/selftune/evolution/stopping-criteria.ts +5 -13
- package/cli/selftune/evolution/unblock-suggestions.ts +0 -16
- package/cli/selftune/evolution/validate-host-replay.ts +115 -15
- package/cli/selftune/improve.ts +206 -0
- package/cli/selftune/index.ts +123 -6
- package/cli/selftune/init.ts +1 -1
- package/cli/selftune/localdb/queries/dashboard.ts +30 -0
- package/cli/selftune/localdb/schema.ts +52 -0
- package/cli/selftune/monitoring/watch.ts +257 -23
- package/cli/selftune/orchestrate/execute.ts +300 -1
- package/cli/selftune/orchestrate/finalize.ts +14 -0
- package/cli/selftune/orchestrate/plan.ts +22 -5
- package/cli/selftune/orchestrate/prepare.ts +59 -4
- package/cli/selftune/orchestrate/report.ts +1 -1
- package/cli/selftune/orchestrate.ts +34 -1
- package/cli/selftune/publish.ts +35 -0
- package/cli/selftune/registry/github-install.ts +256 -0
- package/cli/selftune/registry/index.ts +1 -1
- package/cli/selftune/registry/install.ts +58 -7
- package/cli/selftune/routes/actions.ts +81 -15
- package/cli/selftune/routes/overview.ts +1 -1
- package/cli/selftune/routes/skill-report.ts +147 -2
- package/cli/selftune/run.ts +18 -0
- package/cli/selftune/schedule.ts +3 -3
- package/cli/selftune/search-run.ts +703 -0
- package/cli/selftune/status.ts +35 -11
- package/cli/selftune/testing-readiness.ts +431 -40
- package/cli/selftune/types.ts +316 -0
- package/cli/selftune/utils/eval-readiness.ts +1 -0
- package/cli/selftune/utils/json-output.ts +11 -0
- package/cli/selftune/utils/lifecycle-surface.ts +48 -0
- package/cli/selftune/utils/query-filter.ts +82 -1
- package/cli/selftune/utils/tui.ts +85 -2
- package/cli/selftune/verify.ts +205 -0
- package/cli/selftune/workflows/proposals.ts +1 -1
- package/cli/selftune/workflows/skill-scaffold.ts +141 -63
- package/cli/selftune/workflows/workflows.ts +4 -4
- package/package.json +1 -1
- package/packages/dashboard-core/src/routes/manifest.ts +2 -2
- package/packages/ui/src/components/SkillReportPanels.tsx +7 -7
- package/packages/ui/src/primitives/button.tsx +5 -0
- package/skill/SKILL.md +148 -85
- package/skill/references/cli-quick-reference.md +16 -1
- package/skill/references/creator-playbook.md +31 -10
- package/skill/workflows/Baseline.md +8 -9
- package/skill/workflows/Contributions.md +4 -4
- package/skill/workflows/Create.md +173 -0
- package/skill/workflows/CreateTestDeploy.md +34 -30
- package/skill/workflows/Cron.md +2 -2
- package/skill/workflows/Dashboard.md +3 -3
- package/skill/workflows/Evals.md +13 -7
- package/skill/workflows/Evolve.md +75 -32
- package/skill/workflows/EvolveBody.md +22 -15
- package/skill/workflows/Hook.md +1 -1
- package/skill/workflows/Improve.md +168 -0
- package/skill/workflows/Initialize.md +3 -3
- package/skill/workflows/Orchestrate.md +49 -12
- package/skill/workflows/Publish.md +100 -0
- package/skill/workflows/Registry.md +19 -13
- package/skill/workflows/Run.md +72 -0
- package/skill/workflows/Schedule.md +2 -2
- package/skill/workflows/SearchRun.md +89 -0
- package/skill/workflows/SignalsDashboard.md +2 -2
- package/skill/workflows/UnitTest.md +13 -4
- package/skill/workflows/Verify.md +136 -0
- package/skill/workflows/Watch.md +114 -47
- package/skill/workflows/Workflows.md +13 -8
- package/apps/local-dashboard/dist/assets/index-BcXquWFB.css +0 -1
- package/apps/local-dashboard/dist/assets/index-Coq42hE4.js +0 -15
- package/apps/local-dashboard/dist/assets/vendor-ui-B0H8s1mP.js +0 -1
package/skill/SKILL.md
CHANGED
|
@@ -2,26 +2,28 @@
|
|
|
2
2
|
name: selftune
|
|
3
3
|
description: >
|
|
4
4
|
Self-improving skills toolkit that watches real agent sessions, detects missed
|
|
5
|
-
triggers, grades execution quality, and evolves
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
triggers, grades execution quality, and evolves skills through a package
|
|
6
|
+
evaluation pipeline (replay, baseline, grading, unit tests, and post-deploy
|
|
7
|
+
watch). Use when verifying skill packages, publishing improvements, evolving
|
|
8
|
+
skill descriptions or routing tables, discovering reusable workflows, scaffolding
|
|
9
|
+
new workflow skills, checking skill health, viewing the dashboard, ingesting
|
|
10
|
+
sessions from other platforms, or running autonomous improvement loops.
|
|
10
11
|
Make sure to use this skill whenever the user mentions skill improvement, skill
|
|
11
12
|
performance, skill triggers, skill evolution, skill health, undertriggering,
|
|
12
13
|
overtriggering, session grading, or wants to know how their skills are doing —
|
|
13
14
|
even if they don't say "selftune" explicitly.
|
|
14
15
|
metadata:
|
|
15
16
|
author: selftune-dev
|
|
16
|
-
version: 0.2.
|
|
17
|
+
version: 0.2.32
|
|
17
18
|
category: developer-tools
|
|
18
19
|
---
|
|
19
20
|
|
|
20
21
|
# selftune
|
|
21
22
|
|
|
22
23
|
Observe real agent sessions, detect missed triggers, grade execution quality,
|
|
23
|
-
evolve
|
|
24
|
-
scaffold workflow skills from
|
|
24
|
+
evolve skills through package evaluation (replay, baseline, grading, body,
|
|
25
|
+
unit tests, and post-deploy watch), and scaffold workflow skills from
|
|
26
|
+
repeated telemetry patterns.
|
|
25
27
|
|
|
26
28
|
**You are the operator.** The user installed this skill so YOU can manage their
|
|
27
29
|
skill health autonomously. They will say things like "set up selftune",
|
|
@@ -34,6 +36,43 @@ If `~/.selftune/config.json` does not exist, read `workflows/Initialize.md`
|
|
|
34
36
|
first. The CLI must be installed (`selftune` on PATH) before other commands
|
|
35
37
|
will work. Do not proceed with other commands until initialization is complete.
|
|
36
38
|
|
|
39
|
+
## Primary Lifecycle
|
|
40
|
+
|
|
41
|
+
Default to this lifecycle unless the user explicitly asks for a low-level
|
|
42
|
+
workflow:
|
|
43
|
+
|
|
44
|
+
1. `status`
|
|
45
|
+
- use `selftune status`
|
|
46
|
+
- for draft packages, use `selftune create status --skill-path <path>`
|
|
47
|
+
|
|
48
|
+
2. `verify`
|
|
49
|
+
- use `selftune verify --skill-path <path>`
|
|
50
|
+
- if verify reports missing readiness or evidence, follow the returned next
|
|
51
|
+
low-level command instead of rerunning the full chain
|
|
52
|
+
|
|
53
|
+
3. `publish`
|
|
54
|
+
- for draft packages, use `selftune publish --skill-path <path>`
|
|
55
|
+
- for already-live skills, `publish` usually means a validated `Improve`
|
|
56
|
+
action plus `Watch`
|
|
57
|
+
|
|
58
|
+
4. `improve`
|
|
59
|
+
- use `selftune improve --skill <name> --skill-path <path>`
|
|
60
|
+
- let `--scope auto` choose bounded package search automatically when the
|
|
61
|
+
skill already has package evidence or a draft package manifest
|
|
62
|
+
- set `--scope description|routing|body|package` when the measured gap is
|
|
63
|
+
already clear and you want to force the mutation surface
|
|
64
|
+
- use `--scope package` when the problem spans routing and body together or
|
|
65
|
+
you want measured frontier comparison before deciding what to publish
|
|
66
|
+
- omit `--dry-run` when you want the winning package candidate promoted back
|
|
67
|
+
into the draft automatically
|
|
68
|
+
|
|
69
|
+
5. `run`
|
|
70
|
+
- use `selftune run`
|
|
71
|
+
|
|
72
|
+
Treat `eval generate`, `unit-test`, `replay`, `baseline`, `watch`, and
|
|
73
|
+
body-specific evolution as advanced supporting workflows unless the user asks
|
|
74
|
+
for them directly or the default lifecycle fails.
|
|
75
|
+
|
|
37
76
|
## Command Execution Policy
|
|
38
77
|
|
|
39
78
|
```bash
|
|
@@ -43,7 +82,8 @@ selftune <command> [options]
|
|
|
43
82
|
Commands vary in output format:
|
|
44
83
|
|
|
45
84
|
- **JSON by default:** `selftune doctor` and `selftune watch` emit structured JSON on stdout.
|
|
46
|
-
- **Text by default:** `selftune status`, `selftune last`, `selftune
|
|
85
|
+
- **Text by default:** `selftune status`, `selftune last`, `selftune verify`, `selftune publish`, and `selftune improve` print human-readable text when stdout is a TTY.
|
|
86
|
+
- **Mixed runtime output:** `selftune run` / `selftune orchestrate` emit JSON on stdout and a human report on stderr.
|
|
47
87
|
- **JSON opt-in:** `selftune sync --json` enables structured JSON output.
|
|
48
88
|
- **Server:** `selftune dashboard` starts a local SPA server — it does not emit data.
|
|
49
89
|
|
|
@@ -54,70 +94,78 @@ next step from prose.
|
|
|
54
94
|
Run `selftune <command> --help` for exact flags. Read
|
|
55
95
|
`references/cli-quick-reference.md` when you need the full flag reference.
|
|
56
96
|
|
|
57
|
-
## Creator Trust Loop
|
|
97
|
+
## Package Evaluation Pipeline (Creator Trust Loop)
|
|
58
98
|
|
|
59
|
-
When the user wants to improve a skill, default to this
|
|
60
|
-
jumping straight to mutation
|
|
99
|
+
When the user wants to improve a skill, default to this package evaluation
|
|
100
|
+
pipeline before jumping straight to mutation. Each step builds measured
|
|
101
|
+
evidence that the package is ready to publish:
|
|
61
102
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
103
|
+
- `draft` — the package exists but is still incomplete
|
|
104
|
+
- `verify_blocked` — the draft is still in one of the concrete readiness states: `needs_spec_validation`, `needs_package_resources`, `needs_evals`, `needs_unit_tests`, `needs_routing_replay`, or `needs_baseline`
|
|
105
|
+
- `verified` — the trust gates pass and the skill is ready to ship
|
|
106
|
+
- `published` — the skill was shipped successfully
|
|
107
|
+
- `watching` — post-deploy monitoring is active
|
|
108
|
+
- `needs_improvement` — measured evidence shows trigger, routing, body, or value gaps
|
|
109
|
+
- `unhealthy` — hooks, telemetry, config, or selftune itself is broken
|
|
68
110
|
|
|
69
111
|
If the user asks "how do I know this skill works?" or "can I trust this skill
|
|
70
|
-
yet?", start with this
|
|
71
|
-
skill report to explain what is still missing, whether the
|
|
72
|
-
|
|
112
|
+
yet?", start with this pipeline, then use `selftune status`, the dashboard, or
|
|
113
|
+
the skill report to explain what is still missing, whether the package is ready
|
|
114
|
+
to publish, or whether it is already being watched live.
|
|
73
115
|
|
|
74
116
|
## Workflow Routing
|
|
75
117
|
|
|
76
|
-
| Trigger keywords
|
|
77
|
-
|
|
|
78
|
-
| create
|
|
79
|
-
|
|
|
80
|
-
|
|
|
81
|
-
|
|
|
82
|
-
|
|
|
83
|
-
|
|
|
84
|
-
|
|
|
85
|
-
|
|
|
86
|
-
|
|
|
87
|
-
|
|
|
88
|
-
|
|
|
89
|
-
|
|
|
90
|
-
|
|
|
91
|
-
|
|
|
92
|
-
|
|
|
93
|
-
|
|
|
94
|
-
|
|
|
95
|
-
|
|
|
96
|
-
|
|
|
97
|
-
|
|
|
98
|
-
|
|
|
99
|
-
|
|
|
100
|
-
|
|
|
101
|
-
|
|
|
102
|
-
|
|
|
103
|
-
|
|
|
104
|
-
|
|
|
105
|
-
|
|
|
106
|
-
|
|
|
107
|
-
|
|
|
108
|
-
|
|
|
109
|
-
|
|
|
110
|
-
|
|
|
111
|
-
|
|
|
112
|
-
|
|
|
113
|
-
|
|
|
114
|
-
|
|
|
115
|
-
|
|
|
116
|
-
| export
|
|
117
|
-
|
|
|
118
|
-
|
|
|
119
|
-
|
|
120
|
-
|
|
118
|
+
| Trigger keywords | Workflow | File |
|
|
119
|
+
| -------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------- | --------------------------------- |
|
|
120
|
+
| create skill, new skill package, author skill, bootstrap skill, scaffold package, benchmark report, package report, publish report | Create | workflows/Create.md |
|
|
121
|
+
| verify skill, creator loop, can I trust this skill, how do I know this skill works, test this skill, ready to ship, ready to deploy | Verify | workflows/Verify.md |
|
|
122
|
+
| publish skill, ship skill, deploy skill, go live, release skill | Publish | workflows/Publish.md |
|
|
123
|
+
| search run, package frontier, candidate search, bounded package evolution, compare package candidates, optimize package, improve routing and body together, bounded evolution | SearchRun | workflows/SearchRun.md |
|
|
124
|
+
| grade, score, evaluate, assess session, auto-grade | Grade | workflows/Grade.md |
|
|
125
|
+
| evals, eval set, undertriggering, skill stats, eval generate | Evals | workflows/Evals.md |
|
|
126
|
+
| improve, optimize skills, make skills better, triggers, catch more queries, apply proposal, apply contributor proposal | Improve | workflows/Improve.md |
|
|
127
|
+
| evolve description, description-only evolution, improve trigger wording | Evolve | workflows/Evolve.md |
|
|
128
|
+
| evolve body, evolve routing, full body evolution, rewrite skill, teacher student | EvolveBody | workflows/EvolveBody.md |
|
|
129
|
+
| evolve rollback, undo, restore, revert evolution, go back, undo last change | Rollback | workflows/Rollback.md |
|
|
130
|
+
| watch, monitor, regression, post-deploy, keep an eye on | Watch | workflows/Watch.md |
|
|
131
|
+
| doctor, health, hooks, broken, diagnose, not working, something wrong | Doctor | workflows/Doctor.md |
|
|
132
|
+
| ingest, import, codex logs, opencode, openclaw, pi, wrap codex | Ingest | workflows/Ingest.md |
|
|
133
|
+
| replay, backfill, claude transcripts, historical sessions | Replay | workflows/Replay.md |
|
|
134
|
+
| contributions, sharing preferences, opt in/out creator sharing, approve/revoke contributions | Contributions | workflows/Contributions.md |
|
|
135
|
+
| creator contributions, selftune.contribute.json, enable/disable creator contribution | CreatorContributions | workflows/CreatorContributions.md |
|
|
136
|
+
| signals dashboard, contributor signals, signals page, community dashboard, community data, contributor stats, signal health, how are signals, how is community | SignalsDashboard | workflows/SignalsDashboard.md |
|
|
137
|
+
| contribute, share, export bundle, export data, anonymized, give back | Contribute | workflows/Contribute.md |
|
|
138
|
+
| init, setup, set up, bootstrap, first time, install, configure selftune, alpha, enroll | Initialize | workflows/Initialize.md |
|
|
139
|
+
| cron, schedule, automate evolution, run automatically | Cron | workflows/Cron.md |
|
|
140
|
+
| schedule, selftune schedule, launchd, systemd, crontab, automation setup | Schedule | workflows/Schedule.md |
|
|
141
|
+
| auto-activate, suggestions, activation rules, nag, why suggest | AutoActivation | workflows/AutoActivation.md |
|
|
142
|
+
| dashboard, visual, open dashboard, show dashboard, serve dashboard | Dashboard | workflows/Dashboard.md |
|
|
143
|
+
| evolution memory, session continuity, what happened last | EvolutionMemory | workflows/EvolutionMemory.md |
|
|
144
|
+
| grade baseline, baseline lift, adds value, skill value, no-skill comparison | Baseline | workflows/Baseline.md |
|
|
145
|
+
| eval unit-test, skill test, test skill, generate tests, run tests | UnitTest | workflows/UnitTest.md |
|
|
146
|
+
| eval composability, co-occurrence, skill conflicts, family overlap, sibling confusion | Composability | workflows/Composability.md |
|
|
147
|
+
| eval import, skillsbench, external evals, benchmark tasks | ImportSkillsBench | workflows/ImportSkillsBench.md |
|
|
148
|
+
| telemetry, analytics, disable analytics, opt out, tracking, privacy | Telemetry | workflows/Telemetry.md |
|
|
149
|
+
| orchestrate, autonomous, full loop, improve all skills, run selftune, run selftune loop, run with package search, automatic package improvement | Run | workflows/Run.md |
|
|
150
|
+
| sync, refresh, source truth, rescan sessions | Sync | workflows/Sync.md |
|
|
151
|
+
| badge, readme badge, skill badge, health badge | Badge | workflows/Badge.md |
|
|
152
|
+
| workflows, discover workflows, scaffold workflow skill, build skill from logs | Workflows | workflows/Workflows.md |
|
|
153
|
+
| alpha upload, upload data, send alpha data, manual upload | AlphaUpload | workflows/AlphaUpload.md |
|
|
154
|
+
| recover, rebuild sqlite, recover db, legacy backfill | Recover | workflows/Recover.md |
|
|
155
|
+
| quickstart, getting started, onboard, first time setup, new user | Quickstart | workflows/Quickstart.md |
|
|
156
|
+
| uninstall, remove selftune, clean up, teardown | Uninstall | workflows/Uninstall.md |
|
|
157
|
+
| repair, rebuild usage, fix skill usage, trustworthy usage | RepairSkillUsage | workflows/RepairSkillUsage.md |
|
|
158
|
+
| export canonical, canonical export, canonical telemetry, push payload | ExportCanonical | workflows/ExportCanonical.md |
|
|
159
|
+
| hook, run hook, invoke hook, manual hook, debug hook | Hook | workflows/Hook.md |
|
|
160
|
+
| codex/opencode/cline/pi hooks, platform hooks, non-claude hooks, multi-agent | PlatformHooks | workflows/PlatformHooks.md |
|
|
161
|
+
| registry, distribute, push/install/sync/rollback skill, team skills | Registry | workflows/Registry.md |
|
|
162
|
+
| export, dump, jsonl, export sqlite, debug export | Export | _(direct: `selftune export`)_ |
|
|
163
|
+
| status, health summary, skill health, how are skills, run selftune | Status | _(direct: `selftune status`)_ |
|
|
164
|
+
| last, last session, recent session, what happened | Last | _(direct: `selftune last`)_ |
|
|
165
|
+
|
|
166
|
+
Workflows Grade, Improve, Watch, and Ingest also run autonomously via `selftune orchestrate`.
|
|
167
|
+
When package evaluation evidence exists, `selftune orchestrate` (aliased as `selftune run`)
|
|
168
|
+
can automatically select package-level bounded search instead of description-level evolve.
|
|
121
169
|
|
|
122
170
|
## Interactive Configuration
|
|
123
171
|
|
|
@@ -130,12 +178,27 @@ tier reference, and quick-path rules.
|
|
|
130
178
|
selftune bundles focused agents in `agents/`. Read the relevant agent file and
|
|
131
179
|
follow its instructions — either inline or by spawning a subagent.
|
|
132
180
|
|
|
133
|
-
| Trigger keywords
|
|
134
|
-
|
|
|
135
|
-
| diagnose, root cause, why failing, debug performance | `agents/diagnosis-analyst.md`
|
|
136
|
-
| patterns, conflicts, cross-skill, overlap
|
|
137
|
-
| review evolution, check proposal, safe to deploy
|
|
138
|
-
| set up selftune, integrate, configure project
|
|
181
|
+
| Trigger keywords | Agent file | When to use |
|
|
182
|
+
| ---------------------------------------------------- | ------------------------------ | ------------------------------------------------------------ |
|
|
183
|
+
| diagnose, root cause, why failing, debug performance | `agents/diagnosis-analyst.md` | Recurring low grades or unclear failures after doctor/status |
|
|
184
|
+
| patterns, conflicts, cross-skill, overlap | `agents/pattern-analyst.md` | Skills overlap, misroute, or interfere |
|
|
185
|
+
| review evolution, check proposal, safe to deploy | `agents/evolution-reviewer.md` | Before deploying high-stakes or marginal evolutions |
|
|
186
|
+
| set up selftune, integrate, configure project | `agents/integration-guide.md` | Complex setup: monorepos, multi-skill, mixed-platform |
|
|
187
|
+
|
|
188
|
+
## Advanced Workflows
|
|
189
|
+
|
|
190
|
+
Load these when the user explicitly asks for a low-level step, when the primary
|
|
191
|
+
lifecycle fails, or when debugging needs deeper evidence:
|
|
192
|
+
|
|
193
|
+
- `workflows/Evals.md`
|
|
194
|
+
- `workflows/UnitTest.md`
|
|
195
|
+
- `workflows/Baseline.md`
|
|
196
|
+
- `workflows/Replay.md`
|
|
197
|
+
- `workflows/Watch.md`
|
|
198
|
+
- `workflows/Evolve.md`
|
|
199
|
+
- `workflows/EvolveBody.md`
|
|
200
|
+
- `workflows/Composability.md`
|
|
201
|
+
- `workflows/ImportSkillsBench.md`
|
|
139
202
|
|
|
140
203
|
## Negative Examples
|
|
141
204
|
|
|
@@ -173,16 +236,16 @@ community contribution, signal sharing, opt in creator, creator UUID.
|
|
|
173
236
|
|
|
174
237
|
Load these on demand — do not read unless needed for the current task:
|
|
175
238
|
|
|
176
|
-
| Reference
|
|
177
|
-
|
|
|
178
|
-
| `references/cli-quick-reference.md` | Need exact CLI flags beyond `--help`
|
|
179
|
-
| `references/troubleshooting.md`
|
|
180
|
-
| `references/examples.md`
|
|
181
|
-
| `references/creator-playbook.md`
|
|
182
|
-
| `references/interactive-config.md`
|
|
183
|
-
| `references/grading-methodology.md` | Grading sessions or interpreting grades
|
|
184
|
-
| `references/invocation-taxonomy.md` | Analyzing trigger coverage
|
|
185
|
-
| `references/logs.md`
|
|
186
|
-
| `references/setup-patterns.md`
|
|
187
|
-
| `references/version-history.md`
|
|
188
|
-
| `settings_snippet.json`
|
|
239
|
+
| Reference | When to read |
|
|
240
|
+
| ----------------------------------- | -------------------------------------------------------------------- |
|
|
241
|
+
| `references/cli-quick-reference.md` | Need exact CLI flags beyond `--help` |
|
|
242
|
+
| `references/troubleshooting.md` | Diagnosing common errors |
|
|
243
|
+
| `references/examples.md` | Need step-by-step scenario walkthroughs |
|
|
244
|
+
| `references/creator-playbook.md` | Publishing skills others install; before-ship vs after-ship pipeline |
|
|
245
|
+
| `references/interactive-config.md` | Before mutating workflows |
|
|
246
|
+
| `references/grading-methodology.md` | Grading sessions or interpreting grades |
|
|
247
|
+
| `references/invocation-taxonomy.md` | Analyzing trigger coverage |
|
|
248
|
+
| `references/logs.md` | Parsing or debugging log files |
|
|
249
|
+
| `references/setup-patterns.md` | Complex platform-specific setup |
|
|
250
|
+
| `references/version-history.md` | Checking what changed between versions |
|
|
251
|
+
| `settings_snippet.json` | During initialization |
|
|
@@ -20,9 +20,23 @@ selftune grade baseline --skill <name> --skill-path <path> [--eval-set <path>]
|
|
|
20
20
|
selftune evolve --skill <name> --skill-path <path> [--dry-run] [--validation-mode auto|replay|judge]
|
|
21
21
|
selftune evolve body --skill <name> --skill-path <path> --target <body|routing> [--dry-run]
|
|
22
22
|
selftune evolve rollback --skill <name> --skill-path <path> [--proposal-id <id>]
|
|
23
|
+
selftune improve --skill <name> --skill-path <path> [--scope auto|description|routing|body|package] [--dry-run] [--validation-mode auto|replay|judge]
|
|
24
|
+
|
|
25
|
+
# Create group
|
|
26
|
+
selftune verify --skill-path <path> [--agent AGENT] [--eval-set PATH] [--no-auto-fix] [--json]
|
|
27
|
+
selftune publish --skill-path <path> [--no-watch] [--ignore-watch-alerts] [--json]
|
|
28
|
+
selftune search-run --skill-path <path> [--skill NAME] [--surface routing|body|both] [--max-candidates N] [--agent AGENT] [--eval-set PATH] [--apply-winner] [--json]
|
|
29
|
+
selftune create status --skill-path <path> [--json]
|
|
30
|
+
selftune create init --name <name> --description <text> [--output-dir PATH] [--force] [--json]
|
|
31
|
+
selftune create scaffold --from-workflow <id|index> [--output-dir PATH] [--skill-name NAME] [--description TEXT] [--write] [--force] [--json]
|
|
32
|
+
selftune create check --skill-path <path> [--json]
|
|
33
|
+
selftune create replay --skill-path <path> [--mode routing|package] [--agent AGENT] [--json]
|
|
34
|
+
selftune create baseline --skill-path <path> [--mode routing|package] [--agent AGENT] [--json]
|
|
35
|
+
selftune create report --skill-path <path> [--agent AGENT] [--eval-set PATH] [--json]
|
|
36
|
+
selftune create publish --skill-path <path> [--watch] [--ignore-watch-alerts] [--json]
|
|
23
37
|
|
|
24
38
|
# Eval group
|
|
25
|
-
selftune eval generate --skill <name> [--list-skills] [--stats] [--max N] [--seed N] [--output PATH] [--blend]
|
|
39
|
+
selftune eval generate --skill <name> [--list-skills] [--stats] [--max N] [--seed N] [--output PATH] [--agent AGENT] [--blend]
|
|
26
40
|
selftune eval unit-test --skill <name> --tests <path> [--run-agent] [--generate]
|
|
27
41
|
selftune eval import --dir <path> --skill <name> --output <path> [--match-strategy exact|fuzzy]
|
|
28
42
|
selftune eval composability --skill <name> [--window N] [--telemetry-log <path>]
|
|
@@ -45,6 +59,7 @@ selftune telemetry [status|enable|disable]
|
|
|
45
59
|
selftune export [TABLE...] [--output/-o DIR] [--since DATE]
|
|
46
60
|
|
|
47
61
|
# Autonomous loop
|
|
62
|
+
selftune run [--dry-run] [--review-required] [--auto-approve] [--skill NAME] [--max-skills N] [--recent-window HOURS] [--sync-force] [--max-auto-grade N] [--loop] [--loop-interval SECS]
|
|
48
63
|
selftune orchestrate [--dry-run] [--review-required] [--auto-approve] [--skill NAME] [--max-skills N] [--recent-window HOURS] [--sync-force] [--max-auto-grade N] [--loop] [--loop-interval SECS]
|
|
49
64
|
selftune sync [--since DATE] [--dry-run] [--force] [--no-claude] [--no-codex] [--no-opencode] [--no-openclaw] [--no-pi] [--no-repair] [--json]
|
|
50
65
|
|
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
Use this when you are publishing a skill other people will install.
|
|
4
4
|
|
|
5
5
|
If the user wants the operational step-by-step loop from cold start to deploy,
|
|
6
|
-
route first to `workflows/
|
|
7
|
-
packaging and after-ship interpretation layer around that
|
|
6
|
+
route first to `workflows/Verify.md` and `workflows/Publish.md`. Use this
|
|
7
|
+
reference for the packaging and after-ship interpretation layer around that
|
|
8
|
+
loop.
|
|
8
9
|
|
|
9
10
|
The goal is simple:
|
|
10
11
|
|
|
@@ -39,20 +40,23 @@ Rule of thumb:
|
|
|
39
40
|
|
|
40
41
|
### Cold-start test and deploy the skill before publishing
|
|
41
42
|
|
|
42
|
-
The default
|
|
43
|
+
The default package evaluation pipeline is:
|
|
43
44
|
|
|
44
45
|
```bash
|
|
46
|
+
selftune verify --skill-path path/to/my-skill
|
|
45
47
|
selftune eval generate --skill my-skill
|
|
48
|
+
selftune verify --skill-path path/to/my-skill
|
|
46
49
|
selftune eval unit-test --skill my-skill --generate --skill-path path/to/SKILL.md
|
|
47
|
-
selftune
|
|
48
|
-
selftune
|
|
49
|
-
selftune
|
|
50
|
-
selftune
|
|
50
|
+
selftune verify --skill-path path/to/my-skill
|
|
51
|
+
selftune create replay --skill-path path/to/my-skill --mode package
|
|
52
|
+
selftune create baseline --skill-path path/to/my-skill --mode package
|
|
53
|
+
selftune verify --skill-path path/to/my-skill
|
|
54
|
+
selftune publish --skill-path path/to/my-skill
|
|
51
55
|
```
|
|
52
56
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
57
|
+
`verify` is the front door in that sequence. Evals, unit tests, replay, and
|
|
58
|
+
baseline remain the atomic supporting steps when the draft is still missing
|
|
59
|
+
evidence.
|
|
56
60
|
|
|
57
61
|
The dashboard overview, per-skill report, and `selftune status` all read from that loop and show
|
|
58
62
|
the next missing step directly, then flip to deploy-ready and watching states once the skill is shipped.
|
|
@@ -106,11 +110,28 @@ Actionable threshold today:
|
|
|
106
110
|
- at least `10` total signals
|
|
107
111
|
- at least `3` distinct contributor cohorts
|
|
108
112
|
|
|
113
|
+
### Package-level improvement
|
|
114
|
+
|
|
115
|
+
When a skill has enough package evaluation evidence (accepted frontier
|
|
116
|
+
candidates, canonical package evaluations), `selftune orchestrate` can
|
|
117
|
+
automatically select package-level bounded search instead of description-only
|
|
118
|
+
evolve. You can also trigger this manually:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
selftune improve --skill my-skill --skill-path path/to/SKILL.md --scope package
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Package search generates bounded mutations on routing and body surfaces,
|
|
125
|
+
evaluates them against the accepted frontier parent through the package
|
|
126
|
+
evaluator, and applies the winning candidate. Watch evidence feeds back into
|
|
127
|
+
frontier selection, so post-deploy regressions inform future search runs.
|
|
128
|
+
|
|
109
129
|
### Interpret signal correctly
|
|
110
130
|
|
|
111
131
|
- High missed counts with concentrated categories usually mean the **description/router** is wrong.
|
|
112
132
|
- Low grades with decent trigger rate usually mean the **body/workflow/reference/tool split** is wrong.
|
|
113
133
|
- Low-signal skills need more contributors before you trust a proposal.
|
|
134
|
+
- When both routing and body surfaces show weakness, `selftune improve --scope package` or automatic orchestrate scope selection can address them together.
|
|
114
135
|
|
|
115
136
|
## Fast Checklist
|
|
116
137
|
|
|
@@ -138,20 +138,19 @@ Report the interpretation to the user based on the lift value.
|
|
|
138
138
|
Add `--with-baseline` to evolve commands to prevent wasting evolution
|
|
139
139
|
cycles on skills that don't add value.
|
|
140
140
|
|
|
141
|
-
### 4. Canonical
|
|
141
|
+
### 4. Canonical pipeline position
|
|
142
142
|
|
|
143
|
-
Baseline is the last pre-deploy check in the
|
|
143
|
+
Baseline is the last pre-deploy check in the package evaluation pipeline:
|
|
144
144
|
|
|
145
145
|
```bash
|
|
146
|
-
selftune
|
|
147
|
-
selftune
|
|
148
|
-
selftune
|
|
149
|
-
selftune
|
|
150
|
-
selftune evolve --skill <name> --skill-path <path> --with-baseline
|
|
151
|
-
selftune watch --skill <name>
|
|
146
|
+
selftune verify --skill-path <path>
|
|
147
|
+
selftune create baseline --skill-path <path> --mode package
|
|
148
|
+
selftune verify --skill-path <path>
|
|
149
|
+
selftune publish --skill-path <path>
|
|
152
150
|
```
|
|
153
151
|
|
|
154
|
-
|
|
152
|
+
For already-published skills, `grade baseline` remains the explicit value gate
|
|
153
|
+
behind `evolve --with-baseline`.
|
|
155
154
|
|
|
156
155
|
## Common Patterns
|
|
157
156
|
|
|
@@ -56,16 +56,16 @@ selftune contributions upload [--dry-run] [--retry-failed] [--limit <n>]
|
|
|
56
56
|
|
|
57
57
|
## Automatic Flush via Orchestrate
|
|
58
58
|
|
|
59
|
-
When `selftune
|
|
59
|
+
When `selftune run` executes, it automatically flushes any staged
|
|
60
60
|
creator-directed relay signals as Step 10 (after alpha upload). This means
|
|
61
61
|
users who have opted in don't need to run `selftune contributions upload`
|
|
62
|
-
manually —
|
|
63
|
-
the
|
|
62
|
+
manually — the runtime handles it. The flush is fail-open and never blocks
|
|
63
|
+
the autonomous loop. An API key is required (the user must be enrolled in the alpha program).
|
|
64
64
|
|
|
65
65
|
## Notes
|
|
66
66
|
|
|
67
67
|
- This workflow now shows which installed skills are requesting creator-directed sharing via `selftune.contribute.json`.
|
|
68
|
-
- Once approved, creator-directed contribution signals are staged locally during `selftune sync` / `selftune
|
|
68
|
+
- Once approved, creator-directed contribution signals are staged locally during `selftune sync` / `selftune run`.
|
|
69
69
|
- Use `selftune contributions upload` to flush staged rows to the creator-directed relay endpoint.
|
|
70
70
|
- Relay upload is separate from `selftune alpha upload` and currently reuses the local cloud API key when available.
|
|
71
71
|
- Use `selftune contribute` when the user explicitly wants to export/share an anonymized community bundle.
|