selftune 0.2.5 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/README.md +1 -0
  2. package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +15 -0
  3. package/apps/local-dashboard/dist/assets/index-CRtLkBTi.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
  5. package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
  7. package/apps/local-dashboard/dist/index.html +5 -5
  8. package/cli/selftune/activation-rules.ts +30 -9
  9. package/cli/selftune/agent-guidance.ts +96 -0
  10. package/cli/selftune/alpha-identity.ts +157 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
  12. package/cli/selftune/alpha-upload/client.ts +113 -0
  13. package/cli/selftune/alpha-upload/flush.ts +191 -0
  14. package/cli/selftune/alpha-upload/index.ts +194 -0
  15. package/cli/selftune/alpha-upload/queue.ts +252 -0
  16. package/cli/selftune/alpha-upload/stage-canonical.ts +242 -0
  17. package/cli/selftune/alpha-upload-contract.ts +52 -0
  18. package/cli/selftune/auth/device-code.ts +110 -0
  19. package/cli/selftune/auto-update.ts +130 -0
  20. package/cli/selftune/badge/badge.ts +19 -9
  21. package/cli/selftune/canonical-export.ts +16 -3
  22. package/cli/selftune/constants.ts +28 -8
  23. package/cli/selftune/contribute/bundle.ts +32 -5
  24. package/cli/selftune/dashboard-contract.ts +32 -1
  25. package/cli/selftune/dashboard-server.ts +256 -692
  26. package/cli/selftune/dashboard.ts +1 -1
  27. package/cli/selftune/eval/baseline.ts +11 -7
  28. package/cli/selftune/eval/hooks-to-evals.ts +27 -9
  29. package/cli/selftune/eval/synthetic-evals.ts +54 -1
  30. package/cli/selftune/evolution/audit.ts +24 -19
  31. package/cli/selftune/evolution/constitutional.ts +176 -0
  32. package/cli/selftune/evolution/evidence.ts +18 -13
  33. package/cli/selftune/evolution/evolve-body.ts +104 -7
  34. package/cli/selftune/evolution/evolve.ts +195 -22
  35. package/cli/selftune/evolution/propose-body.ts +18 -1
  36. package/cli/selftune/evolution/propose-description.ts +27 -2
  37. package/cli/selftune/evolution/rollback.ts +11 -15
  38. package/cli/selftune/export.ts +84 -0
  39. package/cli/selftune/grading/auto-grade.ts +13 -4
  40. package/cli/selftune/grading/grade-session.ts +16 -6
  41. package/cli/selftune/hooks/evolution-guard.ts +26 -9
  42. package/cli/selftune/hooks/prompt-log.ts +23 -9
  43. package/cli/selftune/hooks/session-stop.ts +78 -15
  44. package/cli/selftune/hooks/skill-eval.ts +189 -10
  45. package/cli/selftune/index.ts +274 -2
  46. package/cli/selftune/ingestors/claude-replay.ts +48 -21
  47. package/cli/selftune/init.ts +249 -47
  48. package/cli/selftune/last.ts +7 -7
  49. package/cli/selftune/localdb/db.ts +90 -10
  50. package/cli/selftune/localdb/direct-write.ts +531 -0
  51. package/cli/selftune/localdb/materialize.ts +296 -42
  52. package/cli/selftune/localdb/queries.ts +325 -32
  53. package/cli/selftune/localdb/schema.ts +109 -0
  54. package/cli/selftune/monitoring/watch.ts +26 -8
  55. package/cli/selftune/normalization.ts +85 -15
  56. package/cli/selftune/observability.ts +248 -2
  57. package/cli/selftune/orchestrate.ts +165 -20
  58. package/cli/selftune/quickstart.ts +34 -10
  59. package/cli/selftune/repair/skill-usage.ts +12 -2
  60. package/cli/selftune/routes/actions.ts +77 -0
  61. package/cli/selftune/routes/badge.ts +66 -0
  62. package/cli/selftune/routes/doctor.ts +12 -0
  63. package/cli/selftune/routes/index.ts +14 -0
  64. package/cli/selftune/routes/orchestrate-runs.ts +13 -0
  65. package/cli/selftune/routes/overview.ts +14 -0
  66. package/cli/selftune/routes/report.ts +293 -0
  67. package/cli/selftune/routes/skill-report.ts +230 -0
  68. package/cli/selftune/status.ts +203 -7
  69. package/cli/selftune/sync.ts +13 -1
  70. package/cli/selftune/types.ts +50 -0
  71. package/cli/selftune/utils/jsonl.ts +58 -1
  72. package/cli/selftune/utils/selftune-meta.ts +38 -0
  73. package/cli/selftune/utils/skill-log.ts +30 -4
  74. package/cli/selftune/utils/transcript.ts +15 -0
  75. package/cli/selftune/workflows/workflows.ts +7 -6
  76. package/package.json +11 -7
  77. package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
  78. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
  79. package/packages/telemetry-contract/fixtures/golden.json +1 -0
  80. package/packages/telemetry-contract/fixtures/index.ts +4 -0
  81. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
  82. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
  83. package/packages/telemetry-contract/package.json +6 -1
  84. package/packages/telemetry-contract/src/index.ts +1 -0
  85. package/packages/telemetry-contract/src/schemas.ts +215 -0
  86. package/packages/telemetry-contract/src/types.ts +3 -1
  87. package/packages/telemetry-contract/src/validators.ts +3 -1
  88. package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
  89. package/packages/ui/package.json +4 -0
  90. package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
  91. package/packages/ui/src/components/section-cards.tsx +31 -14
  92. package/packages/ui/src/types.ts +1 -0
  93. package/skill/SKILL.md +214 -174
  94. package/skill/Workflows/AlphaUpload.md +45 -0
  95. package/skill/Workflows/Baseline.md +18 -12
  96. package/skill/Workflows/Composability.md +3 -3
  97. package/skill/Workflows/Dashboard.md +44 -91
  98. package/skill/Workflows/Doctor.md +93 -66
  99. package/skill/Workflows/Evals.md +49 -40
  100. package/skill/Workflows/Evolve.md +76 -28
  101. package/skill/Workflows/EvolveBody.md +37 -38
  102. package/skill/Workflows/Initialize.md +172 -26
  103. package/skill/Workflows/Orchestrate.md +11 -2
  104. package/skill/Workflows/Sync.md +23 -0
  105. package/skill/Workflows/Watch.md +2 -5
  106. package/skill/agents/diagnosis-analyst.md +163 -0
  107. package/skill/agents/evolution-reviewer.md +149 -0
  108. package/skill/agents/integration-guide.md +154 -0
  109. package/skill/agents/pattern-analyst.md +149 -0
  110. package/skill/assets/multi-skill-settings.json +1 -1
  111. package/skill/assets/single-skill-settings.json +1 -1
  112. package/skill/references/interactive-config.md +39 -0
  113. package/skill/references/invocation-taxonomy.md +34 -0
  114. package/skill/references/logs.md +9 -1
  115. package/skill/references/setup-patterns.md +3 -3
  116. package/skill/settings_snippet.json +1 -1
  117. package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
  118. package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
  119. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
@@ -30,7 +30,13 @@ selftune evolve --skill <name> --skill-path <path> [options]
30
30
  | `--confidence <n>` | Minimum confidence threshold (0-1) | 0.6 |
31
31
  | `--max-iterations <n>` | Maximum retry iterations | 3 |
32
32
  | `--validation-model <model>` | Model for trigger-check validation LLM calls | `haiku` |
33
- | `--cheap-loop` | Use cheap models for loop, expensive for final gate | Off |
33
+ | `--pareto` | Generate multiple candidates per iteration | Off |
34
+ | `--candidates <n>` | Number of candidates per iteration (with `--pareto`) | 3 |
35
+ | `--token-efficiency` | Optimize for token efficiency in proposals | Off |
36
+ | `--with-baseline` | Include a no-skill baseline comparison | Off |
37
+ | `--cheap-loop` | Use cheap models for loop, expensive for final gate | On |
38
+ | `--full-model` | Use full-cost model throughout (disables cheap-loop) | Off |
39
+ | `--verbose` | Print detailed progress during evolution | Off |
34
40
  | `--gate-model <model>` | Model for final gate validation | `sonnet` (when `--cheap-loop`) |
35
41
  | `--proposal-model <model>` | Model for proposal generation LLM calls | None |
36
42
  | `--sync-first` | Refresh source-truth telemetry before generating evals/failure patterns | Off |
@@ -89,34 +95,38 @@ The evolution process writes multiple audit entries:
89
95
 
90
96
  ### 0. Pre-Flight Configuration
91
97
 
92
- Before running the evolve command, present numbered configuration options to the user inline in your response, then wait for the user's answer before proceeding.
98
+ Before running the evolve command, use the `AskUserQuestion` tool to present structured configuration options. If the user responds with "use defaults" or similar shorthand, skip to step 1 using the recommended defaults. If the user cancels, stop and do not continue.
93
99
 
94
- If the user responds with "use defaults", "just run it", or similar shorthand, skip to step 1 using the recommended defaults marked below.
100
+ Use `AskUserQuestion` with these questions (max 4 per call — split if needed):
95
101
 
96
- Present the following options inline in your response:
102
+ **Call 1:**
97
103
 
98
- 1. **Execution Mode**
99
- - a) Dry run — preview proposal without deploying (recommended for first run)
100
- - b) Live — validate and deploy if improved
101
-
102
- 2. **Model Tier** (see SKILL.md Model Tier Reference)
103
- - a) Fast (haiku) — cheapest, ~2s/call (recommended with cheap-loop)
104
- - b) Balanced (sonnet) — good quality, ~5s/call
105
- - c) Best (opus) — highest quality, ~10s/call
106
-
107
- 3. **Cost Optimization**
108
- - a) Cheap loop — haiku for iteration, sonnet for final gate (recommended)
109
- - b) Single model — use one model throughout
110
-
111
- 4. **Confidence Threshold:** 0.6 (default, higher = stricter)
112
-
113
- 5. **Max Iterations:** 3 (default, more = longer but better results)
104
+ ```json
105
+ {
106
+ "questions": [
107
+ {
108
+ "question": "Execution Mode",
109
+ "options": ["Dry run — preview without deploying (recommended for first run)", "Live — validate and deploy if improved"]
110
+ },
111
+ {
112
+ "question": "Model Tier (see SKILL.md reference)",
113
+ "options": ["Fast (haiku) — cheapest, ~2s/call (recommended with cheap-loop)", "Balanced (sonnet) — good quality, ~5s/call", "Best (opus) — highest quality, ~10s/call"]
114
+ },
115
+ {
116
+ "question": "Cost Optimization",
117
+ "options": ["Cheap loop — haiku for iteration, sonnet for final gate (recommended)", "Single model — use one model throughout"]
118
+ },
119
+ {
120
+ "question": "Advanced Options",
121
+ "options": ["Defaults (0.6 confidence, 3 iterations, single candidate) (recommended)", "Stricter (0.7 confidence, 5 iterations)", "Pareto mode (multiple candidates per iteration)"]
122
+ }
123
+ ]
124
+ }
125
+ ```
114
126
 
115
- 6. **Multi-Candidate Selection**
116
- - a) Single candidate — one proposal per iteration (recommended)
117
- - b) Pareto mode — generate multiple candidates, pick best on frontier
127
+ If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
118
128
 
119
- Ask: "Reply with your choices (e.g., '1a, 2a, 3a, defaults for rest') or 'use defaults' for recommended settings."
129
+ If the user cancels, stop -- do not proceed with defaults. If the user selects "use defaults", skip to step 1 with recommended defaults.
120
130
 
121
131
  After the user responds, parse their selections and map each choice to the corresponding CLI flags:
122
132
 
@@ -193,6 +203,26 @@ The command groups missed queries by invocation type:
193
203
 
194
204
  See `references/invocation-taxonomy.md` for the taxonomy.
195
205
 
206
+ ### 4b. Constitutional Pre-Validation Gate
207
+
208
+ Before any LLM-based validation, each proposal passes through a
209
+ deterministic constitutional check that rejects obviously bad proposals
210
+ at zero cost. Four principles are enforced:
211
+
212
+ 1. **Size constraint** — description must be ≤1024 characters and within
213
+ 0.3x–3.0x of the original's word count.
214
+ 2. **No XML injection** — reject proposals containing XML/HTML tags.
215
+ 3. **No unbounded broadening** — reject bare "all", "any", "every",
216
+ "everything" unless qualified by enumeration markers ("including",
217
+ "such as", "like", "e.g.", or a comma-separated list).
218
+ 4. **Anchor preservation** — if the original contains `USE WHEN` trigger
219
+ phrases or `$skillName` references, those must appear in the proposal.
220
+
221
+ If a proposal fails any principle, it is rejected with a descriptive
222
+ violation message and the pipeline retries (if iterations remain).
223
+
224
+ For body evolution (`evolve body`), only the size constraint applies.
225
+
196
226
  ### 5. Propose Description Changes
197
227
 
198
228
  An LLM generates a candidate description that would catch the missed
@@ -211,6 +241,23 @@ The candidate is tested against the full eval set:
211
241
  If validation fails, the command retries up to `--max-iterations` times
212
242
  with adjusted proposals.
213
243
 
244
+ ### Aggregate Metrics To Report
245
+
246
+ When summarizing an evolution run, include these aggregate metrics rather
247
+ than only saying "passed" or "failed":
248
+
249
+ | Metric | Meaning |
250
+ |--------|---------|
251
+ | `original_pass_rate` | Baseline pass rate before the proposal |
252
+ | `proposed_pass_rate` | Pass rate after applying the proposal |
253
+ | `regression_count` | Eval entries that passed before and failed after |
254
+ | `net_change` | Total passes gained minus regressions introduced |
255
+ | `iteration` / `iterations_used` | Which retry produced the current candidate |
256
+ | `baseline_lift` | Additional lift over the no-skill baseline when `--with-baseline` is enabled |
257
+
258
+ These metrics explain whether the proposal is genuinely better, merely
259
+ different, or too risky to deploy.
260
+
214
261
  ### 7. Deploy (or Preview)
215
262
 
216
263
  If `--dry-run`, the proposal is printed but not deployed. The audit log
@@ -292,10 +339,11 @@ Use `--agent <name>` to override (claude, codex, opencode).
292
339
 
293
340
  ## Subagent Escalation
294
341
 
295
- For high-stakes evolutions, consider spawning the `evolution-reviewer` agent
296
- as a subagent to review the proposal before deploying. This is especially
297
- valuable when the skill has a history of regressions, the evolution touches
298
- many trigger phrases, or the confidence score is near the threshold.
342
+ For high-stakes evolutions, read `skill/agents/evolution-reviewer.md` and spawn a
343
+ subagent with those instructions to review the proposal before deploying.
344
+ This is especially valuable when the skill has a history of regressions,
345
+ the evolution touches many trigger phrases, or the confidence score is near
346
+ the threshold.
299
347
 
300
348
  ## Autonomous Mode
301
349
 
@@ -16,7 +16,7 @@ selftune evolve body --skill <name> --skill-path <path> --target <target> [optio
16
16
  |------|-------------|---------|
17
17
  | `--skill <name>` | Skill name | Required |
18
18
  | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
19
- | `--target <type>` | Evolution target: `routing_table` or `full_body` | Required |
19
+ | `--target <type>` | Evolution target: `routing` or `body` | Required |
20
20
  | `--teacher-agent <name>` | Agent CLI for proposal generation | Auto-detected |
21
21
  | `--student-agent <name>` | Agent CLI for validation | Same as teacher |
22
22
  | `--teacher-model <flag>` | Model flag for teacher (e.g. `opus`) | Agent default |
@@ -30,13 +30,13 @@ selftune evolve body --skill <name> --skill-path <path> --target <target> [optio
30
30
 
31
31
  ## Evolution Targets
32
32
 
33
- ### `routing_table`
33
+ ### `routing`
34
34
 
35
35
  Optimizes the `## Workflow Routing` markdown table in SKILL.md. The teacher
36
36
  LLM analyzes missed triggers and proposes new routing entries that map
37
37
  trigger keywords to the correct workflow files.
38
38
 
39
- ### `full_body`
39
+ ### `body`
40
40
 
41
41
  Rewrites the entire SKILL.md body below the frontmatter. This includes
42
42
  the description, routing table, examples, and all other sections. The
@@ -59,42 +59,41 @@ a refined proposal. This repeats up to `--max-iterations` times.
59
59
 
60
60
  ### 0. Pre-Flight Configuration
61
61
 
62
- Before running evolve-body, present configuration options to the user.
63
- If the user says "use defaults" or similar, skip to step 1 with recommended defaults.
64
-
65
- Present these options:
66
-
62
+ Before running evolve-body, use the `AskUserQuestion` tool to present structured configuration options.
63
+ If the user says "use defaults" or similar, skip to step 1 with recommended defaults. If the user cancels, abort the workflow -- do not proceed with defaults.
64
+
65
+ Use `AskUserQuestion` with these questions:
66
+
67
+ ```json
68
+ {
69
+ "questions": [
70
+ {
71
+ "question": "Evolution Target",
72
+ "options": ["Routing table — optimize workflow routing only (recommended)", "Full body — rewrite entire SKILL.md (more aggressive)"]
73
+ },
74
+ {
75
+ "question": "Execution Mode",
76
+ "options": ["Dry run — preview without deploying (recommended)", "Live — validate and deploy if improved"]
77
+ },
78
+ {
79
+ "question": "Teacher Model (generates proposals)",
80
+ "options": ["Balanced (sonnet) — good quality (recommended)", "Best (opus) — highest quality, slower"]
81
+ },
82
+ {
83
+ "question": "Student Model & Iterations",
84
+ "options": ["Fast (haiku) + 3 iterations (recommended)", "Balanced (sonnet) + 3 iterations", "Fast (haiku) + 5 iterations"]
85
+ }
86
+ ]
87
+ }
67
88
  ```
68
- selftune evolve body — Pre-Flight Configuration
69
-
70
- 1. Evolution Target
71
- a) Routing table — optimize the workflow routing table only
72
- b) Full body — rewrite entire SKILL.md body (more aggressive)
73
-
74
- 2. Execution Mode
75
- a) Dry run — preview proposal without deploying (recommended)
76
- b) Live — validate and deploy if improved
77
-
78
- 3. Teacher Model (generates proposals)
79
- a) Balanced (sonnet) — good quality proposals (recommended)
80
- b) Best (opus) — highest quality, slower and more expensive
81
89
 
82
- 4. Student Model (validates proposals)
83
- a) Fast (haiku) — cheap validation (recommended)
84
- b) Balanced (sonnet) — higher quality validation
85
-
86
- 5. Max Iterations: [3] (default)
87
-
88
- 6. Few-Shot Examples: [none] (paths to example SKILL.md files for guidance)
89
-
90
- → Reply with your choices or "use defaults" for recommended settings.
91
- ```
90
+ If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
92
91
 
93
92
  After the user responds, show a confirmation summary:
94
93
 
95
94
  ```
96
95
  Configuration Summary:
97
- Target: routing_table
96
+ Target: routing
98
97
  Mode: dry-run
99
98
  Teacher model: sonnet
100
99
  Student model: haiku
@@ -126,8 +125,8 @@ pipeline. See `references/invocation-taxonomy.md`.
126
125
  ### 4. Generate Proposal (Teacher)
127
126
 
128
127
  The teacher LLM generates a proposal based on the target:
129
- - **routing_table**: Optimized `## Workflow Routing` markdown table
130
- - **full_body**: Complete SKILL.md body replacement
128
+ - **routing**: Optimized `## Workflow Routing` markdown table
129
+ - **body**: Complete SKILL.md body replacement
131
130
 
132
131
  Few-shot examples from `--few-shot` paths provide structural guidance.
133
132
 
@@ -140,20 +139,20 @@ failure details and generates a refined proposal.
140
139
 
141
140
  If `--dry-run`, prints the proposal without deploying. Otherwise:
142
141
  1. Creates a timestamped backup of the current SKILL.md
143
- 2. Applies the change: `replaceSection()` for routing, `replaceBody()` for full_body
142
+ 2. Applies the change: `replaceSection()` for routing, `replaceBody()` for body
144
143
  3. Records audit entries
145
144
  4. Updates evolution memory
146
145
 
147
146
  ## Common Patterns
148
147
 
149
148
  **"Evolve the routing table for the Research skill"**
150
- > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target routing_table`
149
+ > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target routing`
151
150
 
152
151
  **"Rewrite the entire skill body"**
153
- > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target full_body --dry-run`
152
+ > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target body --dry-run`
154
153
 
155
154
  **"Use a stronger model for generation"**
156
- > `selftune evolve body --skill pptx --skill-path /path/SKILL.md --target full_body --teacher-model opus --student-model haiku`
155
+ > `selftune evolve body --skill pptx --skill-path /path/SKILL.md --target body --teacher-model opus --student-model haiku`
157
156
 
158
157
  **"Preview what would change"**
159
158
  > Always start with `--dry-run` to review the proposal before deploying.
@@ -12,15 +12,24 @@ Bootstrap selftune for first-time use or after changing environments.
12
12
 
13
13
  ```bash
14
14
  selftune init [--agent <type>] [--cli-path <path>] [--force]
15
+ selftune init --alpha --alpha-email <email> [--alpha-name "Name"] [--force]
16
+ selftune init --no-alpha [--force]
15
17
  ```
16
18
 
17
19
  ## Options
18
20
 
19
21
  | Flag | Description | Default |
20
22
  |------|-------------|---------|
21
- | `--agent <type>` | Agent platform: `claude`, `codex`, `opencode` | Auto-detected |
23
+ | `--agent <type>` | Agent platform: `claude_code`, `codex`, `opencode`, `openclaw` | Auto-detected |
22
24
  | `--cli-path <path>` | Override auto-detected CLI entry-point path | Auto-detected |
23
25
  | `--force` | Reinitialize even if config already exists | Off |
26
+ | `--enable-autonomy` | Enable autonomous scheduling during init | Off |
27
+ | `--schedule-format <fmt>` | Schedule format: `cron`, `launchd`, `systemd` | Auto-detected |
28
+ | `--alpha` | Enroll in the selftune alpha program | Off |
29
+ | `--no-alpha` | Unenroll from the alpha program (preserves user_id) | Off |
30
+ | `--alpha-email <email>` | Email for alpha enrollment (required with `--alpha`) | - |
31
+ | `--alpha-name <name>` | Display name for alpha enrollment | - |
32
+ | `--alpha-key <key>` | API key for cloud uploads (`st_live_*` format) | - |
24
33
 
25
34
  ## Output Format
26
35
 
@@ -28,12 +37,19 @@ Creates `~/.selftune/config.json`:
28
37
 
29
38
  ```json
30
39
  {
31
- "agent_type": "claude",
40
+ "agent_type": "claude_code",
32
41
  "cli_path": "/Users/you/selftune/cli/selftune/index.ts",
33
42
  "llm_mode": "agent",
34
43
  "agent_cli": "claude",
35
44
  "hooks_installed": true,
36
- "initialized_at": "2026-02-28T10:00:00Z"
45
+ "initialized_at": "2026-02-28T10:00:00Z",
46
+ "alpha": {
47
+ "enrolled": true,
48
+ "user_id": "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
49
+ "email": "user@example.com",
50
+ "display_name": "User Name",
51
+ "consent_timestamp": "2026-02-28T10:00:00Z"
52
+ }
37
53
  }
38
54
  ```
39
55
 
@@ -47,6 +63,12 @@ Creates `~/.selftune/config.json`:
47
63
  | `agent_cli` | string | CLI binary name for the detected agent |
48
64
  | `hooks_installed` | boolean | Whether telemetry hooks are installed |
49
65
  | `initialized_at` | string | ISO 8601 timestamp |
66
+ | `alpha` | object? | Alpha program enrollment (present once the user has enrolled; kept with `enrolled: false` after unenrolling) |
67
+ | `alpha.enrolled` | boolean | Whether the user is currently enrolled |
68
+ | `alpha.user_id` | string | Stable UUID, generated once, preserved across reinits |
69
+ | `alpha.email` | string? | Email provided at enrollment |
70
+ | `alpha.display_name` | string? | Optional display name |
71
+ | `alpha.consent_timestamp` | string | ISO 8601 timestamp of consent |
50
72
 
51
73
  ## Steps
52
74
 
@@ -98,7 +120,7 @@ The init output will report what was installed, e.g.:
98
120
  | `UserPromptSubmit` | `hooks/auto-activate.ts` | Suggest skills before prompt processing |
99
121
  | `PreToolUse` (Write/Edit) | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits |
100
122
  | `PreToolUse` (Write/Edit) | `hooks/evolution-guard.ts` | Block SKILL.md edits on monitored skills |
101
- | `PostToolUse` (Read) | `hooks/skill-eval.ts` | Track skill triggers |
123
+ | `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts` | Track skill triggers and Skill tool invocations |
102
124
  | `Stop` | `hooks/session-stop.ts` | Capture session telemetry |
103
125
 
104
126
  **Codex agents:**
@@ -135,21 +157,7 @@ The activation rules file configures auto-activation behavior -- which skills
135
157
  get suggested and under what conditions. Edit `~/.selftune/activation-rules.json`
136
158
  to customize thresholds and skill mappings for your project.
137
159
 
138
- ### 7. Verify Agent Availability
139
-
140
- `selftune init` installs the specialized agent files to `~/.claude/agents/`
141
- automatically. Verify they are present:
142
-
143
- ```bash
144
- ls ~/.claude/agents/
145
- ```
146
-
147
- Expected agents: `diagnosis-analyst.md`, `pattern-analyst.md`,
148
- `evolution-reviewer.md`, `integration-guide.md`. These are used by evolve
149
- and doctor workflows for deeper analysis. If missing, run `selftune init --force`
150
- to reinstall them.
151
-
152
- ### 8. Verify with Doctor
160
+ ### 7. Verify with Doctor
153
161
 
154
162
  ```bash
155
163
  selftune doctor
@@ -163,17 +171,150 @@ reported issues before proceeding.
163
171
  For project-type-specific setup (single-skill, multi-skill, monorepo, Codex,
164
172
  OpenCode, mixed agents), see [docs/integration-guide.md](../../docs/integration-guide.md).
165
173
 
166
- Templates for each project type are in the `templates/` directory:
167
- - `templates/single-skill-settings.json` — hooks for single-skill projects
168
- - `templates/multi-skill-settings.json` — hooks for multi-skill projects with activation rules
169
- - `templates/activation-rules-default.json` — default auto-activation rule configuration
174
+ Templates for each project type are bundled with the skill:
175
+ - `skill/settings_snippet.json` — hooks for Claude Code projects
176
+ - `assets/activation-rules-default.json` — default auto-activation rule configuration
170
177
 
171
178
  ## Subagent Escalation
172
179
 
173
180
  For complex project structures (monorepos, multi-skill repos, mixed agent
174
- platforms), spawn the `integration-guide` agent as a subagent for guided
175
- setup. This agent handles project-type detection, per-package configuration,
176
- and verification steps that go beyond what the basic init workflow covers.
181
+ platforms), read `agents/integration-guide.md` and spawn a subagent with
182
+ those instructions. That agent handles project-type detection, per-package
183
+ configuration, and verification steps that go beyond what the basic init
184
+ workflow covers.
185
+
186
+ ## Alpha Enrollment
187
+
188
+ Enroll the user in the selftune alpha program for early access features.
189
+
190
+ Before running the alpha command:
191
+ 1. Ask whether the user wants to opt into the selftune alpha data-sharing program
192
+ 2. If they opt in, ask for their email and optional display name
193
+ 3. If they decline, skip alpha enrollment and continue with plain `selftune init`
194
+
195
+ The CLI stays non-interactive. The agent is responsible for collecting consent
196
+ and the required `--alpha-email` value before invoking the command.
197
+
198
+ ## Alpha Enrollment (Agent-First Flow)
199
+
200
+ The alpha program sends canonical telemetry to the selftune cloud for analysis.
201
+ Setup is agent-first — the cloud app is a one-time control-plane handoff, not the main UX.
202
+
203
+ ### Setup Sequence
204
+
205
+ 1. **Check local config**: Run `selftune status` — look for the "Alpha Upload" section
206
+ 2. **If not linked**: Tell the user:
207
+ > To join the selftune alpha program, you need to create an account at https://app.selftune.dev and issue an upload credential. This is a one-time step — afterwards everything runs locally through the CLI.
208
+ 3. **User completes cloud enrollment**: Signs in, enrolls, copies the `st_live_*` credential
209
+ 4. **Store credential locally**:
210
+
211
+ ```bash
212
+ selftune init --alpha --alpha-email <user-email> --alpha-key <st_live_credential>
213
+ ```
214
+
215
+ 5. **Verify readiness**: The init command prints a readiness check. If all checks pass, alpha upload is active.
216
+ The readiness JSON now includes a `guidance` object with:
217
+ - `message`
218
+ - `next_command`
219
+ - `suggested_commands[]`
220
+ - `blocking`
221
+ 6. **If readiness fails**: Run `selftune doctor` to diagnose. Common issues:
222
+ - `api_key not set` → re-run init with `--alpha-key`
223
+ - `api_key has invalid format` → credential must start with `st_live_` or `st_test_`
224
+ - `not enrolled` → re-run init with `--alpha --alpha-email <email> --alpha-key <key>`
225
+
226
+ ### Key Principle
227
+
228
+ The cloud app is used **only** for:
229
+ - Sign-in
230
+ - Alpha enrollment
231
+ - Upload credential issuance
232
+
233
+ All other selftune operations happen through the local CLI and this agent.
234
+
235
+ ### Enroll
236
+
237
+ ```bash
238
+ selftune init --alpha --alpha-email user@example.com --alpha-name "User Name" --force
239
+ selftune init --alpha-key st_live_abc123... # after enrollment, store the API key
240
+ ```
241
+
242
+ The `--alpha-email` flag is required. The command will:
243
+ 1. Generate a stable UUID (preserved across reinits)
244
+ 2. Write the alpha block to `~/.selftune/config.json`
245
+ 3. Print an `alpha_enrolled` JSON message to stdout
246
+ 4. Print the consent notice to stderr
247
+ 5. If an `--alpha-key` is provided, chmod `~/.selftune/config.json` to `0600`
248
+
249
+ The consent notice explicitly states that the friendly alpha cohort shares raw
250
+ prompt/query text in addition to skill/session/evolution metadata.
251
+
252
+ ### API Key Provisioning
253
+
254
+ After enrollment, users need to configure an API key for cloud uploads:
255
+
256
+ 1. Create a cloud account at the selftune web app
257
+ 2. Generate an API key (format: `st_live_*`)
258
+ 3. Store the key locally:
259
+
260
+ ```bash
261
+ selftune init --alpha --alpha-email <email> --alpha-key st_live_abc123... --force
262
+ ```
263
+
264
+ Without an API key, alpha enrollment is recorded locally but no uploads are attempted. When a key is stored, selftune tightens the local config file permissions to `0600`.
265
+
266
+ ### Upload Behavior
267
+
268
+ Once enrolled and an API key is configured, `selftune orchestrate` automatically
269
+ uploads new session, invocation, and evolution data to the cloud API at the end of
270
+ each run. This upload step is fail-open -- errors never block the orchestrate loop.
271
+ Use `selftune alpha upload` for manual uploads or `selftune alpha upload --dry-run`
272
+ to preview what would be sent.
273
+
274
+ The upload endpoint is `https://api.selftune.dev/api/v1/push`, authenticated with
275
+ the stored API key via `Authorization: Bearer` header. The endpoint can be
276
+ overridden with the `SELFTUNE_ALPHA_ENDPOINT` environment variable.
277
+
278
+ ### Unenroll
279
+
280
+ ```bash
281
+ selftune init --no-alpha --force
282
+ ```
283
+
284
+ Sets `enrolled: false` in the alpha block but preserves the `user_id` so re-enrollment does not create a new identity.
285
+
286
+ ### Error Handling
287
+
288
+ If `--alpha` is passed without `--alpha-email`, the CLI throws a JSON error:
289
+
290
+ ```json
291
+ {
292
+ "code": "alpha_email_required",
293
+ "error": "alpha_email_required",
294
+ "message": "The --alpha-email flag is required for alpha enrollment.",
295
+ "next_command": "selftune init --alpha --alpha-email <email>",
296
+ "suggested_commands": ["selftune status", "selftune doctor"],
297
+ "blocking": true
298
+ }
299
+ ```
300
+
301
+ When alpha readiness is evaluated after `selftune init --alpha`, the CLI emits:
302
+
303
+ ```json
304
+ {
305
+ "alpha_readiness": {
306
+ "ready": false,
307
+ "missing": ["api_key not set"],
308
+ "guidance": {
309
+ "code": "alpha_credential_required",
310
+ "message": "Alpha enrollment exists, but the local upload credential is missing or invalid.",
311
+ "next_command": "selftune init --alpha --alpha-email user@example.com --alpha-key <st_live_key> --force",
312
+ "suggested_commands": ["selftune status", "selftune doctor"],
313
+ "blocking": true
314
+ }
315
+ }
316
+ }
317
+ ```
177
318
 
178
319
  ## Common Patterns
179
320
 
@@ -182,6 +323,11 @@ and verification steps that go beyond what the basic init workflow covers.
182
323
  > `npm install -g selftune`. Run `selftune init`, then verify with
183
324
  > `selftune doctor`. Report results to the user.
184
325
 
326
+ **User wants alpha enrollment**
327
+ > Ask whether they want to opt into alpha data sharing. If yes, collect email
328
+ > and optional display name, then run `selftune init --alpha --alpha-email ...`.
329
+ > If no, continue with plain `selftune init`.
330
+
185
331
  **Hooks not capturing data**
186
332
  > Run `selftune doctor` to check hook installation. Parse the JSON output
187
333
  > for failed hook checks. If paths are wrong, update
@@ -26,10 +26,13 @@ selftune orchestrate
26
26
  |------|-------------|---------|
27
27
  | `--dry-run` | Plan and validate without deploying changes | Off |
28
28
  | `--review-required` | Keep validated changes in review mode instead of deploying | Off |
29
+ | `--auto-approve` | *(Deprecated)* Autonomous mode is now the default | — |
29
30
  | `--skill <name>` | Limit the loop to one skill | All skills |
30
- | `--max-skills <n>` | Cap how many candidates are processed in one run | `3` |
31
- | `--recent-window <hours>` | Window for post-deploy watch/rollback checks | `24` |
31
+ | `--max-skills <n>` | Cap how many candidates are processed in one run | `5` |
32
+ | `--recent-window <hours>` | Window for post-deploy watch/rollback checks | `48` |
32
33
  | `--sync-force` | Force a full source replay before candidate selection | Off |
34
+ | `--loop` | Run as a long-lived process that cycles continuously | Off |
35
+ | `--loop-interval <seconds>` | Pause between cycles (minimum 60) | `3600` |
33
36
 
34
37
  ## Default Behavior
35
38
 
@@ -133,6 +136,12 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
133
136
  2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
134
137
  3. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
135
138
  4. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
139
+ 5. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
140
+
141
+ Between candidate selection and evolution, orchestrate checks for
142
+ **cross-skill eval set overlap**. When two or more evolution candidates
143
+ share >30% of their positive eval queries, a warning is logged to stderr.
144
+ This is an informational diagnostic only — it does not block evolution.
136
145
 
137
146
  All sub-workflows run with defaults and no user interaction. The safety
138
147
  model relies on regression thresholds, automatic rollback, and SKILL.md
@@ -29,6 +29,7 @@ selftune sync
29
29
  | `--no-opencode` | Skip OpenCode ingest |
30
30
  | `--no-openclaw` | Skip OpenClaw ingest |
31
31
  | `--no-repair` | Skip rebuilding `skill_usage_repaired.jsonl` |
32
+ | `--json` | Output results as JSON |
32
33
 
33
34
  ## Output
34
35
 
@@ -66,6 +67,28 @@ After sync completes, proceed with the user's intended workflow:
66
67
  `selftune status`, `selftune dashboard`, `selftune watch --sync-first`,
67
68
  or `selftune evolve --sync-first`.
68
69
 
70
+ ## `--json` Usage
71
+
72
+ ```bash
73
+ selftune sync --json
74
+ ```
75
+
76
+ Sample output:
77
+
78
+ ```json
79
+ {
80
+ "sources": {
81
+ "claude": { "scanned": 12, "synced": 3, "skipped": 9 },
82
+ "codex": { "scanned": 0, "synced": 0, "skipped": 0 }
83
+ },
84
+ "repaired": { "total": 42 },
85
+ "errors": []
86
+ }
87
+ ```
88
+
89
+ Use `--json` when the agent needs to parse sync results programmatically
90
+ (e.g., to decide whether to proceed with evolution or surface counts to the user).
91
+
69
92
  ## Common Patterns
70
93
 
71
94
  **User wants to refresh telemetry data**
@@ -17,8 +17,9 @@ selftune watch --skill <name> --skill-path <path> [options]
17
17
  | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
18
18
  | `--window <n>` | Sliding window size (number of sessions) | 20 |
19
19
  | `--threshold <n>` | Regression threshold (drop from baseline) | 0.1 |
20
- | `--baseline <n>` | Explicit baseline pass rate (0-1) | Auto-detected from last deploy |
21
20
  | `--auto-rollback` | Automatically roll back on detected regression | Off |
21
+ | `--sync-first` | Refresh source-truth telemetry before evaluating | Off |
22
+ | `--sync-force` | Force a full source rescan during `--sync-first` | Off |
22
23
 
23
24
  ## Output Format
24
25
 
@@ -138,10 +139,6 @@ context window resets before the user acts on the results.
138
139
  > Use `--auto-rollback`. The command will restore the previous description
139
140
  > automatically if pass rate drops below baseline minus threshold.
140
141
 
141
- **"Set a custom baseline"**
142
- > Use `--baseline 0.85` to override auto-detection. Useful when the
143
- > auto-detected baseline is from an older evolution.
144
-
145
142
  ## Autonomous Mode
146
143
 
147
144
  When called by `selftune orchestrate`, watch runs automatically on recently