selftune 0.2.6 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/README.md +1 -0
  2. package/apps/local-dashboard/dist/assets/index-Bs3Y4ixf.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-C4UYGWKr.js +15 -0
  4. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
  5. package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
  7. package/apps/local-dashboard/dist/index.html +5 -5
  8. package/cli/selftune/activation-rules.ts +57 -18
  9. package/cli/selftune/agent-guidance.ts +96 -0
  10. package/cli/selftune/alpha-identity.ts +156 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
  12. package/cli/selftune/alpha-upload/client.ts +113 -0
  13. package/cli/selftune/alpha-upload/flush.ts +191 -0
  14. package/cli/selftune/alpha-upload/index.ts +194 -0
  15. package/cli/selftune/alpha-upload/queue.ts +252 -0
  16. package/cli/selftune/alpha-upload/stage-canonical.ts +251 -0
  17. package/cli/selftune/alpha-upload-contract.ts +52 -0
  18. package/cli/selftune/auth/device-code.ts +110 -0
  19. package/cli/selftune/auto-update.ts +130 -0
  20. package/cli/selftune/badge/badge.ts +19 -9
  21. package/cli/selftune/canonical-export.ts +16 -3
  22. package/cli/selftune/constants.ts +28 -8
  23. package/cli/selftune/contribute/bundle.ts +33 -5
  24. package/cli/selftune/dashboard-contract.ts +32 -1
  25. package/cli/selftune/dashboard-server.ts +215 -693
  26. package/cli/selftune/dashboard.ts +1 -1
  27. package/cli/selftune/eval/baseline.ts +11 -7
  28. package/cli/selftune/eval/hooks-to-evals.ts +39 -15
  29. package/cli/selftune/eval/synthetic-evals.ts +54 -1
  30. package/cli/selftune/evolution/audit.ts +24 -19
  31. package/cli/selftune/evolution/constitutional.ts +176 -0
  32. package/cli/selftune/evolution/evidence.ts +18 -13
  33. package/cli/selftune/evolution/evolve-body.ts +104 -7
  34. package/cli/selftune/evolution/evolve.ts +195 -22
  35. package/cli/selftune/evolution/propose-body.ts +18 -1
  36. package/cli/selftune/evolution/propose-description.ts +27 -2
  37. package/cli/selftune/evolution/rollback.ts +11 -15
  38. package/cli/selftune/export.ts +84 -0
  39. package/cli/selftune/grading/auto-grade.ts +14 -4
  40. package/cli/selftune/grading/grade-session.ts +17 -6
  41. package/cli/selftune/hooks/auto-activate.ts +5 -0
  42. package/cli/selftune/hooks/evolution-guard.ts +25 -11
  43. package/cli/selftune/hooks/prompt-log.ts +23 -9
  44. package/cli/selftune/hooks/session-stop.ts +78 -15
  45. package/cli/selftune/hooks/skill-eval.ts +189 -10
  46. package/cli/selftune/index.ts +274 -2
  47. package/cli/selftune/ingestors/claude-replay.ts +48 -21
  48. package/cli/selftune/init.ts +260 -49
  49. package/cli/selftune/last.ts +7 -7
  50. package/cli/selftune/localdb/db.ts +90 -10
  51. package/cli/selftune/localdb/direct-write.ts +573 -0
  52. package/cli/selftune/localdb/materialize.ts +296 -42
  53. package/cli/selftune/localdb/queries.ts +482 -32
  54. package/cli/selftune/localdb/schema.ts +153 -1
  55. package/cli/selftune/monitoring/watch.ts +27 -8
  56. package/cli/selftune/normalization.ts +88 -15
  57. package/cli/selftune/observability.ts +257 -5
  58. package/cli/selftune/orchestrate.ts +176 -53
  59. package/cli/selftune/quickstart.ts +34 -10
  60. package/cli/selftune/repair/skill-usage.ts +15 -2
  61. package/cli/selftune/routes/actions.ts +77 -0
  62. package/cli/selftune/routes/badge.ts +66 -0
  63. package/cli/selftune/routes/doctor.ts +12 -0
  64. package/cli/selftune/routes/index.ts +14 -0
  65. package/cli/selftune/routes/orchestrate-runs.ts +13 -0
  66. package/cli/selftune/routes/overview.ts +14 -0
  67. package/cli/selftune/routes/report.ts +293 -0
  68. package/cli/selftune/routes/skill-report.ts +230 -0
  69. package/cli/selftune/status.ts +203 -7
  70. package/cli/selftune/sync.ts +14 -1
  71. package/cli/selftune/types.ts +52 -2
  72. package/cli/selftune/utils/jsonl.ts +58 -1
  73. package/cli/selftune/utils/selftune-meta.ts +38 -0
  74. package/cli/selftune/utils/skill-log.ts +30 -4
  75. package/cli/selftune/utils/transcript.ts +15 -0
  76. package/cli/selftune/workflows/workflows.ts +7 -6
  77. package/package.json +11 -6
  78. package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
  79. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
  80. package/packages/telemetry-contract/fixtures/golden.json +1 -0
  81. package/packages/telemetry-contract/fixtures/index.ts +4 -0
  82. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
  83. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
  84. package/packages/telemetry-contract/package.json +6 -1
  85. package/packages/telemetry-contract/src/schemas.ts +196 -0
  86. package/packages/telemetry-contract/src/types.ts +3 -1
  87. package/packages/telemetry-contract/src/validators.ts +3 -1
  88. package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
  89. package/packages/ui/package.json +4 -0
  90. package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
  91. package/packages/ui/src/components/section-cards.tsx +31 -14
  92. package/packages/ui/src/types.ts +1 -0
  93. package/skill/SKILL.md +214 -174
  94. package/skill/Workflows/AlphaUpload.md +45 -0
  95. package/skill/Workflows/Baseline.md +18 -12
  96. package/skill/Workflows/Composability.md +3 -3
  97. package/skill/Workflows/Dashboard.md +39 -91
  98. package/skill/Workflows/Doctor.md +93 -66
  99. package/skill/Workflows/Evals.md +49 -40
  100. package/skill/Workflows/Evolve.md +76 -28
  101. package/skill/Workflows/EvolveBody.md +37 -38
  102. package/skill/Workflows/Initialize.md +145 -26
  103. package/skill/Workflows/Orchestrate.md +11 -2
  104. package/skill/Workflows/Sync.md +23 -0
  105. package/skill/Workflows/Watch.md +2 -5
  106. package/skill/agents/diagnosis-analyst.md +163 -0
  107. package/skill/agents/evolution-reviewer.md +149 -0
  108. package/skill/agents/integration-guide.md +154 -0
  109. package/skill/agents/pattern-analyst.md +149 -0
  110. package/skill/assets/multi-skill-settings.json +1 -1
  111. package/skill/assets/single-skill-settings.json +1 -1
  112. package/skill/references/interactive-config.md +39 -0
  113. package/skill/references/invocation-taxonomy.md +34 -0
  114. package/skill/references/logs.md +15 -1
  115. package/skill/references/setup-patterns.md +3 -3
  116. package/skill/settings_snippet.json +1 -1
  117. package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
  118. package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
  119. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
@@ -30,7 +30,13 @@ selftune evolve --skill <name> --skill-path <path> [options]
30
30
  | `--confidence <n>` | Minimum confidence threshold (0-1) | 0.6 |
31
31
  | `--max-iterations <n>` | Maximum retry iterations | 3 |
32
32
  | `--validation-model <model>` | Model for trigger-check validation LLM calls | `haiku` |
33
- | `--cheap-loop` | Use cheap models for loop, expensive for final gate | Off |
33
+ | `--pareto` | Generate multiple candidates per iteration | Off |
34
+ | `--candidates <n>` | Number of candidates per iteration (with `--pareto`) | 3 |
35
+ | `--token-efficiency` | Optimize for token efficiency in proposals | Off |
36
+ | `--with-baseline` | Include a no-skill baseline comparison | Off |
37
+ | `--cheap-loop` | Use cheap models for loop, expensive for final gate | On |
38
+ | `--full-model` | Use full-cost model throughout (disables cheap-loop) | Off |
39
+ | `--verbose` | Print detailed progress during evolution | Off |
34
40
  | `--gate-model <model>` | Model for final gate validation | `sonnet` (when `--cheap-loop`) |
35
41
  | `--proposal-model <model>` | Model for proposal generation LLM calls | None |
36
42
  | `--sync-first` | Refresh source-truth telemetry before generating evals/failure patterns | Off |
@@ -89,34 +95,38 @@ The evolution process writes multiple audit entries:
89
95
 
90
96
  ### 0. Pre-Flight Configuration
91
97
 
92
- Before running the evolve command, present numbered configuration options to the user inline in your response, then wait for the user's answer before proceeding.
98
+ Before running the evolve command, use the `AskUserQuestion` tool to present structured configuration options. If the user responds with "use defaults" or similar shorthand, skip to step 1 using the recommended defaults. If the user cancels, stop and do not continue.
93
99
 
94
- If the user responds with "use defaults", "just run it", or similar shorthand, skip to step 1 using the recommended defaults marked below.
100
+ Use `AskUserQuestion` with these questions (max 4 per call — split if needed):
95
101
 
96
- Present the following options inline in your response:
102
+ **Call 1:**
97
103
 
98
- 1. **Execution Mode**
99
- - a) Dry run — preview proposal without deploying (recommended for first run)
100
- - b) Live — validate and deploy if improved
101
-
102
- 2. **Model Tier** (see SKILL.md Model Tier Reference)
103
- - a) Fast (haiku) — cheapest, ~2s/call (recommended with cheap-loop)
104
- - b) Balanced (sonnet) — good quality, ~5s/call
105
- - c) Best (opus) — highest quality, ~10s/call
106
-
107
- 3. **Cost Optimization**
108
- - a) Cheap loop — haiku for iteration, sonnet for final gate (recommended)
109
- - b) Single model — use one model throughout
110
-
111
- 4. **Confidence Threshold:** 0.6 (default, higher = stricter)
112
-
113
- 5. **Max Iterations:** 3 (default, more = longer but better results)
104
+ ```json
105
+ {
106
+ "questions": [
107
+ {
108
+ "question": "Execution Mode",
109
+ "options": ["Dry run — preview without deploying (recommended for first run)", "Live — validate and deploy if improved"]
110
+ },
111
+ {
112
+ "question": "Model Tier (see SKILL.md reference)",
113
+ "options": ["Fast (haiku) — cheapest, ~2s/call (recommended with cheap-loop)", "Balanced (sonnet) — good quality, ~5s/call", "Best (opus) — highest quality, ~10s/call"]
114
+ },
115
+ {
116
+ "question": "Cost Optimization",
117
+ "options": ["Cheap loop — haiku for iteration, sonnet for final gate (recommended)", "Single model — use one model throughout"]
118
+ },
119
+ {
120
+ "question": "Advanced Options",
121
+ "options": ["Defaults (0.6 confidence, 3 iterations, single candidate) (recommended)", "Stricter (0.7 confidence, 5 iterations)", "Pareto mode (multiple candidates per iteration)"]
122
+ }
123
+ ]
124
+ }
125
+ ```
114
126
 
115
- 6. **Multi-Candidate Selection**
116
- - a) Single candidate — one proposal per iteration (recommended)
117
- - b) Pareto mode — generate multiple candidates, pick best on frontier
127
+ If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
118
128
 
119
- Ask: "Reply with your choices (e.g., '1a, 2a, 3a, defaults for rest') or 'use defaults' for recommended settings."
129
+ If the user cancels, stop -- do not proceed with defaults. If the user selects "use defaults", skip to step 1 with recommended defaults.
120
130
 
121
131
  After the user responds, parse their selections and map each choice to the corresponding CLI flags:
122
132
 
@@ -193,6 +203,26 @@ The command groups missed queries by invocation type:
193
203
 
194
204
  See `references/invocation-taxonomy.md` for the taxonomy.
195
205
 
206
+ ### 4b. Constitutional Pre-Validation Gate
207
+
208
+ Before any LLM-based validation, each proposal passes through a
209
+ deterministic constitutional check that rejects obviously bad proposals
210
+ at zero cost. Four principles are enforced:
211
+
212
+ 1. **Size constraint** — description must be ≤1024 characters and within
213
+ 0.3x–3.0x word count of the original.
214
+ 2. **No XML injection** — reject proposals containing XML/HTML tags.
215
+ 3. **No unbounded broadening** — reject bare "all", "any", "every",
216
+ "everything" unless qualified by enumeration markers ("including",
217
+ "such as", "like", "e.g.", or a comma-separated list).
218
+ 4. **Anchor preservation** — if the original contains `USE WHEN` trigger
219
+ phrases or `$skillName` references, those must appear in the proposal.
220
+
221
+ If a proposal fails any principle, it is rejected with a descriptive
222
+ violation message and the pipeline retries (if iterations remain).
223
+
224
+ For body evolution (`evolve body`), only the size constraint applies.
225
+
196
226
  ### 5. Propose Description Changes
197
227
 
198
228
  An LLM generates a candidate description that would catch the missed
@@ -211,6 +241,23 @@ The candidate is tested against the full eval set:
211
241
  If validation fails, the command retries up to `--max-iterations` times
212
242
  with adjusted proposals.
213
243
 
244
+ ### Aggregate Metrics To Report
245
+
246
+ When summarizing an evolution run, include these aggregate metrics rather
247
+ than only saying "passed" or "failed":
248
+
249
+ | Metric | Meaning |
250
+ |--------|---------|
251
+ | `original_pass_rate` | Baseline pass rate before the proposal |
252
+ | `proposed_pass_rate` | Pass rate after applying the proposal |
253
+ | `regression_count` | Eval entries that passed before and failed after |
254
+ | `net_change` | Total passes gained minus regressions introduced |
255
+ | `iteration` / `iterations_used` | Which retry produced the current candidate |
256
+ | `baseline_lift` | Additional lift over the no-skill baseline when `--with-baseline` is enabled |
257
+
258
+ These metrics explain whether the proposal is genuinely better, merely
259
+ different, or too risky to deploy.
260
+
214
261
  ### 7. Deploy (or Preview)
215
262
 
216
263
  If `--dry-run`, the proposal is printed but not deployed. The audit log
@@ -292,10 +339,11 @@ Use `--agent <name>` to override (claude, codex, opencode).
292
339
 
293
340
  ## Subagent Escalation
294
341
 
295
- For high-stakes evolutions, consider spawning the `evolution-reviewer` agent
296
- as a subagent to review the proposal before deploying. This is especially
297
- valuable when the skill has a history of regressions, the evolution touches
298
- many trigger phrases, or the confidence score is near the threshold.
342
+ For high-stakes evolutions, read `skill/agents/evolution-reviewer.md` and spawn a
343
+ subagent with those instructions to review the proposal before deploying.
344
+ This is especially valuable when the skill has a history of regressions,
345
+ the evolution touches many trigger phrases, or the confidence score is near
346
+ the threshold.
299
347
 
300
348
  ## Autonomous Mode
301
349
 
@@ -16,7 +16,7 @@ selftune evolve body --skill <name> --skill-path <path> --target <target> [optio
16
16
  |------|-------------|---------|
17
17
  | `--skill <name>` | Skill name | Required |
18
18
  | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
19
- | `--target <type>` | Evolution target: `routing_table` or `full_body` | Required |
19
+ | `--target <type>` | Evolution target: `routing` or `body` | Required |
20
20
  | `--teacher-agent <name>` | Agent CLI for proposal generation | Auto-detected |
21
21
  | `--student-agent <name>` | Agent CLI for validation | Same as teacher |
22
22
  | `--teacher-model <flag>` | Model flag for teacher (e.g. `opus`) | Agent default |
@@ -30,13 +30,13 @@ selftune evolve body --skill <name> --skill-path <path> --target <target> [optio
30
30
 
31
31
  ## Evolution Targets
32
32
 
33
- ### `routing_table`
33
+ ### `routing`
34
34
 
35
35
  Optimizes the `## Workflow Routing` markdown table in SKILL.md. The teacher
36
36
  LLM analyzes missed triggers and proposes new routing entries that map
37
37
  trigger keywords to the correct workflow files.
38
38
 
39
- ### `full_body`
39
+ ### `body`
40
40
 
41
41
  Rewrites the entire SKILL.md body below the frontmatter. This includes
42
42
  the description, routing table, examples, and all other sections. The
@@ -59,42 +59,41 @@ a refined proposal. This repeats up to `--max-iterations` times.
59
59
 
60
60
  ### 0. Pre-Flight Configuration
61
61
 
62
- Before running evolve-body, present configuration options to the user.
63
- If the user says "use defaults" or similar, skip to step 1 with recommended defaults.
64
-
65
- Present these options:
66
-
62
+ Before running evolve-body, use the `AskUserQuestion` tool to present structured configuration options.
63
+ If the user says "use defaults" or similar, skip to step 1 with recommended defaults. If the user cancels, abort the workflow -- do not proceed with defaults.
64
+
65
+ Use `AskUserQuestion` with these questions:
66
+
67
+ ```json
68
+ {
69
+ "questions": [
70
+ {
71
+ "question": "Evolution Target",
72
+ "options": ["Routing table — optimize workflow routing only (recommended)", "Full body — rewrite entire SKILL.md (more aggressive)"]
73
+ },
74
+ {
75
+ "question": "Execution Mode",
76
+ "options": ["Dry run — preview without deploying (recommended)", "Live — validate and deploy if improved"]
77
+ },
78
+ {
79
+ "question": "Teacher Model (generates proposals)",
80
+ "options": ["Balanced (sonnet) — good quality (recommended)", "Best (opus) — highest quality, slower"]
81
+ },
82
+ {
83
+ "question": "Student Model & Iterations",
84
+ "options": ["Fast (haiku) + 3 iterations (recommended)", "Balanced (sonnet) + 3 iterations", "Fast (haiku) + 5 iterations"]
85
+ }
86
+ ]
87
+ }
67
88
  ```
68
- selftune evolve body — Pre-Flight Configuration
69
-
70
- 1. Evolution Target
71
- a) Routing table — optimize the workflow routing table only
72
- b) Full body — rewrite entire SKILL.md body (more aggressive)
73
-
74
- 2. Execution Mode
75
- a) Dry run — preview proposal without deploying (recommended)
76
- b) Live — validate and deploy if improved
77
-
78
- 3. Teacher Model (generates proposals)
79
- a) Balanced (sonnet) — good quality proposals (recommended)
80
- b) Best (opus) — highest quality, slower and more expensive
81
89
 
82
- 4. Student Model (validates proposals)
83
- a) Fast (haiku) — cheap validation (recommended)
84
- b) Balanced (sonnet) — higher quality validation
85
-
86
- 5. Max Iterations: [3] (default)
87
-
88
- 6. Few-Shot Examples: [none] (paths to example SKILL.md files for guidance)
89
-
90
- → Reply with your choices or "use defaults" for recommended settings.
91
- ```
90
+ If `AskUserQuestion` is not available, fall back to presenting these as inline numbered options.
92
91
 
93
92
  After the user responds, show a confirmation summary:
94
93
 
95
94
  ```
96
95
  Configuration Summary:
97
- Target: routing_table
96
+ Target: routing
98
97
  Mode: dry-run
99
98
  Teacher model: sonnet
100
99
  Student model: haiku
@@ -126,8 +125,8 @@ pipeline. See `references/invocation-taxonomy.md`.
126
125
  ### 4. Generate Proposal (Teacher)
127
126
 
128
127
  The teacher LLM generates a proposal based on the target:
129
- - **routing_table**: Optimized `## Workflow Routing` markdown table
130
- - **full_body**: Complete SKILL.md body replacement
128
+ - **routing**: Optimized `## Workflow Routing` markdown table
129
+ - **body**: Complete SKILL.md body replacement
131
130
 
132
131
  Few-shot examples from `--few-shot` paths provide structural guidance.
133
132
 
@@ -140,20 +139,20 @@ failure details and generates a refined proposal.
140
139
 
141
140
  If `--dry-run`, prints the proposal without deploying. Otherwise:
142
141
  1. Creates a timestamped backup of the current SKILL.md
143
- 2. Applies the change: `replaceSection()` for routing, `replaceBody()` for full_body
142
+ 2. Applies the change: `replaceSection()` for routing, `replaceBody()` for body
144
143
  3. Records audit entries
145
144
  4. Updates evolution memory
146
145
 
147
146
  ## Common Patterns
148
147
 
149
148
  **"Evolve the routing table for the Research skill"**
150
- > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target routing_table`
149
+ > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target routing`
151
150
 
152
151
  **"Rewrite the entire skill body"**
153
- > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target full_body --dry-run`
152
+ > `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target body --dry-run`
154
153
 
155
154
  **"Use a stronger model for generation"**
156
- > `selftune evolve body --skill pptx --skill-path /path/SKILL.md --target full_body --teacher-model opus --student-model haiku`
155
+ > `selftune evolve body --skill pptx --skill-path /path/SKILL.md --target body --teacher-model opus --student-model haiku`
157
156
 
158
157
  **"Preview what would change"**
159
158
  > Always start with `--dry-run` to review the proposal before deploying.
@@ -12,15 +12,23 @@ Bootstrap selftune for first-time use or after changing environments.
12
12
 
13
13
  ```bash
14
14
  selftune init [--agent <type>] [--cli-path <path>] [--force]
15
+ selftune init --alpha --alpha-email <email> [--alpha-name "Name"] [--force]
16
+ selftune init --no-alpha [--force]
15
17
  ```
16
18
 
17
19
  ## Options
18
20
 
19
21
  | Flag | Description | Default |
20
22
  |------|-------------|---------|
21
- | `--agent <type>` | Agent platform: `claude`, `codex`, `opencode` | Auto-detected |
23
+ | `--agent <type>` | Agent platform: `claude_code`, `codex`, `opencode`, `openclaw` | Auto-detected |
22
24
  | `--cli-path <path>` | Override auto-detected CLI entry-point path | Auto-detected |
23
25
  | `--force` | Reinitialize even if config already exists | Off |
26
+ | `--enable-autonomy` | Enable autonomous scheduling during init | Off |
27
+ | `--schedule-format <fmt>` | Schedule format: `cron`, `launchd`, `systemd` | Auto-detected |
28
+ | `--alpha` | Enroll in the selftune alpha program (opens browser for device-code auth) | Off |
29
+ | `--no-alpha` | Unenroll from the alpha program (preserves user_id) | Off |
30
+ | `--alpha-email <email>` | Email for alpha enrollment (required with `--alpha`) | - |
31
+ | `--alpha-name <name>` | Display name for alpha enrollment | - |
24
32
 
25
33
  ## Output Format
26
34
 
@@ -28,12 +36,22 @@ Creates `~/.selftune/config.json`:
28
36
 
29
37
  ```json
30
38
  {
31
- "agent_type": "claude",
39
+ "agent_type": "claude_code",
32
40
  "cli_path": "/Users/you/selftune/cli/selftune/index.ts",
33
41
  "llm_mode": "agent",
34
42
  "agent_cli": "claude",
35
43
  "hooks_installed": true,
36
- "initialized_at": "2026-02-28T10:00:00Z"
44
+ "initialized_at": "2026-02-28T10:00:00Z",
45
+ "alpha": {
46
+ "enrolled": true,
47
+ "user_id": "a1b2c3d4-e5f6-4a7b-8c9d-0e1f2a3b4c5d",
48
+ "cloud_user_id": "cloud-uuid-...",
49
+ "cloud_org_id": "org-uuid-...",
50
+ "email": "user@example.com",
51
+ "display_name": "User Name",
52
+ "consent_timestamp": "2026-02-28T10:00:00Z",
53
+ "api_key": "<provisioned automatically via device-code flow>"
54
+ }
37
55
  }
38
56
  ```
39
57
 
@@ -47,6 +65,15 @@ Creates `~/.selftune/config.json`:
47
65
  | `agent_cli` | string | CLI binary name for the detected agent |
48
66
  | `hooks_installed` | boolean | Whether telemetry hooks are installed |
49
67
  | `initialized_at` | string | ISO 8601 timestamp |
68
+ | `alpha` | object? | Alpha program enrollment (present once the user has enrolled; retained with `enrolled: false` after unenrolling) |
69
+ | `alpha.enrolled` | boolean | Whether the user is currently enrolled |
70
+ | `alpha.user_id` | string | Stable UUID, generated once, preserved across reinits |
71
+ | `alpha.cloud_user_id` | string? | Cloud account UUID (set by device-code flow) |
72
+ | `alpha.cloud_org_id` | string? | Cloud organization UUID (set by device-code flow) |
73
+ | `alpha.email` | string? | Email provided at enrollment |
74
+ | `alpha.display_name` | string? | Optional display name |
75
+ | `alpha.consent_timestamp` | string | ISO 8601 timestamp of consent |
76
+ | `alpha.api_key` | string? | Upload credential (provisioned automatically by device-code flow) |
50
77
 
51
78
  ## Steps
52
79
 
@@ -98,7 +125,7 @@ The init output will report what was installed, e.g.:
98
125
  | `UserPromptSubmit` | `hooks/auto-activate.ts` | Suggest skills before prompt processing |
99
126
  | `PreToolUse` (Write/Edit) | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits |
100
127
  | `PreToolUse` (Write/Edit) | `hooks/evolution-guard.ts` | Block SKILL.md edits on monitored skills |
101
- | `PostToolUse` (Read) | `hooks/skill-eval.ts` | Track skill triggers |
128
+ | `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts` | Track skill triggers and Skill tool invocations |
102
129
  | `Stop` | `hooks/session-stop.ts` | Capture session telemetry |
103
130
 
104
131
  **Codex agents:**
@@ -135,21 +162,7 @@ The activation rules file configures auto-activation behavior -- which skills
135
162
  get suggested and under what conditions. Edit `~/.selftune/activation-rules.json`
136
163
  to customize thresholds and skill mappings for your project.
137
164
 
138
- ### 7. Verify Agent Availability
139
-
140
- `selftune init` installs the specialized agent files to `~/.claude/agents/`
141
- automatically. Verify they are present:
142
-
143
- ```bash
144
- ls ~/.claude/agents/
145
- ```
146
-
147
- Expected agents: `diagnosis-analyst.md`, `pattern-analyst.md`,
148
- `evolution-reviewer.md`, `integration-guide.md`. These are used by evolve
149
- and doctor workflows for deeper analysis. If missing, run `selftune init --force`
150
- to reinstall them.
151
-
152
- ### 8. Verify with Doctor
165
+ ### 7. Verify with Doctor
153
166
 
154
167
  ```bash
155
168
  selftune doctor
@@ -163,17 +176,118 @@ reported issues before proceeding.
163
176
  For project-type-specific setup (single-skill, multi-skill, monorepo, Codex,
164
177
  OpenCode, mixed agents), see [docs/integration-guide.md](../../docs/integration-guide.md).
165
178
 
166
- Templates for each project type are in the `templates/` directory:
167
- - `templates/single-skill-settings.json` — hooks for single-skill projects
168
- - `templates/multi-skill-settings.json` — hooks for multi-skill projects with activation rules
169
- - `templates/activation-rules-default.json` — default auto-activation rule configuration
179
+ Templates for each project type are bundled with the skill:
180
+ - `skill/settings_snippet.json` — hooks for Claude Code projects
181
+ - `assets/activation-rules-default.json` — default auto-activation rule configuration
170
182
 
171
183
  ## Subagent Escalation
172
184
 
173
185
  For complex project structures (monorepos, multi-skill repos, mixed agent
174
- platforms), spawn the `integration-guide` agent as a subagent for guided
175
- setup. This agent handles project-type detection, per-package configuration,
176
- and verification steps that go beyond what the basic init workflow covers.
186
+ platforms), read `agents/integration-guide.md` and spawn a subagent with
187
+ those instructions. That agent handles project-type detection, per-package
188
+ configuration, and verification steps that go beyond what the basic init
189
+ workflow covers.
190
+
191
+ ## Alpha Enrollment
192
+
193
+ Enroll the user in the selftune alpha program for early access features.
194
+
195
+ Before running the alpha command:
196
+ 1. Ask whether the user wants to opt into the selftune alpha data-sharing program
197
+ 2. If they opt in, ask for their email and optional display name
198
+ 3. If they decline, skip alpha enrollment and continue with plain `selftune init`
199
+
200
+ The CLI stays non-interactive. The agent is responsible for collecting consent
201
+ and the required `--alpha-email` value before invoking the command.
202
+
203
+ ## Alpha Enrollment (Device-Code Flow)
204
+
205
+ The alpha program sends canonical telemetry to the selftune cloud for analysis.
206
+ Enrollment uses a device-code flow — one command, one browser approval, fully automatic.
207
+
208
+ ### Setup Sequence
209
+
210
+ 1. **Check local config**: Run `selftune status` — look for the "Alpha Upload" section
211
+ 2. **If not linked**: Collect the user's email and run:
212
+
213
+ ```bash
214
+ selftune init --alpha --alpha-email <user-email> --force
215
+ ```
216
+
217
+ 3. **Browser opens automatically**: The CLI requests a device code, opens the verification URL in the browser with the code pre-filled, and polls for approval.
218
+ 4. **User approves in browser**: One click to authorize.
219
+ 5. **CLI receives credentials**: API key, cloud_user_id, and org_id are automatically provisioned and stored in `~/.selftune/config.json` with `0600` permissions.
220
+ 6. **Verify readiness**: The init command prints a readiness check. If all checks pass, alpha upload is active.
221
+ The readiness JSON includes a `guidance` object with:
222
+ - `message`
223
+ - `next_command`
224
+ - `suggested_commands[]`
225
+ - `blocking`
226
+ 7. **If readiness fails**: Run `selftune doctor` to diagnose. Common issues:
227
+ - `not enrolled` → re-run `selftune init --alpha --alpha-email <email> --force`
228
+ - Device-code expired → re-run the init command (codes expire after ~15 minutes)
229
+
230
+ ### Key Principle
231
+
232
+ The cloud app is used **only** for the one-time browser approval during device-code auth. All other selftune operations happen through the local CLI and this agent.
233
+
234
+ ### Enroll
235
+
236
+ ```bash
237
+ selftune init --alpha --alpha-email user@example.com --alpha-name "User Name" --force
238
+ ```
239
+
240
+ The `--alpha-email` flag is required. The command will:
241
+ 1. Generate a stable UUID (preserved across reinits)
242
+ 2. Request a device code from the cloud API
243
+ 3. Open the browser to the verification URL
244
+ 4. Poll until the user approves
245
+ 5. Receive and store the API key, cloud_user_id, and org_id automatically
246
+ 6. Write the alpha block to `~/.selftune/config.json` with `0600` permissions
247
+ 7. Print an `alpha_enrolled` JSON message to stdout
248
+ 8. Print the consent notice to stderr
249
+
250
+ The consent notice explicitly states that the friendly alpha cohort shares raw
251
+ prompt/query text in addition to skill/session/evolution metadata.
252
+
253
+ ### Upload Behavior
254
+
255
+ Once enrolled, `selftune orchestrate` automatically uploads new session,
256
+ invocation, and evolution data to the cloud API at the end of each run.
257
+ This upload step is fail-open -- errors never block the orchestrate loop.
258
+ Use `selftune alpha upload` for manual uploads or `selftune alpha upload --dry-run`
259
+ to preview what would be sent.
260
+
261
+ The upload endpoint is `https://api.selftune.dev/api/v1/push`, authenticated with
262
+ the stored API key via `Authorization: Bearer` header. The endpoint can be
263
+ overridden with the `SELFTUNE_ALPHA_ENDPOINT` environment variable.
264
+
265
+ ### Unenroll
266
+
267
+ ```bash
268
+ selftune init --no-alpha --force
269
+ ```
270
+
271
+ Sets `enrolled: false` in the alpha block but preserves the `user_id` so re-enrollment does not create a new identity.
272
+
273
+ ### Error Handling
274
+
275
+ If `--alpha` is passed without `--alpha-email`, the CLI throws a JSON error:
276
+
277
+ ```json
278
+ {
279
+ "code": "alpha_email_required",
280
+ "error": "alpha_email_required",
281
+ "message": "The --alpha-email flag is required for alpha enrollment.",
282
+ "next_command": "selftune init --alpha --alpha-email <email>",
283
+ "suggested_commands": ["selftune status", "selftune doctor"],
284
+ "blocking": true
285
+ }
286
+ ```
287
+
288
+ If the device-code flow fails (network error, timeout, user denied), the CLI throws
289
+ with a descriptive error message. The agent should relay this to the user and suggest
290
+ retrying with `selftune init --alpha --alpha-email <email> --force`.
177
291
 
178
292
  ## Common Patterns
179
293
 
@@ -182,6 +296,11 @@ and verification steps that go beyond what the basic init workflow covers.
182
296
  > `npm install -g selftune`. Run `selftune init`, then verify with
183
297
  > `selftune doctor`. Report results to the user.
184
298
 
299
+ **User wants alpha enrollment**
300
+ > Ask whether they want to opt into alpha data sharing. If yes, collect their email
301
+ > and an optional display name, then run `selftune init --alpha --alpha-email ...`.
302
+ > The browser opens automatically for approval. No manual key management needed.
303
+
185
304
  **Hooks not capturing data**
186
305
  > Run `selftune doctor` to check hook installation. Parse the JSON output
187
306
  > for failed hook checks. If paths are wrong, update
@@ -26,10 +26,13 @@ selftune orchestrate
26
26
  |------|-------------|---------|
27
27
  | `--dry-run` | Plan and validate without deploying changes | Off |
28
28
  | `--review-required` | Keep validated changes in review mode instead of deploying | Off |
29
+ | `--auto-approve` | *(Deprecated)* Autonomous mode is now the default | — |
29
30
  | `--skill <name>` | Limit the loop to one skill | All skills |
30
- | `--max-skills <n>` | Cap how many candidates are processed in one run | `3` |
31
- | `--recent-window <hours>` | Window for post-deploy watch/rollback checks | `24` |
31
+ | `--max-skills <n>` | Cap how many candidates are processed in one run | `5` |
32
+ | `--recent-window <hours>` | Window for post-deploy watch/rollback checks | `48` |
32
33
  | `--sync-force` | Force a full source replay before candidate selection | Off |
34
+ | `--loop` | Run as a long-lived process that cycles continuously | Off |
35
+ | `--loop-interval <seconds>` | Pause between cycles (minimum 60) | `3600` |
33
36
 
34
37
  ## Default Behavior
35
38
 
@@ -133,6 +136,12 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
133
136
  2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
134
137
  3. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
135
138
  4. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
139
+ 5. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
140
+
141
+ Between candidate selection and evolution, orchestrate checks for
142
+ **cross-skill eval set overlap**. When two or more evolution candidates
143
+ share >30% of their positive eval queries, a warning is logged to stderr.
144
+ This is an informational diagnostic only — it does not block evolution.
136
145
 
137
146
  All sub-workflows run with defaults and no user interaction. The safety
138
147
  model relies on regression thresholds, automatic rollback, and SKILL.md
@@ -29,6 +29,7 @@ selftune sync
29
29
  | `--no-opencode` | Skip OpenCode ingest |
30
30
  | `--no-openclaw` | Skip OpenClaw ingest |
31
31
  | `--no-repair` | Skip rebuilding `skill_usage_repaired.jsonl` |
32
+ | `--json` | Output results as JSON |
32
33
 
33
34
  ## Output
34
35
 
@@ -66,6 +67,28 @@ After sync completes, proceed with the user's intended workflow:
66
67
  `selftune status`, `selftune dashboard`, `selftune watch --sync-first`,
67
68
  or `selftune evolve --sync-first`.
68
69
 
70
+ ## `--json` Usage
71
+
72
+ ```bash
73
+ selftune sync --json
74
+ ```
75
+
76
+ Sample output:
77
+
78
+ ```json
79
+ {
80
+ "sources": {
81
+ "claude": { "scanned": 12, "synced": 3, "skipped": 9 },
82
+ "codex": { "scanned": 0, "synced": 0, "skipped": 0 }
83
+ },
84
+ "repaired": { "total": 42 },
85
+ "errors": []
86
+ }
87
+ ```
88
+
89
+ Use `--json` when the agent needs to parse sync results programmatically
90
+ (e.g., to decide whether to proceed with evolution or surface counts to the user).
91
+
69
92
  ## Common Patterns
70
93
 
71
94
  **User wants to refresh telemetry data**
@@ -17,8 +17,9 @@ selftune watch --skill <name> --skill-path <path> [options]
17
17
  | `--skill-path <path>` | Path to the skill's SKILL.md | Required |
18
18
  | `--window <n>` | Sliding window size (number of sessions) | 20 |
19
19
  | `--threshold <n>` | Regression threshold (drop from baseline) | 0.1 |
20
- | `--baseline <n>` | Explicit baseline pass rate (0-1) | Auto-detected from last deploy |
21
20
  | `--auto-rollback` | Automatically roll back on detected regression | Off |
21
+ | `--sync-first` | Refresh source-truth telemetry before evaluating | Off |
22
+ | `--sync-force` | Force a full source rescan during `--sync-first` | Off |
22
23
 
23
24
  ## Output Format
24
25
 
@@ -138,10 +139,6 @@ context window resets before the user acts on the results.
138
139
  > Use `--auto-rollback`. The command will restore the previous description
139
140
  > automatically if the pass rate drops below the baseline minus the threshold.
140
141
 
141
- **"Set a custom baseline"**
142
- > Use `--baseline 0.85` to override auto-detection. Useful when the
143
- > auto-detected baseline is from an older evolution.
144
-
145
142
  ## Autonomous Mode
146
143
 
147
144
  When called by `selftune orchestrate`, watch runs automatically on recently