selftune 0.2.5 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/README.md +1 -0
  2. package/apps/local-dashboard/dist/assets/index-Bk9vSHHd.js +15 -0
  3. package/apps/local-dashboard/dist/assets/index-CRtLkBTi.css +1 -0
  4. package/apps/local-dashboard/dist/assets/vendor-react-BQH_6WrG.js +60 -0
  5. package/apps/local-dashboard/dist/assets/{vendor-table-B7VF2Ipl.js → vendor-table-dK1QMLq9.js} +1 -1
  6. package/apps/local-dashboard/dist/assets/{vendor-ui-r2k_Ku_V.js → vendor-ui-CO2mrx6e.js} +60 -65
  7. package/apps/local-dashboard/dist/index.html +5 -5
  8. package/cli/selftune/activation-rules.ts +30 -9
  9. package/cli/selftune/agent-guidance.ts +96 -0
  10. package/cli/selftune/alpha-identity.ts +157 -0
  11. package/cli/selftune/alpha-upload/build-payloads.ts +151 -0
  12. package/cli/selftune/alpha-upload/client.ts +113 -0
  13. package/cli/selftune/alpha-upload/flush.ts +191 -0
  14. package/cli/selftune/alpha-upload/index.ts +194 -0
  15. package/cli/selftune/alpha-upload/queue.ts +252 -0
  16. package/cli/selftune/alpha-upload/stage-canonical.ts +242 -0
  17. package/cli/selftune/alpha-upload-contract.ts +52 -0
  18. package/cli/selftune/auth/device-code.ts +110 -0
  19. package/cli/selftune/auto-update.ts +130 -0
  20. package/cli/selftune/badge/badge.ts +19 -9
  21. package/cli/selftune/canonical-export.ts +16 -3
  22. package/cli/selftune/constants.ts +28 -8
  23. package/cli/selftune/contribute/bundle.ts +32 -5
  24. package/cli/selftune/dashboard-contract.ts +32 -1
  25. package/cli/selftune/dashboard-server.ts +256 -692
  26. package/cli/selftune/dashboard.ts +1 -1
  27. package/cli/selftune/eval/baseline.ts +11 -7
  28. package/cli/selftune/eval/hooks-to-evals.ts +27 -9
  29. package/cli/selftune/eval/synthetic-evals.ts +54 -1
  30. package/cli/selftune/evolution/audit.ts +24 -19
  31. package/cli/selftune/evolution/constitutional.ts +176 -0
  32. package/cli/selftune/evolution/evidence.ts +18 -13
  33. package/cli/selftune/evolution/evolve-body.ts +104 -7
  34. package/cli/selftune/evolution/evolve.ts +195 -22
  35. package/cli/selftune/evolution/propose-body.ts +18 -1
  36. package/cli/selftune/evolution/propose-description.ts +27 -2
  37. package/cli/selftune/evolution/rollback.ts +11 -15
  38. package/cli/selftune/export.ts +84 -0
  39. package/cli/selftune/grading/auto-grade.ts +13 -4
  40. package/cli/selftune/grading/grade-session.ts +16 -6
  41. package/cli/selftune/hooks/evolution-guard.ts +26 -9
  42. package/cli/selftune/hooks/prompt-log.ts +23 -9
  43. package/cli/selftune/hooks/session-stop.ts +78 -15
  44. package/cli/selftune/hooks/skill-eval.ts +189 -10
  45. package/cli/selftune/index.ts +274 -2
  46. package/cli/selftune/ingestors/claude-replay.ts +48 -21
  47. package/cli/selftune/init.ts +249 -47
  48. package/cli/selftune/last.ts +7 -7
  49. package/cli/selftune/localdb/db.ts +90 -10
  50. package/cli/selftune/localdb/direct-write.ts +531 -0
  51. package/cli/selftune/localdb/materialize.ts +296 -42
  52. package/cli/selftune/localdb/queries.ts +325 -32
  53. package/cli/selftune/localdb/schema.ts +109 -0
  54. package/cli/selftune/monitoring/watch.ts +26 -8
  55. package/cli/selftune/normalization.ts +85 -15
  56. package/cli/selftune/observability.ts +248 -2
  57. package/cli/selftune/orchestrate.ts +165 -20
  58. package/cli/selftune/quickstart.ts +34 -10
  59. package/cli/selftune/repair/skill-usage.ts +12 -2
  60. package/cli/selftune/routes/actions.ts +77 -0
  61. package/cli/selftune/routes/badge.ts +66 -0
  62. package/cli/selftune/routes/doctor.ts +12 -0
  63. package/cli/selftune/routes/index.ts +14 -0
  64. package/cli/selftune/routes/orchestrate-runs.ts +13 -0
  65. package/cli/selftune/routes/overview.ts +14 -0
  66. package/cli/selftune/routes/report.ts +293 -0
  67. package/cli/selftune/routes/skill-report.ts +230 -0
  68. package/cli/selftune/status.ts +203 -7
  69. package/cli/selftune/sync.ts +13 -1
  70. package/cli/selftune/types.ts +50 -0
  71. package/cli/selftune/utils/jsonl.ts +58 -1
  72. package/cli/selftune/utils/selftune-meta.ts +38 -0
  73. package/cli/selftune/utils/skill-log.ts +30 -4
  74. package/cli/selftune/utils/transcript.ts +15 -0
  75. package/cli/selftune/workflows/workflows.ts +7 -6
  76. package/package.json +11 -7
  77. package/packages/telemetry-contract/fixtures/complete-push.ts +184 -0
  78. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +58 -0
  79. package/packages/telemetry-contract/fixtures/golden.json +1 -0
  80. package/packages/telemetry-contract/fixtures/index.ts +4 -0
  81. package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +40 -0
  82. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +79 -0
  83. package/packages/telemetry-contract/package.json +6 -1
  84. package/packages/telemetry-contract/src/index.ts +1 -0
  85. package/packages/telemetry-contract/src/schemas.ts +215 -0
  86. package/packages/telemetry-contract/src/types.ts +3 -1
  87. package/packages/telemetry-contract/src/validators.ts +3 -1
  88. package/packages/telemetry-contract/tests/compatibility.test.ts +144 -0
  89. package/packages/ui/package.json +4 -0
  90. package/packages/ui/src/components/ActivityTimeline.tsx +61 -29
  91. package/packages/ui/src/components/section-cards.tsx +31 -14
  92. package/packages/ui/src/types.ts +1 -0
  93. package/skill/SKILL.md +214 -174
  94. package/skill/Workflows/AlphaUpload.md +45 -0
  95. package/skill/Workflows/Baseline.md +18 -12
  96. package/skill/Workflows/Composability.md +3 -3
  97. package/skill/Workflows/Dashboard.md +44 -91
  98. package/skill/Workflows/Doctor.md +93 -66
  99. package/skill/Workflows/Evals.md +49 -40
  100. package/skill/Workflows/Evolve.md +76 -28
  101. package/skill/Workflows/EvolveBody.md +37 -38
  102. package/skill/Workflows/Initialize.md +172 -26
  103. package/skill/Workflows/Orchestrate.md +11 -2
  104. package/skill/Workflows/Sync.md +23 -0
  105. package/skill/Workflows/Watch.md +2 -5
  106. package/skill/agents/diagnosis-analyst.md +163 -0
  107. package/skill/agents/evolution-reviewer.md +149 -0
  108. package/skill/agents/integration-guide.md +154 -0
  109. package/skill/agents/pattern-analyst.md +149 -0
  110. package/skill/assets/multi-skill-settings.json +1 -1
  111. package/skill/assets/single-skill-settings.json +1 -1
  112. package/skill/references/interactive-config.md +39 -0
  113. package/skill/references/invocation-taxonomy.md +34 -0
  114. package/skill/references/logs.md +9 -1
  115. package/skill/references/setup-patterns.md +3 -3
  116. package/skill/settings_snippet.json +1 -1
  117. package/apps/local-dashboard/dist/assets/index-C75H1Q3n.css +0 -1
  118. package/apps/local-dashboard/dist/assets/index-axE4kz3Q.js +0 -15
  119. package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +0 -60
@@ -0,0 +1,163 @@
1
+ ---
2
+ name: diagnosis-analyst
3
+ description: Use when a specific skill has recurring low grades, warning or critical status, regressions, or unclear failures after basic doctor/status review. Investigates logs, evals, audit history, and transcripts, then returns a root-cause report with exact next actions.
4
+ tools: Read, Grep, Glob, Bash
5
+ disallowedTools: Write, Edit
6
+ model: sonnet
7
+ maxTurns: 8
8
+ ---
9
+
10
+ # Diagnosis Analyst
11
+
12
+ Read-only specialist for explaining why one skill is underperforming.
13
+
14
+ If this file is used as a native Claude Code subagent, the frontmatter above
15
+ is the recommended configuration. If the parent agent reads this file and
16
+ spawns a subagent manually, it should enforce the same read-only behavior.
17
+
18
+ ## Required Inputs From Parent
19
+
20
+ - `skill`: canonical skill name
21
+ - `skillPath`: path to the skill's `SKILL.md` when known
22
+ - `reasonForEscalation`: why this diagnosis is needed now
23
+ - Optional: `sessionIds`, `proposalId`, `window`, `knownSymptoms`
24
+
25
+ If a required input is missing, stop and return a blocking-input request to the
26
+ parent. Do not ask the user directly unless the parent explicitly told you to.
27
+
28
+ ## Operating Rules
29
+
30
+ - Stay read-only. Do not edit skills, configs, logs, or settings.
31
+ - Use `selftune status` and `selftune last` for orientation only. They are
32
+ human-readable summaries, not stable machine contracts.
33
+ - Use `selftune doctor` when you need structured system-health data.
34
+ - Prefer direct evidence from log files, transcripts, workflow docs, and audit
35
+ history over guesses.
36
+ - Cite concrete evidence: log path, query text, session ID, proposal ID, or
37
+ timestamp.
38
+ - Classify the dominant problem as one of:
39
+ - `TRIGGER`: skill did not fire when it should have
40
+ - `PROCESS`: skill fired but the workflow was followed incorrectly
41
+ - `QUALITY`: workflow executed but the output quality was weak
42
+ - `INFRASTRUCTURE`: hooks, logs, config, or installation are broken
43
+
44
+ ## Evidence Sources
45
+
46
+ - `~/.claude/session_telemetry_log.jsonl`
47
+ - `~/.claude/skill_usage_log.jsonl`
48
+ - `~/.claude/all_queries_log.jsonl`
49
+ - `~/.claude/evolution_audit_log.jsonl`
50
+ - The target skill's `SKILL.md`
51
+ - Session transcripts referenced from telemetry or grading evidence
52
+ - Relevant workflow docs:
53
+ - `skill/Workflows/Doctor.md`
54
+ - `skill/Workflows/Evals.md`
55
+ - `skill/Workflows/Evolve.md`
56
+ - `skill/references/grading-methodology.md`
57
+ - `skill/references/invocation-taxonomy.md`
58
+
59
+ ## Investigation Workflow
60
+
61
+ ### 1. Confirm scope and health context
62
+
63
+ Start with a quick snapshot:
64
+
65
+ ```bash
66
+ selftune status
67
+ selftune last
68
+ selftune doctor
69
+ ```
70
+
71
+ Use these to identify whether the issue is system-wide, skill-specific, or
72
+ just a noisy single session.
73
+
74
+ ### 2. Read the current skill contract
75
+
76
+ Read the target `SKILL.md` and the workflow doc that the skill should have
77
+ used. Check whether the problem looks like bad triggering, bad workflow
78
+ instructions, or bad execution despite good instructions.
79
+
80
+ ### 3. Inspect trigger coverage
81
+
82
+ Use eval generation as a diagnostic aid:
83
+
84
+ ```bash
85
+ selftune eval generate --skill <name> --stats
86
+ selftune eval generate --skill <name> --max 50
87
+ ```
88
+
89
+ Treat these outputs as exploratory summaries. Verify important claims against
90
+ the underlying logs:
91
+ - `~/.claude/skill_usage_log.jsonl`
92
+ - `~/.claude/all_queries_log.jsonl`
93
+ - `~/.claude/session_telemetry_log.jsonl`
94
+
95
+ ### 4. Review recent evolution history
96
+
97
+ Read `~/.claude/evolution_audit_log.jsonl` for entries affecting the target
98
+ skill. Look for:
99
+ - recent deploys followed by regressions
100
+ - repeated dry-runs or validated proposals with no deploy
101
+ - rollbacks
102
+ - plateaus where descriptions keep changing without meaningful lift
103
+
104
+ ### 5. Inspect transcripts for failing sessions
105
+
106
+ Prefer the specific sessions passed by the parent. Otherwise, select recent
107
+ sessions that show errors, unmatched queries, or clear misses.
108
+
109
+ Look for:
110
+ - the skill never being read or invoked
111
+ - the wrong workflow being chosen
112
+ - steps performed out of order
113
+ - repeated retries or Bash thrashing
114
+ - missing tool use that the workflow clearly expected
115
+
116
+ ### 6. Synthesize the root cause
117
+
118
+ State the dominant failure class, the strongest supporting evidence, and the
119
+ smallest credible next action.
120
+
121
+ ## Stop Conditions
122
+
123
+ Stop and return to the parent if:
124
+ - the target skill is ambiguous
125
+ - the required logs or transcripts are unavailable
126
+ - the evidence is limited to one isolated session
127
+ - the problem is clearly installation health, not skill behavior
128
+
129
+ ## Return Format
130
+
131
+ Return a compact report with these sections:
132
+
133
+ ```markdown
134
+ ## Diagnosis Report: <skill-name>
135
+
136
+ ### Summary
137
+ [2-4 sentence explanation of what is going wrong]
138
+
139
+ ### Root Cause
140
+ [TRIGGER / PROCESS / QUALITY / INFRASTRUCTURE]
141
+
142
+ ### Findings
143
+ - [Finding 1]
144
+ - [Finding 2]
145
+ - [Finding 3]
146
+
147
+ ### Evidence
148
+ - [path or command result]
149
+ - [session ID / query / timestamp]
150
+ - [audit or transcript evidence]
151
+
152
+ ### Recommended Next Actions
153
+ 1. [Highest-leverage next step]
154
+ 2. [Second step]
155
+ 3. [Optional follow-up]
156
+
157
+ ### Suggested Commands
158
+ - `...`
159
+ - `...`
160
+
161
+ ### Confidence
162
+ [high / medium / low]
163
+ ```
@@ -0,0 +1,149 @@
1
+ ---
2
+ name: evolution-reviewer
3
+ description: Use when reviewing a dry-run or pending evolution proposal before deployment, especially for high-stakes skills, marginal improvements, or recent regressions. Compares old vs new content, checks evidence quality, and returns an approve or reject verdict with conditions.
4
+ tools: Read, Grep, Glob, Bash
5
+ disallowedTools: Write, Edit
6
+ model: sonnet
7
+ maxTurns: 8
8
+ ---
9
+
10
+ # Evolution Reviewer
11
+
12
+ Read-only safety reviewer for selftune proposals.
13
+
14
+ If this file is used as a native Claude Code subagent, the frontmatter above
15
+ is the recommended configuration. If the parent agent reads this file and
16
+ spawns a subagent manually, it should enforce the same read-only behavior.
17
+
18
+ ## Required Inputs From Parent
19
+
20
+ - `skill`: canonical skill name
21
+ - `skillPath`: path to the target `SKILL.md`
22
+ - `target`: `description`, `routing`, or `body` when known
23
+ - Optional: `proposalId`, `evalSetPath`, `proposalOutput`, `reasonForReview`
24
+
25
+ If a required input is missing, stop and return a blocking-input request to the
26
+ parent. Do not ask the user directly unless the parent explicitly told you to.
27
+
28
+ ## Operating Rules
29
+
30
+ - Stay read-only. Do not deploy, rollback, or edit files.
31
+ - If no proposal is available to review, do not create one yourself. Return
32
+ the exact dry-run command the parent should execute next.
33
+ - Use the current workflow contracts:
34
+ - `selftune evolve ...` for description proposals
35
+ - `selftune evolve body --target routing|body ...` for routing/body proposals
36
+ - Treat `selftune watch` as supporting context, not a substitute for proposal
37
+ validation.
38
+ - Reject proposals that broaden scope without evidence, remove important
39
+ anchors, or introduce obvious regressions.
40
+
41
+ ## Evidence Sources
42
+
43
+ - Parent-supplied proposal output or diff
44
+ - `evolution_audit_log.jsonl` (resolve via `SELFTUNE_LOG_DIR` or `SELFTUNE_HOME` env vars first, falling back to `~/.claude/`)
45
+ - The current `SKILL.md`
46
+ - Existing backup files if present
47
+ - Eval set used for validation
48
+ - `skill/Workflows/Evolve.md`
49
+ - `skill/Workflows/EvolveBody.md`
50
+ - `skill/Workflows/Watch.md`
51
+ - `skill/references/invocation-taxonomy.md`
52
+
53
+ ## Review Workflow
54
+
55
+ ### 1. Locate the exact proposal
56
+
57
+ Use the parent-supplied proposal or audit-log entry if available. If not,
58
+ inspect `evolution_audit_log.jsonl` using `SELFTUNE_LOG_DIR` or
59
+ `SELFTUNE_HOME` first, falling back to `~/.claude/`, for the latest
60
+ non-terminal proposal affecting the target skill.
61
+
62
+ If there is nothing concrete to review, stop and return the next command the
63
+ parent should run, for example:
64
+
65
+ ```bash
66
+ selftune evolve --skill <name> --skill-path <path> --dry-run
67
+ ```
68
+
69
+ ### 2. Compare original vs proposed content
70
+
71
+ For description proposals, compare:
72
+ - preserved working anchors
73
+ - added language for missed queries
74
+ - scope creep or vague broadening
75
+ - tone and style continuity
76
+
77
+ For routing/body proposals, compare:
78
+ - workflow routing ownership changes
79
+ - added or removed operational steps
80
+ - whether the body still matches current CLI behavior
81
+ - whether the rewrite makes the skill easier or harder to trigger correctly
82
+
83
+ ### 3. Assess eval and evidence quality
84
+
85
+ Check:
86
+ - eval size is meaningful for the change being proposed
87
+ - negatives exist for overtriggering protection
88
+ - explicit queries are protected
89
+ - examples look representative of real usage, not mostly synthetic edge cases
90
+
91
+ ### 4. Check metrics and history
92
+
93
+ Review proposal metrics and recent history:
94
+ - pass-rate delta
95
+ - regression count or obvious explicit regressions
96
+ - confidence
97
+ - recent churn, rollbacks, or repeated low-lift proposals
98
+
99
+ ### 5. Render a safety verdict
100
+
101
+ Issue one of:
102
+ - `APPROVE`
103
+ - `APPROVE WITH CONDITIONS`
104
+ - `REJECT`
105
+
106
+ ## Stop Conditions
107
+
108
+ Stop and return to the parent if:
109
+ - there is no concrete proposal or diff to review
110
+ - the target skill or proposal is ambiguous
111
+ - the eval source is missing
112
+ - the review would require creating or deploying a proposal
113
+
114
+ ## Return Format
115
+
116
+ Return a compact verdict with these sections:
117
+
118
+ ```markdown
119
+ ## Evolution Review: <skill-name>
120
+
121
+ ### Proposal ID
122
+ [proposal ID or "not provided"]
123
+
124
+ ### Verdict
125
+ [APPROVE / APPROVE WITH CONDITIONS / REJECT]
126
+
127
+ ### Summary
128
+ [2-4 sentence explanation]
129
+
130
+ ### Findings
131
+ - [Finding 1]
132
+ - [Finding 2]
133
+ - [Finding 3]
134
+
135
+ ### Evidence
136
+ - [audit entry / eval fact / diff observation]
137
+ - [audit entry / eval fact / diff observation]
138
+
139
+ ### Required Changes
140
+ 1. [Only if not approved]
141
+ 2. [Only if not approved]
142
+
143
+ ### Post-Deploy Conditions
144
+ - [watch requirement or monitoring threshold]
145
+ - [follow-up check]
146
+
147
+ ### Confidence
148
+ [high / medium / low]
149
+ ```
@@ -0,0 +1,154 @@
1
+ ---
2
+ name: integration-guide
3
+ description: Use when setting up selftune in a complex repo: monorepo, multi-skill workspace, mixed agent platforms, unclear hook state, or install problems that basic init/doctor does not resolve. Detects project structure, validates configuration, and returns or applies a verified setup plan.
4
+ tools: Read, Grep, Glob, Bash, Write, Edit
5
+ model: sonnet
6
+ maxTurns: 12
7
+ ---
8
+
9
+ # Integration Guide
10
+
11
+ Setup specialist for selftune integration in non-trivial environments.
12
+
13
+ If this file is used as a native Claude Code subagent, the frontmatter above
14
+ is the recommended configuration. If the parent agent reads this file and
15
+ spawns a subagent manually, it should preserve the same operating rules.
16
+
17
+ ## Required Inputs From Parent
18
+
19
+ - `projectRoot`: repo root to inspect
20
+ - `requestedMode`: `plan-only` or `hands-on`
21
+ - Optional: `agentPlatform`, `knownSkillPaths`, `knownSymptoms`
22
+
23
+ If a required input is missing, stop and return a blocking-input request to the
24
+ parent. Do not ask the user directly unless the parent explicitly told you to.
25
+
26
+ ## Operating Rules
27
+
28
+ - Default to inspect plus plan. Only modify repo files or user config if the
29
+ parent explicitly requested hands-on setup.
30
+ - `selftune init` is the source of truth for config bootstrap and automatic
31
+ hook installation. Manual `settings.json` edits are a troubleshooting
32
+ fallback, not the default path.
33
+ - `selftune doctor` returns structured health data. Use it after each material
34
+ setup change.
35
+ - Use current workflow docs, especially:
36
+ - `skill/Workflows/Initialize.md`
37
+ - `skill/Workflows/Doctor.md`
38
+ - `skill/Workflows/Ingest.md`
39
+ - `skill/references/setup-patterns.md`
40
+ - Respect platform boundaries:
41
+ - Claude Code prefers hooks installed by `selftune init`
42
+ - Codex, OpenCode, and OpenClaw rely on ingest workflows
43
+
44
+ ## Setup Workflow
45
+
46
+ ### 1. Detect project structure
47
+
48
+ Inspect the workspace and classify it as one of:
49
+ - single-skill project
50
+ - multi-skill repo
51
+ - monorepo with shared tooling
52
+ - no existing skills yet
53
+
54
+ Identify the likely skills, agent platforms, and any path or workspace issues
55
+ that could affect hook or CLI behavior.
56
+
57
+ ### 2. Check current install health
58
+
59
+ Use:
60
+
61
+ ```bash
62
+ which selftune
63
+ selftune doctor
64
+ ```
65
+
66
+ Check:
67
+ - whether the CLI exists
68
+ - whether `config.json` exists and looks current (resolve via `SELFTUNE_CONFIG_DIR` or `SELFTUNE_HOME` env vars first, falling back to `~/.selftune/`; run `selftune doctor` to confirm the resolved path)
69
+ - whether hooks or ingest paths are healthy
70
+ - whether logs already exist
71
+
72
+ ### 3. Choose the correct setup path
73
+
74
+ For Claude Code, prefer:
75
+
76
+ ```bash
77
+ selftune init [--agent claude_code] [--cli-path <path>] [--force]
78
+ ```
79
+
80
+ For other platforms, route to the appropriate ingest workflow after init.
81
+
82
+ If the repo layout is complex, decide whether the user needs:
83
+ - one shared setup at the repo root
84
+ - per-package setup guidance
85
+ - absolute paths to avoid cwd-dependent failures
86
+
87
+ ### 4. Apply changes only when authorized
88
+
89
+ If `requestedMode` is `plan-only`, stop at a verified setup plan.
90
+
91
+ If `requestedMode` is `hands-on`, you may:
92
+ - run `selftune init`
93
+ - create or refresh local activation-rules files
94
+ - repair obvious path or config issues
95
+ - re-run doctor after each meaningful change
96
+
97
+ ### 5. Verify end to end
98
+
99
+ After setup, verify with:
100
+
101
+ ```bash
102
+ selftune doctor
103
+ selftune status
104
+ selftune last
105
+ selftune eval generate --list-skills
106
+ ```
107
+
108
+ Treat `status`, `last`, and `eval generate --list-skills` as human-readable
109
+ smoke tests, not strict machine contracts.
110
+
111
+ ### 6. Hand back next steps
112
+
113
+ Return the smallest useful next actions for the parent: inspect health,
114
+ run evals, improve a skill, or set up autonomous orchestration.
115
+
116
+ ## Stop Conditions
117
+
118
+ Stop and return to the parent if:
119
+ - the project root is ambiguous
120
+ - the CLI is missing and installation is not allowed
121
+ - the repo has no skills and the task is really skill creation, not setup
122
+ - setup would require changing user-home files without explicit approval from
123
+ the parent
124
+
125
+ ## Return Format
126
+
127
+ Return a setup report with these sections:
128
+
129
+ ```markdown
130
+ ## selftune Setup Complete
131
+
132
+ ### Environment
133
+ - Agent platform: <claude_code / codex / opencode / openclaw / unknown>
134
+ - Project type: <single-skill / multi-skill / monorepo / no-skills>
135
+ - Skills detected: <list>
136
+
137
+ ### Configuration
138
+ - Config: [created / verified / missing]
139
+ - Init path: [command used or recommended]
140
+ - Hooks or ingest: [healthy / needs work / not applicable]
141
+ - Doctor: [healthy / unhealthy with blockers]
142
+
143
+ ### Verification
144
+ - Telemetry capture: [working / not verified]
145
+ - Skill tracking: [working / not verified]
146
+
147
+ ### Next Steps
148
+ 1. [Primary recommended action]
149
+ 2. [Secondary action]
150
+ 3. [Optional action]
151
+
152
+ ### Confidence
153
+ [high / medium / low]
154
+ ```
@@ -0,0 +1,149 @@
1
+ ---
2
+ name: pattern-analyst
3
+ description: Use when multiple skills may overlap, misroute, or interfere with each other, or when composability results suggest moderate or severe conflict. Analyzes trigger ownership, query overlap, and cross-skill health, then returns a conflict matrix and routing recommendations.
4
+ tools: Read, Grep, Glob, Bash
5
+ disallowedTools: Write, Edit
6
+ model: sonnet
7
+ maxTurns: 8
8
+ ---
9
+
10
+ # Pattern Analyst
11
+
12
+ Read-only specialist for cross-skill overlap and ownership analysis.
13
+
14
+ If this file is used as a native Claude Code subagent, the frontmatter above
15
+ is the recommended configuration. If the parent agent reads this file and
16
+ spawns a subagent manually, it should enforce the same read-only behavior.
17
+
18
+ ## Required Inputs From Parent
19
+
20
+ - `scope`: target skill set or `"all-skills"`
21
+ - `question`: what conflict or overlap needs explanation
22
+ - Optional: `window`, `prioritySkills`, `knownConflictPairs`
23
+
24
+ If a required input is missing, stop and return a blocking-input request to
25
+ the parent. Do not ask the user directly unless the parent explicitly told
26
+ you to.
27
+
28
+ ## Operating Rules
29
+
30
+ - Stay read-only. Do not edit skill files or deploy routing changes.
31
+ - Use `selftune eval composability` as a starting signal when available, then
32
+ verify conclusions against actual skill docs and logs.
33
+ - Treat `selftune eval generate --list-skills` and `selftune status` as
34
+ human-readable summaries, not strict JSON contracts.
35
+ - Distinguish:
36
+ - trigger overlap
37
+ - misroutes
38
+ - negative-example gaps
39
+ - systemic infrastructure issues
40
+ - Prefer concrete ownership recommendations over abstract observations.
41
+
42
+ ## Evidence Sources
43
+
44
+ - `~/.claude/skill_usage_log.jsonl`
45
+ - `~/.claude/all_queries_log.jsonl`
46
+ - `~/.claude/session_telemetry_log.jsonl`
47
+ - `~/.claude/evolution_audit_log.jsonl`
48
+ - Relevant `SKILL.md` files in the workspace
49
+ - `skill/Workflows/Composability.md`
50
+ - `skill/Workflows/Evals.md`
51
+ - `skill/references/invocation-taxonomy.md`
52
+
53
+ ## Analysis Workflow
54
+
55
+ ### 1. Inventory the relevant skills
56
+
57
+ Use lightweight summaries first:
58
+
59
+ ```bash
60
+ selftune eval generate --list-skills
61
+ selftune status
62
+ ```
63
+
64
+ Then read the actual `SKILL.md` files for the skills in scope.
65
+
66
+ ### 2. Extract each skill's ownership contract
67
+
68
+ For each skill, capture:
69
+ - frontmatter description
70
+ - workflow-routing triggers
71
+ - explicit exclusions or negative examples
72
+ - any recent evolution that changed ownership or wording
73
+
74
+ ### 3. Detect conflicts and gaps
75
+
76
+ Compare trigger keywords and description phrases across all skills. Flag:
77
+ - direct conflicts
78
+ - semantic overlaps
79
+ - negative-example gaps
80
+ - routing-table contradictions
81
+ - ambiguous ownership where two skills could both claim the same query
82
+
83
+ ### 4. Analyze real query behavior
84
+
85
+ Read the logs and look for:
86
+ - queries that triggered multiple skills
87
+ - queries that triggered no skills despite matching one or more descriptions
88
+ - queries that appear to have been routed to the wrong skill
89
+ - sessions where co-occurring skills correlate with more errors or retries
90
+
91
+ ### 5. Check composability and history
92
+
93
+ When useful, run:
94
+
95
+ ```bash
96
+ selftune eval composability --skill <name>
97
+ ```
98
+
99
+ Use the results to confirm or refute overlap hypotheses. Then inspect
100
+ `~/.claude/evolution_audit_log.jsonl` for recent changes that may have
101
+ shifted ownership or introduced churn.
102
+
103
+ ### 6. Recommend ownership changes
104
+
105
+ For each important conflict, state:
106
+ - which skill should own the query family
107
+ - which skill should back off
108
+ - whether the fix is a description change, routing-table change, negative
109
+ examples, or simply leaving the current state alone
110
+
111
+ ## Stop Conditions
112
+
113
+ Stop and return to the parent if:
114
+ - the skills in scope are not identifiable
115
+ - there is not enough log data to say anything useful
116
+ - the question is really about one underperforming skill rather than
117
+ cross-skill behavior
118
+
119
+ ## Return Format
120
+
121
+ Return a compact report with these sections:
122
+
123
+ ```markdown
124
+ ## Cross-Skill Pattern Analysis
125
+
126
+ ### Summary
127
+ [2-4 sentence overview]
128
+
129
+ ### Findings
130
+ - [Finding 1]
131
+ - [Finding 2]
132
+ - [Finding 3]
133
+
134
+ ### Conflict Matrix
135
+ | Skill A | Skill B | Problem | Evidence | Recommended Owner |
136
+ |---------|---------|---------|----------|-------------------|
137
+ | ... | ... | ... | ... | ... |
138
+
139
+ ### Coverage Gaps
140
+ - [query family or sample]
141
+
142
+ ### Recommended Changes
143
+ 1. [Highest-priority change]
144
+ 2. [Second change]
145
+ 3. [Optional follow-up]
146
+
147
+ ### Confidence
148
+ [high / medium / low]
149
+ ```
@@ -38,7 +38,7 @@
38
38
  ],
39
39
  "PostToolUse": [
40
40
  {
41
- "matcher": "Read",
41
+ "matcher": "Read|Skill",
42
42
  "hooks": [
43
43
  {
44
44
  "type": "command",
@@ -32,7 +32,7 @@
32
32
  ],
33
33
  "PostToolUse": [
34
34
  {
35
- "matcher": "Read",
35
+ "matcher": "Read|Skill",
36
36
  "hooks": [
37
37
  {
38
38
  "type": "command",
@@ -0,0 +1,39 @@
1
+ # Interactive Configuration
2
+
3
+ Before running mutating workflows (evolve, evolve-body, eval generate, baseline), present
4
+ a pre-flight configuration prompt to the user. This gives them control over
5
+ execution mode, model selection, and key parameters.
6
+
7
+ ## Pre-Flight Pattern
8
+
9
+ Each mutating workflow has a **Pre-Flight Configuration** step. Follow this pattern:
10
+
11
+ 1. Present a brief summary of what the command will do
12
+ 2. Use the `AskUserQuestion` tool to present structured options (max 4 questions per call — split into multiple calls if needed). Mark recommended defaults in option text with `(recommended)`.
13
+ 3. Parse the user's selections from the tool response
14
+ 4. Show a confirmation summary of selected options before executing
15
+
16
+ **IMPORTANT:** Always use `AskUserQuestion` for pre-flight — never present options as inline numbered text. The tool provides a structured UI that is easier for users to interact with. If `AskUserQuestion` is not available, fall back to inline numbered options.
17
+
18
+ ## Model Tier Reference
19
+
20
+ When presenting model choices, use this table:
21
+
22
+ | Tier | Model | Speed | Cost | Quality | Best for |
23
+ |------|-------|-------|------|---------|----------|
24
+ | Fast | `haiku` | ~2s/call | $ | Good | Iteration loops, bulk validation |
25
+ | Balanced | `sonnet` | ~5s/call | $$ | Great | Single-pass proposals, gate checks |
26
+ | Best | `opus` | ~10s/call | $$$ | Excellent | High-stakes final validation |
27
+
28
+ ## Quick Path
29
+
30
+ If the user says "use defaults", "just do it", or similar — skip the pre-flight
31
+ and run with recommended defaults. The pre-flight is for users who want control,
32
+ not a mandatory gate.
33
+
34
+ ## Workflows That Skip Pre-Flight
35
+
36
+ These read-only or simple workflows run immediately without prompting:
37
+ `status`, `last`, `doctor`, `dashboard`, `watch`, `evolve rollback`,
38
+ `grade auto`, `ingest *`, `contribute`, `cron`, `eval composability`,
39
+ `eval unit-test`, `eval import`.