xtrm-tools 0.7.13 → 0.7.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.xtrm/config/hooks.json +10 -0
- package/.xtrm/hooks/specialists-agent-guard.mjs +76 -0
- package/.xtrm/registry.json +440 -412
- package/.xtrm/skills/default/releasing/SKILL.md +49 -45
- package/.xtrm/skills/default/releasing/scripts/xt-reports.ts +18 -0
- package/.xtrm/skills/default/session-close-report/SKILL.md +85 -17
- package/.xtrm/skills/default/specialists-creator/SKILL.md +117 -42
- package/.xtrm/skills/default/specialists-creator/scripts/audit-spec-uniformity.mjs +86 -0
- package/.xtrm/skills/default/specialists-creator/scripts/scaffold-specialist.ts +223 -0
- package/.xtrm/skills/default/specialists-creator/scripts/validate-specialist.ts +1 -1
- package/.xtrm/skills/default/update-specialists/SKILL.md +98 -392
- package/.xtrm/skills/default/using-nodes/SKILL.md +18 -102
- package/.xtrm/skills/default/using-script-specialists/SKILL.md +208 -0
- package/.xtrm/skills/default/using-specialists/SKILL.md +13 -0
- package/.xtrm/skills/default/using-specialists-v2/SKILL.md +105 -15
- package/.xtrm/skills/default/using-specialists-v3/SKILL.md +284 -0
- package/.xtrm/skills/default/using-specialists-v3/evals/evals.json +89 -0
- package/CHANGELOG.md +17 -0
- package/README.md +5 -1
- package/cli/dist/index.cjs +2991 -627
- package/cli/dist/index.cjs.map +1 -1
- package/cli/package.json +1 -1
- package/package.json +3 -2
- package/packages/pi-extensions/.serena/project.yml +11 -0
- package/packages/pi-extensions/package.json +1 -1
- package/scripts/patch-external-pi-tools.mjs +154 -0
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: releasing
|
|
3
3
|
description: >-
|
|
4
|
-
Cut a release
|
|
5
|
-
operator wants to publish a new tag (vX.Y.Z)
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
version: 1.
|
|
4
|
+
Cut a release with the canonical xt release prepare/publish flow. Use when the
|
|
5
|
+
operator wants to publish a new tag (vX.Y.Z). Prepare drafts CHANGELOG from xt
|
|
6
|
+
reports and performs deterministic release-file mutations; publish creates the
|
|
7
|
+
annotated tag, pushes commits/tags, and can create a GitHub release.
|
|
8
|
+
version: 1.2.0
|
|
9
9
|
---
|
|
10
10
|
|
|
11
11
|
# releasing
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
Canonical release publication via `xt release prepare` and `xt release publish`.
|
|
14
14
|
|
|
15
15
|
## When to use
|
|
16
16
|
|
|
@@ -18,73 +18,77 @@ The operator wants to cut a release. They say "release it", "ship vX.Y.Z", "cut
|
|
|
18
18
|
|
|
19
19
|
## How
|
|
20
20
|
|
|
21
|
-
1. Determine
|
|
21
|
+
1. Determine target version. Default is patch bump from most recent semver tag. Operator may specify `--minor`, `--major`, or explicit version.
|
|
22
22
|
|
|
23
|
-
2. Determine
|
|
23
|
+
2. Determine tag range. Default is `<latest-tag>..HEAD`. For backfills, operator names `--from` / `--to` explicitly.
|
|
24
24
|
|
|
25
|
-
3.
|
|
26
|
-
|
|
27
|
-
```
|
|
28
|
-
PROBLEM: Cut release vX.Y.Z covering <prev-tag>..HEAD.
|
|
29
|
-
SUCCESS: CHANGELOG.md updated with new section above prior release; package.json bumped; dist rebuilt; commit `release: vX.Y.Z` pushed with tag.
|
|
30
|
-
SCOPE: CHANGELOG.md, package.json, dist/. Synthesis input: xt reports under .xtrm/reports/ dated within <prev-tag-date>..HEAD.
|
|
31
|
-
NON_GOALS: No source/docs/config edits. No retroactive changes to prior release sections.
|
|
32
|
-
CONSTRAINTS: Keep-a-Changelog v1.0.0 format. One-line bullets. Default bucket Changed. Deprecated only for explicit sunsets.
|
|
33
|
-
VALIDATION: git diff --stat HEAD~1 HEAD shows only CHANGELOG.md, package.json, dist/.
|
|
34
|
-
OUTPUT: Final report with VERSION, COMMIT, TAG, PUSHED status.
|
|
35
|
-
GH_RELEASE: <true|false> # whether to also `gh release create`
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
4. Dispatch the specialist:
|
|
25
|
+
3. Prepare release files:
|
|
39
26
|
|
|
40
27
|
```bash
|
|
41
|
-
|
|
28
|
+
xt release prepare --patch
|
|
29
|
+
# or: xt release prepare --minor --from <tag> --to HEAD
|
|
42
30
|
```
|
|
43
31
|
|
|
44
|
-
|
|
32
|
+
`prepare` is the canonical path. It builds the xt report bundle, calls the specialists changelog drafting script (`sp script changelog-keeper`), updates release files, rebuilds dist, and enforces the release scope guard.
|
|
33
|
+
|
|
34
|
+
Current blocker: until specialists issue `unitAI-dnmcg` lands, `prepare` can fail with `interactive specialists are not allowed` because the changelog drafting specialist is not yet script-compatible. If that happens, do a manual prepare using the same scope rules and then continue with `xt release publish`.
|
|
45
35
|
|
|
46
|
-
|
|
36
|
+
4. Verify release diff before publishing.
|
|
47
37
|
|
|
48
38
|
```bash
|
|
49
39
|
git diff --stat HEAD~1 HEAD
|
|
40
|
+
git status --short
|
|
50
41
|
```
|
|
51
42
|
|
|
52
|
-
|
|
43
|
+
Release diff must be limited to release artifacts such as:
|
|
53
44
|
- `CHANGELOG.md`
|
|
54
|
-
-
|
|
55
|
-
- `
|
|
45
|
+
- package manifests / lockfile for version sync
|
|
46
|
+
- generated `cli/dist/**` or `dist/**`
|
|
56
47
|
|
|
57
|
-
|
|
48
|
+
5. Publish:
|
|
58
49
|
|
|
59
50
|
```bash
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
git push --force-with-lease # only if push already happened
|
|
51
|
+
xt release publish
|
|
52
|
+
# optional GitHub release:
|
|
53
|
+
xt release publish --gh-release
|
|
64
54
|
```
|
|
65
55
|
|
|
66
|
-
|
|
56
|
+
`publish` creates the annotated tag for the current package version, pushes commits and tags, and optionally creates the GitHub release.
|
|
67
57
|
|
|
68
|
-
6.
|
|
58
|
+
6. Confirm:
|
|
69
59
|
|
|
70
60
|
```bash
|
|
71
|
-
git tag --list 'v*' | tail -3
|
|
72
|
-
git log --oneline -1
|
|
61
|
+
git tag --list 'v*' | tail -3
|
|
62
|
+
git log --oneline -1
|
|
63
|
+
git status --short --branch
|
|
73
64
|
```
|
|
74
65
|
|
|
75
66
|
## Why this design
|
|
76
67
|
|
|
77
|
-
-
|
|
78
|
-
-
|
|
79
|
-
-
|
|
80
|
-
- xt
|
|
68
|
+
- `xt` owns deterministic release mutation: changelog insertion, version bump, build, scope guard, commit/tag/push.
|
|
69
|
+
- The specialist owns only changelog drafting from xt reports through a script-compatible, READ_ONLY surface.
|
|
70
|
+
- xt reports are synthesis input, not raw git log + bd query. Reports are pre-curated, signal-rich, written in user-facing language.
|
|
71
|
+
- `xt release publish` is intentionally separate so operators can inspect prepared release files before pushing the tag.
|
|
72
|
+
|
|
73
|
+
## Manual fallback while unitAI-dnmcg is open
|
|
74
|
+
|
|
75
|
+
If `xt release prepare` fails on the changelog script compatibility guard:
|
|
76
|
+
|
|
77
|
+
1. Draft the CHANGELOG section manually from `.xtrm/reports/` and recent commits.
|
|
78
|
+
2. Bump package versions and lockfile.
|
|
79
|
+
3. Run `npm run build`.
|
|
80
|
+
4. Commit with `release: vX.Y.Z`.
|
|
81
|
+
5. Run `xt release publish`.
|
|
82
|
+
|
|
83
|
+
Do not broaden the release diff beyond release artifacts.
|
|
81
84
|
|
|
82
85
|
## Parallel sessions
|
|
83
86
|
|
|
84
|
-
Each orchestrator runs this skill in its own session.
|
|
87
|
+
Each orchestrator runs this skill in its own session. Tagging and pushing happen in `xt release publish`, not in the changelog specialist. If two sessions try the same version, the first push wins; the second sees a remote tag conflict and aborts cleanly. The operator then picks the next version and retries.
|
|
85
88
|
|
|
86
89
|
## Don't
|
|
87
90
|
|
|
88
|
-
- Don't
|
|
89
|
-
- Don't
|
|
90
|
-
- Don't
|
|
91
|
+
- Don't call `sp release prepare` / `sp release publish` as the canonical path. They are deprecated aliases in specialists.
|
|
92
|
+
- Don't bypass `xt release publish` for tag/push unless the command itself is broken.
|
|
93
|
+
- Don't broaden release diffs with source/docs/config changes. File a separate bead for non-release work.
|
|
94
|
+
- Don't pre-stage unrelated files. The release scope guard should see a clean tree except for allowed release artifacts.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
import { buildReportBundle, listXtReports } from '../../../../../cli/src/core/xt-reports.ts';
|
|
4
|
+
|
|
5
|
+
async function main() {
|
|
6
|
+
const since = process.argv[2];
|
|
7
|
+
const to = process.argv[3] ?? 'HEAD';
|
|
8
|
+
const capArg = process.argv[4];
|
|
9
|
+
const capBytes = capArg ? Number(capArg) : 50_000;
|
|
10
|
+
|
|
11
|
+
if (!since) throw new Error('Usage: xt-reports.ts <since> [to] [capBytes]');
|
|
12
|
+
|
|
13
|
+
const reports = listXtReports({ since, to, capBytes });
|
|
14
|
+
const bundle = buildReportBundle(reports, capBytes);
|
|
15
|
+
console.log(bundle.output);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
if (import.meta.main) await main();
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: session-close-report
|
|
3
3
|
description: |
|
|
4
|
-
Generate
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
Generate or update the structured technical handoff report at session close.
|
|
5
|
+
Prefer one same-day SSOT report: update the latest report for today when it
|
|
6
|
+
exists, otherwise run `xt report generate`, then fill every `<!-- FILL -->`
|
|
7
|
+
section from orchestrator context.
|
|
8
8
|
---
|
|
9
9
|
|
|
10
10
|
# session-close-report
|
|
@@ -15,9 +15,43 @@ Invoke this skill at the end of a productive session — after issues are closed
|
|
|
15
15
|
code is committed, but before final push. It produces the handoff report that
|
|
16
16
|
the next agent reads to start cold without losing context.
|
|
17
17
|
|
|
18
|
+
## Report identity rule
|
|
19
|
+
|
|
20
|
+
Prefer a single same-day SSOT handoff report.
|
|
21
|
+
|
|
22
|
+
Before generating anything, check existing reports:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
xt report list
|
|
26
|
+
ls -t .xtrm/reports/*.md 2>/dev/null | head
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Decision:
|
|
30
|
+
- If a report for today already exists, update the latest same-day report.
|
|
31
|
+
- If multiple orchestrators ran today, merge your context into that same report;
|
|
32
|
+
do not create a competing handoff unless the operator explicitly asks for a
|
|
33
|
+
separate report.
|
|
34
|
+
- If no suitable same-day report exists, run `xt report generate` and fill the
|
|
35
|
+
new skeleton.
|
|
36
|
+
|
|
37
|
+
When updating an existing report, preserve prior orchestrator content. Append,
|
|
38
|
+
merge, or revise sections so the file remains one coherent handoff package — do
|
|
39
|
+
not overwrite earlier waves, issue context, problems, or decisions unless they
|
|
40
|
+
are factually superseded.
|
|
41
|
+
|
|
18
42
|
## Workflow
|
|
19
43
|
|
|
20
|
-
### 1.
|
|
44
|
+
### 1. Select report: update existing or generate new
|
|
45
|
+
|
|
46
|
+
For same-day update:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
REPORT=$(ls -t .xtrm/reports/$(date +%F)-*.md 2>/dev/null | head -1)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
If `$REPORT` is non-empty, read and update it.
|
|
53
|
+
|
|
54
|
+
If no same-day report exists:
|
|
21
55
|
|
|
22
56
|
```bash
|
|
23
57
|
xt report generate
|
|
@@ -26,28 +60,42 @@ xt report generate
|
|
|
26
60
|
This collects data from git log, bd, .specialists/jobs/ and writes a skeleton
|
|
27
61
|
to `.xtrm/reports/<date>-<hash>.md` with YAML frontmatter and pre-filled tables.
|
|
28
62
|
|
|
29
|
-
### 2. Read the
|
|
63
|
+
### 2. Read the target report
|
|
64
|
+
|
|
65
|
+
Read the chosen report thoroughly enough to understand its existing content.
|
|
30
66
|
|
|
31
|
-
|
|
32
|
-
|
|
67
|
+
Skeleton reports have `<!-- FILL -->` markers in every section that needs your
|
|
68
|
+
input. Existing same-day reports may already be partially filled; update those
|
|
69
|
+
sections with the new session context and remove any now-stale placeholders.
|
|
33
70
|
|
|
34
|
-
### 3. Fill every section from your context
|
|
71
|
+
### 3. Fill or update every section from your context
|
|
35
72
|
|
|
36
73
|
You are the orchestrator. You have the full session context. The CLI only
|
|
37
74
|
collected raw data — you provide the meaning.
|
|
38
75
|
|
|
76
|
+
When updating an existing same-day report:
|
|
77
|
+
- Add new waves, issues, commits, problems, and decisions without duplicating
|
|
78
|
+
existing rows.
|
|
79
|
+
- Update summary/frontmatter counts to cover the whole same-day handoff, not
|
|
80
|
+
just your sub-session.
|
|
81
|
+
- Reconcile stale “open issues” entries if you closed them later in the day.
|
|
82
|
+
- Keep one chronological/coherent narrative instead of separate mini-reports.
|
|
83
|
+
|
|
39
84
|
**For each section, here is exactly what to write:**
|
|
40
85
|
|
|
41
86
|
#### Summary
|
|
42
87
|
One dense paragraph. What was accomplished, key decisions made, discoveries,
|
|
43
88
|
outcomes. Technical prose — no filler, no "in this session we...". Lead with
|
|
44
|
-
the most important result.
|
|
89
|
+
the most important result. For same-day updates, summarize the whole day’s SSOT
|
|
90
|
+
state, including earlier orchestrators and your additions.
|
|
45
91
|
|
|
46
92
|
#### Issues Closed
|
|
47
93
|
The skeleton has a flat table. Restructure it:
|
|
48
94
|
- Group by category: bugs discovered, backlog items, cleanup/closures, features
|
|
49
95
|
- If specialists were used, add Specialist and Wave columns
|
|
50
96
|
- Expand terse close reasons into useful context
|
|
97
|
+
- When updating an existing report, add newly closed issues and revise stale open
|
|
98
|
+
entries that are now closed
|
|
51
99
|
|
|
52
100
|
#### Issues Filed
|
|
53
101
|
Add every issue you created this session. The **Why** column is mandatory —
|
|
@@ -61,18 +109,22 @@ If specialists were dispatched:
|
|
|
61
109
|
- Add a Problems sub-table for any failed/stalled dispatches
|
|
62
110
|
- Update `specialist_dispatches` and `models_used` in frontmatter
|
|
63
111
|
|
|
64
|
-
If no specialists were used
|
|
112
|
+
If no specialists were used and the report has no prior specialist dispatches,
|
|
113
|
+
delete this section. If prior dispatches exist, keep and extend them.
|
|
65
114
|
|
|
66
115
|
#### Problems Encountered
|
|
67
116
|
Every problem hit during the session. Root Cause and Resolution columns are
|
|
68
117
|
mandatory. Include: bugs discovered, wrong approaches tried, blockers hit,
|
|
69
|
-
tooling failures. If no problems
|
|
118
|
+
tooling failures. If no problems exist anywhere in the same-day report, delete
|
|
119
|
+
this section entirely.
|
|
70
120
|
|
|
71
121
|
#### Code Changes
|
|
72
122
|
The skeleton lists files. Add narrative:
|
|
73
123
|
- Explain key modifications (not every file — focus on the important ones)
|
|
74
124
|
- Group logically if many changes (e.g., "CLI commands", "Hook changes")
|
|
75
125
|
- Note architectural decisions embedded in the changes
|
|
126
|
+
- For same-day updates, include changes from all orchestrators that contributed
|
|
127
|
+
to the final pushed stack
|
|
76
128
|
|
|
77
129
|
#### Documentation Updates
|
|
78
130
|
List doc changes, skill updates, memory saves, CHANGELOG entries.
|
|
@@ -84,6 +136,8 @@ This is the most valuable handoff section. For each open issue:
|
|
|
84
136
|
blockers discovered, suggested approach, files to look at, gotchas.
|
|
85
137
|
- Group into "Ready for next session" and "Backlog" subsections
|
|
86
138
|
- Put the most actionable items first
|
|
139
|
+
- If an issue listed earlier in the day was closed later, remove it from open
|
|
140
|
+
issues and move it to Issues Closed with closure context
|
|
87
141
|
|
|
88
142
|
#### Memories Saved
|
|
89
143
|
List all `bd remember` calls made this session. If the skeleton missed any,
|
|
@@ -96,36 +150,50 @@ Ordered list of 1-4 items with rationale for each. Based on:
|
|
|
96
150
|
- Urgency of discovered issues
|
|
97
151
|
- Blocked items about to unblock
|
|
98
152
|
|
|
153
|
+
For same-day updates, derive these priorities from the final state of the
|
|
154
|
+
whole day, not from an earlier partial state.
|
|
155
|
+
|
|
99
156
|
### 4. Update frontmatter
|
|
100
157
|
|
|
101
|
-
Ensure all frontmatter counts are accurate after filling:
|
|
102
|
-
- `issues_filed` — actual count
|
|
103
|
-
- `specialist_dispatches` — actual count
|
|
104
|
-
- `models_used` — list of models that did work
|
|
158
|
+
Ensure all frontmatter counts are accurate after filling/updating:
|
|
159
|
+
- `issues_filed` — actual count represented in the report
|
|
160
|
+
- `specialist_dispatches` — actual count represented in the report
|
|
161
|
+
- `models_used` — list of models that did work represented in the report
|
|
162
|
+
- `issues_closed` — actual closed issue count represented in the report
|
|
163
|
+
- `commits` — commit count represented in the report, if known
|
|
105
164
|
|
|
106
165
|
### 5. Commit the report
|
|
107
166
|
|
|
167
|
+
Reports are versioned handoff artifacts and should be tracked.
|
|
168
|
+
|
|
108
169
|
```bash
|
|
109
170
|
git add .xtrm/reports/
|
|
110
171
|
git commit -m "session report: <date>"
|
|
111
172
|
```
|
|
112
173
|
|
|
174
|
+
If you updated an existing same-day report after an earlier report commit, commit
|
|
175
|
+
that update with the same message style or fold it into the current final commit
|
|
176
|
+
before push.
|
|
177
|
+
|
|
113
178
|
## Quality bar
|
|
114
179
|
|
|
115
180
|
The reference is `~/projects/specialists/.xtrm/reports/2026-03-30-orchestration-session.md`.
|
|
116
181
|
Every report must match that level of detail. Specifically:
|
|
117
182
|
|
|
118
183
|
- No empty `<!-- FILL -->` markers left in the final output
|
|
184
|
+
- No duplicate same-day reports unless explicitly requested by the operator
|
|
119
185
|
- Every closed issue has context, not just an ID
|
|
120
186
|
- Every open issue has actionable handoff suggestions
|
|
121
187
|
- Problems section captures root causes, not just symptoms
|
|
122
188
|
- Summary is a dense technical paragraph, not a list of bullet points
|
|
189
|
+
- Same-day updates preserve earlier orchestrator context while making the final
|
|
190
|
+
file read as one SSOT handoff package
|
|
123
191
|
|
|
124
192
|
## CLI commands
|
|
125
193
|
|
|
126
194
|
| Command | Purpose |
|
|
127
195
|
|---------|---------|
|
|
128
|
-
| `xt report generate` | Collect data, write skeleton |
|
|
196
|
+
| `xt report generate` | Collect data, write skeleton when no suitable report exists |
|
|
129
197
|
| `xt report show [target]` | Display latest or specified report |
|
|
130
198
|
| `xt report list` | List all reports with frontmatter summary |
|
|
131
199
|
| `xt report diff <a> <b>` | Compare two reports |
|
|
@@ -5,7 +5,7 @@ description: >
|
|
|
5
5
|
agent through writing a valid `.specialist.json`, choosing supported models,
|
|
6
6
|
validating against the schema, and avoiding common specialist authoring
|
|
7
7
|
mistakes.
|
|
8
|
-
version: 1.
|
|
8
|
+
version: 1.2
|
|
9
9
|
synced_at: 236ca5e6
|
|
10
10
|
---
|
|
11
11
|
|
|
@@ -40,6 +40,7 @@ Model tiers:
|
|
|
40
40
|
Rules:
|
|
41
41
|
- Always pick the **highest version** in a family (`claude-sonnet-4-6` not `4-5`, `gemini-3.1-pro-preview` not `gemini-2.5-pro`)
|
|
42
42
|
- `model` and `fallback_model` must be **different providers**
|
|
43
|
+
- If a specialist needs a longer fallback chain, keep the first fallback in `fallback_model` and let the runtime supply any extra retry tiers.
|
|
43
44
|
- Never write a model string you have not pinged in this session
|
|
44
45
|
|
|
45
46
|
---
|
|
@@ -162,6 +163,10 @@ specialists models # confirm assignments look balanced
|
|
|
162
163
|
|
|
163
164
|
---
|
|
164
165
|
|
|
166
|
+
## Canonical references
|
|
167
|
+
|
|
168
|
+
Reference any canonical skill or rule by name; the runtime resolves it.
|
|
169
|
+
|
|
165
170
|
## Quick Start: Scaffold + `sp edit`
|
|
166
171
|
|
|
167
172
|
```bash
|
|
@@ -169,7 +174,7 @@ specialists models # confirm assignments look balanced
|
|
|
169
174
|
node config/skills/specialists-creator/scripts/scaffold-specialist.ts config/specialists/my-specialist.specialist.json
|
|
170
175
|
|
|
171
176
|
# 2. Apply a preset for common model/thinking defaults (optional but preferred)
|
|
172
|
-
sp edit my-specialist --preset
|
|
177
|
+
sp edit my-specialist --preset medium
|
|
173
178
|
|
|
174
179
|
# 3. Set individual fields via dot.path (primary mutation workflow)
|
|
175
180
|
sp edit my-specialist specialist.metadata.name my-specialist
|
|
@@ -177,6 +182,8 @@ sp edit my-specialist specialist.metadata.version 1.0.0
|
|
|
177
182
|
sp edit my-specialist specialist.execution.model anthropic/claude-sonnet-4-6
|
|
178
183
|
sp edit my-specialist specialist.execution.fallback_model google-gemini-cli/gemini-3.1-pro-preview
|
|
179
184
|
sp edit my-specialist specialist.execution.permission_required READ_ONLY
|
|
185
|
+
sp edit my-specialist specialist.execution.extensions.serena false
|
|
186
|
+
sp edit my-specialist specialist.execution.extensions.gitnexus false
|
|
180
187
|
|
|
181
188
|
# 4. Use --file only for multiline prompt fields
|
|
182
189
|
sp edit my-specialist specialist.prompt.system --file .tmp/system.prompt.txt
|
|
@@ -186,7 +193,7 @@ sp edit my-specialist specialist.prompt.task_template --file .tmp/task-template.
|
|
|
186
193
|
sp view my-specialist
|
|
187
194
|
|
|
188
195
|
# 6. Validate schema
|
|
189
|
-
bun skills/
|
|
196
|
+
bun config/skills/specialists-creator/scripts/validate-specialist.ts config/specialists/my-specialist.specialist.json
|
|
190
197
|
```
|
|
191
198
|
|
|
192
199
|
---
|
|
@@ -199,19 +206,47 @@ bun skills/specialist-author/scripts/validate-specialist.ts config/specialists/m
|
|
|
199
206
|
|-------|------|----------|-------|
|
|
200
207
|
| `name` | string | yes | kebab-case: `[a-z][a-z0-9-]*` |
|
|
201
208
|
| `version` | string | yes | semver: `1.0.0` |
|
|
202
|
-
| `description` | string | yes |
|
|
209
|
+
| `description` | string | yes | Routing summary surfaced by `specialists list`; see Description writing below |
|
|
203
210
|
| `category` | string | yes | Free text (e.g. `workflow`, `analysis`, `codegen`) |
|
|
204
211
|
| `author` | string | no | Optional |
|
|
205
212
|
| `created` | string | no | Optional date |
|
|
206
213
|
| `updated` | string | no | Optional date, quote it: `"2026-03-22"` |
|
|
207
214
|
| `tags` | string[] | no | Optional list |
|
|
208
215
|
|
|
216
|
+
|
|
217
|
+
### Description writing for `specialists list`
|
|
218
|
+
|
|
219
|
+
`specialist.metadata.description` is the routing surface that orchestrators see in `specialists list`. Write it as an operational role definition, not marketing copy. Keep the first clause distinctive because list output may truncate.
|
|
220
|
+
|
|
221
|
+
A good description answers, in this order:
|
|
222
|
+
|
|
223
|
+
1. **Choose when** — the task shape that should route here.
|
|
224
|
+
2. **Do not choose when** — adjacent roles that should win instead.
|
|
225
|
+
3. **Distinctive capability** — what this specialist does that others do not.
|
|
226
|
+
4. **Permission/risk note** — READ_ONLY/LOW/MEDIUM/HIGH implication when it affects orchestration.
|
|
227
|
+
|
|
228
|
+
Pattern:
|
|
229
|
+
|
|
230
|
+
```text
|
|
231
|
+
<role noun>. Use for <specific task shape>. Not for <near misses>; use <better roles>. <permission/workflow distinction>.
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
Examples:
|
|
235
|
+
|
|
236
|
+
```text
|
|
237
|
+
Scoped implementation only. Use when requirements, files/symbols, constraints, and validation are clear. Not diagnosis, planning, review, tests, release, or research. HIGH worktree.
|
|
238
|
+
|
|
239
|
+
Debug symptoms/errors/regressions first. Use when cause is unknown or tests fail unexpectedly; traces, fixes targeted code, and verifies. HIGH keep-alive.
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Avoid vague descriptions like "general purpose assistant" or "helps with code". Those cause orchestrators to overuse familiar specialists instead of routing to debugger, test-runner, researcher, sync-docs, or other sharper roles.
|
|
243
|
+
|
|
209
244
|
### `specialist.execution` (required)
|
|
210
245
|
|
|
211
246
|
| Field | Type | Default | Notes |
|
|
212
247
|
|-------|------|---------|-------|
|
|
213
248
|
| `model` | string | — | required — ping before using |
|
|
214
|
-
| `fallback_model` | string | — |
|
|
249
|
+
| `fallback_model` | string | — | first fallback only; runtime may append more tiers |
|
|
215
250
|
| `mode` | enum | `auto` | `tool` \| `skill` \| `auto` |
|
|
216
251
|
| `timeout_ms` | number | `120000` | ms |
|
|
217
252
|
| `stall_timeout_ms` | number | — | kill if no event for N ms |
|
|
@@ -220,6 +255,8 @@ bun skills/specialist-author/scripts/validate-specialist.ts config/specialists/m
|
|
|
220
255
|
| `output_type` | enum | `custom` | `codegen` \| `analysis` \| `review` \| `synthesis` \| `orchestration` \| `workflow` \| `research` \| `custom` |
|
|
221
256
|
| `permission_required` | enum | `READ_ONLY` | see tier table below |
|
|
222
257
|
| `thinking_level` | enum | — | `off` \| `minimal` \| `low` \| `medium` \| `high` \| `xhigh` |
|
|
258
|
+
| `extensions.serena` | boolean | `true` | set `false` to opt out of Serena extension injection for this specialist |
|
|
259
|
+
| `extensions.gitnexus` | boolean | `true` | set `false` to opt out of GitNexus extension injection for this specialist |
|
|
223
260
|
|
|
224
261
|
**When to use `execution.interactive`**
|
|
225
262
|
|
|
@@ -230,17 +267,81 @@ bun skills/specialist-author/scripts/validate-specialist.ts config/specialists/m
|
|
|
230
267
|
- MCP `start_specialist`: `keep_alive` enables, `no_keep_alive` disables.
|
|
231
268
|
- Effective precedence: explicit disable (`--no-keep-alive` / `no_keep_alive`) → explicit enable (`--keep-alive` / `keep_alive`) → `execution.interactive` → one-shot default.
|
|
232
269
|
|
|
233
|
-
**Permission tiers** — controls
|
|
270
|
+
**Permission tiers** — controls the *native* pi tools the specialist gets. The full resolved tool set also includes catalog-defined GitNexus and Serena tools per tier; see [docs/manifest.md](../../../docs/manifest.md) for the complete picture.
|
|
271
|
+
|
|
272
|
+
| Level | Native tools (cumulative) | Use when |
|
|
273
|
+
|-------|---------------------------|----------|
|
|
274
|
+
| `READ_ONLY` | `read, grep, find, ls` | Read-only analysis, no bash |
|
|
275
|
+
| `LOW` | `+ bash` | Inspect/run commands, no file edits |
|
|
276
|
+
| `MEDIUM` | `+ edit` | Can edit existing files |
|
|
277
|
+
| `HIGH` | `+ write` | Full access — can create new files |
|
|
234
278
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
| `HIGH` | `+write` | Full access — can create new files |
|
|
279
|
+
After choosing a tier, verify the resolved tool list before dispatching:
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
sp config show <name> --resolved
|
|
283
|
+
```
|
|
241
284
|
|
|
242
285
|
**Common pitfall:** `READ_WRITE` is **not** a valid value — use `LOW` or higher.
|
|
243
286
|
|
|
287
|
+
### Per-specialist `permissions[<TIER>]` override (rarely needed)
|
|
288
|
+
|
|
289
|
+
Most specialists use the catalog default deny baseline. **Do not declare an override unless this specialist's policy genuinely diverges from its tier.** When you do override, remember the specialist block replaces catalog defaults for that tier.
|
|
290
|
+
|
|
291
|
+
If divergence is real, add a top-level `permissions` block (sibling to `execution`):
|
|
292
|
+
|
|
293
|
+
```jsonc
|
|
294
|
+
{
|
|
295
|
+
"specialist": {
|
|
296
|
+
"execution": { "permission_required": "READ_ONLY" },
|
|
297
|
+
"permissions": {
|
|
298
|
+
"READ_ONLY": {
|
|
299
|
+
"denied_natives_when_extension": ["grep", "find", "ls"],
|
|
300
|
+
"denied_natives_mode": "hard"
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
| Field | Type | Default | Effect |
|
|
308
|
+
|-------|------|---------|--------|
|
|
309
|
+
| `denied_natives_when_extension` | `string[]` | `[]` | Native tools to deny only when a replacement extension is healthy. Catalog defaults apply first; specialist override replaces them for that tier. |
|
|
310
|
+
| `denied_natives_mode` | `"soft"` \| `"hard"` | `"soft"` | `soft` keeps the tool with a preference hint; `hard` removes it (with auto-restore if the extension degrades) |
|
|
311
|
+
|
|
312
|
+
The override block can only *deny* natives — it cannot add new tools beyond the catalog tier. To add tools, change the tier or update the catalog file.
|
|
313
|
+
|
|
314
|
+
**Decision rule when authoring:**
|
|
315
|
+
1. Pick the lowest tier that satisfies the specialist's actual capability needs.
|
|
316
|
+
2. Run `sp config show <name> --resolved` and inspect the `--tools` line.
|
|
317
|
+
3. If the tools are right, you're done — no override needed.
|
|
318
|
+
4. If a native tool is genuinely worse than an extension equivalent for this specialist's task, declare a soft-deny first to observe behavior, then promote to hard-deny once you trust it.
|
|
319
|
+
|
|
320
|
+
See [docs/manifest.md](../../../docs/manifest.md) for full deny-mode semantics, extension health gating, and the canonical explorer example.
|
|
321
|
+
|
|
322
|
+
**Per-specialist extension opt-out**
|
|
323
|
+
|
|
324
|
+
Use `execution.extensions` only when this specialist must suppress default extension injection.
|
|
325
|
+
Both flags default to `true`, so omit this block unless opt-out is required.
|
|
326
|
+
|
|
327
|
+
```json
|
|
328
|
+
{
|
|
329
|
+
"specialist": {
|
|
330
|
+
"execution": {
|
|
331
|
+
"extensions": {
|
|
332
|
+
"serena": false,
|
|
333
|
+
"gitnexus": false
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Typical use cases:
|
|
341
|
+
- `serena: false` for specialists that must avoid Serena tool/LSP injection
|
|
342
|
+
- `gitnexus: false` for specialists that should not receive GitNexus graph tooling
|
|
343
|
+
- set both to `false` for constrained runs that need a clean extension surface
|
|
344
|
+
|
|
244
345
|
### `specialist.prompt` (required)
|
|
245
346
|
|
|
246
347
|
| Field | Type | Required | Notes |
|
|
@@ -356,8 +457,6 @@ planner — epic result:
|
|
|
356
457
|
|
|
357
458
|
`run` accepts either a **file path** (`./scripts/foo.sh`, `~/scripts/foo.sh`) or a **shell command** (`bd ready`, `git status`). Pre-run validation checks that file paths exist and shell commands are on `PATH`. Shebang typos (e.g. `pytho` instead of `python`) are caught and reported as errors before the session starts.
|
|
358
459
|
|
|
359
|
-
`path` is accepted as a deprecated alias for `run`.
|
|
360
|
-
|
|
361
460
|
### `specialist.capabilities` (optional)
|
|
362
461
|
|
|
363
462
|
Informational declarations used by pre-run validation and future tooling (e.g. `specialists doctor`).
|
|
@@ -383,27 +482,6 @@ Informational declarations used by pre-run validation and future tooling (e.g. `
|
|
|
383
482
|
|
|
384
483
|
Writes the final session output to this file path after the session completes. Relative to the working directory.
|
|
385
484
|
|
|
386
|
-
### `specialist.communication` (optional)
|
|
387
|
-
|
|
388
|
-
```json
|
|
389
|
-
{
|
|
390
|
-
"communication": {
|
|
391
|
-
"next_specialists": "planner"
|
|
392
|
-
}
|
|
393
|
-
}
|
|
394
|
-
```
|
|
395
|
-
|
|
396
|
-
Or as an array:
|
|
397
|
-
```json
|
|
398
|
-
{
|
|
399
|
-
"communication": {
|
|
400
|
-
"next_specialists": ["planner", "test-runner"]
|
|
401
|
-
}
|
|
402
|
-
}
|
|
403
|
-
```
|
|
404
|
-
|
|
405
|
-
`next_specialists` declares which specialist(s) should receive this specialist's output as `$previous_result`. Chaining is executed by the caller (e.g. `run_parallel` pipeline) — this field is declarative metadata.
|
|
406
|
-
|
|
407
485
|
### `specialist.validation` (optional)
|
|
408
486
|
|
|
409
487
|
Drives the staleness detection shown in `specialists status` and `specialists list`.
|
|
@@ -480,7 +558,7 @@ Files listed under `skills.paths` are read and appended to the system prompt at
|
|
|
480
558
|
{
|
|
481
559
|
"skills": {
|
|
482
560
|
"paths": [
|
|
483
|
-
"skills/
|
|
561
|
+
".xtrm/skills/active/specialists-creator/SKILL.md",
|
|
484
562
|
".claude/agents.md"
|
|
485
563
|
]
|
|
486
564
|
}
|
|
@@ -576,9 +654,6 @@ Scripts run **locally** (not inside the agent session):
|
|
|
576
654
|
"required_tools": ["bash", "read"],
|
|
577
655
|
"external_commands": ["git"]
|
|
578
656
|
},
|
|
579
|
-
"communication": {
|
|
580
|
-
"next_specialists": ["sync-docs"]
|
|
581
|
-
},
|
|
582
657
|
"output_file": ".specialists/review.md",
|
|
583
658
|
"beads_integration": "auto"
|
|
584
659
|
}
|
|
@@ -681,7 +756,7 @@ pi --model <provider>/<fallback-model-id> --print "ping" # must return "pong"
|
|
|
681
756
|
node config/skills/specialists-creator/scripts/scaffold-specialist.ts config/specialists/my-specialist.specialist.json
|
|
682
757
|
|
|
683
758
|
# 3. Mutate with sp edit (dot.path + presets)
|
|
684
|
-
sp edit my-specialist --preset
|
|
759
|
+
sp edit my-specialist --preset medium
|
|
685
760
|
sp edit my-specialist specialist.execution.model <provider>/<primary-model-id>
|
|
686
761
|
sp edit my-specialist specialist.execution.fallback_model <provider>/<fallback-model-id>
|
|
687
762
|
|
|
@@ -693,7 +768,7 @@ sp edit my-specialist specialist.prompt.task_template --file .tmp/task-template.
|
|
|
693
768
|
sp view my-specialist
|
|
694
769
|
|
|
695
770
|
# 6. Validate schema with the bundled helper
|
|
696
|
-
bun skills/
|
|
771
|
+
bun config/skills/specialists-creator/scripts/validate-specialist.ts config/specialists/my-specialist.specialist.json
|
|
697
772
|
|
|
698
773
|
# 7. List to confirm discovery
|
|
699
774
|
specialists list
|
|
@@ -702,4 +777,4 @@ specialists list
|
|
|
702
777
|
specialists run my-specialist --prompt "ping" --no-beads
|
|
703
778
|
```
|
|
704
779
|
|
|
705
|
-
If you need the underlying implementation, read `skills/
|
|
780
|
+
If you need the underlying implementation, read `config/skills/specialists-creator/scripts/validate-specialist.ts`. It is a thin Bun/TypeScript wrapper over `parseSpecialist()` from `src/specialist/schema.ts`, which keeps the helper cross-platform for Windows, macOS, and Linux.
|