@jaggerxtrm/specialists 3.10.0 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/README.md +3 -0
  2. package/config/hooks/specialists-session-start.mjs +33 -1
  3. package/config/mandatory-rules/changelog-conventions.md +21 -0
  4. package/config/mandatory-rules/changelog-keeper-scope.md +50 -0
  5. package/config/mandatory-rules/gitnexus-required.md +6 -1
  6. package/config/mandatory-rules/sync-docs-scope-discipline.md +40 -0
  7. package/config/skills/releasing/SKILL.md +82 -0
  8. package/config/skills/specialists-creator/SKILL.md +84 -10
  9. package/config/skills/specialists-creator/scripts/validate-specialist.ts +1 -1
  10. package/config/skills/update-specialists/SKILL.md +41 -7
  11. package/config/skills/using-kpi/SKILL.md +150 -0
  12. package/config/skills/using-script-specialists/SKILL.md +208 -0
  13. package/config/skills/using-specialists-v2/SKILL.md +162 -28
  14. package/config/skills/using-specialists-v3/SKILL.md +284 -0
  15. package/config/skills/using-specialists-v3/evals/evals.json +89 -0
  16. package/config/specialists/changelog-drafter.specialist.json +62 -0
  17. package/config/specialists/changelog-keeper.specialist.json +79 -0
  18. package/config/specialists/code-sanity.specialist.json +106 -0
  19. package/config/specialists/debugger.specialist.json +4 -4
  20. package/config/specialists/executor.specialist.json +4 -4
  21. package/config/specialists/explorer.specialist.json +14 -4
  22. package/config/specialists/memory-processor.specialist.json +4 -4
  23. package/config/specialists/node-coordinator.specialist.json +3 -3
  24. package/config/specialists/overthinker.specialist.json +3 -3
  25. package/config/specialists/planner.specialist.json +4 -4
  26. package/config/specialists/researcher.specialist.json +3 -3
  27. package/config/specialists/reviewer.specialist.json +4 -4
  28. package/config/specialists/security-auditor.specialist.json +68 -0
  29. package/config/specialists/specialists-creator.specialist.json +6 -5
  30. package/config/specialists/sync-docs.specialist.json +15 -18
  31. package/config/specialists/test-runner.specialist.json +3 -3
  32. package/config/specialists/xt-merge.specialist.json +4 -4
  33. package/dist/index.js +3323 -1004
  34. package/dist/lib.js +480 -135
  35. package/dist/types/cli/clean.d.ts.map +1 -1
  36. package/dist/types/cli/config.d.ts.map +1 -1
  37. package/dist/types/cli/db.d.ts.map +1 -1
  38. package/dist/types/cli/doctor.d.ts.map +1 -1
  39. package/dist/types/cli/feed.d.ts.map +1 -1
  40. package/dist/types/cli/help.d.ts.map +1 -1
  41. package/dist/types/cli/init.d.ts.map +1 -1
  42. package/dist/types/cli/list.d.ts +4 -0
  43. package/dist/types/cli/list.d.ts.map +1 -1
  44. package/dist/types/cli/merge.d.ts +4 -2
  45. package/dist/types/cli/merge.d.ts.map +1 -1
  46. package/dist/types/cli/node.d.ts.map +1 -1
  47. package/dist/types/cli/prune-stale-defaults.d.ts +2 -0
  48. package/dist/types/cli/prune-stale-defaults.d.ts.map +1 -0
  49. package/dist/types/cli/ps.d.ts.map +1 -1
  50. package/dist/types/cli/result.d.ts.map +1 -1
  51. package/dist/types/cli/run.d.ts.map +1 -1
  52. package/dist/types/cli/script.d.ts.map +1 -1
  53. package/dist/types/cli/serve-hot-reload.d.ts +13 -0
  54. package/dist/types/cli/serve-hot-reload.d.ts.map +1 -0
  55. package/dist/types/cli/serve.d.ts +28 -0
  56. package/dist/types/cli/serve.d.ts.map +1 -1
  57. package/dist/types/cli/status.d.ts.map +1 -1
  58. package/dist/types/cli/stop.d.ts.map +1 -1
  59. package/dist/types/cli/version-check.d.ts +17 -0
  60. package/dist/types/cli/version-check.d.ts.map +1 -0
  61. package/dist/types/index.d.ts +1 -1
  62. package/dist/types/pi/session.d.ts +10 -0
  63. package/dist/types/pi/session.d.ts.map +1 -1
  64. package/dist/types/specialist/canonical-asset-resolver.d.ts +6 -0
  65. package/dist/types/specialist/canonical-asset-resolver.d.ts.map +1 -0
  66. package/dist/types/specialist/drift-detector.d.ts +39 -0
  67. package/dist/types/specialist/drift-detector.d.ts.map +1 -0
  68. package/dist/types/specialist/epic-lifecycle.d.ts.map +1 -1
  69. package/dist/types/specialist/epic-readiness.d.ts.map +1 -1
  70. package/dist/types/specialist/epic-reconciler.d.ts.map +1 -1
  71. package/dist/types/specialist/loader.d.ts +2 -1
  72. package/dist/types/specialist/loader.d.ts.map +1 -1
  73. package/dist/types/specialist/mandatory-rules.d.ts.map +1 -1
  74. package/dist/types/specialist/manifest-resolver.d.ts +55 -0
  75. package/dist/types/specialist/manifest-resolver.d.ts.map +1 -0
  76. package/dist/types/specialist/node-contract.d.ts +2 -2
  77. package/dist/types/specialist/observability-sqlite.d.ts +43 -0
  78. package/dist/types/specialist/observability-sqlite.d.ts.map +1 -1
  79. package/dist/types/specialist/payload-measure.d.ts +19 -0
  80. package/dist/types/specialist/payload-measure.d.ts.map +1 -0
  81. package/dist/types/specialist/porcelain-parser.d.ts +2 -0
  82. package/dist/types/specialist/porcelain-parser.d.ts.map +1 -0
  83. package/dist/types/specialist/resolution-diagnostics.d.ts +36 -0
  84. package/dist/types/specialist/resolution-diagnostics.d.ts.map +1 -0
  85. package/dist/types/specialist/runner.d.ts +8 -0
  86. package/dist/types/specialist/runner.d.ts.map +1 -1
  87. package/dist/types/specialist/schema.d.ts +27 -0
  88. package/dist/types/specialist/schema.d.ts.map +1 -1
  89. package/dist/types/specialist/script-runner.d.ts +44 -1
  90. package/dist/types/specialist/script-runner.d.ts.map +1 -1
  91. package/dist/types/specialist/supervisor.d.ts +4 -0
  92. package/dist/types/specialist/supervisor.d.ts.map +1 -1
  93. package/dist/types/specialist/timeline-events.d.ts +29 -1
  94. package/dist/types/specialist/timeline-events.d.ts.map +1 -1
  95. package/dist/types/specialist/timeline-query.d.ts.map +1 -1
  96. package/dist/types/specialist/tool-catalog.d.ts +126 -0
  97. package/dist/types/specialist/tool-catalog.d.ts.map +1 -0
  98. package/dist/types/tools/specialist/feed_specialist.tool.d.ts +2 -2
  99. package/dist/types/tools/specialist/use_specialist.tool.d.ts.map +1 -1
  100. package/package.json +1 -1
package/README.md CHANGED
@@ -79,9 +79,12 @@ specialists doctor
79
79
  | Need | Doc |
80
80
  |---|---|
81
81
  | Install and bootstrap a project | [docs/bootstrap.md](docs/bootstrap.md) |
82
+ | Release notes and version history | [CHANGELOG.md](CHANGELOG.md) |
83
+ | Changelog drafting specialist | [config/specialists/changelog-keeper.specialist.json](config/specialists/changelog-keeper.specialist.json) |
82
84
  | Run a script-class specialist over HTTP (`sp serve`) — overview & contract | [docs/specialists-service.md](docs/specialists-service.md) |
83
85
  | Install `sp serve` in another project (sidecar Docker / Podman) | [docs/specialists-service-install.md](docs/specialists-service-install.md) |
84
86
  | Build & publish the specialists-service image | [docs/release-image.md](docs/release-image.md) |
87
+ | Release flow (skill + specialist) | [config/skills/releasing/SKILL.md](config/skills/releasing/SKILL.md) |
85
88
  | Bead-first workflow and semantics | [docs/workflow.md](docs/workflow.md) |
86
89
  | CLI commands and flags | [docs/cli-reference.md](docs/cli-reference.md) |
87
90
  | Background jobs, feed, result, stop | [docs/background-jobs.md](docs/background-jobs.md) |
@@ -9,7 +9,8 @@
9
9
  // Hook type: SessionStart
10
10
 
11
11
  import { existsSync, readdirSync, readFileSync } from 'node:fs';
12
- import { join } from 'node:path';
12
+ import { dirname, join } from 'node:path';
13
+ import { fileURLToPath } from 'node:url';
13
14
  import { homedir } from 'node:os';
14
15
 
15
16
  const cwd = process.env.CLAUDE_PROJECT_DIR ?? process.cwd();
@@ -17,6 +18,25 @@ const HOME = homedir();
17
18
  const jobsDir = join(cwd, '.specialists', 'jobs');
18
19
  const lines = [];
19
20
 
21
+ // Resolve specialists package version for hot-tips header.
22
+ function readSpecialistsVersion() {
23
+ // Walk up from this hook's location looking for package.json with name=@jaggerxtrm/specialists or name=specialists.
24
+ let dir = dirname(fileURLToPath(import.meta.url));
25
+ for (let i = 0; i < 8; i++) {
26
+ const pkg = join(dir, 'package.json');
27
+ if (existsSync(pkg)) {
28
+ try {
29
+ const j = JSON.parse(readFileSync(pkg, 'utf-8'));
30
+ if (j?.name && j.name.includes('specialists')) return j.version ?? 'unknown';
31
+ } catch { /* skip */ }
32
+ }
33
+ const parent = dirname(dir);
34
+ if (parent === dir) break;
35
+ dir = parent;
36
+ }
37
+ return 'unknown';
38
+ }
39
+
20
40
  // ── 1. Active background jobs ──────────────────────────────────────────────
21
41
  if (existsSync(jobsDir)) {
22
42
  let entries = [];
@@ -95,6 +115,18 @@ lines.push('specialists doctor # troubleshoot is
95
115
  lines.push('```');
96
116
  lines.push('');
97
117
  lines.push('MCP tools: use_specialist (foreground only)');
118
+ lines.push('');
119
+
120
+ // ── 4. Hot tips (version-pinned, current sp release) ───────────────────────
121
+ const spVersion = readSpecialistsVersion();
122
+ lines.push(`## Specialists — Hot Tips (sp v${spVersion})`);
123
+ lines.push('');
124
+ lines.push('- `--bead` on edit-capable specialists auto-provisions worktree');
125
+ lines.push('- Reviewer enters with `--job <exec-job>`; `--worktree`/`--job` exclusive');
126
+ lines.push('- `sp epic merge <epic>` for epic chains; `sp merge <chain>` for standalone');
127
+ lines.push('- `sp ps`/`sp feed`/`sp result` (sp poll deprecated)');
128
+ lines.push('- `--keep-alive` required so reviewer/overthinker can be `sp resume`d');
129
+ lines.push('- `sp merge` fails after `sp stop` cleans status.json — see unitAI-ofjvj');
98
130
 
99
131
  // ── Output ─────────────────────────────────────────────────────────────────
100
132
  if (lines.length === 0) process.exit(0);
@@ -0,0 +1,21 @@
1
+ ---
2
+ name: changelog-conventions
3
+ kind: mandatory-rule
4
+ rules:
5
+ - id: keep-a-changelog
6
+ level: required
7
+ text: "Use Keep-a-Changelog format with YAML frontmatter, version headers, and top-level sections Added, Changed, Fixed, Removed, Deprecated, Security."
8
+ - id: one-line-entries
9
+ level: required
10
+ text: "Keep each changelog entry to one line."
11
+ - id: bead-references
12
+ level: required
13
+ text: "Include bead-id references in parentheses when helpful, like (unitAI-123)."
14
+ - id: conventional-commit-mapping
15
+ level: required
16
+ text: "Map conventional commits to sections: feat -> Added, fix -> Fixed, refactor/perf -> Changed, docs -> Changed, chore -> Changed unless user-facing, revert -> Removed, sec/security -> Security."
17
+ - id: section-completeness
18
+ level: required
19
+ text: "Draft all applicable sections in this order: Added, Changed, Fixed, Removed, Deprecated, Security. Omit only empty sections."
20
+ ---
21
+ Changelog drafting rules for release automation.
@@ -0,0 +1,50 @@
1
+ ---
2
+ name: changelog-keeper-scope
3
+ kind: mandatory-rule
4
+ ---
5
+ SINGLE PURPOSE. You exist to produce one release: draft the next CHANGELOG.md section, bump package.json, rebuild dist, commit, tag, and push. Nothing else.
6
+
7
+ EDIT WHITELIST. You may write to ONLY these paths:
8
+ - `CHANGELOG.md` (insert the new release section above the previous one)
9
+ - `package.json` (version field only — no other field)
10
+ - `dist/index.js`, `dist/lib.js`, `dist/types/**` (regenerated by `npm run build` — never hand-edit)
11
+
12
+ EDIT BLACKLIST. NEVER write to ANY of:
13
+ - `src/**` (source code — out of scope, ever)
14
+ - `tests/**` (test code — out of scope)
15
+ - `docs/**` (any markdown except CHANGELOG.md is out of scope)
16
+ - `config/**` (specialist configs, mandatory rules, skills — out of scope)
17
+ - `.specialists/**` (runtime state — out of scope)
18
+ - `.xtrm/**`, `.wolf/**`, `.beads/**` (session bookkeeping — out of scope)
19
+ - `README.md`, `CLAUDE.md`, `AGENTS.md`, `XTRM-GUIDE.md` (top-level docs — out of scope)
20
+ - Any other file not in the EDIT WHITELIST above.
21
+
22
+ If you believe a file outside the whitelist must be edited, STOP and emit `BLOCKED: scope-violation` naming the file and the reason. Do not attempt the edit.
23
+
24
+ INPUT DISCIPLINE. Your synthesis input is xtrm session reports under `.xtrm/reports/`. The bead's SCOPE field names the relevant tag range. Read reports with `Read` and decide which apply. Supplement with `git log --oneline <prev-tag>..HEAD` for tag verification. Do not crawl `src/`, `docs/`, or other source. The reports are pre-filtered, curated synthesis input — that is why they exist.
25
+
26
+ SECTION FORMAT. Apply changelog-conventions (Keep-a-Changelog v1.0.0, one-line bullets, bead-id refs in parens, sections in order Added/Changed/Fixed/Removed/Deprecated/Security, omit empty). Default bucket is Changed. Deprecated is ONLY for explicit sunset/removal notices. No meta-commentary in bullets ("Conventional commit mapping applied", "Bead IDs included parenthetically", etc. — banned).
27
+
28
+ VERSION POLICY. Default is patch bump (`v3.10.0` → `v3.10.1`). Use minor for new features (`v3.11.0`), major only on explicit operator instruction. The bead names the target version explicitly OR specifies the bump type; if neither is present, STOP and emit `BLOCKED: version-not-specified`.
29
+
30
+ INSERT POSITION. The new section goes immediately above the most recent existing release section, below the `[Unreleased]` placeholder. Re-emit an empty `[Unreleased]` placeholder above the new section.
31
+
32
+ GIT DISCIPLINE. After file edits + rebuild succeed:
33
+ - `git add CHANGELOG.md package.json dist/` (no other paths)
34
+ - `git diff --cached --stat` and confirm only whitelisted paths are staged. If anything else is staged, STOP and report.
35
+ - `git commit -m "release: vX.Y.Z"` (exactly this format, no other prefix or suffix)
36
+ - `git tag -a vX.Y.Z -m "<one-line summary derived from changelog section>"`
37
+ - `git push --follow-tags origin <branch>`
38
+ - Optional: `gh release create vX.Y.Z --notes "<the changelog section body>"` (only if `gh` is available and the bead requests it)
39
+
40
+ NO DESTRUCTIVE OPS. Never `git reset --hard`, never `git push --force`, never delete tags, never rewrite history. If a prior release commit/tag is wrong, STOP and report — operator handles repair.
41
+
42
+ SELF-VERIFY. Before finishing, run `git diff --stat HEAD~1 HEAD` and confirm the result matches:
43
+ - `CHANGELOG.md` modified
44
+ - `package.json` modified
45
+ - `dist/**` modified
46
+ - nothing else
47
+
48
+ If anything else appears, the operator's manual edits leaked in. STOP and emit `BLOCKED: scope-leak` naming the offending paths.
49
+
50
+ OUTPUT SHAPE. Final report must include: `VERSION: vX.Y.Z`, `VERDICT: <RELEASED|BLOCKED>`, `SECTION_DRAFTED: <one-line summary>`, `FILES_CHANGED: <list>`, `COMMIT: <sha>`, `TAG: <vX.Y.Z>`, `PUSHED: <true|false>`, `GH_RELEASE: <url|none>`. On BLOCKED, name the precondition violated.
@@ -11,6 +11,11 @@ Tools (prefer MCP; fall back to CLI if MCP unavailable):
11
11
  - Pre-commit scope check: `gitnexus_detect_changes()` (MCP only — fallback: `git diff --stat`).
12
12
 
13
13
  Rules:
14
- - Run impact for every symbol you modify; never edit without it.
14
+ - Run impact for every existing symbol you modify; never edit without it.
15
15
  - Never rename via find-replace — use `gitnexus_rename({symbol_name, new_name, dry_run:true})` first.
16
16
  - If index is stale, ask the user to run `npx gitnexus analyze`.
17
+
18
+ New-file scope (escape hatch):
19
+ - When the diff adds only new files (new specialist JSON, new mandatory-rule, new test, new doc) and modifies no existing functions/classes/methods, blast-radius analysis is moot. State this explicitly in your output ("new-file scope; no existing-symbol modifications") and skip the impact call.
20
+ - This also applies to dispatch entries that merely add a routing case to an existing function (e.g. `src/index.ts` subcommand wiring): the touched symbol is the dispatcher, but the change is purely additive and equivalent to a registration. Note the dispatch addition and skip the impact call — list the new files instead.
21
+ - Reviewer compliance: when authoritative_diff shows only new files (or additive dispatch entries), the "verify blast radius" requirement is satisfied by the executor's new-file-scope statement; do not flag as unmet.
@@ -0,0 +1,40 @@
1
+ ---
2
+ name: sync-docs-scope-discipline
3
+ kind: mandatory-rule
4
+ ---
5
+ ONE DOC PER INVOCATION. The bead's `SCOPE` field MUST name exactly one doc path. If `SCOPE` names zero docs, more than one doc, or anything other than a single `docs/<name>.md` (or `CHANGELOG.md` / `README.md` if that IS the SCOPE doc), STOP immediately and emit a `BLOCKED: scope-violation` report. Do not proceed.
6
+
7
+ INPUTS ARE FIXED. Your only sources of truth for what changed in the project:
8
+ - The pre-script output above (latest xt report excerpt + recent master commits).
9
+ - Your one doc's content (read with `Read`).
10
+ - `python3 .xtrm/skills/default/sync-docs/scripts/drift_detector.py scan --json` — output MUST be filtered to your one SCOPE doc (jq or python filter). Discard all other entries.
11
+ - `python3 .xtrm/skills/default/sync-docs/scripts/context_gatherer.py --doc <YOUR_SCOPE_DOC>` — exactly that doc, no broader flags.
12
+
13
+ DIFF ESCAPE VALVE — STRICT. When a commit subject is insufficient to judge a claim in your doc, run `git show <hash> -- <path1> [<path2>...]` for ONE commit, naming only paths the doc actually claims about. Maximum 3 such commits per run. FORBIDDEN: `git diff <a>..<b>` (range diffs), `git show <hash>` without `--`, `git log -p`, `git log --stat` over more than 5 commits.
14
+
15
+ DO NOT INSPECT SOURCE FILES BY ANY TOOL. The following are forbidden on `src/`, `tests/`, `pi/`, `packages/`, `config/specialists/`, `.specialists/default/`, or any non-doc path:
16
+ - `Read` / `cat` / `head` / `tail` / `sed -n` / `awk` / `less` / `more`
17
+ - `Grep` / `grep` / `rg` / `git grep`
18
+ - `Glob` / `find` / `ls -R`
19
+ - `python -c "open(...)"`, `python -c "Path(...).read_text()"`, or any scripted file slurp
20
+ - `Bash` invocations that pipe source files anywhere (`< srcfile`, `cat srcfile | ...`)
21
+
22
+ The pre-script context plus per-commit `git show -- <paths>` is exhaustive. Reading source by any other route is the failure mode this specialist exists to prevent.
23
+
24
+ EXCEPTION (sole allowed source-inspection path): the `git show <hash> -- <paths>` form described above. No other tool, command, or pattern is permitted to read source files. The "DO NOT INSPECT SOURCE FILES" ban applies to every tool *except* this one bounded form.
25
+
26
+ EDIT BOUNDARY. Edit ONLY your one SCOPE doc. NEVER touch CHANGELOG or README (unless one of them IS your SCOPE doc), `.xtrm/skills/`, other docs, or any source file. Cross-cutting updates are separate beads with their own SCOPE.
27
+
28
+ NO RE-READING. If you have already gathered context this turn, refer to your prior output. Do NOT re-fetch after compaction. If prior gathered context is unreachable after compaction, STOP and emit `BLOCKED: context-lost-after-compaction` — do not re-run tools to recover.
29
+
30
+ OBEY STEER AND STOP. When the orchestrator or user issues a steer or stop, comply on the very next tool call. Do not finish "one more thing".
31
+
32
+ BUDGET. Per run: ONE drift scan (filtered), ONE context_gatherer call (only if pre-script context is insufficient), max THREE `git show <hash> -- <paths>` calls, ONE doc edit pass, ONE final drift validation. No exploratory loops.
33
+
34
+ STOP CONDITIONS. Stop and emit your final report when ANY is true:
35
+ - The one doc has been edited and stamped (`VERDICT: UPDATED`).
36
+ - You determine no edit is needed (`VERDICT: NO_CHANGE_NEEDED`, cite commit evidence).
37
+ - A precondition above is violated (`VERDICT: BLOCKED`, name the violation).
38
+ - Steer or stop received.
39
+
40
+ OUTPUT SHAPE. Final report must include: `DOC: <path>`, `VERDICT: <UPDATED|NO_CHANGE_NEEDED|BLOCKED>`, `COMMITS_REVIEWED: <hashes>`, `EDITS: <summary or "none">`, `DRIFT_BEFORE`, `DRIFT_AFTER`, optional `SUGGESTED_FOLLOWUPS: <other doc names — never edited>`.
@@ -0,0 +1,82 @@
1
+ ---
2
+ name: releasing
3
+ description: >-
4
+ Cut a release end-to-end via xt release. Use when the operator wants to
5
+ publish a new tag (vX.Y.Z) — drafts CHANGELOG section from xt reports,
6
+ bumps package.json, rebuilds dist, commits, tags, pushes, optional GH
7
+ release. Strict scope: only CHANGELOG.md + package.json + dist/.
8
+ version: 1.2.0
9
+ ---
10
+
11
+ # releasing
12
+
13
+ One-step release publication via specialist delegation.
14
+
15
+ ## When to use
16
+
17
+ The operator wants to cut a release. They say "release it", "ship vX.Y.Z", "cut a tag", or just "release".
18
+
19
+ ## How
20
+
21
+ 1. Determine target version. Default is patch bump from most recent semver tag. Operator may specify `--minor`, `--major`, or explicit version.
22
+
23
+ 2. Determine tag range. Default is `<latest-tag>..HEAD`. For backfills, operator names `--from` / `--to` explicitly.
24
+
25
+ 3. Create release bead. Template:
26
+
27
+ ```
28
+ PROBLEM: Cut release vX.Y.Z covering <prev-tag>..HEAD.
29
+ SUCCESS: CHANGELOG.md updated with new section above prior release; package.json bumped; dist rebuilt; commit `release: vX.Y.Z` pushed with tag.
30
+ SCOPE: CHANGELOG.md, package.json, dist/. Synthesis input: xt reports under .xtrm/reports/ dated within <prev-tag-date>..HEAD.
31
+ NON_GOALS: No source/docs/config edits. No retroactive changes to prior release sections.
32
+ CONSTRAINTS: Keep-a-Changelog v1.0.0 format. One-line bullets. Default bucket Changed. Deprecated only for explicit sunsets.
33
+ VALIDATION: git diff --stat HEAD~1 HEAD shows only CHANGELOG.md, package.json, dist/.
34
+ OUTPUT: Final report with VERSION, COMMIT, TAG, PUSHED status.
35
+ GH_RELEASE: <true|false> # whether to also `gh release create`
36
+ ```
37
+
38
+ 4. Dispatch specialist:
39
+
40
+ ```bash
41
+ xt release prepare --from <prev-tag> --to HEAD --patch
42
+ ```
43
+
44
+ or `xt release publish` once draft is approved. `xt release` invokes
45
+ `sp script changelog-keeper` synchronously, READ_ONLY, template-driven.
46
+ No HTTP. No bead. No worktree.
47
+
48
+ 5. Verify diff after specialist completes.
49
+
50
+ ```bash
51
+ git diff --stat HEAD~1 HEAD
52
+ ```
53
+
54
+ Output MUST show only:
55
+ - `CHANGELOG.md`
56
+ - `package.json`
57
+ - `dist/index.js`, `dist/lib.js`, `dist/types/**`
58
+
59
+ 6. If diff check passes, release shipped. Confirm:
60
+
61
+ ```bash
62
+ git tag --list 'v*' | tail -3
63
+ git log --oneline -1
64
+ ```
65
+
66
+ ## Why this design
67
+
68
+ - Specialist does work itself. No CLI plumbing, no template substitution, no JSON output schema, no two-phase prepare/publish gate.
69
+ - Mandatory rule `changelog-keeper-scope` enforces edit whitelist.
70
+ - Operator gate is single `git diff --stat HEAD~1 HEAD` check after specialist finishes.
71
+ - xt reports are synthesis input, not git log + bd query. Reports are pre-curated, signal-rich, written in user-facing language.
72
+ - New pre-script injects a bounded xt report bundle first so changelog bullets can reflect intent and post-mortem context, not just file diffs.
73
+
74
+ ## Parallel sessions
75
+
76
+ Each orchestrator runs this skill in its own session. Specialist commits + tags + pushes atomically. If two sessions try same version, first push wins; second sees remote tag conflict and aborts cleanly. Operator picks next version and retries.
77
+
78
+ ## Don't
79
+
80
+ - Don't manually `sp release prepare`/`publish` — deprecated aliases only. Use `xt release prepare`/`publish`.
81
+ - Don't edit CHANGELOG.md outside release flow — manual edits leak into next release diff and break scope verification.
82
+ - Don't pre-stage files. Specialist stages exactly what it commits.
@@ -5,7 +5,7 @@ description: >
5
5
  agent through writing a valid `.specialist.json`, choosing supported models,
6
6
  validating against the schema, and avoiding common specialist authoring
7
7
  mistakes.
8
- version: 1.1
8
+ version: 1.2
9
9
  synced_at: 236ca5e6
10
10
  ---
11
11
 
@@ -40,6 +40,7 @@ Model tiers:
40
40
  Rules:
41
41
  - Always pick the **highest version** in a family (`claude-sonnet-4-6` not `4-5`, `gemini-3.1-pro-preview` not `gemini-2.5-pro`)
42
42
  - `model` and `fallback_model` must be **different providers**
43
+ - If a specialist needs a longer fallback chain, keep first fallback in `fallback_model` and let runtime supply any extra retry tier.
43
44
  - Never write a model string you have not pinged in this session
44
45
 
45
46
  ---
@@ -162,6 +163,10 @@ specialists models # confirm assignments look balanced
162
163
 
163
164
  ---
164
165
 
166
+ ## Canonical references
167
+
168
+ Reference any canonical skill or rule by name; runtime finds it.
169
+
165
170
  ## Quick Start: Scaffold + `sp edit`
166
171
 
167
172
  ```bash
@@ -201,19 +206,47 @@ bun config/skills/specialists-creator/scripts/validate-specialist.ts config/spec
201
206
  |-------|------|----------|-------|
202
207
  | `name` | string | yes | kebab-case: `[a-z][a-z0-9-]*` |
203
208
  | `version` | string | yes | semver: `1.0.0` |
204
- | `description` | string | yes | One sentence |
209
+ | `description` | string | yes | Routing summary surfaced by `specialists list`; see Description writing below |
205
210
  | `category` | string | yes | Free text (e.g. `workflow`, `analysis`, `codegen`) |
206
211
  | `author` | string | no | Optional |
207
212
  | `created` | string | no | Optional date |
208
213
  | `updated` | string | no | Optional date, quote it: `"2026-03-22"` |
209
214
  | `tags` | string[] | no | Optional list |
210
215
 
216
+
217
+ ### Description writing for `specialists list`
218
+
219
+ `specialist.metadata.description` is the routing surface that orchestrators see in `specialists list`. Write it as an operational role definition, not marketing copy. Keep the first clause distinctive because list output may truncate.
220
+
221
+ A good description answers, in this order:
222
+
223
+ 1. **Choose when** — the task shape that should route here.
224
+ 2. **Do not choose when** — adjacent roles that should win instead.
225
+ 3. **Distinctive capability** — what this specialist does that others do not.
226
+ 4. **Permission/risk note** — READ_ONLY/LOW/MEDIUM/HIGH implication when it affects orchestration.
227
+
228
+ Pattern:
229
+
230
+ ```text
231
+ <role noun>. Use for <specific task shape>. Not for <near misses>; use <better roles>. <permission/workflow distinction>.
232
+ ```
233
+
234
+ Examples:
235
+
236
+ ```text
237
+ Scoped implementation only. Use when requirements, files/symbols, constraints, and validation are clear. Not diagnosis, planning, review, tests, release, or research. HIGH worktree.
238
+
239
+ Debug symptoms/errors/regressions first. Use when cause is unknown or tests fail unexpectedly; traces, fixes targeted code, and verifies. HIGH keep-alive.
240
+ ```
241
+
242
+ Avoid vague descriptions like "general purpose assistant" or "helps with code". Those cause orchestrators to overuse familiar specialists instead of routing to debugger, test-runner, researcher, sync-docs, or other sharper roles.
243
+
211
244
  ### `specialist.execution` (required)
212
245
 
213
246
  | Field | Type | Default | Notes |
214
247
  |-------|------|---------|-------|
215
248
  | `model` | string | — | required — ping before using |
216
- | `fallback_model` | string | — | must be a different provider |
249
+ | `fallback_model` | string | — | first fallback only, from a different provider than `model`; runtime may append more tiers |
217
250
  | `mode` | enum | `auto` | `tool` \| `skill` \| `auto` |
218
251
  | `timeout_ms` | number | `120000` | ms |
219
252
  | `stall_timeout_ms` | number | — | kill if no event for N ms |
@@ -234,17 +267,58 @@ bun config/skills/specialists-creator/scripts/validate-specialist.ts config/spec
234
267
  - MCP `start_specialist`: `keep_alive` enables, `no_keep_alive` disables.
235
268
  - Effective precedence: explicit disable (`--no-keep-alive` / `no_keep_alive`) → explicit enable (`--keep-alive` / `keep_alive`) → `execution.interactive` → one-shot default.
236
269
 
237
- **Permission tiers** — controls which pi tools are available:
270
+ **Permission tiers** — controls the *native* pi tools the specialist gets. The full resolved tool set also includes catalog-defined GitNexus and Serena tools per tier; see [docs/manifest.md](../../../docs/manifest.md) for the complete picture.
271
+
272
+ | Level | Native tools (cumulative) | Use when |
273
+ |-------|---------------------------|----------|
274
+ | `READ_ONLY` | `read, grep, find, ls` | Read-only analysis, no bash |
275
+ | `LOW` | `+ bash` | Inspect/run commands, no file edits |
276
+ | `MEDIUM` | `+ edit` | Can edit existing files |
277
+ | `HIGH` | `+ write` | Full access — can create new files |
278
+
279
+ After choosing a tier, verify the resolved tool list before dispatching:
238
280
 
239
- | Level | pi --tools | Use when |
240
- |-------|-----------|----------|
241
- | `READ_ONLY` | `read,grep,find,ls` | Read-only analysis, no bash |
242
- | `LOW` | `+bash` | Inspect/run commands, no file edits |
243
- | `MEDIUM` | `+edit` | Can edit existing files |
244
- | `HIGH` | `+write` | Full access — can create new files |
281
+ ```bash
282
+ sp config show <name> --resolved
283
+ ```
245
284
 
246
285
  **Common pitfall:** `READ_WRITE` is **not** a valid value — use `LOW` or higher.
247
286
 
287
+ ### Per-specialist `permissions[<TIER>]` override (rarely needed)
288
+
289
+ Most specialists use the catalog default deny baseline. **Do not declare an override unless this specialist's policy genuinely diverges from its tier.** When you do override, remember the specialist block replaces catalog defaults for that tier.
290
+
291
+ If divergence is real, add a `permissions` block inside `specialist` (sibling to `execution`):
292
+
293
+ ```jsonc
294
+ {
295
+ "specialist": {
296
+ "execution": { "permission_required": "READ_ONLY" },
297
+ "permissions": {
298
+ "READ_ONLY": {
299
+ "denied_natives_when_extension": ["grep", "find", "ls"],
300
+ "denied_natives_mode": "hard"
301
+ }
302
+ }
303
+ }
304
+ }
305
+ ```
306
+
307
+ | Field | Type | Default | Effect |
308
+ |-------|------|---------|--------|
309
+ | `denied_natives_when_extension` | `string[]` | `[]` | Native tools to deny only when a replacement extension is healthy. Catalog defaults apply first; specialist override replaces them for that tier. |
310
+ | `denied_natives_mode` | `"soft"` \| `"hard"` | `"soft"` | `soft` keeps the tool with a preference hint; `hard` removes it (with auto-restore if the extension degrades) |
311
+
312
+ The override block can only *deny* natives — it cannot add new tools beyond the catalog tier. To add tools, change the tier or update the catalog file.
313
+
314
+ **Decision rule when authoring:**
315
+ 1. Pick the lowest tier that satisfies the specialist's actual capability needs.
316
+ 2. Run `sp config show <name> --resolved` and inspect the `--tools` line.
317
+ 3. If the tools are right, you're done — no override needed.
318
+ 4. If a native tool is genuinely worse than an extension equivalent for this specialist's task, declare a soft-deny first to observe behavior, then promote to hard-deny once you trust it.
319
+
320
+ See [docs/manifest.md](../../../docs/manifest.md) for full deny-mode semantics, extension health gating, and the canonical explorer example.
321
+
248
322
  **Per-specialist extension opt-out**
249
323
 
250
324
  Use `execution.extensions` only when this specialist must suppress default extension injection.
@@ -1,5 +1,5 @@
1
1
  import { readFileSync } from "node:fs";
2
- import { parseSpecialist } from "../../../src/specialist/schema.ts";
2
+ import { parseSpecialist } from "../../../../src/specialist/schema.ts";
3
3
 
4
4
  function printUsage(): void {
5
5
  console.error("Usage: bun skills/specialist-author/scripts/validate-specialist.ts <path-to.specialist.yaml>");
@@ -6,8 +6,8 @@ description: >
6
6
  "sp is out of date", "hooks not firing", "skills not loading after update",
7
7
  or when drift is detected in installed specialists config, hooks, jobs, DB,
8
8
  extensions, or worktree cleanup.
9
- version: 1.3
10
- synced_at: 2026-04-25
9
+ version: 1.4
10
+ synced_at: 2026-04-29
11
11
  ---
12
12
 
13
13
  # update-specialists
@@ -91,13 +91,18 @@ looks like.
91
91
  | Check | Expected value |
92
92
  |-------|----------------|
93
93
  | specialists DB | Opens cleanly (`.specialists/db/observability.db`) |
94
- | Schema version | Matches runtime expectation (current: v11) |
94
+ | Schema version | Matches runtime expectation (current: v11; auto-migrates on next runtime startup) |
95
95
  | `specialist_job_metrics` table | Present at v11+ — holds aggregated per-job metrics |
96
+ | `specialist_job_metrics` columns | Includes `active_runtime_ms` + `waiting_ms` (drs41.1 — auto-added by idempotent `migrateToV11` ALTER TABLE on first start of upgraded runtime; pre-existing rows get NULL until next aggregate) |
97
+ | Auto-aggregation hook | Supervisor + `sp stop` invoke `aggregateJobMetricsBestEffort` after terminal-status persistence (drs41.1) — table populates without manual `sp db extract` under normal operation |
98
+ | Merge target lookup | DB-first (post-ofjvj): `readAllJobStatuses()` reads `specialist_jobs` via `listStatuses()`. `sp merge` no longer reads `.specialists/jobs/<id>/status.json`. Older versions silently failed after `sp stop` cleaned status.json. |
96
99
  | WAL / busy timeout settings | Present when runtime uses SQLite |
97
100
  | Corruption / lock errors | None in `sp doctor` |
98
101
  | Pre-prune extract | `sp db prune --apply` extracts metrics to `specialist_job_metrics` before deleting events |
99
- | Extract backfill | `sp db extract --all-missing` populates metrics for jobs whose events still exist |
100
- | Historical stats query | `sp db stats [--spec <name>] [--model <glob>] [--since <dur>]` reads the aggregated table |
102
+ | Extract backfill | `sp db extract --all-missing` populates metrics for jobs whose events still exist (still useful for backfilling historical jobs that ran before the auto-aggregate hook landed) |
103
+ | Historical stats query | `sp db stats [--spec <name>] [--model <glob>] [--since <dur>]` reads the aggregated table; output includes `active_s`, `waiting_s`, `total_s` (drs41.1) |
104
+
105
+ **Safety: `sp init` and `sp init --sync-defaults` do NOT touch `.specialists/db/observability.db`.** Init checks file existence and skips with "observability database already exists (not touched)" when present. Schema migrations run on next runtime startup (any `sp` invocation that opens the DB), additively via `ALTER TABLE ADD COLUMN`. A normal package upgrade therefore has no data-loss path.
101
106
 
102
107
  ### Skills + extensions parity
103
108
 
@@ -126,6 +131,28 @@ Loader unions indexes from three paths and probes set files in reverse precedenc
126
131
  | Index files (`index.json`) | Any of the three tiers may define `required_template_sets` / `default_template_sets`; loader unions + dedups |
127
132
  | Prompt injection behavior | Runner appends resolved `MANDATORY_RULES` block at end of prompt; supervisor emits `mandatory_rules_injection` meta event |
128
133
 
134
+ ## Discover Latest Release
135
+
136
+ Before reconciling, determine whether a newer release is published. Compare local `package.json` version to the most recent `vX.Y.Z` tag on `origin`:
137
+
138
+ ```bash
139
+ LOCAL=$(node -p "require('./package.json').version")
140
+ LATEST=$(git ls-remote --tags --refs origin | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+$' | sort -V | tail -1 | sed 's/^v//')
141
+ echo "local: $LOCAL latest: $LATEST"
142
+ ```
143
+
144
+ If `LATEST > LOCAL`, read the corresponding `CHANGELOG.md` section to summarize what shipped:
145
+
146
+ ```bash
147
+ awk -v ver="$LATEST" '/^## \[/ { p = (index($0, "[" ver "]") || index($0, "[v" ver "]")) } p' CHANGELOG.md | head -60
148
+ ```
149
+
150
+ Surface a one-line summary to the user (Added/Changed/Fixed counts plus the headline) and **ask before pulling**. The reconcile flow below applies regardless of whether the user pulls a new release first or stays on the current version — drift detection is independent of release version.
151
+
152
+ Skip this discovery step entirely when `SPECIALISTS_OFFLINE=1` is set, when offline, or when the user already specified the version. The `using-specialists-v2` skill performs the same lightweight check on session-load and may have already surfaced the notice; do not repeat it.
153
+
154
+ After the user confirms a pull (e.g. `git fetch && git pull origin master`), proceed with detection below to catch any drift introduced by the new release.
155
+
129
156
  ## Detection
130
157
 
131
158
  Run these in order. Report which checks pass and which drift.
@@ -182,8 +209,8 @@ find .worktrees -maxdepth 2 -mindepth 1 -type d 2>/dev/null || true
182
209
  # 12. Extension registration
183
210
  node -e "const fs=require('fs'); const p='.pi/settings.json'; if (fs.existsSync(p)) console.log(JSON.stringify(JSON.parse(fs.readFileSync(p,'utf8')).skills ?? JSON.parse(fs.readFileSync(p,'utf8')).extensions ?? {}, null, 2)); else console.log('MISSING .pi/settings.json')"
184
211
 
185
- # 13a. Observability schema + metrics coverage
186
- node -e "const {Database} = require('bun:sqlite'); const p='.specialists/db/observability.db'; const fs=require('fs'); if (!fs.existsSync(p)) { console.log('NO_DB'); process.exit(0); } const db=new Database(p,{readonly:true}); const v=db.query(\"SELECT value FROM schema_meta WHERE key='version'\").get(); const has=db.query(\"SELECT name FROM sqlite_master WHERE type='table' AND name='specialist_job_metrics'\").get(); const jobs=db.query('SELECT COUNT(*) c FROM specialist_jobs').get(); const metrics=has ? db.query('SELECT COUNT(*) c FROM specialist_job_metrics').get() : null; console.log(JSON.stringify({schema_version: v?.value, has_metrics_table: !!has, jobs: jobs.c, metrics_rows: metrics?.c ?? 0, metrics_coverage: metrics ? (metrics.c/jobs.c).toFixed(2) : null}, null, 2));" 2>/dev/null || echo "REQUIRES_BUN_RUNTIME"
212
+ # 13a. Observability schema + metrics coverage + drs41.1 column presence
213
+ node -e "const {Database} = require('bun:sqlite'); const p='.specialists/db/observability.db'; const fs=require('fs'); if (!fs.existsSync(p)) { console.log('NO_DB'); process.exit(0); } const db=new Database(p,{readonly:true}); const v=db.query(\"SELECT value FROM schema_meta WHERE key='version'\").get(); const has=db.query(\"SELECT name FROM sqlite_master WHERE type='table' AND name='specialist_job_metrics'\").get(); const jobs=db.query('SELECT COUNT(*) c FROM specialist_jobs').get(); const metrics=has ? db.query('SELECT COUNT(*) c FROM specialist_job_metrics').get() : null; const cols=has ? new Set(db.query('PRAGMA table_info(specialist_job_metrics)').all().map(r=>r.name)) : new Set(); const drs41Cols={active_runtime_ms: cols.has('active_runtime_ms'), waiting_ms: cols.has('waiting_ms')}; console.log(JSON.stringify({schema_version: v?.value, has_metrics_table: !!has, drs41_columns_present: drs41Cols, jobs: jobs.c, metrics_rows: metrics?.c ?? 0, metrics_coverage: metrics ? (metrics.c/jobs.c).toFixed(2) : null}, null, 2));" 2>/dev/null || echo "REQUIRES_BUN_RUNTIME"
187
214
 
188
215
  # 13. Mandatory-rules template tiers + reference checks (three-tier resolution)
189
216
  find .specialists/default/mandatory-rules -maxdepth 1 -type f 2>/dev/null || true
@@ -213,6 +240,9 @@ Use targeted fixes first. Escalate to full sync only if needed.
213
240
  | Orphaned `.worktrees/` entries | `specialists clean` |
214
241
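  | SQLite schema/version mismatch | `sp doctor` first, then run any `sp` command that opens the DB (migrations apply automatically on startup) or the runtime migration command. Note that `specialists init --sync-defaults` skips an existing DB and will not migrate it. |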
215
242
  | Schema below v11 (no `specialist_job_metrics`) | Reinstall / upgrade runtime; table is created by initSchema / migrateToV11. No data loss — raw events untouched. |
243
+ | `specialist_job_metrics` missing `active_runtime_ms` / `waiting_ms` columns (post-drs41.1) | Run any `sp` command that opens the DB — `migrateToV11` is idempotent and ALTERs the table to add the columns. No reinstall needed. Pre-existing rows show NULL until next aggregate or `sp db extract --all-missing`. |
244
+ | Auto-aggregate hook absent (older runtime) — empty `specialist_job_metrics` despite job activity | Upgrade `@jaggerxtrm/specialists` package. Post-drs41.1, supervisor + `sp stop` invoke `aggregateJobMetricsBestEffort` on every terminal status, so the table fills under normal operation. Backfill historical with `sp db extract --all-missing`. |
245
+ | `sp merge` fails after `sp stop` (older runtime) — "No chain-root job with worktree metadata found" | Upgrade `@jaggerxtrm/specialists` past ofjvj fix. Merge lookup is now DB-first via `readAllJobStatuses()` / `listStatuses()`. Pre-fix workaround was manual `git merge --no-ff feature/<branch>` (skips tsc + conflict gates). |
216
246
  | Events about to be pruned but never aggregated | `sp db extract --all-missing` BEFORE `sp db prune --apply`. Prune refuses when extract fails (safe by design). |
217
247
  | Emergency: need to prune but extract is wedged | `sp db prune --apply --skip-extract` — raw events deleted without aggregation. Use only when data loss is acceptable. |
218
248
  | Historical per-job stats needed | `sp db stats` reads `specialist_job_metrics`. Replaces ad-hoc `status.json` scans. Supports `--format json\|table`. |
@@ -300,6 +330,10 @@ If doctor reports DB version mismatch or recovery issue:
300
330
  2. Apply runtime migration command if available.
301
331
  3. If no automated migration exists, flag manual intervention.
302
332
 
333
+ For additive schema bumps (e.g. drs41.1 added `active_runtime_ms` / `waiting_ms` columns within v11): just run any `sp` command that opens the DB from the repo. `initSchema()` runs every migrate-up function on every startup; `migrateToV11` is idempotent — it detects existing v11 schema and `ALTER TABLE ADD COLUMN`s the missing fields. Existing rows are preserved (new fields = NULL until next aggregate). No data loss, no manual SQL.
334
+
335
+ `sp init` and `sp init --sync-defaults` skip the DB entirely when it exists — the only way to wipe `.specialists/db/observability.db` is to delete the file manually.
336
+
303
337
  ### Fix: metrics aggregation missing or stale
304
338
 
305
339
  Schema v11 introduced `specialist_job_metrics` (aggregated per-job stats). If you see low `metrics_coverage` in the detection output, or want historical stats before running `sp db prune`: