@jaggerxtrm/specialists 3.10.0 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/README.md +3 -0
  2. package/config/hooks/specialists-session-start.mjs +33 -1
  3. package/config/mandatory-rules/bead-id-verbatim.md +14 -0
  4. package/config/mandatory-rules/changelog-conventions.md +21 -0
  5. package/config/mandatory-rules/changelog-keeper-scope.md +50 -0
  6. package/config/mandatory-rules/gitnexus-required.md +6 -1
  7. package/config/mandatory-rules/per-turn-handoff-schema.md +16 -0
  8. package/config/mandatory-rules/sync-docs-scope-discipline.md +40 -0
  9. package/config/skills/releasing/SKILL.md +82 -0
  10. package/config/skills/specialists-creator/SKILL.md +100 -10
  11. package/config/skills/specialists-creator/scripts/validate-specialist.ts +1 -1
  12. package/config/skills/update-specialists/SKILL.md +192 -325
  13. package/config/skills/using-kpi/SKILL.md +236 -0
  14. package/config/skills/using-script-specialists/SKILL.md +208 -0
  15. package/config/skills/using-specialists-v2/SKILL.md +162 -28
  16. package/config/skills/using-specialists-v3/SKILL.md +562 -0
  17. package/config/skills/using-specialists-v3/evals/evals.json +89 -0
  18. package/config/specialists/changelog-drafter.specialist.json +62 -0
  19. package/config/specialists/changelog-keeper.specialist.json +80 -0
  20. package/config/specialists/code-sanity.specialist.json +108 -0
  21. package/config/specialists/debugger.specialist.json +7 -5
  22. package/config/specialists/executor.specialist.json +7 -5
  23. package/config/specialists/explorer.specialist.json +16 -5
  24. package/config/specialists/memory-processor.specialist.json +4 -4
  25. package/config/specialists/node-coordinator.specialist.json +3 -3
  26. package/config/specialists/overthinker.specialist.json +5 -4
  27. package/config/specialists/planner.specialist.json +7 -5
  28. package/config/specialists/researcher.specialist.json +5 -4
  29. package/config/specialists/reviewer.specialist.json +7 -5
  30. package/config/specialists/security-auditor.specialist.json +111 -0
  31. package/config/specialists/specialists-creator.specialist.json +6 -5
  32. package/config/specialists/sync-docs.specialist.json +18 -19
  33. package/config/specialists/test-runner.specialist.json +5 -4
  34. package/config/specialists/xt-merge.specialist.json +4 -4
  35. package/dist/index.js +3379 -1168
  36. package/dist/lib.js +518 -154
  37. package/dist/types/cli/clean.d.ts.map +1 -1
  38. package/dist/types/cli/config.d.ts.map +1 -1
  39. package/dist/types/cli/db.d.ts.map +1 -1
  40. package/dist/types/cli/doctor.d.ts.map +1 -1
  41. package/dist/types/cli/feed.d.ts.map +1 -1
  42. package/dist/types/cli/help.d.ts.map +1 -1
  43. package/dist/types/cli/init.d.ts.map +1 -1
  44. package/dist/types/cli/list.d.ts +4 -0
  45. package/dist/types/cli/list.d.ts.map +1 -1
  46. package/dist/types/cli/merge.d.ts +4 -2
  47. package/dist/types/cli/merge.d.ts.map +1 -1
  48. package/dist/types/cli/node.d.ts.map +1 -1
  49. package/dist/types/cli/prune-stale-defaults.d.ts +2 -0
  50. package/dist/types/cli/prune-stale-defaults.d.ts.map +1 -0
  51. package/dist/types/cli/ps.d.ts.map +1 -1
  52. package/dist/types/cli/result.d.ts.map +1 -1
  53. package/dist/types/cli/run.d.ts.map +1 -1
  54. package/dist/types/cli/script.d.ts.map +1 -1
  55. package/dist/types/cli/serve-hot-reload.d.ts +13 -0
  56. package/dist/types/cli/serve-hot-reload.d.ts.map +1 -0
  57. package/dist/types/cli/serve.d.ts +28 -0
  58. package/dist/types/cli/serve.d.ts.map +1 -1
  59. package/dist/types/cli/status.d.ts.map +1 -1
  60. package/dist/types/cli/stop.d.ts.map +1 -1
  61. package/dist/types/cli/version-check.d.ts +20 -0
  62. package/dist/types/cli/version-check.d.ts.map +1 -0
  63. package/dist/types/index.d.ts +1 -1
  64. package/dist/types/pi/session.d.ts +10 -0
  65. package/dist/types/pi/session.d.ts.map +1 -1
  66. package/dist/types/specialist/canonical-asset-resolver.d.ts +6 -0
  67. package/dist/types/specialist/canonical-asset-resolver.d.ts.map +1 -0
  68. package/dist/types/specialist/drift-detector.d.ts +39 -0
  69. package/dist/types/specialist/drift-detector.d.ts.map +1 -0
  70. package/dist/types/specialist/epic-lifecycle.d.ts.map +1 -1
  71. package/dist/types/specialist/epic-readiness.d.ts.map +1 -1
  72. package/dist/types/specialist/epic-reconciler.d.ts.map +1 -1
  73. package/dist/types/specialist/loader.d.ts +2 -1
  74. package/dist/types/specialist/loader.d.ts.map +1 -1
  75. package/dist/types/specialist/mandatory-rules.d.ts +5 -0
  76. package/dist/types/specialist/mandatory-rules.d.ts.map +1 -1
  77. package/dist/types/specialist/manifest-resolver.d.ts +55 -0
  78. package/dist/types/specialist/manifest-resolver.d.ts.map +1 -0
  79. package/dist/types/specialist/node-contract.d.ts +2 -2
  80. package/dist/types/specialist/observability-sqlite.d.ts +43 -0
  81. package/dist/types/specialist/observability-sqlite.d.ts.map +1 -1
  82. package/dist/types/specialist/payload-measure.d.ts +19 -0
  83. package/dist/types/specialist/payload-measure.d.ts.map +1 -0
  84. package/dist/types/specialist/porcelain-parser.d.ts +2 -0
  85. package/dist/types/specialist/porcelain-parser.d.ts.map +1 -0
  86. package/dist/types/specialist/resolution-diagnostics.d.ts +36 -0
  87. package/dist/types/specialist/resolution-diagnostics.d.ts.map +1 -0
  88. package/dist/types/specialist/runner.d.ts +8 -0
  89. package/dist/types/specialist/runner.d.ts.map +1 -1
  90. package/dist/types/specialist/schema.d.ts +27 -0
  91. package/dist/types/specialist/schema.d.ts.map +1 -1
  92. package/dist/types/specialist/script-runner.d.ts +44 -1
  93. package/dist/types/specialist/script-runner.d.ts.map +1 -1
  94. package/dist/types/specialist/supervisor.d.ts +4 -0
  95. package/dist/types/specialist/supervisor.d.ts.map +1 -1
  96. package/dist/types/specialist/timeline-events.d.ts +29 -1
  97. package/dist/types/specialist/timeline-events.d.ts.map +1 -1
  98. package/dist/types/specialist/timeline-query.d.ts.map +1 -1
  99. package/dist/types/specialist/tool-catalog.d.ts +126 -0
  100. package/dist/types/specialist/tool-catalog.d.ts.map +1 -0
  101. package/dist/types/tools/specialist/feed_specialist.tool.d.ts +2 -2
  102. package/dist/types/tools/specialist/use_specialist.tool.d.ts.map +1 -1
  103. package/package.json +4 -4
  104. package/config/specialists/.serena/project.yml +0 -151
@@ -0,0 +1,236 @@
1
+ ---
2
+ name: using-kpi
3
+ description: >-
4
+ Analyze specialist KPI data in observability SQLite. Use for runtime, payload,
5
+ waiting, tool-call, and outlier analysis. Token estimates use cl100k_base-style
6
+ approximation with ~±5% accuracy.
7
+ gemini-command: using-kpi
8
+ version: 3.1.0
9
+ ---
10
+
11
+ # using-kpi
12
+
13
+ KPI analysis skill for `sp db stats` / `sp db extract` data.
14
+
15
+ ## Quick rule
16
+
17
+ `active_runtime_ms` = real paid runtime. Rank by that first. `elapsed_ms` is total wall time. `waiting_ms` catches forgotten keep-alives.
18
+
19
+ Token counts are approximate, cl100k_base-style, about ±5%. Bytes are exact UTF-8 size.
20
+
21
+ ## Recipe 1 — specialist × model leaderboard by active cost
22
+
23
+ ```bash
24
+ sp db stats --with-payload --format json \
25
+ | jq -r '
26
+ .rows
27
+ | group_by([.specialist, .model])
28
+ | map({
29
+ specialist: .[0].specialist,
30
+ model: .[0].model,
31
+ jobs: length,
32
+ active_ms: (map((.active_runtime_ms // 0)) | add),
33
+ total_ms: (map((.total_runtime_ms // .elapsed_ms // 0)) | add),
34
+ turns: (map((.total_turns // 0)) | add),
35
+ tools: (map((.total_tools // 0)) | add),
36
+ payload_kb: (map((.payload_kb // 0)) | add)
37
+ })
38
+ | sort_by(-.active_ms, -.jobs)
39
+ | .[]
40
+ | [ .specialist, .model, .jobs, .active_ms, .total_ms, .turns, .tools, .payload_kb ]
41
+ | @tsv'
42
+ ```
43
+
44
+ ## Recipe 2 — outliers above p95
45
+
46
+ ```bash
47
+ sp db stats --with-payload --format json \
48
+ | jq '
49
+ .rows as $rows
50
+ | {
51
+ active: ($rows | map(.active_runtime_ms // 0) | sort),
52
+ tools: ($rows | map(.total_tools // 0) | sort),
53
+ turns: ($rows | map(.total_turns // 0) | sort),
54
+ payload: ($rows | map(.payload_kb // 0) | sort)
55
+ } as $s
56
+ | {
57
+ active_p95: $s.active[(($s.active|length)*95/100|floor)],
58
+ tools_p95: $s.tools[(($s.tools|length)*95/100|floor)],
59
+ turns_p95: $s.turns[(($s.turns|length)*95/100|floor)],
60
+ payload_p95: $s.payload[(($s.payload|length)*95/100|floor)]
61
+ } as $p
62
+ | $rows
63
+ | map(select(
64
+ ((.active_runtime_ms // 0) >= $p.active_p95) or
65
+ ((.total_tools // 0) >= $p.tools_p95) or
66
+ ((.total_turns // 0) >= $p.turns_p95) or
67
+ ((.payload_kb // 0) >= $p.payload_p95)
68
+ ))
69
+ | .[]
70
+ | [ .job_id, .specialist, .model, .active_runtime_ms, .total_tools, .total_turns, .payload_kb ]
71
+ | @tsv'
72
+ ```
73
+
74
+ ## Recipe 3 — payload bloat ranking
75
+
76
+ ```bash
77
+ sp db stats --with-payload --format json \
78
+ | jq -r '
79
+ .rows
80
+ | group_by(.specialist)
81
+ | map({
82
+ specialist: .[0].specialist,
83
+ jobs: length,
84
+ avg_payload_kb: ((map((.payload_kb // 0)) | add) / length),
85
+ max_payload_kb: (map((.payload_kb // 0)) | max)
86
+ })
87
+ | sort_by(-.avg_payload_kb)
88
+ | .[:10]
89
+ | .[]
90
+ | [ .specialist, .jobs, (.avg_payload_kb|tostring), (.max_payload_kb|tostring) ]
91
+ | @tsv'
92
+ ```
93
+
94
+ ## Recipe 4 — waiting-state hygiene
95
+
96
+ ```bash
97
+ sp db stats --format json \
98
+ | jq -r '
99
+ .rows
100
+ | map(select((.waiting_s? // 0) != 0))
101
+ | map(. + {waiting_ratio: ((.waiting_ms // 0) / ((.total_runtime_ms // .elapsed_ms // 1) + 0.0))})
102
+ | sort_by(-.waiting_ratio, -.waiting_ms)
103
+ | .[]
104
+ | [ .job_id, .specialist, .model, (.waiting_ms|tostring), (.total_runtime_ms // .elapsed_ms|tostring), (.waiting_ratio|tostring) ]
105
+ | @tsv'
106
+ ```
107
+
108
+ ## Recipe 5 — tool-call distribution per specialist
109
+
110
+ ```bash
111
+ sp db stats --format json \
112
+ | jq -r '
113
+ .rows
114
+ | group_by(.specialist)
115
+ | map({
116
+ specialist: .[0].specialist,
117
+ counts: (map(.tool_call_counts_json? // "{}")
118
+ | map(fromjson)
119
+ | reduce (.[] | to_entries[]) as $e ({}; .[$e.key] += $e.value))
120
+ })
121
+ | .[]
122
+ | .counts
123
+ | to_entries
124
+ | sort_by(-.value)
125
+ | .[]
126
+ | [ .key, .value ]
127
+ | @tsv'
128
+ ```
129
+
130
+ ## Recipe 6 — payload vs active runtime correlation
131
+
132
+ ```bash
133
+ sp db stats --with-payload --format json \
134
+ | jq -r '
135
+ .rows
136
+ | map(select((.payload_kb? // 0) > 0 and ((.active_runtime_ms? // 0) > 0)))
137
+ | map([(.payload_kb|tonumber), (.active_runtime_ms|tonumber)])
138
+ | if length < 2 then empty else
139
+ (map(.[0]) | add / length) as $mx |
140
+ (map(.[1]) | add / length) as $my |
141
+ (map((.[0]-$mx)*(.[1]-$my)) | add) /
142
+ ((map((.[0]-$mx)^2) | add) * (map((.[1]-$my)^2) | add)) ^ 0.5
143
+ end'
144
+ ```
145
+
146
+ ## Recipe 7 — payload component breakdown per specialist
147
+
148
+ **Truth source first.** The actual prompt size billed by the API is the first turn's `input_tokens` from `token_trajectory_json[0]`. Use it as the ground truth — `payload_breakdown` events undercount (tool definitions and harness framing are not captured) and historical rows before the rule N× fix overcount mandatory_rule by attached-rule count.
149
+
150
+ ```bash
151
+ DB=.specialists/db/observability.db
152
+ sqlite3 "$DB" "SELECT specialist, model, AVG(json_extract(token_trajectory_json, '\$[0].token_usage.input_tokens')) AS avg_first_in, COUNT(*) AS n FROM specialist_job_metrics WHERE token_trajectory_json IS NOT NULL AND status='done' GROUP BY specialist, model ORDER BY avg_first_in DESC"
153
+ ```
154
+
155
+ Use this number for cost decisions. Use `payload_breakdown` only for *relative* component analysis (which knob to tune), not absolute sizing.
156
+
157
+ `sp db stats --with-payload` only surfaces total `payload_kb` / `payload_tokens`. To audit *what* fills the prompt (system_prompt vs mandatory rules vs skills vs bead_context vs memory), query `payload_breakdown` events directly. Use this for eager-load bloat investigations, prompt/rule consolidation planning, or duplication hunts — but cross-check against the truth source above.
158
+
159
+ ```bash
160
+ DB=.specialists/db/observability.db
161
+ sqlite3 "$DB" "SELECT specialist, event_json FROM specialist_events WHERE type='payload_breakdown' GROUP BY specialist ORDER BY t DESC" \
162
+ | python3 -c '
163
+ import json, sys
164
+ rows = []
165
+ for line in sys.stdin:
166
+ if "|" not in line: continue
167
+ spec, js = line.split("|", 1)
168
+ d = json.loads(js)
169
+ agg = {}
170
+ for c in d["payload_breakdown"]["components"]:
171
+ a = agg.setdefault(c["kind"], {"tokens":0,"n":0})
172
+ a["tokens"] += c["tokens"]; a["n"] += 1
173
+ rows.append((spec, d["payload_breakdown"]["totals"]["tokens"], agg))
174
+ rows.sort(key=lambda r: -r[1])
175
+ print(f"{\"specialist\":<22}{\"total\":>8}{\"rules\":>8}{\"rules_n\":>8}{\"sys\":>8}{\"skills\":>8}{\"bead\":>8}{\"mem\":>8}")
176
+ for s, t, a in rows:
177
+ g = lambda k: a.get(k, {"tokens":0,"n":0})
178
+ print(f"{s:<22}{t:>8}{g(\"mandatory_rule\")[\"tokens\"]:>8}{g(\"mandatory_rule\")[\"n\"]:>8}{g(\"system_prompt\")[\"tokens\"]:>8}{g(\"skill\")[\"tokens\"]:>8}{g(\"bead_context\")[\"tokens\"]:>8}{g(\"memory\")[\"tokens\"]:>8}")
179
+ '
180
+ ```
181
+
182
+ Component kinds: `system_prompt`, `mandatory_rule` (one event entry per attached rule), `skill` (path/description label only — full bodies are eagerly injected at runtime but NOT counted here), `task_template`, `bead_context`, `memory`.
183
+
184
+ **Important:** `skill` entries in `payload_breakdown` show only the path/description label (~10-40 tokens). The full skill body is forcefully injected via `skills.paths` on every run and IS billed as input tokens. To measure the real eager-skill cost, see Recipe 8.
185
+
186
+ Optimization signals (from breakdown alone):
187
+ - `mandatory_rule` total dominates: audit wrapper inflation by comparing `bytes` per rule in the event vs `wc -c config/mandatory-rules/<id>.md`. Mismatch >5x means a wrapper or richer source is adding hidden cost.
188
+ - `bead_context` huge: bead description is bloated — orchestrator should write more concise contracts.
189
+ - `memory` huge: stale or noisy memories — run `bd memories` cleanup or consolidation.
190
+
191
+ ## Recipe 8 — eager skill-body cost per specialist
192
+
193
+ `skills.paths` are eagerly injected on every run; the bodies appear in the API-billed prompt but the `payload_breakdown` event records only the path label. To derive the real eager-skill cost:
194
+
195
+ ```
196
+ eager_skill_cost ≈ first_turn_input_tokens − sum(payload_breakdown non-skill components)
197
+ − constant per-specialist framing/tool-defs overhead
198
+ ```
199
+
200
+ Two-step audit:
201
+
202
+ ```bash
203
+ # Step 1: real first-turn input tokens per specialist (truth)
204
+ DB=.specialists/db/observability.db
205
+ sqlite3 "$DB" "
206
+ SELECT specialist, AVG(json_extract(token_trajectory_json, '\$[0].token_usage.input_tokens')) AS avg_first_in, COUNT(*) AS n
207
+ FROM specialist_job_metrics
208
+ WHERE token_trajectory_json IS NOT NULL AND status='done'
209
+ GROUP BY specialist ORDER BY avg_first_in DESC"
210
+
211
+ # Step 2: per-specialist measured non-skill components (post-kdl4n)
212
+ sqlite3 "$DB" "SELECT specialist, event_json FROM specialist_events WHERE type='payload_breakdown' GROUP BY specialist ORDER BY t DESC" \
213
+ | python3 -c '
214
+ import json, sys
215
+ for line in sys.stdin:
216
+ if "|" not in line: continue
217
+ spec, js = line.split("|", 1)
218
+ d = json.loads(js)
219
+ non_skill = sum(c["tokens"] for c in d["payload_breakdown"]["components"] if c["kind"] != "skill")
220
+ print(f"{spec:<22}{non_skill:>10}")
221
+ '
222
+ ```
223
+
224
+ Then `delta = first_in − non_skill_total`. The framing/tool-defs constant is roughly the same across specialists with the same model — you can estimate it by running a specialist with NO `skills.paths` attached as a baseline.
225
+
226
+ Per-skill body weight: `wc -c <skill-path>/SKILL.md` divided by 4 (cl100k_base approximation). High-frequency, large-body skills are the inlining candidates; low-frequency or small ones stay attached.
227
+
228
+ Optimization signals (skills):
229
+ - `delta` >> sum of attached skill body bytes/4: framing/tool defs are the bulk — leave skills alone.
230
+ - `delta` ≈ sum of skill body weights: skills dominate eager cost — inline frequently-used hot guidance into `system_prompt`, keep rare deep references as skills, consider splitting big mixed skills.
231
+
232
+ ## References
233
+
234
+ - `docs/observability-metrics.md`
235
+ - `src/cli/db.ts`
236
+ - `src/specialist/observability-sqlite.ts`
@@ -0,0 +1,208 @@
1
+ ---
2
+ name: using-script-specialists
3
+ description: >
4
+ Use this skill for synchronous one-shot specialist invocations via `sp script`
5
+ (CLI) or `sp serve` (HTTP daemon). These run READ_ONLY, template-driven
6
+ specialists with `$var` substitution and return JSON in-process — no beads,
7
+ no chains, no worktrees, no job lifecycle. Trigger when integrating a
8
+ specialist into a service, script, or library, when the caller needs the
9
+ output immediately, or when the work is a single LLM call with structured
10
+ input/output. Do NOT use for tracked agent work — that belongs to
11
+ `using-specialists-v2`.
12
+ version: 1.0
13
+ ---
14
+
15
+ # Script-Class Specialists
16
+
17
+ `sp script` and `sp serve` are a separate runtime from the bead-first
18
+ orchestration covered by `using-specialists-v2`. They exist for service and
19
+ library integration, not for agent chains.
20
+
21
+ | Aspect | `sp run` (orchestration) | `sp script` / `sp serve` |
22
+ | --- | --- | --- |
23
+ | Driver | bead contract | template + variables |
24
+ | Execution | supervised job, async | one-shot, synchronous |
25
+ | Permissions | READ_ONLY / MEDIUM / HIGH | READ_ONLY only |
26
+ | Worktrees | edit-capable provisions one | rejected |
27
+ | Output | result.txt + events.jsonl + bead notes | stdout JSON / HTTP body |
28
+ | Audit | `.specialists/jobs/<id>/` | one row in `.specialists/db/observability.db` |
29
+
30
+ Use `sp script` from a shell or build pipeline. Use `sp serve` from a service
31
+ that needs an HTTP endpoint backed by `pi`. The same `.specialist.json` runs
32
+ under both.
33
+
34
+ ## When To Use This Skill
35
+
36
+ Trigger when:
37
+
38
+ - A service or script needs a single LLM-backed transform (summarize, classify,
39
+ extract) returning JSON.
40
+ - You are integrating specialists into Python/Node code that cannot block on a
41
+ supervised job lifecycle.
42
+ - The call is request/response shaped: variables in, structured output out.
43
+ - You need a sidecar HTTP endpoint (`sp serve`) to wrap a specialist for a
44
+ service consumer that already speaks HTTP.
45
+
46
+ Do NOT trigger for: code review, debugging, implementation, multi-turn work,
47
+ keep-alive sessions, anything that should write files. Those belong to
48
+ `using-specialists-v2`.
49
+
50
+ ## Specialist Compatibility (compatGuard)
51
+
52
+ A spec is rejected at request time (`specialist_load_error`) if any of:
53
+
54
+ - `execution.interactive` is `true`
55
+ - `execution.requires_worktree` is `true`
56
+ - `execution.permission_required` is anything other than `READ_ONLY`
57
+ - `skills.scripts` is non-empty
58
+ - `prompt.task_template` is missing
59
+ - a referenced `$var` in the chosen template is not supplied (`template_variable_missing`)
60
+
61
+ Author specs that explicitly target script-class:
62
+
63
+ ```json
64
+ {
65
+ "specialist": {
66
+ "metadata": { "name": "summarize-event", "version": "1.0.0", "category": "ingestion" },
67
+ "execution": {
68
+ "mode": "auto",
69
+ "model": "anthropic/claude-haiku-4-5",
70
+ "timeout_ms": 30000,
71
+ "interactive": false,
72
+ "response_format": "json",
73
+ "output_type": "custom",
74
+ "permission_required": "READ_ONLY",
75
+ "requires_worktree": false,
76
+ "max_retries": 0
77
+ },
78
+ "prompt": {
79
+ "task_template": "Summarize event $event_id with body: $body. Return JSON {\"summary\": \"...\"}.",
80
+ "output_schema": { "required": ["summary"] }
81
+ }
82
+ }
83
+ }
84
+ ```
85
+
86
+ ## `sp script` — One-Shot CLI
87
+
88
+ ```bash
89
+ sp script <specialist-name> \
90
+ --vars key1=value1 --vars key2=value2 \
91
+ [--template task_template] \
92
+ [--model anthropic/claude-sonnet-4-6] \
93
+ [--thinking medium] \
94
+ [--timeout-ms 60000] \
95
+ [--db-path /path/to/observability.db] \
96
+ [--single-instance <lock-name>] \
97
+ [--no-trace] \
98
+ [--json]
99
+ ```
100
+
101
+ Behaviour:
102
+
103
+ - Loads the spec via `SpecialistLoader` (same loader as `sp run`).
104
+ - Renders `prompt.task_template` (or named template) with `--vars`.
105
+ - Spawns `pi --mode json --no-session --no-extensions --no-tools` with the
106
+ resolved model.
107
+ - Returns the final assistant text on stdout. With `--json`, returns the full
108
+ `ScriptGenerateResult` envelope.
109
+ - Writes one row to `.specialists/db/observability.db` (same writer as `sp run`).
110
+
111
+ Exit codes:
112
+
113
+ - `0` — success.
114
+ - non-zero — failure; with `--json`, body has `success: false` and `error_type`.
115
+
116
+ Use `--single-instance <lock>` when concurrent invocations of the same logical
117
+ job must be serialized (cron, batch script).
118
+
119
+ ## `sp serve` — HTTP Daemon
120
+
121
+ ```bash
122
+ sp serve \
123
+ [--port 8000] \
124
+ [--concurrency 4] \
125
+ [--queue-timeout-ms 5000] \
126
+ [--shutdown-grace-ms 30000] \
127
+ [--project-dir /path/to/project] \
128
+ [--fallback-model anthropic/claude-haiku-4-5]
129
+ ```
130
+
131
+ POST `/v1/generate`:
132
+
133
+ ```json
134
+ {
135
+ "specialist": "summarize-event",
136
+ "variables": { "event_id": "abc", "body": "..." },
137
+ "template": "task_template",
138
+ "model_override": "anthropic/...",
139
+ "timeout_ms": 60000,
140
+ "trace": true
141
+ }
142
+ ```
143
+
144
+ Response (200, success):
145
+
146
+ ```json
147
+ {
148
+ "success": true,
149
+ "output": "<final text>",
150
+ "parsed_json": { "summary": "..." },
151
+ "meta": {
152
+ "specialist": "summarize-event",
153
+ "model": "anthropic/claude-haiku-4-5",
154
+ "duration_ms": 1234,
155
+ "trace_id": "<uuid>"
156
+ }
157
+ }
158
+ ```
159
+
160
+ Response (200, failure):
161
+
162
+ ```json
163
+ { "success": false, "error": "...", "error_type": "..." }
164
+ ```
165
+
166
+ Error types: `specialist_not_found | specialist_load_error |
167
+ template_variable_missing | auth | quota | timeout | network | invalid_json |
168
+ output_too_large | internal`.
169
+
170
+ `400` is reserved for malformed HTTP requests. `429` is returned when the
171
+ concurrency cap stays saturated for longer than `--queue-timeout-ms`.
172
+
173
+ ## Operational Rules
174
+
175
+ - One `pi` subprocess per in-flight request, bounded by `--concurrency`.
176
+ - Credentials come from `pi`'s own `~/.pi/agent/auth.json`. The service never
177
+ touches API keys.
178
+ - Observability DB is shared with `sp run`. Audit trail is unified.
179
+ - The service is sidecar-per-consumer: no multi-tenant routing, no session
180
+ state, no orchestration. If you need orchestration, use `sp run` + beads.
181
+ - For container deployments, see `docs/specialists-service-install.md`. Image
182
+ runs as non-root UID 10001; bind-mount `~/.pi` and `.specialists/`.
183
+
184
+ ## When To Switch Back To `using-specialists-v2`
185
+
186
+ If any of these become true mid-design, drop script-class and use the
187
+ orchestration runtime:
188
+
189
+ - The work needs to write files.
190
+ - The caller wants a multi-turn / keep-alive session.
191
+ - A reviewer pass is needed.
192
+ - The work should be tracked as a bead with auditability beyond a single
193
+ observability row.
194
+ - The output is iterative (steer / resume).
195
+
196
+ ## What Not To Put Here
197
+
198
+ - Bead workflow, chains, epics, reviewers, worktrees — those live in
199
+ `using-specialists-v2`.
200
+ - Orchestration MCP tooling (`use_specialist`).
201
+ - Long-running multi-turn examples.
202
+
203
+ ## Reference
204
+
205
+ - `docs/specialists-service.md` — HTTP contract and operational notes.
206
+ - `docs/specialists-service-install.md` — Docker/Podman install path.
207
+ - `docs/script-specialists.md` — historical context for the script-class shape.
208
+ - `src/cli/script.ts`, `src/cli/serve.ts`, `src/specialist/script-runner.ts` — runtime.