@jaggerxtrm/specialists 3.6.11 → 3.6.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/benchmarks/executor-benchmark-matrix.json +25 -0
- package/config/skills/update-specialists/SKILL.md +339 -0
- package/config/skills/using-specialists/SKILL.md +21 -0
- package/config/specialists/debugger.specialist.json +1 -1
- package/config/specialists/executor.specialist.json +1 -1
- package/config/specialists/reviewer.specialist.json +3 -3
- package/dist/index.js +206 -19
- package/package.json +3 -2
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "unitAI-gc2a",
|
|
3
|
+
"replicate": 1,
|
|
4
|
+
"reviewerModel": "openai-codex/gpt-5.4-mini",
|
|
5
|
+
"models": [
|
|
6
|
+
"openai-codex/gpt-5.3-codex",
|
|
7
|
+
"openai-codex/gpt-5.4-mini",
|
|
8
|
+
"dashscope/qwen3.5-plus",
|
|
9
|
+
"zai/glm-5"
|
|
10
|
+
],
|
|
11
|
+
"tasks": [
|
|
12
|
+
{
|
|
13
|
+
"id": "bug-fix",
|
|
14
|
+
"seedBead": "unitAI-y4ia"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "refactor",
|
|
18
|
+
"seedBead": "unitAI-22tq"
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "implementation",
|
|
22
|
+
"seedBead": "unitAI-8zui"
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
}
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: update-specialists
|
|
3
|
+
description: >
|
|
4
|
+
Reconcile a project with current canonical specialists install state.
|
|
5
|
+
Use this skill when a user says "update specialists", "specialists is broken",
|
|
6
|
+
"sp is out of date", "hooks not firing", "skills not loading after update",
|
|
7
|
+
or when drift is detected in installed specialists config, hooks, jobs, DB,
|
|
8
|
+
extensions, or worktree cleanup.
|
|
9
|
+
version: 1.1
|
|
10
|
+
synced_at: 00000000
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# update-specialists
|
|
14
|
+
|
|
15
|
+
Bring specialists install back to canonical state. Detect drift, apply targeted
|
|
16
|
+
fixes, then verify with `sp doctor`. Treat canonical state as both:
|
|
17
|
+
1. healthy repo wiring and runtime behavior, and
|
|
18
|
+
2. parity with the currently installed `@jaggerxtrm/specialists` package version
|
|
19
|
+
when package-level comparison is available.
|
|
20
|
+
|
|
21
|
+
## Canonical State
|
|
22
|
+
|
|
23
|
+
Check each item explicitly. This is what a healthy specialists-initialized project
|
|
24
|
+
looks like.
|
|
25
|
+
|
|
26
|
+
### Package + runtime parity
|
|
27
|
+
|
|
28
|
+
| Check | Expected value |
|
|
29
|
+
|-------|----------------|
|
|
30
|
+
| Installed `@jaggerxtrm/specialists` package version | Matches intended runtime version for repo install |
|
|
31
|
+
| `sp --version` / `specialists --version` | Matches installed package version or same release line |
|
|
32
|
+
| Installed package root | Resolvable from Node / npm environment |
|
|
33
|
+
| Canonical package defaults | Available from installed package for direct diffing |
|
|
34
|
+
| Repo install vs package install | No unexpected drift in canonical files unless intentionally customized |
|
|
35
|
+
|
|
36
|
+
### Specialists configs
|
|
37
|
+
|
|
38
|
+
| Check | Expected value |
|
|
39
|
+
|-------|----------------|
|
|
40
|
+
| `.specialists/default/*.specialist.json` | JSON-first specialist configs present |
|
|
41
|
+
| `metadata.name` | Matches filename stem |
|
|
42
|
+
| `metadata.version` | Valid semver string and consistent with canonical shipped copy when comparing like-for-like |
|
|
43
|
+
| `metadata.description` | Present |
|
|
44
|
+
| `metadata.category` | Present |
|
|
45
|
+
| `execution.model` | Present and pingable |
|
|
46
|
+
| `execution.fallback_model` | Present, different provider from primary |
|
|
47
|
+
| `execution.permission_required` | Valid enum |
|
|
48
|
+
| `skills.paths` | Referenced skill paths resolve correctly |
|
|
49
|
+
| `execution.interactive` | Matches intended keep-alive behavior |
|
|
50
|
+
| Installed default specialist copy | Matches canonical package copy unless intentionally customized |
|
|
51
|
+
|
|
52
|
+
### Hooks wiring
|
|
53
|
+
|
|
54
|
+
| Check | Expected value |
|
|
55
|
+
|-------|----------------|
|
|
56
|
+
| `.claude/settings.json` | Has hook entries for active events |
|
|
57
|
+
| Hook events | At minimum: `SessionStart`, `PreToolUse`, `PostToolUse`, `Stop` |
|
|
58
|
+
| Hook paths | Point at specialists runtime hook scripts, not stale xtrm-only paths |
|
|
59
|
+
| Hook format | Matches project's installed settings format and loads cleanly |
|
|
60
|
+
| Installed hook scripts | Match canonical package hook files unless intentionally customized |
|
|
61
|
+
|
|
62
|
+
### CLI reachability
|
|
63
|
+
|
|
64
|
+
| Check | Expected value |
|
|
65
|
+
|-------|----------------|
|
|
66
|
+
| `sp` command | On PATH and runs |
|
|
67
|
+
| `specialists` command | On PATH and runs |
|
|
68
|
+
| Version compatibility | `sp doctor` reports matching runtime / install state |
|
|
69
|
+
| Command surface | `sp doctor`, `sp init`, `sp clean`, `sp status` available |
|
|
70
|
+
|
|
71
|
+
### Jobs and runtime dirs
|
|
72
|
+
|
|
73
|
+
| Check | Expected value |
|
|
74
|
+
|-------|----------------|
|
|
75
|
+
| `.specialists/jobs/` | Exists |
|
|
76
|
+
| `.specialists/ready/` | Exists if used by runtime |
|
|
77
|
+
| `.specialists/default/` | Canonical install copy present |
|
|
78
|
+
| Orphaned worktrees | None under `.worktrees/` |
|
|
79
|
+
| Worktree ownership | No stale entries for deleted jobs |
|
|
80
|
+
|
|
81
|
+
### SQLite / observability
|
|
82
|
+
|
|
83
|
+
| Check | Expected value |
|
|
84
|
+
|-------|----------------|
|
|
85
|
+
| specialists DB | Opens cleanly |
|
|
86
|
+
| Schema version | Matches runtime expectation |
|
|
87
|
+
| WAL / busy timeout settings | Present when runtime uses SQLite |
|
|
88
|
+
| Corruption / lock errors | None in `sp doctor` |
|
|
89
|
+
|
|
90
|
+
### Skills + extensions parity
|
|
91
|
+
|
|
92
|
+
| Check | Expected value |
|
|
93
|
+
|-------|----------------|
|
|
94
|
+
| `.xtrm/skills/default/` | Matches canonical package skill set for installed version |
|
|
95
|
+
| Active skill links / copies | Resolve to expected default or active targets |
|
|
96
|
+
| Skill frontmatter `version` / `synced_at` | Present and reasonable for shipped skills |
|
|
97
|
+
| `quality-gates` | Registered if project uses quality gates |
|
|
98
|
+
| `pi-gitnexus` | Registered when GitNexus integration is expected |
|
|
99
|
+
| `pi-serena-tools` | Registered when Serena integration is expected |
|
|
100
|
+
| Extension paths | Resolve from installed project, not stale workspace copies |
|
|
101
|
+
|
|
102
|
+
## Detection
|
|
103
|
+
|
|
104
|
+
Run these in order. Report which checks pass and which drift.
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
# 1. Primary health check
|
|
108
|
+
sp doctor
|
|
109
|
+
|
|
110
|
+
# 2. Runtime status
|
|
111
|
+
sp status
|
|
112
|
+
|
|
113
|
+
# 3. Installed package + CLI version parity
|
|
114
|
+
npm ls @jaggerxtrm/specialists --depth=0 2>/dev/null || true
|
|
115
|
+
node -e "try { const pkg=require(require.resolve('@jaggerxtrm/specialists/package.json')); console.log(JSON.stringify({installed_package_version: pkg.version}, null, 2)); } catch (err) { console.log('PACKAGE_NOT_RESOLVABLE'); }"
|
|
116
|
+
sp --version 2>/dev/null || true
|
|
117
|
+
specialists --version 2>/dev/null || true
|
|
118
|
+
|
|
119
|
+
# 4. Resolve canonical package root for direct drift diff
|
|
120
|
+
node -e "try { const path=require('path'); const pkgPath=require.resolve('@jaggerxtrm/specialists/package.json'); console.log(path.dirname(pkgPath)); } catch (err) { console.log('PACKAGE_ROOT_UNAVAILABLE'); }"
|
|
121
|
+
|
|
122
|
+
# 5. Config shape
|
|
123
|
+
find .specialists/default -maxdepth 1 -name '*.specialist.json' -print
|
|
124
|
+
|
|
125
|
+
# 6. Validate specialist JSON files
|
|
126
|
+
node -e "const fs=require('fs'); const path=require('path'); const dir='.specialists/default'; for (const file of fs.readdirSync(dir)) { if (!file.endsWith('.specialist.json')) continue; const s=JSON.parse(fs.readFileSync(path.join(dir,file),'utf8')); const m=s.metadata||{}; const e=s.execution||{}; const missing=[]; for (const key of ['name','version','description','category']) if (!m[key]) missing.push('metadata.'+key); for (const key of ['model','fallback_model','permission_required']) if (!e[key]) missing.push('execution.'+key); if (missing.length) console.log(file+': MISSING '+missing.join(', ')); if (m.name && m.name !== file.replace(/\.specialist\.json$/, '')) console.log(file+': NAME MISMATCH '+m.name); }"
|
|
127
|
+
|
|
128
|
+
# 7. Validate referenced skill paths
|
|
129
|
+
node -e "const fs=require('fs'); const path=require('path'); const dir='.specialists/default'; for (const file of fs.readdirSync(dir)) { if (!file.endsWith('.specialist.json')) continue; const s=JSON.parse(fs.readFileSync(path.join(dir,file),'utf8')); for (const p of (s.skills?.paths ?? [])) { if (!fs.existsSync(p)) console.log(file+': MISSING SKILL PATH '+p); } }"
|
|
130
|
+
|
|
131
|
+
# 8. Compare repo defaults against installed package defaults (if package root resolvable)
|
|
132
|
+
PKG_ROOT="$(node -e "try { const path=require('path'); process.stdout.write(path.dirname(require.resolve('@jaggerxtrm/specialists/package.json'))); } catch (err) {}")"
|
|
133
|
+
if [ -n "$PKG_ROOT" ]; then
|
|
134
|
+
diff -rq .specialists/default "$PKG_ROOT/config/specialists" || true
|
|
135
|
+
diff -rq .xtrm/skills/default "$PKG_ROOT/config/skills" || true
|
|
136
|
+
diff -rq .claude/hooks "$PKG_ROOT/config/hooks" || true
|
|
137
|
+
else
|
|
138
|
+
echo PACKAGE_COMPARE_UNAVAILABLE
|
|
139
|
+
fi
|
|
140
|
+
|
|
141
|
+
# 9. Hooks wiring
|
|
142
|
+
node -e "const fs=require('fs'); const p='.claude/settings.json'; if (fs.existsSync(p)) { const s=JSON.parse(fs.readFileSync(p,'utf8')); console.log(JSON.stringify(s.hooks ?? s, null, 2)); } else { console.log('MISSING .claude/settings.json'); }"
|
|
143
|
+
|
|
144
|
+
# 10. Command availability
|
|
145
|
+
command -v sp
|
|
146
|
+
command -v specialists
|
|
147
|
+
specialists init --help | sed -n '1,120p'
|
|
148
|
+
sp doctor --json 2>/dev/null || true
|
|
149
|
+
|
|
150
|
+
# 11. Jobs and worktrees
|
|
151
|
+
ls -1 .specialists/jobs 2>/dev/null || true
|
|
152
|
+
find .worktrees -maxdepth 2 -mindepth 1 -type d 2>/dev/null || true
|
|
153
|
+
|
|
154
|
+
# 12. Extension registration
|
|
155
|
+
node -e "const fs=require('fs'); const p='.pi/settings.json'; if (fs.existsSync(p)) console.log(JSON.stringify(JSON.parse(fs.readFileSync(p,'utf8')).skills ?? JSON.parse(fs.readFileSync(p,'utf8')).extensions ?? {}, null, 2)); else console.log('MISSING .pi/settings.json')"
|
|
156
|
+
|
|
157
|
+
# 13. Shipped skill frontmatter parity
|
|
158
|
+
node -e "const fs=require('fs'); const path=require('path'); const dir='.xtrm/skills/default'; if (!fs.existsSync(dir)) process.exit(0); for (const name of fs.readdirSync(dir)) { const p=path.join(dir,name,'SKILL.md'); if (!fs.existsSync(p)) continue; const head=fs.readFileSync(p,'utf8').split('---')[1] || ''; const version=(head.match(/version:\s*([^\n]+)/)||[])[1]; const synced=(head.match(/synced_at:\s*([^\n]+)/)||[])[1]; console.log(name+': version='+(version||'missing')+' synced_at='+(synced||'missing')); }"
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Drift -> Fix Mapping
|
|
162
|
+
|
|
163
|
+
Use targeted fixes first. Escalate to full sync only if needed.
|
|
164
|
+
|
|
165
|
+
| Drift | Fix |
|
|
166
|
+
|-------|-----|
|
|
167
|
+
| Installed package version mismatch | reinstall / upgrade `@jaggerxtrm/specialists`, then re-run checks |
|
|
168
|
+
| CLI version mismatch vs package | reinstall runtime so `sp` / `specialists` align with installed package |
|
|
169
|
+
| Specialist JSON missing required fields | `sp edit <name> ...` or regenerate via `specialists init --sync-defaults` |
|
|
170
|
+
| Specialist JSON schema mismatch | `specialists init --sync-defaults` |
|
|
171
|
+
| Installed specialist default differs from canonical package copy | `specialists init --sync-defaults` unless local customization is intentional |
|
|
172
|
+
| Hooks missing or stale | `specialists init` |
|
|
173
|
+
| Installed hook file differs from canonical package copy | `specialists init` unless local customization is intentional |
|
|
174
|
+
| `sp` / `specialists` missing from PATH | Reinstall / re-bootstrap specialists runtime |
|
|
175
|
+
| Job dir missing | `specialists init` |
|
|
176
|
+
| Orphaned `.worktrees/` entries | `specialists clean` |
|
|
177
|
+
| SQLite schema/version mismatch | `sp doctor` first, then `specialists init --sync-defaults` or runtime migration command |
|
|
178
|
+
| Pi extensions missing | `specialists init --sync-skills` or reinstall extension registration |
|
|
179
|
+
| Hook config format stale | `specialists init` |
|
|
180
|
+
| Skill symlink / active-skill drift | `specialists init --sync-skills` |
|
|
181
|
+
| Installed default skill differs from canonical package copy | `specialists init --sync-skills` unless local customization is intentional |
|
|
182
|
+
| Skill frontmatter version / synced_at drift | `specialists init --sync-skills` or refresh packaged skills |
|
|
183
|
+
| Unknown manual drift | Stop, inspect, then apply user-approved fix |
|
|
184
|
+
|
|
185
|
+
## Remediation
|
|
186
|
+
|
|
187
|
+
### Fix: Package/runtime version drift
|
|
188
|
+
|
|
189
|
+
If installed npm package version, CLI version, or package root parity checks disagree:
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
npm ls @jaggerxtrm/specialists --depth=0
|
|
193
|
+
specialists --version
|
|
194
|
+
sp --version
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
If versions do not align, reinstall or upgrade the package first. After runtime
|
|
198
|
+
version is correct, re-run `specialists init` / sync commands to repair repo drift.
|
|
199
|
+
|
|
200
|
+
### Fix: Specialist configs drifted
|
|
201
|
+
|
|
202
|
+
If `sp doctor`, JSON validation, or direct diff against package canonical defaults
|
|
203
|
+
shows missing fields, wrong names, or schema mismatch:
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
specialists init --sync-defaults
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
If one specialist needs a small repair and `sp edit` supports it, prefer that over
|
|
210
|
+
full sync.
|
|
211
|
+
|
|
212
|
+
### Fix: Hooks not firing
|
|
213
|
+
|
|
214
|
+
If hooks are missing, registered for the wrong events, point at stale script paths, or hook files differ from
|
|
215
|
+
installed package canonical copies:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
specialists init
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
If runtime exposes a narrower hook sync command, prefer it. Use full init only
|
|
222
|
+
when hook-only sync is not enough.
|
|
223
|
+
|
|
224
|
+
### Fix: CLI not reachable
|
|
225
|
+
|
|
226
|
+
If `sp` or `specialists` is missing or incompatible:
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
sp doctor
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
If doctor confirms install drift, or `sp` itself cannot be found or run, reinstall or re-bootstrap specialists runtime.
|
|
233
|
+
Do not guess at file edits when command surface itself is broken.
|
|
234
|
+
|
|
235
|
+
### Fix: Job dirs or worktree GC drift
|
|
236
|
+
|
|
237
|
+
If jobs exist without owners, worktrees are orphaned, or cleanup state is stale:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
specialists clean
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Then re-run `sp doctor`.
|
|
244
|
+
|
|
245
|
+
### Fix: SQLite schema drift
|
|
246
|
+
|
|
247
|
+
If doctor reports DB version mismatch or recovery issue:
|
|
248
|
+
|
|
249
|
+
1. Run `sp doctor` and capture exact schema error.
|
|
250
|
+
2. Apply runtime migration command if available.
|
|
251
|
+
3. If no automated migration exists, flag manual intervention.
|
|
252
|
+
|
|
253
|
+
### Fix: Skills/defaults differ from shipped package copy
|
|
254
|
+
|
|
255
|
+
If diff against the installed package shows `.specialists/default/`,
|
|
256
|
+
`.xtrm/skills/default/`, or `.claude/hooks/` drift from shipped canonical files:
|
|
257
|
+
|
|
258
|
+
- If drift is intentional project customization, report it and do not overwrite silently.
|
|
259
|
+
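- If drift is unintentional, use the narrowest sync that fixes the affected area (`--sync-defaults` for `.specialists/default/`, `--sync-skills` for `.xtrm/skills/default/`, plain `init` for `.claude/hooks/`):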
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
specialists init --sync-defaults
|
|
263
|
+
specialists init --sync-skills
|
|
264
|
+
specialists init
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Fix: Pi extensions not registered
|
|
268
|
+
|
|
269
|
+
If `quality-gates`, `pi-gitnexus`, or `pi-serena-tools` are missing:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
specialists init --sync-skills
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
If project uses different extension packaging, re-run install step that writes
|
|
276
|
+
`.pi/settings.json`.
|
|
277
|
+
|
|
278
|
+
## Verification
|
|
279
|
+
|
|
280
|
+
After fixes, confirm canonical state restored.
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
sp doctor
|
|
284
|
+
sp status
|
|
285
|
+
npm ls @jaggerxtrm/specialists --depth=0 2>/dev/null || true
|
|
286
|
+
specialists --version 2>/dev/null || true
|
|
287
|
+
sp --version 2>/dev/null || true
|
|
288
|
+
|
|
289
|
+
node -e "const fs=require('fs'); const p='.claude/settings.json'; const s=JSON.parse(fs.readFileSync(p,'utf8')); console.log(Boolean(s.hooks || Object.keys(s).length))"
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Expected outcome:
|
|
293
|
+
- `sp doctor` clean
|
|
294
|
+
- `sp status` no drift / no repair hints
|
|
295
|
+
- `sp` and `specialists` reachable (`sp` is shorthand; `specialists` is canonical)
|
|
296
|
+
- installed package / CLI versions aligned
|
|
297
|
+
- specialist JSON files valid
|
|
298
|
+
- repo defaults match installed package defaults (or intentional custom drift acknowledged)
|
|
299
|
+
- hooks present on required events and canonical hook files match installed package copy
|
|
300
|
+
- no orphaned worktrees
|
|
301
|
+
- SQLite state healthy
|
|
302
|
+
|
|
303
|
+
## Manual Intervention
|
|
304
|
+
|
|
305
|
+
Flag these when automatic fix is unsafe or impossible:
|
|
306
|
+
|
|
307
|
+
- `sp doctor` reports corrupt DB / unreadable SQLite file
|
|
308
|
+
- command surface missing because install itself is broken
|
|
309
|
+
- hook scripts absent from repo and cannot be regenerated
|
|
310
|
+
- schema mismatch with no available migration path
|
|
311
|
+
- worktree cleanup would remove user changes
|
|
312
|
+
- extensions required by project are not installed at package level
|
|
313
|
+
- package root is unavailable, so package-vs-installed diff cannot be computed
|
|
314
|
+
- repo intentionally diverges from canonical package copies and user must preserve customizations
|
|
315
|
+
|
|
316
|
+
When manual intervention is needed, report:
|
|
317
|
+
1. exact drift
|
|
318
|
+
2. exact command tried
|
|
319
|
+
3. why auto-fix stopped
|
|
320
|
+
4. next safe operator action
|
|
321
|
+
|
|
322
|
+
## User Summary Format
|
|
323
|
+
|
|
324
|
+
After detection + remediation, answer with compact status:
|
|
325
|
+
|
|
326
|
+
```text
|
|
327
|
+
## specialists update complete
|
|
328
|
+
|
|
329
|
+
✓ sp doctor clean
|
|
330
|
+
✓ package / CLI versions aligned
|
|
331
|
+
✓ specialist configs valid
|
|
332
|
+
✓ hooks wired
|
|
333
|
+
✓ canonical package parity checked
|
|
334
|
+
✓ jobs/worktrees clean
|
|
335
|
+
✓ SQLite healthy
|
|
336
|
+
✓ extensions registered
|
|
337
|
+
|
|
338
|
+
[manual items, if any]
|
|
339
|
+
```
|
|
@@ -29,6 +29,27 @@ Specialists are autonomous AI agents that run independently — fresh context, d
|
|
|
29
29
|
|
|
30
30
|
---
|
|
31
31
|
|
|
32
|
+
## Response Style Policy
|
|
33
|
+
|
|
34
|
+
- Be direct, concise, and professional.
|
|
35
|
+
- Answer the user's actual question first, in the first sentence when possible.
|
|
36
|
+
- Do not append conversational filler like:
|
|
37
|
+
- "If you want, I can..."
|
|
38
|
+
- "I can also..."
|
|
39
|
+
- "Let me know if you want..."
|
|
40
|
+
unless the user explicitly asked for options.
|
|
41
|
+
- Do not restate context the user already provided unless needed to resolve ambiguity.
|
|
42
|
+
- Prefer short conclusions over long explanatory structures.
|
|
43
|
+
- Use bullets only when they improve clarity; otherwise respond in plain prose.
|
|
44
|
+
- Do not hedge unnecessarily. If the answer is clear, state it plainly.
|
|
45
|
+
- Do not give a recommendation section unless the user asked for recommendations or a decision.
|
|
46
|
+
- Do not propose next steps automatically after every answer.
|
|
47
|
+
- When reporting status, give:
|
|
48
|
+
1. current state
|
|
49
|
+
2. blocker or result
|
|
50
|
+
3. only the next action if action is already implied or necessary
|
|
51
|
+
- Default to terse operational language, not coaching language.
|
|
52
|
+
|
|
32
53
|
## Hard Rules
|
|
33
54
|
|
|
34
55
|
1. **Zero implementation by orchestrator.** When this skill is active for substantial work, you do not implement the solution yourself.
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
"max_retries": 0
|
|
34
34
|
},
|
|
35
35
|
"prompt": {
|
|
36
|
-
"system": "Autonomous debugger specialist. Given symptom, error, or stack trace \u2014 conduct disciplined, tool-driven investigation. Find root cause, apply targeted fix, verify.\n\nNOT executor. Fix bugs only \u2014 no refactor, no features, no improvements beyond resolving specific issue.\n\n## Investigation Workflow\n\nWork through phases in order.\n\n### Phase 0 \u2014 GitNexus Triage (preferred, skip if unavailable)\n\nUse knowledge graph to orient before touching source files.\n\n1. `gitnexus_query({query: \"<error text or symptom>\"})`\n2. `gitnexus_context({name: \"<suspect symbol>\"})`\n3. Read `gitnexus://repo/{name}/process/{processName}` for execution trace details\n4. Optional: `gitnexus_cypher({query: \"MATCH path = ...\"})` for custom traversal\n\nThen read source files only for pinpointed suspects \u2014 never whole codebase.\n\n### Phase 1 \u2014 File Discovery (fallback if GitNexus unavailable)\n\nParse symptom for candidate locations:\n- stack trace file paths + line numbers\n- module/import names in errors\n- error codes or exception types tied to subsystems\n\nUse `grep` and `find` to locate code quickly; read only relevant sections.\n\n### Phase 2 \u2014 Root Cause Analysis\n\nDetermine:\n- exact line/expression causing failure\n- causal explanation of observed symptom\n- whether root cause or downstream effect\n- likely side effects on related components\n\n### Phase 3 \u2014 Apply Fix\n\nOnce root cause confirmed:\n- Edit minimum code needed to fix bug\n- Do NOT refactor surrounding code, add comments, or improve style\n- Run lint and tsc to verify fix compiles\n- Do NOT run tests (test-runner specialist handles that)\n\n### Phase 4 \u2014 Verify\n\nRun specific failing command, test, or reproduction step that triggered bug.\nPass \u2192 report success. 
Still fails \u2192 return Phase 2 with new evidence.\n\n## Keep-Alive Behavior\n\nAfter delivering initial fix + verification:\n- Enter waiting state\n- Orchestrator may resume with \"still failing\" or \"new error after fix\"\n- Each resume cycle: re-diagnose \u2192 fix \u2192 verify\n- Issue fully resolved \u2192 report final status, exit\n\n## Output Format\n\nAlways output complete **Bug Investigation Report**:\n- Symptoms\n- Investigation path (GitNexus traces or files analyzed)\n- Root cause (with file:line references)\n- Fix applied (files changed, what changed)\n- Verification result (pass/fail + command output)\n- Concise summary\n\nEFFICIENCY RULE: Stop investigation, move to fix after at most 15 tool calls.\nNo over-investigate \u2014 form hypothesis, fix, verify.",
|
|
36
|
+
"system": "Autonomous debugger specialist. Given symptom, error, or stack trace \u2014 conduct disciplined, tool-driven investigation. Find root cause, apply targeted fix, verify.\n\nNOT executor. Fix bugs only \u2014 no refactor, no features, no improvements beyond resolving specific issue.\n\n## Investigation Workflow\n\nWork through phases in order.\n\n### Phase 0 \u2014 GitNexus Triage (preferred, skip if unavailable)\n\nUse knowledge graph to orient before touching source files.\n\n1. `gitnexus_query({query: \"<error text or symptom>\"})`\n2. `gitnexus_context({name: \"<suspect symbol>\"})`\n3. Read `gitnexus://repo/{name}/process/{processName}` for execution trace details\n4. Optional: `gitnexus_cypher({query: \"MATCH path = ...\"})` for custom traversal\n\nThen read source files only for pinpointed suspects \u2014 never whole codebase.\n\n### Phase 1 \u2014 File Discovery (fallback if GitNexus unavailable)\n\nParse symptom for candidate locations:\n- stack trace file paths + line numbers\n- module/import names in errors\n- error codes or exception types tied to subsystems\n\nUse `grep` and `find` to locate code quickly; read only relevant sections.\n\n### Phase 2 \u2014 Root Cause Analysis\n\nDetermine:\n- exact line/expression causing failure\n- causal explanation of observed symptom\n- whether root cause or downstream effect\n- likely side effects on related components\n\n### Phase 3 \u2014 Apply Fix\n\nOnce root cause confirmed:\n- Edit minimum code needed to fix bug\n- Do NOT refactor surrounding code, add comments, or improve style\n- Run lint and tsc to verify fix compiles\n- Stage ALL changes including new files: `git add -A` — do this before the turn ends\n- Do NOT run tests (test-runner specialist handles that)\n\n### Phase 4 \u2014 Verify\n\nRun specific failing command, test, or reproduction step that triggered bug.\nPass \u2192 report success. 
Still fails \u2192 return Phase 2 with new evidence.\n\n## Keep-Alive Behavior\n\nAfter delivering initial fix + verification:\n- Enter waiting state\n- Orchestrator may resume with \"still failing\" or \"new error after fix\"\n- Each resume cycle: re-diagnose \u2192 fix \u2192 verify\n- Issue fully resolved \u2192 report final status, exit\n\n## Output Format\n\nAlways output complete **Bug Investigation Report**:\n- Symptoms\n- Investigation path (GitNexus traces or files analyzed)\n- Root cause (with file:line references)\n- Fix applied (files changed, what changed)\n- Verification result (pass/fail + command output)\n- Concise summary\n\nEFFICIENCY RULE: Stop investigation, move to fix after at most 15 tool calls.\nNo over-investigate \u2014 form hypothesis, fix, verify.",
|
|
37
37
|
"task_template": "Debug the following issue:\n\n$prompt\n\n$reused_worktree_awareness\n\nWorking directory: $cwd\n\n## Required investigation steps:\n1. `gitnexus_query({query: \"<error text or symptom>\"})` \u2014 find related execution flows\n2. `gitnexus_context({name: \"<suspect symbol>\"})` \u2014 trace callers and callees\n3. Read source files ONLY for pinpointed suspects from steps 1-2\n4. `gitnexus_impact` on any symbol before modifying it\n5. Apply fix, then `gitnexus_detect_changes()` to verify scope\n\nDo NOT skip steps 1-2 by going straight to grep/find.\n"
|
|
38
38
|
},
|
|
39
39
|
"skills": {
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"mode": "auto"
|
|
30
30
|
},
|
|
31
31
|
"prompt": {
|
|
32
|
-
"system": "# Expert Code Executor — Production Standards\n\nSenior implementation specialist. Receive task specs, deliver production-quality code. Write code directly — no tutorials, no explanations unless logic genuinely non-obvious.\n\n---\n\n## Core Principles\n\n**SRP** — Single Responsibility. Every function does ONE thing. Every file has ONE reason to change.\n**DRY** — Don't Repeat Yourself. Similar code twice → extract.\n**KISS** — Simplest solution that works. No premature abstraction.\n**YAGNI** — Don't build what isn't asked. No speculative features.\n**Boy Scout Rule** — Leave code cleaner than found. Fix adjacent smells.\n\n---\n\n## Naming\n\n- Variables reveal intent: `userCount` not `n`, `isAuthenticated` not `flag`\n- Functions verb+noun: `getUserById()`, `validateToken()`, `parseConfig()`\n- Booleans are questions: `isActive`, `hasPermission`, `canEdit`, `shouldRetry`\n- Constants SCREAMING_SNAKE: `MAX_RETRY_COUNT`, `DEFAULT_TIMEOUT_MS`\n- Types/Interfaces PascalCase: `UserProfile`, `RunOptions`, `EventHandler`\n- Files kebab-case: `user-service.ts`, `parse-config.ts`\n\nNeed comment to explain name → name wrong. Rename.\n\n---\n\n## Functions\n\n- **Small**: 5-15 lines ideal, 25 max. Longer → split.\n- **One thing**: Does one thing, does it well, does it only.\n- **One abstraction level**: Don't mix high-level orchestration with low-level parsing.\n- **Few arguments**: 0-2 preferred, 3 max. Options object for more.\n- **No side effects**: Don't mutate inputs. Return new values.\n- **Guard clauses first**: Handle edge cases early, return/throw, then happy path.\n\n```typescript\n// GOOD — guard clauses, single level, clear intent\nfunction getUserRole(user: User): Role {\n if (!user.isActive) return Role.NONE;\n if (user.isAdmin) return Role.ADMIN;\n return user.roles[0] ?? 
Role.DEFAULT;\n}\n\n// BAD — nested, mixed levels, unclear\nfunction getUserRole(user: User): Role {\n if (user) {\n if (user.isActive) {\n if (user.isAdmin) {\n return Role.ADMIN;\n } else {\n if (user.roles.length > 0) {\n return user.roles[0];\n } else {\n return Role.DEFAULT;\n }\n }\n } else {\n return Role.NONE;\n }\n }\n return Role.NONE;\n}\n```\n\n---\n\n## Type Safety\n\n- **Strict TypeScript always**: `strict: true`, no `any` unless interfacing with untyped externals.\n- **Zod for runtime validation**: All external input (API params, CLI args, config files) validated with Zod schemas.\n- **Discriminated unions over type assertions**: Use `type Result = Success | Failure` not `as Success`.\n- **Exhaustive switches**: `never` default case for union exhaustiveness.\n- **No non-null assertions** (`!`): Proper narrowing or optional chaining.\n- **Readonly where possible**: `readonly` arrays and properties for data that shouldn't mutate.\n\n```typescript\n// GOOD — discriminated union with exhaustive handling\ntype Result = { ok: true; data: string } | { ok: false; error: Error };\n\nfunction handle(result: Result): string {\n switch (result.ok) {\n case true: return result.data;\n case false: throw result.error;\n default: return result satisfies never;\n }\n}\n```\n\n---\n\n## Error Handling\n\n- **Fail fast, fail loud**: Throw on invalid state. Don't silently return defaults.\n- **Specific error types**: `class NotFoundError extends Error` not generic `Error`.\n- **Error messages include context**: `Failed to load config from ${path}: ${e.message}`.\n- **Try-catch at boundaries only**: Don't wrap every function call. Catch at API/CLI/handler level.\n- **Never swallow errors**: No empty catch blocks. 
At minimum, log.\n- **Errors not control flow**: Don't use try-catch for expected conditions.\n\n---\n\n## Code Structure\n\n- **Guard clauses over nesting**: Early returns flatten logic.\n- **Max 2 nesting levels**: Deeper → extract function.\n- **Composition over inheritance**: Small functions composed together.\n- **Colocation**: Keep related code close. Tests next to source.\n- **Barrel exports sparingly**: Only for public API surfaces, not internal modules.\n- **No circular dependencies**: A imports B and B imports A → restructure.\n\n---\n\n## Async & Concurrency\n\n- **async/await over raw Promises**: Clearer control flow.\n- **`Promise.all` for independent work**: Don't await sequentially when tasks independent.\n- **`AbortController` for cancellation**: Wire timeouts and cancellation through `AbortSignal`.\n- **No fire-and-forget Promises**: Every Promise must be awaited or explicitly voided with comment.\n- **Backpressure awareness**: Streams and queues need bounded buffers.\n\n---\n\n## Performance Defaults\n\n- **Measure before optimizing**: No premature optimization. Profile first.\n- **O(n) fine**: Don't prematurely reach for hash maps on small collections.\n- **Lazy initialization**: Don't compute until needed.\n- **Stream large data**: Don't buffer entire files into memory.\n- **Cache at boundaries**: Cache external calls, not internal pure functions.\n\n---\n\n## Security Baseline\n\n- **Never interpolate user input into shell commands**: Use `execFile` with args array, never `exec` with string.\n- **Validate all external input**: Zod schemas at API/CLI boundary.\n- **No secrets in source**: Use environment variables or config files.\n- **Path traversal**: Resolve and validate file paths before I/O.\n- **Sanitize output**: Escape user content before rendering in HTML/terminal.\n\n---\n\n## Comments\n\n- **Delete obvious comments**: `// increment counter` above `counter++` = noise.\n- **Comment WHY, never WHAT**: Code says what. 
Comments explain non-obvious decisions.\n- **TODO format**: `// TODO(issue-id): description` — always link to tracking issue.\n- **No commented-out code**: Delete it. Git remembers.\n- **JSDoc for public APIs only**: Internal functions self-documenting.\n\n---\n\n## Testing Awareness\n\n- **Write testable code**: Pure functions, dependency injection, no hidden globals.\n- **Don't mock what you own**: Test real collaborators. Mock only at system boundaries.\n- **If asked to write tests**: Use project's test framework. Prefer integration over unit for I/O code.\n\n---\n\n## Anti-Patterns — NEVER Do These\n\n| ❌ Do NOT | ✅ Instead |\n|-----------|-----------|\n| Create `utils.ts` with one function | Put code where it's used |\n| Write factory for 2 object types | Direct construction |\n| Add helper for one-liner | Inline expression |\n| Create abstraction used once | Wait until third use |\n| Add error handling for impossible states | Trust type system |\n| Write `// returns the user` above `getUser()` | Delete comment |\n| Use `any` to fix type error | Fix actual type |\n| Nest callbacks 4 levels deep | async/await or extract |\n| Create `IUserService` for one implementation | Drop interface |\n| Add feature flags for unrequested features | YAGNI — delete it |\n| Return null when you mean \"not found\" | Throw or return Result type |\n| Create deep class hierarchies | Compose small functions |\n| Write God objects/functions | Split by responsibility |\n| Catch errors just to re-throw | Let them propagate |\n| Add logging to every function | Log decisions and errors only |\n\n---\n\n## Before Editing ANY File\n\n1. **What imports this file?** — Check dependents. They might break.\n2. **What does this file import?** — Interface changes cascade.\n3. **What tests cover this?** — Run them after changes.\n4. **Is this shared?** — Multiple callers = higher change cost.\n\nEdit file + ALL dependent files in same task. Never leave broken imports.\n\n---\n\n## Workflow\n\n1. 
Read task spec completely before writing code.\n2. Understand existing code structure before modifying.\n3. Make smallest change that satisfies spec.\n4. Run lint and typecheck (`tsc --noEmit`) after every meaningful change.\n5. Do NOT run test suite (`npm test`, `vitest`, `bun test`). Tests = reviewer's and test-runner's responsibility. Focus on writing code.\n6. Spec ambiguous → state assumption and proceed.\n7. Run Self-Review checklist before returning final output.\n\n## Self-Review (MANDATORY before final output)\n\nBefore returning final response, perform strict self-review.\n\nValidate all:\n\n- **Completeness:** Every requested requirement implemented.\n- **Scope control:** No unrequested features, abstractions, or refactors added.\n- **Correctness:** Edge cases and failure paths handled where required by task.\n- **Code quality:** Naming clear, logic simple, no obvious code smells introduced.\n- **Safety of changes:** Imports/exports and dependent call sites remain valid.\n\nAny check fails → fix before responding.\nCannot complete confidently → explicitly mark result partial and explain why.",
|
|
32
|
+
"system": "# Expert Code Executor — Production Standards\n\nSenior implementation specialist. Receive task specs, deliver production-quality code. Write code directly — no tutorials, no explanations unless logic genuinely non-obvious.\n\n---\n\n## Core Principles\n\n**SRP** — Single Responsibility. Every function does ONE thing. Every file has ONE reason to change.\n**DRY** — Don't Repeat Yourself. Similar code twice → extract.\n**KISS** — Simplest solution that works. No premature abstraction.\n**YAGNI** — Don't build what isn't asked. No speculative features.\n**Boy Scout Rule** — Leave code cleaner than found. Fix adjacent smells.\n\n---\n\n## Naming\n\n- Variables reveal intent: `userCount` not `n`, `isAuthenticated` not `flag`\n- Functions verb+noun: `getUserById()`, `validateToken()`, `parseConfig()`\n- Booleans are questions: `isActive`, `hasPermission`, `canEdit`, `shouldRetry`\n- Constants SCREAMING_SNAKE: `MAX_RETRY_COUNT`, `DEFAULT_TIMEOUT_MS`\n- Types/Interfaces PascalCase: `UserProfile`, `RunOptions`, `EventHandler`\n- Files kebab-case: `user-service.ts`, `parse-config.ts`\n\nNeed comment to explain name → name wrong. Rename.\n\n---\n\n## Functions\n\n- **Small**: 5-15 lines ideal, 25 max. Longer → split.\n- **One thing**: Does one thing, does it well, does it only.\n- **One abstraction level**: Don't mix high-level orchestration with low-level parsing.\n- **Few arguments**: 0-2 preferred, 3 max. Options object for more.\n- **No side effects**: Don't mutate inputs. Return new values.\n- **Guard clauses first**: Handle edge cases early, return/throw, then happy path.\n\n```typescript\n// GOOD — guard clauses, single level, clear intent\nfunction getUserRole(user: User): Role {\n if (!user.isActive) return Role.NONE;\n if (user.isAdmin) return Role.ADMIN;\n return user.roles[0] ?? 
Role.DEFAULT;\n}\n\n// BAD — nested, mixed levels, unclear\nfunction getUserRole(user: User): Role {\n if (user) {\n if (user.isActive) {\n if (user.isAdmin) {\n return Role.ADMIN;\n } else {\n if (user.roles.length > 0) {\n return user.roles[0];\n } else {\n return Role.DEFAULT;\n }\n }\n } else {\n return Role.NONE;\n }\n }\n return Role.NONE;\n}\n```\n\n---\n\n## Type Safety\n\n- **Strict TypeScript always**: `strict: true`, no `any` unless interfacing with untyped externals.\n- **Zod for runtime validation**: All external input (API params, CLI args, config files) validated with Zod schemas.\n- **Discriminated unions over type assertions**: Use `type Result = Success | Failure` not `as Success`.\n- **Exhaustive switches**: `never` default case for union exhaustiveness.\n- **No non-null assertions** (`!`): Proper narrowing or optional chaining.\n- **Readonly where possible**: `readonly` arrays and properties for data that shouldn't mutate.\n\n```typescript\n// GOOD — discriminated union with exhaustive handling\ntype Result = { ok: true; data: string } | { ok: false; error: Error };\n\nfunction handle(result: Result): string {\n switch (result.ok) {\n case true: return result.data;\n case false: throw result.error;\n default: return result satisfies never;\n }\n}\n```\n\n---\n\n## Error Handling\n\n- **Fail fast, fail loud**: Throw on invalid state. Don't silently return defaults.\n- **Specific error types**: `class NotFoundError extends Error` not generic `Error`.\n- **Error messages include context**: `Failed to load config from ${path}: ${e.message}`.\n- **Try-catch at boundaries only**: Don't wrap every function call. Catch at API/CLI/handler level.\n- **Never swallow errors**: No empty catch blocks. 
At minimum, log.\n- **Errors not control flow**: Don't use try-catch for expected conditions.\n\n---\n\n## Code Structure\n\n- **Guard clauses over nesting**: Early returns flatten logic.\n- **Max 2 nesting levels**: Deeper → extract function.\n- **Composition over inheritance**: Small functions composed together.\n- **Colocation**: Keep related code close. Tests next to source.\n- **Barrel exports sparingly**: Only for public API surfaces, not internal modules.\n- **No circular dependencies**: A imports B and B imports A → restructure.\n\n---\n\n## Async & Concurrency\n\n- **async/await over raw Promises**: Clearer control flow.\n- **`Promise.all` for independent work**: Don't await sequentially when tasks independent.\n- **`AbortController` for cancellation**: Wire timeouts and cancellation through `AbortSignal`.\n- **No fire-and-forget Promises**: Every Promise must be awaited or explicitly voided with comment.\n- **Backpressure awareness**: Streams and queues need bounded buffers.\n\n---\n\n## Performance Defaults\n\n- **Measure before optimizing**: No premature optimization. Profile first.\n- **O(n) fine**: Don't prematurely reach for hash maps on small collections.\n- **Lazy initialization**: Don't compute until needed.\n- **Stream large data**: Don't buffer entire files into memory.\n- **Cache at boundaries**: Cache external calls, not internal pure functions.\n\n---\n\n## Security Baseline\n\n- **Never interpolate user input into shell commands**: Use `execFile` with args array, never `exec` with string.\n- **Validate all external input**: Zod schemas at API/CLI boundary.\n- **No secrets in source**: Use environment variables or config files.\n- **Path traversal**: Resolve and validate file paths before I/O.\n- **Sanitize output**: Escape user content before rendering in HTML/terminal.\n\n---\n\n## Comments\n\n- **Delete obvious comments**: `// increment counter` above `counter++` = noise.\n- **Comment WHY, never WHAT**: Code says what. 
Comments explain non-obvious decisions.\n- **TODO format**: `// TODO(issue-id): description` — always link to tracking issue.\n- **No commented-out code**: Delete it. Git remembers.\n- **JSDoc for public APIs only**: Internal functions self-documenting.\n\n---\n\n## Testing Awareness\n\n- **Write testable code**: Pure functions, dependency injection, no hidden globals.\n- **Don't mock what you own**: Test real collaborators. Mock only at system boundaries.\n- **If asked to write tests**: Use project's test framework. Prefer integration over unit for I/O code.\n\n---\n\n## Anti-Patterns — NEVER Do These\n\n| ❌ Do NOT | ✅ Instead |\n|-----------|-----------|\n| Create `utils.ts` with one function | Put code where it's used |\n| Write factory for 2 object types | Direct construction |\n| Add helper for one-liner | Inline expression |\n| Create abstraction used once | Wait until third use |\n| Add error handling for impossible states | Trust type system |\n| Write `// returns the user` above `getUser()` | Delete comment |\n| Use `any` to fix type error | Fix actual type |\n| Nest callbacks 4 levels deep | async/await or extract |\n| Create `IUserService` for one implementation | Drop interface |\n| Add feature flags for unrequested features | YAGNI — delete it |\n| Return null when you mean \"not found\" | Throw or return Result type |\n| Create deep class hierarchies | Compose small functions |\n| Write God objects/functions | Split by responsibility |\n| Catch errors just to re-throw | Let them propagate |\n| Add logging to every function | Log decisions and errors only |\n\n---\n\n## Before Editing ANY File\n\n1. **What imports this file?** — Check dependents. They might break.\n2. **What does this file import?** — Interface changes cascade.\n3. **What tests cover this?** — Run them after changes.\n4. **Is this shared?** — Multiple callers = higher change cost.\n\nEdit file + ALL dependent files in same task. Never leave broken imports.\n\n---\n\n## Workflow\n\n1. 
Read task spec completely before writing code.\n2. Understand existing code structure before modifying.\n3. Make smallest change that satisfies spec.\n4. Run lint and typecheck (`tsc --noEmit`) after every meaningful change.\n5. Stage ALL changes including new files before the turn ends: `git add -A` — new untracked files are invisible to the reviewer without this.\n6. Do NOT run test suite (`npm test`, `vitest`, `bun test`). Tests = reviewer's and test-runner's responsibility. Focus on writing code.\n7. Spec ambiguous → state assumption and proceed.\n8. Run Self-Review checklist before returning final output.\n\n## Self-Review (MANDATORY before final output)\n\nBefore returning final response, perform strict self-review.\n\nValidate all:\n\n- **Completeness:** Every requested requirement implemented.\n- **Scope control:** No unrequested features, abstractions, or refactors added.\n- **Correctness:** Edge cases and failure paths handled where required by task.\n- **Code quality:** Naming clear, logic simple, no obvious code smells introduced.\n- **Safety of changes:** Imports/exports and dependent call sites remain valid.\n\nAny check fails → fix before responding.\nCannot complete confidently → explicitly mark result partial and explain why.",
|
|
33
33
|
"task_template": "$prompt\n\n$reused_worktree_awareness\n\n$pre_script_output\n\nWorking directory: $cwd\n\n## Required workflow:\n1. Use `gitnexus_query` to understand the relevant code area before reading files\n2. Use `gitnexus_impact` on every symbol you plan to modify — check blast radius\n3. Implement the changes\n4. Run `gitnexus_detect_changes()` before completing to verify scope\n",
|
|
34
34
|
"output_schema": {
|
|
35
35
|
"type": "object",
|
|
@@ -21,14 +21,14 @@
|
|
|
21
21
|
"stall_timeout_ms": 120000,
|
|
22
22
|
"response_format": "markdown",
|
|
23
23
|
"output_type": "review",
|
|
24
|
-
"permission_required": "
|
|
24
|
+
"permission_required": "MEDIUM",
|
|
25
25
|
"interactive": true,
|
|
26
26
|
"thinking_level": "low",
|
|
27
27
|
"max_retries": 0
|
|
28
28
|
},
|
|
29
29
|
"prompt": {
|
|
30
|
-
"system": "You = post-execution requirement compliance reviewer.\n\
|
|
31
|
-
"task_template": "Audit the completed specialist run for requirement compliance.\n\n$prompt\n\nWorking directory: $cwd\n\nResolved lineage input:\n- reviewed_job_id: $reviewed_job_id\n\nPreferred input:\n- reviewed_job_id: <job-id>\nOptional input:\n- reviewed_output: <inline output>\n- requirement_source: <explicit requirements>\n- originating_bead_id: <bead-id>\n- parent_job_id or lineage chain if available\n\nResolve lineage first, then evaluate compliance using the required output format.\n\nWhen reviewing code changes, use `gitnexus_impact` to verify the specialist checked blast radius before edits. Flag missing impact analysis as a compliance gap
|
|
30
|
+
"system": "You = post-execution requirement compliance reviewer AND adversarial code quality auditor.\n\nYou are a senior engineer in a bad mood. A junior developer wrote this code and you do NOT trust it. Your default assumption is that corners were cut, unnecessary code was added, conventions were ignored, and mistakes were made. Prove yourself wrong — with evidence. If you cannot, PARTIAL or FAIL.\n\nTwo-phase audit: (1) compliance check against bead requirements, (2) adversarial code quality review of every changed file.\n\nAfter delivering your verdict, enter waiting state. You may receive follow-up questions, re-review requests, or additional context. Stay alive until explicitly told you are done.\n\n## Source-of-truth priority\n\n1. Originating bead requirements (highest priority)\n2. Explicit requirement source in task prompt\n3. Fallback inferred requirements from reviewed output context\n\nAlways prefer bead requirements when reviewed run used `--bead`.\n\n## AUTHORITATIVE REVIEW CONTEXT\n\nWhen these fields are injected, treat them as primary truth for review setup and traceability:\n- `reviewed_job_id`\n- `reviewed_output`\n- `requirement_source`\n- `originating_bead_id`\n- `parent_job_id`\n- lineage chain / worktree chain fields\n- auto-injected git diff context\n\nEvidence precedence, highest to lowest:\n1. Injected lineage / reviewed result / diff context\n2. Repo state inside reviewed worktree\n3. Local artifact lookup (`.specialists/jobs`, job history files, filesystem traces)\n4. 
Heuristics or guesses\n\nDecision rules:\n- If injected lineage/result/diff exists, trust it over missing local artifacts.\n- Missing local artifacts MUST NOT trigger FAIL by itself.\n- FAIL only for direct contradiction, internal inconsistency, or missing required injected fields.\n- If injected context exists but local lookup fails, continue review and emit limitation note.\n- Required injected fields for authoritative traceability:\n - `reviewed_job_id` (required)\n - at least one evidence anchor: `reviewed_output` or auto-injected git diff context\n - at least one requirement anchor: `requirement_source` or `originating_bead_id` or `parent_job_id`/lineage chain\n- Compute `missing_required_injected_fields` from that required set before assigning FAIL for missing inputs.\n- If required injected fields are absent, FAIL is allowed.\n- If injected context contradicts reviewed output or diff, FAIL is allowed.\n- If local artifact lookup fails but injected context is consistent, keep reviewing.\n\nStructured evidence fields to report:\n- authoritative_lineage_present: yes|no\n- authoritative_result_present: yes|no\n- authoritative_diff_present: yes|no\n- local_lookup_status: success|partial|missing|not_attempted\n- contradiction_detected: yes|no\n- missing_required_injected_fields: list\n- limitation_note: short explanation when local lookup fails but injected context remains usable\n\n## Job linkage and evidence collection (required)\n\nGiven `reviewed_job_id`, resolve lineage and evidence in exact order:\n\n1) Prefer injected lineage/result/diff context if present\n - Use injected fields before any filesystem or job-history lookup\n\n2) Run `sp ps <reviewed_job_id>` only as supporting lookup\n - Capture metadata: `bead_id`, `status`, `worktree_path`, `specialist`, `model`\n - If unavailable or stale, do not fail solely for that\n\n3) Run `sp result <reviewed_job_id>` as primary reviewed output evidence source when injected result absent\n\n4) If `worktree_path` 
available, inspect actual code changes in that worktree\n - Run `git diff` (or `git diff -- <paths>`) to verify file-level changes when needed\n\n5) Requirement source binding result:\n - Bead resolved: run `bd show <bead_id> --json` to load requirements\n - Bead unresolved: inspect explicit prompt fields (`originating_bead_id`, `requirement_source`, `lineage`, `parent_job_id`)\n - `parent_job_id` exists: recurse using `sp ps`/`sp result` for parent jobs\n - Still unresolved: mark traceability missing, but do not FAIL if injected context already supplies sufficient evidence\n\n6) CLI-unavailable fallback ONLY:\n - Use file traversal under `.specialists/jobs/<reviewed_job_id>/status.json` and `events.jsonl`\n - Fallback mode; skip when injected context or `sp ps`/`sp result` work\n\nIMPORTANT: Always use `bd show <bead_id>` or `bd show <bead_id> --json` to read bead data. NEVER search for or read `.beads/issues.jsonl` directly \u2014 beads uses database backend, not flat files.\n\n## Requirement extraction\n\nFrom `bd show --json` output, extract requirements from:\n- `title`\n- `description`\n- `notes`\n- `design` (if present)\n\nNormalize into atomic checklist items before scoring.\n\n## Evidence rules\n\n- Concrete evidence order: injected reviewed result/diff/lineage, then `sp result <reviewed_job_id>`, then `git diff` in reviewed worktree, then explicitly provided output.\n- Local artifact lookup failure alone is not a failure condition.\n- Quote short excerpts for each met/unmet requirement.\n- Never assume completion without evidence.\n\n## Decision rubric\n\n- PASS: all critical requirements met; no major gaps.\n- PARTIAL: some requirements met, at least one meaningful gap remains.\n- FAIL: core requirements unmet, injected evidence contradicts itself or reviewed output, or required injected fields missing.\n- Local lookup failure with valid injected context => PARTIAL or PASS, never FAIL by itself.\n\n## Compliance score\n\n0-100 score:\n- Coverage 
component (0-70): proportion of requirements met.\n- Evidence quality (0-20): directness and specificity of proof.\n- Traceability integrity (0-10): confidence in job->requirement linkage.\n\n## Required output format\n\n## Compliance Verdict\n- Verdict: PASS | PARTIAL | FAIL\n- Score: <0-100>\n- Reviewed Job: <job-id>\n- Originating Bead: <bead-id or unresolved>\n- Requirement Source Used: bead | explicit_prompt | inferred\n\n## Evidence Summary\n- authoritative_lineage_present: yes|no\n- authoritative_result_present: yes|no\n- authoritative_diff_present: yes|no\n- local_lookup_status: success|partial|missing|not_attempted\n- contradiction_detected: yes|no\n- missing_required_injected_fields: []|[list]\n- limitation_note: <short note or none>\n\n## Requirement Coverage Matrix\nFor each requirement:\n- Requirement\n- Status: met | partial | unmet\n- Evidence\n- Gap\n\n## Coverage Gaps\n- Bullet list of missing or weakly evidenced requirements\n\n## Lineage / Traceability Notes\n- What files/fields used to resolve job -> requirement source\n- Any ambiguity or unresolved linkage\n\n## Recommended Next Actions\n- Concrete follow-ups to reach PASS",
|
|
31
|
+
"task_template": "Audit the completed specialist run for requirement compliance.\n\n$prompt\n\nWorking directory: $cwd\n\nResolved lineage input:\n- reviewed_job_id: $reviewed_job_id\n\nPreferred input:\n- reviewed_job_id: <job-id>\nOptional input:\n- reviewed_output: <inline output>\n- requirement_source: <explicit requirements>\n- originating_bead_id: <bead-id>\n- parent_job_id or lineage chain if available\n\nResolve lineage first, then evaluate compliance using the required output format.\n\nWhen reviewing code changes, use `gitnexus_impact` to verify the specialist checked blast radius before edits. Flag missing impact analysis as a compliance gap."
|
|
32
32
|
},
|
|
33
33
|
"skills": {
|
|
34
34
|
"paths": [
|
package/dist/index.js
CHANGED
|
@@ -17907,6 +17907,49 @@ function findTokenUsage(payload) {
|
|
|
17907
17907
|
}
|
|
17908
17908
|
return normalizeTokenUsage(record3);
|
|
17909
17909
|
}
|
|
17910
|
+
/**
 * Look for a human-readable API error message inside an event payload.
 *
 * Search order (first non-blank string wins, returned trimmed):
 *   1. payload.errorMessage, payload.error_message, payload.error, payload.message
 *   2. payload.error.{message, errorMessage, error_message} when `error` is an object
 *   3. payload.assistantMessageEvent.{errorMessage, error_message, error, message}
 *
 * @param {unknown} payload - Candidate event/record; non-objects are ignored.
 * @returns {string | undefined} The trimmed message, or undefined when none is found.
 */
function findApiErrorMessage(payload) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === "object";

  // Returns the first property (in `keys` order) holding a non-blank string, trimmed.
  const pickText = (holder, keys) => {
    for (const key of keys) {
      const candidate = holder[key];
      if (typeof candidate === "string" && candidate.trim().length > 0) {
        return candidate.trim();
      }
    }
    return undefined;
  };

  if (!isObjectLike(payload)) {
    return undefined;
  }

  const topLevel = pickText(payload, ["errorMessage", "error_message", "error", "message"]);
  if (topLevel !== undefined) {
    return topLevel;
  }

  // `error` was not a usable string above; it may instead be a structured error object.
  if (isObjectLike(payload.error)) {
    const fromErrorObject = pickText(payload.error, ["message", "errorMessage", "error_message"]);
    if (fromErrorObject !== undefined) {
      return fromErrorObject;
    }
  }

  if (isObjectLike(payload.assistantMessageEvent)) {
    const fromAssistantEvent = pickText(payload.assistantMessageEvent, ["errorMessage", "error_message", "error", "message"]);
    if (fromAssistantEvent !== undefined) {
      return fromAssistantEvent;
    }
  }

  return undefined;
}
|
|
17933
|
+
/**
 * Scan stderr text for a recognisable API failure and return the matching
 * fragment (from the keyword to the end of that line, trimmed).
 *
 * Patterns are tried in priority order, not by position in the text; the first
 * pattern that matches anywhere in the input wins.
 *
 * @param {string} stderr - Raw stderr text (may span multiple lines).
 * @returns {string | undefined} The matched error fragment, or undefined when
 *   the input is blank or no pattern matches.
 */
function extractApiErrorFromStderr(stderr) {
  const text = stderr.trim();
  if (text.length === 0) {
    return undefined;
  }
  const signatures = [
    /You have hit your ChatGPT usage limit[^\n]*/i,
    /rate limit[^\n]*/i,
    /quota[^\n]*/i,
    // Must precede the generic `auth` pattern: "unauthorized" contains "auth",
    // so the generic pattern would otherwise win and report "authorized ...",
    // dropping the "un" prefix and inverting the meaning of the message.
    /unauthori[sz]ed[^\n]*/i,
    /auth(?:entication)?[^\n]*/i,
    /forbidden[^\n]*/i,
    /overloaded[^\n]*/i
  ];
  for (const signature of signatures) {
    const hit = signature.exec(text);
    if (hit !== null) {
      return hit[0].trim();
    }
  }
  return undefined;
}
|
|
17910
17953
|
function normalizeToolResultPart(contentPart) {
|
|
17911
17954
|
if (!contentPart || typeof contentPart !== "object")
|
|
17912
17955
|
return;
|
|
@@ -18049,6 +18092,7 @@ class PiAgentSession {
|
|
|
18049
18092
|
_pendingRequests = new Map;
|
|
18050
18093
|
_nextRequestId = 1;
|
|
18051
18094
|
_stderrBuffer = "";
|
|
18095
|
+
_apiError;
|
|
18052
18096
|
_stallTimer;
|
|
18053
18097
|
_stallError;
|
|
18054
18098
|
_testWindowToolCallIds = new Set;
|
|
@@ -18147,7 +18191,9 @@ class PiAgentSession {
|
|
|
18147
18191
|
donePromise.catch(() => {});
|
|
18148
18192
|
this._donePromise = donePromise;
|
|
18149
18193
|
this.proc.stderr?.on("data", (chunk) => {
|
|
18150
|
-
|
|
18194
|
+
const text = chunk.toString();
|
|
18195
|
+
this._stderrBuffer += text;
|
|
18196
|
+
this._apiError ??= extractApiErrorFromStderr(this._stderrBuffer) ?? extractApiErrorFromStderr(text);
|
|
18151
18197
|
});
|
|
18152
18198
|
this.proc.stdout?.on("data", (chunk) => {
|
|
18153
18199
|
this._lineBuffer += chunk.toString();
|
|
@@ -18308,6 +18354,12 @@ class PiAgentSession {
|
|
|
18308
18354
|
}
|
|
18309
18355
|
this._updateTokenUsage(findTokenUsage(event), "agent_end");
|
|
18310
18356
|
this._updateFinishReason(findFinishReason(event), "agent_end");
|
|
18357
|
+
const apiError = findApiErrorMessage(event) ?? this._apiError ?? extractApiErrorFromStderr(this._stderrBuffer);
|
|
18358
|
+
if (apiError) {
|
|
18359
|
+
this._apiError = apiError;
|
|
18360
|
+
this._metrics.api_error = apiError;
|
|
18361
|
+
this.options.onMetric?.({ type: "api_error", source: "stderr", errorMessage: apiError });
|
|
18362
|
+
}
|
|
18311
18363
|
this._agentEndReceived = true;
|
|
18312
18364
|
this._clearStallTimer();
|
|
18313
18365
|
this.options.onEvent?.("agent_end");
|
|
@@ -18434,6 +18486,16 @@ class PiAgentSession {
|
|
|
18434
18486
|
this.options.onEvent?.("message_done");
|
|
18435
18487
|
break;
|
|
18436
18488
|
}
|
|
18489
|
+
case "error": {
|
|
18490
|
+
const apiError = findApiErrorMessage(ae) ?? findApiErrorMessage(event);
|
|
18491
|
+
if (apiError) {
|
|
18492
|
+
this._apiError = apiError;
|
|
18493
|
+
this._metrics.api_error = apiError;
|
|
18494
|
+
this.options.onMetric?.({ type: "api_error", source: "rpc", errorMessage: apiError });
|
|
18495
|
+
}
|
|
18496
|
+
this.options.onEvent?.("message_error");
|
|
18497
|
+
break;
|
|
18498
|
+
}
|
|
18437
18499
|
}
|
|
18438
18500
|
}
|
|
18439
18501
|
}
|
|
@@ -20083,6 +20145,19 @@ class SqliteClient {
|
|
|
20083
20145
|
return this.db.query("SELECT chain_id, epic_id, chain_root_bead_id, chain_root_job_id, updated_at_ms FROM epic_chain_membership WHERE epic_id = ? ORDER BY updated_at_ms DESC").all(epicId);
|
|
20084
20146
|
}, "listEpicChains");
|
|
20085
20147
|
}
|
|
20148
|
+
deleteEpicChainMembership(epicId, chainIds) {
|
|
20149
|
+
if (chainIds.length === 0)
|
|
20150
|
+
return [];
|
|
20151
|
+
return withRetry(() => {
|
|
20152
|
+
const existing = new Set(this.db.query("SELECT chain_id FROM epic_chain_membership WHERE epic_id = ?").all(epicId).map((row) => row.chain_id));
|
|
20153
|
+
const removable = chainIds.filter((chainId) => existing.has(chainId));
|
|
20154
|
+
if (removable.length === 0)
|
|
20155
|
+
return [];
|
|
20156
|
+
const placeholders = removable.map(() => "?").join(", ");
|
|
20157
|
+
this.db.query(`DELETE FROM epic_chain_membership WHERE epic_id = ? AND chain_id IN (${placeholders})`).run(epicId, ...removable);
|
|
20158
|
+
return removable;
|
|
20159
|
+
}, "deleteEpicChainMembership");
|
|
20160
|
+
}
|
|
20086
20161
|
listEpicChainsWithLatestJob(epicId) {
|
|
20087
20162
|
return withRetry(() => {
|
|
20088
20163
|
const rows = this.db.query(`
|
|
@@ -20995,6 +21070,9 @@ function resolveOutputContractSchema(responseFormat, outputType, outputSchema) {
|
|
|
20995
21070
|
}
|
|
20996
21071
|
return mergedSchema;
|
|
20997
21072
|
}
|
|
21073
|
+
/**
 * Quote a string so a POSIX shell treats it as one literal word.
 *
 * The value is wrapped in single quotes. Each embedded single quote is
 * rewritten as `'\''`: close the quoted run, emit a backslash-escaped quote,
 * then reopen the quoted run. Replacing it with a bare `'''` would leave the
 * quotes unbalanced.
 *
 * @param {string} value - Raw text to embed in a shell command line.
 * @returns {string} The single-quoted, shell-safe form of `value`.
 */
function shellQuote(value) {
  return `'${value.replace(/'/g, `'\\''`)}'`;
}
|
|
20998
21076
|
function buildOutputContractInstruction(responseFormat, outputType, outputSchema) {
|
|
20999
21077
|
if (responseFormat === "text")
|
|
21000
21078
|
return "";
|
|
@@ -21019,6 +21097,58 @@ function buildOutputContractInstruction(responseFormat, outputType, outputSchema
|
|
|
21019
21097
|
${lines.join(`
|
|
21020
21098
|
`)}`;
|
|
21021
21099
|
}
|
|
21100
|
+
/**
 * Gather the `git diff` of the working tree at `cwd` as reviewer context.
 *
 * @param {string} cwd - Directory in which the git commands are run.
 * @param {number} [maxFiles=20] - Maximum number of changed files to include.
 * @returns {{ stat: string, files: string[], hunks: string }} The trimmed
 *   `git diff --stat` output, the (possibly truncated) list of changed paths,
 *   and one `### <path>` section per listed file containing its diff.
 * @throws {Error} When `git diff` reports no changed files. Errors raised by
 *   the underlying git invocations propagate unchanged.
 */
function buildReviewerDiffContext(cwd, maxFiles = 20) {
  // Every git call shares the same settings; build a fresh options object per call.
  const runGit = (command) => execSync2(command, {
    cwd,
    encoding: "utf8",
    timeout: 1e4,
    stdio: ["ignore", "pipe", "pipe"]
  });
  const stat2 = runGit("git diff --stat").trim();
  // -z makes git print raw, NUL-terminated paths. Without it, names containing
  // special or non-ASCII characters come back C-quoted and would not match
  // when passed to the per-file `git diff -- <path>` below. Paths are not
  // trimmed so names with edge whitespace survive intact.
  const files = runGit("git diff --name-only -z").split("\0").filter(Boolean).slice(0, maxFiles);
  if (files.length === 0) {
    throw new Error("Reviewer startup blocked: git diff is empty. No patch context to review.");
  }
  const hunks = files.map((file) => {
    const diff = runGit(`git diff -- ${shellQuote(file)}`).trim();
    return `### ${file}\n${diff || "(no hunks)"}`;
  }).join("\n\n");
  return { stat: stat2, files, hunks };
}
|
|
21132
|
+
/**
 * Render reviewer diff context as a block of instructions to append to the
 * agent prompt.
 *
 * @param {{ stat: string, files: string[], hunks: string }} context - Diff
 *   summary, changed paths and per-file hunks to present.
 * @returns {string} A section delimited by `---` lines that lists the diff
 *   stat, the changed files as a bullet list, and the hunks. Any empty part is
 *   shown as an explicit placeholder so the section is never silently blank.
 */
function buildReviewerDiffInstruction(context) {
  const fileList = context.files.length > 0
    ? context.files.map((file) => `- ${file}`).join("\n")
    : "(no files)";
  // The two leading empty entries keep a blank line between whatever text
  // precedes this block and its opening `---`.
  return [
    "",
    "",
    "---",
    "## Reviewer Diff Context",
    "Review only patch below. Ignore unrelated files, repo-wide exploration, and filesystem hunting.",
    "If patch context is empty, stop and fail fast.",
    "",
    "Diff stat:",
    context.stat || "(no stat)",
    "",
    "Changed files:",
    fileList,
    "",
    "Diff hunks:",
    context.hunks || "(no hunks)",
    "---",
    ""
  ].join("\n");
}
|
|
21022
21152
|
function tryParseJson(input) {
|
|
21023
21153
|
try {
|
|
21024
21154
|
return { value: JSON.parse(input) };
|
|
@@ -21340,6 +21470,10 @@ ${summaries.join(`
|
|
|
21340
21470
|
}
|
|
21341
21471
|
})
|
|
21342
21472
|
});
|
|
21473
|
+
if (metadata.name === "reviewer" && options.reusedFromJobId) {
|
|
21474
|
+
const reviewerDiffContext = buildReviewerDiffContext(runCwd);
|
|
21475
|
+
agentsMd += buildReviewerDiffInstruction(reviewerDiffContext);
|
|
21476
|
+
}
|
|
21343
21477
|
const responseFormat = execution.response_format ?? "text";
|
|
21344
21478
|
const outputType = execution.output_type ?? "custom";
|
|
21345
21479
|
const specialistOutputSchema = prompt.output_schema;
|
|
@@ -21939,6 +22073,13 @@ function mapCallbackEventToTimelineEvent(callbackEvent, context) {
|
|
|
21939
22073
|
...context.extensionError?.extension ? { extension: context.extensionError.extension } : {},
|
|
21940
22074
|
...context.extensionError?.errorMessage ? { error_message: context.extensionError.errorMessage } : {}
|
|
21941
22075
|
};
|
|
22076
|
+
case "api_error":
|
|
22077
|
+
return {
|
|
22078
|
+
t,
|
|
22079
|
+
type: TIMELINE_EVENT_TYPES.ERROR,
|
|
22080
|
+
source: context.apiError?.source ?? "rpc",
|
|
22081
|
+
error_message: context.apiError?.errorMessage ?? "Unknown API error"
|
|
22082
|
+
};
|
|
21942
22083
|
case "memory_injection":
|
|
21943
22084
|
return {
|
|
21944
22085
|
t,
|
|
@@ -22124,6 +22265,7 @@ var init_timeline_events = __esm(() => {
|
|
|
22124
22265
|
RETRY: "retry",
|
|
22125
22266
|
MODEL_CHANGE: "model_change",
|
|
22126
22267
|
EXTENSION_ERROR: "extension_error",
|
|
22268
|
+
ERROR: "error",
|
|
22127
22269
|
AUTO_COMMIT_SUCCESS: "auto_commit_success",
|
|
22128
22270
|
AUTO_COMMIT_SKIPPED: "auto_commit_skipped",
|
|
22129
22271
|
AUTO_COMMIT_FAILED: "auto_commit_failed",
|
|
@@ -23725,6 +23867,16 @@ ${appendError}
|
|
|
23725
23867
|
appendTimelineEvent(createFinishReasonEvent(metricEvent.finish_reason, metricEvent.source));
|
|
23726
23868
|
return;
|
|
23727
23869
|
}
|
|
23870
|
+
if (metricEvent.type === "api_error") {
|
|
23871
|
+
mergeRunMetrics({ api_error: metricEvent.errorMessage });
|
|
23872
|
+
appendTimelineEvent({
|
|
23873
|
+
t: Date.now(),
|
|
23874
|
+
type: TIMELINE_EVENT_TYPES.ERROR,
|
|
23875
|
+
source: metricEvent.source,
|
|
23876
|
+
error_message: metricEvent.errorMessage
|
|
23877
|
+
});
|
|
23878
|
+
return;
|
|
23879
|
+
}
|
|
23728
23880
|
if (metricEvent.type === "turn_summary") {
|
|
23729
23881
|
mergeRunMetrics({
|
|
23730
23882
|
turns: metricEvent.turn_index,
|
|
@@ -27579,6 +27731,9 @@ function formatEventLine(event, options) {
|
|
|
27579
27731
|
detailParts.push(`backend=${event.backend}`);
|
|
27580
27732
|
} else if (event.type === "tool") {
|
|
27581
27733
|
detail = formatToolDetail(event);
|
|
27734
|
+
} else if (event.type === "error") {
|
|
27735
|
+
detailParts.push(`source=${event.source}`);
|
|
27736
|
+
detailParts.push(`error=${event.error_message}`);
|
|
27582
27737
|
} else if (event.type === "run_complete") {
|
|
27583
27738
|
detailParts.push(`status=${event.status}`);
|
|
27584
27739
|
detailParts.push(`elapsed=${formatElapsed(event.elapsed_s)}`);
|
|
@@ -27668,6 +27823,8 @@ function formatEventInline(event) {
|
|
|
27668
27823
|
}
|
|
27669
27824
|
case "stale_warning":
|
|
27670
27825
|
return yellow10(`[warning] ${event.reason}: ${Math.round(event.silence_ms / 1000)}s silent`);
|
|
27826
|
+
case "error":
|
|
27827
|
+
return red2(`[error] ${event.source}: ${event.error_message}`);
|
|
27671
27828
|
default:
|
|
27672
27829
|
return null;
|
|
27673
27830
|
}
|
|
@@ -27701,7 +27858,7 @@ var init_format_helpers = __esm(() => {
|
|
|
27701
27858
|
turn_summary: "TURN+",
|
|
27702
27859
|
compaction: "CMPCT",
|
|
27703
27860
|
retry: "RETRY",
|
|
27704
|
-
error: "
|
|
27861
|
+
error: "ERROR"
|
|
27705
27862
|
};
|
|
27706
27863
|
});
|
|
27707
27864
|
|
|
@@ -28046,7 +28203,7 @@ function formatFooterModel(backend, model) {
|
|
|
28046
28203
|
return model;
|
|
28047
28204
|
return model.startsWith(`${backend}/`) ? model : `${backend}/${model}`;
|
|
28048
28205
|
}
|
|
28049
|
-
function
|
|
28206
|
+
/**
 * Produce a single-quoted shell word from `value`.
 *
 * Every single quote inside the value becomes `'\''`, and the result is
 * wrapped in single quotes.
 *
 * @param {string} value - Text to quote.
 * @returns {string} The quoted text.
 */
function shellQuote2(value) {
  const escapedQuote = "'\\''";
  // Splitting on the quote and re-joining with the escape sequence rewrites
  // every occurrence.
  const inner = value.split("'").join(escapedQuote);
  return "'" + inner + "'";
}
|
|
28052
28209
|
function extractReviewedJobIdOverride(prompt) {
|
|
@@ -28098,7 +28255,7 @@ async function run13() {
|
|
|
28098
28255
|
})();
|
|
28099
28256
|
const cwd = process.cwd();
|
|
28100
28257
|
const innerArgs = process.argv.slice(2).filter((a) => a !== "--background");
|
|
28101
|
-
const cmd = `${process.execPath} ${process.argv[1]} ${innerArgs.map(
|
|
28258
|
+
const cmd = `${process.execPath} ${process.argv[1]} ${innerArgs.map(shellQuote2).join(" ")}`;
|
|
28102
28259
|
let childPid;
|
|
28103
28260
|
if (isTmuxAvailable()) {
|
|
28104
28261
|
const suffix = randomBytes(3).toString("hex");
|
|
@@ -31505,12 +31662,16 @@ function syncEpicState(sqlite, epicId, apply) {
|
|
|
31505
31662
|
stale_redirect_markers: epicRun && hasRedirectMarkers(epicRun.status_json) ? [epicId] : []
|
|
31506
31663
|
};
|
|
31507
31664
|
let deadJobsMarkedError = [];
|
|
31665
|
+
let staleChainRefsPruned = [];
|
|
31508
31666
|
let readinessResynced = false;
|
|
31509
31667
|
let redirectMarkersCleared = false;
|
|
31510
31668
|
if (apply) {
|
|
31511
31669
|
if (drift.dead_jobs_blocking_readiness.length > 0) {
|
|
31512
31670
|
deadJobsMarkedError = markDeadJobsAsError(sqlite, jobs);
|
|
31513
31671
|
}
|
|
31672
|
+
if (drift.stale_chain_refs.length > 0) {
|
|
31673
|
+
staleChainRefsPruned = sqlite.deleteEpicChainMembership(epicId, drift.stale_chain_refs);
|
|
31674
|
+
}
|
|
31514
31675
|
const readinessNext = loadEpicReadinessSummary(sqlite, epicId);
|
|
31515
31676
|
const synced = syncEpicStateFromReadiness(sqlite, readinessNext);
|
|
31516
31677
|
readinessResynced = synced.status !== readinessNext.persisted_state;
|
|
@@ -31533,6 +31694,7 @@ function syncEpicState(sqlite, epicId, apply) {
|
|
|
31533
31694
|
drift,
|
|
31534
31695
|
repairs: {
|
|
31535
31696
|
dead_jobs_marked_error: deadJobsMarkedError,
|
|
31697
|
+
stale_chain_refs_pruned: staleChainRefsPruned,
|
|
31536
31698
|
readiness_resynced: readinessResynced,
|
|
31537
31699
|
redirect_markers_cleared: redirectMarkersCleared
|
|
31538
31700
|
},
|
|
@@ -31732,7 +31894,7 @@ function parseSyncOptions(argv) {
|
|
|
31732
31894
|
return { epicId, apply, json };
|
|
31733
31895
|
}
|
|
31734
31896
|
function parseAbandonOptions(argv) {
|
|
31735
|
-
|
|
31897
|
+
let epicId = "";
|
|
31736
31898
|
let reason = "";
|
|
31737
31899
|
let force = false;
|
|
31738
31900
|
let json = false;
|
|
@@ -31755,9 +31917,16 @@ function parseAbandonOptions(argv) {
|
|
|
31755
31917
|
index += 1;
|
|
31756
31918
|
continue;
|
|
31757
31919
|
}
|
|
31758
|
-
if (argument.startsWith("-")
|
|
31920
|
+
if (argument.startsWith("-")) {
|
|
31759
31921
|
throw new Error(`Unknown option: ${argument}`);
|
|
31760
31922
|
}
|
|
31923
|
+
if (epicId.length > 0) {
|
|
31924
|
+
throw new Error("Only one epic ID is supported");
|
|
31925
|
+
}
|
|
31926
|
+
epicId = argument;
|
|
31927
|
+
}
|
|
31928
|
+
if (!epicId) {
|
|
31929
|
+
throw new Error("Missing epic ID");
|
|
31761
31930
|
}
|
|
31762
31931
|
if (reason.length === 0) {
|
|
31763
31932
|
throw new Error("Missing required --reason <text>");
|
|
@@ -32161,6 +32330,7 @@ async function handleEpicSyncCommand(argv) {
|
|
|
32161
32330
|
console.log(` stale_redirect_markers: ${result.drift.stale_redirect_markers.length}`);
|
|
32162
32331
|
if (result.apply) {
|
|
32163
32332
|
console.log(` repaired_dead_jobs: ${result.repairs.dead_jobs_marked_error.length}`);
|
|
32333
|
+
console.log(` stale_chain_refs_pruned: ${result.repairs.stale_chain_refs_pruned.length}`);
|
|
32164
32334
|
console.log(` readiness_resynced: ${result.repairs.readiness_resynced}`);
|
|
32165
32335
|
console.log(` redirect_markers_cleared: ${result.repairs.redirect_markers_cleared}`);
|
|
32166
32336
|
}
|
|
@@ -33709,6 +33879,10 @@ function deriveStartupSnapshot(status, events) {
|
|
|
33709
33879
|
merged.branch = status.branch;
|
|
33710
33880
|
return Object.keys(merged).length > 0 ? merged : null;
|
|
33711
33881
|
}
|
|
33882
|
+
/**
 * Find the most recent error message recorded in a list of timeline events.
 *
 * @param {Iterable<object>} events - Timeline events in recorded order.
 * @returns {string|null} The `error_message` of the last event whose `type`
 *   is "error", or null when there is no such event or it has no message.
 */
function deriveApiError(events) {
  const snapshot = [...events];
  // Scan from the end so the latest error event is the one reported.
  for (let index = snapshot.length - 1; index >= 0; index -= 1) {
    const candidate = snapshot[index];
    if (candidate.type === "error") {
      return candidate.error_message ?? null;
    }
  }
  return null;
}
|
|
33712
33886
|
function formatStartupSnapshot(snapshot) {
|
|
33713
33887
|
if (!snapshot)
|
|
33714
33888
|
return null;
|
|
@@ -33829,20 +34003,25 @@ async function run16() {
|
|
|
33829
34003
|
process.exit(1);
|
|
33830
34004
|
}
|
|
33831
34005
|
if (status2.status === "done") {
|
|
33832
|
-
const
|
|
34006
|
+
const events2 = readTimelineEventsForResult(sqliteClient, jobsDir, jobId);
|
|
34007
|
+
const startupContext2 = deriveStartupSnapshot(status2, events2);
|
|
34008
|
+
const apiError2 = status2.error ?? deriveApiError(events2);
|
|
33833
34009
|
const output3 = readResultOutput();
|
|
33834
34010
|
if (!output3) {
|
|
34011
|
+
const message = apiError2 ? `Job ${jobId} failed: ${apiError2}` : `Result not found for job ${jobId}`;
|
|
33835
34012
|
if (args.json) {
|
|
33836
|
-
emitJson(status2, null,
|
|
34013
|
+
emitJson(status2, null, message, startupContext2);
|
|
33837
34014
|
} else {
|
|
33838
|
-
|
|
34015
|
+
process.stderr.write(`${red3(message)}
|
|
34016
|
+
`);
|
|
33839
34017
|
}
|
|
33840
34018
|
process.exit(1);
|
|
33841
34019
|
}
|
|
34020
|
+
const enrichedStatus2 = apiError2 && !status2.error ? { ...status2, error: apiError2 } : status2;
|
|
33842
34021
|
if (args.json) {
|
|
33843
|
-
emitJson(
|
|
34022
|
+
emitJson(enrichedStatus2, output3, null, startupContext2);
|
|
33844
34023
|
} else {
|
|
33845
|
-
emitHumanResult(output3,
|
|
34024
|
+
emitHumanResult(output3, enrichedStatus2, startupContext2);
|
|
33846
34025
|
}
|
|
33847
34026
|
return;
|
|
33848
34027
|
}
|
|
@@ -33929,31 +34108,37 @@ async function run16() {
|
|
|
33929
34108
|
return;
|
|
33930
34109
|
}
|
|
33931
34110
|
if (status.status === "error") {
|
|
33932
|
-
const
|
|
33933
|
-
const
|
|
34111
|
+
const events2 = readTimelineEventsForResult(sqliteClient, jobsDir, jobId);
|
|
34112
|
+
const startupContext2 = deriveStartupSnapshot(status, events2);
|
|
34113
|
+
const message = `Job ${jobId} failed: ${status.error ?? deriveApiError(events2) ?? "unknown error"}`;
|
|
33934
34114
|
if (args.json) {
|
|
33935
34115
|
emitJson(status, null, message, startupContext2);
|
|
33936
34116
|
} else {
|
|
33937
|
-
process.stderr.write(`${red3(`Job ${jobId} failed:`)} ${status.error ?? "unknown error"}
|
|
34117
|
+
process.stderr.write(`${red3(`Job ${jobId} failed:`)} ${status.error ?? deriveApiError(events2) ?? "unknown error"}
|
|
33938
34118
|
`);
|
|
33939
34119
|
}
|
|
33940
34120
|
process.exit(1);
|
|
33941
34121
|
}
|
|
34122
|
+
const events = readTimelineEventsForResult(sqliteClient, jobsDir, jobId);
|
|
34123
|
+
const apiError = status.error ?? deriveApiError(events);
|
|
33942
34124
|
const output2 = readResultOutput();
|
|
33943
34125
|
if (!output2) {
|
|
34126
|
+
const message = apiError ? `Job ${jobId} failed: ${apiError}` : `Result not found for job ${jobId}`;
|
|
33944
34127
|
if (args.json) {
|
|
33945
|
-
emitJson(status, null,
|
|
34128
|
+
emitJson(status, null, message);
|
|
33946
34129
|
} else {
|
|
33947
|
-
|
|
34130
|
+
process.stderr.write(`${red3(message)}
|
|
34131
|
+
`);
|
|
33948
34132
|
}
|
|
33949
34133
|
process.exit(1);
|
|
33950
34134
|
}
|
|
33951
|
-
const startupContext = deriveStartupSnapshot(status,
|
|
34135
|
+
const startupContext = deriveStartupSnapshot(status, events);
|
|
34136
|
+
const enrichedStatus = apiError && !status.error ? { ...status, error: apiError } : status;
|
|
33952
34137
|
if (args.json) {
|
|
33953
|
-
emitJson(
|
|
34138
|
+
emitJson(enrichedStatus, output2, null, startupContext);
|
|
33954
34139
|
return;
|
|
33955
34140
|
}
|
|
33956
|
-
emitHumanResult(output2,
|
|
34141
|
+
emitHumanResult(output2, enrichedStatus, startupContext);
|
|
33957
34142
|
} catch (error2) {
|
|
33958
34143
|
const message = error2 instanceof Error ? error2.message : String(error2);
|
|
33959
34144
|
if (args.json) {
|
|
@@ -34150,6 +34335,8 @@ function getHumanEventKey(event) {
|
|
|
34150
34335
|
return `run_start:${event.specialist}:${event.bead_id ?? ""}`;
|
|
34151
34336
|
case "run_complete":
|
|
34152
34337
|
return `run_complete:${event.status}:${event.error ?? ""}`;
|
|
34338
|
+
case "error":
|
|
34339
|
+
return `error:${event.source}:${event.error_message}`;
|
|
34153
34340
|
case "token_usage":
|
|
34154
34341
|
return `token_usage:${event.token_usage.total_tokens ?? ""}:${event.source}`;
|
|
34155
34342
|
case "finish_reason":
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@jaggerxtrm/specialists",
|
|
3
|
-
"version": "3.6.
|
|
3
|
+
"version": "3.6.13",
|
|
4
4
|
"description": "OmniSpecialist — 7-tool MCP orchestration layer powered by the Specialist System. Discover and execute .specialist.yaml files across project/user/system scopes via pi.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -24,7 +24,8 @@
|
|
|
24
24
|
"test:bun": "bun test tests/unit/specialist/observability-sqlite.test.ts tests/unit/specialist/observability-db.test.ts tests/unit/cli/db.test.ts",
|
|
25
25
|
"test:watch": "bun --bun vitest",
|
|
26
26
|
"test:coverage": "bun --bun vitest run --coverage",
|
|
27
|
-
"test:supervisor": "bun --bun vitest run tests/unit/specialist/supervisor.test.ts --no-file-parallelism"
|
|
27
|
+
"test:supervisor": "bun --bun vitest run tests/unit/specialist/supervisor.test.ts --no-file-parallelism",
|
|
28
|
+
"benchmark:executor": "node scripts/run-executor-benchmark.mjs"
|
|
28
29
|
},
|
|
29
30
|
"keywords": [
|
|
30
31
|
"omnispecialist",
|