xtrm-tools 0.7.12 → 0.7.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.xtrm/config/hooks.json +10 -0
- package/.xtrm/hooks/specialists/specialists-memory-cache-sync.mjs +57 -0
- package/.xtrm/hooks/specialists-agent-guard.mjs +76 -0
- package/.xtrm/registry.json +509 -393
- package/.xtrm/skills/default/premortem/SKILL.md +218 -0
- package/.xtrm/skills/default/releasing/SKILL.md +94 -0
- package/.xtrm/skills/default/releasing/scripts/xt-reports.ts +18 -0
- package/.xtrm/skills/default/session-close-report/SKILL.md +85 -17
- package/.xtrm/skills/default/specialists-creator/SKILL.md +117 -42
- package/.xtrm/skills/default/specialists-creator/scripts/audit-spec-uniformity.mjs +86 -0
- package/.xtrm/skills/default/specialists-creator/scripts/scaffold-specialist.ts +223 -0
- package/.xtrm/skills/default/specialists-creator/scripts/validate-specialist.ts +1 -1
- package/.xtrm/skills/default/sync-docs/SKILL.md +88 -208
- package/.xtrm/skills/default/sync-docs/scripts/pre-context.sh +17 -0
- package/.xtrm/skills/default/update-specialists/SKILL.md +99 -201
- package/.xtrm/skills/default/update-xt/SKILL.md +34 -0
- package/.xtrm/skills/default/using-kpi/SKILL.md +150 -0
- package/.xtrm/skills/default/using-nodes/SKILL.md +18 -102
- package/.xtrm/skills/default/using-script-specialists/SKILL.md +208 -0
- package/.xtrm/skills/default/using-specialists/SKILL.md +13 -0
- package/.xtrm/skills/default/using-specialists-v2/SKILL.md +773 -0
- package/.xtrm/skills/default/using-specialists-v3/SKILL.md +284 -0
- package/.xtrm/skills/default/using-specialists-v3/evals/evals.json +89 -0
- package/CHANGELOG.md +17 -0
- package/README.md +5 -1
- package/cli/dist/index.cjs +3401 -627
- package/cli/dist/index.cjs.map +1 -1
- package/cli/package.json +1 -1
- package/package.json +3 -2
- package/packages/pi-extensions/.serena/project.yml +130 -0
- package/packages/pi-extensions/extensions/pi-serena-compact/index.ts +4 -12
- package/packages/pi-extensions/extensions/xtrm-loader/index.ts +0 -1
- package/packages/pi-extensions/extensions/xtrm-ui/index.ts +201 -36
- package/packages/pi-extensions/extensions/xtrm-ui/themes/pidex-dark-flattools.json +79 -0
- package/packages/pi-extensions/extensions/xtrm-ui/themes/pidex-dark.json +85 -0
- package/packages/pi-extensions/extensions/xtrm-ui/themes/pidex-light-flattools.json +79 -0
- package/packages/pi-extensions/extensions/xtrm-ui/themes/pidex-light.json +85 -0
- package/packages/pi-extensions/package.json +1 -1
- package/packages/pi-extensions/themes/xtrm-ui/pidex-dark-flattools.json +79 -0
- package/packages/pi-extensions/themes/xtrm-ui/pidex-dark.json +3 -3
- package/packages/pi-extensions/themes/xtrm-ui/pidex-light-flattools.json +79 -0
- package/scripts/patch-external-pi-tools.mjs +154 -0
|
@@ -1,256 +1,154 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: update-specialists
|
|
3
3
|
description: >
|
|
4
|
-
Reconcile
|
|
5
|
-
Use this skill when
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
version: 1.0
|
|
10
|
-
synced_at: 00000000
|
|
4
|
+
Reconcile all xtrm-managed asset drift across repos.
|
|
5
|
+
Use this skill when user says "update specialists", "xtrm drift", "assets out of date",
|
|
6
|
+
or when operator needs guided refresh across one repo or many.
|
|
7
|
+
version: 2.0
|
|
8
|
+
synced_at: 2026-05-05
|
|
11
9
|
---
|
|
12
10
|
|
|
13
11
|
# update-specialists
|
|
14
12
|
|
|
15
|
-
|
|
16
|
-
fixes, then verify with `sp doctor`.
|
|
13
|
+
Interactive wrapper over `xt update` for xtrm-managed asset drift.
|
|
17
14
|
|
|
18
|
-
|
|
15
|
+
Canonical-live model:
|
|
16
|
+
- **Category A**: specialist runtime / loader-live surfaces. No refresh needed; verify only.
|
|
17
|
+
- **Category B**: xtrm-managed snapshots under repos (`.xtrm/skills/default/`, `.xtrm/hooks/default/`, and related managed assets). These can drift and need operator-confirmed refresh.
|
|
19
18
|
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
Skill goal:
|
|
20
|
+
1. find projects root,
|
|
21
|
+
2. inspect drift,
|
|
22
|
+
3. summarize per-repo state,
|
|
23
|
+
4. ask operator which repos to refresh,
|
|
24
|
+
5. run `xt update --apply`,
|
|
25
|
+
6. re-check,
|
|
26
|
+
7. report final state.
|
|
22
27
|
|
|
23
|
-
|
|
28
|
+
No automatic execution. Always operator-confirmed.
|
|
24
29
|
|
|
25
|
-
|
|
26
|
-
|-------|----------------|
|
|
27
|
-
| `.specialists/default/*.specialist.json` | JSON-first specialist configs present |
|
|
28
|
-
| `metadata.name` | Matches filename stem |
|
|
29
|
-
| `metadata.version` | Valid semver string |
|
|
30
|
-
| `metadata.description` | Present |
|
|
31
|
-
| `metadata.category` | Present |
|
|
32
|
-
| `execution.model` | Present and pingable |
|
|
33
|
-
| `execution.fallback_model` | Present, different provider from primary |
|
|
34
|
-
| `execution.permission_required` | Valid enum |
|
|
35
|
-
| `execution.extensions.serena` | Present when skill needs opt-out or default true |
|
|
36
|
-
| `execution.extensions.gitnexus` | Present when skill needs opt-out or default true |
|
|
37
|
-
| `execution.interactive` | Matches intended keep-alive behavior |
|
|
30
|
+
## Operator Flow
|
|
38
31
|
|
|
39
|
-
###
|
|
32
|
+
### 1) Discover projects root
|
|
40
33
|
|
|
41
|
-
|
|
42
|
-
|-------|----------------|
|
|
43
|
-
| `.claude/settings.json` | Has hook entries for active events |
|
|
44
|
-
| Hook events | At minimum: `SessionStart`, `PreToolUse`, `PostToolUse`, `Stop` |
|
|
45
|
-
| Hook paths | Point at specialists runtime hook scripts, not stale xtrm-only paths |
|
|
46
|
-
| Hook format | Matches project's installed settings format and loads cleanly |
|
|
34
|
+
Ask for root if user did not name one.
|
|
47
35
|
|
|
48
|
-
|
|
36
|
+
Default order:
|
|
37
|
+
1. explicit user root,
|
|
38
|
+
2. `~/dev`,
|
|
39
|
+
3. git-discovered repo root / workspace root,
|
|
40
|
+
4. current directory as last fallback.
|
|
49
41
|
|
|
50
|
-
|
|
51
|
-
|-------|----------------|
|
|
52
|
-
| `sp` command | On PATH and runs |
|
|
53
|
-
| `specialists` command | On PATH and runs |
|
|
54
|
-
| Version compatibility | `sp doctor` reports matching runtime / install state |
|
|
55
|
-
| Command surface | `sp doctor`, `sp init`, `sp clean`, `sp status` available |
|
|
42
|
+
If multiple candidate roots exist, ask which one to use.
|
|
56
43
|
|
|
57
|
-
###
|
|
44
|
+
### 2) Run doctor
|
|
58
45
|
|
|
59
|
-
|
|
60
|
-
|-------|----------------|
|
|
61
|
-
| `.specialists/jobs/` | Exists |
|
|
62
|
-
| `.specialists/ready/` | Exists if used by runtime |
|
|
63
|
-
| `.specialists/default/` | Canonical install copy present |
|
|
64
|
-
| Orphaned worktrees | None under `.worktrees/` |
|
|
65
|
-
| Worktree ownership | No stale entries for deleted jobs |
|
|
66
|
-
|
|
67
|
-
### SQLite / observability
|
|
68
|
-
|
|
69
|
-
| Check | Expected value |
|
|
70
|
-
|-------|----------------|
|
|
71
|
-
| specialists DB | Opens cleanly |
|
|
72
|
-
| Schema version | Matches runtime expectation |
|
|
73
|
-
| WAL / busy timeout settings | Present when runtime uses SQLite |
|
|
74
|
-
| Corruption / lock errors | None in `sp doctor` |
|
|
75
|
-
|
|
76
|
-
### Pi extensions
|
|
77
|
-
|
|
78
|
-
| Check | Expected value |
|
|
79
|
-
|-------|----------------|
|
|
80
|
-
| `quality-gates` | Registered if project uses quality gates |
|
|
81
|
-
| `pi-gitnexus` | Registered when GitNexus integration is expected |
|
|
82
|
-
| `pi-serena-tools` | Registered when Serena integration is expected |
|
|
83
|
-
| Extension paths | Resolve from installed project, not stale workspace copies |
|
|
84
|
-
|
|
85
|
-
## Detection
|
|
86
|
-
|
|
87
|
-
Run these in order. Report which checks pass and which drift.
|
|
46
|
+
Use:
|
|
88
47
|
|
|
89
48
|
```bash
|
|
90
|
-
|
|
91
|
-
sp doctor
|
|
92
|
-
|
|
93
|
-
# 2. Runtime status
|
|
94
|
-
sp status
|
|
95
|
-
|
|
96
|
-
# 3. Config shape
|
|
97
|
-
find .specialists/default -maxdepth 1 -name '*.specialist.json' -print
|
|
98
|
-
|
|
99
|
-
# 4. Validate specialist JSON files
|
|
100
|
-
node -e "const fs=require('fs'); const path=require('path'); const dir='.specialists/default'; for (const file of fs.readdirSync(dir)) { if (!file.endsWith('.specialist.json')) continue; const data=JSON.parse(fs.readFileSync(path.join(dir,file),'utf8')); const s=data.specialist||data; const m=s.metadata||{}; const e=s.execution||{}; const missing=[]; for (const key of ['name','version','description','category']) if (!m[key]) missing.push('metadata.'+key); for (const key of ['model','fallback_model','permission_required']) if (!e[key]) missing.push('execution.'+key); if (missing.length) console.log(file+': MISSING '+missing.join(', ')); if (m.name && m.name !== file.replace(/\.specialist\.json$/, '')) console.log(file+': NAME MISMATCH '+m.name); }"
|
|
101
|
-
|
|
102
|
-
# 5. Hooks wiring
|
|
103
|
-
node -e "const fs=require('fs'); const p='.claude/settings.json'; if (fs.existsSync(p)) { const s=JSON.parse(fs.readFileSync(p,'utf8')); console.log(JSON.stringify(s.hooks ?? s, null, 2)); } else { console.log('MISSING .claude/settings.json'); }"
|
|
104
|
-
|
|
105
|
-
# 6. Command availability
|
|
106
|
-
command -v sp
|
|
107
|
-
command -v specialists
|
|
108
|
-
sp doctor --json 2>/dev/null || true
|
|
109
|
-
|
|
110
|
-
# 7. Jobs and worktrees
|
|
111
|
-
ls -1 .specialists/jobs 2>/dev/null || true
|
|
112
|
-
find .worktrees -maxdepth 2 -mindepth 1 -type d 2>/dev/null || true
|
|
113
|
-
|
|
114
|
-
# 8. Extension registration
|
|
115
|
-
node -e "const fs=require('fs'); const p='.pi/settings.json'; if (fs.existsSync(p)) console.log(JSON.stringify(JSON.parse(fs.readFileSync(p,'utf8')).skills ?? JSON.parse(fs.readFileSync(p,'utf8')).extensions ?? {}, null, 2)); else console.log('MISSING .pi/settings.json')"
|
|
49
|
+
xt doctor --cwd <root> --json
|
|
116
50
|
```
|
|
117
51
|
|
|
118
|
-
|
|
52
|
+
If `xt` is unavailable, stop and switch to fallback guidance below.
|
|
119
53
|
|
|
120
|
-
|
|
54
|
+
### 3) Summarize drift
|
|
121
55
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
| Job dir missing | `sp init -y` |
|
|
129
|
-
| Orphaned `.worktrees/` entries | `specialists clean` |
|
|
130
|
-
| SQLite schema/version mismatch | `sp doctor` first, then `sp init --sync-skills` or runtime migration command |
|
|
131
|
-
| Pi extensions missing | `sp init --sync-skills` or reinstall extension registration |
|
|
132
|
-
| Hook config format stale | `sp init -y` |
|
|
133
|
-
| Unknown manual drift | Stop, inspect, then apply user-approved fix |
|
|
56
|
+
Render clean table grouped by repo:
|
|
57
|
+
- repo path
|
|
58
|
+
- status
|
|
59
|
+
- drift count
|
|
60
|
+
- missing / extra / mismatched assets
|
|
61
|
+
- suggested action
|
|
134
62
|
|
|
135
|
-
|
|
63
|
+
Keep focus on operator action, not internal diagnostics.
|
|
136
64
|
|
|
137
|
-
###
|
|
65
|
+
### 4) Ask for confirm
|
|
138
66
|
|
|
139
|
-
|
|
140
|
-
|
|
67
|
+
Offer three paths:
|
|
68
|
+
- refresh all repos,
|
|
69
|
+
- refresh specific repos,
|
|
70
|
+
- dry-run only.
|
|
141
71
|
|
|
142
|
-
|
|
143
|
-
sp init --sync-skills
|
|
144
|
-
```
|
|
72
|
+
If user names one repo, keep flow narrow and confirm only that repo.
|
|
145
73
|
|
|
146
|
-
|
|
147
|
-
full sync.
|
|
74
|
+
### 5) Apply refresh
|
|
148
75
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
If hooks are missing, wrong events, or stale script paths:
|
|
76
|
+
Use:
|
|
152
77
|
|
|
153
78
|
```bash
|
|
154
|
-
|
|
79
|
+
xt update --apply --root <root>
|
|
155
80
|
```
|
|
156
81
|
|
|
157
|
-
|
|
158
|
-
when hook-only sync is not enough.
|
|
159
|
-
|
|
160
|
-
### Fix: CLI not reachable
|
|
161
|
-
|
|
162
|
-
If `sp` or `specialists` is missing or incompatible:
|
|
82
|
+
Or for one repo:
|
|
163
83
|
|
|
164
84
|
```bash
|
|
165
|
-
|
|
85
|
+
xt update --apply --repo <repo>
|
|
166
86
|
```
|
|
167
87
|
|
|
168
|
-
|
|
169
|
-
Do not guess at file edits when command surface itself is broken.
|
|
88
|
+
For dry-run, omit `--apply`.
|
|
170
89
|
|
|
171
|
-
###
|
|
90
|
+
### 6) Re-run doctor
|
|
172
91
|
|
|
173
|
-
|
|
92
|
+
Run same doctor command again after update and confirm clean state.
|
|
174
93
|
|
|
175
|
-
|
|
176
|
-
specialists clean
|
|
177
|
-
```
|
|
94
|
+
### 7) Final report
|
|
178
95
|
|
|
179
|
-
|
|
96
|
+
State:
|
|
97
|
+
- what drift existed,
|
|
98
|
+
- what refreshed,
|
|
99
|
+
- what stayed untouched,
|
|
100
|
+
- any residual manual fixes.
|
|
180
101
|
|
|
181
|
-
|
|
102
|
+
## Fallback When xt Missing
|
|
182
103
|
|
|
183
|
-
If
|
|
104
|
+
If `xt` / `xtrm` not installed or doctor/update help unavailable:
|
|
105
|
+
- do not block user,
|
|
106
|
+
- switch to per-repo guidance,
|
|
107
|
+
- tell user to run repo-local checks manually,
|
|
108
|
+
- do not invent bulk repair commands.
|
|
184
109
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
110
|
+
Fallback response shape:
|
|
111
|
+
- identify likely drifted repos,
|
|
112
|
+
- point user at repo-local `sp doctor` / package-specific checks already available in that repo,
|
|
113
|
+
- say bulk refresh needs `xt` installed.
|
|
188
114
|
|
|
189
|
-
|
|
115
|
+
## Drift Review Rules
|
|
190
116
|
|
|
191
|
-
|
|
117
|
+
- Treat repo-custom overlays as intentional unless doctor marks them mismatched against managed snapshot.
|
|
118
|
+
- Do not overwrite user-owned layers.
|
|
119
|
+
- Prefer dry-run first when drift touches multiple repos.
|
|
120
|
+
- If only one repo needs refresh, keep output narrow and use single-repo update path.
|
|
121
|
+
- If doctor shows mixed drift across 3 repos, summarize each repo separately and ask which to refresh.
|
|
192
122
|
|
|
193
|
-
|
|
194
|
-
sp init --sync-skills
|
|
195
|
-
```
|
|
123
|
+
## Output Shape
|
|
196
124
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
125
|
+
Use this order:
|
|
126
|
+
1. root chosen
|
|
127
|
+
2. doctor summary
|
|
128
|
+
3. drift table
|
|
129
|
+
4. confirm prompt
|
|
130
|
+
5. update action
|
|
131
|
+
6. post-update doctor result
|
|
132
|
+
7. final status
|
|
201
133
|
|
|
202
|
-
|
|
134
|
+
## Example Operator Loop
|
|
203
135
|
|
|
204
|
-
```
|
|
205
|
-
|
|
206
|
-
|
|
136
|
+
```text
|
|
137
|
+
Root: ~/dev
|
|
138
|
+
Doctor: 3 repos checked
|
|
207
139
|
|
|
208
|
-
|
|
209
|
-
|
|
140
|
+
repo status drift
|
|
141
|
+
repo-a drifted 4 assets
|
|
142
|
+
repo-b in-sync 0 assets
|
|
143
|
+
repo-c drifted 1 asset
|
|
210
144
|
|
|
211
|
-
|
|
145
|
+
Refresh all / specific repos / dry-run?
|
|
212
146
|
```
|
|
213
147
|
|
|
214
|
-
|
|
215
|
-
- `sp doctor` clean
|
|
216
|
-
- `sp status` no drift / no repair hints
|
|
217
|
-
- `sp` and `specialists` reachable
|
|
218
|
-
- specialist JSON files valid
|
|
219
|
-
- hooks present on required events
|
|
220
|
-
- no orphaned worktrees
|
|
221
|
-
- SQLite state healthy
|
|
222
|
-
|
|
223
|
-
## Manual Intervention
|
|
224
|
-
|
|
225
|
-
Flag these when automatic fix is unsafe or impossible:
|
|
226
|
-
|
|
227
|
-
- `sp doctor` reports corrupt DB / unreadable SQLite file
|
|
228
|
-
- command surface missing because install itself is broken
|
|
229
|
-
- hook scripts absent from repo and cannot be regenerated
|
|
230
|
-
- schema mismatch with no available migration path
|
|
231
|
-
- worktree cleanup would remove user changes
|
|
232
|
-
- extensions required by project are not installed at package level
|
|
233
|
-
|
|
234
|
-
When manual intervention needed, report:
|
|
235
|
-
1. exact drift
|
|
236
|
-
2. exact command tried
|
|
237
|
-
3. why auto-fix stopped
|
|
238
|
-
4. next safe operator action
|
|
239
|
-
|
|
240
|
-
## User Summary Format
|
|
241
|
-
|
|
242
|
-
After detection + remediation, answer with compact status:
|
|
243
|
-
|
|
244
|
-
```text
|
|
245
|
-
## specialists update complete
|
|
246
|
-
|
|
247
|
-
✓ sp doctor clean
|
|
248
|
-
✓ specialist configs valid
|
|
249
|
-
✓ hooks wired
|
|
250
|
-
✓ CLI reachable
|
|
251
|
-
✓ jobs/worktrees clean
|
|
252
|
-
✓ SQLite healthy
|
|
253
|
-
✓ extensions registered
|
|
148
|
+
## Verification
|
|
254
149
|
|
|
255
|
-
|
|
256
|
-
|
|
150
|
+
After refresh:
|
|
151
|
+
- `xt doctor --cwd <root> --json` clean or reduced to intentional custom drift,
|
|
152
|
+
- repo-specific follow-up actions called out only when needed,
|
|
153
|
+
- single-repo case stays single-repo,
|
|
154
|
+
- missing `xt` path falls back cleanly.
|
|
@@ -74,6 +74,31 @@ node -e "const s=require('./.pi/settings.json'); console.log(s.skills)" 2>/dev/n
|
|
|
74
74
|
for f in .xtrm/skills/active/*; do [ -L "$f" ] || echo "NOT A SYMLINK: $f"; done
|
|
75
75
|
```
|
|
76
76
|
|
|
77
|
+
## Implementation Self-Check
|
|
78
|
+
|
|
79
|
+
Do not trust the surface commands alone. Before claiming that `xt init` handles
|
|
80
|
+
drift correctly, verify the underlying implementation behavior in the CLI source.
|
|
81
|
+
|
|
82
|
+
Required checks:
|
|
83
|
+
|
|
84
|
+
| File | What to verify |
|
|
85
|
+
|------|----------------|
|
|
86
|
+
| `cli/src/core/drift.ts` | Drift is classified by comparing installed user file hashes against registry hashes from the package payload |
|
|
87
|
+
| `cli/src/core/registry-scaffold.ts` | Drifted files are reported and skipped by default unless `force` is enabled |
|
|
88
|
+
| `cli/src/commands/init.ts` | `xt init` calls the registry install step with `force: false` |
|
|
89
|
+
|
|
90
|
+
What you must confirm from code before reporting success:
|
|
91
|
+
|
|
92
|
+
- `xt init` does check for local drift between the user's `.xtrm` files and the
|
|
93
|
+
package payload that bootstrapped them.
|
|
94
|
+
- That check is hash-based for registry-managed `.xtrm` files, not just a loose
|
|
95
|
+
status heuristic.
|
|
96
|
+
- `xt init -y` is non-destructive for drifted `.xtrm` files by default. It
|
|
97
|
+
preserves local edits unless a separate force path is used.
|
|
98
|
+
|
|
99
|
+
If the implementation no longer matches those rules, stop and report the mismatch
|
|
100
|
+
instead of repeating this skill's older assumptions.
|
|
101
|
+
|
|
77
102
|
## Remediation
|
|
78
103
|
|
|
79
104
|
Two commands cover almost all drift. Know which fixes what:
|
|
@@ -86,6 +111,8 @@ Two commands cover almost all drift. Know which fixes what:
|
|
|
86
111
|
### Fix: Skills symlink stale or active/ view wrong
|
|
87
112
|
|
|
88
113
|
`xt claude install` does NOT rebuild skills. Only `xt init` does (Phase 6b).
|
|
114
|
+
`xt init -y` will repair missing/outdated registry-managed files, but it will
|
|
115
|
+
preserve locally drifted `.xtrm` files by default.
|
|
89
116
|
|
|
90
117
|
```bash
|
|
91
118
|
xt init -y
|
|
@@ -159,6 +186,13 @@ node -e "const s=require('./.pi/settings.json'); console.log(s.skills.includes('
|
|
|
159
186
|
# Must output: true
|
|
160
187
|
```
|
|
161
188
|
|
|
189
|
+
Also restate the implementation-level conclusion in your report:
|
|
190
|
+
|
|
191
|
+
- `xt init` verified drift against package registry hashes
|
|
192
|
+
- local drifted `.xtrm` files were preserved by default
|
|
193
|
+
- no forced overwrite path was used unless explicitly requested
|
|
194
|
+
|
|
195
|
+
|
|
162
196
|
If `xt status` still shows drift after targeted fixes, run the full sync:
|
|
163
197
|
```bash
|
|
164
198
|
xt init
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: using-kpi
|
|
3
|
+
description: >-
|
|
4
|
+
Analyze specialist KPI data in observability SQLite. Use for runtime, payload,
|
|
5
|
+
waiting, tool-call, and outlier analysis. Token estimates use cl100k_base-style
|
|
6
|
+
approximation with ~±5% accuracy.
|
|
7
|
+
gemini-command: using-kpi
|
|
8
|
+
version: 3.1.0
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# using-kpi
|
|
12
|
+
|
|
13
|
+
KPI analysis skill for `sp db stats` / `sp db extract` data.
|
|
14
|
+
|
|
15
|
+
## Quick rule
|
|
16
|
+
|
|
17
|
+
`active_runtime_ms` = real paid runtime. Rank by that first. `elapsed_ms` is total wall time. `waiting_ms` catches forgotten keep-alives.
|
|
18
|
+
|
|
19
|
+
Token counts are approximate, cl100k_base-style, about ±5%. Bytes are exact UTF-8 size.
|
|
20
|
+
|
|
21
|
+
## Recipe 1 — specialist × model leaderboard by active cost
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
sp db stats --format json \
|
|
25
|
+
| jq -r '
|
|
26
|
+
.rows
|
|
27
|
+
| group_by([.specialist, .model])
|
|
28
|
+
| map({
|
|
29
|
+
specialist: .[0].specialist,
|
|
30
|
+
model: .[0].model,
|
|
31
|
+
jobs: length,
|
|
32
|
+
active_ms: (map((.active_runtime_ms // 0)) | add),
|
|
33
|
+
total_ms: (map((.total_runtime_ms // .elapsed_ms // 0)) | add),
|
|
34
|
+
turns: (map((.total_turns // 0)) | add),
|
|
35
|
+
tools: (map((.total_tools // 0)) | add),
|
|
36
|
+
payload_kb: (map((.payload_kb // 0)) | add)
|
|
37
|
+
})
|
|
38
|
+
| sort_by(-.active_ms, -.jobs)
|
|
39
|
+
| .[]
|
|
40
|
+
| [ .specialist, .model, .jobs, .active_ms, .total_ms, .turns, .tools, .payload_kb ]
|
|
41
|
+
| @tsv'
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Recipe 2 — outliers above p95
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
sp db stats --format json \
|
|
48
|
+
| jq '
|
|
49
|
+
.rows as $rows
|
|
50
|
+
| {
|
|
51
|
+
active: ($rows | map(.active_runtime_ms // 0) | sort),
|
|
52
|
+
tools: ($rows | map(.total_tools // 0) | sort),
|
|
53
|
+
turns: ($rows | map(.total_turns // 0) | sort),
|
|
54
|
+
payload: ($rows | map(.payload_kb // 0) | sort)
|
|
55
|
+
} as $s
|
|
56
|
+
| {
|
|
57
|
+
active_p95: $s.active[(($s.active|length)*95/100|floor)],
|
|
58
|
+
tools_p95: $s.tools[(($s.tools|length)*95/100|floor)],
|
|
59
|
+
turns_p95: $s.turns[(($s.turns|length)*95/100|floor)],
|
|
60
|
+
payload_p95: $s.payload[(($s.payload|length)*95/100|floor)]
|
|
61
|
+
} as $p
|
|
62
|
+
| $rows
|
|
63
|
+
| map(select(
|
|
64
|
+
((.active_runtime_ms // 0) >= $p.active_p95) or
|
|
65
|
+
((.total_tools // 0) >= $p.tools_p95) or
|
|
66
|
+
((.total_turns // 0) >= $p.turns_p95) or
|
|
67
|
+
((.payload_kb // 0) >= $p.payload_p95)
|
|
68
|
+
))
|
|
69
|
+
| .[]
|
|
70
|
+
| [ .job_id, .specialist, .model, .active_runtime_ms, .total_tools, .total_turns, .payload_kb ]
|
|
71
|
+
| @tsv'
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Recipe 3 — payload bloat ranking
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
sp db stats --with-payload --format json \
|
|
78
|
+
| jq -r '
|
|
79
|
+
.rows
|
|
80
|
+
| group_by(.specialist)
|
|
81
|
+
| map({
|
|
82
|
+
specialist: .[0].specialist,
|
|
83
|
+
jobs: length,
|
|
84
|
+
avg_payload_kb: ((map((.payload_kb // 0)) | add) / length),
|
|
85
|
+
max_payload_kb: (map((.payload_kb // 0)) | max)
|
|
86
|
+
})
|
|
87
|
+
| sort_by(-.avg_payload_kb)
|
|
88
|
+
| .[:10]
|
|
89
|
+
| .[]
|
|
90
|
+
| [ .specialist, .jobs, (.avg_payload_kb|tostring), (.max_payload_kb|tostring) ]
|
|
91
|
+
| @tsv'
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Recipe 4 — waiting-state hygiene
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
sp db stats --format json \
|
|
98
|
+
| jq -r '
|
|
99
|
+
.rows
|
|
100
|
+
| map(select((.waiting_ms? // 0) != 0))
|
|
101
|
+
| map(. + {waiting_ratio: ((.waiting_ms // 0) / ((.total_runtime_ms // .elapsed_ms // 1) + 0.0))})
|
|
102
|
+
| sort_by(-.waiting_ratio, -.waiting_ms)
|
|
103
|
+
| .[]
|
|
104
|
+
| [ .job_id, .specialist, .model, (.waiting_ms|tostring), (.total_runtime_ms // .elapsed_ms|tostring), (.waiting_ratio|tostring) ]
|
|
105
|
+
| @tsv'
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Recipe 5 — tool-call distribution per specialist
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
sp db stats --format json \
|
|
112
|
+
| jq -r '
|
|
113
|
+
.rows
|
|
114
|
+
| group_by(.specialist)
|
|
115
|
+
| map({
|
|
116
|
+
specialist: .[0].specialist,
|
|
117
|
+
counts: (map(.tool_call_counts_json? // "{}")
|
|
118
|
+
| map(fromjson)
|
|
119
|
+
| reduce (.[] | to_entries[]) as $e ({}; .[$e.key] += $e.value))
|
|
120
|
+
})
|
|
121
|
+
| .[]
|
|
122
|
+
| .counts
|
|
123
|
+
| to_entries
|
|
124
|
+
| sort_by(-.value)
|
|
125
|
+
| .[]
|
|
126
|
+
| [ .key, .value ]
|
|
127
|
+
| @tsv'
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Recipe 6 — payload vs active runtime correlation
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
sp db stats --with-payload --format json \
|
|
134
|
+
| jq -r '
|
|
135
|
+
.rows
|
|
136
|
+
| map(select((.payload_kb? // 0) > 0 and ((.active_runtime_ms? // 0) > 0)))
|
|
137
|
+
| map([(.payload_kb|tonumber), (.active_runtime_ms|tonumber)])
|
|
138
|
+
| if length < 2 then empty else
|
|
139
|
+
(map(.[0]) | add / length) as $mx |
|
|
140
|
+
(map(.[1]) | add / length) as $my |
|
|
141
|
+
(map((.[0]-$mx)*(.[1]-$my)) | add) /
|
|
142
|
+
(((map(pow(.[0]-$mx; 2)) | add) * (map(pow(.[1]-$my; 2)) | add)) | sqrt)
|
|
143
|
+
end'
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## References
|
|
147
|
+
|
|
148
|
+
- `docs/observability-metrics.md`
|
|
149
|
+
- `src/cli/db.ts`
|
|
150
|
+
- `src/specialist/observability-sqlite.ts`
|