switchroom 0.5.0 → 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -121
- package/bin/autoaccept.exp +29 -6
- package/dist/agent-scheduler/index.js +12261 -0
- package/dist/cli/autoaccept-poll.js +10 -0
- package/dist/cli/switchroom.js +27250 -25324
- package/dist/vault/approvals/kernel-server.js +12709 -0
- package/dist/vault/broker/server.js +15724 -0
- package/package.json +4 -3
- package/profiles/_base/start.sh.hbs +133 -0
- package/profiles/_shared/telegram-style.md.hbs +3 -3
- package/profiles/default/CLAUDE.md +3 -3
- package/profiles/default/CLAUDE.md.hbs +2 -2
- package/profiles/default/workspace/CLAUDE.md.hbs +9 -0
- package/skills/docx/VENDORED.md +1 -1
- package/skills/mcp-builder/VENDORED.md +1 -1
- package/skills/pdf/VENDORED.md +1 -1
- package/skills/pptx/VENDORED.md +1 -1
- package/skills/skill-creator/VENDORED.md +1 -1
- package/skills/switchroom-architecture/SKILL.md +8 -7
- package/skills/switchroom-cli/SKILL.md +23 -15
- package/skills/switchroom-health/SKILL.md +7 -7
- package/skills/switchroom-install/SKILL.md +36 -39
- package/skills/switchroom-manage/SKILL.md +4 -4
- package/skills/switchroom-status/SKILL.md +1 -1
- package/skills/webapp-testing/VENDORED.md +1 -1
- package/skills/xlsx/VENDORED.md +1 -1
- package/telegram-plugin/admin-commands/dispatch.test.ts +119 -1
- package/telegram-plugin/admin-commands/index.ts +71 -0
- package/telegram-plugin/ask-user.ts +1 -0
- package/telegram-plugin/card-event-log.ts +138 -0
- package/telegram-plugin/dist/bridge/bridge.js +178 -31
- package/telegram-plugin/dist/foreman/foreman.js +6875 -6526
- package/telegram-plugin/dist/gateway/gateway.js +13862 -11834
- package/telegram-plugin/dist/server.js +202 -40
- package/telegram-plugin/fleet-state.ts +25 -10
- package/telegram-plugin/foreman/foreman.ts +38 -3
- package/telegram-plugin/gateway/approval-callback.ts +126 -0
- package/telegram-plugin/gateway/approval-card.test.ts +90 -0
- package/telegram-plugin/gateway/approval-card.ts +127 -0
- package/telegram-plugin/gateway/approvals-commands.ts +126 -0
- package/telegram-plugin/gateway/boot-card.ts +31 -6
- package/telegram-plugin/gateway/boot-probes.ts +503 -72
- package/telegram-plugin/gateway/gateway.ts +822 -94
- package/telegram-plugin/gateway/ipc-protocol.ts +34 -1
- package/telegram-plugin/gateway/ipc-server.ts +35 -0
- package/telegram-plugin/gateway/startup-mutex.ts +110 -2
- package/telegram-plugin/hooks/hooks.json +19 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +216 -0
- package/telegram-plugin/hooks/tool-label-stop.mjs +63 -0
- package/telegram-plugin/package.json +4 -1
- package/telegram-plugin/plugin-logger.ts +20 -1
- package/telegram-plugin/progress-card-driver.ts +202 -13
- package/telegram-plugin/progress-card.ts +2 -2
- package/telegram-plugin/quota-check.ts +1 -0
- package/telegram-plugin/registry/subagents-schema.ts +37 -0
- package/telegram-plugin/registry/subagents.test.ts +64 -0
- package/telegram-plugin/session-tail.ts +58 -5
- package/telegram-plugin/shared/bot-runtime.ts +48 -2
- package/telegram-plugin/subagent-watcher.ts +139 -7
- package/telegram-plugin/tests/_progress-card-harness.ts +4 -0
- package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +201 -0
- package/telegram-plugin/tests/boot-card-probe-target.test.ts +10 -34
- package/telegram-plugin/tests/boot-card-render.test.ts +6 -5
- package/telegram-plugin/tests/boot-probes.test.ts +558 -0
- package/telegram-plugin/tests/card-event-log.test.ts +145 -0
- package/telegram-plugin/tests/gateway-startup-mutex.test.ts +102 -0
- package/telegram-plugin/tests/ipc-server-validate-inject-inbound.test.ts +134 -0
- package/telegram-plugin/tests/progress-card-delay-842.test.ts +160 -0
- package/telegram-plugin/tests/quota-check.test.ts +37 -1
- package/telegram-plugin/tests/subagent-registry-bugs.test.ts +5 -0
- package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts +104 -1
- package/telegram-plugin/tests/subagent-watcher.test.ts +5 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +114 -0
- package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +5 -3
- package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +10 -0
- package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +58 -14
- package/telegram-plugin/tests/welcome-text.test.ts +57 -0
- package/telegram-plugin/tool-label-sidecar.ts +140 -0
- package/telegram-plugin/tool-labels.ts +55 -0
- package/telegram-plugin/two-zone-card.ts +27 -7
- package/telegram-plugin/uat/SETUP.md +160 -0
- package/telegram-plugin/uat/assertions.ts +140 -0
- package/telegram-plugin/uat/driver.ts +174 -0
- package/telegram-plugin/uat/harness.ts +161 -0
- package/telegram-plugin/uat/login.ts +134 -0
- package/telegram-plugin/uat/port-allocator.ts +71 -0
- package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +61 -0
- package/telegram-plugin/welcome-text.ts +44 -2
- package/bin/bridge-watchdog.sh +0 -967
|
@@ -28,16 +28,19 @@ function subAgentUserMsg(promptText: string) {
|
|
|
28
28
|
interface StallHarness {
|
|
29
29
|
notifications: string[]
|
|
30
30
|
stallCalls: Array<{ agentId: string; idleMs: number; description: string }>
|
|
31
|
+
unstallCalls: Array<{ agentId: string; description: string }>
|
|
31
32
|
logs: string[]
|
|
32
33
|
advance: (ms: number) => void
|
|
33
34
|
watcher: ReturnType<typeof startSubagentWatcher>
|
|
34
35
|
now: () => number
|
|
35
36
|
fileContents: Map<string, Buffer>
|
|
37
|
+
jsonlPath: string
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
function makeStallHarness(opts: {
|
|
39
41
|
agentDir?: string
|
|
40
42
|
stallThresholdMs?: number
|
|
43
|
+
silentSynthesisStallThresholdMs?: number
|
|
41
44
|
rescanMs?: number
|
|
42
45
|
initialContent?: string
|
|
43
46
|
agentId?: string
|
|
@@ -45,6 +48,7 @@ function makeStallHarness(opts: {
|
|
|
45
48
|
const {
|
|
46
49
|
agentDir = '/home/user/.switchroom/agents/myagent',
|
|
47
50
|
stallThresholdMs = 60_000,
|
|
51
|
+
silentSynthesisStallThresholdMs,
|
|
48
52
|
rescanMs = 500,
|
|
49
53
|
agentId = 'test-stall-agent-01',
|
|
50
54
|
initialContent,
|
|
@@ -53,6 +57,7 @@ function makeStallHarness(opts: {
|
|
|
53
57
|
let currentTime = 1000
|
|
54
58
|
const notifications: string[] = []
|
|
55
59
|
const stallCalls: Array<{ agentId: string; idleMs: number; description: string }> = []
|
|
60
|
+
const unstallCalls: Array<{ agentId: string; description: string }> = []
|
|
56
61
|
const logs: string[] = []
|
|
57
62
|
|
|
58
63
|
// Build realistic path: <agentDir>/.claude/projects/<sanitized-cwd>/<sessionId>/subagents/
|
|
@@ -127,9 +132,16 @@ function makeStallHarness(opts: {
|
|
|
127
132
|
const watcher = startSubagentWatcher({
|
|
128
133
|
agentDir,
|
|
129
134
|
stallThresholdMs,
|
|
135
|
+
// When the test doesn't explicitly distinguish the two thresholds,
|
|
136
|
+
// mirror them so existing fixtures (which have toolCount=0 and a
|
|
137
|
+
// simple "advance past 60s" model) keep working under the new
|
|
138
|
+
// adaptive logic. New tests pass an explicit value to exercise the
|
|
139
|
+
// silent-synthesis vs active-loop split.
|
|
140
|
+
silentSynthesisStallThresholdMs: silentSynthesisStallThresholdMs ?? stallThresholdMs,
|
|
130
141
|
rescanMs,
|
|
131
142
|
sendNotification: (text) => notifications.push(text),
|
|
132
143
|
onStall: (id, idle, desc) => stallCalls.push({ agentId: id, idleMs: idle, description: desc }),
|
|
144
|
+
onUnstall: (id, desc) => unstallCalls.push({ agentId: id, description: desc }),
|
|
133
145
|
now: () => currentTime,
|
|
134
146
|
setInterval: (fn, ms) => {
|
|
135
147
|
const ref = nextRef++
|
|
@@ -156,7 +168,7 @@ function makeStallHarness(opts: {
|
|
|
156
168
|
}
|
|
157
169
|
}
|
|
158
170
|
|
|
159
|
-
return { notifications, stallCalls, logs, advance, watcher, now: () => currentTime, fileContents }
|
|
171
|
+
return { notifications, stallCalls, unstallCalls, logs, advance, watcher, now: () => currentTime, fileContents, jsonlPath }
|
|
160
172
|
}
|
|
161
173
|
|
|
162
174
|
// ─── Tests ────────────────────────────────────────────────────────────────────
|
|
@@ -211,6 +223,97 @@ describe('subagent-watcher onStall callback (Option C, issue #393)', () => {
|
|
|
211
223
|
expect(stallCalls.length).toBe(countAfterFirstStall) // still exactly 1
|
|
212
224
|
})
|
|
213
225
|
|
|
226
|
+
// Test 11 (silent-synthesis): a sub-agent that hasn't fired any tools
|
|
227
|
+
// yet should NOT trip the stall detector at the active-loop threshold
|
|
228
|
+
// (60s) — it's almost certainly in long-form synthesis mode where the
|
|
229
|
+
// model is still composing its first emit. The silent-synthesis
|
|
230
|
+
// threshold (5min by default) is what gates that case. Pre-fix the
|
|
231
|
+
// single 60s threshold tripped on plan/research sub-agents that ran
|
|
232
|
+
// 2-3min legitimately, freezing the card at ⚠ until completion.
|
|
233
|
+
it('does NOT trip stall at 60s when toolCount=0 (silent synthesis adaptive threshold)', () => {
|
|
234
|
+
const agentId = 'stall-test-11'
|
|
235
|
+
const { stallCalls, advance, watcher } = makeStallHarness({
|
|
236
|
+
agentId,
|
|
237
|
+
stallThresholdMs: 60_000,
|
|
238
|
+
silentSynthesisStallThresholdMs: 300_000, // 5min
|
|
239
|
+
rescanMs: 500,
|
|
240
|
+
})
|
|
241
|
+
advance(500) // register
|
|
242
|
+
const entry = watcher.getRegistry().get(agentId)
|
|
243
|
+
if (entry) entry.historical = false
|
|
244
|
+
advance(120_000) // 2min idle, far past 60s but well under 5min
|
|
245
|
+
expect(stallCalls).toHaveLength(0)
|
|
246
|
+
advance(200_000) // total ~5min 20s — past silent-synthesis threshold
|
|
247
|
+
expect(stallCalls).toHaveLength(1)
|
|
248
|
+
expect(stallCalls[0].agentId).toBe(agentId)
|
|
249
|
+
})
|
|
250
|
+
|
|
251
|
+
// Test 12 (un-stall transition): once JSONL activity returns after a
|
|
252
|
+
// stall, the watcher must reset stallNotified, fire onUnstall, and
|
|
253
|
+
// re-arm so a subsequent stall detects again. Pre-fix none of those
|
|
254
|
+
// happened — the card stuck at ⚠ even when the sub-agent was clearly
|
|
255
|
+
// alive again.
|
|
256
|
+
it('fires onUnstall when activity returns after a stall and re-arms detection', () => {
|
|
257
|
+
const agentId = 'stall-test-12'
|
|
258
|
+
const { stallCalls, unstallCalls, advance, watcher, fileContents, jsonlPath } = makeStallHarness({
|
|
259
|
+
agentId,
|
|
260
|
+
// Force the active-loop threshold by giving the entry a tool right
|
|
261
|
+
// away (avoids the silent-synthesis adaptive path). We append a
|
|
262
|
+
// sub_agent_tool_use line in the initial content so toolCount > 0
|
|
263
|
+
// by the first activity bump.
|
|
264
|
+
stallThresholdMs: 60_000,
|
|
265
|
+
silentSynthesisStallThresholdMs: 60_000, // keep flat for this test
|
|
266
|
+
rescanMs: 500,
|
|
267
|
+
initialContent: buildJSONL(
|
|
268
|
+
subAgentUserMsg('background task'),
|
|
269
|
+
{ type: 'assistant', message: { content: [{ type: 'tool_use', id: 'tool-A', name: 'Read', input: { path: '/x' } }] } },
|
|
270
|
+
),
|
|
271
|
+
})
|
|
272
|
+
advance(500) // register + initial tail read (toolCount becomes 1)
|
|
273
|
+
const entry = watcher.getRegistry().get(agentId)
|
|
274
|
+
if (entry) entry.historical = false
|
|
275
|
+
advance(65_000) // cross 60s — stall fires
|
|
276
|
+
expect(stallCalls).toHaveLength(1)
|
|
277
|
+
expect(unstallCalls).toHaveLength(0)
|
|
278
|
+
|
|
279
|
+
// Append a fresh JSONL line — the sub-agent emits text, proving it's
|
|
280
|
+
// alive. The watcher should reset stallNotified, fire onUnstall, and
|
|
281
|
+
// re-arm so a *future* idle period can stall it again.
|
|
282
|
+
const existing = fileContents.get(jsonlPath) ?? Buffer.from('')
|
|
283
|
+
const resumeLine = JSON.stringify({ type: 'assistant', message: { content: [{ type: 'text', text: 'still alive' }] } }) + '\n'
|
|
284
|
+
fileContents.set(jsonlPath, Buffer.concat([existing, Buffer.from(resumeLine, 'utf-8')]))
|
|
285
|
+
advance(500) // poll picks up the new line
|
|
286
|
+
|
|
287
|
+
expect(unstallCalls).toHaveLength(1)
|
|
288
|
+
expect(unstallCalls[0].agentId).toBe(agentId)
|
|
289
|
+
// stallNotified must be re-armed: another idle window crosses
|
|
290
|
+
// threshold again and onStall fires a SECOND time.
|
|
291
|
+
advance(65_000)
|
|
292
|
+
expect(stallCalls).toHaveLength(2)
|
|
293
|
+
})
|
|
294
|
+
|
|
295
|
+
// Test 13 (un-stall + tool-loop adaptive): once tools have been used,
|
|
296
|
+
// a 60s gap correctly re-trips the stall detector. Sanity check that
|
|
297
|
+
// toolCount > 0 selects the active-loop threshold, not silent-synthesis.
|
|
298
|
+
it('uses 60s threshold once toolCount>0 (active-loop adaptive)', () => {
|
|
299
|
+
const agentId = 'stall-test-13'
|
|
300
|
+
const { stallCalls, advance, watcher } = makeStallHarness({
|
|
301
|
+
agentId,
|
|
302
|
+
stallThresholdMs: 60_000,
|
|
303
|
+
silentSynthesisStallThresholdMs: 600_000, // way out — 10min
|
|
304
|
+
rescanMs: 500,
|
|
305
|
+
initialContent: buildJSONL(
|
|
306
|
+
subAgentUserMsg('worker'),
|
|
307
|
+
{ type: 'assistant', message: { content: [{ type: 'tool_use', id: 'tool-A', name: 'Read', input: {} }] } },
|
|
308
|
+
),
|
|
309
|
+
})
|
|
310
|
+
advance(500) // register + tail (toolCount=1)
|
|
311
|
+
const entry = watcher.getRegistry().get(agentId)
|
|
312
|
+
if (entry) entry.historical = false
|
|
313
|
+
advance(65_000) // 65s of silence with tools active → stall
|
|
314
|
+
expect(stallCalls).toHaveLength(1)
|
|
315
|
+
})
|
|
316
|
+
|
|
214
317
|
// Test 10: onStall is NOT called for sub-agents already done/failed
|
|
215
318
|
it('does not call onStall for sub-agents in done/failed state', () => {
|
|
216
319
|
const agentId = 'stall-test-10-done'
|
|
@@ -200,6 +200,11 @@ function makeHarness(opts: {
|
|
|
200
200
|
agentDir,
|
|
201
201
|
sendNotification: (text) => notifications.push(text),
|
|
202
202
|
stallThresholdMs,
|
|
203
|
+
// Mirror the active-loop threshold so existing fixtures (which have
|
|
204
|
+
// toolCount=0 and use the simple "advance past N" model) keep
|
|
205
|
+
// working under the adaptive split. Tests that need the silent-
|
|
206
|
+
// synthesis vs active-loop distinction set both explicitly.
|
|
207
|
+
silentSynthesisStallThresholdMs: stallThresholdMs,
|
|
203
208
|
rescanMs,
|
|
204
209
|
now: () => currentTime,
|
|
205
210
|
setInterval: (fn, ms) => {
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
2
|
+
import { mkdtempSync, rmSync, appendFileSync, writeFileSync } from 'node:fs'
|
|
3
|
+
import { tmpdir } from 'node:os'
|
|
4
|
+
import { join } from 'node:path'
|
|
5
|
+
import { createToolLabelSidecar } from '../tool-label-sidecar.js'
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Unit tests for tool-label-sidecar.ts (#783).
|
|
9
|
+
*
|
|
10
|
+
* Uses an injected scheduler so we drive polls deterministically — no
|
|
11
|
+
* setTimeout, no flake.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
function makeManualScheduler() {
|
|
15
|
+
let tickFn: (() => void) | null = null
|
|
16
|
+
return {
|
|
17
|
+
setInterval: (cb: () => void, _ms: number) => {
|
|
18
|
+
tickFn = cb
|
|
19
|
+
return Symbol('handle')
|
|
20
|
+
},
|
|
21
|
+
clearInterval: (_h: unknown) => {
|
|
22
|
+
tickFn = null
|
|
23
|
+
},
|
|
24
|
+
tick: () => { if (tickFn) tickFn() },
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
describe('tool-label-sidecar', () => {
|
|
29
|
+
let stateDir: string
|
|
30
|
+
beforeEach(() => {
|
|
31
|
+
stateDir = mkdtempSync(join(tmpdir(), 'tool-label-sidecar-'))
|
|
32
|
+
})
|
|
33
|
+
afterEach(() => {
|
|
34
|
+
rmSync(stateDir, { recursive: true, force: true })
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
it('returns undefined when sidecar file is missing', () => {
|
|
38
|
+
const sched = makeManualScheduler()
|
|
39
|
+
const s = createToolLabelSidecar({ stateDir, sessionId: 'no-such', scheduler: sched })
|
|
40
|
+
expect(s.getLabel('whatever')).toBeUndefined()
|
|
41
|
+
s.stop()
|
|
42
|
+
})
|
|
43
|
+
|
|
44
|
+
it('reads existing sidecar lines on construction', () => {
|
|
45
|
+
const sessionId = 'sess1'
|
|
46
|
+
const f = join(stateDir, `tool-labels-${sessionId}.jsonl`)
|
|
47
|
+
writeFileSync(f, JSON.stringify({ ts: 1, tool_use_id: 'A', agent_id: 'g', label: 'Reading foo.ts', tool_name: 'Read' }) + '\n')
|
|
48
|
+
const sched = makeManualScheduler()
|
|
49
|
+
const s = createToolLabelSidecar({ stateDir, sessionId, scheduler: sched })
|
|
50
|
+
expect(s.getLabel('A')).toBe('Reading foo.ts')
|
|
51
|
+
expect(s.getLabel('B')).toBeUndefined()
|
|
52
|
+
s.stop()
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
it('picks up appended lines on poll() (renderer reads, hook then writes)', () => {
|
|
56
|
+
const sessionId = 'sess2'
|
|
57
|
+
const sched = makeManualScheduler()
|
|
58
|
+
const s = createToolLabelSidecar({ stateDir, sessionId, scheduler: sched })
|
|
59
|
+
expect(s.getLabel('A')).toBeUndefined()
|
|
60
|
+
|
|
61
|
+
const f = join(stateDir, `tool-labels-${sessionId}.jsonl`)
|
|
62
|
+
appendFileSync(f, JSON.stringify({ ts: 1, tool_use_id: 'A', agent_id: null, label: 'Replying', tool_name: 'mcp__switchroom-telegram__reply' }) + '\n')
|
|
63
|
+
s.poll()
|
|
64
|
+
expect(s.getLabel('A')).toBe('Replying')
|
|
65
|
+
s.stop()
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
it('fires onLabel subscribers as new lines arrive', () => {
|
|
69
|
+
const sessionId = 'sess3'
|
|
70
|
+
const sched = makeManualScheduler()
|
|
71
|
+
const s = createToolLabelSidecar({ stateDir, sessionId, scheduler: sched })
|
|
72
|
+
const seen: Array<[string, string]> = []
|
|
73
|
+
s.onLabel((id, label) => seen.push([id, label]))
|
|
74
|
+
|
|
75
|
+
const f = join(stateDir, `tool-labels-${sessionId}.jsonl`)
|
|
76
|
+
appendFileSync(f, JSON.stringify({ ts: 1, tool_use_id: 'X', agent_id: null, label: 'Reading a.ts', tool_name: 'Read' }) + '\n')
|
|
77
|
+
s.poll()
|
|
78
|
+
expect(seen).toEqual([['X', 'Reading a.ts']])
|
|
79
|
+
|
|
80
|
+
appendFileSync(f, JSON.stringify({ ts: 2, tool_use_id: 'Y', agent_id: null, label: 'Editing b.ts', tool_name: 'Edit' }) + '\n')
|
|
81
|
+
s.poll()
|
|
82
|
+
expect(seen).toEqual([['X', 'Reading a.ts'], ['Y', 'Editing b.ts']])
|
|
83
|
+
s.stop()
|
|
84
|
+
})
|
|
85
|
+
|
|
86
|
+
it('ignores malformed JSON lines', () => {
|
|
87
|
+
const sessionId = 'sess4'
|
|
88
|
+
const sched = makeManualScheduler()
|
|
89
|
+
const f = join(stateDir, `tool-labels-${sessionId}.jsonl`)
|
|
90
|
+
writeFileSync(
|
|
91
|
+
f,
|
|
92
|
+
'not-json\n' +
|
|
93
|
+
JSON.stringify({ tool_use_id: 'good', label: 'Saved memory', ts: 1, tool_name: 'mcp__hindsight__retain', agent_id: null }) + '\n' +
|
|
94
|
+
'{partial\n',
|
|
95
|
+
)
|
|
96
|
+
const s = createToolLabelSidecar({ stateDir, sessionId, scheduler: sched })
|
|
97
|
+
expect(s.getLabel('good')).toBe('Saved memory')
|
|
98
|
+
s.stop()
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
it('first write wins (idempotent on duplicates)', () => {
|
|
102
|
+
const sessionId = 'sess5'
|
|
103
|
+
const sched = makeManualScheduler()
|
|
104
|
+
const f = join(stateDir, `tool-labels-${sessionId}.jsonl`)
|
|
105
|
+
writeFileSync(
|
|
106
|
+
f,
|
|
107
|
+
JSON.stringify({ tool_use_id: 'A', label: 'first', ts: 1, tool_name: 'Read', agent_id: null }) + '\n' +
|
|
108
|
+
JSON.stringify({ tool_use_id: 'A', label: 'second', ts: 2, tool_name: 'Read', agent_id: null }) + '\n',
|
|
109
|
+
)
|
|
110
|
+
const s = createToolLabelSidecar({ stateDir, sessionId, scheduler: sched })
|
|
111
|
+
expect(s.getLabel('A')).toBe('first')
|
|
112
|
+
s.stop()
|
|
113
|
+
})
|
|
114
|
+
})
|
|
@@ -59,12 +59,14 @@ const enqueue = (chatId: string): SessionEvent => ({
|
|
|
59
59
|
|
|
60
60
|
describe('P2: completion gates on background fleet members', () => {
|
|
61
61
|
it('hasLiveBackground reflects fleet status correctly', () => {
|
|
62
|
+
// isBackgroundDispatch is the sticky flag used by hasLiveBackground —
|
|
63
|
+
// status alone is no longer the gate (fixes #757).
|
|
62
64
|
const fleet = new Map([
|
|
63
|
-
['a', { agentId: 'a', status: 'background' as const, terminalAt: null } as never],
|
|
64
|
-
['b', { agentId: 'b', status: 'done' as const, terminalAt: 2000 } as never],
|
|
65
|
+
['a', { agentId: 'a', status: 'background' as const, terminalAt: null, isBackgroundDispatch: true } as never],
|
|
66
|
+
['b', { agentId: 'b', status: 'done' as const, terminalAt: 2000, isBackgroundDispatch: false } as never],
|
|
65
67
|
])
|
|
66
68
|
expect(hasLiveBackground(fleet as never)).toBe(true)
|
|
67
|
-
fleet.set('a', { agentId: 'a', status: 'done' as const, terminalAt: 3000 } as never)
|
|
69
|
+
fleet.set('a', { agentId: 'a', status: 'done' as const, terminalAt: 3000, isBackgroundDispatch: true } as never)
|
|
68
70
|
expect(hasLiveBackground(fleet as never)).toBe(false)
|
|
69
71
|
})
|
|
70
72
|
|
|
@@ -61,6 +61,16 @@ describe('phaseFor truth table', () => {
|
|
|
61
61
|
['parent-done + fg-failed + bg-running → Background, not Done', st({ stage: 'done' }), fleetOf(fm('a', 'failed'), fm('b', 'running', NOW)), { parentDone: true }, 'Background'],
|
|
62
62
|
['mixed terminal+stuck → not Done', st({ stage: 'run' }), fleetOf(fm('a', 'done'), fm('b', 'stuck', 0)), {}, 'Stalled'],
|
|
63
63
|
['reply tool fired AND fleet running → Background (parentDone)', st({ stage: 'done' }), fleetOf(fm('a', 'running', NOW)), { parentDone: true }, 'Background'],
|
|
64
|
+
// Regression: pre-fix the `[].every(...)` vacuous-truth at
|
|
65
|
+
// two-zone-card.ts fleetAllStuck would mark the fleet stalled the
|
|
66
|
+
// moment the last sub-agent finished while the parent was still
|
|
67
|
+
// running. Plan agents that completed in 2-3min showed ⚠ Stalled
|
|
68
|
+
// on the pinned card until the parent itself wrapped up. Now: zero
|
|
69
|
+
// running-or-stuck members in the fleet means we fall through to
|
|
70
|
+
// the default "Working…" instead.
|
|
71
|
+
['regression: all fleet done + parent still running → Working… (was Stalled)', st({ stage: 'run' }), fleetOf(fm('a', 'done'), fm('b', 'done')), {}, 'Working…'],
|
|
72
|
+
['regression: lone done sub-agent + parent still running → Working…', st({ stage: 'run' }), fleetOf(fm('a', 'done')), {}, 'Working…'],
|
|
73
|
+
['regression: failed-only fleet + parent still running → Working… (was Stalled)', st({ stage: 'run' }), fleetOf(fm('a', 'failed')), {}, 'Working…'],
|
|
64
74
|
])('%s', (_name, state, fleet, opts, expectedLabel) => {
|
|
65
75
|
const phase = phaseFor(state, fleet, NOW, opts as Record<string, unknown>)
|
|
66
76
|
expect(phase.label).toBe(expectedLabel)
|
|
@@ -100,13 +100,16 @@ describe('PR-C2: two-zone card snapshot extras', () => {
|
|
|
100
100
|
// 12 items, cap 8 → 4 hidden.
|
|
101
101
|
expect(out).toContain('(+4 earlier)')
|
|
102
102
|
// The visible bullets are the LAST 8 (slice(-8) → f4..f11).
|
|
103
|
-
|
|
104
|
-
expect(out).toContain('
|
|
105
|
-
|
|
106
|
-
|
|
103
|
+
// f11 is the in-flight bullet (stage=run, last index) → ◉.
|
|
104
|
+
expect(out).toContain('◉ f11.ts')
|
|
105
|
+
expect(out).toContain('● f4.ts')
|
|
106
|
+
// f3 (the latest hidden) must not appear as a bullet.
|
|
107
|
+
expect(out).not.toContain('f3.ts')
|
|
108
|
+
// No <code> wrapping around row labels anymore.
|
|
109
|
+
expect(out).not.toContain('<code>f11.ts</code>')
|
|
107
110
|
})
|
|
108
111
|
|
|
109
|
-
it('parent zone: in-flight last bullet uses ◉ <
|
|
112
|
+
it('parent zone: in-flight last bullet uses ◉ <plain>; earlier use ● <plain>', () => {
|
|
110
113
|
const items = [
|
|
111
114
|
{ tool: 'Read', label: 'a.ts' },
|
|
112
115
|
{ tool: 'Read', label: 'b.ts' },
|
|
@@ -117,13 +120,16 @@ describe('PR-C2: two-zone card snapshot extras', () => {
|
|
|
117
120
|
fleet: new Map(),
|
|
118
121
|
now: NOW,
|
|
119
122
|
})
|
|
120
|
-
// last item active
|
|
121
|
-
expect(out).toContain('◉
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
expect(out).toContain('●
|
|
125
|
-
|
|
126
|
-
expect(out).not.toContain('
|
|
123
|
+
// last item active — plain text, no <b>, no <code>, no tool prefix
|
|
124
|
+
expect(out).toContain('◉ ls')
|
|
125
|
+
expect(out).not.toContain('◉ <b>')
|
|
126
|
+
// earlier items — plain text only, no tool prefix
|
|
127
|
+
expect(out).toContain('● a.ts')
|
|
128
|
+
expect(out).toContain('● b.ts')
|
|
129
|
+
expect(out).not.toContain('Read <code>')
|
|
130
|
+
// No <code> wrapping anywhere on parent rows.
|
|
131
|
+
expect(out).not.toContain('<code>ls</code>')
|
|
132
|
+
expect(out).not.toContain('<code>a.ts</code>')
|
|
127
133
|
})
|
|
128
134
|
|
|
129
135
|
it('parent zone: when stage=done all bullets render as ● (no active marker)', () => {
|
|
@@ -136,8 +142,46 @@ describe('PR-C2: two-zone card snapshot extras', () => {
|
|
|
136
142
|
fleet: new Map(),
|
|
137
143
|
now: NOW,
|
|
138
144
|
})
|
|
139
|
-
expect(out).toContain('●
|
|
140
|
-
expect(out).toContain('●
|
|
145
|
+
expect(out).toContain('● a.ts')
|
|
146
|
+
expect(out).toContain('● ls')
|
|
141
147
|
expect(out).not.toContain('◉')
|
|
142
148
|
})
|
|
149
|
+
|
|
150
|
+
it('parent zone: row with no label falls back to humanised tool name', () => {
|
|
151
|
+
const items = [
|
|
152
|
+
{ tool: 'TodoWrite', label: '' },
|
|
153
|
+
{ tool: 'Edit', label: '' },
|
|
154
|
+
]
|
|
155
|
+
const out = renderTwoZoneCard({
|
|
156
|
+
state: st({ stage: 'run', turnStartedAt: NOW - 5000, items }),
|
|
157
|
+
fleet: new Map(),
|
|
158
|
+
now: NOW,
|
|
159
|
+
})
|
|
160
|
+
expect(out).toContain('● updating tasks')
|
|
161
|
+
expect(out).toContain('◉ editing file')
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
it('parent zone: row with no label on mcp tool uses mcpDisplayName', () => {
|
|
165
|
+
const items = [
|
|
166
|
+
{ tool: 'mcp__switchroom-telegram__reply', label: '' },
|
|
167
|
+
]
|
|
168
|
+
const out = renderTwoZoneCard({
|
|
169
|
+
state: st({ stage: 'run', turnStartedAt: NOW - 5000, items }),
|
|
170
|
+
fleet: new Map(),
|
|
171
|
+
now: NOW,
|
|
172
|
+
})
|
|
173
|
+
expect(out).toContain('◉ Telegram: reply')
|
|
174
|
+
})
|
|
175
|
+
|
|
176
|
+
it('parent zone: HTML in label is escaped (no raw <code> styling)', () => {
|
|
177
|
+
const items = [
|
|
178
|
+
{ tool: 'Bash', label: 'echo <hi>' },
|
|
179
|
+
]
|
|
180
|
+
const out = renderTwoZoneCard({
|
|
181
|
+
state: st({ stage: 'done', turnStartedAt: NOW - 5000, items }),
|
|
182
|
+
fleet: new Map(),
|
|
183
|
+
now: NOW,
|
|
184
|
+
})
|
|
185
|
+
expect(out).toContain('● echo &lt;hi&gt;')
|
|
186
|
+
})
|
|
143
187
|
})
|
|
@@ -243,6 +243,63 @@ describe("statusPairedText", () => {
|
|
|
243
243
|
expect(out).toContain("<b>Version</b>");
|
|
244
244
|
});
|
|
245
245
|
});
|
|
246
|
+
|
|
247
|
+
// Live probe block — `/status` shows EVERY probe (green and otherwise).
|
|
248
|
+
// This is the deliberate opposite of the boot card's silent-when-healthy
|
|
249
|
+
// contract: boot card = quiet ack, /status = dashboard.
|
|
250
|
+
describe("live health block", () => {
|
|
251
|
+
it("does NOT render a Health section when meta.live is undefined", () => {
|
|
252
|
+
const out = statusPairedText({ user: "@ken", meta });
|
|
253
|
+
expect(out).not.toContain("<b>Health</b>");
|
|
254
|
+
});
|
|
255
|
+
|
|
256
|
+
it("does NOT render a Health section when meta.live is empty array", () => {
|
|
257
|
+
const out = statusPairedText({ user: "@ken", meta: { ...meta, live: [] } });
|
|
258
|
+
expect(out).not.toContain("<b>Health</b>");
|
|
259
|
+
});
|
|
260
|
+
|
|
261
|
+
it("renders all probe rows including green ones", () => {
|
|
262
|
+
const live: AgentMetadata["live"] = [
|
|
263
|
+
{ status: "ok", label: "Account", detail: "ken@x.com · Max · token 60d" },
|
|
264
|
+
{ status: "ok", label: "Broker", detail: "reachable" },
|
|
265
|
+
{ status: "degraded", label: "Skills", detail: "1/5 dangling: foo" },
|
|
266
|
+
{ status: "fail", label: "Scheduler", detail: "sidecar not running" },
|
|
267
|
+
];
|
|
268
|
+
const out = statusPairedText({ user: "@ken", meta: { ...meta, live } });
|
|
269
|
+
expect(out).toContain("<b>Health</b>");
|
|
270
|
+
expect(out).toContain("🟢 <b>Account</b> ken@x.com · Max · token 60d");
|
|
271
|
+
expect(out).toContain("🟢 <b>Broker</b> reachable");
|
|
272
|
+
expect(out).toContain("🟡 <b>Skills</b> 1/5 dangling: foo");
|
|
273
|
+
expect(out).toContain("🔴 <b>Scheduler</b> sidecar not running");
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
it("renders Health section before the audit block", () => {
|
|
277
|
+
const live: AgentMetadata["live"] = [
|
|
278
|
+
{ status: "ok", label: "Account", detail: "ok" },
|
|
279
|
+
];
|
|
280
|
+
const audit = {
|
|
281
|
+
version: "v0.3.0", tools: "all", toolsDeny: null, skills: null,
|
|
282
|
+
limits: "idle 30m", channel: "switchroom", memoryBank: "x",
|
|
283
|
+
};
|
|
284
|
+
const out = statusPairedText({
|
|
285
|
+
user: "@ken",
|
|
286
|
+
meta: { ...meta, live, audit },
|
|
287
|
+
});
|
|
288
|
+
const healthIdx = out.indexOf("<b>Health</b>");
|
|
289
|
+
const versionIdx = out.indexOf("<b>Version</b>");
|
|
290
|
+
expect(healthIdx).toBeGreaterThan(-1);
|
|
291
|
+
expect(versionIdx).toBeGreaterThan(healthIdx);
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
it("escapes HTML in probe detail strings", () => {
|
|
295
|
+
const live: AgentMetadata["live"] = [
|
|
296
|
+
{ status: "fail", label: "Skills", detail: "<script>alert(1)</script>" },
|
|
297
|
+
];
|
|
298
|
+
const out = statusPairedText({ user: "@ken", meta: { ...meta, live } });
|
|
299
|
+
expect(out).not.toContain("<script>alert");
|
|
300
|
+
expect(out).toContain("&lt;script&gt;");
|
|
301
|
+
});
|
|
302
|
+
});
|
|
246
303
|
});
|
|
247
304
|
|
|
248
305
|
// Local alias for the audit shape — duplicates the AgentMetadata.audit
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sidecar reader for $TELEGRAM_STATE_DIR/tool-labels-${session_id}.jsonl —
|
|
3
|
+
* the per-tool-call human labels emitted by the PreToolUse hook
|
|
4
|
+
* `tool-label-pretool.mjs` (#783).
|
|
5
|
+
*
|
|
6
|
+
* Two surfaces:
|
|
7
|
+
*
|
|
8
|
+
* getLabel(toolUseId): string | undefined
|
|
9
|
+
* Returns the label if the sidecar has already produced one for this
|
|
10
|
+
* tool_use. Synchronous, in-memory.
|
|
11
|
+
*
|
|
12
|
+
* onLabel(cb): unsubscribe
|
|
13
|
+
* Subscribes to "label arrived for this tool_use_id" notifications,
|
|
14
|
+
* used by the renderer to re-emit a checklist row when a label
|
|
15
|
+
* arrives AFTER the matching JSONL `tool_use` has been processed.
|
|
16
|
+
*
|
|
17
|
+
* Design notes:
|
|
18
|
+
* - Plain stat()-poll watcher (every 250ms) — simpler than fs.watch and
|
|
19
|
+
* robust to all the platform quirks. The hot path is two-digit ms.
|
|
20
|
+
* - Append-only: we track a per-file byte offset and only read the new
|
|
21
|
+
* suffix on each tick, so re-reading is cheap.
|
|
22
|
+
* - One reader per session_id. The driver instantiates a reader when a
|
|
23
|
+
* session JSONL is first observed; old readers are stopped when the
|
|
24
|
+
* session is evicted from the chat-state TTL map.
|
|
25
|
+
*
|
|
26
|
+
* Pure module — no globals. Tests inject a custom directory and clock.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { existsSync, readFileSync, statSync } from 'node:fs'
|
|
30
|
+
import { join } from 'node:path'
|
|
31
|
+
|
|
32
|
+
/** One parsed line of the `tool-labels-<sessionId>.jsonl` sidecar file. */
export interface ToolLabelRow {
  ts: number
  tool_use_id: string
  agent_id: string | null
  label: string
  tool_name: string
}

/** Handle returned by {@link createToolLabelSidecar}. */
export interface ToolLabelSidecar {
  /** Synchronous, in-memory label lookup. */
  getLabel(toolUseId: string): string | undefined
  /** Subscribe to "label arrived" notifications; returns an unsubscribe function. */
  onLabel(cb: (toolUseId: string, label: string) => void): () => void
  /** Force a re-poll (tests). */
  poll(): void
  /** Stop polling and release resources. */
  stop(): void
}

/** Construction options for {@link createToolLabelSidecar}. */
export interface SidecarOptions {
  stateDir: string
  sessionId: string
  /** Polling interval in ms. Default 250. */
  pollMs?: number
  /** Inject for tests; defaults to the global setInterval / clearInterval. */
  scheduler?: {
    setInterval: (cb: () => void, ms: number) => unknown
    clearInterval: (handle: unknown) => void
  }
}

/**
 * Creates a polling reader over `<stateDir>/tool-labels-<sessionId>.jsonl`.
 *
 * The file is drained once synchronously on construction and then re-polled
 * on the scheduler's interval. Each valid row (string `tool_use_id` and string
 * `label`) is stored in memory; the first label seen for a given id wins and
 * subscribers are notified exactly once per id.
 *
 * Only complete lines are consumed. A trailing fragment without a newline is
 * consumed only if it already parses as a valid row; otherwise the read offset
 * stays at the start of the fragment so the line is re-read once the writer
 * has finished appending it. (Previously the offset always advanced to the end
 * of the file, so a line observed mid-write was dropped for good.)
 *
 * @param opts Directory, session id, and optional poll interval / scheduler.
 * @returns Lookup, subscription, manual-poll and stop handles.
 */
export function createToolLabelSidecar(opts: SidecarOptions): ToolLabelSidecar {
  const path = join(opts.stateDir, `tool-labels-${opts.sessionId}.jsonl`)
  const labels = new Map<string, string>()
  const subscribers = new Set<(toolUseId: string, label: string) => void>()
  // Byte offset of the first byte that has not been consumed yet.
  let offset = 0
  let stopped = false

  const sched: NonNullable<SidecarOptions['scheduler']> = opts.scheduler ?? {
    setInterval: (cb, ms) => setInterval(cb, ms),
    clearInterval: (h) => clearInterval(h as ReturnType<typeof setInterval>),
  }

  /**
   * Parses one line and records its label.
   * Returns true when the line is a well-formed row (even if it was a
   * duplicate id and therefore ignored), false for blank or malformed input.
   */
  function ingestLine(raw: string): boolean {
    const line = raw.trim()
    if (!line) return false
    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch {
      return false
    }
    if (typeof parsed !== 'object' || parsed === null) return false
    const row = parsed as Partial<ToolLabelRow>
    if (typeof row.tool_use_id !== 'string' || typeof row.label !== 'string') return false
    // First write wins — sidecar lines are append-only and duplicates are
    // not expected, but if one lands the earliest label is kept.
    if (labels.has(row.tool_use_id)) return true
    labels.set(row.tool_use_id, row.label)
    for (const cb of subscribers) {
      // A throwing subscriber must not prevent the others from being notified.
      try { cb(row.tool_use_id, row.label) } catch { /* ignore */ }
    }
    return true
  }

  function poll(): void {
    if (stopped) return
    if (!existsSync(path)) return
    let size = 0
    try { size = statSync(path).size } catch { return }
    if (size === offset) return
    // Truncation safety: if the file shrank (rotation / manual delete),
    // reset the offset so we re-read from the start.
    if (size < offset) offset = 0

    let buf: Buffer
    try {
      buf = readFileSync(path)
    } catch {
      return
    }
    // The file may have shrunk between stat and read.
    if (buf.length < offset) offset = 0

    // Consume every complete (newline-terminated) line. Slicing on the
    // newline byte also avoids cutting a multi-byte UTF-8 sequence in half.
    const lastNewline = buf.lastIndexOf(0x0a)
    if (lastNewline >= offset) {
      const complete = buf.subarray(offset, lastNewline + 1).toString('utf8')
      // Advance before notifying so a re-entrant poll() from a subscriber
      // does not re-read the same lines.
      offset = lastNewline + 1
      for (const line of complete.split('\n')) ingestLine(line)
    }

    // Trailing fragment with no newline yet: consume it only if it is blank
    // or already a valid row. A prefix of a JSON object never parses, so a
    // half-written line is left in place for the next poll.
    if (offset < buf.length) {
      const tail = buf.subarray(offset).toString('utf8')
      if (tail.trim() === '' || ingestLine(tail)) offset = buf.length
    }
  }

  // Initial drain, in case the file already exists when we start.
  poll()
  const handle = sched.setInterval(poll, opts.pollMs ?? 250)

  return {
    getLabel(toolUseId) {
      return labels.get(toolUseId)
    },
    onLabel(cb) {
      subscribers.add(cb)
      return () => {
        subscribers.delete(cb)
      }
    },
    poll,
    stop() {
      if (stopped) return
      stopped = true
      try { sched.clearInterval(handle) } catch { /* ignore */ }
      subscribers.clear()
    },
  }
}
|