switchroom 0.5.0 → 0.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +142 -121
- package/bin/autoaccept.exp +29 -6
- package/dist/agent-scheduler/index.js +12261 -0
- package/dist/cli/autoaccept-poll.js +10 -0
- package/dist/cli/switchroom.js +27250 -25324
- package/dist/vault/approvals/kernel-server.js +12709 -0
- package/dist/vault/broker/server.js +15724 -0
- package/package.json +4 -3
- package/profiles/_base/start.sh.hbs +133 -0
- package/profiles/_shared/telegram-style.md.hbs +3 -3
- package/profiles/default/CLAUDE.md +3 -3
- package/profiles/default/CLAUDE.md.hbs +2 -2
- package/profiles/default/workspace/CLAUDE.md.hbs +9 -0
- package/skills/docx/VENDORED.md +1 -1
- package/skills/mcp-builder/VENDORED.md +1 -1
- package/skills/pdf/VENDORED.md +1 -1
- package/skills/pptx/VENDORED.md +1 -1
- package/skills/skill-creator/VENDORED.md +1 -1
- package/skills/switchroom-architecture/SKILL.md +8 -7
- package/skills/switchroom-cli/SKILL.md +23 -15
- package/skills/switchroom-health/SKILL.md +7 -7
- package/skills/switchroom-install/SKILL.md +36 -39
- package/skills/switchroom-manage/SKILL.md +4 -4
- package/skills/switchroom-status/SKILL.md +1 -1
- package/skills/webapp-testing/VENDORED.md +1 -1
- package/skills/xlsx/VENDORED.md +1 -1
- package/telegram-plugin/admin-commands/dispatch.test.ts +119 -1
- package/telegram-plugin/admin-commands/index.ts +71 -0
- package/telegram-plugin/ask-user.ts +1 -0
- package/telegram-plugin/card-event-log.ts +138 -0
- package/telegram-plugin/dist/bridge/bridge.js +178 -31
- package/telegram-plugin/dist/foreman/foreman.js +6875 -6526
- package/telegram-plugin/dist/gateway/gateway.js +13862 -11834
- package/telegram-plugin/dist/server.js +202 -40
- package/telegram-plugin/fleet-state.ts +25 -10
- package/telegram-plugin/foreman/foreman.ts +38 -3
- package/telegram-plugin/gateway/approval-callback.ts +126 -0
- package/telegram-plugin/gateway/approval-card.test.ts +90 -0
- package/telegram-plugin/gateway/approval-card.ts +127 -0
- package/telegram-plugin/gateway/approvals-commands.ts +126 -0
- package/telegram-plugin/gateway/boot-card.ts +31 -6
- package/telegram-plugin/gateway/boot-probes.ts +503 -72
- package/telegram-plugin/gateway/gateway.ts +822 -94
- package/telegram-plugin/gateway/ipc-protocol.ts +34 -1
- package/telegram-plugin/gateway/ipc-server.ts +35 -0
- package/telegram-plugin/gateway/startup-mutex.ts +110 -2
- package/telegram-plugin/hooks/hooks.json +19 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +216 -0
- package/telegram-plugin/hooks/tool-label-stop.mjs +63 -0
- package/telegram-plugin/package.json +4 -1
- package/telegram-plugin/plugin-logger.ts +20 -1
- package/telegram-plugin/progress-card-driver.ts +202 -13
- package/telegram-plugin/progress-card.ts +2 -2
- package/telegram-plugin/quota-check.ts +1 -0
- package/telegram-plugin/registry/subagents-schema.ts +37 -0
- package/telegram-plugin/registry/subagents.test.ts +64 -0
- package/telegram-plugin/session-tail.ts +58 -5
- package/telegram-plugin/shared/bot-runtime.ts +48 -2
- package/telegram-plugin/subagent-watcher.ts +139 -7
- package/telegram-plugin/tests/_progress-card-harness.ts +4 -0
- package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +201 -0
- package/telegram-plugin/tests/boot-card-probe-target.test.ts +10 -34
- package/telegram-plugin/tests/boot-card-render.test.ts +6 -5
- package/telegram-plugin/tests/boot-probes.test.ts +558 -0
- package/telegram-plugin/tests/card-event-log.test.ts +145 -0
- package/telegram-plugin/tests/gateway-startup-mutex.test.ts +102 -0
- package/telegram-plugin/tests/ipc-server-validate-inject-inbound.test.ts +134 -0
- package/telegram-plugin/tests/progress-card-delay-842.test.ts +160 -0
- package/telegram-plugin/tests/quota-check.test.ts +37 -1
- package/telegram-plugin/tests/subagent-registry-bugs.test.ts +5 -0
- package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts +104 -1
- package/telegram-plugin/tests/subagent-watcher.test.ts +5 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +114 -0
- package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +5 -3
- package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +10 -0
- package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +58 -14
- package/telegram-plugin/tests/welcome-text.test.ts +57 -0
- package/telegram-plugin/tool-label-sidecar.ts +140 -0
- package/telegram-plugin/tool-labels.ts +55 -0
- package/telegram-plugin/two-zone-card.ts +27 -7
- package/telegram-plugin/uat/SETUP.md +160 -0
- package/telegram-plugin/uat/assertions.ts +140 -0
- package/telegram-plugin/uat/driver.ts +174 -0
- package/telegram-plugin/uat/harness.ts +161 -0
- package/telegram-plugin/uat/login.ts +134 -0
- package/telegram-plugin/uat/port-allocator.ts +71 -0
- package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +61 -0
- package/telegram-plugin/welcome-text.ts +44 -2
- package/bin/bridge-watchdog.sh +0 -967
|
@@ -25,9 +25,54 @@ import { GrammyError, type Bot, type Context } from 'grammy'
|
|
|
25
25
|
import { run, type RunnerHandle } from '@grammyjs/runner'
|
|
26
26
|
import { execFileSync, spawnSync } from 'child_process'
|
|
27
27
|
import { createHash } from 'crypto'
|
|
28
|
+
import { AsyncLocalStorage } from 'async_hooks'
|
|
28
29
|
import { clearStaleTelegramPollingState } from '../startup-reset.js'
|
|
29
30
|
import { createRetryApiCall } from '../retry-api-call.js'
|
|
30
31
|
|
|
32
|
+
// ─── tg-post tag plumbing ─────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Per-call tag context for `tg-post` log lines. Callers wrap a Telegram
|
|
36
|
+
* API invocation in `withTgPostTags({ turnKey, cardMessageId, ... }, () => ...)`
|
|
37
|
+
* and the transformer reads the tags off the active store and appends them
|
|
38
|
+
* as `key=value` pairs after the existing fields. Used to correlate progress-card
|
|
39
|
+
* sends/edits to a turnKey + cardMessageId in days-old session audits.
|
|
40
|
+
*
|
|
41
|
+
* Untagged callers are unaffected — when no store is active, no tag fields
|
|
42
|
+
* are emitted and the existing log shape is byte-for-byte unchanged.
|
|
43
|
+
*/
|
|
44
|
+
export type TgPostTags = Record<string, string | number>
|
|
45
|
+
|
|
46
|
+
const tgPostTagStore = new AsyncLocalStorage<TgPostTags>()
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Run `fn` with the given tags attached to any `tg-post` lines emitted from
|
|
50
|
+
* the inner Telegram API calls. Tags are inherited across awaits within
|
|
51
|
+
* the same async chain (AsyncLocalStorage semantics). Pass an empty record
|
|
52
|
+
* or skip this wrapper entirely to fall back to the untagged shape.
|
|
53
|
+
*/
|
|
54
|
+
export function withTgPostTags<T>(tags: TgPostTags, fn: () => T): T {
|
|
55
|
+
return tgPostTagStore.run(tags, fn)
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/** Exposed for the transformer (and tests). Returns undefined when no store is active. */
|
|
59
|
+
export function _getTgPostTags(): TgPostTags | undefined {
|
|
60
|
+
return tgPostTagStore.getStore()
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function formatTgPostTags(tags: TgPostTags | undefined): string {
|
|
64
|
+
if (!tags) return ''
|
|
65
|
+
const parts: string[] = []
|
|
66
|
+
for (const [k, v] of Object.entries(tags)) {
|
|
67
|
+
if (v == null) continue
|
|
68
|
+
// Sanitise: tag values land in a single-line space-separated log
|
|
69
|
+
// record. Collapse each whitespace run to `_` so a value stays one grep-able token.
|
|
70
|
+
const s = String(v).replace(/\s+/g, '_')
|
|
71
|
+
parts.push(`${k}=${s}`)
|
|
72
|
+
}
|
|
73
|
+
return parts.length > 0 ? ' ' + parts.join(' ') : ''
|
|
74
|
+
}
|
|
75
|
+
|
|
31
76
|
// ─── tg-post observability transformer ────────────────────────────────────
|
|
32
77
|
|
|
33
78
|
/**
|
|
@@ -64,10 +109,11 @@ export function installTgPostLogger(bot: Bot): void {
|
|
|
64
109
|
const hash = bytes > 0
|
|
65
110
|
? createHash('sha1').update(text).digest('hex').slice(0, 12)
|
|
66
111
|
: '-'
|
|
112
|
+
const tagSuffix = formatTgPostTags(_getTgPostTags())
|
|
67
113
|
try {
|
|
68
114
|
const res = await prev(method, payload, signal)
|
|
69
115
|
process.stderr.write(
|
|
70
|
-
`tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=ok err=- code=- desc=-\n`,
|
|
116
|
+
`tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=ok err=- code=- desc=-${tagSuffix}\n`,
|
|
71
117
|
)
|
|
72
118
|
return res
|
|
73
119
|
} catch (err) {
|
|
@@ -85,7 +131,7 @@ export function installTgPostLogger(bot: Bot): void {
|
|
|
85
131
|
? rawDesc.replace(/\s+/g, ' ').slice(0, 80).replace(/[\r\n]/g, ' ') || '-'
|
|
86
132
|
: '-'
|
|
87
133
|
process.stderr.write(
|
|
88
|
-
`tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=err err=${errClass} code=${code} desc=${desc}\n`,
|
|
134
|
+
`tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=err err=${errClass} code=${code} desc=${desc}${tagSuffix}\n`,
|
|
89
135
|
)
|
|
90
136
|
throw err
|
|
91
137
|
}
|
|
@@ -43,7 +43,7 @@ import { homedir } from 'os'
|
|
|
43
43
|
import { projectSubagentLine } from './session-tail.js'
|
|
44
44
|
import { sanitiseToolArg } from './fleet-state.js'
|
|
45
45
|
import { escapeHtml, truncate } from './card-format.js'
|
|
46
|
-
import { bumpSubagentActivity, recordSubagentStall, recordSubagentEnd, reapStuckRunningRows } from './registry/subagents-schema.js'
|
|
46
|
+
import { bumpSubagentActivity, recordSubagentStall, recordSubagentResume, recordSubagentEnd, reapStuckRunningRows } from './registry/subagents-schema.js'
|
|
47
47
|
import { touchTurnActiveMarker } from './gateway/turn-active-marker.js'
|
|
48
48
|
|
|
49
49
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
|
@@ -119,10 +119,24 @@ export interface SubagentWatcherConfig {
|
|
|
119
119
|
*/
|
|
120
120
|
rescanMs?: number
|
|
121
121
|
/**
|
|
122
|
-
* How long without JSONL activity before a worker is considered stalled
|
|
123
|
-
* Default
|
|
122
|
+
* How long without JSONL activity before a worker is considered stalled
|
|
123
|
+
* **once at least one tool has been used**. Default 60_000ms. Tool-call
|
|
124
|
+
* loops emit JSONL events frequently, so 60s of silence in that phase
|
|
125
|
+
* is a strong signal the sub-agent is stuck on a single tool.
|
|
124
126
|
*/
|
|
125
127
|
stallThresholdMs?: number
|
|
128
|
+
/**
|
|
129
|
+
* Stall threshold (ms) used **before any tool has been used** —
|
|
130
|
+
* "silent synthesis" mode where the model is composing a response without
|
|
131
|
+
* emitting events yet. Long-running plan / synthesis sub-agents commonly
|
|
132
|
+
* spend 2-5 minutes in this state legitimately, so the active-loop
|
|
133
|
+
* threshold (60s) misfires. Default 300_000 (5 min).
|
|
134
|
+
*
|
|
135
|
+
* The watcher selects between this and `stallThresholdMs` per-entry
|
|
136
|
+
* based on `entry.toolCount`: 0 ⇒ silent synthesis, ≥1 ⇒ active loop.
|
|
137
|
+
* Both can be overridden for tests.
|
|
138
|
+
*/
|
|
139
|
+
silentSynthesisStallThresholdMs?: number
|
|
126
140
|
/**
|
|
127
141
|
* Reaper TTL (ms): background rows in `status='running'` whose
|
|
128
142
|
* `last_activity_at` (or `started_at` if liveness never wrote) is older
|
|
@@ -171,6 +185,39 @@ export interface SubagentWatcherConfig {
|
|
|
171
185
|
* the same sub-agent across subsequent poll ticks.
|
|
172
186
|
*/
|
|
173
187
|
onStall?: (agentId: string, idleMs: number, description: string) => void
|
|
188
|
+
/**
|
|
189
|
+
* Symmetric to `onStall`: fires when a previously-stalled sub-agent's
|
|
190
|
+
* JSONL grows again (text emission, tool use, turn_end — anything that
|
|
191
|
+
* moves last_activity_at). Wired to `progressDriver.onSubAgentUnstall`
|
|
192
|
+
* in gateway.ts so the pinned card clears the ⚠ Stalled badge as soon
|
|
193
|
+
* as activity resumes, instead of waiting on the next render tick.
|
|
194
|
+
*
|
|
195
|
+
* Each stall→resume cycle fires exactly once: the watcher resets
|
|
196
|
+
* `entry.stallNotified` on resume, so a sub-agent that stalls again
|
|
197
|
+
* later in the same lifetime is detected (and reported) again.
|
|
198
|
+
*/
|
|
199
|
+
onUnstall?: (agentId: string, description: string) => void
|
|
200
|
+
/**
|
|
201
|
+
* Called exactly once per sub-agent when its watcher observes a terminal
|
|
202
|
+
* transition (`done` or `failed`). Mirrors the existing `sub_agent_started`
|
|
203
|
+
* surface (emitted from session-tail) so the audit trail is symmetric.
|
|
204
|
+
*
|
|
205
|
+
* `outcome`:
|
|
206
|
+
* - 'completed' — the JSONL contained a `turn_duration` line.
|
|
207
|
+
* - 'failed' — reserved (no caller flips state to 'failed' today).
|
|
208
|
+
* - 'orphan' — the entry was historical at boot and its terminal
|
|
209
|
+
* transition fires after watcher startup. (Pre-existing
|
|
210
|
+
* `done` files at boot do NOT fire — see registerAgent.)
|
|
211
|
+
* Background-vs-foreground classification is the gateway's call (it owns
|
|
212
|
+
* the registry DB); the watcher just reports the lifecycle.
|
|
213
|
+
*/
|
|
214
|
+
onFinish?: (args: {
|
|
215
|
+
agentId: string
|
|
216
|
+
state: WorkerState
|
|
217
|
+
outcome: 'completed' | 'failed' | 'orphan'
|
|
218
|
+
toolCount: number
|
|
219
|
+
durationMs: number
|
|
220
|
+
}) => void
|
|
174
221
|
/** `Date.now` override for tests. */
|
|
175
222
|
now?: () => number
|
|
176
223
|
/** `setInterval` override for tests. */
|
|
@@ -205,6 +252,11 @@ export interface SubagentWatcherHandle {
|
|
|
205
252
|
|
|
206
253
|
const DEFAULT_RESCAN_MS = 1000
|
|
207
254
|
const DEFAULT_STALL_THRESHOLD_MS = 60_000
|
|
255
|
+
/** Silent-synthesis threshold (no tools used yet). 5min covers plan /
|
|
256
|
+
* research sub-agents that legitimately think for several minutes
|
|
257
|
+
* before emitting their first event — the 60s active-loop threshold
|
|
258
|
+
* misfires on those and freezes the card at ⚠. */
|
|
259
|
+
const DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300_000
|
|
208
260
|
const DEFAULT_REAPER_TTL_MS = 60 * 60_000 // 1 hour
|
|
209
261
|
const DEFAULT_REAPER_INTERVAL_MS = 15 * 60_000 // 15 minutes
|
|
210
262
|
/**
|
|
@@ -317,6 +369,10 @@ function readSubTail(
|
|
|
317
369
|
log?: (msg: string) => void,
|
|
318
370
|
db?: SubagentLivenessDb | null,
|
|
319
371
|
parentStateDir?: string | null,
|
|
372
|
+
/** Fires when the watcher observes JSONL activity returning for a
|
|
373
|
+
* previously-stalled entry. Closes the resume edge the schema doc
|
|
374
|
+
* has always promised. */
|
|
375
|
+
onUnstall?: (agentId: string, description: string) => void,
|
|
320
376
|
): void {
|
|
321
377
|
try {
|
|
322
378
|
const stat = fs.statSync(entry.filePath)
|
|
@@ -390,7 +446,39 @@ function readSubTail(
|
|
|
390
446
|
if (!line) continue
|
|
391
447
|
const events = projectSubagentLine(line, entry.agentId, startState)
|
|
392
448
|
for (const ev of events) {
|
|
449
|
+
const idleSecBeforeBump = Math.round((now - entry.lastActivityAt) / 1000)
|
|
393
450
|
entry.lastActivityAt = now
|
|
451
|
+
// Un-stall transition (#previously-missing). The schema doc
|
|
452
|
+
// promised "stalled → running (may resume)" but neither the
|
|
453
|
+
// in-memory `stallNotified` flag nor the DB `status` column was
|
|
454
|
+
// ever flipped back. That left the pinned card stuck at ⚠ until
|
|
455
|
+
// terminal completion, by which point the user had often
|
|
456
|
+
// already interrupted or redispatched. Reset both halves on the
|
|
457
|
+
// first activity tick after a stall + fire onUnstall for the
|
|
458
|
+
// driver to clear its render-time badge.
|
|
459
|
+
if (entry.stallNotified) {
|
|
460
|
+
entry.stallNotified = false
|
|
461
|
+
if (db != null) {
|
|
462
|
+
try {
|
|
463
|
+
const rowRef = db
|
|
464
|
+
.prepare('SELECT id FROM subagents WHERE jsonl_agent_id = ?')
|
|
465
|
+
.get(entry.agentId) as { id: string } | null
|
|
466
|
+
if (rowRef != null) {
|
|
467
|
+
recordSubagentResume(db, { id: rowRef.id, resumedAt: now })
|
|
468
|
+
}
|
|
469
|
+
} catch (dbErr) {
|
|
470
|
+
log?.(`subagent-watcher: resume DB write error ${entry.agentId}: ${(dbErr as Error).message}`)
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
if (onUnstall != null) {
|
|
474
|
+
try {
|
|
475
|
+
onUnstall(entry.agentId, entry.description)
|
|
476
|
+
} catch (cbErr) {
|
|
477
|
+
log?.(`subagent-watcher: onUnstall callback error ${entry.agentId}: ${(cbErr as Error).message}`)
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
log?.(`subagent-watcher: stall cleared for ${entry.agentId} (activity resumed after ${idleSecBeforeBump}s — re-arming detection)`)
|
|
481
|
+
}
|
|
394
482
|
if (ev.kind === 'sub_agent_tool_use') {
|
|
395
483
|
entry.toolCount++
|
|
396
484
|
// P0 of #662: surface the most recent tool name + sanitised
|
|
@@ -446,6 +534,8 @@ function readSubTail(
|
|
|
446
534
|
export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWatcherHandle {
|
|
447
535
|
const agentDir = config.agentDir
|
|
448
536
|
const stallThresholdMs = config.stallThresholdMs ?? DEFAULT_STALL_THRESHOLD_MS
|
|
537
|
+
const silentSynthesisStallThresholdMs =
|
|
538
|
+
config.silentSynthesisStallThresholdMs ?? DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS
|
|
449
539
|
const reaperTtlMs = config.reaperTtlMs ?? DEFAULT_REAPER_TTL_MS
|
|
450
540
|
const reaperIntervalMs = config.reaperIntervalMs ?? DEFAULT_REAPER_INTERVAL_MS
|
|
451
541
|
const rescanMs = config.rescanMs ?? DEFAULT_RESCAN_MS
|
|
@@ -562,7 +652,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
562
652
|
// Initial read
|
|
563
653
|
readSubTail(entry, tail, n, (desc) => {
|
|
564
654
|
log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
|
|
565
|
-
}, fs, log, db, parentStateDir)
|
|
655
|
+
}, fs, log, db, parentStateDir, config.onUnstall)
|
|
566
656
|
|
|
567
657
|
// If the JSONL already contained a turn_end at registration time
|
|
568
658
|
// (file written-then-watched), fire the state-transition + completion
|
|
@@ -593,7 +683,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
593
683
|
if (!entry || !t) return
|
|
594
684
|
readSubTail(entry, t, nowFn(), (desc) => {
|
|
595
685
|
log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
|
|
596
|
-
}, fs, log, db, parentStateDir)
|
|
686
|
+
}, fs, log, db, parentStateDir, config.onUnstall)
|
|
597
687
|
maybySendStateTransition(agentId)
|
|
598
688
|
})
|
|
599
689
|
} catch (err) {
|
|
@@ -619,11 +709,43 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
619
709
|
} catch (err) {
|
|
620
710
|
log?.(`subagent-watcher: completion notification error: ${(err as Error).message}`)
|
|
621
711
|
}
|
|
712
|
+
// Symmetric `sub_agent_finished` surface (#card-audit-log). Emit
|
|
713
|
+
// before the deferred cleanup runs so the callback always sees a
|
|
714
|
+
// live registry entry. Historical entries that had already completed at
|
|
715
|
+
// boot get their `completionNotified=true` shortcut in registerAgent
|
|
716
|
+
// and skip this path entirely — only post-boot transitions fire.
|
|
717
|
+
if (config.onFinish) {
|
|
718
|
+
try {
|
|
719
|
+
config.onFinish({
|
|
720
|
+
agentId,
|
|
721
|
+
state: entry.state,
|
|
722
|
+
outcome: entry.historical ? 'orphan' : 'completed',
|
|
723
|
+
toolCount: entry.toolCount,
|
|
724
|
+
durationMs: nowFn() - entry.dispatchedAt,
|
|
725
|
+
})
|
|
726
|
+
} catch (cbErr) {
|
|
727
|
+
log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
|
|
728
|
+
}
|
|
729
|
+
}
|
|
622
730
|
scheduleTerminalCleanup(agentId)
|
|
623
731
|
}
|
|
624
732
|
// Defensive: if state ever flips to 'failed' (currently no caller
|
|
625
733
|
// sets this, but the type allows it), still clean up the FSWatcher.
|
|
626
734
|
if (entry.state === 'failed') {
|
|
735
|
+
if (config.onFinish && !entry.completionNotified) {
|
|
736
|
+
entry.completionNotified = true
|
|
737
|
+
try {
|
|
738
|
+
config.onFinish({
|
|
739
|
+
agentId,
|
|
740
|
+
state: entry.state,
|
|
741
|
+
outcome: 'failed',
|
|
742
|
+
toolCount: entry.toolCount,
|
|
743
|
+
durationMs: nowFn() - entry.dispatchedAt,
|
|
744
|
+
})
|
|
745
|
+
} catch (cbErr) {
|
|
746
|
+
log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
|
|
747
|
+
}
|
|
748
|
+
}
|
|
627
749
|
scheduleTerminalCleanup(agentId)
|
|
628
750
|
}
|
|
629
751
|
}
|
|
@@ -678,7 +800,17 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
678
800
|
if (entry.historical) continue
|
|
679
801
|
if (entry.stallNotified) continue
|
|
680
802
|
const idleMs = n - entry.lastActivityAt
|
|
681
|
-
|
|
803
|
+
// Adaptive: a sub-agent that hasn't fired any tools yet is in
|
|
804
|
+
// "silent synthesis" mode (model thinking before its first emit).
|
|
805
|
+
// 60s is way too aggressive for plan / research sub-agents that
|
|
806
|
+
// legitimately spend 2-5 minutes composing before their first
|
|
807
|
+
// tool_use. Once tools have started, switch to the tighter loop
|
|
808
|
+
// threshold — frequent JSONL writes mean 60s of silence is a
|
|
809
|
+
// strong signal the sub-agent is genuinely stuck.
|
|
810
|
+
const threshold = entry.toolCount === 0
|
|
811
|
+
? silentSynthesisStallThresholdMs
|
|
812
|
+
: stallThresholdMs
|
|
813
|
+
if (idleMs >= threshold) {
|
|
682
814
|
entry.stallNotified = true
|
|
683
815
|
const desc = escapeHtml(truncate(entry.description, 80))
|
|
684
816
|
const idleSec = Math.floor(idleMs / 1000)
|
|
@@ -807,7 +939,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
|
|
|
807
939
|
if (!tail) continue
|
|
808
940
|
readSubTail(entry, tail, n, (desc) => {
|
|
809
941
|
log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
|
|
810
|
-
}, fs, log, db, parentStateDir)
|
|
942
|
+
}, fs, log, db, parentStateDir, config.onUnstall)
|
|
811
943
|
maybySendStateTransition(agentId)
|
|
812
944
|
}
|
|
813
945
|
|
|
@@ -21,6 +21,7 @@ export interface HarnessOpts {
|
|
|
21
21
|
minIntervalMs?: number
|
|
22
22
|
coalesceMs?: number
|
|
23
23
|
initialDelayMs?: number
|
|
24
|
+
initialDelayMsBackground?: number
|
|
24
25
|
heartbeatMs?: number
|
|
25
26
|
maxIdleMs?: number
|
|
26
27
|
deferredCompletionTimeoutMs?: number
|
|
@@ -43,6 +44,9 @@ export function makeHarness(opts: HarnessOpts = {}): DriverHarness {
|
|
|
43
44
|
minIntervalMs: opts.minIntervalMs ?? 0,
|
|
44
45
|
coalesceMs: opts.coalesceMs ?? 0,
|
|
45
46
|
initialDelayMs: opts.initialDelayMs ?? 0,
|
|
47
|
+
...(opts.initialDelayMsBackground != null
|
|
48
|
+
? { initialDelayMsBackground: opts.initialDelayMsBackground }
|
|
49
|
+
: {}),
|
|
46
50
|
heartbeatMs: opts.heartbeatMs ?? 1_000,
|
|
47
51
|
maxIdleMs: opts.maxIdleMs ?? 30_000,
|
|
48
52
|
deferredCompletionTimeoutMs: opts.deferredCompletionTimeoutMs ?? 10_000,
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression tests for #757 — progress card goes silent for background
|
|
3
|
+
* Agent workers (run_in_background: true).
|
|
4
|
+
*
|
|
5
|
+
* Root cause: `applyToolUse` in fleet-state.ts only promoted `stuck →
|
|
6
|
+
* running`; background members stayed at `status: 'background'` even
|
|
7
|
+
* while actively running tools. The fleet row rendered ⏸ idle instead
|
|
8
|
+
* of ↻ + last-tool, so the card appeared frozen.
|
|
9
|
+
*
|
|
10
|
+
* Fix: applyToolUse now also promotes `background → running` on the
|
|
11
|
+
* first live tool event. A separate sticky `isBackgroundDispatch` flag
|
|
12
|
+
* preserves the background-carry semantics used by hasLiveBackground
|
|
13
|
+
* (keeps PerChatState alive past parent turn_end until bg member
|
|
14
|
+
* reaches terminal status).
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, it, expect } from 'vitest'
|
|
18
|
+
import { createProgressDriver } from '../progress-card-driver.js'
|
|
19
|
+
import { applyToolUse, createFleetMember, hasLiveBackground } from '../fleet-state.js'
|
|
20
|
+
import type { SessionEvent } from '../session-tail.js'
|
|
21
|
+
|
|
22
|
+
const T0 = 1_700_000_000_000
|
|
23
|
+
|
|
24
|
+
// ─── Pure-function unit tests ────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
describe('applyToolUse: background → running promotion (#757)', () => {
|
|
27
|
+
it('promotes background to running on first tool event', () => {
|
|
28
|
+
const m = { ...createFleetMember({ agentId: 'a', role: 'worker', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'background' as const }
|
|
29
|
+
const after = applyToolUse(m, 'Read', { file_path: '/foo/bar.ts' }, T0 + 1000)
|
|
30
|
+
expect(after.status).toBe('running')
|
|
31
|
+
expect(after.lastTool?.name).toBe('Read')
|
|
32
|
+
})
|
|
33
|
+
|
|
34
|
+
it('preserves isBackgroundDispatch after promotion', () => {
|
|
35
|
+
const m = { ...createFleetMember({ agentId: 'a', role: 'worker', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'background' as const }
|
|
36
|
+
const after = applyToolUse(m, 'Bash', { command: 'ls' }, T0 + 1000)
|
|
37
|
+
expect(after.isBackgroundDispatch).toBe(true)
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
it('does not affect foreground members (status stays running)', () => {
|
|
41
|
+
const m = createFleetMember({ agentId: 'a', role: 'worker', startedAt: T0, originatingTurnKey: 'k' })
|
|
42
|
+
const after = applyToolUse(m, 'Read', { file_path: '/x' }, T0 + 1000)
|
|
43
|
+
expect(after.status).toBe('running')
|
|
44
|
+
expect(after.isBackgroundDispatch).toBe(false)
|
|
45
|
+
})
|
|
46
|
+
})
|
|
47
|
+
|
|
48
|
+
describe('hasLiveBackground: sticky flag survives status promotion (#757)', () => {
|
|
49
|
+
it('returns true when background member is promoted to running (not yet terminal)', () => {
|
|
50
|
+
const fleet = new Map([
|
|
51
|
+
['a', { ...createFleetMember({ agentId: 'a', role: 'w', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'running' as const }],
|
|
52
|
+
])
|
|
53
|
+
expect(hasLiveBackground(fleet)).toBe(true)
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
it('returns false when background member reaches terminal status', () => {
|
|
57
|
+
const fleet = new Map([
|
|
58
|
+
['a', { ...createFleetMember({ agentId: 'a', role: 'w', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'done' as const, terminalAt: T0 + 5000 }],
|
|
59
|
+
])
|
|
60
|
+
expect(hasLiveBackground(fleet)).toBe(false)
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
it('returns false when no members are background dispatches', () => {
|
|
64
|
+
const fleet = new Map([
|
|
65
|
+
['a', createFleetMember({ agentId: 'a', role: 'w', startedAt: T0, originatingTurnKey: 'k' })],
|
|
66
|
+
])
|
|
67
|
+
expect(hasLiveBackground(fleet)).toBe(false)
|
|
68
|
+
})
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
// ─── Integration: driver-level lifecycle ─────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
function harness() {
|
|
74
|
+
let now = 1000
|
|
75
|
+
const timers: Array<{ fireAt: number; fn: () => void; ref: number; repeat?: number }> = []
|
|
76
|
+
let nextRef = 0
|
|
77
|
+
const completions: string[] = []
|
|
78
|
+
const driver = createProgressDriver({
|
|
79
|
+
emit: () => {},
|
|
80
|
+
minIntervalMs: 500,
|
|
81
|
+
coalesceMs: 400,
|
|
82
|
+
initialDelayMs: 0,
|
|
83
|
+
promoteAfterMs: 999_999,
|
|
84
|
+
onTurnComplete: (s) => completions.push(s.turnKey),
|
|
85
|
+
now: () => now,
|
|
86
|
+
setTimeout: (fn, ms) => {
|
|
87
|
+
const ref = nextRef++
|
|
88
|
+
timers.push({ fireAt: now + ms, fn, ref })
|
|
89
|
+
return { ref }
|
|
90
|
+
},
|
|
91
|
+
clearTimeout: (h) => {
|
|
92
|
+
const ref = (h as { ref: number }).ref
|
|
93
|
+
const idx = timers.findIndex((t) => t.ref === ref)
|
|
94
|
+
if (idx !== -1) timers.splice(idx, 1)
|
|
95
|
+
},
|
|
96
|
+
setInterval: (fn, ms) => {
|
|
97
|
+
const ref = nextRef++
|
|
98
|
+
timers.push({ fireAt: now + ms, fn, ref, repeat: ms })
|
|
99
|
+
return { ref }
|
|
100
|
+
},
|
|
101
|
+
clearInterval: (h) => {
|
|
102
|
+
const ref = (h as { ref: number }).ref
|
|
103
|
+
const idx = timers.findIndex((t) => t.ref === ref)
|
|
104
|
+
if (idx !== -1) timers.splice(idx, 1)
|
|
105
|
+
},
|
|
106
|
+
})
|
|
107
|
+
function advance(ms: number) {
|
|
108
|
+
const target = now + ms
|
|
109
|
+
while (true) {
|
|
110
|
+
const due = timers.filter((t) => t.fireAt <= target).sort((a, b) => a.fireAt - b.fireAt)
|
|
111
|
+
if (due.length === 0) break
|
|
112
|
+
const t = due[0]
|
|
113
|
+
now = t.fireAt
|
|
114
|
+
t.fn()
|
|
115
|
+
if (t.repeat) t.fireAt = now + t.repeat
|
|
116
|
+
else timers.splice(timers.indexOf(t), 1)
|
|
117
|
+
}
|
|
118
|
+
now = target
|
|
119
|
+
}
|
|
120
|
+
return { driver, completions, advance, getNow: () => now }
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const enqueue = (chatId: string): SessionEvent => ({
|
|
124
|
+
kind: 'enqueue',
|
|
125
|
+
chatId,
|
|
126
|
+
messageId: '1',
|
|
127
|
+
threadId: null,
|
|
128
|
+
rawContent: `<channel chat_id="${chatId}">go</channel>`,
|
|
129
|
+
})
|
|
130
|
+
|
|
131
|
+
describe('driver integration: bg worker tool activity (#757)', () => {
|
|
132
|
+
it('background fleet member promotes to running when tool events arrive', () => {
|
|
133
|
+
const { driver } = harness()
|
|
134
|
+
const CHAT = 'c1'
|
|
135
|
+
driver.ingest(enqueue(CHAT), null)
|
|
136
|
+
driver.ingest(
|
|
137
|
+
{ kind: 'tool_use', toolName: 'Agent', toolUseId: 'tu1', input: { prompt: 'bg work', run_in_background: true } },
|
|
138
|
+
CHAT,
|
|
139
|
+
)
|
|
140
|
+
driver.ingest({ kind: 'sub_agent_started', agentId: 'sa1', firstPromptText: 'bg work' }, CHAT)
|
|
141
|
+
|
|
142
|
+
// Initial state: background.
|
|
143
|
+
expect(driver.peekFleet(CHAT)!.get('sa1')!.status).toBe('background')
|
|
144
|
+
|
|
145
|
+
// Tool activity arrives from the sub-agent JSONL.
|
|
146
|
+
driver.ingest({ kind: 'sub_agent_tool_use', agentId: 'sa1', toolUseId: 't1', toolName: 'Bash', input: { command: 'npm test' } }, CHAT)
|
|
147
|
+
|
|
148
|
+
const m = driver.peekFleet(CHAT)!.get('sa1')!
|
|
149
|
+
// Promoted to running — card now shows active tool work.
|
|
150
|
+
expect(m.status).toBe('running')
|
|
151
|
+
expect(m.lastTool?.name).toBe('Bash')
|
|
152
|
+
// Sticky flag preserved — bg-carry still works.
|
|
153
|
+
expect(m.isBackgroundDispatch).toBe(true)
|
|
154
|
+
})
|
|
155
|
+
|
|
156
|
+
it('background carry survives promotion: turn completion holds until bg reaches terminal', () => {
|
|
157
|
+
const { driver, completions } = harness()
|
|
158
|
+
const CHAT = 'c2'
|
|
159
|
+
driver.ingest(enqueue(CHAT), null)
|
|
160
|
+
driver.ingest(
|
|
161
|
+
{ kind: 'tool_use', toolName: 'Agent', toolUseId: 'tu1', input: { prompt: 'bg', run_in_background: true } },
|
|
162
|
+
CHAT,
|
|
163
|
+
)
|
|
164
|
+
driver.ingest({ kind: 'sub_agent_started', agentId: 'sa1', firstPromptText: 'bg' }, CHAT)
|
|
165
|
+
// Bg worker starts doing tool work — status becomes running.
|
|
166
|
+
driver.ingest({ kind: 'sub_agent_tool_use', agentId: 'sa1', toolUseId: 't1', toolName: 'Read', input: { file_path: '/a' } }, CHAT)
|
|
167
|
+
expect(driver.peekFleet(CHAT)!.get('sa1')!.status).toBe('running')
|
|
168
|
+
|
|
169
|
+
driver.ingest({ kind: 'tool_use', toolName: 'mcp__switchroom-telegram__reply' }, CHAT)
|
|
170
|
+
driver.recordOutboundDelivered(CHAT)
|
|
171
|
+
// Parent ends while bg worker is still running.
|
|
172
|
+
driver.ingest({ kind: 'turn_end', durationMs: 100 }, CHAT)
|
|
173
|
+
|
|
174
|
+
// Turn completion must NOT fire — bg worker is still active.
|
|
175
|
+
expect(completions.length).toBe(0)
|
|
176
|
+
|
|
177
|
+
// Bg worker finishes.
|
|
178
|
+
driver.ingest({ kind: 'sub_agent_turn_end', agentId: 'sa1' }, CHAT)
|
|
179
|
+
expect(completions.length).toBe(1)
|
|
180
|
+
})
|
|
181
|
+
|
|
182
|
+
it('terminal state reached after promotion fires completion correctly', () => {
|
|
183
|
+
const { driver, completions } = harness()
|
|
184
|
+
const CHAT = 'c3'
|
|
185
|
+
driver.ingest(enqueue(CHAT), null)
|
|
186
|
+
driver.ingest(
|
|
187
|
+
{ kind: 'tool_use', toolName: 'Agent', toolUseId: 'tu1', input: { prompt: 'bg', run_in_background: true } },
|
|
188
|
+
CHAT,
|
|
189
|
+
)
|
|
190
|
+
driver.ingest({ kind: 'sub_agent_started', agentId: 'sa1', firstPromptText: 'bg' }, CHAT)
|
|
191
|
+
driver.ingest({ kind: 'sub_agent_tool_use', agentId: 'sa1', toolUseId: 't1', toolName: 'Write', input: { file_path: '/out.ts' } }, CHAT)
|
|
192
|
+
|
|
193
|
+
driver.ingest({ kind: 'tool_use', toolName: 'mcp__switchroom-telegram__reply' }, CHAT)
|
|
194
|
+
driver.recordOutboundDelivered(CHAT)
|
|
195
|
+
driver.ingest({ kind: 'turn_end', durationMs: 100 }, CHAT)
|
|
196
|
+
// Peek before sub_agent_turn_end so fleet is still live.
|
|
197
|
+
expect(driver.peekFleet(CHAT)!.get('sa1')!.status).toBe('running')
|
|
198
|
+
driver.ingest({ kind: 'sub_agent_turn_end', agentId: 'sa1' }, CHAT)
|
|
199
|
+
expect(completions.length).toBe(1)
|
|
200
|
+
})
|
|
201
|
+
})
|
|
@@ -2,14 +2,19 @@
|
|
|
2
2
|
* Tests for #309: boot card uses the agent slug (not display name) for
|
|
3
3
|
* systemd unit probes.
|
|
4
4
|
*
|
|
5
|
-
* Root cause: probeAgentProcess
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
5
|
+
* Root cause: probeAgentProcess was called with opts.agentName (the
|
|
6
|
+
* persona display name, e.g. "Klanker") instead of the lowercase slug
|
|
7
|
+
* ("klanker"). systemctl returns LoadState=not-found for the
|
|
8
|
+
* capitalised name because unit files are always lowercase.
|
|
9
9
|
*
|
|
10
10
|
* Fix: RunProbesOpts.agentSlug carries the slug separately; runAllProbes
|
|
11
11
|
* passes opts.agentSlug (falling back to opts.agentName for compat) to
|
|
12
|
-
*
|
|
12
|
+
* probeAgentProcess.
|
|
13
|
+
*
|
|
14
|
+
* (Pre-Phase-4 this also covered probeCronTimers; that probe was
|
|
15
|
+
* replaced by probeScheduler when the singleton switchroom-cron
|
|
16
|
+
* container was retired and cron moved in-container — the slug now
|
|
17
|
+
* only matters for the systemd Agent probe.)
|
|
13
18
|
*/
|
|
14
19
|
|
|
15
20
|
import { describe, it, expect } from 'vitest'
|
|
@@ -102,35 +107,6 @@ describe('#309: runAllProbes — slug vs display name for systemd calls', () =>
|
|
|
102
107
|
}
|
|
103
108
|
})
|
|
104
109
|
|
|
105
|
-
it('probeCronTimers target is switchroom-<slug>-cron-*, not switchroom-<displayName>-cron-*', async () => {
|
|
106
|
-
const tmpDir = makeTmpAgentDir()
|
|
107
|
-
try {
|
|
108
|
-
const { fn: execFileMock, calls } = makeDispatchingExecFile('klanker')
|
|
109
|
-
|
|
110
|
-
await runAllProbes({
|
|
111
|
-
agentName: 'Klanker',
|
|
112
|
-
agentSlug: 'klanker',
|
|
113
|
-
version: 'v0.3.0',
|
|
114
|
-
agentDir: tmpDir,
|
|
115
|
-
gatewayInfo: { pid: 12345, startedAtMs: Date.now() },
|
|
116
|
-
fetchImpl: async () => new Response('', { status: 200 }),
|
|
117
|
-
settleWindowMs: 0,
|
|
118
|
-
agentLiveWindowMs: 0,
|
|
119
|
-
probeExecFileImpl: execFileMock,
|
|
120
|
-
})
|
|
121
|
-
|
|
122
|
-
// probeCronTimers calls: systemctl --user list-timers switchroom-<name>-cron-*
|
|
123
|
-
const cronProbeCall = calls.find(c =>
|
|
124
|
-
c.cmd === 'systemctl' && c.args.includes('list-timers'),
|
|
125
|
-
)
|
|
126
|
-
expect(cronProbeCall, 'probeCronTimers must call systemctl list-timers').toBeDefined()
|
|
127
|
-
const cronGlob = cronProbeCall!.args.find(a => a.includes('cron'))
|
|
128
|
-
expect(cronGlob).toBe('switchroom-klanker-cron-*')
|
|
129
|
-
} finally {
|
|
130
|
-
rmSync(tmpDir, { recursive: true })
|
|
131
|
-
}
|
|
132
|
-
})
|
|
133
|
-
|
|
134
110
|
it('no systemctl call uses the capitalised display name as the unit target', async () => {
|
|
135
111
|
const tmpDir = makeTmpAgentDir()
|
|
136
112
|
try {
|
|
@@ -94,27 +94,28 @@ describe('renderBootCard — degraded conditions', () => {
|
|
|
94
94
|
expect(out).not.toContain('Agent</b>')
|
|
95
95
|
expect(out).not.toContain('Gateway</b>')
|
|
96
96
|
expect(out).not.toContain('Hindsight</b>')
|
|
97
|
-
expect(out).not.toContain('
|
|
97
|
+
expect(out).not.toContain('Scheduler</b>')
|
|
98
98
|
})
|
|
99
99
|
|
|
100
100
|
it('orders probe rows in PROBE_KEYS canonical order regardless of object iteration', () => {
|
|
101
101
|
// Insert in a non-canonical order; renderer must still output Account first,
|
|
102
|
-
// then Hindsight, then
|
|
102
|
+
// then Hindsight, then Scheduler (matching PROBE_KEYS — Phase 4 renamed
|
|
103
|
+
// crons → scheduler when the in-container agent-scheduler took over).
|
|
103
104
|
const out = renderBootCard({
|
|
104
105
|
agentName: 'a',
|
|
105
106
|
version: 'v',
|
|
106
107
|
probes: {
|
|
107
|
-
|
|
108
|
+
scheduler: { status: 'fail', label: 'Scheduler', detail: 'sidecar not running' },
|
|
108
109
|
hindsight: { status: 'fail', label: 'Hindsight', detail: 'unreachable' },
|
|
109
110
|
account: { status: 'degraded', label: 'Account', detail: 'expiring' },
|
|
110
111
|
},
|
|
111
112
|
})
|
|
112
113
|
const accountIdx = out.indexOf('Account</b>')
|
|
113
114
|
const hindsightIdx = out.indexOf('Hindsight</b>')
|
|
114
|
-
const
|
|
115
|
+
const schedulerIdx = out.indexOf('Scheduler</b>')
|
|
115
116
|
expect(accountIdx).toBeGreaterThan(-1)
|
|
116
117
|
expect(hindsightIdx).toBeGreaterThan(accountIdx)
|
|
117
|
-
expect(
|
|
118
|
+
expect(schedulerIdx).toBeGreaterThan(hindsightIdx)
|
|
118
119
|
})
|
|
119
120
|
|
|
120
121
|
it('crash + degraded probe = both rows render', () => {
|