switchroom 0.5.0 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +142 -121
  2. package/bin/autoaccept.exp +29 -6
  3. package/dist/agent-scheduler/index.js +12261 -0
  4. package/dist/cli/autoaccept-poll.js +10 -0
  5. package/dist/cli/switchroom.js +27250 -25324
  6. package/dist/vault/approvals/kernel-server.js +12709 -0
  7. package/dist/vault/broker/server.js +15724 -0
  8. package/package.json +4 -3
  9. package/profiles/_base/start.sh.hbs +133 -0
  10. package/profiles/_shared/telegram-style.md.hbs +3 -3
  11. package/profiles/default/CLAUDE.md +3 -3
  12. package/profiles/default/CLAUDE.md.hbs +2 -2
  13. package/profiles/default/workspace/CLAUDE.md.hbs +9 -0
  14. package/skills/docx/VENDORED.md +1 -1
  15. package/skills/mcp-builder/VENDORED.md +1 -1
  16. package/skills/pdf/VENDORED.md +1 -1
  17. package/skills/pptx/VENDORED.md +1 -1
  18. package/skills/skill-creator/VENDORED.md +1 -1
  19. package/skills/switchroom-architecture/SKILL.md +8 -7
  20. package/skills/switchroom-cli/SKILL.md +23 -15
  21. package/skills/switchroom-health/SKILL.md +7 -7
  22. package/skills/switchroom-install/SKILL.md +36 -39
  23. package/skills/switchroom-manage/SKILL.md +4 -4
  24. package/skills/switchroom-status/SKILL.md +1 -1
  25. package/skills/webapp-testing/VENDORED.md +1 -1
  26. package/skills/xlsx/VENDORED.md +1 -1
  27. package/telegram-plugin/admin-commands/dispatch.test.ts +119 -1
  28. package/telegram-plugin/admin-commands/index.ts +71 -0
  29. package/telegram-plugin/ask-user.ts +1 -0
  30. package/telegram-plugin/card-event-log.ts +138 -0
  31. package/telegram-plugin/dist/bridge/bridge.js +178 -31
  32. package/telegram-plugin/dist/foreman/foreman.js +6875 -6526
  33. package/telegram-plugin/dist/gateway/gateway.js +13862 -11834
  34. package/telegram-plugin/dist/server.js +202 -40
  35. package/telegram-plugin/fleet-state.ts +25 -10
  36. package/telegram-plugin/foreman/foreman.ts +38 -3
  37. package/telegram-plugin/gateway/approval-callback.ts +126 -0
  38. package/telegram-plugin/gateway/approval-card.test.ts +90 -0
  39. package/telegram-plugin/gateway/approval-card.ts +127 -0
  40. package/telegram-plugin/gateway/approvals-commands.ts +126 -0
  41. package/telegram-plugin/gateway/boot-card.ts +31 -6
  42. package/telegram-plugin/gateway/boot-probes.ts +510 -72
  43. package/telegram-plugin/gateway/gateway.ts +822 -94
  44. package/telegram-plugin/gateway/ipc-protocol.ts +34 -1
  45. package/telegram-plugin/gateway/ipc-server.ts +35 -0
  46. package/telegram-plugin/gateway/startup-mutex.ts +110 -2
  47. package/telegram-plugin/hooks/hooks.json +19 -0
  48. package/telegram-plugin/hooks/tool-label-pretool.mjs +216 -0
  49. package/telegram-plugin/hooks/tool-label-stop.mjs +63 -0
  50. package/telegram-plugin/package.json +4 -1
  51. package/telegram-plugin/plugin-logger.ts +20 -1
  52. package/telegram-plugin/progress-card-driver.ts +202 -13
  53. package/telegram-plugin/progress-card.ts +2 -2
  54. package/telegram-plugin/quota-check.ts +1 -0
  55. package/telegram-plugin/registry/subagents-schema.ts +37 -0
  56. package/telegram-plugin/registry/subagents.test.ts +64 -0
  57. package/telegram-plugin/session-tail.ts +58 -5
  58. package/telegram-plugin/shared/bot-runtime.ts +48 -2
  59. package/telegram-plugin/subagent-watcher.ts +139 -7
  60. package/telegram-plugin/tests/_progress-card-harness.ts +4 -0
  61. package/telegram-plugin/tests/bg-agent-progress-card-757.test.ts +201 -0
  62. package/telegram-plugin/tests/boot-card-probe-target.test.ts +10 -34
  63. package/telegram-plugin/tests/boot-card-render.test.ts +6 -5
  64. package/telegram-plugin/tests/boot-probes.test.ts +564 -0
  65. package/telegram-plugin/tests/card-event-log.test.ts +145 -0
  66. package/telegram-plugin/tests/gateway-startup-mutex.test.ts +102 -0
  67. package/telegram-plugin/tests/ipc-server-validate-inject-inbound.test.ts +134 -0
  68. package/telegram-plugin/tests/progress-card-delay-842.test.ts +160 -0
  69. package/telegram-plugin/tests/quota-check.test.ts +37 -1
  70. package/telegram-plugin/tests/subagent-registry-bugs.test.ts +5 -0
  71. package/telegram-plugin/tests/subagent-watcher-stall-notification.test.ts +104 -1
  72. package/telegram-plugin/tests/subagent-watcher.test.ts +5 -0
  73. package/telegram-plugin/tests/tool-label-sidecar.test.ts +114 -0
  74. package/telegram-plugin/tests/two-zone-bg-done-when-all-terminal.test.ts +5 -3
  75. package/telegram-plugin/tests/two-zone-card-header-phases.test.ts +10 -0
  76. package/telegram-plugin/tests/two-zone-snapshot-extras.test.ts +58 -14
  77. package/telegram-plugin/tests/welcome-text.test.ts +57 -0
  78. package/telegram-plugin/tool-label-sidecar.ts +140 -0
  79. package/telegram-plugin/tool-labels.ts +55 -0
  80. package/telegram-plugin/two-zone-card.ts +27 -7
  81. package/telegram-plugin/uat/SETUP.md +160 -0
  82. package/telegram-plugin/uat/assertions.ts +140 -0
  83. package/telegram-plugin/uat/driver.ts +174 -0
  84. package/telegram-plugin/uat/harness.ts +161 -0
  85. package/telegram-plugin/uat/login.ts +134 -0
  86. package/telegram-plugin/uat/port-allocator.ts +71 -0
  87. package/telegram-plugin/uat/scenarios/smoke-clerk-reply.test.ts +61 -0
  88. package/telegram-plugin/welcome-text.ts +44 -2
  89. package/bin/bridge-watchdog.sh +0 -967
@@ -25,9 +25,54 @@ import { GrammyError, type Bot, type Context } from 'grammy'
25
25
  import { run, type RunnerHandle } from '@grammyjs/runner'
26
26
  import { execFileSync, spawnSync } from 'child_process'
27
27
  import { createHash } from 'crypto'
28
+ import { AsyncLocalStorage } from 'async_hooks'
28
29
  import { clearStaleTelegramPollingState } from '../startup-reset.js'
29
30
  import { createRetryApiCall } from '../retry-api-call.js'
30
31
 
32
+ // ─── tg-post tag plumbing ─────────────────────────────────────────────────
33
+
34
+ /**
35
+ * Per-call tag context for `tg-post` log lines. Callers wrap a Telegram
36
+ * API invocation in `withTgPostTags({ turnKey, cardMessageId, ... }, () => ...)`
37
+ * and the transformer reads the tags off the active store and appends them
38
+ * as `key=value` pairs after the existing fields. Used to correlate progress-card
39
+ * sends/edits to a turnKey + cardMessageId in days-old session audits.
40
+ *
41
+ * Untagged callers are unaffected — when no store is active, no tag fields
42
+ * are emitted and the existing log shape is byte-for-byte unchanged.
43
+ */
44
+ export type TgPostTags = Record<string, string | number>
45
+
46
+ const tgPostTagStore = new AsyncLocalStorage<TgPostTags>()
47
+
48
+ /**
49
+ * Run `fn` with the given tags attached to any `tg-post` lines emitted from
50
+ * the inner Telegram API calls. Tags are inherited across awaits within
51
+ * the same async chain (AsyncLocalStorage semantics). Pass an empty record
52
+ * or skip the wrapper entirely to fall back to the untagged shape.
53
+ */
54
+ export function withTgPostTags<T>(tags: TgPostTags, fn: () => T): T {
55
+ return tgPostTagStore.run(tags, fn)
56
+ }
57
+
58
+ /** Exposed for the transformer (and tests). Returns undefined when no store is active. */
59
+ export function _getTgPostTags(): TgPostTags | undefined {
60
+ return tgPostTagStore.getStore()
61
+ }
62
+
63
+ function formatTgPostTags(tags: TgPostTags | undefined): string {
64
+ if (!tags) return ''
65
+ const parts: string[] = []
66
+ for (const [k, v] of Object.entries(tags)) {
67
+ if (v == null) continue
68
+ // Sanitise: tag values land in a single-line space-separated log
69
+ // record. Replace each whitespace run with `_` to keep grep happy.
70
+ const s = String(v).replace(/\s+/g, '_')
71
+ parts.push(`${k}=${s}`)
72
+ }
73
+ return parts.length > 0 ? ' ' + parts.join(' ') : ''
74
+ }
75
+
31
76
  // ─── tg-post observability transformer ────────────────────────────────────
32
77
 
33
78
  /**
@@ -64,10 +109,11 @@ export function installTgPostLogger(bot: Bot): void {
64
109
  const hash = bytes > 0
65
110
  ? createHash('sha1').update(text).digest('hex').slice(0, 12)
66
111
  : '-'
112
+ const tagSuffix = formatTgPostTags(_getTgPostTags())
67
113
  try {
68
114
  const res = await prev(method, payload, signal)
69
115
  process.stderr.write(
70
- `tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=ok err=- code=- desc=-\n`,
116
+ `tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=ok err=- code=- desc=-${tagSuffix}\n`,
71
117
  )
72
118
  return res
73
119
  } catch (err) {
@@ -85,7 +131,7 @@ export function installTgPostLogger(bot: Bot): void {
85
131
  ? rawDesc.replace(/\s+/g, ' ').slice(0, 80).replace(/[\r\n]/g, ' ') || '-'
86
132
  : '-'
87
133
  process.stderr.write(
88
- `tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=err err=${errClass} code=${code} desc=${desc}\n`,
134
+ `tg-post method=${method} chat=${chat} thread=${thread} parse_mode=${parseMode} bytes=${bytes} hash=${hash} status=err err=${errClass} code=${code} desc=${desc}${tagSuffix}\n`,
89
135
  )
90
136
  throw err
91
137
  }
@@ -43,7 +43,7 @@ import { homedir } from 'os'
43
43
  import { projectSubagentLine } from './session-tail.js'
44
44
  import { sanitiseToolArg } from './fleet-state.js'
45
45
  import { escapeHtml, truncate } from './card-format.js'
46
- import { bumpSubagentActivity, recordSubagentStall, recordSubagentEnd, reapStuckRunningRows } from './registry/subagents-schema.js'
46
+ import { bumpSubagentActivity, recordSubagentStall, recordSubagentResume, recordSubagentEnd, reapStuckRunningRows } from './registry/subagents-schema.js'
47
47
  import { touchTurnActiveMarker } from './gateway/turn-active-marker.js'
48
48
 
49
49
  // ─── Types ───────────────────────────────────────────────────────────────────
@@ -119,10 +119,24 @@ export interface SubagentWatcherConfig {
119
119
  */
120
120
  rescanMs?: number
121
121
  /**
122
- * How long without JSONL activity before a worker is considered stalled (ms).
123
- * Default 60_000.
122
+ * How long without JSONL activity before a worker is considered stalled
123
+ * **once at least one tool has been used**. Default 60_000ms. Tool-call
124
+ * loops emit JSONL events frequently, so 60s of silence in that phase
125
+ * is a strong signal the sub-agent is stuck on a single tool.
124
126
  */
125
127
  stallThresholdMs?: number
128
+ /**
129
+ * Stall threshold (ms) used **before any tool has been used** —
130
+ * "silent synthesis" mode where the model is composing a response without
131
+ * emitting events yet. Long-running plan / synthesis sub-agents commonly
132
+ * spend 2-5 minutes in this state legitimately, so the active-loop
133
+ * threshold (60s) misfires. Default 300_000 (5 min).
134
+ *
135
+ * The watcher selects between this and `stallThresholdMs` per-entry
136
+ * based on `entry.toolCount`: 0 ⇒ silent synthesis, ≥1 ⇒ active loop.
137
+ * Both can be overridden for tests.
138
+ */
139
+ silentSynthesisStallThresholdMs?: number
126
140
  /**
127
141
  * Reaper TTL (ms): background rows in `status='running'` whose
128
142
  * `last_activity_at` (or `started_at` if liveness never wrote) is older
@@ -171,6 +185,39 @@ export interface SubagentWatcherConfig {
171
185
  * the same sub-agent across subsequent poll ticks.
172
186
  */
173
187
  onStall?: (agentId: string, idleMs: number, description: string) => void
188
+ /**
189
+ * Symmetric to `onStall`: fires when a previously-stalled sub-agent's
190
+ * JSONL grows again (text emission, tool use, turn_end — anything that
191
+ * moves last_activity_at). Wired to `progressDriver.onSubAgentUnstall`
192
+ * in gateway.ts so the pinned card clears the ⚠ Stalled badge as soon
193
+ * as activity resumes, instead of waiting on the next render tick.
194
+ *
195
+ * Each stall→resume cycle fires exactly once: the watcher resets
196
+ * `entry.stallNotified` on resume, so a sub-agent that stalls again
197
+ * later in the same lifetime is detected (and reported) again.
198
+ */
199
+ onUnstall?: (agentId: string, description: string) => void
200
+ /**
201
+ * Called exactly once per sub-agent when its watcher observes a terminal
202
+ * transition (`done` or `failed`). Mirrors the existing `sub_agent_started`
203
+ * surface (emitted from session-tail) so the audit trail is symmetric.
204
+ *
205
+ * `outcome`:
206
+ * - 'completed' — the JSONL contained a `turn_duration` line.
207
+ * - 'failed' — reserved (no caller flips state to 'failed' today).
208
+ * - 'orphan' — the entry was historical at boot and its terminal
209
+ * transition fires after watcher startup. (Pre-existing
210
+ * `done` files at boot do NOT fire — see registerAgent.)
211
+ * Background-vs-foreground classification is the gateway's call (it owns
212
+ * the registry DB); the watcher just reports the lifecycle.
213
+ */
214
+ onFinish?: (args: {
215
+ agentId: string
216
+ state: WorkerState
217
+ outcome: 'completed' | 'failed' | 'orphan'
218
+ toolCount: number
219
+ durationMs: number
220
+ }) => void
174
221
  /** `Date.now` override for tests. */
175
222
  now?: () => number
176
223
  /** `setInterval` override for tests. */
@@ -205,6 +252,11 @@ export interface SubagentWatcherHandle {
205
252
 
206
253
  const DEFAULT_RESCAN_MS = 1000
207
254
  const DEFAULT_STALL_THRESHOLD_MS = 60_000
255
+ /** Silent-synthesis threshold (no tools used yet). 5min covers plan /
256
+ * research sub-agents that legitimately think for several minutes
257
+ * before emitting their first event — the 60s active-loop threshold
258
+ * misfires on those and freezes the card at ⚠. */
259
+ const DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS = 300_000
208
260
  const DEFAULT_REAPER_TTL_MS = 60 * 60_000 // 1 hour
209
261
  const DEFAULT_REAPER_INTERVAL_MS = 15 * 60_000 // 15 minutes
210
262
  /**
@@ -317,6 +369,10 @@ function readSubTail(
317
369
  log?: (msg: string) => void,
318
370
  db?: SubagentLivenessDb | null,
319
371
  parentStateDir?: string | null,
372
+ /** Fires when the watcher observes JSONL activity returning for a
373
+ * previously-stalled entry. Closes the resume edge the schema doc
374
+ * has always promised. */
375
+ onUnstall?: (agentId: string, description: string) => void,
320
376
  ): void {
321
377
  try {
322
378
  const stat = fs.statSync(entry.filePath)
@@ -390,7 +446,39 @@ function readSubTail(
390
446
  if (!line) continue
391
447
  const events = projectSubagentLine(line, entry.agentId, startState)
392
448
  for (const ev of events) {
449
+ const idleSecBeforeBump = Math.round((now - entry.lastActivityAt) / 1000)
393
450
  entry.lastActivityAt = now
451
+ // Un-stall transition (#previously-missing). The schema doc
452
+ // promised "stalled → running (may resume)" but neither the
453
+ // in-memory `stallNotified` flag nor the DB `status` column was
454
+ // ever flipped back. That left the pinned card stuck at ⚠ until
455
+ // terminal completion, by which point the user had often
456
+ // already interrupted or redispatched. Reset both halves on the
457
+ // first activity tick after a stall + fire onUnstall for the
458
+ // driver to clear its render-time badge.
459
+ if (entry.stallNotified) {
460
+ entry.stallNotified = false
461
+ if (db != null) {
462
+ try {
463
+ const rowRef = db
464
+ .prepare('SELECT id FROM subagents WHERE jsonl_agent_id = ?')
465
+ .get(entry.agentId) as { id: string } | null
466
+ if (rowRef != null) {
467
+ recordSubagentResume(db, { id: rowRef.id, resumedAt: now })
468
+ }
469
+ } catch (dbErr) {
470
+ log?.(`subagent-watcher: resume DB write error ${entry.agentId}: ${(dbErr as Error).message}`)
471
+ }
472
+ }
473
+ if (onUnstall != null) {
474
+ try {
475
+ onUnstall(entry.agentId, entry.description)
476
+ } catch (cbErr) {
477
+ log?.(`subagent-watcher: onUnstall callback error ${entry.agentId}: ${(cbErr as Error).message}`)
478
+ }
479
+ }
480
+ log?.(`subagent-watcher: stall cleared for ${entry.agentId} (activity resumed after ${idleSecBeforeBump}s — re-arming detection)`)
481
+ }
394
482
  if (ev.kind === 'sub_agent_tool_use') {
395
483
  entry.toolCount++
396
484
  // P0 of #662: surface the most recent tool name + sanitised
@@ -446,6 +534,8 @@ function readSubTail(
446
534
  export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWatcherHandle {
447
535
  const agentDir = config.agentDir
448
536
  const stallThresholdMs = config.stallThresholdMs ?? DEFAULT_STALL_THRESHOLD_MS
537
+ const silentSynthesisStallThresholdMs =
538
+ config.silentSynthesisStallThresholdMs ?? DEFAULT_SILENT_SYNTHESIS_STALL_THRESHOLD_MS
449
539
  const reaperTtlMs = config.reaperTtlMs ?? DEFAULT_REAPER_TTL_MS
450
540
  const reaperIntervalMs = config.reaperIntervalMs ?? DEFAULT_REAPER_INTERVAL_MS
451
541
  const rescanMs = config.rescanMs ?? DEFAULT_RESCAN_MS
@@ -562,7 +652,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
562
652
  // Initial read
563
653
  readSubTail(entry, tail, n, (desc) => {
564
654
  log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
565
- }, fs, log, db, parentStateDir)
655
+ }, fs, log, db, parentStateDir, config.onUnstall)
566
656
 
567
657
  // If the JSONL already contained a turn_end at registration time
568
658
  // (file written-then-watched), fire the state-transition + completion
@@ -593,7 +683,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
593
683
  if (!entry || !t) return
594
684
  readSubTail(entry, t, nowFn(), (desc) => {
595
685
  log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
596
- }, fs, log, db, parentStateDir)
686
+ }, fs, log, db, parentStateDir, config.onUnstall)
597
687
  maybySendStateTransition(agentId)
598
688
  })
599
689
  } catch (err) {
@@ -619,11 +709,43 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
619
709
  } catch (err) {
620
710
  log?.(`subagent-watcher: completion notification error: ${(err as Error).message}`)
621
711
  }
712
+ // Symmetric `sub_agent_finished` surface (#card-audit-log). Emit
713
+ // before the deferred cleanup runs so the callback always sees a
714
+ // live registry entry. Historical entries that had already completed at
715
+ // boot get their `completionNotified=true` shortcut in registerAgent
716
+ // and skip this path entirely — only post-boot transitions fire.
717
+ if (config.onFinish) {
718
+ try {
719
+ config.onFinish({
720
+ agentId,
721
+ state: entry.state,
722
+ outcome: entry.historical ? 'orphan' : 'completed',
723
+ toolCount: entry.toolCount,
724
+ durationMs: nowFn() - entry.dispatchedAt,
725
+ })
726
+ } catch (cbErr) {
727
+ log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
728
+ }
729
+ }
622
730
  scheduleTerminalCleanup(agentId)
623
731
  }
624
732
  // Defensive: if state ever flips to 'failed' (currently no caller
625
733
  // sets this, but the type allows it), still clean up the FSWatcher.
626
734
  if (entry.state === 'failed') {
735
+ if (config.onFinish && !entry.completionNotified) {
736
+ entry.completionNotified = true
737
+ try {
738
+ config.onFinish({
739
+ agentId,
740
+ state: entry.state,
741
+ outcome: 'failed',
742
+ toolCount: entry.toolCount,
743
+ durationMs: nowFn() - entry.dispatchedAt,
744
+ })
745
+ } catch (cbErr) {
746
+ log?.(`subagent-watcher: onFinish callback error ${agentId}: ${(cbErr as Error).message}`)
747
+ }
748
+ }
627
749
  scheduleTerminalCleanup(agentId)
628
750
  }
629
751
  }
@@ -678,7 +800,17 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
678
800
  if (entry.historical) continue
679
801
  if (entry.stallNotified) continue
680
802
  const idleMs = n - entry.lastActivityAt
681
- if (idleMs >= stallThresholdMs) {
803
+ // Adaptive: a sub-agent that hasn't fired any tools yet is in
804
+ // "silent synthesis" mode (model thinking before its first emit).
805
+ // 60s is way too aggressive for plan / research sub-agents that
806
+ // legitimately spend 2-5 minutes composing before their first
807
+ // tool_use. Once tools have started, switch to the tighter loop
808
+ // threshold — frequent JSONL writes mean 60s of silence is a
809
+ // strong signal the sub-agent is genuinely stuck.
810
+ const threshold = entry.toolCount === 0
811
+ ? silentSynthesisStallThresholdMs
812
+ : stallThresholdMs
813
+ if (idleMs >= threshold) {
682
814
  entry.stallNotified = true
683
815
  const desc = escapeHtml(truncate(entry.description, 80))
684
816
  const idleSec = Math.floor(idleMs / 1000)
@@ -807,7 +939,7 @@ export function startSubagentWatcher(config: SubagentWatcherConfig): SubagentWat
807
939
  if (!tail) continue
808
940
  readSubTail(entry, tail, n, (desc) => {
809
941
  log?.(`subagent-watcher: description updated for ${agentId}: ${desc}`)
810
- }, fs, log, db, parentStateDir)
942
+ }, fs, log, db, parentStateDir, config.onUnstall)
811
943
  maybySendStateTransition(agentId)
812
944
  }
813
945
 
@@ -21,6 +21,7 @@ export interface HarnessOpts {
21
21
  minIntervalMs?: number
22
22
  coalesceMs?: number
23
23
  initialDelayMs?: number
24
+ initialDelayMsBackground?: number
24
25
  heartbeatMs?: number
25
26
  maxIdleMs?: number
26
27
  deferredCompletionTimeoutMs?: number
@@ -43,6 +44,9 @@ export function makeHarness(opts: HarnessOpts = {}): DriverHarness {
43
44
  minIntervalMs: opts.minIntervalMs ?? 0,
44
45
  coalesceMs: opts.coalesceMs ?? 0,
45
46
  initialDelayMs: opts.initialDelayMs ?? 0,
47
+ ...(opts.initialDelayMsBackground != null
48
+ ? { initialDelayMsBackground: opts.initialDelayMsBackground }
49
+ : {}),
46
50
  heartbeatMs: opts.heartbeatMs ?? 1_000,
47
51
  maxIdleMs: opts.maxIdleMs ?? 30_000,
48
52
  deferredCompletionTimeoutMs: opts.deferredCompletionTimeoutMs ?? 10_000,
@@ -0,0 +1,201 @@
1
+ /**
2
+ * Regression tests for #757 — progress card goes silent for background
3
+ * Agent workers (run_in_background: true).
4
+ *
5
+ * Root cause: `applyToolUse` in fleet-state.ts only promoted `stuck →
6
+ * running`; background members stayed at `status: 'background'` even
7
+ * while actively running tools. The fleet row rendered ⏸ idle instead
8
+ * of ↻ + last-tool, so the card appeared frozen.
9
+ *
10
+ * Fix: applyToolUse now also promotes `background → running` on the
11
+ * first live tool event. A separate sticky `isBackgroundDispatch` flag
12
+ * preserves the background-carry semantics used by hasLiveBackground
13
+ * (keeps PerChatState alive past parent turn_end until bg member
14
+ * reaches terminal status).
15
+ */
16
+
17
+ import { describe, it, expect } from 'vitest'
18
+ import { createProgressDriver } from '../progress-card-driver.js'
19
+ import { applyToolUse, createFleetMember, hasLiveBackground } from '../fleet-state.js'
20
+ import type { SessionEvent } from '../session-tail.js'
21
+
22
+ const T0 = 1_700_000_000_000
23
+
24
+ // ─── Pure-function unit tests ────────────────────────────────────────────────
25
+
26
+ describe('applyToolUse: background → running promotion (#757)', () => {
27
+ it('promotes background to running on first tool event', () => {
28
+ const m = { ...createFleetMember({ agentId: 'a', role: 'worker', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'background' as const }
29
+ const after = applyToolUse(m, 'Read', { file_path: '/foo/bar.ts' }, T0 + 1000)
30
+ expect(after.status).toBe('running')
31
+ expect(after.lastTool?.name).toBe('Read')
32
+ })
33
+
34
+ it('preserves isBackgroundDispatch after promotion', () => {
35
+ const m = { ...createFleetMember({ agentId: 'a', role: 'worker', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'background' as const }
36
+ const after = applyToolUse(m, 'Bash', { command: 'ls' }, T0 + 1000)
37
+ expect(after.isBackgroundDispatch).toBe(true)
38
+ })
39
+
40
+ it('does not affect foreground members (status stays running)', () => {
41
+ const m = createFleetMember({ agentId: 'a', role: 'worker', startedAt: T0, originatingTurnKey: 'k' })
42
+ const after = applyToolUse(m, 'Read', { file_path: '/x' }, T0 + 1000)
43
+ expect(after.status).toBe('running')
44
+ expect(after.isBackgroundDispatch).toBe(false)
45
+ })
46
+ })
47
+
48
+ describe('hasLiveBackground: sticky flag survives status promotion (#757)', () => {
49
+ it('returns true when background member is promoted to running (not yet terminal)', () => {
50
+ const fleet = new Map([
51
+ ['a', { ...createFleetMember({ agentId: 'a', role: 'w', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'running' as const }],
52
+ ])
53
+ expect(hasLiveBackground(fleet)).toBe(true)
54
+ })
55
+
56
+ it('returns false when background member reaches terminal status', () => {
57
+ const fleet = new Map([
58
+ ['a', { ...createFleetMember({ agentId: 'a', role: 'w', startedAt: T0, originatingTurnKey: 'k', isBackgroundDispatch: true }), status: 'done' as const, terminalAt: T0 + 5000 }],
59
+ ])
60
+ expect(hasLiveBackground(fleet)).toBe(false)
61
+ })
62
+
63
+ it('returns false when no members are background dispatches', () => {
64
+ const fleet = new Map([
65
+ ['a', createFleetMember({ agentId: 'a', role: 'w', startedAt: T0, originatingTurnKey: 'k' })],
66
+ ])
67
+ expect(hasLiveBackground(fleet)).toBe(false)
68
+ })
69
+ })
70
+
71
+ // ─── Integration: driver-level lifecycle ─────────────────────────────────────
72
+
73
+ function harness() {
74
+ let now = 1000
75
+ const timers: Array<{ fireAt: number; fn: () => void; ref: number; repeat?: number }> = []
76
+ let nextRef = 0
77
+ const completions: string[] = []
78
+ const driver = createProgressDriver({
79
+ emit: () => {},
80
+ minIntervalMs: 500,
81
+ coalesceMs: 400,
82
+ initialDelayMs: 0,
83
+ promoteAfterMs: 999_999,
84
+ onTurnComplete: (s) => completions.push(s.turnKey),
85
+ now: () => now,
86
+ setTimeout: (fn, ms) => {
87
+ const ref = nextRef++
88
+ timers.push({ fireAt: now + ms, fn, ref })
89
+ return { ref }
90
+ },
91
+ clearTimeout: (h) => {
92
+ const ref = (h as { ref: number }).ref
93
+ const idx = timers.findIndex((t) => t.ref === ref)
94
+ if (idx !== -1) timers.splice(idx, 1)
95
+ },
96
+ setInterval: (fn, ms) => {
97
+ const ref = nextRef++
98
+ timers.push({ fireAt: now + ms, fn, ref, repeat: ms })
99
+ return { ref }
100
+ },
101
+ clearInterval: (h) => {
102
+ const ref = (h as { ref: number }).ref
103
+ const idx = timers.findIndex((t) => t.ref === ref)
104
+ if (idx !== -1) timers.splice(idx, 1)
105
+ },
106
+ })
107
+ function advance(ms: number) {
108
+ const target = now + ms
109
+ while (true) {
110
+ const due = timers.filter((t) => t.fireAt <= target).sort((a, b) => a.fireAt - b.fireAt)
111
+ if (due.length === 0) break
112
+ const t = due[0]
113
+ now = t.fireAt
114
+ t.fn()
115
+ if (t.repeat) t.fireAt = now + t.repeat
116
+ else timers.splice(timers.indexOf(t), 1)
117
+ }
118
+ now = target
119
+ }
120
+ return { driver, completions, advance, getNow: () => now }
121
+ }
122
+
123
+ const enqueue = (chatId: string): SessionEvent => ({
124
+ kind: 'enqueue',
125
+ chatId,
126
+ messageId: '1',
127
+ threadId: null,
128
+ rawContent: `<channel chat_id="${chatId}">go</channel>`,
129
+ })
130
+
131
+ describe('driver integration: bg worker tool activity (#757)', () => {
132
+ it('background fleet member promotes to running when tool events arrive', () => {
133
+ const { driver } = harness()
134
+ const CHAT = 'c1'
135
+ driver.ingest(enqueue(CHAT), null)
136
+ driver.ingest(
137
+ { kind: 'tool_use', toolName: 'Agent', toolUseId: 'tu1', input: { prompt: 'bg work', run_in_background: true } },
138
+ CHAT,
139
+ )
140
+ driver.ingest({ kind: 'sub_agent_started', agentId: 'sa1', firstPromptText: 'bg work' }, CHAT)
141
+
142
+ // Initial state: background.
143
+ expect(driver.peekFleet(CHAT)!.get('sa1')!.status).toBe('background')
144
+
145
+ // Tool activity arrives from the sub-agent JSONL.
146
+ driver.ingest({ kind: 'sub_agent_tool_use', agentId: 'sa1', toolUseId: 't1', toolName: 'Bash', input: { command: 'npm test' } }, CHAT)
147
+
148
+ const m = driver.peekFleet(CHAT)!.get('sa1')!
149
+ // Promoted to running — card now shows active tool work.
150
+ expect(m.status).toBe('running')
151
+ expect(m.lastTool?.name).toBe('Bash')
152
+ // Sticky flag preserved — bg-carry still works.
153
+ expect(m.isBackgroundDispatch).toBe(true)
154
+ })
155
+
156
+ it('background carry survives promotion: turn completion holds until bg reaches terminal', () => {
157
+ const { driver, completions } = harness()
158
+ const CHAT = 'c2'
159
+ driver.ingest(enqueue(CHAT), null)
160
+ driver.ingest(
161
+ { kind: 'tool_use', toolName: 'Agent', toolUseId: 'tu1', input: { prompt: 'bg', run_in_background: true } },
162
+ CHAT,
163
+ )
164
+ driver.ingest({ kind: 'sub_agent_started', agentId: 'sa1', firstPromptText: 'bg' }, CHAT)
165
+ // Bg worker starts doing tool work — status becomes running.
166
+ driver.ingest({ kind: 'sub_agent_tool_use', agentId: 'sa1', toolUseId: 't1', toolName: 'Read', input: { file_path: '/a' } }, CHAT)
167
+ expect(driver.peekFleet(CHAT)!.get('sa1')!.status).toBe('running')
168
+
169
+ driver.ingest({ kind: 'tool_use', toolName: 'mcp__switchroom-telegram__reply' }, CHAT)
170
+ driver.recordOutboundDelivered(CHAT)
171
+ // Parent ends while bg worker is still running.
172
+ driver.ingest({ kind: 'turn_end', durationMs: 100 }, CHAT)
173
+
174
+ // Turn completion must NOT fire — bg worker is still active.
175
+ expect(completions.length).toBe(0)
176
+
177
+ // Bg worker finishes.
178
+ driver.ingest({ kind: 'sub_agent_turn_end', agentId: 'sa1' }, CHAT)
179
+ expect(completions.length).toBe(1)
180
+ })
181
+
182
+ it('terminal state reached after promotion fires completion correctly', () => {
183
+ const { driver, completions } = harness()
184
+ const CHAT = 'c3'
185
+ driver.ingest(enqueue(CHAT), null)
186
+ driver.ingest(
187
+ { kind: 'tool_use', toolName: 'Agent', toolUseId: 'tu1', input: { prompt: 'bg', run_in_background: true } },
188
+ CHAT,
189
+ )
190
+ driver.ingest({ kind: 'sub_agent_started', agentId: 'sa1', firstPromptText: 'bg' }, CHAT)
191
+ driver.ingest({ kind: 'sub_agent_tool_use', agentId: 'sa1', toolUseId: 't1', toolName: 'Write', input: { file_path: '/out.ts' } }, CHAT)
192
+
193
+ driver.ingest({ kind: 'tool_use', toolName: 'mcp__switchroom-telegram__reply' }, CHAT)
194
+ driver.recordOutboundDelivered(CHAT)
195
+ driver.ingest({ kind: 'turn_end', durationMs: 100 }, CHAT)
196
+ // Peek before sub_agent_turn_end so fleet is still live.
197
+ expect(driver.peekFleet(CHAT)!.get('sa1')!.status).toBe('running')
198
+ driver.ingest({ kind: 'sub_agent_turn_end', agentId: 'sa1' }, CHAT)
199
+ expect(completions.length).toBe(1)
200
+ })
201
+ })
@@ -2,14 +2,19 @@
2
2
  * Tests for #309: boot card uses the agent slug (not display name) for
3
3
  * systemd unit probes.
4
4
  *
5
- * Root cause: probeAgentProcess and probeCronTimers were called with
6
- * opts.agentName (the persona display name, e.g. "Klanker") instead of
7
- * the lowercase slug ("klanker"). systemctl returns LoadState=not-found
8
- * for the capitalised name because unit files are always lowercase.
5
+ * Root cause: probeAgentProcess was called with opts.agentName (the
6
+ * persona display name, e.g. "Klanker") instead of the lowercase slug
7
+ * ("klanker"). systemctl returns LoadState=not-found for the
8
+ * capitalised name because unit files are always lowercase.
9
9
  *
10
10
  * Fix: RunProbesOpts.agentSlug carries the slug separately; runAllProbes
11
11
  * passes opts.agentSlug (falling back to opts.agentName for compat) to
12
- * both probeAgentProcess and probeCronTimers.
12
+ * probeAgentProcess.
13
+ *
14
+ * (Pre-Phase-4 this also covered probeCronTimers; that probe was
15
+ * replaced by probeScheduler when the singleton switchroom-cron
16
+ * container was retired and cron moved in-container — the slug now
17
+ * only matters for the systemd Agent probe.)
13
18
  */
14
19
 
15
20
  import { describe, it, expect } from 'vitest'
@@ -102,35 +107,6 @@ describe('#309: runAllProbes — slug vs display name for systemd calls', () =>
102
107
  }
103
108
  })
104
109
 
105
- it('probeCronTimers target is switchroom-<slug>-cron-*, not switchroom-<displayName>-cron-*', async () => {
106
- const tmpDir = makeTmpAgentDir()
107
- try {
108
- const { fn: execFileMock, calls } = makeDispatchingExecFile('klanker')
109
-
110
- await runAllProbes({
111
- agentName: 'Klanker',
112
- agentSlug: 'klanker',
113
- version: 'v0.3.0',
114
- agentDir: tmpDir,
115
- gatewayInfo: { pid: 12345, startedAtMs: Date.now() },
116
- fetchImpl: async () => new Response('', { status: 200 }),
117
- settleWindowMs: 0,
118
- agentLiveWindowMs: 0,
119
- probeExecFileImpl: execFileMock,
120
- })
121
-
122
- // probeCronTimers calls: systemctl --user list-timers switchroom-<name>-cron-*
123
- const cronProbeCall = calls.find(c =>
124
- c.cmd === 'systemctl' && c.args.includes('list-timers'),
125
- )
126
- expect(cronProbeCall, 'probeCronTimers must call systemctl list-timers').toBeDefined()
127
- const cronGlob = cronProbeCall!.args.find(a => a.includes('cron'))
128
- expect(cronGlob).toBe('switchroom-klanker-cron-*')
129
- } finally {
130
- rmSync(tmpDir, { recursive: true })
131
- }
132
- })
133
-
134
110
  it('no systemctl call uses the capitalised display name as the unit target', async () => {
135
111
  const tmpDir = makeTmpAgentDir()
136
112
  try {
@@ -94,27 +94,28 @@ describe('renderBootCard — degraded conditions', () => {
94
94
  expect(out).not.toContain('Agent</b>')
95
95
  expect(out).not.toContain('Gateway</b>')
96
96
  expect(out).not.toContain('Hindsight</b>')
97
- expect(out).not.toContain('Crons</b>')
97
+ expect(out).not.toContain('Scheduler</b>')
98
98
  })
99
99
 
100
100
  it('orders probe rows in PROBE_KEYS canonical order regardless of object iteration', () => {
101
101
  // Insert in a non-canonical order; renderer must still output Account first,
102
- // then Hindsight, then Crons (matching PROBE_KEYS).
102
+ // then Hindsight, then Scheduler (matching PROBE_KEYS — Phase 4 renamed
103
+ // crons → scheduler when the in-container agent-scheduler took over).
103
104
  const out = renderBootCard({
104
105
  agentName: 'a',
105
106
  version: 'v',
106
107
  probes: {
107
- crons: { status: 'fail', label: 'Crons', detail: 'bad' },
108
+ scheduler: { status: 'fail', label: 'Scheduler', detail: 'sidecar not running' },
108
109
  hindsight: { status: 'fail', label: 'Hindsight', detail: 'unreachable' },
109
110
  account: { status: 'degraded', label: 'Account', detail: 'expiring' },
110
111
  },
111
112
  })
112
113
  const accountIdx = out.indexOf('Account</b>')
113
114
  const hindsightIdx = out.indexOf('Hindsight</b>')
114
- const cronsIdx = out.indexOf('Crons</b>')
115
+ const schedulerIdx = out.indexOf('Scheduler</b>')
115
116
  expect(accountIdx).toBeGreaterThan(-1)
116
117
  expect(hindsightIdx).toBeGreaterThan(accountIdx)
117
- expect(cronsIdx).toBeGreaterThan(hindsightIdx)
118
+ expect(schedulerIdx).toBeGreaterThan(hindsightIdx)
118
119
  })
119
120
 
120
121
  it('crash + degraded probe = both rows render', () => {