switchroom 0.15.45 → 0.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/dist/agent-scheduler/index.js +56 -15
  2. package/dist/auth-broker/index.js +383 -97
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +7 -4
  5. package/dist/cli/notion-write-pretool.mjs +35 -4
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/switchroom.js +2894 -841
  9. package/dist/host-control/main.js +2685 -207
  10. package/dist/vault/approvals/kernel-server.js +7453 -7413
  11. package/dist/vault/broker/server.js +11428 -11388
  12. package/examples/minimal.yaml +1 -0
  13. package/examples/switchroom.yaml +1 -0
  14. package/package.json +3 -3
  15. package/profiles/_base/start.sh.hbs +97 -1
  16. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  17. package/profiles/default/CLAUDE.md.hbs +0 -19
  18. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  19. package/telegram-plugin/answer-stream-flag.ts +12 -49
  20. package/telegram-plugin/answer-stream.ts +5 -150
  21. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  22. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  23. package/telegram-plugin/context-exhaustion.ts +12 -0
  24. package/telegram-plugin/demo-mask.ts +154 -0
  25. package/telegram-plugin/dist/bridge/bridge.js +55 -12
  26. package/telegram-plugin/dist/gateway/gateway.js +2938 -977
  27. package/telegram-plugin/dist/server.js +55 -12
  28. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  29. package/telegram-plugin/draft-stream.ts +47 -410
  30. package/telegram-plugin/final-answer-detect.ts +17 -12
  31. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  32. package/telegram-plugin/format.ts +56 -19
  33. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  34. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  35. package/telegram-plugin/gateway/auth-command.ts +70 -14
  36. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  37. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  38. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  39. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  40. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  41. package/telegram-plugin/gateway/effort-command.ts +8 -3
  42. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  43. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  44. package/telegram-plugin/gateway/gateway.ts +1857 -292
  45. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  46. package/telegram-plugin/gateway/model-command.ts +115 -4
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-command.test.ts +134 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -152,12 +152,13 @@ describe('handleInjectCommand — outcome=ok_no_output', () => {
152
152
  expect(replies[0].text).toContain('empty capture')
153
153
  })
154
154
 
155
- it('bare ack with accent=done when expectsOutput=false and no silentNote (/clear)', async () => {
155
+ it('uses silentNote for /clear (context cleared fresh slate)', async () => {
156
156
  const inject = vi.fn().mockResolvedValue(noOutputResult('/clear'))
157
157
  const { deps, replies } = makeDeps({ getArgs: () => '/clear', inject })
158
158
  await handleInjectCommand(fakeCtx(), deps)
159
159
  expect(replies[0].opts?.accent).toBe('done')
160
160
  expect(replies[0].text).toContain('<code>/clear</code>')
161
+ expect(replies[0].text).toContain('context cleared')
161
162
  expect(replies[0].text).not.toContain('empty capture')
162
163
  expect(replies[0].text).not.toContain('<pre>')
163
164
  })
@@ -233,6 +233,53 @@ export interface ModelMenuReply {
233
233
  export const MODEL_CALLBACK_PREFIX = 'mdl:'
234
234
  const MODEL_CALLBACK_SELECT = 'mdl:s:'
235
235
  export const MODEL_CALLBACK_REFRESH = 'mdl:r'
236
+ /** Callback prefix for sr-* (LiteLLM non-Anthropic) model selection. */
237
+ export const MODEL_CALLBACK_SR = 'mdl:sr:'
238
+
239
+ /**
240
+ * Friendly display names for sr-* synthetic model names. An sr-* model in
241
+ * LiteLLM has no entry in `model_group_settings.*.forward_client_headers_to_llm_api`
242
+ * so the Anthropic OAuth credential is NEVER forwarded — safe to route to
243
+ * OpenRouter. Names here are display-only; the raw `sr-*` id is what gets
244
+ * injected into the agent's session. See reference/rfcs/litellm-max-subscription-invariants.md § I6.
245
+ */
246
+ export const SR_MODEL_LABELS: Record<string, string> = {
247
+ 'sr-gemini-2.5-pro': 'Gemini 2.5 Pro',
248
+ 'sr-gemini-2.5-flash': 'Gemini 2.5 Flash',
249
+ 'sr-deepseek-r1': 'DeepSeek R1',
250
+ 'sr-deepseek-v3': 'DeepSeek V3',
251
+ 'sr-glm-5': 'GLM-5',
252
+ }
253
+
254
+ function srFriendlyLabel(srName: string): string {
255
+ return SR_MODEL_LABELS[srName] ?? srName.replace(/^sr-/, '').replace(/-/g, ' ')
256
+ }
257
+
258
+ /**
259
+ * Split picker-discovered options into native Claude options and sr-*
260
+ * (LiteLLM non-Anthropic) options. Options with "/" in the label or
261
+ * other non-native prefixes (e.g., "openrouter/...", "gpt-4") are
262
+ * silently dropped — they're internal LiteLLM routing paths, not
263
+ * user-facing switching targets.
264
+ */
265
+ export function classifyDiscoveredOptions(options: ModelPickerOption[]): {
266
+ claude: ModelPickerOption[]
267
+ sr: ModelPickerOption[]
268
+ } {
269
+ return {
270
+ // Native Claude picker labels start with an uppercase letter (e.g.
271
+ // "Default (recommended)", "Opus", "Sonnet") or with "claude-" for full
272
+ // model IDs. This excludes sr-* names, internal routing paths
273
+ // ("openrouter/..."), and non-Claude models exposed by GATEWAY_MODEL_DISCOVERY
274
+ // ("gpt-4", "gpt-4o", "voyage-law-2", etc.) — those are LiteLLM internals
275
+ // not meant as user-facing switching targets.
276
+ claude: options.filter(
277
+ (o) => !o.label.startsWith('sr-') && !o.label.includes('/') &&
278
+ (/^[A-Z]/.test(o.label) || o.label.startsWith('claude-')),
279
+ ),
280
+ sr: options.filter((o) => o.label.startsWith('sr-')),
281
+ }
282
+ }
236
283
 
237
284
  export function modelSelectCallbackData(label: string): string {
238
285
  // Identity is the label's hash, not its index — a tap re-discovers
@@ -249,15 +296,29 @@ function busyReply(deps: Pick<ModelMenuDeps, 'escapeHtml'>): ModelMenuReply {
249
296
  }
250
297
  }
251
298
 
252
- function menuKeyboard(options: ModelPickerOption[]): ModelMenuKeyboardButton[][] {
299
+ function menuKeyboard(
300
+ claudeOptions: ModelPickerOption[],
301
+ srOptions: ModelPickerOption[],
302
+ ): ModelMenuKeyboardButton[][] {
253
303
  // One option per row (labels + ✔ render cleanly at full width on
254
304
  // mobile), refresh on a trailing row.
255
- const rows: ModelMenuKeyboardButton[][] = options.map((o) => [
305
+ const rows: ModelMenuKeyboardButton[][] = claudeOptions.map((o) => [
256
306
  {
257
307
  text: o.current ? `✅ ${o.label}` : o.label,
258
308
  callback_data: modelSelectCallbackData(o.label),
259
309
  },
260
310
  ])
311
+ // sr-* models are non-Anthropic (routed via LiteLLM → OpenRouter).
312
+ // Selection uses text-inject rather than cursor-nav — more reliable
313
+ // when the picker has many models (GATEWAY_MODEL_DISCOVERY=1).
314
+ for (const o of srOptions) {
315
+ rows.push([
316
+ {
317
+ text: `🌐 ${srFriendlyLabel(o.label)}`,
318
+ callback_data: `${MODEL_CALLBACK_SR}${o.label}`,
319
+ },
320
+ ])
321
+ }
261
322
  rows.push([{ text: '🔄 Refresh', callback_data: MODEL_CALLBACK_REFRESH }])
262
323
  return rows
263
324
  }
@@ -292,7 +353,8 @@ export async function buildModelMenu(
292
353
  // or a prior session switch). Labelling the ✔ row "Now:" was misleading —
293
354
  // it could read "Opus 4.8" while the live session is on Fable. Call it what
294
355
  // it is, and tell the operator a switch applies to the live session.
295
- const current = discovered.options.find((o) => o.current)
356
+ const { claude: claudeOptions, sr: srOptions } = classifyDiscoveredOptions(discovered.options)
357
+ const current = claudeOptions.find((o) => o.current)
296
358
  const lines: string[] = [`<b>Model — ${deps.escapeHtml(deps.getAgentName())}</b>`]
297
359
  if (discovered.dismissFailed) {
298
360
  lines.push('⚠️ <i>The picker may still be open on the agent pane — check it before switching.</i>')
@@ -305,9 +367,10 @@ export async function buildModelMenu(
305
367
  }
306
368
  if (quota) lines.push(`Quota: ${deps.escapeHtml(quota)}`)
307
369
  lines.push('', 'Tap a model to switch the <b>live session</b>:')
370
+ if (srOptions.length > 0) lines.push('🌐 = non-Anthropic via LiteLLM (session only)')
308
371
  lines.push(PERSIST_NOTE)
309
372
 
310
- return { text: lines.join('\n'), html: true, keyboard: menuKeyboard(discovered.options) }
373
+ return { text: lines.join('\n'), html: true, keyboard: menuKeyboard(claudeOptions, srOptions) }
311
374
  }
312
375
 
313
376
  export interface ModelCallbackOutcome {
@@ -346,6 +409,54 @@ export async function handleModelMenuCallback(
346
409
  if (data === MODEL_CALLBACK_REFRESH) {
347
410
  return { answer: 'Refreshed', reply: await buildModelMenu(deps) }
348
411
  }
412
+
413
+ // sr-* model tap: text-inject `/model sr-<name>` rather than cursor-nav.
414
+ // Text-inject is more reliable when the picker has many models; sr-* names
415
+ // are safe (no entry in model_group_settings → no OAuth forwarding). See I6.
416
+ if (data.startsWith(MODEL_CALLBACK_SR)) {
417
+ const srName = data.slice(MODEL_CALLBACK_SR.length)
418
+ if (!isValidModelArg(srName)) {
419
+ return { answer: 'Invalid model name', reply: await buildModelMenu(deps) }
420
+ }
421
+ if (deps.isBusy()) {
422
+ return {
423
+ answer: '⏳ Agent is mid-turn — tap again when it’s idle',
424
+ reply: busyReply(deps),
425
+ toastOnly: true,
426
+ }
427
+ }
428
+ let srResult: InjectResult
429
+ try {
430
+ srResult = await deps.inject(deps.getAgentName(), `/model ${srName}`)
431
+ } catch (err) {
432
+ const msg = err instanceof Error ? err.message : String(err)
433
+ return {
434
+ answer: 'Switch failed',
435
+ reply: await menuWithBanner(deps, `❌ Switch to <b>${deps.escapeHtml(srName)}</b> failed: ${deps.escapeHtml(msg)}`),
436
+ }
437
+ }
438
+ if (srResult.outcome === 'ok') {
439
+ const friendlyName = srFriendlyLabel(srName)
440
+ const confirmation =
441
+ srResult.output
442
+ .split('\n')
443
+ .map((l) => l.trim())
444
+ .find((l) => /set model|switched/i.test(l)) ?? `Switched to ${friendlyName} (session)`
445
+ return {
446
+ answer: confirmation,
447
+ reply: await menuWithBanner(deps, `✅ ${deps.escapeHtml(confirmation)}`),
448
+ selectedModel: srName,
449
+ }
450
+ }
451
+ return {
452
+ answer: 'Switch failed',
453
+ reply: await menuWithBanner(
454
+ deps,
455
+ `❌ Switch to <b>${deps.escapeHtml(srFriendlyLabel(srName))}</b> failed — agent may be mid-turn`,
456
+ ),
457
+ }
458
+ }
459
+
349
460
  if (!data.startsWith(MODEL_CALLBACK_SELECT)) {
350
461
  return { answer: 'Unknown action', reply: await buildModelMenu(deps) }
351
462
  }
@@ -26,7 +26,7 @@ describe("validateMs365Preview", () => {
26
26
  toolName: "mcp__ms-365__upload-file-content",
27
27
  itemId: "01ABCDEFG",
28
28
  itemDisplayName: "Q3-Strategy.docx",
29
- accountEmail: "ken@outlook.com",
29
+ accountEmail: "bob@example.com",
30
30
  };
31
31
 
32
32
  it("accepts a minimal valid preview", () => {
@@ -93,7 +93,7 @@ describe("buildMs365CardText", () => {
93
93
  toolName: "mcp__ms-365__upload-file-content",
94
94
  itemId: "01ABCDEFG",
95
95
  itemDisplayName: "Q3-Strategy.docx",
96
- accountEmail: "ken@outlook.com",
96
+ accountEmail: "bob@example.com",
97
97
  };
98
98
 
99
99
  it("includes agent, tool, item, account", () => {
@@ -102,7 +102,7 @@ describe("buildMs365CardText", () => {
102
102
  expect(text).toContain("ms-365__upload-file-content");
103
103
  expect(text).toContain("Q3-Strategy.docx");
104
104
  expect(text).toContain("01ABCDEFG");
105
- expect(text).toContain("ken@outlook.com");
105
+ expect(text).toContain("bob@example.com");
106
106
  });
107
107
 
108
108
  it("omits ID line for new files", () => {
@@ -183,7 +183,7 @@ function makeMsg(overrides: Partial<RequestMs365ApprovalMessage> = {}): RequestM
183
183
  toolName: "mcp__ms-365__upload-file-content",
184
184
  itemId: "01ABC",
185
185
  itemDisplayName: "Strategy.docx",
186
- accountEmail: "ken@outlook.com",
186
+ accountEmail: "bob@example.com",
187
187
  },
188
188
  ttlMs: 5 * 60 * 1000,
189
189
  ...overrides,
@@ -0,0 +1,72 @@
1
+ /**
2
+ * represent-guard.ts — the duplicate-represent guard for the obligation sweep,
3
+ * extracted from obligationSweep so the "satisfied-but-misdetected obligation
4
+ * must NOT re-fire" decision (#2472) is EXECUTABLE in a pure unit test.
5
+ *
6
+ * The bug (#2472): obligation_represent re-fired for the same origin_turn_id even
7
+ * after the agent had already answered represent_count=1 with a reply tool call,
8
+ * producing a second near-identical message. The reply landed but its routing did
9
+ * not resolve back to the origin, so the ledger's normal close path missed it —
10
+ * and the represent branch (unlike the escalate branch) had no belt-and-braces
11
+ * outbound-history check before re-firing.
12
+ *
13
+ * This helper is the decision the sweep's represent branch now consults. PURE —
14
+ * no Telegram, no SQLite; the gateway injects `hasOutboundDeliveredSince` as a
15
+ * predicate. The single load-bearing subtlety lives here in one testable place:
16
+ *
17
+ * The cutoff is `lastRepresentedAt` (the time of the PREVIOUS represent), NOT
18
+ * `openedAt`. On the FIRST represent (`lastRepresentedAt` undefined) the guard
19
+ * is a no-op, so the genuine "agent wrote a plain-text answer and never called
20
+ * the reply tool" case still re-presents ONCE. Only the SECOND-and-later
21
+ * represent is gated — exactly where a reply that landed BETWEEN fires must
22
+ * suppress the re-ask. A reply that predates the last represent (e.g. the
23
+ * original plain-text answer) does not count, because it is not evidence the
24
+ * most recent represent was answered.
25
+ */
26
+
27
+ /** The obligation fields the represent guard inspects. */
28
+ export interface RepresentGuardObligation {
29
+ readonly originTurnId: string
30
+ readonly chatId: string
31
+ readonly threadId?: number
32
+ /** Wall-clock ms this obligation was most recently re-presented, if ever. */
33
+ readonly lastRepresentedAt?: number
34
+ }
35
+
36
+ export interface RepresentGuardDeps {
37
+ /** True when history is available to query (else the guard never suppresses). */
38
+ historyEnabled: boolean
39
+ /**
40
+ * Has a genuine assistant reply been delivered to this chat (optionally scoped
41
+ * to thread) at or after `sinceMs`? Wraps history.hasOutboundDeliveredSince.
42
+ *
43
+ * For the represent guard the gateway binds this with a LOW minChars (#2474
44
+ * follow-up): ANY real reply to the turn — even a terse "Yes — done." — means
45
+ * the user was answered and the duplicate represent must be suppressed. The
46
+ * 200-char "substantive" proxy is the ESCALATE branch's concern, not this one;
47
+ * applying it here left short-but-real replies failing to suppress the duplicate
48
+ * (the #2472 gap). The underlying query only counts recordOutbound rows, so
49
+ * typing indicators / progress-card edits are never miscounted as a reply.
50
+ */
51
+ hasOutboundDeliveredSince: (chatId: string, sinceMs: number, threadId?: number) => boolean
52
+ }
53
+
54
+ /**
55
+ * Decide whether a represent for `o` should be SUPPRESSED because the agent has
56
+ * already delivered a reply since the obligation was last re-presented.
57
+ *
58
+ * Returns true ⇒ the obligation is satisfied-but-misdetected; the caller closes
59
+ * it silently and does NOT re-fire. Returns false ⇒ proceed with the represent
60
+ * (first represent always proceeds; a represent with no reply since the last one
61
+ * proceeds; an unavailable history proceeds — never suppress on doubt).
62
+ */
63
+ export function shouldSuppressRepresent(
64
+ o: RepresentGuardObligation,
65
+ deps: RepresentGuardDeps,
66
+ ): boolean {
67
+ if (!deps.historyEnabled) return false
68
+ // First represent: nothing to compare against — let the single re-ask fire so
69
+ // the genuine plain-text-no-reply case is preserved.
70
+ if (o.lastRepresentedAt == null) return false
71
+ return deps.hasOutboundDeliveredSince(o.chatId, o.lastRepresentedAt, o.threadId)
72
+ }
@@ -12,6 +12,7 @@ function turn(overrides: Partial<StatusSurfaceTurnView> = {}): StatusSurfaceTurn
12
12
  sessionThreadId: undefined,
13
13
  startedAt: 1_780_000_000_000,
14
14
  toolCallCount: 0,
15
+ labeledToolCount: 0,
15
16
  activityMessageId: null,
16
17
  activityEverOpened: false,
17
18
  activityDrainFailures: 0,
@@ -35,7 +36,7 @@ describe('formatTurnLifecycle', () => {
35
36
  const line = formatTurnLifecycle(
36
37
  'clear',
37
38
  'turn_end',
38
- turn({ sessionThreadId: 3, toolCallCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
39
+ turn({ sessionThreadId: 3, toolCallCount: 5, labeledToolCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
39
40
  1_780_000_300_000, // +300s
40
41
  )
41
42
  expect(line).toContain('turn-lifecycle clear reason=turn_end')
@@ -63,7 +64,7 @@ describe('formatTurnLifecycle', () => {
63
64
  describe('detectStatusSurfaceDegraded', () => {
64
65
  it('flags a turn that did tool work but never opened the feed due to send failures (the resume-400 signature)', () => {
65
66
  const d = detectStatusSurfaceDegraded(
66
- turn({ toolCallCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
67
+ turn({ toolCallCount: 3, labeledToolCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
67
68
  )
68
69
  expect(d).not.toBeNull()
69
70
  expect(d!.reason).toBe('feed-never-opened')
@@ -75,7 +76,7 @@ describe('detectStatusSurfaceDegraded', () => {
75
76
  // the sticky activityEverOpened keeps this from false-positiving.
76
77
  expect(
77
78
  detectStatusSurfaceDegraded(
78
- turn({ toolCallCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
79
+ turn({ toolCallCount: 4, labeledToolCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
79
80
  ),
80
81
  ).toBeNull()
81
82
  })
@@ -83,7 +84,7 @@ describe('detectStatusSurfaceDegraded', () => {
83
84
  it('does NOT flag a turn that never attempted a feed send (e.g. ack-first suppression)', () => {
84
85
  expect(
85
86
  detectStatusSurfaceDegraded(
86
- turn({ toolCallCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
87
+ turn({ toolCallCount: 2, labeledToolCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
87
88
  ),
88
89
  ).toBeNull()
89
90
  })
@@ -30,6 +30,17 @@ export interface StatusSurfaceTurnView {
30
30
  sessionThreadId: number | undefined
31
31
  startedAt: number
32
32
  toolCallCount: number
33
+ /**
34
+ * Count of tool_label events that passed the surface-tool guard this turn —
35
+ * i.e. the number of surfaced (non-surface, non-suppressed) tool steps. This
36
+ * is the deterministic single source of truth for the `tools=` lifecycle
37
+ * field and the `✓ N steps` activity-feed total. Incremented in
38
+ * `case 'tool_label':` AFTER the `isTelegramSurfaceTool` guard so that
39
+ * reply/stream_reply/edit_message/react are never counted. send_typing and
40
+ * sync_retain are suppressed at the hook level (computeLabel returns null)
41
+ * and never arrive as tool_label events, so they are excluded automatically.
42
+ */
43
+ labeledToolCount: number
33
44
  /** Live activity-feed message id; null until the first send captures it. */
34
45
  activityMessageId: number | null
35
46
  /**
@@ -67,7 +78,7 @@ export function formatTurnLifecycle(
67
78
  return (
68
79
  `turn-lifecycle ${action} reason=${reason} turnId=${t.turnId} ` +
69
80
  `chat=${t.sessionChatId} thread=${t.sessionThreadId ?? '-'} ` +
70
- `tools=${t.toolCallCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
81
+ `tools=${t.labeledToolCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
71
82
  `feedOpened=${t.activityEverOpened} drainFailures=${t.activityDrainFailures} ` +
72
83
  `replyCalled=${t.replyCalled} finalAnswer=${t.finalAnswerDelivered} age_ms=${ageMs}`
73
84
  )
@@ -89,13 +100,13 @@ export function formatTurnLifecycle(
89
100
  export function detectStatusSurfaceDegraded(
90
101
  t: StatusSurfaceTurnView,
91
102
  ): { reason: string; detail: string } | null {
92
- if (t.toolCallCount === 0) return null
103
+ if (t.labeledToolCount === 0) return null
93
104
  if (t.activityEverOpened) return null
94
105
  if (t.activityDrainFailures === 0) return null
95
106
  return {
96
107
  reason: 'feed-never-opened',
97
108
  detail:
98
- `tools=${t.toolCallCount} drainFailures=${t.activityDrainFailures} ` +
109
+ `tools=${t.labeledToolCount} drainFailures=${t.activityDrainFailures} ` +
99
110
  `activityMsgId=none — the live activity feed failed every send this turn ` +
100
111
  `(card was dark despite tool work)`,
101
112
  }
@@ -557,11 +557,26 @@ export function getRecentOutboundCount(
557
557
  * SUBSTANTIVE: we never suppress escalation on a bare ack ("on it", "give me a
558
558
  * sec") — an agent that acks then ghosts must still escalate. The history schema
559
559
  * does not store a done/substantive flag, so we approximate: a row counts only
560
- * when LENGTH(text) >= 200 (the FINAL_ANSWER_MIN_CHARS constant from
561
- * final-answer-detect.ts). This is false-negative-safe: a genuine substantive
562
- * answer that happens to be < 200 chars will still fire an escalation, which is
563
- * the conservative (safe) outcome. A schema column would be more precise but is
564
- * disproportionate for this predicate; the reviewer accepted this approach.
560
+ * when LENGTH(text) >= `minChars` (default 200, the FINAL_ANSWER_MIN_CHARS
561
+ * constant from final-answer-detect.ts). This is false-negative-safe for the
562
+ * escalate branch: a genuine substantive answer that happens to be < 200 chars
563
+ * will still fire an escalation, which is the conservative (safe) outcome. A
564
+ * schema column would be more precise but is disproportionate for this predicate;
565
+ * the reviewer accepted this approach.
566
+ *
567
+ * `minChars` semantics (decoupled per caller, #2474 follow-up):
568
+ * - The ESCALATE branch (Fix 4) keeps the 200 default: it must not stand down an
569
+ * escalation on a mere ack, so it still demands a substantive-LENGTH outbound.
570
+ * - The duplicate-represent GUARD (#2472) passes a LOW value (1): for that path
571
+ * ANY genuine assistant reply to the turn — even a terse "Yes — done." or
572
+ * "Merged, all three landed." — means the user was answered, so the duplicate
573
+ * represent must be suppressed. The 200-char proxy was borrowed from the
574
+ * escalate branch and is WRONG there: a short-but-real reply left the
575
+ * duplicate-represent bug (#2472) alive. This is safe because the rows this
576
+ * query counts (role='assistant') are ONLY ever written by recordOutbound —
577
+ * i.e. real bot→user messages (reply / stream_reply / silent-anchor content /
578
+ * command acks). Typing indicators and progress-card edits NEVER call
579
+ * recordOutbound, so they cannot be miscounted as "the user was answered".
565
580
  *
566
581
  * `threadId` semantics:
567
582
  * - undefined → any message in the chat regardless of thread (DMs + supergroups)
@@ -575,16 +590,23 @@ export function hasOutboundDeliveredSince(
575
590
  chatId: string,
576
591
  sinceMs: number,
577
592
  threadId?: number | null,
593
+ minChars = 200,
578
594
  ): boolean {
579
595
  try {
580
596
  const cutoffSec = Math.floor(sinceMs / 1000)
581
- const params: unknown[] = [chatId, cutoffSec]
582
- // LENGTH(text) >= 200 scopes to substantive replies only never suppress
583
- // escalation on a mere ack. Mirrors FINAL_ANSWER_MIN_CHARS (200) from
584
- // final-answer-detect.ts; the `done` flag is not stored in the history
585
- // schema, so length is the closest available proxy.
597
+ // Clamp to >= 1 so the predicate never counts an empty/whitespace-only row
598
+ // (a degenerate outbound) as a delivered reply, even if a caller passes 0.
599
+ const minLen = Math.max(1, Math.floor(minChars))
600
+ const params: unknown[] = [chatId, cutoffSec, minLen]
601
+ // LENGTH(text) >= minChars scopes to replies of at least the caller's
602
+ // threshold. ESCALATE passes the default 200 (substantive-only — never stand
603
+ // down on a mere ack). The duplicate-represent GUARD passes a low value so a
604
+ // terse-but-real reply counts (#2472/#2474). The `done` flag is not stored in
605
+ // the history schema, so length is the closest available proxy; rows here are
606
+ // only ever recordOutbound writes (real bot→user sends), so progress-card
607
+ // edits / typing indicators are structurally excluded.
586
608
  let sql =
587
- "SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >= 200"
609
+ "SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >= ?"
588
610
  if (threadId !== undefined) {
589
611
  if (threadId === null) {
590
612
  sql += ' AND thread_id IS NULL'
@@ -297,6 +297,32 @@ async function main() {
297
297
  const markerPath = findNearestMarker(targetDir)
298
298
  if (markerPath == null) process.exit(0)
299
299
 
300
+ // Own-agent marker guard: suppress the agent's own CLAUDE.md / AGENTS.md /
301
+ // AGENT.md so it is never injected as additionalContext. The agent's own
302
+ // marker is already in the system prompt (baked by start.sh via
303
+ // --append-system-prompt); re-injecting it wastes ~30KB per session.
304
+ //
305
+ // The existing isUnderAgentWorkspace guard only blocks paths under the
306
+ // agent's workspace/ subdirectory. It misses the agent's start cwd
307
+ // (/home/.../.switchroom/agents/<name>) because that guard computes against
308
+ // workspace/, not agentDir itself. This marker-path check closes that gap.
309
+ //
310
+ // We do NOT add a "targetDir under startCwd" directory guard because that
311
+ // would wrongly suppress a legitimate worktree repo the operator has checked
312
+ // out inside the agent dir (e.g. agentDir/workspace/ repos) — the directory
313
+ // guard would catch those too. The marker-path equality check is surgical:
314
+ // only the exact CLAUDE.md / AGENTS.md / AGENT.md at agentDir root is blocked;
315
+ // any nested repo's marker injects normally.
316
+ if (agentName) {
317
+ const startCwd = normalize(
318
+ process.env.SWITCHROOM_AGENT_START_CWD ??
319
+ join(home, '.switchroom', 'agents', agentName),
320
+ )
321
+ for (const m of MARKER_FILES) {
322
+ if (markerPath === join(startCwd, m)) process.exit(0)
323
+ }
324
+ }
325
+
300
326
  const state = readSessionState(sessionId)
301
327
 
302
328
  // Already-loaded dedup — the load-once-per-repo-per-session invariant.
@@ -313,6 +313,11 @@ function updateRow(dbPath, { id, status, resultSummary, now, asyncLaunch }, done
313
313
  setImmediate(() => {
314
314
  try {
315
315
  const db = new SnapDatabaseSync(snapDbPath)
316
+ // Concurrency: per-connection busy_timeout so this hook's writes
317
+ // wait-and-retry instead of failing with SQLITE_BUSY under concurrent
318
+ // sub-agent dispatch. Set on the real open so BOTH the node:sqlite
319
+ // (production) and bun:sqlite branches are armed (#2535 review).
320
+ try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
316
321
  const row = db.prepare(SELECT_SQL).get(snapId)
317
322
  const isBackground = row != null && row.background === 1
318
323
  if (isBackground) {
@@ -184,6 +184,14 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
184
184
  setImmediate(() => {
185
185
  try {
186
186
  const db = new SnapDatabaseSync(snapDbPath)
187
+ // Concurrency: this hook writes registry.db from a separate process
188
+ // that contends with the gateway's subagent-watcher + the PostToolUse
189
+ // hook. Without a busy_timeout, the contending write fails IMMEDIATELY
190
+ // with SQLITE_BUSY ("database is locked") when several sub-agents
191
+ // dispatch at once, dropping the row → NULL jsonl_agent_id/parent_turn_key.
192
+ // Per-connection PRAGMA, set on the real open so BOTH the node:sqlite
193
+ // (production) and bun:sqlite branches are armed.
194
+ try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
187
195
  db.exec(snapSchemaSql)
188
196
  // Migrate older DBs that pre-date jsonl_agent_id.
189
197
  const hasJsonlCol = db.prepare(snapMigrateSql).get()
@@ -149,30 +149,42 @@ export function computeLabel(toolName, input) {
149
149
  // the progress card path that used to surface this was retired
150
150
  // when `progressDriver` was nulled out in #1122 PR3.
151
151
  const slug = clip(asText(i.skill), 64)
152
+ // Empty-slug Skill stays suppressed (degenerate/malformed call): the
153
+ // liveness feed-open backstops visibility for a tool-less turn, so this
154
+ // does not need a label. Keeps the #2111 sidecar contract.
152
155
  return slug ? `Running skill ${slug}` : null
153
156
  }
154
157
  }
155
158
 
156
159
  // MCP tools.
157
160
  if (typeof toolName === 'string' && toolName.startsWith('mcp__')) {
158
- // Explicit labels / suppressions for the built-in servers.
161
+ // Telegram-plugin tools: matched by the key-agnostic regex so renames/forks work.
162
+ // Strip the `mcp__<server>__` prefix to get just the tool suffix.
163
+ const TELEGRAM_PREFIX_RE = /^mcp__[^_].*?telegram__/
164
+ const telegramMatch = TELEGRAM_PREFIX_RE.exec(toolName)
165
+ if (telegramMatch) {
166
+ const suffix = toolName.slice(telegramMatch[0].length)
167
+ // Surface tools (reply, stream_reply, edit_message, react) are the
168
+ // conversation itself — suppress them from the activity feed entirely.
169
+ // Mirrors isTelegramSurfaceTool in tool-names.ts.
170
+ if (
171
+ suffix === 'reply' ||
172
+ suffix === 'stream_reply' ||
173
+ suffix === 'edit_message' ||
174
+ suffix === 'react'
175
+ ) return null
176
+ if (suffix === 'get_recent_messages') return 'Reading chat history'
177
+ // send_typing and all other surface/control tools: suppress.
178
+ return null
179
+ }
180
+ // Explicit labels / suppressions for the hindsight server.
159
181
  switch (toolName) {
160
- case 'mcp__switchroom-telegram__reply':
161
- case 'mcp__switchroom-telegram__stream_reply':
162
- return 'Replying'
163
- case 'mcp__switchroom-telegram__react': {
164
- const emoji = clip(asText(i.emoji), 8)
165
- return emoji ? `Reacting ${emoji}` : 'Reacting'
166
- }
167
- case 'mcp__switchroom-telegram__get_recent_messages':
168
- return 'Reading chat history'
169
182
  case 'mcp__hindsight__recall':
170
183
  case 'mcp__hindsight__reflect':
171
184
  return 'Searching memory'
172
185
  case 'mcp__hindsight__retain':
173
186
  return 'Saving memory'
174
187
  // Explicit suppressions — return null so we don't emit a sidecar line.
175
- case 'mcp__switchroom-telegram__send_typing':
176
188
  case 'mcp__hindsight__sync_retain':
177
189
  return null
178
190
  }
@@ -182,13 +194,17 @@ export function computeLabel(toolName, input) {
182
194
  // entirely by MCP tools read as pure silence (only a typing dot + the
183
195
  // 👀 reaction) — the "I can't see what it's doing" report. Mirror the
184
196
  // gateway's describeToolUse: friendly per-server labels, else a
185
- // model-authored field, else a humanized tool name. NEVER label
186
- // switchroom-telegram surface/control tools (they ARE the conversation).
197
+ // model-authored field, else a humanized tool name. NEVER label any
198
+ // Telegram surface/control tools (they ARE the conversation). Use the
199
+ // same regex predicate as isTelegramSurfaceTool in tool-names.ts so
200
+ // this works regardless of the plugin's registration key (clerk-telegram,
201
+ // switchroom-telegram, custom fork, …).
202
+ const TELEGRAM_SURFACE_RE = /^mcp__[^_].*?telegram__/
203
+ if (TELEGRAM_SURFACE_RE.test(toolName)) return null
187
204
  const m = /^mcp__(.+?)__(.+)$/.exec(toolName)
188
205
  if (!m) return null
189
206
  const server = m[1].toLowerCase()
190
207
  const tool = m[2].toLowerCase()
191
- if (server === 'switchroom-telegram') return null
192
208
  if (server === 'hindsight') return 'Working with memory'
193
209
  if (server === 'google-workspace' || server === 'claude_ai_google_calendar')
194
210
  return 'Checking your calendar'
@@ -213,7 +229,15 @@ export function computeLabel(toolName, input) {
213
229
  return `Using ${tool.replace(/[-_]+/g, ' ')}`
214
230
  }
215
231
 
216
- return null
232
+ // Never-null fallthrough: any unrecognized BUILT-IN tool (no mcp__ prefix,
233
+ // not matched above) gets a generic label rather than dropping its sidecar
234
+ // line. A null here was the dark-turn mechanism — if such a tool was a
235
+ // turn's first/only tool, no tool_label event fired, the activity feed
236
+ // never opened, and a working turn read as pure silence. Surface tools
237
+ // (reply/react/send_typing/sync_retain) return earlier and are also
238
+ // suppressed at the gateway's isTelegramSurfaceTool guard, so this does
239
+ // not resurface them.
240
+ return 'Working…'
217
241
  }
218
242
 
219
243
  function main() {
@@ -328,6 +328,10 @@ export function createIssuesCardHandle(
328
328
  const sendOpts: Record<string, unknown> = {
329
329
  parse_mode: "HTML",
330
330
  disable_web_page_preview: true,
331
+ // Status card, not the user's answer — silence the open ping.
332
+ // (editMessageText ignores disable_notification, so the shared
333
+ // edit path below is unaffected.)
334
+ disable_notification: true,
331
335
  ...(opts.threadId != null ? { message_thread_id: opts.threadId } : {}),
332
336
  };
333
337