switchroom 0.15.45 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3158 -1178
  10. package/dist/host-control/main.js +2833 -355
  11. package/dist/vault/approvals/kernel-server.js +7479 -7439
  12. package/dist/vault/broker/server.js +11312 -11272
  13. package/examples/minimal.yaml +1 -0
  14. package/examples/switchroom.yaml +1 -0
  15. package/package.json +3 -3
  16. package/profiles/_base/start.sh.hbs +88 -1
  17. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  18. package/profiles/default/CLAUDE.md.hbs +0 -19
  19. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  20. package/telegram-plugin/answer-stream-flag.ts +12 -49
  21. package/telegram-plugin/answer-stream.ts +5 -150
  22. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  23. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  24. package/telegram-plugin/context-exhaustion.ts +12 -0
  25. package/telegram-plugin/demo-mask.ts +154 -0
  26. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  27. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  28. package/telegram-plugin/dist/server.js +215 -172
  29. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  30. package/telegram-plugin/draft-stream.ts +47 -410
  31. package/telegram-plugin/final-answer-detect.ts +17 -12
  32. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  33. package/telegram-plugin/format.ts +56 -19
  34. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  35. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  36. package/telegram-plugin/gateway/auth-command.ts +70 -14
  37. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  38. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  39. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  40. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  41. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  42. package/telegram-plugin/gateway/effort-command.ts +8 -3
  43. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  44. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  45. package/telegram-plugin/gateway/gateway.ts +1837 -291
  46. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  47. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  48. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  49. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  50. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  51. package/telegram-plugin/history.ts +33 -11
  52. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  53. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  54. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  55. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  56. package/telegram-plugin/issues-card.ts +4 -0
  57. package/telegram-plugin/model-unavailable.ts +124 -0
  58. package/telegram-plugin/narrative-dedup.ts +69 -0
  59. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  60. package/telegram-plugin/package.json +3 -3
  61. package/telegram-plugin/pending-work-progress.ts +12 -0
  62. package/telegram-plugin/permission-rule.ts +32 -5
  63. package/telegram-plugin/permission-title.ts +152 -9
  64. package/telegram-plugin/quota-check.ts +13 -0
  65. package/telegram-plugin/quota-watch.ts +135 -7
  66. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  67. package/telegram-plugin/registry/turns-schema.ts +9 -0
  68. package/telegram-plugin/runtime-metrics.ts +13 -0
  69. package/telegram-plugin/session-tail.ts +96 -11
  70. package/telegram-plugin/silence-poke.ts +170 -24
  71. package/telegram-plugin/slot-banner-driver.ts +3 -0
  72. package/telegram-plugin/status-no-truncate.ts +44 -0
  73. package/telegram-plugin/status-reactions.ts +20 -3
  74. package/telegram-plugin/stream-controller.ts +4 -23
  75. package/telegram-plugin/stream-reply-handler.ts +6 -24
  76. package/telegram-plugin/streaming-metrics.ts +91 -0
  77. package/telegram-plugin/subagent-watcher.ts +212 -66
  78. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  79. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  80. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  81. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  82. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  83. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  84. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  85. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  86. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  87. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  88. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  89. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  90. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  91. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  92. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  93. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  94. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  95. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  96. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  97. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  98. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  99. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  100. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  101. package/telegram-plugin/tests/history.test.ts +60 -0
  102. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  103. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  104. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  105. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  106. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  107. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  108. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  109. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  110. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  111. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  112. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  113. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  114. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  115. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  116. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  117. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  118. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  119. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  120. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  121. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  122. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  123. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  124. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  125. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  126. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  127. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  128. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  129. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  130. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  131. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  132. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  133. package/telegram-plugin/tool-activity-summary.ts +375 -58
  134. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  135. package/telegram-plugin/uat/assertions.ts +115 -0
  136. package/telegram-plugin/uat/driver.ts +68 -0
  137. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  138. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  139. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  145. package/telegram-plugin/welcome-text.ts +13 -1
  146. package/telegram-plugin/worker-activity-feed.ts +157 -82
  147. package/telegram-plugin/draft-transport.ts +0 -122
  148. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  149. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -152,12 +152,13 @@ describe('handleInjectCommand — outcome=ok_no_output', () => {
152
152
  expect(replies[0].text).toContain('empty capture')
153
153
  })
154
154
 
155
- it('bare ack with accent=done when expectsOutput=false and no silentNote (/clear)', async () => {
155
+ it('uses silentNote for /clear (context cleared fresh slate)', async () => {
156
156
  const inject = vi.fn().mockResolvedValue(noOutputResult('/clear'))
157
157
  const { deps, replies } = makeDeps({ getArgs: () => '/clear', inject })
158
158
  await handleInjectCommand(fakeCtx(), deps)
159
159
  expect(replies[0].opts?.accent).toBe('done')
160
160
  expect(replies[0].text).toContain('<code>/clear</code>')
161
+ expect(replies[0].text).toContain('context cleared')
161
162
  expect(replies[0].text).not.toContain('empty capture')
162
163
  expect(replies[0].text).not.toContain('<pre>')
163
164
  })
@@ -26,7 +26,7 @@ describe("validateMs365Preview", () => {
26
26
  toolName: "mcp__ms-365__upload-file-content",
27
27
  itemId: "01ABCDEFG",
28
28
  itemDisplayName: "Q3-Strategy.docx",
29
- accountEmail: "ken@outlook.com",
29
+ accountEmail: "bob@example.com",
30
30
  };
31
31
 
32
32
  it("accepts a minimal valid preview", () => {
@@ -93,7 +93,7 @@ describe("buildMs365CardText", () => {
93
93
  toolName: "mcp__ms-365__upload-file-content",
94
94
  itemId: "01ABCDEFG",
95
95
  itemDisplayName: "Q3-Strategy.docx",
96
- accountEmail: "ken@outlook.com",
96
+ accountEmail: "bob@example.com",
97
97
  };
98
98
 
99
99
  it("includes agent, tool, item, account", () => {
@@ -102,7 +102,7 @@ describe("buildMs365CardText", () => {
102
102
  expect(text).toContain("ms-365__upload-file-content");
103
103
  expect(text).toContain("Q3-Strategy.docx");
104
104
  expect(text).toContain("01ABCDEFG");
105
- expect(text).toContain("ken@outlook.com");
105
+ expect(text).toContain("bob@example.com");
106
106
  });
107
107
 
108
108
  it("omits ID line for new files", () => {
@@ -183,7 +183,7 @@ function makeMsg(overrides: Partial<RequestMs365ApprovalMessage> = {}): RequestM
183
183
  toolName: "mcp__ms-365__upload-file-content",
184
184
  itemId: "01ABC",
185
185
  itemDisplayName: "Strategy.docx",
186
- accountEmail: "ken@outlook.com",
186
+ accountEmail: "bob@example.com",
187
187
  },
188
188
  ttlMs: 5 * 60 * 1000,
189
189
  ...overrides,
@@ -0,0 +1,72 @@
1
+ /**
2
+ * represent-guard.ts — the duplicate-represent guard for the obligation sweep,
3
+ * extracted from obligationSweep so the "satisfied-but-misdetected obligation
4
+ * must NOT re-fire" decision (#2472) is EXECUTABLE in a pure unit test.
5
+ *
6
+ * The bug (#2472): obligation_represent re-fired for the same origin_turn_id even
7
+ * after the agent had already answered represent_count=1 with a reply tool call,
8
+ * producing a second near-identical message. The reply landed but its routing did
9
+ * not resolve back to the origin, so the ledger's normal close path missed it —
10
+ * and the represent branch (unlike the escalate branch) had no belt-and-braces
11
+ * outbound-history check before re-firing.
12
+ *
13
+ * This helper is the decision the sweep's represent branch now consults. PURE —
14
+ * no Telegram, no SQLite; the gateway injects `hasOutboundDeliveredSince` as a
15
+ * predicate. The single load-bearing subtlety lives here in one testable place:
16
+ *
17
+ * The cutoff is `lastRepresentedAt` (the time of the PREVIOUS represent), NOT
18
+ * `openedAt`. On the FIRST represent (`lastRepresentedAt` undefined) the guard
19
+ * is a no-op, so the genuine "agent wrote a plain-text answer and never called
20
+ * the reply tool" case still re-presents ONCE. Only the SECOND-and-later
21
+ * represent is gated — exactly where a reply that landed BETWEEN fires must
22
+ * suppress the re-ask. A reply that predates the last represent (e.g. the
23
+ * original plain-text answer) does not count, because it is not evidence the
24
+ * most recent represent was answered.
25
+ */
26
+
27
+ /** The obligation fields the represent guard inspects. */
28
+ export interface RepresentGuardObligation {
29
+ readonly originTurnId: string
30
+ readonly chatId: string
31
+ readonly threadId?: number
32
+ /** Wall-clock ms this obligation was most recently re-presented, if ever. */
33
+ readonly lastRepresentedAt?: number
34
+ }
35
+
36
+ export interface RepresentGuardDeps {
37
+ /** True when history is available to query (else the guard never suppresses). */
38
+ historyEnabled: boolean
39
+ /**
40
+ * Has a genuine assistant reply been delivered to this chat (optionally scoped
41
+ * to thread) at or after `sinceMs`? Wraps history.hasOutboundDeliveredSince.
42
+ *
43
+ * For the represent guard the gateway binds this with a LOW minChars (#2474
44
+ * follow-up): ANY real reply to the turn — even a terse "Yes — done." — means
45
+ * the user was answered and the duplicate represent must be suppressed. The
46
+ * 200-char "substantive" proxy is the ESCALATE branch's concern, not this one;
47
+ * applying it here left short-but-real replies failing to suppress the duplicate
48
+ * (the #2472 gap). The underlying query only counts recordOutbound rows, so
49
+ * typing indicators / progress-card edits are never miscounted as a reply.
50
+ */
51
+ hasOutboundDeliveredSince: (chatId: string, sinceMs: number, threadId?: number) => boolean
52
+ }
53
+
54
+ /**
55
+ * Decide whether a represent for `o` should be SUPPRESSED because the agent has
56
+ * already delivered a reply since the obligation was last re-presented.
57
+ *
58
+ * Returns true ⇒ the obligation is satisfied-but-misdetected; the caller closes
59
+ * it silently and does NOT re-fire. Returns false ⇒ proceed with the represent
60
+ * (first represent always proceeds; a represent with no reply since the last one
61
+ * proceeds; an unavailable history proceeds — never suppress on doubt).
62
+ */
63
+ export function shouldSuppressRepresent(
64
+ o: RepresentGuardObligation,
65
+ deps: RepresentGuardDeps,
66
+ ): boolean {
67
+ if (!deps.historyEnabled) return false
68
+ // First represent: nothing to compare against — let the single re-ask fire so
69
+ // the genuine plain-text-no-reply case is preserved.
70
+ if (o.lastRepresentedAt == null) return false
71
+ return deps.hasOutboundDeliveredSince(o.chatId, o.lastRepresentedAt, o.threadId)
72
+ }
@@ -12,6 +12,7 @@ function turn(overrides: Partial<StatusSurfaceTurnView> = {}): StatusSurfaceTurn
12
12
  sessionThreadId: undefined,
13
13
  startedAt: 1_780_000_000_000,
14
14
  toolCallCount: 0,
15
+ labeledToolCount: 0,
15
16
  activityMessageId: null,
16
17
  activityEverOpened: false,
17
18
  activityDrainFailures: 0,
@@ -35,7 +36,7 @@ describe('formatTurnLifecycle', () => {
35
36
  const line = formatTurnLifecycle(
36
37
  'clear',
37
38
  'turn_end',
38
- turn({ sessionThreadId: 3, toolCallCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
39
+ turn({ sessionThreadId: 3, toolCallCount: 5, labeledToolCount: 5, activityMessageId: 42, activityEverOpened: true, replyCalled: true, finalAnswerDelivered: true }),
39
40
  1_780_000_300_000, // +300s
40
41
  )
41
42
  expect(line).toContain('turn-lifecycle clear reason=turn_end')
@@ -63,7 +64,7 @@ describe('formatTurnLifecycle', () => {
63
64
  describe('detectStatusSurfaceDegraded', () => {
64
65
  it('flags a turn that did tool work but never opened the feed due to send failures (the resume-400 signature)', () => {
65
66
  const d = detectStatusSurfaceDegraded(
66
- turn({ toolCallCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
67
+ turn({ toolCallCount: 3, labeledToolCount: 3, activityEverOpened: false, activityDrainFailures: 10 }),
67
68
  )
68
69
  expect(d).not.toBeNull()
69
70
  expect(d!.reason).toBe('feed-never-opened')
@@ -75,7 +76,7 @@ describe('detectStatusSurfaceDegraded', () => {
75
76
  // the sticky activityEverOpened keeps this from false-positiving.
76
77
  expect(
77
78
  detectStatusSurfaceDegraded(
78
- turn({ toolCallCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
79
+ turn({ toolCallCount: 4, labeledToolCount: 4, activityMessageId: null, activityEverOpened: true, activityDrainFailures: 0 }),
79
80
  ),
80
81
  ).toBeNull()
81
82
  })
@@ -83,7 +84,7 @@ describe('detectStatusSurfaceDegraded', () => {
83
84
  it('does NOT flag a turn that never attempted a feed send (e.g. ack-first suppression)', () => {
84
85
  expect(
85
86
  detectStatusSurfaceDegraded(
86
- turn({ toolCallCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
87
+ turn({ toolCallCount: 2, labeledToolCount: 2, activityEverOpened: false, activityDrainFailures: 0 }),
87
88
  ),
88
89
  ).toBeNull()
89
90
  })
@@ -30,6 +30,17 @@ export interface StatusSurfaceTurnView {
30
30
  sessionThreadId: number | undefined
31
31
  startedAt: number
32
32
  toolCallCount: number
33
+ /**
34
+ * Count of tool_label events that passed the surface-tool guard this turn —
35
+ * i.e. the number of tool steps actually shown in the feed (Telegram surface tools and suppressed tools excluded). This
36
+ * is the deterministic single source of truth for the `tools=` lifecycle
37
+ * field and the `✓ N steps` activity-feed total. Incremented in
38
+ * `case 'tool_label':` AFTER the `isTelegramSurfaceTool` guard so that
39
+ * reply/stream_reply/edit_message/react are never counted. send_typing and
40
+ * sync_retain are suppressed at the hook level (computeLabel returns null)
41
+ * and never arrive as tool_label events, so they are excluded automatically.
42
+ */
43
+ labeledToolCount: number
33
44
  /** Live activity-feed message id; null until the first send captures it. */
34
45
  activityMessageId: number | null
35
46
  /**
@@ -67,7 +78,7 @@ export function formatTurnLifecycle(
67
78
  return (
68
79
  `turn-lifecycle ${action} reason=${reason} turnId=${t.turnId} ` +
69
80
  `chat=${t.sessionChatId} thread=${t.sessionThreadId ?? '-'} ` +
70
- `tools=${t.toolCallCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
81
+ `tools=${t.labeledToolCount} activityMsgId=${t.activityMessageId ?? 'none'} ` +
71
82
  `feedOpened=${t.activityEverOpened} drainFailures=${t.activityDrainFailures} ` +
72
83
  `replyCalled=${t.replyCalled} finalAnswer=${t.finalAnswerDelivered} age_ms=${ageMs}`
73
84
  )
@@ -89,13 +100,13 @@ export function formatTurnLifecycle(
89
100
  export function detectStatusSurfaceDegraded(
90
101
  t: StatusSurfaceTurnView,
91
102
  ): { reason: string; detail: string } | null {
92
- if (t.toolCallCount === 0) return null
103
+ if (t.labeledToolCount === 0) return null
93
104
  if (t.activityEverOpened) return null
94
105
  if (t.activityDrainFailures === 0) return null
95
106
  return {
96
107
  reason: 'feed-never-opened',
97
108
  detail:
98
- `tools=${t.toolCallCount} drainFailures=${t.activityDrainFailures} ` +
109
+ `tools=${t.labeledToolCount} drainFailures=${t.activityDrainFailures} ` +
99
110
  `activityMsgId=none — the live activity feed failed every send this turn ` +
100
111
  `(card was dark despite tool work)`,
101
112
  }
@@ -557,11 +557,26 @@ export function getRecentOutboundCount(
557
557
  * SUBSTANTIVE: we never suppress escalation on a bare ack ("on it", "give me a
558
558
  * sec") — an agent that acks then ghosts must still escalate. The history schema
559
559
  * does not store a done/substantive flag, so we approximate: a row counts only
560
- * when LENGTH(text) >= 200 (the FINAL_ANSWER_MIN_CHARS constant from
561
- * final-answer-detect.ts). This is false-negative-safe: a genuine substantive
562
- * answer that happens to be < 200 chars will still fire an escalation, which is
563
- * the conservative (safe) outcome. A schema column would be more precise but is
564
- * disproportionate for this predicate; the reviewer accepted this approach.
560
+ * when LENGTH(text) >= `minChars` (default 200, the FINAL_ANSWER_MIN_CHARS
561
+ * constant from final-answer-detect.ts). This is false-negative-safe for the
562
+ * escalate branch: a genuine substantive answer that happens to be < 200 chars
563
+ * will still fire an escalation, which is the conservative (safe) outcome. A
564
+ * schema column would be more precise but is disproportionate for this predicate;
565
+ * the reviewer accepted this approach.
566
+ *
567
+ * `minChars` semantics (decoupled per caller, #2474 follow-up):
568
+ * - The ESCALATE branch (Fix 4) keeps the 200 default: it must not stand down an
569
+ * escalation on a mere ack, so it still demands a substantive-LENGTH outbound.
570
+ * - The duplicate-represent GUARD (#2472) passes a LOW value (1): for that path
571
+ * ANY genuine assistant reply to the turn — even a terse "Yes — done." or
572
+ * "Merged, all three landed." — means the user was answered, so the duplicate
573
+ * represent must be suppressed. The 200-char proxy was borrowed from the
574
+ * escalate branch and is WRONG for this guard: a short-but-real reply left the
575
+ * duplicate-represent bug (#2472) alive. This is safe because the rows this
576
+ * query counts (role='assistant') are ONLY ever written by recordOutbound —
577
+ * i.e. real bot→user messages (reply / stream_reply / silent-anchor content /
578
+ * command acks). Typing indicators and progress-card edits NEVER call
579
+ * recordOutbound, so they cannot be miscounted as "the user was answered".
565
580
  *
566
581
  * `threadId` semantics:
567
582
  * - undefined → any message in the chat regardless of thread (DMs + supergroups)
@@ -575,16 +590,23 @@ export function hasOutboundDeliveredSince(
575
590
  chatId: string,
576
591
  sinceMs: number,
577
592
  threadId?: number | null,
593
+ minChars = 200,
578
594
  ): boolean {
579
595
  try {
580
596
  const cutoffSec = Math.floor(sinceMs / 1000)
581
- const params: unknown[] = [chatId, cutoffSec]
582
- // LENGTH(text) >= 200 scopes to substantive replies only never suppress
583
- // escalation on a mere ack. Mirrors FINAL_ANSWER_MIN_CHARS (200) from
584
- // final-answer-detect.ts; the `done` flag is not stored in the history
585
- // schema, so length is the closest available proxy.
597
+ // Clamp to >= 1 so the predicate never counts an empty/whitespace-only row
598
+ // (a degenerate outbound) as a delivered reply, even if a caller passes 0.
599
+ const minLen = Math.max(1, Math.floor(minChars))
600
+ const params: unknown[] = [chatId, cutoffSec, minLen]
601
+ // LENGTH(text) >= minChars scopes to replies of at least the caller's
602
+ // threshold. ESCALATE passes the default 200 (substantive-only — never stand
603
+ // down on a mere ack). The duplicate-represent GUARD passes a low value so a
604
+ // terse-but-real reply counts (#2472/#2474). The `done` flag is not stored in
605
+ // the history schema, so length is the closest available proxy; rows here are
606
+ // only ever recordOutbound writes (real bot→user sends), so progress-card
607
+ // edits / typing indicators are structurally excluded.
586
608
  let sql =
587
- "SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >= 200"
609
+ "SELECT 1 FROM messages WHERE chat_id = ? AND role = 'assistant' AND ts >= ? AND LENGTH(text) >= ?"
588
610
  if (threadId !== undefined) {
589
611
  if (threadId === null) {
590
612
  sql += ' AND thread_id IS NULL'
@@ -297,6 +297,32 @@ async function main() {
297
297
  const markerPath = findNearestMarker(targetDir)
298
298
  if (markerPath == null) process.exit(0)
299
299
 
300
+ // Own-agent marker guard: suppress the agent's own CLAUDE.md / AGENTS.md /
301
+ // AGENT.md so it is never injected as additionalContext. The agent's own
302
+ // marker is already in the system prompt (baked by start.sh via
303
+ // --append-system-prompt); re-injecting it wastes ~30KB per session.
304
+ //
305
+ // The existing isUnderAgentWorkspace guard only blocks paths under the
306
+ // agent's workspace/ subdirectory. It misses the agent's start cwd
307
+ // (/home/.../.switchroom/agents/<name>) because that guard computes against
308
+ // workspace/, not agentDir itself. This marker-path check closes that gap.
309
+ //
310
+ // We do NOT add a "targetDir under startCwd" directory guard because that
311
+ // would wrongly suppress a legitimate worktree repo the operator has checked
312
+ // out inside the agent dir (e.g. agentDir/workspace/ repos) — the directory
313
+ // guard would catch those too. The marker-path equality check is surgical:
314
+ // only the exact CLAUDE.md / AGENTS.md / AGENT.md at agentDir root is blocked;
315
+ // any nested repo's marker injects normally.
316
+ if (agentName) {
317
+ const startCwd = normalize(
318
+ process.env.SWITCHROOM_AGENT_START_CWD ??
319
+ join(home, '.switchroom', 'agents', agentName),
320
+ )
321
+ for (const m of MARKER_FILES) {
322
+ if (markerPath === join(startCwd, m)) process.exit(0)
323
+ }
324
+ }
325
+
300
326
  const state = readSessionState(sessionId)
301
327
 
302
328
  // Already-loaded dedup — the load-once-per-repo-per-session invariant.
@@ -313,6 +313,11 @@ function updateRow(dbPath, { id, status, resultSummary, now, asyncLaunch }, done
313
313
  setImmediate(() => {
314
314
  try {
315
315
  const db = new SnapDatabaseSync(snapDbPath)
316
+ // Concurrency: per-connection busy_timeout so this hook's writes
317
+ // wait-and-retry instead of failing with SQLITE_BUSY under concurrent
318
+ // sub-agent dispatch. Set on the real open so BOTH the node:sqlite
319
+ // (production) and bun:sqlite branches are armed (#2535 review).
320
+ try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
316
321
  const row = db.prepare(SELECT_SQL).get(snapId)
317
322
  const isBackground = row != null && row.background === 1
318
323
  if (isBackground) {
@@ -184,6 +184,14 @@ function writeRow(dbPath, { id, parentSessionId, parentTurnKey, agentType, descr
184
184
  setImmediate(() => {
185
185
  try {
186
186
  const db = new SnapDatabaseSync(snapDbPath)
187
+ // Concurrency: this hook writes registry.db from a separate process
188
+ // that contends with the gateway's subagent-watcher + the PostToolUse
189
+ // hook. Without a busy_timeout, the contending write fails IMMEDIATELY
190
+ // with SQLITE_BUSY ("database is locked") when several sub-agents
191
+ // dispatch at once, dropping the row → NULL jsonl_agent_id/parent_turn_key.
192
+ // Per-connection PRAGMA, set on the real open so BOTH the node:sqlite
193
+ // (production) and bun:sqlite branches are armed.
194
+ try { db.exec('PRAGMA busy_timeout = 5000') } catch { /* best-effort */ }
187
195
  db.exec(snapSchemaSql)
188
196
  // Migrate older DBs that pre-date jsonl_agent_id.
189
197
  const hasJsonlCol = db.prepare(snapMigrateSql).get()
@@ -149,30 +149,42 @@ export function computeLabel(toolName, input) {
149
149
  // the progress card path that used to surface this was retired
150
150
  // when `progressDriver` was nulled out in #1122 PR3.
151
151
  const slug = clip(asText(i.skill), 64)
152
+ // Empty-slug Skill stays suppressed (degenerate/malformed call): the
153
+ // liveness feed-open backstops visibility for a tool-less turn, so this
154
+ // does not need a label. Keeps the #2111 sidecar contract.
152
155
  return slug ? `Running skill ${slug}` : null
153
156
  }
154
157
  }
155
158
 
156
159
  // MCP tools.
157
160
  if (typeof toolName === 'string' && toolName.startsWith('mcp__')) {
158
- // Explicit labels / suppressions for the built-in servers.
161
+ // Telegram-plugin tools: matched by the key-agnostic regex so renames/forks work.
162
+ // Strip the `mcp__<server>__` prefix to get just the tool suffix.
163
+ const TELEGRAM_PREFIX_RE = /^mcp__[^_].*?telegram__/
164
+ const telegramMatch = TELEGRAM_PREFIX_RE.exec(toolName)
165
+ if (telegramMatch) {
166
+ const suffix = toolName.slice(telegramMatch[0].length)
167
+ // Surface tools (reply, stream_reply, edit_message, react) are the
168
+ // conversation itself — suppress them from the activity feed entirely.
169
+ // Mirrors isTelegramSurfaceTool in tool-names.ts.
170
+ if (
171
+ suffix === 'reply' ||
172
+ suffix === 'stream_reply' ||
173
+ suffix === 'edit_message' ||
174
+ suffix === 'react'
175
+ ) return null
176
+ if (suffix === 'get_recent_messages') return 'Reading chat history'
177
+ // send_typing and all other surface/control tools: suppress.
178
+ return null
179
+ }
180
+ // Explicit labels / suppressions for the hindsight server.
159
181
  switch (toolName) {
160
- case 'mcp__switchroom-telegram__reply':
161
- case 'mcp__switchroom-telegram__stream_reply':
162
- return 'Replying'
163
- case 'mcp__switchroom-telegram__react': {
164
- const emoji = clip(asText(i.emoji), 8)
165
- return emoji ? `Reacting ${emoji}` : 'Reacting'
166
- }
167
- case 'mcp__switchroom-telegram__get_recent_messages':
168
- return 'Reading chat history'
169
182
  case 'mcp__hindsight__recall':
170
183
  case 'mcp__hindsight__reflect':
171
184
  return 'Searching memory'
172
185
  case 'mcp__hindsight__retain':
173
186
  return 'Saving memory'
174
187
  // Explicit suppressions — return null so we don't emit a sidecar line.
175
- case 'mcp__switchroom-telegram__send_typing':
176
188
  case 'mcp__hindsight__sync_retain':
177
189
  return null
178
190
  }
@@ -182,13 +194,17 @@ export function computeLabel(toolName, input) {
182
194
  // entirely by MCP tools read as pure silence (only a typing dot + the
183
195
  // 👀 reaction) — the "I can't see what it's doing" report. Mirror the
184
196
  // gateway's describeToolUse: friendly per-server labels, else a
185
- // model-authored field, else a humanized tool name. NEVER label
186
- // switchroom-telegram surface/control tools (they ARE the conversation).
197
+ // model-authored field, else a humanized tool name. NEVER label any
198
+ // Telegram surface/control tools (they ARE the conversation). Use the
199
+ // same regex predicate as isTelegramSurfaceTool in tool-names.ts so
200
+ // this works regardless of the plugin's registration key (clerk-telegram,
201
+ // switchroom-telegram, custom fork, …).
202
+ const TELEGRAM_SURFACE_RE = /^mcp__[^_].*?telegram__/
203
+ if (TELEGRAM_SURFACE_RE.test(toolName)) return null
187
204
  const m = /^mcp__(.+?)__(.+)$/.exec(toolName)
188
205
  if (!m) return null
189
206
  const server = m[1].toLowerCase()
190
207
  const tool = m[2].toLowerCase()
191
- if (server === 'switchroom-telegram') return null
192
208
  if (server === 'hindsight') return 'Working with memory'
193
209
  if (server === 'google-workspace' || server === 'claude_ai_google_calendar')
194
210
  return 'Checking your calendar'
@@ -213,7 +229,15 @@ export function computeLabel(toolName, input) {
213
229
  return `Using ${tool.replace(/[-_]+/g, ' ')}`
214
230
  }
215
231
 
216
- return null
232
+ // Never-null fallthrough: any unrecognized BUILT-IN tool (no mcp__ prefix,
233
+ // not matched above) gets a generic label rather than dropping its sidecar
234
+ // line. A null here was the dark-turn mechanism — if such a tool was a
235
+ // turn's first/only tool, no tool_label event fired, the activity feed
236
+ // never opened, and a working turn read as pure silence. Surface tools
237
+ // (reply/react/send_typing/sync_retain) return earlier and are also
238
+ // suppressed at the gateway's isTelegramSurfaceTool guard, so this does
239
+ // not resurface them.
240
+ return 'Working…'
217
241
  }
218
242
 
219
243
  function main() {
@@ -328,6 +328,10 @@ export function createIssuesCardHandle(
328
328
  const sendOpts: Record<string, unknown> = {
329
329
  parse_mode: "HTML",
330
330
  disable_web_page_preview: true,
331
+ // Status card, not the user's answer — silence the open ping.
332
+ // (editMessageText ignores disable_notification, so the shared
333
+ // edit path below is unaffected.)
334
+ disable_notification: true,
331
335
  ...(opts.threadId != null ? { message_thread_id: opts.threadId } : {}),
332
336
  };
333
337
 
@@ -84,6 +84,13 @@ export function detectModelUnavailable(
84
84
  // resets 8:50am (Australia/Melbourne)".
85
85
  'hit your limit',
86
86
  'hit the limit',
87
+ // SESSION-cap wording: "You've hit your session limit · resets 5pm".
88
+ // A session cap is a quota exhaustion that frees in HOURS (its reset is a
89
+ // bare time-of-day, see parseResetTime's time-only branch) — recognising
90
+ // it here is what lets the time-only reset parse fire and keeps a
91
+ // session-capped account from the +7d weekly bench.
92
+ 'session limit',
93
+ 'session cap',
87
94
  ]
88
95
  if (quotaSignals.some(s => lower.includes(s))) {
89
96
  const resetAt = parseResetTime(sample)
@@ -192,9 +199,126 @@ function parseResetTime(text: string, parseTimeNow: Date = new Date()): Date | u
192
199
  if (!Number.isNaN(d.getTime())) return d
193
200
  }
194
201
 
202
+ // "resets 5pm (Australia/Melbourne)" / "resets 8:50am" / "resets 17:00 (UTC)"
203
+ // SESSION-cap wording: a time of day with NO month/day. This frees in
204
+ // HOURS, not a week — without this branch it falls through to undefined,
205
+ // and the 429 inference path then applies resolveExhaustUntil's +7d weekly
206
+ // floor, benching a session-capped account for a week. Must sit AFTER the
207
+ // calendar branch so "resets May 3, 11am" never matches here. The leading
208
+ // negative lookahead `(?!...)` is only a belt-and-braces guard: the digit
209
+ // required right after it already keeps a month-first string out of this branch.
210
+ const timeOnly = text.match(
211
+ /resets?\s+(?:at\s+)?(?!(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\b)(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*(?:\(([^)]+)\))?/i,
212
+ )
213
+ if (timeOnly) {
214
+ const d = resolveNextWallClock(
215
+ Number(timeOnly[1]),
216
+ timeOnly[2] ? Number(timeOnly[2]) : 0,
217
+ timeOnly[3]?.toLowerCase(),
218
+ timeOnly[4]?.trim(),
219
+ parseTimeNow,
220
+ )
221
+ if (d != null) return d
222
+ }
223
+
224
+ return undefined
225
+ }
226
+
227
+ /**
228
+ * Resolve a bare wall-clock time ("5pm", "8:50am", "17:00") to the NEXT
229
+ * occurrence of that time, tz-aware. Returns the soonest future Date (rolls
230
+ * to tomorrow when the time has already passed today). Undefined on bad input
231
+ * (out-of-range hour/minute or an unknown tz). When `tz` is omitted the
232
+ * time is interpreted in UTC (best-effort) — Anthropic's strings normally
233
+ * carry the IANA tz in parens, e.g. "(Australia/Melbourne)".
234
+ */
235
+ function resolveNextWallClock(
236
+ hour12or24: number,
237
+ minute: number,
238
+ ampm: string | undefined,
239
+ tz: string | undefined,
240
+ nowDate: Date,
241
+ ): Date | undefined {
242
+ let hour = hour12or24
243
+ if (ampm === 'pm' && hour < 12) hour += 12
244
+ if (ampm === 'am' && hour === 12) hour = 0
245
+ if (!Number.isFinite(hour) || hour > 23 || hour < 0) return undefined
246
+ if (!Number.isFinite(minute) || minute > 59 || minute < 0) return undefined
247
+ const nowMs = nowDate.getTime()
248
+ // Walk today and the next two days (DST-safe span) and pick the first
249
+ // occurrence strictly in the future relative to now.
250
+ const base = new Date(nowMs)
251
+ for (let dayOffset = 0; dayOffset <= 2; dayOffset++) {
252
+ // Derive the y/m/d for `dayOffset` days from now IN THE TARGET TZ, so the
253
+ // wall-clock date we resolve is the tz's calendar date, not the container's.
254
+ const dateParts = tzDateParts(new Date(nowMs + dayOffset * 86_400_000), tz)
255
+ if (dateParts == null) return undefined
256
+ const epoch = wallClockToEpoch(
257
+ dateParts.year, dateParts.month, dateParts.day, hour, minute, tz,
258
+ )
259
+ if (epoch != null && epoch > nowMs) return new Date(epoch)
260
+ }
261
+ // Fallback: shouldn't happen, but keep the function total.
262
+ void base
195
263
  return undefined
196
264
  }
197
265
 
266
+ /** The y/m/d of `d` as seen in `tz` (UTC when tz omitted); `month` is 0-based. Null on bad tz. */
267
+ function tzDateParts(
268
+ d: Date,
269
+ tz: string | undefined,
270
+ ): { year: number; month: number; day: number } | null {
271
+ if (!tz) {
272
+ return { year: d.getUTCFullYear(), month: d.getUTCMonth(), day: d.getUTCDate() }
273
+ }
274
+ try {
275
+ const fmt = new Intl.DateTimeFormat('en-US', {
276
+ timeZone: tz, year: 'numeric', month: '2-digit', day: '2-digit',
277
+ })
278
+ const parts = Object.fromEntries(
279
+ fmt.formatToParts(d).filter((p) => p.type !== 'literal').map((p) => [p.type, p.value]),
280
+ )
281
+ return {
282
+ year: Number(parts.year),
283
+ month: Number(parts.month) - 1,
284
+ day: Number(parts.day),
285
+ }
286
+ } catch {
287
+ return null
288
+ }
289
+ }
290
+
291
+ /**
292
+ * Convert a wall-clock time in an IANA tz to epoch-ms (null if the tz is
293
+ * unknown). Resolves the tz's offset near that date via Intl in a single pass
294
+ * (may be off by the DST delta within hours of a transition) — NOT `new Date(localString)`, which assumes the container TZ.
295
+ * Mirrors wedge-watchdog.ts's helper of the same name (kept local to keep
296
+ * this module dependency-free / pure-testable).
297
+ */
298
+ function wallClockToEpoch(
299
+ year: number, month: number, day: number, hour: number, minute: number, tz: string | undefined,
300
+ ): number | null {
301
+ const asUtc = Date.UTC(year, month, day, hour, minute, 0)
302
+ if (!tz) return asUtc // no tz given → best-effort UTC
303
+ try {
304
+ const fmt = new Intl.DateTimeFormat('en-US', {
305
+ timeZone: tz, year: 'numeric', month: '2-digit', day: '2-digit',
306
+ hour: '2-digit', minute: '2-digit', second: '2-digit', hour12: false,
307
+ })
308
+ const parts = Object.fromEntries(
309
+ fmt.formatToParts(new Date(asUtc)).filter((p) => p.type !== 'literal').map((p) => [p.type, p.value]),
310
+ )
311
+ const shown = Date.UTC(
312
+ Number(parts.year), Number(parts.month) - 1, Number(parts.day),
313
+ Number(parts.hour) % 24, Number(parts.minute), Number(parts.second),
314
+ )
315
+ const offset = shown - asUtc // how far ahead the tz wall clock is of UTC
316
+ return asUtc - offset
317
+ } catch {
318
+ return null // unknown tz
319
+ }
320
+ }
321
+
198
322
  function parseRelativeDuration(s: string): number | null {
199
323
  // "2h 15m" / "30m" / "45 seconds"
200
324
  let total = 0
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Reducer-side narrative dedup gate (the correctness core of the
3
+ * JSONL-text-narrative primitive).
4
+ *
5
+ * A `text` / `sub_agent_text` JSONL block is one of two things:
6
+ *
7
+ * 1. DRAFT-THEN-SEND — the model composing its answer just before it
8
+ * calls `reply` / `stream_reply` with near-identical text. Surfacing
9
+ * it would double-print the answer (once as a transient narrative
10
+ * step, once as the canonical reply). It MUST be suppressed.
11
+ * 2. WORKING NARRATION — the agent's own commentary that is never sent
12
+ * to the user ("On it. Let me find the repo…", "Sent. Waiting on the
13
+ * build…"). It SHOULD be surfaced as a transient liveness step.
14
+ *
15
+ * A projector sees one JSONL line at a time and cannot know whether a
16
+ * later line is a reply tool_use, so the SHOW/SUPPRESS decision is a
17
+ * stateful, one-step-deferred decision made reducer-side (the gateway for
18
+ * the main agent, the subagent-watcher for sub/worker). This module is the
19
+ * pure, fully-unit-testable kernel of that decision — no I/O, no state of
20
+ * its own; the caller owns the `pendingNarrative` slot.
21
+ *
22
+ * The threshold heuristic deliberately matches the spirit of the #546
23
+ * outbound dedup (trim + lowercase + whitespace-collapse) so a draft and
24
+ * its reply compare equal the same way #546 considers them the same
25
+ * message.
26
+ */
27
+
28
+ /** Tools whose `input.text` IS the canonical answer surface. */
29
+ export const REPLY_TOOLS = new Set(['reply', 'stream_reply'])
30
+
31
+ /**
32
+ * Normalize for prefix comparison: strip markdown emphasis, code, strikethrough,
33
+ * heading and quote marks, collapse whitespace, lowercase. Mirrors the
34
+ * #546 outbound-dedup normalization so a markdown-decorated draft and its
35
+ * plain reply compare equal.
36
+ */
37
+ export function normalizeNarrative(s: string): string {
38
+ return s
39
+ .replace(/[*_`>#~]/g, '') // markdown emphasis / heading / quote marks
40
+ .replace(/\s+/g, ' ')
41
+ .trim()
42
+ .toLowerCase()
43
+ }
44
+
45
+ /** Longest-common-prefix ratio over the SHORTER of the two normalized strings. */
46
+ export function prefixSimilarity(a: string, b: string): number {
47
+ const x = normalizeNarrative(a)
48
+ const y = normalizeNarrative(b)
49
+ if (x.length === 0 || y.length === 0) return 0
50
+ const n = Math.min(x.length, y.length)
51
+ let i = 0
52
+ while (i < n && x[i] === y[i]) i++
53
+ return i / n
54
+ }
55
+
56
+ /**
57
+ * The single tunable. Longest-common-PREFIX ratio (not Levenshtein) is
58
+ * deliberate: a draft shares a long head with the sent answer even when the
59
+ * model trims a trailing sentence before sending. 0.8 over the shorter
60
+ * string tolerates that trim while rejecting the "Sent. Waiting…" +
61
+ * different-reply case (short string, near-zero shared prefix). Exported so
62
+ * the test pins it — a silent retune breaks a test.
63
+ */
64
+ export const DRAFT_SUPPRESS_THRESHOLD = 0.8
65
+
66
+ /** TRUE ⇒ this text block is a draft-then-send of `replyText`; SUPPRESS it. */
67
+ export function isDraftOfReply(textBlock: string, replyText: string): boolean {
68
+ return prefixSimilarity(textBlock, replyText) >= DRAFT_SUPPRESS_THRESHOLD
69
+ }