switchroom 0.15.44 → 0.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/dist/agent-scheduler/index.js +122 -88
  2. package/dist/auth-broker/index.js +463 -177
  3. package/dist/cli/autoaccept-poll.js +4842 -35
  4. package/dist/cli/drive-write-pretool.mjs +17 -14
  5. package/dist/cli/notion-write-pretool.mjs +117 -86
  6. package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
  7. package/dist/cli/self-improve-stop.mjs +428 -0
  8. package/dist/cli/skill-validate-pretool.mjs +72 -72
  9. package/dist/cli/switchroom.js +3249 -1241
  10. package/dist/cli/ui/index.html +1 -1
  11. package/dist/host-control/main.js +2833 -355
  12. package/dist/vault/approvals/kernel-server.js +7482 -7439
  13. package/dist/vault/broker/server.js +11315 -11272
  14. package/examples/minimal.yaml +1 -0
  15. package/examples/switchroom.yaml +1 -0
  16. package/package.json +3 -3
  17. package/profiles/_base/start.sh.hbs +88 -1
  18. package/profiles/_shared/execution-discipline.md.hbs +18 -0
  19. package/profiles/default/CLAUDE.md.hbs +3 -22
  20. package/telegram-plugin/.claude-plugin/plugin.json +2 -2
  21. package/telegram-plugin/answer-stream-flag.ts +12 -49
  22. package/telegram-plugin/answer-stream.ts +5 -150
  23. package/telegram-plugin/auth-snapshot-format.ts +280 -48
  24. package/telegram-plugin/auto-fallback-fleet.ts +44 -1
  25. package/telegram-plugin/context-exhaustion.ts +12 -0
  26. package/telegram-plugin/demo-mask.ts +154 -0
  27. package/telegram-plugin/dist/bridge/bridge.js +167 -124
  28. package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
  29. package/telegram-plugin/dist/server.js +215 -172
  30. package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
  31. package/telegram-plugin/draft-stream.ts +47 -410
  32. package/telegram-plugin/final-answer-detect.ts +17 -12
  33. package/telegram-plugin/fleet-fallback-resume.ts +131 -0
  34. package/telegram-plugin/format.ts +56 -19
  35. package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
  36. package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
  37. package/telegram-plugin/gateway/auth-command.ts +70 -14
  38. package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
  39. package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
  40. package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
  41. package/telegram-plugin/gateway/current-turn-map.ts +188 -0
  42. package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
  43. package/telegram-plugin/gateway/effort-command.ts +8 -3
  44. package/telegram-plugin/gateway/emission-authority.ts +369 -0
  45. package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
  46. package/telegram-plugin/gateway/gateway.ts +1837 -291
  47. package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
  48. package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
  49. package/telegram-plugin/gateway/represent-guard.ts +72 -0
  50. package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
  51. package/telegram-plugin/gateway/status-surface-log.ts +14 -3
  52. package/telegram-plugin/history.ts +33 -11
  53. package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
  54. package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
  55. package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
  56. package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
  57. package/telegram-plugin/issues-card.ts +4 -0
  58. package/telegram-plugin/model-unavailable.ts +124 -0
  59. package/telegram-plugin/narrative-dedup.ts +69 -0
  60. package/telegram-plugin/over-ping-safety-net.ts +70 -4
  61. package/telegram-plugin/package.json +3 -3
  62. package/telegram-plugin/pending-work-progress.ts +12 -0
  63. package/telegram-plugin/permission-rule.ts +32 -5
  64. package/telegram-plugin/permission-title.ts +152 -9
  65. package/telegram-plugin/quota-check.ts +13 -0
  66. package/telegram-plugin/quota-watch.ts +135 -7
  67. package/telegram-plugin/registry/turns-schema.test.ts +24 -0
  68. package/telegram-plugin/registry/turns-schema.ts +9 -0
  69. package/telegram-plugin/runtime-metrics.ts +13 -0
  70. package/telegram-plugin/session-tail.ts +96 -11
  71. package/telegram-plugin/silence-poke.ts +170 -24
  72. package/telegram-plugin/slot-banner-driver.ts +3 -0
  73. package/telegram-plugin/status-no-truncate.ts +44 -0
  74. package/telegram-plugin/status-reactions.ts +20 -3
  75. package/telegram-plugin/stream-controller.ts +4 -23
  76. package/telegram-plugin/stream-reply-handler.ts +6 -24
  77. package/telegram-plugin/streaming-metrics.ts +91 -0
  78. package/telegram-plugin/subagent-watcher.ts +212 -66
  79. package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
  80. package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
  81. package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
  82. package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
  83. package/telegram-plugin/tests/answer-stream.test.ts +2 -411
  84. package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
  85. package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
  86. package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
  87. package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
  88. package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
  89. package/telegram-plugin/tests/demo-mask.test.ts +127 -0
  90. package/telegram-plugin/tests/draft-stream.test.ts +0 -827
  91. package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
  92. package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
  93. package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
  94. package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
  95. package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
  96. package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
  97. package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
  98. package/telegram-plugin/tests/feed-survival.test.ts +526 -0
  99. package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
  100. package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
  101. package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
  102. package/telegram-plugin/tests/history.test.ts +60 -0
  103. package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
  104. package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
  105. package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
  106. package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
  107. package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
  108. package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
  109. package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
  110. package/telegram-plugin/tests/permission-rule.test.ts +17 -0
  111. package/telegram-plugin/tests/permission-title.test.ts +206 -17
  112. package/telegram-plugin/tests/quota-watch.test.ts +252 -9
  113. package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
  114. package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
  115. package/telegram-plugin/tests/represent-guard.test.ts +162 -0
  116. package/telegram-plugin/tests/session-tail.test.ts +147 -3
  117. package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
  118. package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
  119. package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
  120. package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
  121. package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
  122. package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
  123. package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
  124. package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
  125. package/telegram-plugin/tests/telegram-format.test.ts +101 -6
  126. package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
  127. package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
  128. package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
  129. package/telegram-plugin/tests/tool-labels.test.ts +67 -0
  130. package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
  131. package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
  132. package/telegram-plugin/tests/welcome-text.test.ts +32 -3
  133. package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
  134. package/telegram-plugin/tool-activity-summary.ts +375 -58
  135. package/telegram-plugin/turn-liveness-floor.ts +240 -0
  136. package/telegram-plugin/uat/assertions.ts +115 -0
  137. package/telegram-plugin/uat/driver.ts +68 -0
  138. package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
  139. package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
  140. package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
  141. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
  142. package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
  143. package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
  144. package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
  145. package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
  146. package/telegram-plugin/welcome-text.ts +13 -1
  147. package/telegram-plugin/worker-activity-feed.ts +157 -82
  148. package/telegram-plugin/draft-transport.ts +0 -122
  149. package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
  150. package/telegram-plugin/tests/draft-transport.test.ts +0 -211
@@ -15,6 +15,7 @@ import {
15
15
  formatPermissionResumeMessage,
16
16
  } from '../permission-title.js'
17
17
  import type { ScopeOption } from '../permission-rule.js'
18
+ import { resolveScopedAllowChoices } from '../permission-rule.js'
18
19
 
19
20
  const opt = (rule: string): ScopeOption => ({ rule, buttonLabel: 'x', broad: false })
20
21
 
@@ -119,11 +120,11 @@ describe('naturalAction — MCP tools', () => {
119
120
  })
120
121
 
121
122
  describe('formatPermissionCardBody', () => {
122
- test('renders "<Agent> wants to <action>" + why line', () => {
123
+ test('renders "<Agent> wants to <action>" + why line (why = caller reason)', () => {
123
124
  const body = formatPermissionCardBody({
124
125
  toolName: 'Edit',
125
- inputPreview: JSON.stringify({ file_path: '/work/supplement-log.md' }),
126
- description: 'logging today\'s lifts',
126
+ inputPreview: JSON.stringify({ file_path: '/work/supplement-log.md', reason: 'logging today\'s lifts' }),
127
+ description: 'Edit a file on disk.',
127
128
  agentName: 'gymbro',
128
129
  })
129
130
  expect(body).toBe(
@@ -131,11 +132,46 @@ describe('formatPermissionCardBody', () => {
131
132
  )
132
133
  })
133
134
 
134
- test('shows "not provided" when description is missing or whitespace', () => {
135
+ // #2469: the `why:` line is the CALLER's reason, never the tool's static
136
+ // schema description (which can contain literal $SWITCHROOM_* tokens).
137
+ test('why is the caller-supplied reason, NOT the schema description (#2469)', () => {
138
+ const body = formatPermissionCardBody({
139
+ toolName: 'mcp__hostd__agent_restart',
140
+ inputPreview: JSON.stringify({ name: 'carrie', reason: 'gateway is wedged, bouncing it' }),
141
+ description: 'Restart an agent via the host-control daemon. cross-agent (`name` ≠ $SWITCHROOM_AGENT_NAME) …',
142
+ agentName: 'carrie',
143
+ })
144
+ expect(body).toContain('why: <i>gateway is wedged, bouncing it</i>')
145
+ expect(body).not.toContain('$SWITCHROOM_AGENT_NAME')
146
+ expect(body).not.toContain('host-control daemon')
147
+ })
148
+
149
+ test('why accepts a `why` arg as well as `reason`', () => {
150
+ const body = formatPermissionCardBody({
151
+ toolName: 'Bash',
152
+ inputPreview: JSON.stringify({ command: 'ls /tmp', why: 'listing temp files' }),
153
+ description: 'Run a shell command.',
154
+ agentName: 'gymbro',
155
+ })
156
+ expect(body).toContain('why: <i>listing temp files</i>')
157
+ })
158
+
159
+ test('shows "not provided" when no caller reason is present (never the description)', () => {
135
160
  const body = formatPermissionCardBody({
136
161
  toolName: 'Bash',
137
162
  inputPreview: JSON.stringify({ command: 'ls /tmp' }),
138
- description: ' \n ',
163
+ description: 'Run a shell command on the host.',
164
+ agentName: 'gymbro',
165
+ })
166
+ expect(body).toContain('why: <i>not provided</i>')
167
+ expect(body).not.toContain('Run a shell command')
168
+ })
169
+
170
+ test('shows "not provided" when caller reason is whitespace only', () => {
171
+ const body = formatPermissionCardBody({
172
+ toolName: 'Bash',
173
+ inputPreview: JSON.stringify({ command: 'ls /tmp', reason: ' \n ' }),
174
+ description: 'Run a shell command.',
139
175
  agentName: 'gymbro',
140
176
  })
141
177
  expect(body).toContain('why: <i>not provided</i>')
@@ -144,18 +180,18 @@ describe('formatPermissionCardBody', () => {
144
180
  test('drops the agent prefix when agentName is null (early-boot edge)', () => {
145
181
  const body = formatPermissionCardBody({
146
182
  toolName: 'Skill',
147
- inputPreview: JSON.stringify({ skill: 'mail' }),
148
- description: 'do the thing',
183
+ inputPreview: JSON.stringify({ skill: 'mail', reason: 'do the thing' }),
184
+ description: 'Use a skill.',
149
185
  agentName: null,
150
186
  })
151
187
  expect(body).toBe(['🔐 Use the mail skill', 'why: <i>do the thing</i>'].join('\n'))
152
188
  })
153
189
 
154
- test('HTML-escapes <, >, & in agentName / action / description', () => {
190
+ test('HTML-escapes <, >, & in agentName / action / reason', () => {
155
191
  const body = formatPermissionCardBody({
156
192
  toolName: 'Bash',
157
- inputPreview: JSON.stringify({ command: 'echo "a < b && c > d"' }),
158
- description: 'compare a < b & c > d',
193
+ inputPreview: JSON.stringify({ command: 'echo "a < b && c > d"', reason: 'compare a < b & c > d' }),
194
+ description: 'Run a shell command.',
159
195
  agentName: 'agent<test>',
160
196
  })
161
197
  expect(body).toContain('&lt;test&gt;')
@@ -165,27 +201,119 @@ describe('formatPermissionCardBody', () => {
165
201
  expect(body).toContain('<i>')
166
202
  })
167
203
 
168
- test('truncates a very long description with an ellipsis', () => {
204
+ test('truncates a very long caller reason with an ellipsis', () => {
169
205
  const body = formatPermissionCardBody({
170
206
  toolName: 'Skill',
171
- inputPreview: JSON.stringify({ skill: 'mail' }),
172
- description: 'x'.repeat(500),
207
+ inputPreview: JSON.stringify({ skill: 'mail', reason: 'x'.repeat(500) }),
208
+ description: 'Use a skill.',
173
209
  agentName: 'clerk',
174
210
  })
175
211
  expect(body).toContain('xxxx…</i>')
176
212
  expect(body.split('\n')[0]).toBe('🔐 <b>Clerk</b> wants to use the mail skill')
177
213
  })
178
214
 
179
- test('collapses internal whitespace in the description', () => {
215
+ test('collapses internal whitespace in the caller reason', () => {
180
216
  const body = formatPermissionCardBody({
181
217
  toolName: 'Skill',
182
- inputPreview: JSON.stringify({ skill: 'mail' }),
183
- description: 'first\n\nsecond\t\t paragraph',
218
+ inputPreview: JSON.stringify({ skill: 'mail', reason: 'first\n\nsecond\t\t paragraph' }),
219
+ description: 'Use a skill.',
184
220
  agentName: 'clerk',
185
221
  })
186
222
  expect(body).toContain('why: <i>first second paragraph</i>')
187
223
  })
188
224
 
225
+ // config-edit-hardening: upstream Claude Code truncates `inputPreview`
226
+ // to ~200 chars. For config_propose_edit the (NEW-ordered) reason lands
227
+ // inside the surviving prefix, but the truncated JSON is unparseable —
228
+ // the lenient `extractReasonFromRaw` regex fallback must still recover it
229
+ // so the card no longer renders "why: not provided".
230
+ test('recovers reason from a >200-char truncated config_propose_edit input', () => {
231
+ // reason FIRST (the reordered schema), then a huge unified_diff that
232
+ // gets cut by the 200-char truncation → invalid JSON, no closing brace.
233
+ const reason = 'widen klanker tools.allow for the new skill'
234
+ const fullDiff =
235
+ '--- a/switchroom.yaml\n+++ b/switchroom.yaml\n' +
236
+ Array.from({ length: 40 }, (_, i) => `+ - "Bash(tool-${i}:*)"`).join('\n')
237
+ const full = JSON.stringify({
238
+ reason,
239
+ target_path: '/state/config/switchroom.yaml',
240
+ unified_diff: fullDiff,
241
+ })
242
+ const truncated = full.slice(0, 200) // mirror the upstream cut
243
+ expect(() => JSON.parse(truncated)).toThrow() // precondition: unparseable
244
+ const body = formatPermissionCardBody({
245
+ toolName: 'config_propose_edit',
246
+ inputPreview: truncated,
247
+ description: 'Propose a unified-diff patch against switchroom.yaml.',
248
+ agentName: 'klanker',
249
+ })
250
+ expect(body).toContain(`why: <i>${reason}</i>`)
251
+ expect(body).not.toContain('not provided')
252
+ })
253
+
254
+ test('recovers reason even when unified_diff precedes it (legacy order)', () => {
255
+ // Even with the OLD key order (diff first), the regex finds reason if it
256
+ // survives the cut — proving the fallback is order-independent.
257
+ const reason = 'self-scope allow rule add'
258
+ const raw =
259
+ '{"unified_diff":"--- a/x\\n+++ b/x\\n+ small","reason":"' + reason + '"}'
260
+ const body = formatPermissionCardBody({
261
+ toolName: 'config_propose_edit',
262
+ inputPreview: raw,
263
+ description: 'desc',
264
+ agentName: 'klanker',
265
+ })
266
+ expect(body).toContain(`why: <i>${reason}</i>`)
267
+ })
268
+
269
+ // #2469: hostd agent_* cards must name WHICH agent is targeted, pulled
270
+ // from the `name` input arg — not the static curated phrase.
271
+ test('hostd agent_restart names the target agent in the title (#2469)', () => {
272
+ const body = formatPermissionCardBody({
273
+ toolName: 'mcp__hostd__agent_restart',
274
+ inputPreview: JSON.stringify({ name: 'carrie', reason: 'wedged' }),
275
+ description: 'Restart an agent via the host-control daemon. $SWITCHROOM_AGENT_NAME …',
276
+ agentName: 'klanker',
277
+ })
278
+ expect(body.split('\n')[0]).toBe('🔐 <b>Klanker</b> wants to restart agent `carrie` in the fleet')
279
+ })
280
+
281
+ test('hostd start/stop/logs/exec each name the target agent (#2469)', () => {
282
+ const mk = (tool: string) =>
283
+ formatPermissionCardBody({
284
+ toolName: tool,
285
+ inputPreview: JSON.stringify({ name: 'pixel' }),
286
+ description: 'static schema doc',
287
+ agentName: 'klanker',
288
+ }).split('\n')[0]
289
+ expect(mk('mcp__hostd__agent_start')).toBe('🔐 <b>Klanker</b> wants to start agent `pixel` in the fleet')
290
+ expect(mk('mcp__hostd__agent_stop')).toBe('🔐 <b>Klanker</b> wants to stop agent `pixel` in the fleet')
291
+ expect(mk('mcp__hostd__agent_logs')).toBe("🔐 <b>Klanker</b> wants to read agent `pixel`'s container logs")
292
+ expect(mk('mcp__hostd__agent_exec')).toBe('🔐 <b>Klanker</b> wants to run a read-only inspection inside agent `pixel`')
293
+ })
294
+
295
+ test('hostd agent verb without a name arg falls back to the generic phrase (no crash) (#2469)', () => {
296
+ const body = formatPermissionCardBody({
297
+ toolName: 'mcp__hostd__agent_restart',
298
+ inputPreview: JSON.stringify({ reason: 'bouncing the fleet' }),
299
+ description: 'static schema doc',
300
+ agentName: 'klanker',
301
+ })
302
+ expect(body.split('\n')[0]).toBe('🔐 <b>Klanker</b> wants to restart an agent in the fleet')
303
+ expect(body).toContain('why: <i>bouncing the fleet</i>')
304
+ })
305
+
306
+ test('non-name-arg gated verb (update_apply) stays generic and does not break (#2469)', () => {
307
+ const body = formatPermissionCardBody({
308
+ toolName: 'mcp__hostd__update_apply',
309
+ inputPreview: JSON.stringify({ reason: 'rolling out v0.16' }),
310
+ description: 'static schema doc',
311
+ agentName: 'klanker',
312
+ })
313
+ expect(body.split('\n')[0]).toBe('🔐 <b>Klanker</b> wants to apply a fleet-wide update (pull + recreate)')
314
+ expect(body).toContain('why: <i>rolling out v0.16</i>')
315
+ })
316
+
189
317
  // Clarity fix: the card gains a third "↳" line summarizing the REST
190
318
  // payload so the operator can see WHAT is being written, not just the
191
319
  // endpoint. Values are redaction-passed + truncated; nested objects show
@@ -195,6 +323,7 @@ describe('formatPermissionCardBody', () => {
195
323
  toolName: 'mcp__brevo__post',
196
324
  inputPreview: JSON.stringify({
197
325
  path: '/smtp/email',
326
+ reason: 'sending the priority-access invite',
198
327
  body: { subject: 'Priority access', templateId: 12, to: [{ email: 'lisa@example.com' }] },
199
328
  }),
200
329
  description: 'HIGH RISK: write to the brevo API (POST).',
@@ -202,7 +331,7 @@ describe('formatPermissionCardBody', () => {
202
331
  })
203
332
  const lines = body.split('\n')
204
333
  expect(lines[0]).toBe('🔐 <b>Marko</b> wants to POST /smtp/email (Brevo)')
205
- expect(lines[1]).toBe('why: <i>HIGH RISK: write to the brevo API (POST).</i>')
334
+ expect(lines[1]).toBe('why: <i>sending the priority-access invite</i>')
206
335
  // Third line: scalar keys show value; the nested `to` array shows key-only.
207
336
  expect(lines[2]).toContain('↳')
208
337
  expect(lines[2]).toContain('subject: Priority access')
@@ -340,3 +469,63 @@ describe('formatPermissionResumeMessage — agent-voiced verdict ack', () => {
340
469
  ).toBe('▶️ <b>Agent</b> — got it, continuing: <i>edit: x.md</i>')
341
470
  })
342
471
  })
472
+
473
+ describe('truncated inputPreview recovery — Edit/Write file_path extraction', () => {
474
+ /**
475
+ * Claude Code produces `input_preview = JSON.stringify(displayInput).slice(0, 200)`.
476
+ * For Edit/Write the serialised form is:
477
+ * {"file_path":"...","old_string":"<hundreds of chars>","new_string":"..."}
478
+ * which almost always exceeds 200 chars, leaving invalid (truncated) JSON.
479
+ * "file_path" is the first key so its value is intact within 200 chars.
480
+ * The lenient regex fallback must recover it so cards read "edit: module.ts"
481
+ * instead of the generic "edit files".
482
+ */
483
+ function truncatedPreview(filePath: string): string {
484
+ const full = JSON.stringify({
485
+ file_path: filePath,
486
+ old_string:
487
+ 'function oldFn() {\n // many lines of old code that push the JSON way past 200 chars\n const x = doSomething();\n return x;\n}',
488
+ new_string: 'function newFn() { return doSomethingElse(); }',
489
+ })
490
+ return full.slice(0, 200)
491
+ }
492
+
493
+ test('naturalAction recovers file basename from truncated Edit inputPreview', () => {
494
+ const filePath = '/home/user/project/src/some/long/module.ts'
495
+ const preview = truncatedPreview(filePath)
496
+
497
+ // The truncated preview must be invalid JSON (precondition of the bug).
498
+ expect(() => JSON.parse(preview)).toThrow()
499
+
500
+ // After fix: basename is recovered via regex fallback.
501
+ expect(naturalAction('Edit', preview)).toBe('edit: module.ts')
502
+ })
503
+
504
+ test('naturalAction recovers file basename from truncated Write inputPreview', () => {
505
+ const filePath = '/home/user/project/src/config/settings.json'
506
+ const full = JSON.stringify({
507
+ file_path: filePath,
508
+ content: 'x'.repeat(300),
509
+ })
510
+ const preview = full.slice(0, 200)
511
+ expect(() => JSON.parse(preview)).toThrow()
512
+ expect(naturalAction('Write', preview)).toBe('write: settings.json')
513
+ })
514
+
515
+ test('resolveScopedAllowChoices includes a per-file "This file" choice for truncated Edit inputPreview', () => {
516
+ const filePath = '/home/user/project/src/some/long/module.ts'
517
+ const preview = truncatedPreview(filePath)
518
+
519
+ // The truncated preview must be invalid JSON (precondition of the bug).
520
+ expect(() => JSON.parse(preview)).toThrow()
521
+
522
+ const choices = resolveScopedAllowChoices('Edit', preview)
523
+ expect(choices).not.toBeNull()
524
+ // After fix: specific "This file" choice present with the full path.
525
+ expect(choices!.specific).toBeDefined()
526
+ expect(choices!.specific!.buttonLabel).toBe('This file')
527
+ expect(choices!.specific!.rule).toBe(`Edit(${filePath})`)
528
+ // Broad option also present.
529
+ expect(choices!.broad.buttonLabel).toBe('Any file')
530
+ })
531
+ })
@@ -18,6 +18,8 @@ import {
18
18
  patchQuotaWatchState,
19
19
  emptyQuotaWatchState,
20
20
  emptyAccountState,
21
+ isLiveCorroboration,
22
+ type CorroborationProbe,
21
23
  } from "../quota-watch.js";
22
24
  import type { AccountSnapshot } from "../auth-snapshot-format.js";
23
25
  import type { QuotaUtilization } from "../quota-check.js";
@@ -217,6 +219,22 @@ describe("evaluateQuotaWatchAccount — message content", () => {
217
219
  expect(d.message).toContain("5-hour");
218
220
  });
219
221
 
222
+ it("#2495 Change 3 — throttling alarm advertises live-probe corroboration, not a raw cache read", () => {
223
+ const d = evaluateQuotaWatchAccount({
224
+ agentName: "lawgpt",
225
+ snap: THROTTLING_5H,
226
+ prev: PREV_NEVER_NOTIFIED,
227
+ now: NOW,
228
+ });
229
+ expect(d.kind).toBe("notify");
230
+ if (d.kind !== "notify") return;
231
+ // The alarm body's source-of-truth footnote must reflect that the gateway
232
+ // corroborates the alarm with a forceLive probe (the broker re-probe at
233
+ // gateway.ts runQuotaWatch), not "Source: broker quota cache".
234
+ expect(d.message).toContain("Live-probe corroborated");
235
+ expect(d.message).not.toContain("Source: broker quota cache");
236
+ });
237
+
220
238
  it("recovery message contains account label and percentages", () => {
221
239
  const d = evaluateQuotaWatchAccount({
222
240
  agentName: "lawgpt",
@@ -272,6 +290,79 @@ describe("evaluateQuotaWatchAccount — message content", () => {
272
290
  });
273
291
  });
274
292
 
293
+ // ── corroboration gate (#2495 BLOCKER) ───────────────────────────────────────
294
+
295
+ describe("isLiveCorroboration — only a genuine live probe corroborates (#2495 BLOCKER)", () => {
296
+ // A successful upstream live probe.
297
+ const LIVE_OK: CorroborationProbe = { result: { ok: true }, served: "live" };
298
+ // The trap: under forceLive, when the upstream probe FAILS but the broker
299
+ // holds a prior snapshot, opProbeQuota returns cachedSnapshotToResult →
300
+ // result.ok === true but served === "cache". Vacuous corroboration.
301
+ const CACHE_FALLBACK_AFTER_PROBE_FAIL: CorroborationProbe = {
302
+ result: { ok: true },
303
+ served: "cache",
304
+ };
305
+ // A hard probe failure with no prior snapshot to fall back on.
306
+ const PROBE_FAILED: CorroborationProbe = { result: { ok: false }, served: "live" };
307
+
308
+ it("a genuine live probe (ok:true, served:'live') corroborates", () => {
309
+ expect(isLiveCorroboration(LIVE_OK)).toBe(true);
310
+ });
311
+
312
+ it("a failed-probe cache fallback (ok:true, served:'cache') does NOT corroborate", () => {
313
+ // This is the BLOCKER: a stale cache read must NOT be mistaken for a live
314
+ // corroboration, even though result.ok is true.
315
+ expect(isLiveCorroboration(CACHE_FALLBACK_AFTER_PROBE_FAIL)).toBe(false);
316
+ });
317
+
318
+ it("a failed probe (ok:false) does NOT corroborate", () => {
319
+ expect(isLiveCorroboration(PROBE_FAILED)).toBe(false);
320
+ });
321
+
322
+ it("a missing entry (probe absent from batch result) does NOT corroborate", () => {
323
+ expect(isLiveCorroboration(undefined)).toBe(false);
324
+ });
325
+
326
+ it("a legacy entry with no `served` tag does NOT corroborate (fail-closed)", () => {
327
+ expect(isLiveCorroboration({ result: { ok: true } })).toBe(false);
328
+ });
329
+
330
+ // Simulate the gateway gate (runQuotaWatch). The gate fires the alarm and
331
+ // stamps the "Live-probe corroborated" footnote ONLY when
332
+ // isLiveCorroboration is true; otherwise it DEFERS (state untouched).
333
+ function gateDecision(entry: CorroborationProbe | undefined): {
334
+ fired: boolean;
335
+ message: string | null;
336
+ } {
337
+ if (isLiveCorroboration(entry)) {
338
+ // Genuine corroboration → re-evaluate and notify with the live numbers.
339
+ const d = evaluateQuotaWatchAccount({
340
+ agentName: "lawgpt",
341
+ snap: THROTTLING_5H,
342
+ prev: PREV_NEVER_NOTIFIED,
343
+ now: NOW,
344
+ });
345
+ return { fired: true, message: d.kind === "notify" ? d.message : null };
346
+ }
347
+ // Not corroborated → defer. No alarm, no footnote.
348
+ return { fired: false, message: null };
349
+ }
350
+
351
+ it("failed-probe cache fallback → alarm DEFERRED, no false 'Live-probe corroborated' footnote", () => {
352
+ const decision = gateDecision(CACHE_FALLBACK_AFTER_PROBE_FAIL);
353
+ expect(decision.fired).toBe(false);
354
+ expect(decision.message).toBeNull();
355
+ // The false footnote must NOT be produced on this path.
356
+ expect(decision.message ?? "").not.toContain("Live-probe corroborated");
357
+ });
358
+
359
+ it("genuine live probe → alarm FIRES and stamps the 'Live-probe corroborated' footnote", () => {
360
+ const decision = gateDecision(LIVE_OK);
361
+ expect(decision.fired).toBe(true);
362
+ expect(decision.message).toContain("Live-probe corroborated");
363
+ });
364
+ });
365
+
275
366
  // ── state persistence tests ──────────────────────────────────────────────────
276
367
 
277
368
  describe("loadQuotaWatchState / saveQuotaWatchState — round-trip", () => {
@@ -369,40 +460,161 @@ describe("patchQuotaWatchState", () => {
369
460
  describe("evaluateFleetAllExhausted", () => {
370
461
  const notAlerting = { lastNotifiedHealth: null, lastNotifiedAt: 0 };
371
462
  const alerting = { lastNotifiedHealth: "throttling" as const, lastNotifiedAt: 1000 };
372
-
373
- it("notifies (entered) when every account is exhausted and we weren't alerting", () => {
463
+ // Use a realistic "now" so a fresh probe (capturedAt near NOW) and a stale
464
+ // probe (capturedAt older than maxStaleMs) are unambiguous.
465
+ const NOW = 10_000_000_000;
466
+ const STALE = DEFAULT_QUOTA_WATCH_MAX_STALE_MS;
467
+ const gate = { maxStaleMs: STALE };
468
+ /** A fresh live snapshot captured `ageMs` ago (default: just now). */
469
+ const freshProbe = (ageMs = 0) => ({ capturedAt: NOW - ageMs });
470
+ /** A stale snapshot, captured just past the staleness ceiling. */
471
+ const staleProbe = () => ({ capturedAt: NOW - STALE - 1 });
472
+
473
+ it("notifies (entered) when every exhausted account is backed by a FRESH probe", () => {
374
474
  const d = evaluateFleetAllExhausted({
375
475
  accounts: [
376
- { label: "a", exhausted: true, exhausted_until: 5_000 },
377
- { label: "b", exhausted: true, exhausted_until: 9_000 },
476
+ { label: "a", exhausted: true, exhausted_until: NOW + 5_000, last_quota: freshProbe() },
477
+ { label: "b", exhausted: true, exhausted_until: NOW + 9_000, last_quota: freshProbe(60_000) },
378
478
  ],
379
479
  prev: notAlerting,
380
- now: 1_000,
480
+ now: NOW,
481
+ tuning: gate,
381
482
  });
382
483
  expect(d.kind).toBe("notify");
383
484
  if (d.kind === "notify") {
384
485
  expect(d.transition).toBe("entered");
385
486
  expect(d.newState.lastNotifiedHealth).toBe("throttling");
386
487
  expect(d.message).toContain("All accounts exhausted");
387
- // earliest reset is the 5_000 one
488
+ // earliest reset is the +5_000 one
388
489
  expect(d.message).toContain("Earliest reset");
389
490
  }
390
491
  });
391
492
 
493
+ it("skips (probe-blind) when all exhausted rests on STALE marks with no fresh probe (#2478)", () => {
494
+ const d = evaluateFleetAllExhausted({
495
+ accounts: [
496
+ { label: "a", exhausted: true, exhausted_until: NOW + 5_000, last_quota: staleProbe() },
497
+ { label: "b", exhausted: true, exhausted_until: NOW + 9_000, last_quota: null },
498
+ ],
499
+ prev: notAlerting,
500
+ now: NOW,
501
+ tuning: gate,
502
+ });
503
+ expect(d.kind).toBe("skip");
504
+ if (d.kind === "skip") expect(d.reason).toBe("probe-blind");
505
+ });
506
+
507
+ it("skips (probe-blind) on MIXED freshness — one stale-mark-only account is enough (#2478)", () => {
508
+ const d = evaluateFleetAllExhausted({
509
+ accounts: [
510
+ { label: "a", exhausted: true, exhausted_until: NOW + 5_000, last_quota: freshProbe() },
511
+ { label: "b", exhausted: true, exhausted_until: NOW + 9_000, last_quota: staleProbe() },
512
+ ],
513
+ prev: notAlerting,
514
+ now: NOW,
515
+ tuning: gate,
516
+ });
517
+ expect(d.kind).toBe("skip");
518
+ if (d.kind === "skip") expect(d.reason).toBe("probe-blind");
519
+ });
520
+
521
+ it("skips (probe-blind) when a probe is FUTURE-dated beyond the clock-skew tolerance (#2479 nit)", () => {
522
+ // A future-dated capturedAt makes `now - capturedAt` negative, which would
523
+ // slip under the staleness ceiling and read as fresh. The clock-skew guard
524
+ // (mirrored from broker `snapshotFresh`: capturedAt <= now + 60_000) must
525
+ // reject it so a skewed snapshot does NOT corroborate exhaustion.
526
+ const futureProbe = () => ({ capturedAt: NOW + 60_000 + 1 }); // 1ms past tolerance
527
+ const d = evaluateFleetAllExhausted({
528
+ accounts: [
529
+ { label: "a", exhausted: true, exhausted_until: NOW + 5_000, last_quota: futureProbe() },
530
+ ],
531
+ prev: notAlerting,
532
+ now: NOW,
533
+ tuning: gate,
534
+ });
535
+ expect(d.kind).toBe("skip");
536
+ if (d.kind === "skip") expect(d.reason).toBe("probe-blind");
537
+ });
538
+
539
+ it("notifies (entered) when a probe is future-dated WITHIN the skew tolerance (boundary)", () => {
540
+ // Exactly +60_000 ms is within tolerance and still negative-age fresh — it
541
+ // must count as a fresh live probe (proves the guard is a future-dating
542
+ // skew allowance, not an outright rejection of any future timestamp).
543
+ const d = evaluateFleetAllExhausted({
544
+ accounts: [
545
+ { label: "a", exhausted: true, exhausted_until: NOW + 5_000, last_quota: { capturedAt: NOW + 60_000 } },
546
+ ],
547
+ prev: notAlerting,
548
+ now: NOW,
549
+ tuning: gate,
550
+ });
551
+ expect(d.kind).toBe("notify");
552
+ if (d.kind === "notify") expect(d.transition).toBe("entered");
553
+ });
554
+
555
+ it("notifies (entered) when out_of_credits account has a FRESH probe (freshness drives corroboration, not the credits flag)", () => {
556
+ // NEW CONTRACT (fix/out-of-credits-serve-block): out_of_credits is
557
+ // informational — it does NOT corroborate exhaustion on its own. But a
558
+ // genuinely fresh probe (capturedAt within maxStaleMs) still corroborates.
559
+ // Result: still notifies — for the right reason (fresh snapshot), not the
560
+ // credits reason.
561
+ const d = evaluateFleetAllExhausted({
562
+ accounts: [
563
+ {
564
+ label: "a",
565
+ exhausted: true,
566
+ last_quota: { capturedAt: NOW, overageDisabledReason: "out_of_credits" },
567
+ },
568
+ ],
569
+ prev: notAlerting,
570
+ now: NOW,
571
+ tuning: gate,
572
+ });
573
+ expect(d.kind).toBe("notify");
574
+ if (d.kind === "notify") expect(d.transition).toBe("entered");
575
+ });
576
+
577
+ it("out_of_credits does NOT corroborate exhaustion when the snapshot is past the staleness ceiling (probe-blind)", () => {
578
+ // NEW CONTRACT (fix/out-of-credits-serve-block): out_of_credits is
579
+ // informational, NOT exhaustion in its own right at any util. A stale
580
+ // snapshot with only out_of_credits provides no live corroboration →
581
+ // probe-blind → skip (no false fleet alert). Contrast with the test above:
582
+ // a FRESH probe with out_of_credits still notifies via freshness, not the
583
+ // credits flag.
584
+ const d = evaluateFleetAllExhausted({
585
+ accounts: [
586
+ {
587
+ label: "a",
588
+ exhausted: true,
589
+ last_quota: { capturedAt: NOW - STALE - 1, overageDisabledReason: "out_of_credits" },
590
+ },
591
+ ],
592
+ prev: notAlerting,
593
+ now: NOW,
594
+ tuning: gate,
595
+ });
596
+ expect(d.kind).toBe("skip");
597
+ if (d.kind === "skip") expect(d.reason).toBe("probe-blind");
598
+ });
599
+
392
600
  it("skips (still) when all exhausted and already alerting — no re-spam", () => {
393
601
  const d = evaluateFleetAllExhausted({
394
602
  accounts: [{ label: "a", exhausted: true }, { label: "b", exhausted: true }],
395
603
  prev: alerting,
396
604
  now: 2_000,
605
+ tuning: gate,
397
606
  });
398
607
  expect(d.kind).toBe("skip");
399
608
  });
400
609
 
401
- it("notifies (recovered) when one account frees after we were alerting", () => {
610
+ it("notifies (recovered) when one account frees after we were alerting — UNGUARDED by probe-blind", () => {
611
+ // Recovery must fire even when freshness data is absent, so a legitimately
612
+ // fired alert is never stranded (#2478 scope: gate only the `entered` edge).
402
613
  const d = evaluateFleetAllExhausted({
403
614
  accounts: [{ label: "a", exhausted: false }, { label: "b", exhausted: true }],
404
615
  prev: alerting,
405
616
  now: 3_000,
617
+ tuning: gate,
406
618
  });
407
619
  expect(d.kind).toBe("notify");
408
620
  if (d.kind === "notify") {
@@ -413,20 +625,51 @@ describe("evaluateFleetAllExhausted", () => {
413
625
  }
414
626
  });
415
627
 
628
+ it("entered then recovered: a legit fire (fresh probes) is followed by a working recovery edge", () => {
629
+ const entered = evaluateFleetAllExhausted({
630
+ accounts: [
631
+ { label: "a", exhausted: true, last_quota: freshProbe() },
632
+ { label: "b", exhausted: true, last_quota: freshProbe() },
633
+ ],
634
+ prev: notAlerting,
635
+ now: NOW,
636
+ tuning: gate,
637
+ });
638
+ expect(entered.kind).toBe("notify");
639
+ if (entered.kind !== "notify") return;
640
+ expect(entered.transition).toBe("entered");
641
+ // Feed the persisted state forward; one account frees.
642
+ const recovered = evaluateFleetAllExhausted({
643
+ accounts: [
644
+ { label: "a", exhausted: false, last_quota: freshProbe() },
645
+ { label: "b", exhausted: true, last_quota: freshProbe() },
646
+ ],
647
+ prev: entered.newState,
648
+ now: NOW + 60_000,
649
+ tuning: gate,
650
+ });
651
+ expect(recovered.kind).toBe("notify");
652
+ if (recovered.kind === "notify") expect(recovered.transition).toBe("recovered");
653
+ });
654
+
416
655
  it("skips (not-all) when some account is healthy and we weren't alerting", () => {
417
656
  const d = evaluateFleetAllExhausted({
418
657
  accounts: [{ label: "a", exhausted: false }, { label: "b", exhausted: true }],
419
658
  prev: notAlerting,
420
659
  now: 4_000,
660
+ tuning: gate,
421
661
  });
422
662
  expect(d.kind).toBe("skip");
423
663
  });
424
664
 
425
665
  it("never alerts on an empty fleet", () => {
426
- expect(evaluateFleetAllExhausted({ accounts: [], prev: notAlerting, now: 1 }).kind).toBe("skip");
666
+ expect(
667
+ evaluateFleetAllExhausted({ accounts: [], prev: notAlerting, now: 1, tuning: gate }).kind,
668
+ ).toBe("skip");
427
669
  });
428
670
 
429
- it("shows reset-unknown when no exhausted_until is present", () => {
671
+ it("with the gate disabled (maxStaleMs 0) the legacy bare-mark behaviour is preserved", () => {
672
+ // Kill-switch parity: tuning omitted / 0 → fire on bare marks (pre-#2478).
430
673
  const d = evaluateFleetAllExhausted({
431
674
  accounts: [{ label: "a", exhausted: true }],
432
675
  prev: notAlerting,
@@ -81,7 +81,12 @@ describe('#1713 + #1728 — reply tool reaction contract', () => {
81
81
  )
82
82
  const anchor = src.indexOf("fresh sendMessage from reply tool is a user-visible")
83
83
  expect(anchor).toBeGreaterThan(-1)
84
- const slice = src.slice(anchor, anchor + 3000)
84
+ // Window widened 3000 → 4000 (#2556): the deterministic-emission lever-1
85
+ // sticky-latch set + comment lives inside the post-send isFinalAnswerReply
86
+ // branch between this anchor and finalizeStatusReaction, growing the block
87
+ // past the old 3000-char window. The assertion's INTENT is unchanged —
88
+ // finalize present, gated by isFinalAnswerReply, and after the gate.
89
+ const slice = src.slice(anchor, anchor + 4000)
85
90
  // The finalize MUST appear in the post-send block.
86
91
  expect(slice).toMatch(/finalizeStatusReaction\(/)
87
92
  // It MUST be gated by isFinalAnswerReply (the classifier prevents