switchroom 0.13.1 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/agent-scheduler/index.js +2 -2
  2. package/dist/auth-broker/index.js +2 -2
  3. package/dist/cli/switchroom.js +21 -18
  4. package/dist/host-control/main.js +2 -2
  5. package/dist/vault/approvals/kernel-server.js +2 -2
  6. package/dist/vault/broker/server.js +2 -2
  7. package/package.json +1 -1
  8. package/telegram-plugin/dist/gateway/gateway.js +54 -22
  9. package/telegram-plugin/gateway/boot-probes.ts +13 -6
  10. package/telegram-plugin/gateway/gateway.ts +71 -122
  11. package/telegram-plugin/hooks/silent-end-interrupt-stop.mjs +5 -1
  12. package/telegram-plugin/silent-end.ts +56 -0
  13. package/telegram-plugin/tests/boot-probes.test.ts +26 -2
  14. package/telegram-plugin/tests/silent-end.test.ts +69 -0
  15. package/skills/buildkite-agent-infrastructure/SKILL.md +0 -321
  16. package/skills/buildkite-agent-infrastructure/agents/openai.yaml +0 -6
  17. package/skills/buildkite-agent-infrastructure/assets/buildkite-icon-large.png +0 -0
  18. package/skills/buildkite-agent-infrastructure/assets/buildkite-icon-small.png +0 -0
  19. package/skills/buildkite-agent-infrastructure/references/audit-logging.md +0 -87
  20. package/skills/buildkite-agent-infrastructure/references/graphql-mutations.md +0 -690
  21. package/skills/buildkite-agent-infrastructure/references/instance-shapes.md +0 -38
  22. package/skills/buildkite-agent-infrastructure/references/pipeline-templates.md +0 -73
  23. package/skills/buildkite-agent-infrastructure/references/self-hosted-agents.md +0 -137
  24. package/skills/buildkite-agent-infrastructure/references/sso-saml.md +0 -92
  25. package/skills/buildkite-agent-runtime/SKILL.md +0 -509
  26. package/skills/buildkite-agent-runtime/agents/openai.yaml +0 -6
  27. package/skills/buildkite-agent-runtime/assets/buildkite-icon-large.png +0 -0
  28. package/skills/buildkite-agent-runtime/assets/buildkite-icon-small.png +0 -0
  29. package/skills/buildkite-agent-runtime/references/flag-reference.md +0 -417
  30. package/skills/buildkite-agent-runtime/references/patterns-and-recipes.md +0 -555
  31. package/skills/buildkite-api/SKILL.md +0 -308
  32. package/skills/buildkite-api/agents/openai.yaml +0 -6
  33. package/skills/buildkite-api/assets/buildkite-icon-large.png +0 -0
  34. package/skills/buildkite-api/assets/buildkite-icon-small.png +0 -0
  35. package/skills/buildkite-api/references/graphql-reference.md +0 -195
  36. package/skills/buildkite-api/references/patterns.md +0 -44
  37. package/skills/buildkite-api/references/webhooks.md +0 -161
  38. package/skills/buildkite-cli/SKILL.md +0 -397
  39. package/skills/buildkite-cli/agents/openai.yaml +0 -6
  40. package/skills/buildkite-cli/assets/buildkite-icon-large.png +0 -0
  41. package/skills/buildkite-cli/assets/buildkite-icon-small.png +0 -0
  42. package/skills/buildkite-cli/references/command-reference.md +0 -181
  43. package/skills/buildkite-migration/SKILL.md +0 -195
  44. package/skills/buildkite-pipelines/SKILL.md +0 -481
  45. package/skills/buildkite-pipelines/agents/openai.yaml +0 -6
  46. package/skills/buildkite-pipelines/assets/buildkite-icon-large.png +0 -0
  47. package/skills/buildkite-pipelines/assets/buildkite-icon-small.png +0 -0
  48. package/skills/buildkite-pipelines/examples/basic-pipeline.yml +0 -24
  49. package/skills/buildkite-pipelines/examples/optimized-pipeline.yml +0 -100
  50. package/skills/buildkite-pipelines/references/advanced-patterns.md +0 -286
  51. package/skills/buildkite-pipelines/references/retry-and-error-codes.md +0 -131
  52. package/skills/buildkite-pipelines/references/step-types-reference.md +0 -225
  53. package/skills/buildkite-secure-delivery/SKILL.md +0 -182
  54. package/skills/buildkite-secure-delivery/agents/openai.yaml +0 -6
  55. package/skills/buildkite-secure-delivery/assets/buildkite-icon-large.png +0 -0
  56. package/skills/buildkite-secure-delivery/assets/buildkite-icon-small.png +0 -0
  57. package/skills/buildkite-secure-delivery/references/oidc-cloud-providers.md +0 -83
  58. package/skills/buildkite-secure-delivery/references/package-publishing.md +0 -100
  59. package/skills/buildkite-test-engine/SKILL.md +0 -256
  60. package/skills/buildkite-test-engine/agents/openai.yaml +0 -6
  61. package/skills/buildkite-test-engine/assets/buildkite-icon-large.png +0 -0
  62. package/skills/buildkite-test-engine/assets/buildkite-icon-small.png +0 -0
  63. package/skills/buildkite-test-engine/examples/bktec-splitting.yml +0 -16
  64. package/skills/buildkite-test-engine/examples/collector-pipeline.yml +0 -11
  65. package/skills/buildkite-test-engine/references/collectors.md +0 -198
  66. package/skills/buildkite-test-engine/references/splitting-examples.md +0 -93
@@ -76,7 +76,7 @@ import {
76
76
  import { emitRuntimeMetric } from '../runtime-metrics.js'
77
77
  import { classifyInbound } from '../inbound-classifier.js'
78
78
  import * as silencePoke from '../silence-poke.js'
79
- import { writeSilentEndState, clearSilentEndState } from '../silent-end.js'
79
+ import { writeSilentEndState, clearSilentEndState, recordSilentTurnEnd } from '../silent-end.js'
80
80
  import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
81
81
  import { type SessionEvent } from '../session-tail.js'
82
82
  import {
@@ -139,6 +139,16 @@ import { validateStringArray } from './access-validator.js'
139
139
  * identical envelope shapes.
140
140
  */
141
141
  const REPLY_TO_TEXT_MAX = 200
142
+
143
+ /**
144
+ * #1161 — user-facing fallback delivered when a user-message turn ends
145
+ * with zero outbound messages AND the deterministic Stop-hook re-prompt
146
+ * has already been exhausted. Without this the user only sees the
147
+ * progress card vanish; silence must never be the failure mode.
148
+ */
149
+ const SILENT_END_FALLBACK_TEXT =
150
+ '⚠️ The agent finished working but didn’t send a reply — your last ' +
151
+ 'message may not have been answered. Please try asking again.'
142
152
  import { markdownToHtml, splitHtmlChunks, repairEscapedWhitespace, telegramHtmlToPlainText } from '../format.js'
143
153
  import {
144
154
  validateInlineKeyboard,
@@ -1278,62 +1288,32 @@ function streamKey(chatId: string, threadId?: number | null): string {
1278
1288
  return chatKey(chatId, threadId)
1279
1289
  }
1280
1290
 
1281
- /**
1282
- * Reaction-state cleanup controller + msg-id maps + active-reaction
1283
- * file removal. PURE reaction-cleanup, no turn-end semantics:
1284
- * - does NOT emit shadow `turnEnd`
1285
- * - does NOT clear `activeTurnStartedAt` (turn-active marker)
1286
- * - does NOT fire the model-idle restart/flush gate
1287
- *
1288
- * Called from mid-turn signals like `endStatusReaction` (post-reply-tool,
1289
- * post-stream-reply-finalize) where the 👍 transition fires but the
1290
- * turn is still active. Per #1603 audit step 2: the reply tool was
1291
- * previously calling `purgeReactionTracking` here, which fired premature
1292
- * shadow `turnEnd` events and cleared `activeTurnStartedAt` mid-turn —
1293
- * the latter would trigger the model-idle restart probe and
1294
- * pendingInbound flush as if claude had gone idle.
1295
- */
1296
- function clearReactionState(key: string): void {
1291
+ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
1292
+ // Phase 2b: turn end. The key was registered via setTurnStarted when
1293
+ // the inbound arrived; purge is the canonical turn-end signal.
1294
+ //
1295
+ // outboundEmitted: read from the explicit `endingTurn` parameter when
1296
+ // provided (canonical path via endCurrentTurnAtomic — module-scope
1297
+ // currentTurn is already null by the time we get here), falling back
1298
+ // to `currentTurn?.replyCalled` for the legacy callsites that haven't
1299
+ // been threaded yet (sibling-key purges, restart-init cleanup).
1300
+ // Without this explicit-turn handoff the shadow trace would report
1301
+ // outboundEmitted=false on every replied turn (the dominant happy
1302
+ // path), producing strictly worse data than the blind `true` it
1303
+ // replaced. Invariant #5's `lastOutboundAt` correctness depends on
1304
+ // this signal being accurate.
1305
+ const outboundEmitted = endingTurn != null
1306
+ ? endingTurn.replyCalled === true
1307
+ : currentTurn?.replyCalled === true
1308
+ shadowEmit({ kind: 'turnEnd', key: key as _ChatKey, at: Date.now(), outboundEmitted })
1297
1309
  const msgInfo = activeReactionMsgIds.get(key)
1298
1310
  activeStatusReactions.delete(key)
1299
1311
  activeReactionMsgIds.delete(key)
1312
+ activeTurnStartedAt.delete(key)
1300
1313
  if (msgInfo) {
1301
1314
  const agentDir = resolveAgentDirFromEnv()
1302
1315
  if (agentDir != null) removeActiveReaction(agentDir, msgInfo.chatId, msgInfo.messageId)
1303
1316
  }
1304
- }
1305
-
1306
- function purgeReactionTracking(
1307
- key: string,
1308
- endingTurn?: CurrentTurn,
1309
- outboundEmittedOverride?: boolean,
1310
- ): void {
1311
- // Phase 2b: turn end. The key was registered via setTurnStarted when
1312
- // the inbound arrived; purge is the canonical turn-end signal.
1313
- //
1314
- // outboundEmitted derivation, in precedence order:
1315
- // 1. Explicit `outboundEmittedOverride` (e.g. silence-poke
1316
- // framework fallback FORCES false because the 5-min fallback
1317
- // firing proves visible delivery never happened — regardless of
1318
- // whatever `replyCalled` the wedged turn object carries).
1319
- // 2. `endingTurn.replyCalled` when the canonical caller threads
1320
- // the authoritative turn (endCurrentTurnAtomic path; module-scope
1321
- // currentTurn is already null by the time we get here).
1322
- // 3. `currentTurn?.replyCalled` fallback for the (now-vanishing)
1323
- // legacy callsites. Without the explicit-turn handoff the shadow
1324
- // trace would report outboundEmitted=false on every replied
1325
- // turn (the dominant happy path), producing strictly worse data
1326
- // than the blind `true` it replaced. Invariant #5's
1327
- // `lastOutboundAt` correctness depends on this signal being
1328
- // accurate.
1329
- const outboundEmitted = outboundEmittedOverride !== undefined
1330
- ? outboundEmittedOverride
1331
- : endingTurn != null
1332
- ? endingTurn.replyCalled === true
1333
- : currentTurn?.replyCalled === true
1334
- shadowEmit({ kind: 'turnEnd', key: key as _ChatKey, at: Date.now(), outboundEmitted })
1335
- clearReactionState(key)
1336
- activeTurnStartedAt.delete(key)
1337
1317
 
1338
1318
  // If no more active turns and a restart is pending, perform it now.
1339
1319
  //
@@ -1623,24 +1603,12 @@ async function resolveCompactCard(
1623
1603
  }
1624
1604
 
1625
1605
  function endStatusReaction(chatId: string, threadId: number | undefined, outcome: 'done' | 'error'): void {
1626
- // Mid-turn signal: the reply tool fired, or stream_reply finalized,
1627
- // and the status-reaction needs to transition to its terminal emoji
1628
- // (👍 / ⚠️). The turn itself is still active — the canonical turn-end
1629
- // signal is `endCurrentTurnAtomic(turn)`, which runs later via the
1630
- // turn_end handler / context-exhaust path / silent-marker path.
1631
- //
1632
- // Pre-#1603 audit step 2 (this commit), this called
1633
- // `purgeReactionTracking(key)` directly, which would fire shadow
1634
- // `turnEnd` and clear the turn-active marker mid-turn — the latter
1635
- // triggering the model-idle restart probe + pendingInbound flush as
1636
- // if claude had gone idle. Use `clearReactionState` to only do the
1637
- // reaction-cleanup work.
1638
1606
  const key = statusKey(chatId, threadId)
1639
1607
  const ctrl = activeStatusReactions.get(key)
1640
1608
  if (!ctrl) return
1641
1609
  if (outcome === 'done') ctrl.setDone()
1642
1610
  else ctrl.setError()
1643
- clearReactionState(key)
1611
+ purgeReactionTracking(key)
1644
1612
  }
1645
1613
 
1646
1614
  function resolveThreadId(chat_id: string, explicit?: string | number | null): number | undefined {
@@ -3135,15 +3103,7 @@ silencePoke.startTimer({
3135
3103
  // Drop silence-poke state and clear turn-active so the next inbound
3136
3104
  // for this chat starts a fresh turn instead of queueing forever.
3137
3105
  silencePoke.endTurn(fbKey)
3138
- // PR 3b step 5 (#1603 audit): force outboundEmitted=false. The
3139
- // framework fallback fires precisely because visible delivery
3140
- // didn't happen in 5 min — `wedgedTurn.replyCalled` may have been
3141
- // set during the turn (e.g. reply tool invoked but Telegram side
3142
- // never confirmed delivery), but from the user's perspective no
3143
- // outbound landed. The state machine's `noteOutbound` effect
3144
- // must NOT fire for this path. Pass `undefined` for endingTurn
3145
- // and `false` as the explicit override.
3146
- purgeReactionTracking(fbKey, undefined, false)
3106
+ purgeReactionTracking(fbKey)
3147
3107
  // Defense-in-depth: the fallback's purgeReactionTracking above
3148
3108
  // clears the canonical statusKey(chatId, threadId) for fbKey
3149
3109
  // only. activeTurnStartedAt can hold sibling entries for the
@@ -3156,14 +3116,10 @@ silencePoke.startTimer({
3156
3116
  // purger. Multi-chat-safe — only touches keys for fbChatId, so
3157
3117
  // #1546's intentional cross-chat safety guard is preserved.
3158
3118
  // See turn-state-purge.ts.
3159
- //
3160
- // Same `outboundEmitted=false` rationale as the bare call above —
3161
- // wrap the purger so every sibling-key purge emits a fallback
3162
- // shadow turnEnd with the truthful "no visible delivery" signal.
3163
3119
  const fbExtraPurge = purgeStaleTurnsForChat(
3164
3120
  fbChatId,
3165
3121
  activeTurnStartedAt.keys(),
3166
- (k) => purgeReactionTracking(k, undefined, false),
3122
+ purgeReactionTracking,
3167
3123
  )
3168
3124
  // Null `currentTurn` if it's still pointing at the wedged turn —
3169
3125
  // when claude eventually fires a late `turn_end` for this session
@@ -5882,10 +5838,7 @@ function handleSessionEvent(ev: SessionEvent): void {
5882
5838
  const ceKey = statusKey(chatId, threadId)
5883
5839
  const ctrl = activeStatusReactions.get(ceKey)
5884
5840
  if (ctrl) ctrl.setError()
5885
- // Duplicate-emit removed (#1603 audit, step 1): the canonical
5886
- // endCurrentTurnAtomic(turn) call at line ~5851 below already
5887
- // invokes purgeReactionTracking on the same ceKey. The bare
5888
- // call here was firing a second shadow `turnEnd` per traversal.
5841
+ purgeReactionTracking(ceKey)
5889
5842
  // Surfaced during CC-5 investigation (`docs/status-ask-cause-classes.md`):
5890
5843
  // the context-exhaust bail path teardown was missing
5891
5844
  // `silencePoke.endTurn(key)`. Without it, the silence-poke state for
@@ -6043,10 +5996,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6043
5996
  // Fall through to normal state cleanup (ctrl.setDone, purge, etc.)
6044
5997
  // but skip the regular closeProgressLane so we don't re-finalize.
6045
5998
  if (ctrl) ctrl.setDone()
6046
- // Duplicate-emit removed (#1603 audit, step 1): endCurrentTurnAtomic(turn)
6047
- // at line ~6049 below invokes purgeReactionTracking on the same key
6048
- // (statusKey(chatId, threadId)). The bare call here was firing a
6049
- // second shadow `turnEnd` per silent-marker traversal.
5999
+ purgeReactionTracking(statusKey(chatId, threadId))
6050
6000
  // Match the normal turn_end path's telemetry so silent-marker turns
6051
6001
  // still appear in turn-duration graphs.
6052
6002
  {
@@ -6187,15 +6137,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6187
6137
  // mirroring this contract — so reply-only turns transition
6188
6138
  // to terminal 👍 in their own success path rather than
6189
6139
  // relying on this dedup heuristic.
6190
- //
6191
- // PR 3b step 3 (#1603 audit): thread the captured `turn`
6192
- // explicitly. `endCurrentTurnAtomic(turn)` ran at line ~6120
6193
- // before this IIFE started, so `currentTurn === null` by
6194
- // now — without an explicit endingTurn argument, the shadow
6195
- // trace would read `outboundEmitted=false` for this dedup
6196
- // path even though `recentCount > 0` proves the reply tool
6197
- // did fire (turn.replyCalled === true).
6198
- purgeReactionTracking(statusKey(backstopChatId, backstopThreadId), turn)
6140
+ purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
6199
6141
  return
6200
6142
  }
6201
6143
  } catch {}
@@ -6323,35 +6265,14 @@ function handleSessionEvent(ev: SessionEvent): void {
6323
6265
  process.stderr.write(`telegram gateway: turn-flush send failed: ${(err as Error).message}\n`)
6324
6266
  if (backstopCtrl) backstopCtrl.setError()
6325
6267
  } finally {
6326
- // PR 3b step 3 (#1603 audit): thread the captured `turn`
6327
- // explicitly. The turn-flush backstop runs inside this IIFE
6328
- // after `endCurrentTurnAtomic(turn)` already nulled
6329
- // `currentTurn` at line ~6120. Without threading, the shadow
6330
- // trace would read `outboundEmitted=currentTurn?.replyCalled
6331
- // === undefined` → false. For the turn-flush path
6332
- // `turn.replyCalled` is `false` regardless (the model didn't
6333
- // call the reply tool — the gateway backstop did the work),
6334
- // so the threaded value matches the existing fallback here.
6335
- // But pinning the source via the captured turn matches the
6336
- // canonical pattern and survives any future change to how
6337
- // `currentTurn` is sequenced.
6338
- purgeReactionTracking(statusKey(backstopChatId, backstopThreadId), turn)
6268
+ purgeReactionTracking(statusKey(backstopChatId, backstopThreadId))
6339
6269
  }
6340
6270
  })()
6341
6271
  return
6342
6272
  }
6343
6273
 
6344
6274
  if (ctrl) ctrl.setDone()
6345
- // Duplicate-emit removed (#1603 audit, step 4 — the audit's
6346
- // original "route through endCurrentTurnAtomic" recommendation
6347
- // missed that this same code path already calls
6348
- // `endCurrentTurnAtomic(turn)` ~90 lines below at line ~6412
6349
- // on the same key — `chatId === turn.sessionChatId` and
6350
- // `threadId === turn.sessionThreadId` per the bindings at
6351
- // ~5946-5947. Removing this bare call closes the last duplicate
6352
- // shadow-`turnEnd` emit on the dominant happy-path turn-end
6353
- // tail; the canonical primitive below still fires the single
6354
- // authoritative turnEnd with the threaded turn).
6275
+ purgeReactionTracking(statusKey(chatId, threadId))
6355
6276
  {
6356
6277
  const sKey = streamKey(chatId, threadId)
6357
6278
  const turnDurationMs = turn.startedAt > 0 ? Date.now() - turn.startedAt : 0
@@ -6379,16 +6300,44 @@ function handleSessionEvent(ev: SessionEvent): void {
6379
6300
  longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
6380
6301
  ended_via: outboundMetrics.outboundCount > 0 ? 'reply' : 'silent',
6381
6302
  })
6382
- // #1122 PR4 fix: deterministic silent-end detection (see the
6383
- // silent-marker path above for the rationale). The Stop hook
6384
- // reads the file we write here and blocks the session-end so
6385
- // the agent can be re-prompted to call reply.
6303
+ // #1122 PR4 / #1161: deterministic silent-end handling (see the
6304
+ // silent-marker path above for the rationale).
6305
+ // - first silent-end → recordSilentTurnEnd writes the state
6306
+ // file so the Stop hook (silent-end-interrupt-stop.mjs)
6307
+ // blocks the session-end and re-prompts the agent to reply.
6308
+ // - the Stop-hook re-prompt is already spent and the agent is
6309
+ // STILL silent → recordSilentTurnEnd returns exhausted:true;
6310
+ // deliver a user-facing fallback so the turn never just
6311
+ // vanishes (the user otherwise only sees the card disappear).
6386
6312
  if (outboundMetrics.outboundCount === 0) {
6387
- writeSilentEndState({
6313
+ const silentEnd = recordSilentTurnEnd({
6388
6314
  chatId,
6389
6315
  threadId: threadId ?? null,
6390
6316
  turnKey: tKey,
6391
6317
  })
6318
+ if (silentEnd.exhausted) {
6319
+ process.stderr.write(
6320
+ `telegram gateway: WARN silent-end fallback — agent stayed ` +
6321
+ `silent after the Stop-hook re-prompt; delivering fallback ` +
6322
+ `message chat=${chatId} turnKey=${tKey} (#1161)\n`,
6323
+ )
6324
+ void retryWithThreadFallback(
6325
+ robustApiCall,
6326
+ (tid) =>
6327
+ bot.api.sendMessage(
6328
+ chatId,
6329
+ SILENT_END_FALLBACK_TEXT,
6330
+ tid != null ? { message_thread_id: tid } : {},
6331
+ ),
6332
+ { threadId, chat_id: chatId, verb: 'silent-end-fallback.sendMessage' },
6333
+ ).catch((err) => {
6334
+ process.stderr.write(
6335
+ `telegram gateway: silent-end fallback send failed: ${
6336
+ err instanceof Error ? err.message : String(err)
6337
+ }\n`,
6338
+ )
6339
+ })
6340
+ }
6392
6341
  }
6393
6342
  signalTracker.clear(tKey)
6394
6343
  silencePoke.endTurn(tKey)
@@ -9,7 +9,9 @@
9
9
  * decision:block to re-prompt the agent instead of letting the session close.
10
10
  *
11
11
  * On the second silent-end (retryCount >= MAX_RETRIES), the hook allows the
12
- * stop so the gateway can render the "🙊 Ended without reply" warning card.
12
+ * stop. The gateway's turn-end path (recordSilentTurnEnd in silent-end.ts)
13
+ * detects the exhausted re-prompt and delivers a user-facing fallback
14
+ * message so the turn never silently vanishes (#1161).
13
15
  *
14
16
  * Carve-outs preserved:
15
17
  * - wasAutonomous=true turns: the gateway never writes a state file for
@@ -30,6 +32,8 @@ import { readFileSync, writeFileSync, existsSync } from 'node:fs'
30
32
  import { join } from 'node:path'
31
33
  import { homedir } from 'node:os'
32
34
 
35
+ // MUST stay in sync with SILENT_END_MAX_RETRIES in telegram-plugin/silent-end.ts
36
+ // (this hook is a standalone .mjs and can't import the TS module).
33
37
  const MAX_RETRIES = 1
34
38
 
35
39
  function readStdin() {
@@ -51,6 +51,14 @@ export interface SilentEndDeps {
51
51
  log?: (line: string) => void
52
52
  }
53
53
 
54
+ /**
55
+ * How many times the Stop hook re-prompts a silent-end turn before it
56
+ * gives up. MUST stay in sync with `MAX_RETRIES` in the Stop hook
57
+ * (`telegram-plugin/hooks/silent-end-interrupt-stop.mjs`) — the hook is a
58
+ * standalone `.mjs` and can't import this module.
59
+ */
60
+ export const SILENT_END_MAX_RETRIES = 1
61
+
54
62
  function resolveStateDir(deps?: SilentEndDeps): string {
55
63
  if (deps?.stateDir != null) return deps.stateDir
56
64
  const env = process.env.TELEGRAM_STATE_DIR
@@ -172,3 +180,51 @@ export function readSilentEndState(deps?: SilentEndDeps): SilentEndState | null
172
180
  return null
173
181
  }
174
182
  }
183
+
184
+ /**
185
+ * Record a user-message turn that ended with zero outbound messages and
186
+ * report whether the deterministic re-prompt has been exhausted. This is
187
+ * the gateway's single entry point for the main turn-end path.
188
+ *
189
+ * - First silent-end of a turn (no prior state, or prior `retryCount`
190
+ * still below `SILENT_END_MAX_RETRIES`) → writes the state file via
191
+ * `writeSilentEndState`, so `silent-end-interrupt-stop.mjs` blocks
192
+ * the stop and re-prompts the agent. Returns `{ exhausted: false }`.
193
+ *
194
+ * - A silent-end where the prior state for the SAME turn already shows
195
+ * `retryCount >= SILENT_END_MAX_RETRIES` → the Stop hook already
196
+ * spent its re-prompt and the agent is STILL silent. Recovery has
197
+ * failed. Clears the state file (so the Stop hook on this final turn
198
+ * finds nothing pending and allows the stop cleanly) and returns
199
+ * `{ exhausted: true }` — the caller MUST then deliver a user-facing
200
+ * fallback so the turn never just vanishes (#1161).
201
+ *
202
+ * Chat-less autonomous wakeup turns never reach here: the gateway only
203
+ * creates a `currentTurn` (and therefore only runs a turn-end handler)
204
+ * when the inbound event carries a chat id. Cron-fired turns DO carry a
205
+ * topic chat and reach this path — a cron task that means to stay silent
206
+ * must emit a NO_REPLY sentinel, which routes to the gateway's
207
+ * silent-marker branch and never gets a fallback.
208
+ */
209
+ export function recordSilentTurnEnd(
210
+ args: { chatId: string; threadId: number | null; turnKey: string },
211
+ deps?: SilentEndDeps,
212
+ ): { exhausted: boolean } {
213
+ const prev = readSilentEndState(deps)
214
+ if (
215
+ prev != null &&
216
+ prev.turnKey === args.turnKey &&
217
+ prev.retryCount >= SILENT_END_MAX_RETRIES
218
+ ) {
219
+ clearSilentEndState(args.turnKey, deps)
220
+ emitLog(
221
+ deps,
222
+ `silent-end: re-prompt exhausted for turnKey=${args.turnKey} ` +
223
+ `(retryCount=${prev.retryCount} >= ${SILENT_END_MAX_RETRIES}) — ` +
224
+ `caller should deliver a fallback\n`,
225
+ )
226
+ return { exhausted: true }
227
+ }
228
+ writeSilentEndState(args, deps)
229
+ return { exhausted: false }
230
+ }
@@ -1185,8 +1185,15 @@ describe('uptimeMsForStarttime', () => {
1185
1185
  // the probes covered by the boot-card-dedup-and-next-steps PR so we don't
1186
1186
  // silently lose the hint on a future refactor.
1187
1187
 
1188
- describe('nextStep — agent systemd states', () => {
1189
- it('attaches a journalctl hint when the unit is failed', async () => {
1188
+ describe('nextStep — agent states', () => {
1189
+ const savedRuntime = process.env.SWITCHROOM_RUNTIME
1190
+ afterEach(() => {
1191
+ if (savedRuntime === undefined) delete process.env.SWITCHROOM_RUNTIME
1192
+ else process.env.SWITCHROOM_RUNTIME = savedRuntime
1193
+ })
1194
+
1195
+ it('attaches a journalctl hint when the unit is failed (non-docker runtime)', async () => {
1196
+ delete process.env.SWITCHROOM_RUNTIME
1190
1197
  const exec = makeSequence([makeSystemctlOutput('failed')])
1191
1198
  const r = await probeAgentProcess('klanker', {
1192
1199
  execFileImpl: exec as unknown as (cmd: string, args: string[]) => Promise<{ stdout: string; stderr: string }>,
@@ -1199,6 +1206,23 @@ describe('nextStep — agent systemd states', () => {
1199
1206
  expect(r.nextStep).toMatch(/switchroom-klanker/)
1200
1207
  })
1201
1208
 
1209
+ // #1382: the failed/unknown-state hints must follow SWITCHROOM_RUNTIME the
1210
+ // same way the boot-card crash row does (#1376) — no journalctl in-container.
1211
+ it('attaches a docker-logs hint when the unit is failed under SWITCHROOM_RUNTIME=docker', async () => {
1212
+ process.env.SWITCHROOM_RUNTIME = 'docker'
1213
+ const exec = makeSequence([makeSystemctlOutput('failed')])
1214
+ const r = await probeAgentProcess('klanker', {
1215
+ execFileImpl: exec as unknown as (cmd: string, args: string[]) => Promise<{ stdout: string; stderr: string }>,
1216
+ sleepImpl: async () => {},
1217
+ retryIntervalMs: 1,
1218
+ retryMaxMs: 0,
1219
+ })
1220
+ expect(r.status).toBe('fail')
1221
+ expect(r.nextStep).toMatch(/docker logs/)
1222
+ expect(r.nextStep).toMatch(/switchroom-klanker/)
1223
+ expect(r.nextStep).not.toMatch(/journalctl/)
1224
+ })
1225
+
1202
1226
  it('attaches a transient-state hint when the unit is activating after retry budget', async () => {
1203
1227
  const exec = makeSequence([makeSystemctlOutput('activating')])
1204
1228
  const r = await probeAgentProcess('klanker', {
@@ -7,6 +7,8 @@ import {
7
7
  writeSilentEndState,
8
8
  clearSilentEndState,
9
9
  readSilentEndState,
10
+ recordSilentTurnEnd,
11
+ SILENT_END_MAX_RETRIES,
10
12
  } from '../silent-end.js'
11
13
 
12
14
  let stateDir: string
@@ -118,6 +120,73 @@ describe('silent-end.ts — gateway state writer', () => {
118
120
  })
119
121
  })
120
122
 
123
+ describe('recordSilentTurnEnd — #1161 exhaustion detection', () => {
124
+ it('first silent-end of a turn writes state and reports exhausted:false', () => {
125
+ const r = recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' })
126
+ expect(r.exhausted).toBe(false)
127
+ expect(readSilentEndState()).toMatchObject({ turnKey: 'c:_', retryCount: 0 })
128
+ })
129
+
130
+ it('reports exhausted:false while prior retryCount is still below the cap', () => {
131
+ // The Stop hook has not yet been able to push retryCount to the cap.
132
+ const path = join(stateDir, 'silent-end-pending.json')
133
+ writeFileSync(path, JSON.stringify({
134
+ chatId: 'c', threadId: null, turnKey: 'c:_',
135
+ retryCount: SILENT_END_MAX_RETRIES - 1, timestamp: 0,
136
+ }))
137
+ const r = recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' })
138
+ expect(r.exhausted).toBe(false)
139
+ // State is (re)written, inheriting the prior counter for the same turn.
140
+ expect(readSilentEndState()!.retryCount).toBe(SILENT_END_MAX_RETRIES - 1)
141
+ })
142
+
143
+ it('reports exhausted:true and clears state once the re-prompt cap is reached', () => {
144
+ // The Stop hook already blocked once and pushed retryCount to the cap;
145
+ // the agent is STILL silent on this re-prompted turn.
146
+ const path = join(stateDir, 'silent-end-pending.json')
147
+ writeFileSync(path, JSON.stringify({
148
+ chatId: 'c', threadId: null, turnKey: 'c:_',
149
+ retryCount: SILENT_END_MAX_RETRIES, timestamp: 0,
150
+ }))
151
+ const r = recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' })
152
+ expect(r.exhausted).toBe(true)
153
+ // State cleared so the Stop hook on this final turn allows the stop.
154
+ expect(readSilentEndState()).toBeNull()
155
+ })
156
+
157
+ it('treats a capped prior state for a DIFFERENT turn as a fresh silent-end', () => {
158
+ const path = join(stateDir, 'silent-end-pending.json')
159
+ writeFileSync(path, JSON.stringify({
160
+ chatId: 'old', threadId: null, turnKey: 'old:_',
161
+ retryCount: SILENT_END_MAX_RETRIES, timestamp: 0,
162
+ }))
163
+ const r = recordSilentTurnEnd({ chatId: 'new', threadId: 9, turnKey: 'new:9' })
164
+ expect(r.exhausted).toBe(false)
165
+ expect(readSilentEndState()).toMatchObject({ turnKey: 'new:9', retryCount: 0 })
166
+ })
167
+
168
+ it('full lifecycle: silent → re-prompt → still silent → exhausted', () => {
169
+ // 1. Turn ends silent — first record.
170
+ expect(recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' }).exhausted).toBe(false)
171
+ // 2. Stop hook blocks and increments retryCount (simulated).
172
+ const path = join(stateDir, 'silent-end-pending.json')
173
+ const s = readSilentEndState()!
174
+ writeFileSync(path, JSON.stringify({ ...s, retryCount: s.retryCount + 1 }))
175
+ // 3. Re-prompted turn ends silent again — recovery exhausted.
176
+ expect(recordSilentTurnEnd({ chatId: 'c', threadId: null, turnKey: 'c:_' }).exhausted).toBe(true)
177
+ expect(readSilentEndState()).toBeNull()
178
+ })
179
+
180
+ it('SILENT_END_MAX_RETRIES matches MAX_RETRIES in the Stop hook', () => {
181
+ // The hook is a standalone .mjs and hardcodes its own copy — this
182
+ // guards the two from drifting apart.
183
+ const hookSrc = readFileSync(join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs'), 'utf8')
184
+ const m = hookSrc.match(/const MAX_RETRIES = (\d+)/)
185
+ expect(m).not.toBeNull()
186
+ expect(Number(m![1])).toBe(SILENT_END_MAX_RETRIES)
187
+ })
188
+ })
189
+
121
190
  describe('silent-end-interrupt-stop hook — integration', () => {
122
191
  const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
123
192