switchroom 0.12.22 → 0.12.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +23 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +350 -105
- package/telegram-plugin/gateway/gateway.ts +35 -0
- package/telegram-plugin/gateway/inbound-delivery-machine-shadow.ts +117 -0
- package/telegram-plugin/gateway/inbound-delivery-machine.ts +435 -0
- package/telegram-plugin/tests/inbound-delivery-machine.test.ts +475 -0
- package/telegram-plugin/uat/scenarios/jtbd-fast-trivial-dm.test.ts +127 -0
- package/telegram-plugin/uat/scenarios/jtbd-memory-survives-restart-dm.test.ts +239 -0
- package/telegram-plugin/uat/scenarios/jtbd-wake-audit-content-dm.test.ts +145 -0
|
@@ -254,6 +254,13 @@ import { purgeStaleTurnsForChat } from './turn-state-purge.js'
|
|
|
254
254
|
import { decideInboundDelivery } from './inbound-delivery-gate.js'
|
|
255
255
|
import { createPendingPermissionBuffer } from './pending-permission-decisions.js'
|
|
256
256
|
import { chatKey, chatKeyWithSuffix } from './chat-key.js'
|
|
257
|
+
// Phase 2b PR 2 — shadow mode. Each event-site below calls shadowEmit()
|
|
258
|
+
// to record what the InboundDeliveryStateMachine PREDICTS the gateway
|
|
259
|
+
// should do. Behavior unchanged in this PR — the imperative code below
|
|
260
|
+
// still runs everything. PR 3 will cut over to executing the machine's
|
|
261
|
+
// effects.
|
|
262
|
+
import { shadowEmit } from './inbound-delivery-machine-shadow.js'
|
|
263
|
+
import type { ChatKey as _ChatKey } from './inbound-delivery-machine.js'
|
|
257
264
|
import {
|
|
258
265
|
buildVaultGrantApprovedInbound,
|
|
259
266
|
buildVaultGrantDeniedInbound,
|
|
@@ -1265,6 +1272,13 @@ function streamKey(chatId: string, threadId?: number | null): string {
|
|
|
1265
1272
|
}
|
|
1266
1273
|
|
|
1267
1274
|
function purgeReactionTracking(key: string): void {
|
|
1275
|
+
// Phase 2b shadow: turn end. The key was registered via setTurnStarted
|
|
1276
|
+
// when the inbound arrived; purge is the canonical turn-end signal.
|
|
1277
|
+
// outboundEmitted is approximated `true` here — refined in PR 3 to read
|
|
1278
|
+
// from the per-turn `replyCalled` flag on `currentTurn`. Conservative
|
|
1279
|
+
// shadow approximation is safe (only affects machine's lastOutboundAt
|
|
1280
|
+
// tracking; can't drive incorrect behavior in shadow mode).
|
|
1281
|
+
shadowEmit({ kind: 'turnEnd', key: key as _ChatKey, at: Date.now(), outboundEmitted: true })
|
|
1268
1282
|
const msgInfo = activeReactionMsgIds.get(key)
|
|
1269
1283
|
activeStatusReactions.delete(key)
|
|
1270
1284
|
activeReactionMsgIds.delete(key)
|
|
@@ -3182,6 +3196,8 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
3182
3196
|
|
|
3183
3197
|
onClientRegistered(client: IpcClient) {
|
|
3184
3198
|
process.stderr.write(`telegram gateway: bridge registered — agent=${client.agentName}\n`)
|
|
3199
|
+
// Phase 2b shadow: bridge up.
|
|
3200
|
+
shadowEmit({ kind: 'bridgeUp', at: Date.now() })
|
|
3185
3201
|
client.send({ type: 'status', status: 'agent_connected' })
|
|
3186
3202
|
|
|
3187
3203
|
// #1150: drain any synthetic inbounds queued for this agent while
|
|
@@ -3307,6 +3323,8 @@ const ipcServer: IpcServer = createIpcServer({
|
|
|
3307
3323
|
|
|
3308
3324
|
onClientDisconnected(client: IpcClient) {
|
|
3309
3325
|
process.stderr.write(`telegram gateway: bridge disconnected — agent=${client.agentName}\n`)
|
|
3326
|
+
// Phase 2b shadow: bridge down.
|
|
3327
|
+
shadowEmit({ kind: 'bridgeDown', at: Date.now() })
|
|
3310
3328
|
|
|
3311
3329
|
// Scope the flush to clients that actually registered as an agent.
|
|
3312
3330
|
// Anonymous one-shot connections (e.g. recall.py's legacy
|
|
@@ -6637,6 +6655,23 @@ async function handleInbound(
|
|
|
6637
6655
|
// network RTT) but not a user-perceived end-to-end measurement.
|
|
6638
6656
|
const inboundReceivedAt = Date.now()
|
|
6639
6657
|
|
|
6658
|
+
// Phase 2b shadow: inbound arrival. Emit BEFORE the snapshot/gate
|
|
6659
|
+
// logic so the machine sees the event at the same point in time the
|
|
6660
|
+
// imperative code would. The machine internally handles fresh-turn
|
|
6661
|
+
// vs mid-turn — its decision will be visible in the gw-trace shadow
|
|
6662
|
+
// line emitted to stderr.
|
|
6663
|
+
const _shadowKey = statusKey(ctx.chat?.id != null ? String(ctx.chat.id) : '0', ctx.message?.message_thread_id) as _ChatKey
|
|
6664
|
+
shadowEmit({
|
|
6665
|
+
kind: 'inbound',
|
|
6666
|
+
key: _shadowKey,
|
|
6667
|
+
msg: {
|
|
6668
|
+
msgId: ctx.message?.message_id ?? 0,
|
|
6669
|
+
isSteering: false, // refined in PR 3 — for now shadow conservatively classifies as non-steering
|
|
6670
|
+
payload: null,
|
|
6671
|
+
},
|
|
6672
|
+
at: Date.now(),
|
|
6673
|
+
})
|
|
6674
|
+
|
|
6640
6675
|
// #1556 self-blocking fix (v0.12.22): snapshot the live turn-state
|
|
6641
6676
|
// BEFORE the fresh-turn branch (line ~7357) sets activeTurnStartedAt
|
|
6642
6677
|
// for THIS inbound. The #1556 delivery gate further down asks "is a
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* InboundDeliveryStateMachine — SHADOW MODE wiring (Phase 2b PR 2).
|
|
3
|
+
*
|
|
4
|
+
* Per RFC `docs/rfcs/inbound-delivery-state-machine.md` Phase 2b PR 2:
|
|
5
|
+
* the state machine runs ALONGSIDE the existing imperative gateway
|
|
6
|
+
* code, recording predicted effects to a structured trace. Behavior
|
|
7
|
+
* is unchanged — every existing code path still executes the actual
|
|
8
|
+
* I/O. This module's job:
|
|
9
|
+
*
|
|
10
|
+
* 1. Own the module-scope machine state.
|
|
11
|
+
* 2. Expose `shadowEmit(event)` that runs `transition()` + logs the
|
|
12
|
+
* predicted effects via `gw-trace shadow ...` stderr lines.
|
|
13
|
+
* 3. Provide test hooks for resetting + inspecting state.
|
|
14
|
+
*
|
|
15
|
+
* After PR 2 bakes on the fleet for 24+ hours, PR 3 will wire the
|
|
16
|
+
* effects to drive ACTUAL behavior (the cutover), at which point the
|
|
17
|
+
* imperative paths get deleted in PR 4.
|
|
18
|
+
*
|
|
19
|
+
* Telemetry approach: each `shadowEmit` writes a single stderr line
|
|
20
|
+
* with the event kind + emitted effect kinds. Operators can then:
|
|
21
|
+
*
|
|
22
|
+
* docker exec switchroom-<agent> sh -lc 'grep "gw-trace shadow" \
|
|
23
|
+
* /var/log/switchroom/gateway-supervisor.log | tail -50'
|
|
24
|
+
*
|
|
25
|
+
* to see what the state machine PREDICTS the gateway should do for
|
|
26
|
+
* each event. Comparing against the actual log lines (`pending-inbound-
|
|
27
|
+
* buffer: agent=X buffered ...` etc.) is the validation that the
|
|
28
|
+
* machine is bit-identical with reality.
|
|
29
|
+
*
|
|
30
|
+
* Toggle off via `SWITCHROOM_DELIVERY_MACHINE_SHADOW=0` — a kill
|
|
31
|
+
* switch for the case where the shadow emits prove problematic
|
|
32
|
+
* (e.g., trace volume too high). The default is ON.
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
import {
|
|
36
|
+
type Effect,
|
|
37
|
+
type Event,
|
|
38
|
+
type State,
|
|
39
|
+
initialState,
|
|
40
|
+
transition,
|
|
41
|
+
} from './inbound-delivery-machine.js'
|
|
42
|
+
|
|
43
|
+
// Module-scope machine state. Each successful shadowEmit() replaces it
// with the state returned by transition().
let state: State = initialState()

// Kill switch, evaluated once when this module loads: only the exact
// value '0' disables shadow mode; unset or any other value leaves it on.
const enabled = !(process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW === '0')
|
|
45
|
+
|
|
46
|
+
/**
 * Run an event through the state machine in shadow mode. The machine
 * state advances and the predicted effects are LOGGED as one
 * `gw-trace shadow ...` stderr line, but none of the effects are
 * executed here.
 *
 * Shadow mode MUST NEVER break the gateway, so this function does not
 * throw: any failure (in `transition()`, in formatting, or in the
 * stderr write) is reported on a best-effort basis and swallowed.
 *
 * @param event The gateway event to feed to the machine.
 * @returns The predicted effects, or an empty array when shadow mode is
 *   disabled or the event could not be processed. Callers may inspect
 *   them inline (the eventual PR 3 cutover will replace the
 *   return-and-ignore pattern with return-and-execute).
 */
export function shadowEmit(event: Event): readonly Effect[] {
  if (!enabled) return []
  try {
    const result = transition(state, event)
    state = result.state

    // Single structured stderr line per event — grep-friendly and
    // low volume (one line per gateway event, not per effect). The
    // format matches gateway.ts's existing `tg-post method=...` shape
    // so log aggregation can pick it up without a new parser.
    const effectKinds = result.effects.map((e) => e.kind).join(',')
    const eventDetail = formatEventDetail(event)
    process.stderr.write(
      `gw-trace shadow event=${event.kind}${eventDetail} effects=[${effectKinds}] global=${state.global.kind} perKeySize=${state.perKey.size}\n`,
    )
    return result.effects
  } catch (err) {
    // The error report must itself be unable to throw. A call site that
    // passed a null/undefined event would make `event.kind` throw again
    // here, and a failing stderr write would do the same — either one
    // would escape into the imperative gateway path this guard protects.
    try {
      const kind = (event as { kind?: unknown } | null | undefined)?.kind
      const message = err instanceof Error ? err.message : String(err)
      process.stderr.write(`gw-trace shadow ERROR event=${String(kind)} err=${message}\n`)
    } catch {
      // Nowhere left to report to; dropping the trace line is the safe choice.
    }
    return []
  }
}
|
|
84
|
+
|
|
85
|
+
/**
 * Build the compact per-event suffix for the shadow trace line.
 *
 * Every non-empty result starts with a single space and contains no
 * newline, so it can be appended directly after `event=<kind>`.
 * Events that carry no identifying fields yield an empty string.
 */
function formatEventDetail(event: Event): string {
  switch (event.kind) {
    // Bridge-level and timer events have nothing worth adding.
    case 'bridgeUp':
    case 'bridgeDown':
    case 'tick':
      return ''

    case 'turnStart':
    case 'modelOutbound':
      return ` key=${event.key}`

    case 'turnEnd':
      return ` key=${event.key} outbound=${event.outboundEmitted}`

    case 'permVerdict': {
      const { requestId, behavior } = event.verdict
      return ` req=${requestId} behavior=${behavior}`
    }

    case 'inbound': {
      const { msgId, isSteering } = event.msg
      return ` key=${event.key} msg=${msgId} steer=${isSteering}`
    }
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Test hook: discard the accumulated shadow state and start again from
 * a fresh `initialState()`.
 */
export function __shadowResetForTests(): void {
  state = initialState()
}
|
|
108
|
+
|
|
109
|
+
/**
 * Test hook: expose the machine state currently held by this module.
 *
 * @returns The live `State` object (not a copy).
 */
export function __shadowGetStateForTests(): State {
  return state
}
|
|
113
|
+
|
|
114
|
+
/**
 * Test hook: report whether shadow mode is on, i.e. the value of the
 * module-level `enabled` flag derived from the environment variable.
 */
export function __shadowEnabledForTests(): boolean {
  return enabled
}
|
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* InboundDeliveryStateMachine — pure transition function for the
|
|
3
|
+
* gateway's inbound→bridge→outbound pipeline.
|
|
4
|
+
*
|
|
5
|
+
* Per `docs/rfcs/inbound-delivery-state-machine.md` (RFC merged in
|
|
6
|
+
* PR #1576): the gateway's delivery state was implicit and scattered
|
|
7
|
+
* across 8+ pieces of mutable state. The wedge cluster of 2026-05-19
|
|
8
|
+
* (9 PRs in 36h all patching variants of "inbound stranded → 5-min
|
|
9
|
+
* silence-poke fallback") and the v0.12.22 self-blocking gate bug
|
|
10
|
+
* (#1573, symptom-level) shared one root cause: no model anywhere in
|
|
11
|
+
* the codebase said "given these inputs, what should the gateway do."
|
|
12
|
+
*
|
|
13
|
+
* This module IS that model.
|
|
14
|
+
*
|
|
15
|
+
* ## Contract
|
|
16
|
+
*
|
|
17
|
+
* transition(state, event) → { state', effects[] }
|
|
18
|
+
*
|
|
19
|
+
* Pure. No I/O. No timers. No mutation of inputs. The gateway
|
|
20
|
+
* dispatcher receives `{ state', effects[] }` and EXECUTES the
|
|
21
|
+
* effects against the real bridge/buffer/spool/Telegram. The
|
|
22
|
+
* machine never touches those directly.
|
|
23
|
+
*
|
|
24
|
+
* Property-tested by 5 invariants (see
|
|
25
|
+
* `tests/inbound-delivery-machine.test.ts`):
|
|
26
|
+
*
|
|
27
|
+
* #1 — Every `inbound` event is delivered XOR persisted
|
|
28
|
+
* #2 — Every `setTurnStarted(key)` paired with `clearTurnStarted(key)`
|
|
29
|
+
* before the next end-of-life event for that key
|
|
30
|
+
* #3 — Per-chat sibling-key cleanup on `turnEnd`
|
|
31
|
+
* #4 — `permVerdict` delivered iff bridge alive; else persisted +
|
|
32
|
+
* re-delivered on next `bridgeUp`
|
|
33
|
+
* #5 — Spurious-fallback suppression (no `firePoke('fallback')` if
|
|
34
|
+
* the model produced an outbound for this key in the last 60s)
|
|
35
|
+
*
|
|
36
|
+
* ## Scope of this PR
|
|
37
|
+
*
|
|
38
|
+
* This module landed as PR 1 of the cutover (per RFC), exported but
|
|
39
|
+
* not wired in. PR 2 runs it in shadow mode from `gateway.ts` via
|
|
40
|
+
* `inbound-delivery-machine-shadow.ts`; PR 3 will execute its effects
|
|
41
|
+
* and PR 4 will delete the now-redundant imperative paths.
|
|
42
|
+
*
|
|
43
|
+
* Zero production behavior change in this PR. The property test is
|
|
44
|
+
* the only gate.
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
// ─────────────────────────────────────────────────────────────────────
// Branded types — chat-key namespace
// ─────────────────────────────────────────────────────────────────────

/**
 * Canonical chat-thread key. Use the existing `chatKey()` helper from
 * `./chat-key.ts` to construct one — that helper collapses
 * 0/null/undefined thread IDs to the same token (#1564 sibling-key
 * canonicalization). The state machine treats `ChatKey` as opaque
 * except for the `${chatId}:` prefix that `chatIdOfKey` reads.
 */
export type ChatKey = string & { readonly __brand: 'ChatKey' }

// ─────────────────────────────────────────────────────────────────────
// State
// ─────────────────────────────────────────────────────────────────────

/**
 * Global delivery state. Mirrors the existing `currentTurn` singleton
 * but explicit. The gateway has ONE bridge connection (single claude
 * process per agent container), so global state is the right model.
 */
export type GlobalState =
  | { kind: 'bridge_dead' }
  | { kind: 'bridge_alive_idle' }
  | { kind: 'bridge_alive_in_turn'; activeTurn: ChatKey }

/**
 * Per-key state. Lifts the scattered `activeTurnStartedAt` Map and
 * silence-poke's per-key `lastOutboundAt` tracking into ONE place.
 *
 * `turnStartedAt`: when this chat's current turn began (null = no
 * turn active for this key). Mirrors the existing
 * `activeTurnStartedAt[key]` value.
 *
 * `lastOutboundAt`: when the model last produced an outbound for
 * this key. CARRIES ACROSS TURNS — this is invariant #5's data: even
 * if a new turn starts (overlapping turns case from the
 * 2026-05-20 mid-turn silence wedge), the model's recent outbound is
 * preserved so a spurious fallback fire is suppressed.
 */
export interface PerKeyState {
  readonly turnStartedAt: number | null
  readonly lastOutboundAt: number | null
}

/** Complete machine state: the single global bridge/turn state plus per-key bookkeeping. */
export interface State {
  readonly global: GlobalState
  readonly perKey: ReadonlyMap<ChatKey, PerKeyState>
}

/** Fresh machine state: bridge considered dead, no per-key entries. */
export function initialState(): State {
  return {
    global: { kind: 'bridge_dead' },
    perKey: new Map(),
  }
}

// ─────────────────────────────────────────────────────────────────────
// Events
// ─────────────────────────────────────────────────────────────────────

/** An inbound message as the machine sees it; `payload` is carried through untouched. */
export interface InboundMessage {
  readonly msgId: number
  readonly isSteering: boolean
  readonly payload: unknown
}

/** A permission decision to hand to the bridge; `payload` is carried through untouched. */
export interface PermissionVerdict {
  readonly requestId: string
  readonly behavior: 'allow' | 'deny' | 'allow_once' | 'allow_always'
  readonly payload: unknown
}

/** An inbound message paired with the chat key it arrived on. */
export interface SpooledInbound {
  readonly key: ChatKey
  readonly msg: InboundMessage
}

/** Everything that can happen to the machine. Timestamps are supplied by the caller. */
export type Event =
  | { kind: 'bridgeUp'; at: number }
  | { kind: 'bridgeDown'; at: number }
  | { kind: 'turnStart'; key: ChatKey; at: number }
  | { kind: 'turnEnd'; key: ChatKey; at: number; outboundEmitted: boolean }
  | { kind: 'inbound'; key: ChatKey; msg: InboundMessage; at: number }
  | { kind: 'permVerdict'; verdict: PermissionVerdict; at: number }
  | { kind: 'modelOutbound'; key: ChatKey; at: number }
  | { kind: 'tick'; now: number }

// ─────────────────────────────────────────────────────────────────────
// Effects (returned, not performed)
// ─────────────────────────────────────────────────────────────────────

/** Instructions for the dispatcher. `transition` only describes them; it never performs them. */
export type Effect =
  | { kind: 'deliverToBridge'; key: ChatKey; msg: InboundMessage }
  | { kind: 'bufferInbound'; key: ChatKey; msg: InboundMessage }
  | { kind: 'persistInbound'; key: ChatKey; msg: InboundMessage }
  | { kind: 'drainBuffer' }
  | { kind: 'setTurnStarted'; key: ChatKey; at: number }
  | { kind: 'clearTurnStarted'; key: ChatKey }
  | { kind: 'noteOutbound'; key: ChatKey; at: number }
  | { kind: 'firePoke'; key: ChatKey; level: 'soft' | 'firm' | 'fallback' }
  | { kind: 'deliverPermVerdict'; verdict: PermissionVerdict }
  | { kind: 'persistPermVerdict'; verdict: PermissionVerdict }
  | { kind: 'redeliverPersistedPermVerdicts' }
  | { kind: 'logTrace'; stage: string; key?: ChatKey; metadata?: Readonly<Record<string, unknown>> }

// ─────────────────────────────────────────────────────────────────────
// Tunable timings (match the production silence-poke ladder for now;
// the RFC includes a recommendation to tighten these in a follow-up,
// but parity comes first for the cutover).
// ─────────────────────────────────────────────────────────────────────

export const TURN_TTL_MS = 300_000 // 5 min — silence-poke fallback threshold
export const SOFT_POKE_MS = 75_000
export const FIRM_POKE_MS = 180_000
export const OUTBOUND_RECENT_MS = 60_000 // invariant #5 suppression window

// ─────────────────────────────────────────────────────────────────────
// Transition function
// ─────────────────────────────────────────────────────────────────────

/** Result of one `transition` step: the successor state and the effects to execute. */
export interface Transition {
  readonly state: State
  readonly effects: readonly Effect[]
}

/** Per-key record for a key the machine has never seen. */
function emptyPerKey(): PerKeyState {
  return { turnStartedAt: null, lastOutboundAt: null }
}

/**
 * Return a copy of `state` with `key`'s per-key record replaced by
 * `update(prior)`. The input state and its map are left untouched.
 *
 * Empty entries (both fields null) are pruned to keep the map tight
 * — invariant #2's test reads the map size and we don't want stale
 * empty entries inflating it.
 */
function updatePerKey(
  state: State,
  key: ChatKey,
  update: (prior: PerKeyState) => PerKeyState,
): State {
  const prior = state.perKey.get(key) ?? emptyPerKey()
  const next = update(prior)
  const perKey = new Map(state.perKey)
  if (next.turnStartedAt == null && next.lastOutboundAt == null) {
    perKey.delete(key)
  } else {
    perKey.set(key, next)
  }
  return { ...state, perKey }
}

/**
 * Extract the chatId from a key shaped `${chatId}:${threadOrUnderscore}`.
 * Splitting on the FIRST colon keeps this robust to threads/suffixes;
 * a key without any colon is returned whole.
 */
function chatIdOfKey(key: ChatKey): string {
  const idx = key.indexOf(':')
  return idx === -1 ? key : key.slice(0, idx)
}

/**
 * Sweep all sibling keys for a chatId — Invariant #3. After the last
 * turnEnd for a chatId, no sibling thread keys should remain.
 *
 * Emits `clearTurnStarted` for every key of `chatId` other than
 * `exceptKey` that still holds `turnStartedAt != null`, and returns the
 * state with those turns cleared. `lastOutboundAt` is preserved.
 *
 * The state machine's invariant is enforced because we PROACTIVELY
 * purge siblings on turnEnd. The production sibling-key sweep
 * (#1564's `purgeStaleTurnsForChat`) becomes redundant.
 */
function sweepSiblings(
  state: State,
  chatId: string,
  exceptKey: ChatKey,
): { state: State; effects: Effect[] } {
  const effects: Effect[] = []
  let next = state
  // Iterate the ORIGINAL map; `next` is rebuilt immutably on each hit.
  for (const [k, v] of state.perKey) {
    if (k === exceptKey) continue
    if (chatIdOfKey(k) !== chatId) continue
    if (v.turnStartedAt == null) continue
    effects.push({ kind: 'clearTurnStarted', key: k })
    next = updatePerKey(next, k, (p) => ({ ...p, turnStartedAt: null }))
  }
  return { state: next, effects }
}

/**
 * The pure transition function: `transition(state, event) → { state', effects[] }`.
 *
 * No I/O, no timers, no mutation of `state` or `event`. The caller is
 * responsible for executing (or, in shadow mode, merely logging) the
 * returned effects.
 *
 * @param state Current machine state.
 * @param event The event to apply.
 * @returns The successor state and the ordered list of effects.
 * @throws Error if `event.kind` is not one of the `Event` variants
 *   (only reachable from untyped or mis-shaped call sites).
 */
export function transition(state: State, event: Event): Transition {
  switch (event.kind) {
    case 'bridgeUp': {
      if (state.global.kind !== 'bridge_dead') {
        // Idempotent: a second bridgeUp is a no-op.
        return { state, effects: [{ kind: 'logTrace', stage: 'bridgeUp_redundant' }] }
      }
      return {
        state: { ...state, global: { kind: 'bridge_alive_idle' } },
        effects: [
          { kind: 'redeliverPersistedPermVerdicts' },
          { kind: 'drainBuffer' },
          { kind: 'logTrace', stage: 'bridge_recover' },
        ],
      }
    }

    case 'bridgeDown': {
      // Keep perKey state intact — the next bridgeUp + turnEnd will
      // resolve naturally. Clearing turn state on bridge flap was the
      // wedge-cluster's "drain on disconnect" footgun.
      return {
        state: { ...state, global: { kind: 'bridge_dead' } },
        effects: [{ kind: 'logTrace', stage: 'bridge_flap' }],
      }
    }

    case 'inbound': {
      const isSteering = event.msg.isSteering
      const inTurn = state.global.kind === 'bridge_alive_in_turn'
      const alive = state.global.kind !== 'bridge_dead'

      if (!alive) {
        return {
          state,
          effects: [
            { kind: 'bufferInbound', key: event.key, msg: event.msg },
            { kind: 'persistInbound', key: event.key, msg: event.msg },
            { kind: 'logTrace', stage: 'inbound_bridge_dead_buffer', key: event.key },
          ],
        }
      }

      if (inTurn && !isSteering) {
        // Mid-turn non-steering inbound: buffer (the #1556 contract).
        return {
          state,
          effects: [
            { kind: 'bufferInbound', key: event.key, msg: event.msg },
            { kind: 'persistInbound', key: event.key, msg: event.msg },
            { kind: 'logTrace', stage: 'inbound_held_mid_turn', key: event.key, metadata: { msgId: event.msg.msgId } },
          ],
        }
      }

      // Alive + (idle OR steering): deliver immediately.
      // Steering messages reach claude mid-turn intentionally; they
      // do NOT start a new turn (existing turn continues).
      if (isSteering) {
        return {
          state,
          effects: [
            { kind: 'deliverToBridge', key: event.key, msg: event.msg },
            { kind: 'logTrace', stage: 'steer_delivered_mid_turn', key: event.key },
          ],
        }
      }

      // Fresh turn: state transitions to in_turn(key), perKey
      // turnStartedAt is set, message delivered.
      const next: State = {
        global: { kind: 'bridge_alive_in_turn', activeTurn: event.key },
        perKey: state.perKey,
      }
      return {
        state: updatePerKey(next, event.key, (p) => ({ ...p, turnStartedAt: event.at })),
        effects: [
          { kind: 'setTurnStarted', key: event.key, at: event.at },
          { kind: 'deliverToBridge', key: event.key, msg: event.msg },
          { kind: 'logTrace', stage: 'fresh_turn_deliver', key: event.key, metadata: { msgId: event.msg.msgId } },
        ],
      }
    }

    case 'turnStart': {
      // External signal that a turn has begun (e.g. session_event
      // from bridge). Distinct from the implicit turn-start in
      // `inbound`: a turn can begin without a fresh inbound (cron
      // injection, scheduled fire).
      // NOTE(review): from `bridge_dead` this also moves global to
      // `bridge_alive_in_turn` without the bridgeUp effects — confirm
      // that is intended before the cutover.
      const next: State = state.global.kind === 'bridge_alive_in_turn'
        ? state
        : { ...state, global: { kind: 'bridge_alive_in_turn', activeTurn: event.key } }
      return {
        state: updatePerKey(next, event.key, (p) => ({ ...p, turnStartedAt: event.at })),
        effects: [
          { kind: 'setTurnStarted', key: event.key, at: event.at },
          { kind: 'logTrace', stage: 'turn_start', key: event.key },
        ],
      }
    }

    case 'turnEnd': {
      // Clear turn state for the ending key AND sweep siblings
      // (invariant #3).
      const chatId = chatIdOfKey(event.key)
      const stateAfterClear = updatePerKey(state, event.key, (p) => ({
        turnStartedAt: null,
        lastOutboundAt: event.outboundEmitted ? event.at : p.lastOutboundAt,
      }))
      const sweep = sweepSiblings(stateAfterClear, chatId, event.key)
      // The global turn is over when its key is the ending key OR any
      // sibling key of the same chat: the sweep above has just cleared
      // that sibling's turnStartedAt. Leaving global in
      // `bridge_alive_in_turn` for a swept sibling would wedge the
      // machine — every later non-steering inbound would be buffered,
      // and `tick` could never recover because it only scans keys that
      // still have a turnStartedAt.
      const endsActiveTurn =
        state.global.kind === 'bridge_alive_in_turn' &&
        chatIdOfKey(state.global.activeTurn) === chatId
      const next: State = endsActiveTurn
        ? { ...sweep.state, global: { kind: 'bridge_alive_idle' } }
        : sweep.state
      const effects: Effect[] = [
        { kind: 'clearTurnStarted', key: event.key },
        ...sweep.effects,
      ]
      if (event.outboundEmitted) {
        effects.push({ kind: 'noteOutbound', key: event.key, at: event.at })
      }
      effects.push({ kind: 'drainBuffer' })
      effects.push({ kind: 'logTrace', stage: 'turn_complete', key: event.key, metadata: { outboundEmitted: event.outboundEmitted } })
      return { state: next, effects }
    }

    case 'modelOutbound': {
      // Updates lastOutboundAt for the key. Does NOT change global
      // state. This is invariant #5's data: it carries across turn
      // boundaries so a spurious fallback can be suppressed.
      return {
        state: updatePerKey(state, event.key, (p) => ({ ...p, lastOutboundAt: event.at })),
        effects: [
          { kind: 'noteOutbound', key: event.key, at: event.at },
        ],
      }
    }

    case 'permVerdict': {
      const alive = state.global.kind !== 'bridge_dead'
      if (alive) {
        return {
          state,
          effects: [
            { kind: 'deliverPermVerdict', verdict: event.verdict },
            { kind: 'logTrace', stage: 'perm_verdict_delivered' },
          ],
        }
      }
      return {
        state,
        effects: [
          { kind: 'persistPermVerdict', verdict: event.verdict },
          { kind: 'logTrace', stage: 'perm_verdict_persisted' },
        ],
      }
    }

    case 'tick': {
      // Scan perKey for stale turns. For each entry with a non-null
      // turnStartedAt where `now - turnStartedAt > TURN_TTL_MS`:
      //   - If lastOutboundAt is null OR at least OUTBOUND_RECENT_MS
      //     old, fire the fallback poke + clear the turn.
      //   - Otherwise suppress (invariant #5).
      const effects: Effect[] = []
      let next = state
      for (const [k, v] of state.perKey) {
        if (v.turnStartedAt == null) continue
        const age = event.now - v.turnStartedAt
        if (age <= TURN_TTL_MS) {
          // Not yet stale enough for fallback. Soft/firm pokes are
          // not modeled here yet — they're advisory, the gateway
          // emits them; the state machine governs the fallback gate.
          continue
        }
        // Stale enough for fallback. Check the suppression window.
        const lastOutboundAt = v.lastOutboundAt
        if (lastOutboundAt != null && event.now - lastOutboundAt < OUTBOUND_RECENT_MS) {
          // Invariant #5: model recently broke silence; suppress fire.
          effects.push({ kind: 'logTrace', stage: 'fallback_suppressed', key: k, metadata: { recentOutboundMs: event.now - lastOutboundAt } })
          continue
        }
        // Fire the fallback + clear the turn.
        effects.push({ kind: 'firePoke', key: k, level: 'fallback' })
        effects.push({ kind: 'clearTurnStarted', key: k })
        next = updatePerKey(next, k, (p) => ({ ...p, turnStartedAt: null }))
        // If this was the active turn globally, drop to idle.
        if (next.global.kind === 'bridge_alive_in_turn' && next.global.activeTurn === k) {
          next = { ...next, global: { kind: 'bridge_alive_idle' } }
        }
      }
      return { state: next, effects }
    }

    default: {
      // Compile-time exhaustiveness check; at runtime this is only
      // reachable from an untyped/mis-shaped call site. Throwing a
      // descriptive error beats silently returning `undefined`.
      const unreachable: never = event
      throw new Error(
        `inbound-delivery-machine: unknown event kind ${String((unreachable as { kind?: unknown }).kind)}`,
      )
    }
  }
}

// ─────────────────────────────────────────────────────────────────────
// Test-only helpers — mirror silence-poke.ts's __XForTests idiom
// ─────────────────────────────────────────────────────────────────────

/** Test hook: expose the private `chatIdOfKey` helper. */
export function __chatIdOfKeyForTests(key: ChatKey): string {
  return chatIdOfKey(key)
}
|