switchroom 0.12.14 → 0.12.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +367 -278
- package/dist/vault/approvals/kernel-server.js +68 -1
- package/dist/vault/broker/server.js +21 -1
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +96 -70
- package/telegram-plugin/gateway/approval-callback.test.ts +49 -1
- package/telegram-plugin/gateway/approval-callback.ts +85 -67
- package/telegram-plugin/gateway/gateway.ts +19 -2
- package/telegram-plugin/gateway/pending-inbound-buffer.ts +39 -0
- package/telegram-plugin/tests/pending-inbound-buffer.test.ts +71 -1
|
@@ -22,14 +22,58 @@
|
|
|
22
22
|
*/
|
|
23
23
|
|
|
24
24
|
import { type Context, InlineKeyboard } from "grammy";
|
|
25
|
-
import { parseApprovalCallback, ttlMsFromToken } from "./approval-card.js";
|
|
26
25
|
import {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
parseApprovalCallback,
|
|
27
|
+
ttlMsFromToken,
|
|
28
|
+
type ApprovalChoice,
|
|
29
|
+
} from "./approval-card.js";
|
|
30
|
+
import { approvalConsumeRecord } from "../../src/vault/approvals/client.js";
|
|
30
31
|
import type { ApprovalDecisionMode } from "../../src/vault/approvals/schema.js";
|
|
31
32
|
import { scopeToOpenInDriveButton } from "../../src/drive/deep-links.js";
|
|
32
33
|
|
|
34
|
+
/**
 * Outcome of resolving a tapped approval choice.
 *
 * - `ok: true` carries the decision tuple: the kernel decision mode,
 *   whether the request was granted, the grant lifetime in
 *   milliseconds (`null` when the choice carries no ttl), and a short
 *   human-readable label for the chosen mode (e.g. "granted once").
 * - `ok: false` carries a short error string. `handleApprovalCallback`
 *   passes it to `answerCallbackQuery` and returns without consuming
 *   the single-use nonce.
 */
export type ResolvedApprovalDecision =
  | {
      ok: true;
      decision: ApprovalDecisionMode;
      granted: boolean;
      ttl_ms: number | null;
      displayMode: string;
    }
  | { ok: false; error: string };

/**
 * Resolve a tapped approval choice to its decision tuple — PURE, no
 * kernel I/O, so the `bad ttl token` branch is unit-testable without
 * mocking grammy.
 *
 * Extracted (PR-5) from `handleApprovalCallback` so PR-4's invariant —
 * "compute + validate the decision BEFORE burning the single-use
 * nonce" — is structural, not a comment: the handler calls this first
 * and only proceeds to `approvalConsumeRecord` on `ok: true`. Any
 * `{ ok: false }` result leaves the nonce untouched, so the operator
 * can re-tap a valid choice.
 *
 * @param choice - Parsed choice from the approval card callback.
 * @returns The decision tuple, or `{ ok: false, error }` when the ttl
 *   token does not parse or the choice kind is not recognised.
 */
export function resolveApprovalDecision(
  choice: ApprovalChoice,
): ResolvedApprovalDecision {
  switch (choice.kind) {
    case "deny":
      return { ok: true, decision: "deny", granted: false, ttl_ms: null, displayMode: "denied" };
    case "once":
      // No expiry — recorded as a one-shot grant; the agent calls
      // approval_lookup at most once, then proceeds. /approvals revoke
      // can still target the row by id.
      return { ok: true, decision: "allow_once", granted: true, ttl_ms: null, displayMode: "granted once" };
    case "always":
      return { ok: true, decision: "allow_always", granted: true, ttl_ms: null, displayMode: "granted always" };
    case "ttl": {
      const ms = ttlMsFromToken(choice.param);
      if (ms === null) return { ok: false, error: "bad ttl token" };
      return { ok: true, decision: "allow_ttl", granted: true, ttl_ms: ms, displayMode: `granted for ${choice.param}` };
    }
    default: {
      // Compile time: the `never` assignment breaks the build if a new
      // kind is added to ApprovalChoice without a matching case.
      // Run time: a kind outside the union yields a rejected result
      // instead of falling out of the switch and returning undefined,
      // which the caller would then dereference via `.ok`.
      const unexpected: never = choice;
      void unexpected;
      return { ok: false, error: "unknown approval choice" };
    }
  }
}
|
|
76
|
+
|
|
33
77
|
/**
|
|
34
78
|
* Build the post-tap keyboard for a granted decision. Today this is
|
|
35
79
|
* just the `[ 📖 Open in Drive ]` button when the granted scope names
|
|
@@ -57,66 +101,21 @@ export async function handleApprovalCallback(
|
|
|
57
101
|
return;
|
|
58
102
|
}
|
|
59
103
|
|
|
60
|
-
//
|
|
61
|
-
//
|
|
62
|
-
//
|
|
63
|
-
//
|
|
64
|
-
//
|
|
65
|
-
//
|
|
66
|
-
//
|
|
67
|
-
//
|
|
68
|
-
//
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
let decision: ApprovalDecisionMode;
|
|
73
|
-
let granted: boolean;
|
|
74
|
-
let ttl_ms: number | null = null;
|
|
75
|
-
let displayMode: string;
|
|
76
|
-
switch (parsed.choice.kind) {
|
|
77
|
-
case "deny":
|
|
78
|
-
decision = "deny";
|
|
79
|
-
granted = false;
|
|
80
|
-
displayMode = "denied";
|
|
81
|
-
break;
|
|
82
|
-
case "once":
|
|
83
|
-
decision = "allow_once";
|
|
84
|
-
granted = true;
|
|
85
|
-
// No expiry — recorded as a one-shot grant; the agent calls
|
|
86
|
-
// approval_lookup at most once, then proceeds. /approvals revoke
|
|
87
|
-
// can still target the row by id.
|
|
88
|
-
displayMode = "granted once";
|
|
89
|
-
break;
|
|
90
|
-
case "always":
|
|
91
|
-
decision = "allow_always";
|
|
92
|
-
granted = true;
|
|
93
|
-
displayMode = "granted always";
|
|
94
|
-
break;
|
|
95
|
-
case "ttl": {
|
|
96
|
-
decision = "allow_ttl";
|
|
97
|
-
granted = true;
|
|
98
|
-
const ms = ttlMsFromToken(parsed.choice.param);
|
|
99
|
-
if (ms === null) {
|
|
100
|
-
await ctx.answerCallbackQuery({ text: "bad ttl token" });
|
|
101
|
-
return;
|
|
102
|
-
}
|
|
103
|
-
ttl_ms = ms;
|
|
104
|
-
displayMode = `granted for ${parsed.choice.param}`;
|
|
105
|
-
break;
|
|
106
|
-
}
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
const consumed = await approvalConsume(parsed.request_id);
|
|
110
|
-
if (consumed === null) {
|
|
111
|
-
await ctx.answerCallbackQuery({ text: "approval kernel unreachable" });
|
|
112
|
-
return;
|
|
113
|
-
}
|
|
114
|
-
if (!consumed.consumed) {
|
|
115
|
-
// Single-use enforcement: someone already tapped, or the nonce
|
|
116
|
-
// expired/unknown. Match the RFC §8.1 wording.
|
|
117
|
-
await ctx.answerCallbackQuery({ text: "this prompt expired" });
|
|
104
|
+
// Resolve + validate the decision BEFORE burning the single-use
|
|
105
|
+
// nonce (PR-4 invariant, now structural via the pure
|
|
106
|
+
// resolveApprovalDecision — see its doc). A malformed ttl token
|
|
107
|
+
// returns { ok: false } here and the nonce is never touched, so the
|
|
108
|
+
// operator can re-tap a valid choice; pre-fix this validation ran
|
|
109
|
+
// AFTER approvalConsume(), burning the nonce with no decision
|
|
110
|
+
// recorded → the agent's approval_lookup poll never saw a verdict
|
|
111
|
+
// and the turn wedged. There is now NO fallible step between the
|
|
112
|
+
// consume→record below.
|
|
113
|
+
const resolved = resolveApprovalDecision(parsed.choice);
|
|
114
|
+
if (!resolved.ok) {
|
|
115
|
+
await ctx.answerCallbackQuery({ text: resolved.error });
|
|
118
116
|
return;
|
|
119
117
|
}
|
|
118
|
+
const { decision, granted, ttl_ms, displayMode } = resolved;
|
|
120
119
|
|
|
121
120
|
const granted_by_user_id = ctx.from?.id ?? 0;
|
|
122
121
|
// Approver set at decision time = the chat that received the card. We
|
|
@@ -125,18 +124,37 @@ export async function handleApprovalCallback(
|
|
|
125
124
|
// when each surface migrates and starts passing access.allowFrom.
|
|
126
125
|
const approver_set = [String(granted_by_user_id)];
|
|
127
126
|
|
|
128
|
-
|
|
127
|
+
// PR-6: atomic consume+record — ONE round-trip; the kernel burns the
|
|
128
|
+
// single-use nonce AND writes the decision in one SQLite transaction.
|
|
129
|
+
// If the record fails the burn rolls back, so `null` genuinely means
|
|
130
|
+
// "nothing happened, safe to retry" — there is no burned-nonce /
|
|
131
|
+
// no-decision wedge any more (the residual the shipped permission-TTL
|
|
132
|
+
// auto-deny used to backstop). resolveApprovalDecision already
|
|
133
|
+
// validated the ttl above, so no fallible step precedes this call.
|
|
134
|
+
const result = await approvalConsumeRecord({
|
|
129
135
|
request_id: parsed.request_id,
|
|
130
136
|
decision,
|
|
131
137
|
approver_set,
|
|
132
138
|
granted_by_user_id,
|
|
133
139
|
ttl_ms,
|
|
134
140
|
});
|
|
135
|
-
|
|
136
|
-
|
|
141
|
+
if (result === null) {
|
|
142
|
+
await ctx.answerCallbackQuery({ text: "approval kernel unreachable" });
|
|
143
|
+
return;
|
|
144
|
+
}
|
|
145
|
+
if (!result.consumed) {
|
|
146
|
+
// Already tapped / expired / unknown — single-use is enforced
|
|
147
|
+
// kernel-side and NO decision was written. RFC §8.1 wording.
|
|
148
|
+
await ctx.answerCallbackQuery({ text: "this prompt expired" });
|
|
149
|
+
return;
|
|
150
|
+
}
|
|
151
|
+
if (!result.decision_id) {
|
|
152
|
+
// Defensive: consumed:true must carry a decision_id. Kept distinct
|
|
153
|
+
// from the unreachable message for operator triage.
|
|
137
154
|
await ctx.answerCallbackQuery({ text: "kernel record failed" });
|
|
138
155
|
return;
|
|
139
156
|
}
|
|
157
|
+
const decision_id: string = result.decision_id;
|
|
140
158
|
|
|
141
159
|
// Edit the original card to its post-tap state. Drop the original
|
|
142
160
|
// action keyboard either way; on a successful grant for a Drive
|
|
@@ -149,8 +167,8 @@ export async function handleApprovalCallback(
|
|
|
149
167
|
? ` · /approvals revoke <code>${decision_id}</code>`
|
|
150
168
|
: "");
|
|
151
169
|
|
|
152
|
-
const postTapKeyboard = granted &&
|
|
153
|
-
? buildGrantedKeyboard(
|
|
170
|
+
const postTapKeyboard = granted && result.scope
|
|
171
|
+
? buildGrantedKeyboard(result.scope)
|
|
154
172
|
: undefined;
|
|
155
173
|
|
|
156
174
|
try {
|
|
@@ -245,7 +245,7 @@ import { shouldSweepChatAtBoot } from './boot-sweep-filter.js'
|
|
|
245
245
|
import { createIpcServer, type IpcClient, type IpcServer } from './ipc-server.js'
|
|
246
246
|
import { handleRequestDriveApproval } from './drive-write-approval.js'
|
|
247
247
|
import { buildDiffPreviewCard } from './diff-preview-card.js'
|
|
248
|
-
import { createPendingInboundBuffer } from './pending-inbound-buffer.js'
|
|
248
|
+
import { createPendingInboundBuffer, redeliverBufferedInbound } from './pending-inbound-buffer.js'
|
|
249
249
|
import { createPendingPermissionBuffer } from './pending-permission-decisions.js'
|
|
250
250
|
import {
|
|
251
251
|
buildVaultGrantApprovedInbound,
|
|
@@ -2743,10 +2743,27 @@ silencePoke.startTimer({
|
|
|
2743
2743
|
try {
|
|
2744
2744
|
clearSilentEndState(fbKey)
|
|
2745
2745
|
} catch { /* best-effort */ }
|
|
2746
|
+
// Self-heal the inbound buffer. pendingInboundBuffer otherwise
|
|
2747
|
+
// drains ONLY on bridge re-register (onClientRegistered). After a
|
|
2748
|
+
// network storm that settles with the bridge STILL connected, user
|
|
2749
|
+
// messages buffered during the flap sit forever — until a manual
|
|
2750
|
+
// restart forces a re-register (the fleet-update thundering-herd
|
|
2751
|
+
// incident, 2026-05-19: agents "not responding", logs show
|
|
2752
|
+
// pending-inbound-buffer depth>0 with no drain). Flushing on
|
|
2753
|
+
// wedge-clear makes the agent self-heal. selfAgent-keyed; a miss
|
|
2754
|
+
// re-buffers so nothing is lost if the bridge is genuinely offline.
|
|
2755
|
+
const fbSelfAgent = process.env.SWITCHROOM_AGENT_NAME ?? ''
|
|
2756
|
+
const fbRedeliver = redeliverBufferedInbound(
|
|
2757
|
+
pendingInboundBuffer,
|
|
2758
|
+
fbSelfAgent,
|
|
2759
|
+
(m) => ipcServer.sendToAgent(fbSelfAgent, m),
|
|
2760
|
+
)
|
|
2746
2761
|
process.stderr.write(
|
|
2747
2762
|
`telegram gateway: silence-poke framework-fallback ended wedged turn ` +
|
|
2748
2763
|
`chat=${fbChatId} thread=${ctx.threadId ?? '-'} silence_ms=${ctx.silenceMs} ` +
|
|
2749
|
-
`currentTurn_nulled=${turnMatchesFallback}
|
|
2764
|
+
`currentTurn_nulled=${turnMatchesFallback} ` +
|
|
2765
|
+
`drained_buffered=${fbRedeliver.redelivered}/${fbRedeliver.drained}` +
|
|
2766
|
+
`${fbRedeliver.rebuffered > 0 ? ` rebuffered=${fbRedeliver.rebuffered}` : ''}\n`,
|
|
2750
2767
|
)
|
|
2751
2768
|
},
|
|
2752
2769
|
})
|
|
@@ -54,6 +54,45 @@ export interface PendingInboundBufferOptions {
|
|
|
54
54
|
log?: (line: string) => void
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
/**
 * Empty `agent`'s queue in `buffer` and hand each message to `send`.
 *
 * A message counts as delivered only when `send` returns a truthy
 * value. A falsy return or a thrown error counts as a miss, and the
 * message is pushed back onto the buffer for the same agent so it is
 * not lost while the bridge is still offline. A miss does not stop
 * the pass: every drained message gets exactly one `send` attempt.
 *
 * Why this exists: `drain` is otherwise only invoked on bridge
 * re-register (`onClientRegistered`). After a network storm that
 * settles with the bridge STILL connected, messages buffered during
 * the flap would sit until a manual restart forced a re-register. The
 * silence-poke framework fallback calls this on wedge-clear so the
 * agent self-heals (fleet-update thundering-herd incident,
 * 2026-05-19).
 *
 * @param buffer - Buffer to drain from and re-buffer into.
 * @param agent - Agent key whose queue is processed.
 * @param send - Delivery attempt; `true` means delivered.
 * @returns Counts for observability: messages drained, delivered, and
 *   pushed back.
 */
export function redeliverBufferedInbound(
  buffer: PendingInboundBuffer,
  agent: string,
  send: (msg: InboundMessage) => boolean,
): { drained: number; redelivered: number; rebuffered: number } {
  // One guarded delivery attempt: an exception is folded into "miss".
  const attempt = (message: InboundMessage): boolean => {
    try {
      return send(message)
    } catch {
      return false
    }
  }

  const backlog = buffer.drain(agent)
  let redelivered = 0
  let rebuffered = 0

  for (let i = 0; i < backlog.length; i += 1) {
    const message = backlog[i]
    if (attempt(message)) {
      redelivered += 1
      continue
    }
    // Miss: put it back so a later drain can retry it.
    buffer.push(agent, message)
    rebuffered += 1
  }

  return { drained: backlog.length, redelivered, rebuffered }
}
|
|
95
|
+
|
|
57
96
|
export function createPendingInboundBuffer(
|
|
58
97
|
opts: PendingInboundBufferOptions = {},
|
|
59
98
|
): PendingInboundBuffer {
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import { describe, it, expect } from 'vitest'
|
|
10
|
-
import { createPendingInboundBuffer, DEFAULT_PENDING_INBOUND_CAP } from '../gateway/pending-inbound-buffer.js'
|
|
10
|
+
import { createPendingInboundBuffer, redeliverBufferedInbound, DEFAULT_PENDING_INBOUND_CAP } from '../gateway/pending-inbound-buffer.js'
|
|
11
11
|
import type { InboundMessage } from '../gateway/ipc-protocol.js'
|
|
12
12
|
|
|
13
13
|
function inbound(source: string, ts = Date.now()): InboundMessage {
|
|
@@ -130,3 +130,73 @@ describe('pending-inbound-buffer', () => {
|
|
|
130
130
|
expect(buf.totalDepth()).toBe(1)
|
|
131
131
|
})
|
|
132
132
|
})
|
|
133
|
+
|
|
134
|
+
describe('redeliverBufferedInbound — wedge-clear self-heal (fleet-update incident 2026-05-19)', () => {
  // Every case starts from a fresh buffer whose logger is silenced.
  const quietBuffer = () => createPendingInboundBuffer({ log: () => {} })

  it('delivers every buffered message and empties the buffer when send succeeds', () => {
    const buffer = quietBuffer()
    buffer.push('klanker', inbound('user', 1))
    buffer.push('klanker', inbound('user', 2))

    const deliveredIds: number[] = []
    const outcome = redeliverBufferedInbound(buffer, 'klanker', (message) => {
      deliveredIds.push(message.messageId as number)
      return true
    })

    expect(outcome).toEqual({ drained: 2, redelivered: 2, rebuffered: 0 })
    // Delivery order must match push order.
    expect(deliveredIds).toEqual([1, 2])
    expect(buffer.depth('klanker')).toBe(0)
  })

  it('re-buffers (loses nothing) when the bridge is still offline — send returns false', () => {
    const buffer = quietBuffer()
    buffer.push('klanker', inbound('user', 1))
    buffer.push('klanker', inbound('cron', 2))

    const alwaysMiss = () => false
    const outcome = redeliverBufferedInbound(buffer, 'klanker', alwaysMiss)

    expect(outcome).toEqual({ drained: 2, redelivered: 0, rebuffered: 2 })
    // Both messages are back in the queue, in their original order.
    expect(buffer.depth('klanker')).toBe(2)
    const requeuedSources = buffer.drain('klanker').map((message) => message.meta?.source)
    expect(requeuedSources).toEqual(['user', 'cron'])
  })

  it('treats a throwing send as not-delivered and re-buffers', () => {
    const buffer = quietBuffer()
    buffer.push('klanker', inbound('user', 1))

    const explodingSend = (): boolean => {
      throw new Error('bridge write failed')
    }
    const outcome = redeliverBufferedInbound(buffer, 'klanker', explodingSend)

    expect(outcome).toEqual({ drained: 1, redelivered: 0, rebuffered: 1 })
    expect(buffer.depth('klanker')).toBe(1)
  })

  it('mixed: delivers what it can, re-buffers only the misses', () => {
    const buffer = quietBuffer()
    for (const [source, ts] of [['a', 1], ['b', 2], ['c', 3]] as const) {
      buffer.push('klanker', inbound(source, ts))
    }

    let attempts = 0
    const outcome = redeliverBufferedInbound(buffer, 'klanker', () => {
      attempts += 1
      // Only the second attempt is a miss.
      return attempts !== 2
    })

    expect(outcome).toEqual({ drained: 3, redelivered: 2, rebuffered: 1 })
    const requeuedSources = buffer.drain('klanker').map((message) => message.meta?.source)
    expect(requeuedSources).toEqual(['b'])
  })

  it('is a no-op on an empty buffer (no send calls)', () => {
    const buffer = quietBuffer()

    let sendCalls = 0
    const outcome = redeliverBufferedInbound(buffer, 'klanker', () => {
      sendCalls += 1
      return true
    })

    expect(outcome).toEqual({ drained: 0, redelivered: 0, rebuffered: 0 })
    expect(sendCalls).toBe(0)
  })

  it('only touches the named agent', () => {
    const buffer = quietBuffer()
    buffer.push('klanker', inbound('user', 1))
    buffer.push('clerk', inbound('user', 2))

    redeliverBufferedInbound(buffer, 'klanker', () => true)

    expect(buffer.depth('klanker')).toBe(0)
    // The other agent's queue is left as it was.
    expect(buffer.depth('clerk')).toBe(1)
  })
})
|