switchroom 0.14.62 → 0.14.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +149 -36
- package/telegram-plugin/gateway/auto-classify-mid-turn.ts +119 -0
- package/telegram-plugin/gateway/escalation-drive.ts +79 -0
- package/telegram-plugin/gateway/gateway.ts +154 -55
- package/telegram-plugin/gateway/obligation-ledger.ts +45 -3
- package/telegram-plugin/hooks/tool-label-pretool.mjs +32 -12
- package/telegram-plugin/tests/auto-classify-mid-turn.test.ts +87 -0
- package/telegram-plugin/tests/escalation-drive.test.ts +123 -0
- package/telegram-plugin/tests/obligation-determinism.test.ts +63 -3
- package/telegram-plugin/tests/obligation-ledger.test.ts +92 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { driveEscalation } from "../gateway/escalation-drive.js";
|
|
3
|
+
import { ObligationLedger } from "../gateway/obligation-ledger.js";
|
|
4
|
+
|
|
5
|
+
// Drives the REAL escalation step (the code obligationSweep calls) with the REAL
|
|
6
|
+
// ObligationLedger and the REAL withDeadline — including a fake hanging send,
|
|
7
|
+
// the exact path the total proof flagged and that mtcute / a synchronous test
|
|
8
|
+
// cannot reach. This is the executable verification of the hang-wedge fix.
|
|
9
|
+
|
|
10
|
+
/**
 * Opens one fixed, minimal obligation on the given ledger via `openIfAbsent`,
 * so a test has an entry to drive the escalation step against.
 *
 * Every field except `originTurnId` is a constant placeholder.
 *
 * @param L - Ledger under test.
 * @param id - Used as the obligation's `originTurnId`.
 */
function openEscalatable(L: ObligationLedger, id: string): void {
  L.openIfAbsent({
    originTurnId: id,
    chatId: "-100",
    threadId: 3,
    messageId: 1,
    text: "x",
    openedAt: 0,
  });
}
|
|
13
|
+
|
|
14
|
+
/** Attempt cap handed to `driveEscalation` as `maxAttempts` in the tests below. */
const MAX = 3;
/** Per-send deadline in ms — short so the hang case settles fast and deterministically. */
const DEADLINE = 15;
|
|
16
|
+
|
|
17
|
+
/**
 * Suite for `driveEscalation`: each case opens one obligation on a fresh
 * `ObligationLedger`, drives the escalation step with a stubbed `send`
 * (resolving, rejecting, or never settling), and checks the ledger state and
 * the shared in-flight set afterwards.
 */
describe("driveEscalation — the obligation escalation step is bounded and always reaches a terminal", () => {
  it("a successful send closes the obligation and clears the in-flight flag", async () => {
    const L = new ObligationLedger(2);
    openEscalatable(L, "c#1");
    const inFlight = new Set<string>();
    await driveEscalation({
      escId: "c#1",
      inFlight,
      ledger: L,
      send: () => Promise.resolve("sent"),
      maxAttempts: MAX,
      deadlineMs: DEADLINE,
      log: () => {},
    });
    expect(L.isOpen("c#1")).toBe(false); // closed
    expect(inFlight.has("c#1")).toBe(false); // flag cleared
  });

  it("a transient failure below the cap stays OPEN and clears the flag (retried next sweep)", async () => {
    const L = new ObligationLedger(2);
    openEscalatable(L, "c#1");
    const inFlight = new Set<string>();
    await driveEscalation({
      escId: "c#1",
      inFlight,
      ledger: L,
      send: () => Promise.reject(new Error("network blip")),
      maxAttempts: MAX,
      deadlineMs: DEADLINE,
      log: () => {},
    });
    expect(L.isOpen("c#1")).toBe(true); // still open — will retry
    expect(inFlight.has("c#1")).toBe(false); // flag cleared, so the next sweep can re-enter
  });

  it("THE FIX: a send that NEVER settles still clears the flag (bounded by the deadline)", async () => {
    const L = new ObligationLedger(2);
    openEscalatable(L, "c#1");
    const inFlight = new Set<string>();
    let sendInvoked = 0;
    const start = Date.now();
    // A promise that never resolves/rejects — the stalled send that, pre-fix,
    // left the in-flight flag set forever and wedged the obligation OPEN.
    await driveEscalation({
      escId: "c#1",
      inFlight,
      ledger: L,
      send: () => {
        sendInvoked++;
        return new Promise(() => {});
      },
      maxAttempts: MAX,
      deadlineMs: DEADLINE,
      log: () => {},
    });
    expect(sendInvoked).toBe(1);
    expect(inFlight.has("c#1")).toBe(false); // cleared despite the hang — the wedge is gone
    // Generous slack over the deadline: the point is "settled", not exact timing.
    expect(Date.now() - start).toBeLessThan(DEADLINE + 500); // settled at the deadline, not "never"
  });

  it("repeated hung sends reach a bounded terminal (close best-effort), never an infinite loop", async () => {
    const L = new ObligationLedger(2);
    openEscalatable(L, "c#1");
    const inFlight = new Set<string>();
    let sends = 0;
    let drives = 0;
    // Simulate the 5s sweep firing repeatedly while every send hangs.
    // The `drives < 20` bound keeps a regression from hanging the test run.
    while (L.isOpen("c#1") && drives < 20) {
      drives++;
      const p = driveEscalation({
        escId: "c#1",
        inFlight,
        ledger: L,
        send: () => {
          sends++;
          return new Promise(() => {});
        },
        maxAttempts: MAX,
        deadlineMs: DEADLINE,
        log: () => {},
      });
      if (p) await p; // each attempt settles within the deadline
    }
    expect(L.isOpen("c#1")).toBe(false); // reached a terminal (closed best-effort)
    expect(inFlight.has("c#1")).toBe(false);
    expect(sends).toBe(MAX); // exactly maxAttempts sends, then close — bounded
    expect(drives).toBeLessThanOrEqual(MAX + 1);
  });

  it("the in-flight guard prevents a concurrent second send for the same obligation", async () => {
    const L = new ObligationLedger(2);
    openEscalatable(L, "c#1");
    const inFlight = new Set<string>();
    let sends = 0;
    const hang = () => {
      sends++;
      return new Promise<void>(() => {});
    };
    const p1 = driveEscalation({
      escId: "c#1",
      inFlight,
      ledger: L,
      send: hang,
      maxAttempts: MAX,
      deadlineMs: 60,
      log: () => {},
    });
    // Second call while the first is still awaiting → must be a no-op.
    const p2 = driveEscalation({
      escId: "c#1",
      inFlight,
      ledger: L,
      send: hang,
      maxAttempts: MAX,
      deadlineMs: 60,
      log: () => {},
    });
    expect(p2).toBeUndefined(); // guarded
    expect(sends).toBe(1); // only one send fired
    // Optional chaining: an unexpectedly empty list fails the assertion
    // instead of throwing a TypeError.
    expect(L.list()[0]?.escalateAttempts).toBe(1); // only one attempt recorded
    await p1; // let the first settle so we don't leak a pending timer
  });
});
|
|
@@ -89,12 +89,18 @@ interface Sim {
|
|
|
89
89
|
steps: number;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
92
|
+
function runSchedule(msgs: Msg[], seed: number, graceMs = 0): Sim {
|
|
93
93
|
const PATH = "/state/agent/telegram/obligations.json";
|
|
94
94
|
const store = memStore();
|
|
95
95
|
let ledger = new ObligationLedger(MAX_REPRESENTS, {
|
|
96
96
|
onChange: (snap) => persistObligations(PATH, store.fs, snap),
|
|
97
97
|
});
|
|
98
|
+
// Virtual monotonic clock (only meaningful when graceMs>0). Advances every
|
|
99
|
+
// step by more than one sweep tick so the grace window deterministically
|
|
100
|
+
// expires within the step budget — proving grace DELAYS but never PREVENTS a
|
|
101
|
+
// terminal (no livelock).
|
|
102
|
+
let clock = 1_000_000;
|
|
103
|
+
const SWEEP_TICK = 5_000;
|
|
98
104
|
const r = rng(seed);
|
|
99
105
|
|
|
100
106
|
const pending = [...msgs]; // not yet received
|
|
@@ -109,11 +115,18 @@ function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
|
109
115
|
};
|
|
110
116
|
|
|
111
117
|
// Run one turn for an obligation; close if the model answers on this attempt.
|
|
118
|
+
// If it does NOT answer and grace is on, stamp the turn-end clock (mirrors the
|
|
119
|
+
// gateway's endCurrentTurnAtomic !finalAnswerDelivered branch) so the next
|
|
120
|
+
// decideAtIdle({now, graceMs}) waits out the grace before re-presenting.
|
|
112
121
|
const deliverTurn = (id: string) => {
|
|
113
122
|
const had = (turnsHad.get(id) ?? 0);
|
|
114
123
|
const attemptIndex = had; // 0-based
|
|
115
124
|
turnsHad.set(id, had + 1);
|
|
116
|
-
if (byId.get(id)!.answerOnAttempt === attemptIndex)
|
|
125
|
+
if (byId.get(id)!.answerOnAttempt === attemptIndex) {
|
|
126
|
+
close(id, "answered");
|
|
127
|
+
} else if (graceMs > 0 && ledger.isOpen(id)) {
|
|
128
|
+
ledger.noteTurnEnded(id, clock);
|
|
129
|
+
}
|
|
117
130
|
};
|
|
118
131
|
|
|
119
132
|
const ESC_IN_FLIGHT = new Set<string>(); // mirrors the gateway's concurrency guard (no-op in a sync model)
|
|
@@ -139,7 +152,14 @@ function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
|
139
152
|
});
|
|
140
153
|
deliverTurn(m.id); // original turn (attempt 0)
|
|
141
154
|
} else if (open) {
|
|
142
|
-
const decision =
|
|
155
|
+
const decision =
|
|
156
|
+
graceMs > 0 ? ledger.decideAtIdle({ now: clock, graceMs }) : ledger.decideAtIdle();
|
|
157
|
+
if (decision.action === "none") {
|
|
158
|
+
// Every open obligation is within its grace window — the sweep waits.
|
|
159
|
+
// Advance the clock so grace deterministically expires; no livelock.
|
|
160
|
+
clock += SWEEP_TICK;
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
143
163
|
const o = decision.obligation as Obligation;
|
|
144
164
|
// INVARIANT (no double-ask): a terminated obligation must never resurface.
|
|
145
165
|
expect(terminals.has(o.originTurnId)).toBe(false);
|
|
@@ -169,6 +189,9 @@ function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
|
169
189
|
});
|
|
170
190
|
ledger.hydrate(loadObligations(PATH, store.fs));
|
|
171
191
|
}
|
|
192
|
+
// Advance the virtual clock every step so any stamped grace window
|
|
193
|
+
// deterministically expires within the step budget.
|
|
194
|
+
clock += SWEEP_TICK;
|
|
172
195
|
}
|
|
173
196
|
|
|
174
197
|
return { terminals, steps };
|
|
@@ -220,6 +243,43 @@ describe("obligation determinism — every inbound reaches a terminal, no silent
|
|
|
220
243
|
}
|
|
221
244
|
});
|
|
222
245
|
|
|
246
|
+
// Randomised proof with the grace window enabled: for 3000 seeds, build 1–5
// messages with randomly picked answer/escalation-failure parameters, run the
// schedule with GRACE_MS, and check that every message reaches the terminal
// its parameters dictate within the step budget.
it("holds across 3000 schedules WITH the escalate-grace window on (grace delays, never prevents a terminal)", () => {
  const ANSWER = [0, 1, 2, 3, 99];
  const ESCFAIL = [0, 1, 2, 3, 5];
  const GRACE_MS = 45_000;
  for (let seed = 1; seed <= 3000; seed++) {
    const r = rng(seed * 7919);
    const n = 1 + Math.floor(r() * 5);
    const msgs: Msg[] = [];
    for (let i = 0; i < n; i++) {
      const msgId = seed * 100 + i;
      msgs.push({
        id: `c:3#${msgId}`,
        msgId,
        answerOnAttempt: pick(ANSWER, r),
        escalateFailsFor: pick(ESCFAIL, r),
      });
    }
    // Same enumeration as the no-grace proof, but the ledger now runs the grace
    // path: every non-answering turn stamps noteTurnEnded and the sweep waits
    // out the window before acting. The terminal each message reaches must be
    // IDENTICAL to the no-grace run — grace only delays.
    const { terminals, steps } = runSchedule(msgs, seed * 104729, GRACE_MS);
    expect(steps).toBeLessThan(10_000); // still terminates (no grace livelock)
    for (const m of msgs) {
      const t = terminals.get(m.id);
      expect(t, `grace seed=${seed} msg=${m.id} answer=${m.answerOnAttempt} escFail=${m.escalateFailsFor}`).toBeDefined();
      if (m.answerOnAttempt <= MAX_REPRESENTS) {
        expect(t).toBe("answered");
      } else if (m.escalateFailsFor < ESCALATE_MAX) {
        expect(t).toBe("escalation-delivered");
      } else {
        expect(t).toBe("escalation-give-up");
      }
    }
  }
});
|
|
282
|
+
|
|
223
283
|
it("a delivered-but-unanswered obligation survives a restart and is escalated, not lost", () => {
|
|
224
284
|
// Deterministic single case: model NEVER answers, escalation succeeds first try,
|
|
225
285
|
// with a restart forced mid-life via a seed that triggers the 0.15 branch.
|
|
@@ -225,6 +225,20 @@ describe("ObligationLedger — durability hooks + escalate-attempt counter", ()
|
|
|
225
225
|
expect(L.decideAtIdle().action).toBe("escalate");
|
|
226
226
|
});
|
|
227
227
|
|
|
228
|
+
// Closing one of two open obligations must remove only that one: the other
// stays the ledger's next idle decision, and closing the same id again
// reports `false`.
it("interrupt-cancel semantics: closing the in-flight turn's obligation removes it from the sweep, sibling untouched", () => {
  // Mirrors cancelInterruptedObligation: an `!` interrupt SIGINT-kills the
  // in-flight turn and closes its obligation, so the sweep can't later
  // re-present/escalate the question the user explicitly redirected away from.
  // A queued SIBLING obligation must survive.
  const L = new ObligationLedger();
  L.openIfAbsent(input("c:3#700", 1000)); // the in-flight turn's message
  L.openIfAbsent(input("c:3#701", 1001)); // a queued sibling
  expect(L.close("c:3#700")).toBe(true); // interrupt cancels the in-flight one
  expect(L.isOpen("c:3#700")).toBe(false);
  expect(L.decideAtIdle().obligation?.originTurnId).toBe("c:3#701"); // sibling still actionable
  expect(L.close("c:3#700")).toBe(false); // re-close / unknown is a safe no-op
});
|
|
241
|
+
|
|
228
242
|
it("hydrate skips malformed rows", () => {
|
|
229
243
|
const L = new ObligationLedger();
|
|
230
244
|
L.hydrate([
|
|
@@ -234,3 +248,81 @@ describe("ObligationLedger — durability hooks + escalate-attempt counter", ()
|
|
|
234
248
|
expect(L.size()).toBe(1);
|
|
235
249
|
});
|
|
236
250
|
});
|
|
251
|
+
|
|
252
|
+
/**
 * Suite for the grace-window options of `ObligationLedger.decideAtIdle`
 * (`now`, `graceMs`) together with `noteTurnEnded`: when an obligation is
 * skipped, when it is eligible, and that the represent → escalate ladder
 * still completes once each step is out of grace.
 */
describe("ObligationLedger — escalate-grace window (over-escalation fix)", () => {
  /**
   * Builds an `openIfAbsent` input for `id`; `messageId` is parsed from the
   * text after the last `#` in the id.
   */
  function input(id: string, openedAt: number) {
    return {
      originTurnId: id,
      chatId: "-100123",
      threadId: 3,
      messageId: Number(id.split("#").pop() ?? 0),
      text: "x",
      openedAt,
    };
  }

  it("no opts (or graceMs<=0) → pre-grace behaviour: acts immediately", () => {
    const L = new ObligationLedger();
    L.openIfAbsent(input("c:3#1", 1000));
    L.noteTurnEnded("c:3#1", 5000); // a turn just ended
    expect(L.decideAtIdle().action).toBe("represent"); // no grace → act now
    expect(L.decideAtIdle({ now: 5001, graceMs: 0 }).action).toBe("represent"); // graceMs 0 → off
  });

  it("skips an obligation whose turn ended < graceMs ago (the trailing-answer window)", () => {
    const L = new ObligationLedger();
    L.openIfAbsent(input("c:3#1", 1000));
    L.noteTurnEnded("c:3#1", 5000);
    // 30s after turn-end, grace 45s → still within grace → wait (none)
    expect(L.decideAtIdle({ now: 35000, graceMs: 45000 }).action).toBe("none");
    // 46s after turn-end → out of grace → act
    expect(L.decideAtIdle({ now: 51000, graceMs: 45000 }).action).toBe("represent");
  });

  it("an obligation that never had a turn end (still-queued) is always eligible — no trailing answer to wait for", () => {
    const L = new ObligationLedger();
    L.openIfAbsent(input("c:3#1", 1000)); // no noteTurnEnded
    expect(L.decideAtIdle({ now: 1001, graceMs: 45000 }).action).toBe("represent");
  });

  it("picks the oldest ELIGIBLE: a newer in-grace obligation does not block an older out-of-grace one", () => {
    const L = new ObligationLedger();
    L.openIfAbsent(input("c:3#1", 1000)); // older
    L.openIfAbsent(input("c:3#2", 2000)); // newer
    L.noteTurnEnded("c:3#1", 5000); // older's turn ended at 5000 (out of grace by now)
    L.noteTurnEnded("c:3#2", 60000); // newer's turn ended at 60000 (in grace)
    const d = L.decideAtIdle({ now: 70000, graceMs: 45000 });
    // older (#1) ended 65s ago → eligible; newer (#2) ended 10s ago → in grace.
    // pick oldest eligible = #1.
    expect(d.action).toBe("represent");
    expect(d.obligation?.originTurnId).toBe("c:3#1");
  });

  it("returns none when EVERY open obligation is within grace", () => {
    const L = new ObligationLedger();
    L.openIfAbsent(input("c:3#1", 1000));
    L.openIfAbsent(input("c:3#2", 2000));
    L.noteTurnEnded("c:3#1", 60000);
    L.noteTurnEnded("c:3#2", 61000);
    expect(L.decideAtIdle({ now: 65000, graceMs: 45000 }).action).toBe("none");
  });

  it("noteTurnEnded is a no-op for an unknown/closed obligation and persists when it applies", () => {
    const snapshots: Obligation[][] = [];
    const L = new ObligationLedger(2, { onChange: (s) => snapshots.push(s) });
    L.openIfAbsent(input("c:3#1", 1000)); // persist #1
    L.noteTurnEnded("nope", 5000); // unknown → no persist
    expect(snapshots.length).toBe(1);
    L.noteTurnEnded("c:3#1", 5000); // applies → persist
    expect(snapshots.length).toBe(2);
    // Optional chaining: a missing snapshot fails the assertion rather than
    // throwing a TypeError on the index access.
    expect(snapshots[1]?.[0]?.lastTurnEndedAt).toBe(5000);
  });

  it("grace still terminates the ladder: represent → escalate once each is out of grace", () => {
    const L = new ObligationLedger(2);
    L.openIfAbsent(input("c:3#1", 1000));
    // turn 0 ended at 1000; out of grace by t=50000
    L.noteTurnEnded("c:3#1", 1000);
    expect(L.decideAtIdle({ now: 50000, graceMs: 45000 }).action).toBe("represent");
    L.markRepresented("c:3#1");
    L.noteTurnEnded("c:3#1", 50000); // re-present turn ended
    expect(L.decideAtIdle({ now: 96000, graceMs: 45000 }).action).toBe("represent");
    L.markRepresented("c:3#1");
    L.noteTurnEnded("c:3#1", 96000);
    // representCount now 2 == max → escalate (once out of grace)
    expect(L.decideAtIdle({ now: 142000, graceMs: 45000 }).action).toBe("escalate");
  });
});
|