switchroom 0.14.62 → 0.14.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +148 -35
- package/telegram-plugin/gateway/auto-classify-mid-turn.ts +119 -0
- package/telegram-plugin/gateway/escalation-drive.ts +79 -0
- package/telegram-plugin/gateway/gateway.ts +146 -52
- package/telegram-plugin/gateway/obligation-ledger.ts +45 -3
- package/telegram-plugin/hooks/tool-label-pretool.mjs +32 -12
- package/telegram-plugin/tests/auto-classify-mid-turn.test.ts +87 -0
- package/telegram-plugin/tests/escalation-drive.test.ts +123 -0
- package/telegram-plugin/tests/obligation-determinism.test.ts +63 -3
- package/telegram-plugin/tests/obligation-ledger.test.ts +92 -0
|
@@ -89,12 +89,18 @@ interface Sim {
|
|
|
89
89
|
steps: number;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
92
|
+
function runSchedule(msgs: Msg[], seed: number, graceMs = 0): Sim {
|
|
93
93
|
const PATH = "/state/agent/telegram/obligations.json";
|
|
94
94
|
const store = memStore();
|
|
95
95
|
let ledger = new ObligationLedger(MAX_REPRESENTS, {
|
|
96
96
|
onChange: (snap) => persistObligations(PATH, store.fs, snap),
|
|
97
97
|
});
|
|
98
|
+
// Virtual monotonic clock (only meaningful when graceMs>0). Advances every
|
|
99
|
+
// step by exactly one SWEEP_TICK (5 s) so the grace window deterministically
|
|
100
|
+
// expires within the step budget — proving grace DELAYS but never PREVENTS a
|
|
101
|
+
// terminal (no livelock).
|
|
102
|
+
let clock = 1_000_000;
|
|
103
|
+
const SWEEP_TICK = 5_000;
|
|
98
104
|
const r = rng(seed);
|
|
99
105
|
|
|
100
106
|
const pending = [...msgs]; // not yet received
|
|
@@ -109,11 +115,18 @@ function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
|
109
115
|
};
|
|
110
116
|
|
|
111
117
|
// Run one turn for an obligation; close if the model answers on this attempt.
|
|
118
|
+
// If it does NOT answer and grace is on, stamp the turn-end clock (mirrors the
|
|
119
|
+
// gateway's endCurrentTurnAtomic !finalAnswerDelivered branch) so the next
|
|
120
|
+
// decideAtIdle({now, graceMs}) waits out the grace before re-presenting.
|
|
112
121
|
const deliverTurn = (id: string) => {
|
|
113
122
|
const had = (turnsHad.get(id) ?? 0);
|
|
114
123
|
const attemptIndex = had; // 0-based
|
|
115
124
|
turnsHad.set(id, had + 1);
|
|
116
|
-
if (byId.get(id)!.answerOnAttempt === attemptIndex)
|
|
125
|
+
if (byId.get(id)!.answerOnAttempt === attemptIndex) {
|
|
126
|
+
close(id, "answered");
|
|
127
|
+
} else if (graceMs > 0 && ledger.isOpen(id)) {
|
|
128
|
+
ledger.noteTurnEnded(id, clock);
|
|
129
|
+
}
|
|
117
130
|
};
|
|
118
131
|
|
|
119
132
|
const ESC_IN_FLIGHT = new Set<string>(); // mirrors the gateway's concurrency guard (no-op in a sync model)
|
|
@@ -139,7 +152,14 @@ function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
|
139
152
|
});
|
|
140
153
|
deliverTurn(m.id); // original turn (attempt 0)
|
|
141
154
|
} else if (open) {
|
|
142
|
-
const decision =
|
|
155
|
+
const decision =
|
|
156
|
+
graceMs > 0 ? ledger.decideAtIdle({ now: clock, graceMs }) : ledger.decideAtIdle();
|
|
157
|
+
if (decision.action === "none") {
|
|
158
|
+
// Every open obligation is within its grace window — the sweep waits.
|
|
159
|
+
// Advance the clock so grace deterministically expires; no livelock.
|
|
160
|
+
clock += SWEEP_TICK;
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
143
163
|
const o = decision.obligation as Obligation;
|
|
144
164
|
// INVARIANT (no double-ask): a terminated obligation must never resurface.
|
|
145
165
|
expect(terminals.has(o.originTurnId)).toBe(false);
|
|
@@ -169,6 +189,9 @@ function runSchedule(msgs: Msg[], seed: number): Sim {
|
|
|
169
189
|
});
|
|
170
190
|
ledger.hydrate(loadObligations(PATH, store.fs));
|
|
171
191
|
}
|
|
192
|
+
// Advance the virtual clock every step so any stamped grace window
|
|
193
|
+
// deterministically expires within the step budget.
|
|
194
|
+
clock += SWEEP_TICK;
|
|
172
195
|
}
|
|
173
196
|
|
|
174
197
|
return { terminals, steps };
|
|
@@ -220,6 +243,43 @@ describe("obligation determinism — every inbound reaches a terminal, no silent
|
|
|
220
243
|
}
|
|
221
244
|
});
|
|
222
245
|
|
|
246
|
+
it("holds across 3000 schedules WITH the escalate-grace window on (grace delays, never prevents a terminal)", () => {
|
|
247
|
+
const ANSWER = [0, 1, 2, 3, 99];
|
|
248
|
+
const ESCFAIL = [0, 1, 2, 3, 5];
|
|
249
|
+
const GRACE_MS = 45_000;
|
|
250
|
+
for (let seed = 1; seed <= 3000; seed++) {
|
|
251
|
+
const r = rng(seed * 7919);
|
|
252
|
+
const n = 1 + Math.floor(r() * 5);
|
|
253
|
+
const msgs: Msg[] = [];
|
|
254
|
+
for (let i = 0; i < n; i++) {
|
|
255
|
+
const msgId = seed * 100 + i;
|
|
256
|
+
msgs.push({
|
|
257
|
+
id: `c:3#${msgId}`,
|
|
258
|
+
msgId,
|
|
259
|
+
answerOnAttempt: pick(ANSWER, r),
|
|
260
|
+
escalateFailsFor: pick(ESCFAIL, r),
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
// Same enumeration as the no-grace proof, but the ledger now runs the grace
|
|
264
|
+
// path: every non-answering turn stamps noteTurnEnded and the sweep waits
|
|
265
|
+
// out the window before acting. The terminal each message reaches must be
|
|
266
|
+
// IDENTICAL to the no-grace run — grace only delays.
|
|
267
|
+
const { terminals, steps } = runSchedule(msgs, seed * 104729, GRACE_MS);
|
|
268
|
+
expect(steps).toBeLessThan(10_000); // still terminates (no grace livelock)
|
|
269
|
+
for (const m of msgs) {
|
|
270
|
+
const t = terminals.get(m.id);
|
|
271
|
+
expect(t, `grace seed=${seed} msg=${m.id} answer=${m.answerOnAttempt} escFail=${m.escalateFailsFor}`).toBeDefined();
|
|
272
|
+
if (m.answerOnAttempt <= MAX_REPRESENTS) {
|
|
273
|
+
expect(t).toBe("answered");
|
|
274
|
+
} else if (m.escalateFailsFor < ESCALATE_MAX) {
|
|
275
|
+
expect(t).toBe("escalation-delivered");
|
|
276
|
+
} else {
|
|
277
|
+
expect(t).toBe("escalation-give-up");
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
});
|
|
282
|
+
|
|
223
283
|
it("a delivered-but-unanswered obligation survives a restart and is escalated, not lost", () => {
|
|
224
284
|
// Deterministic single case: model NEVER answers, escalation succeeds first try,
|
|
225
285
|
// with a restart forced mid-life via a seed that triggers the 0.15 branch.
|
|
@@ -225,6 +225,20 @@ describe("ObligationLedger — durability hooks + escalate-attempt counter", ()
|
|
|
225
225
|
expect(L.decideAtIdle().action).toBe("escalate");
|
|
226
226
|
});
|
|
227
227
|
|
|
228
|
+
it("interrupt-cancel semantics: closing the in-flight turn's obligation removes it from the sweep, sibling untouched", () => {
|
|
229
|
+
// Mirrors cancelInterruptedObligation: an `!` interrupt SIGINT-kills the
|
|
230
|
+
// in-flight turn and closes its obligation, so the sweep can't later
|
|
231
|
+
// re-present/escalate the question the user explicitly redirected away from.
|
|
232
|
+
// A queued SIBLING obligation must survive.
|
|
233
|
+
const L = new ObligationLedger();
|
|
234
|
+
L.openIfAbsent(input("c:3#700", 1000)); // the in-flight turn's message
|
|
235
|
+
L.openIfAbsent(input("c:3#701", 1001)); // a queued sibling
|
|
236
|
+
expect(L.close("c:3#700")).toBe(true); // interrupt cancels the in-flight one
|
|
237
|
+
expect(L.isOpen("c:3#700")).toBe(false);
|
|
238
|
+
expect(L.decideAtIdle().obligation?.originTurnId).toBe("c:3#701"); // sibling still actionable
|
|
239
|
+
expect(L.close("c:3#700")).toBe(false); // re-close / unknown is a safe no-op
|
|
240
|
+
});
|
|
241
|
+
|
|
228
242
|
it("hydrate skips malformed rows", () => {
|
|
229
243
|
const L = new ObligationLedger();
|
|
230
244
|
L.hydrate([
|
|
@@ -234,3 +248,81 @@ describe("ObligationLedger — durability hooks + escalate-attempt counter", ()
|
|
|
234
248
|
expect(L.size()).toBe(1);
|
|
235
249
|
});
|
|
236
250
|
});
|
|
251
|
+
|
|
252
|
+
describe("ObligationLedger — escalate-grace window (over-escalation fix)", () => {
|
|
253
|
+
function input(id: string, openedAt: number) {
|
|
254
|
+
return { originTurnId: id, chatId: "-100123", threadId: 3, messageId: Number(id.split("#").pop() ?? 0), text: "x", openedAt };
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
it("no opts (or graceMs<=0) → pre-grace behaviour: acts immediately", () => {
|
|
258
|
+
const L = new ObligationLedger();
|
|
259
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
260
|
+
L.noteTurnEnded("c:3#1", 5000); // a turn just ended
|
|
261
|
+
expect(L.decideAtIdle().action).toBe("represent"); // no grace → act now
|
|
262
|
+
expect(L.decideAtIdle({ now: 5001, graceMs: 0 }).action).toBe("represent"); // graceMs 0 → off
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
it("skips an obligation whose turn ended < graceMs ago (the trailing-answer window)", () => {
|
|
266
|
+
const L = new ObligationLedger();
|
|
267
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
268
|
+
L.noteTurnEnded("c:3#1", 5000);
|
|
269
|
+
// 30s after turn-end, grace 45s → still within grace → wait (none)
|
|
270
|
+
expect(L.decideAtIdle({ now: 35000, graceMs: 45000 }).action).toBe("none");
|
|
271
|
+
// 46s after turn-end → out of grace → act
|
|
272
|
+
expect(L.decideAtIdle({ now: 51000, graceMs: 45000 }).action).toBe("represent");
|
|
273
|
+
});
|
|
274
|
+
|
|
275
|
+
it("an obligation that never had a turn end (still-queued) is always eligible — no trailing answer to wait for", () => {
|
|
276
|
+
const L = new ObligationLedger();
|
|
277
|
+
L.openIfAbsent(input("c:3#1", 1000)); // no noteTurnEnded
|
|
278
|
+
expect(L.decideAtIdle({ now: 1001, graceMs: 45000 }).action).toBe("represent");
|
|
279
|
+
});
|
|
280
|
+
|
|
281
|
+
it("picks the oldest ELIGIBLE: a newer in-grace obligation does not block an older out-of-grace one", () => {
|
|
282
|
+
const L = new ObligationLedger();
|
|
283
|
+
L.openIfAbsent(input("c:3#1", 1000)); // older
|
|
284
|
+
L.openIfAbsent(input("c:3#2", 2000)); // newer
|
|
285
|
+
L.noteTurnEnded("c:3#1", 5000); // older's turn ended at 5000 (out of grace by now)
|
|
286
|
+
L.noteTurnEnded("c:3#2", 60000); // newer's turn ended at 60000 (in grace)
|
|
287
|
+
const d = L.decideAtIdle({ now: 70000, graceMs: 45000 });
|
|
288
|
+
// older (#1) ended 65s ago → eligible; newer (#2) ended 10s ago → in grace.
|
|
289
|
+
// pick oldest eligible = #1.
|
|
290
|
+
expect(d.action).toBe("represent");
|
|
291
|
+
expect(d.obligation?.originTurnId).toBe("c:3#1");
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
it("returns none when EVERY open obligation is within grace", () => {
|
|
295
|
+
const L = new ObligationLedger();
|
|
296
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
297
|
+
L.openIfAbsent(input("c:3#2", 2000));
|
|
298
|
+
L.noteTurnEnded("c:3#1", 60000);
|
|
299
|
+
L.noteTurnEnded("c:3#2", 61000);
|
|
300
|
+
expect(L.decideAtIdle({ now: 65000, graceMs: 45000 }).action).toBe("none");
|
|
301
|
+
});
|
|
302
|
+
|
|
303
|
+
it("noteTurnEnded is a no-op for an unknown/closed obligation and persists when it applies", () => {
|
|
304
|
+
const snapshots: Obligation[][] = [];
|
|
305
|
+
const L = new ObligationLedger(2, { onChange: (s) => snapshots.push(s) });
|
|
306
|
+
L.openIfAbsent(input("c:3#1", 1000)); // persist #1
|
|
307
|
+
L.noteTurnEnded("nope", 5000); // unknown → no persist
|
|
308
|
+
expect(snapshots.length).toBe(1);
|
|
309
|
+
L.noteTurnEnded("c:3#1", 5000); // applies → persist
|
|
310
|
+
expect(snapshots.length).toBe(2);
|
|
311
|
+
expect(snapshots[1][0].lastTurnEndedAt).toBe(5000);
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
it("grace still terminates the ladder: represent → escalate once each is out of grace", () => {
|
|
315
|
+
const L = new ObligationLedger(2);
|
|
316
|
+
L.openIfAbsent(input("c:3#1", 1000));
|
|
317
|
+
// turn 0 ended at 1000; out of grace by t=50000
|
|
318
|
+
L.noteTurnEnded("c:3#1", 1000);
|
|
319
|
+
expect(L.decideAtIdle({ now: 50000, graceMs: 45000 }).action).toBe("represent");
|
|
320
|
+
L.markRepresented("c:3#1");
|
|
321
|
+
L.noteTurnEnded("c:3#1", 50000); // re-present turn ended
|
|
322
|
+
expect(L.decideAtIdle({ now: 96000, graceMs: 45000 }).action).toBe("represent");
|
|
323
|
+
L.markRepresented("c:3#1");
|
|
324
|
+
L.noteTurnEnded("c:3#1", 96000);
|
|
325
|
+
// representCount now 2 == max → escalate (once out of grace)
|
|
326
|
+
expect(L.decideAtIdle({ now: 142000, graceMs: 45000 }).action).toBe("escalate");
|
|
327
|
+
});
|
|
328
|
+
});
|