switchroom 0.13.20 → 0.13.21
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +3 -3
- package/telegram-plugin/dist/gateway/gateway.js +87 -25
- package/telegram-plugin/gateway/disconnect-flush.ts +37 -0
- package/telegram-plugin/gateway/gateway.ts +100 -7
- package/telegram-plugin/gateway/inbound-delivery-gate.ts +37 -4
- package/telegram-plugin/handoff-continuity.ts +8 -2
- package/telegram-plugin/recent-outbound-dedup.ts +51 -5
- package/telegram-plugin/runtime-metrics.ts +5 -1
- package/telegram-plugin/subagent-watcher.ts +25 -3
- package/telegram-plugin/tests/gateway-disconnect-flush.test.ts +114 -0
- package/telegram-plugin/tests/handoff-continuity.test.ts +15 -2
- package/telegram-plugin/tests/inbound-delivery-gate.test.ts +77 -4
- package/telegram-plugin/tests/recent-outbound-dedup.test.ts +72 -0
- package/telegram-plugin/tests/subagent-watcher-enoent-deregister.test.ts +152 -0
- package/telegram-plugin/uat/scenarios/jtbd-rapid-followup-dm.test.ts +72 -45
|
@@ -1,38 +1,35 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* JTBD scenario — rapid follow-ups (steering vs queued classification).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Live contract codified in `_shared/telegram-style.md.hbs` and
|
|
5
|
+
* `reference/steer-or-queue-mid-flight.md` (default-flip commits
|
|
6
|
+
* `4fff90bf` + `597a58af`, 2026-04-17):
|
|
5
7
|
*
|
|
6
|
-
* - A follow-up
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* - A follow-up prefixed with `/
|
|
10
|
-
*
|
|
11
|
-
* in-flight
|
|
8
|
+
* - A mid-turn follow-up with NO prefix is `queued="true"` — new
|
|
9
|
+
* independent task. The agent should NOT reference the in-flight
|
|
10
|
+
* work.
|
|
11
|
+
* - A mid-turn follow-up prefixed with `/steer ` or `/s ` is
|
|
12
|
+
* `steering="true"` — course-correction; the agent continues the
|
|
13
|
+
* in-flight task incorporating the new guidance.
|
|
14
|
+
* - Legacy `/queue ` / `/q ` is a redundant alias for the default;
|
|
15
|
+
* still works.
|
|
12
16
|
*
|
|
13
|
-
* This UAT fires both shapes and asserts the agent
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
* top of its reply. So we can pattern-match on that.
|
|
17
|
+
* This UAT fires both shapes and asserts the agent narrates the
|
|
18
|
+
* classification correctly. The prior version of this scenario
|
|
19
|
+
* (2026-05-13 / PR #1132) tested the pre-flip contract with
|
|
20
|
+
* too-loose assertions (`/md5/i` regex passes on the queued path
|
|
21
|
+
* by coincidence — the model answers "use md5" fresh and the reply
|
|
22
|
+
* contains "md5"). After unskipping with the corrected contract,
|
|
23
|
+
* the assertions check for the italic classification line the
|
|
24
|
+
* prompt instructs the agent to emit.
|
|
22
25
|
*/
|
|
23
26
|
|
|
24
27
|
import { describe, it, expect } from "vitest";
|
|
25
28
|
import { spinUp } from "../harness.js";
|
|
26
29
|
|
|
27
|
-
|
|
28
|
-
// surface "md5"; queued didn't produce the expected fresh-task reply).
|
|
29
|
-
// May be real classification bugs, may be prompt fragility — neither
|
|
30
|
-
// has been root-caused. Excluded from the buildkite gate so it doesn't
|
|
31
|
-
// block every PR touching telegram-plugin/. Run locally via
|
|
32
|
-
// `bun run test:uat` once classification has been investigated.
|
|
33
|
-
describe.skip("uat: rapid follow-ups — steering vs queued", () => {
|
|
30
|
+
describe("uat: rapid follow-ups — steering vs queued classification", () => {
|
|
34
31
|
it(
|
|
35
|
-
"follow-up
|
|
32
|
+
"follow-up with /steer prefix → agent self-narrates as steering",
|
|
36
33
|
async () => {
|
|
37
34
|
const sc = await spinUp({ agent: "test-harness" });
|
|
38
35
|
try {
|
|
@@ -43,26 +40,39 @@ describe.skip("uat: rapid follow-ups — steering vs queued", () => {
|
|
|
43
40
|
+ "Show the work step by step with a 2-second pause between.",
|
|
44
41
|
);
|
|
45
42
|
await new Promise((r) => setTimeout(r, 3_000));
|
|
46
|
-
// Steer: change the algorithm
|
|
47
|
-
await sc.sendDM("actually use md5 not sha256");
|
|
43
|
+
// Steer: change the algorithm using the explicit /steer prefix.
|
|
44
|
+
await sc.sendDM("/steer actually use md5 not sha256");
|
|
48
45
|
|
|
49
|
-
// The agent should reply mentioning md5
|
|
50
|
-
//
|
|
51
|
-
//
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
46
|
+
// The agent should reply mentioning md5 AND surface the italic
|
|
47
|
+
// classification line per the prompt
|
|
48
|
+
// ("_↪️ treating as steer on the prior task_" or similar).
|
|
49
|
+
// We match either explicit-steer narration OR the steer emoji
|
|
50
|
+
// (`↪️`) to allow for natural-language variation while still
|
|
51
|
+
// failing if no narration appears (the previous version of
|
|
52
|
+
// this UAT was too loose — bare `/md5/i` passed by coincidence
|
|
53
|
+
// on the queued path).
|
|
54
|
+
const reply = await sc.expectMessage(
|
|
55
|
+
(m) => {
|
|
56
|
+
const txt = m.text;
|
|
57
|
+
const mentionsMd5 = /\bmd5\b/i.test(txt);
|
|
58
|
+
const narratesSteer =
|
|
59
|
+
/↪️|\bsteer(ing)?\b|continuing the (prior|original|in-flight) task|amendment|course[- ]correct/i.test(
|
|
60
|
+
txt,
|
|
61
|
+
);
|
|
62
|
+
return mentionsMd5 && narratesSteer;
|
|
63
|
+
},
|
|
64
|
+
{ from: "bot", timeout: 120_000 },
|
|
65
|
+
);
|
|
56
66
|
expect(reply.text.toLowerCase()).toContain("md5");
|
|
57
67
|
} finally {
|
|
58
68
|
await sc.tearDown();
|
|
59
69
|
}
|
|
60
70
|
},
|
|
61
|
-
|
|
71
|
+
180_000,
|
|
62
72
|
);
|
|
63
73
|
|
|
64
74
|
it(
|
|
65
|
-
"follow-up
|
|
75
|
+
"follow-up with no prefix mid-turn → agent treats as queued (new task)",
|
|
66
76
|
async () => {
|
|
67
77
|
const sc = await spinUp({ agent: "test-harness" });
|
|
68
78
|
try {
|
|
@@ -71,9 +81,10 @@ describe.skip("uat: rapid follow-ups — steering vs queued", () => {
|
|
|
71
81
|
+ "Use bash.",
|
|
72
82
|
);
|
|
73
83
|
await new Promise((r) => setTimeout(r, 3_000));
|
|
74
|
-
//
|
|
75
|
-
//
|
|
76
|
-
|
|
84
|
+
// No prefix — the default-flipped contract says this is a
|
|
85
|
+
// QUEUED new task. The agent should NOT reference the
|
|
86
|
+
// counting work.
|
|
87
|
+
await sc.sendDM("what is 2+2?");
|
|
77
88
|
|
|
78
89
|
// First reply should be from the counting task (still
|
|
79
90
|
// in-flight). Then a second reply for the queued task.
|
|
@@ -81,16 +92,32 @@ describe.skip("uat: rapid follow-ups — steering vs queued", () => {
|
|
|
81
92
|
from: "bot",
|
|
82
93
|
timeout: 60_000,
|
|
83
94
|
});
|
|
84
|
-
|
|
85
|
-
//
|
|
86
|
-
//
|
|
95
|
+
|
|
96
|
+
// Second reply: the queued task's answer. We want to see
|
|
97
|
+
// EITHER the italic queued-narration line OR a fresh "4"
|
|
98
|
+
// answer that doesn't reference the counting work.
|
|
87
99
|
const secondReply = await sc.expectMessage(
|
|
88
|
-
(m) =>
|
|
89
|
-
m.messageId
|
|
90
|
-
|
|
100
|
+
(m) => {
|
|
101
|
+
if (m.messageId <= firstReply.messageId) return false;
|
|
102
|
+
const txt = m.text;
|
|
103
|
+
const answersTheQuestion =
|
|
104
|
+
/\b4\b|\bfour\b|two\s+plus\s+two|2\s*\+\s*2/i.test(txt);
|
|
105
|
+
const narratesQueued =
|
|
106
|
+
/📥|\bqueued\b|new\s+(?:independent\s+)?task|fresh\s+task/i.test(
|
|
107
|
+
txt,
|
|
108
|
+
);
|
|
109
|
+
// Pass if either: the explicit narration is present, OR the
|
|
110
|
+
// reply answers cleanly without referencing the counting
|
|
111
|
+
// task. The latter is the substantive behavioural check —
|
|
112
|
+
// the queued task is isolated from the in-flight context.
|
|
113
|
+
const isolatedFromCounting = !/\bcount(ing)?\b|\bsleep\b/i.test(
|
|
114
|
+
txt,
|
|
115
|
+
);
|
|
116
|
+
return answersTheQuestion && (narratesQueued || isolatedFromCounting);
|
|
117
|
+
},
|
|
91
118
|
{ from: "bot", timeout: 120_000 },
|
|
92
119
|
);
|
|
93
|
-
expect(secondReply.text).toMatch(/4|
|
|
120
|
+
expect(secondReply.text).toMatch(/4|four|2\s*\+\s*2/i);
|
|
94
121
|
} finally {
|
|
95
122
|
await sc.tearDown();
|
|
96
123
|
}
|