switchroom 0.14.50 → 0.14.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/answer-stream-flag.ts +18 -0
- package/telegram-plugin/dist/gateway/gateway.js +16 -17
- package/telegram-plugin/gateway/gateway.ts +39 -18
- package/telegram-plugin/gateway/resume-inbound-builder.ts +18 -9
- package/telegram-plugin/tests/answer-stream-flag.test.ts +27 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +35 -0
- package/telegram-plugin/tests/resume-inbound-builder.test.ts +16 -6
- package/telegram-plugin/uat/scenarios/jtbd-interrupted-turn-resumes-dm.test.ts +90 -0
- package/telegram-plugin/uat/scenarios/jtbd-message-during-restart-channel.test.ts +95 -0
- package/telegram-plugin/uat/scenarios/jtbd-message-during-restart-dm.test.ts +95 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -49462,8 +49462,8 @@ var {
|
|
|
49462
49462
|
} = import__.default;
|
|
49463
49463
|
|
|
49464
49464
|
// src/build-info.ts
|
|
49465
|
-
var VERSION = "0.14.50";
|
|
49466
|
-
var COMMIT_SHA = "
|
|
49465
|
+
var VERSION = "0.14.52";
|
|
49466
|
+
var COMMIT_SHA = "98b4f7c3";
|
|
49467
49467
|
|
|
49468
49468
|
// src/cli/agent.ts
|
|
49469
49469
|
init_source();
|
package/package.json
CHANGED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse the `SWITCHROOM_VISIBLE_ANSWER_STREAM` env flag.
|
|
3
|
+
*
|
|
4
|
+
* Default **OFF** (flipped 2026-06-03, operator request) — see the rationale
|
|
5
|
+
* on the `ANSWER_STREAM_VISIBLE_ENABLED` gate in `gateway/gateway.ts`. When
|
|
6
|
+
* off, the answer lane streams to the invisible compose-box draft and the
|
|
7
|
+
* reply tool is the single canonical formatted message — no unformatted
|
|
8
|
+
* preliminary that flashes and gets deleted. Opt back IN per agent with
|
|
9
|
+
* `SWITCHROOM_VISIBLE_ANSWER_STREAM=1` (also accepts true/on/yes).
|
|
10
|
+
*
|
|
11
|
+
* Extracted as a pure function so the default + parsing are unit-testable
|
|
12
|
+
* (gateway.ts is not importable in isolation — top-level side effects).
|
|
13
|
+
*/
|
|
14
|
+
export function parseVisibleAnswerStreamEnabled(raw: string | undefined): boolean {
|
|
15
|
+
if (raw == null) return false
|
|
16
|
+
const v = raw.trim().toLowerCase()
|
|
17
|
+
return v === '1' || v === 'true' || v === 'on' || v === 'yes'
|
|
18
|
+
}
|
|
@@ -39700,6 +39700,14 @@ function createAnswerStream(config) {
|
|
|
39700
39700
|
};
|
|
39701
39701
|
}
|
|
39702
39702
|
|
|
39703
|
+
// answer-stream-flag.ts
|
|
39704
|
+
function parseVisibleAnswerStreamEnabled(raw) {
|
|
39705
|
+
if (raw == null)
|
|
39706
|
+
return false;
|
|
39707
|
+
const v = raw.trim().toLowerCase();
|
|
39708
|
+
return v === "1" || v === "true" || v === "on" || v === "yes";
|
|
39709
|
+
}
|
|
39710
|
+
|
|
39703
39711
|
// pty-tail.ts
|
|
39704
39712
|
var import_headless = __toESM(require_xterm_headless(), 1);
|
|
39705
39713
|
var PTY_DEBUG = process.env.SWITCHROOM_PTY_DEBUG === "1";
|
|
@@ -52158,10 +52166,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
52158
52166
|
}
|
|
52159
52167
|
|
|
52160
52168
|
// ../src/build-info.ts
|
|
52161
|
-
var VERSION = "0.14.50";
|
|
52162
|
-
var COMMIT_SHA = "
|
|
52163
|
-
var COMMIT_DATE = "2026-06-
|
|
52164
|
-
var LATEST_PR =
|
|
52169
|
+
var VERSION = "0.14.52";
|
|
52170
|
+
var COMMIT_SHA = "98b4f7c3";
|
|
52171
|
+
var COMMIT_DATE = "2026-06-03T13:20:36Z";
|
|
52172
|
+
var LATEST_PR = 2129;
|
|
52165
52173
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
52166
52174
|
|
|
52167
52175
|
// gateway/boot-version.ts
|
|
@@ -52670,8 +52678,7 @@ function promptClause(turn) {
|
|
|
52670
52678
|
const p = turn.user_prompt_preview?.trim();
|
|
52671
52679
|
if (!p)
|
|
52672
52680
|
return "";
|
|
52673
|
-
|
|
52674
|
-
return ` The request was: "${snippet}".`;
|
|
52681
|
+
return ` The start of the request was: "${p}".`;
|
|
52675
52682
|
}
|
|
52676
52683
|
function buildResumeInterruptedInbound(ctx) {
|
|
52677
52684
|
const ts = ctx.nowMs ?? Date.now();
|
|
@@ -52696,7 +52703,7 @@ function buildResumeInterruptedInbound(ctx) {
|
|
|
52696
52703
|
user: "switchroom",
|
|
52697
52704
|
userId: 0,
|
|
52698
52705
|
ts,
|
|
52699
|
-
text: `You just restarted. Your previous turn was interrupted ${elapsed} ago, ` + `before it finished \u2014 it was cut off by a restart, not completed.` + promptClause(ctx.turn) + `
|
|
52706
|
+
text: `You just restarted. Your previous turn was interrupted ${elapsed} ago, ` + `before it finished \u2014 it was cut off by a restart, not completed.` + promptClause(ctx.turn) + ` That quoted text is only the first ~200 characters of the original ` + `request, and you've lost your in-memory context across the restart \u2014 so ` + `BEFORE you continue, call get_recent_messages for this chat to read your ` + `full original message and the surrounding conversation, so you resume the ` + `COMPLETE task (including any instructions in the tail of a long request), ` + `not just the truncated preview. Then pick that work back up and carry it ` + `through to completion. In your first message, briefly let the user know ` + `you're resuming what was interrupted (mention roughly how long ago in ` + `plain language) so they're not left wondering \u2014 then carry on with the ` + `actual task. Do not ask whether to resume; just resume. If even after ` + `reading the recent messages you genuinely can't tell what the work was, ` + `say so and ask.`,
|
|
52700
52707
|
meta
|
|
52701
52708
|
};
|
|
52702
52709
|
}
|
|
@@ -54309,15 +54316,7 @@ var STREAM_THROTTLE_MS_OVERRIDE = (() => {
|
|
|
54309
54316
|
return Number.isFinite(n) && n >= 0 ? n : undefined;
|
|
54310
54317
|
})();
|
|
54311
54318
|
var TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled();
|
|
54312
|
-
var ANSWER_STREAM_VISIBLE_ENABLED = (
|
|
54313
|
-
const raw = process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM;
|
|
54314
|
-
if (raw == null)
|
|
54315
|
-
return true;
|
|
54316
|
-
const v = raw.trim().toLowerCase();
|
|
54317
|
-
if (v === "0" || v === "false" || v === "off" || v === "no")
|
|
54318
|
-
return false;
|
|
54319
|
-
return true;
|
|
54320
|
-
})();
|
|
54319
|
+
var ANSWER_STREAM_VISIBLE_ENABLED = parseVisibleAnswerStreamEnabled(process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM);
|
|
54321
54320
|
var progressDriver = null;
|
|
54322
54321
|
var unpinProgressCardForChat = null;
|
|
54323
54322
|
var getPinnedProgressCardMessageId = null;
|
|
@@ -57087,7 +57086,7 @@ function handleSessionEvent(ev) {
|
|
|
57087
57086
|
chatId: turn.sessionChatId,
|
|
57088
57087
|
isPrivateChat: turn.isDm,
|
|
57089
57088
|
threadId: turn.sessionThreadId,
|
|
57090
|
-
...ANSWER_STREAM_VISIBLE_ENABLED ? { minInitialChars: 1 } : { sendMessageDraft: sendMessageDraftFn },
|
|
57089
|
+
...ANSWER_STREAM_VISIBLE_ENABLED ? { minInitialChars: 1 } : { sendMessageDraft: sendMessageDraftFn, minInitialChars: Number.MAX_SAFE_INTEGER },
|
|
57091
57090
|
sendMessage: async (chatId, text, params) => {
|
|
57092
57091
|
const tid = params?.message_thread_id;
|
|
57093
57092
|
const silent = params?.purpose !== "materialize";
|
|
@@ -96,6 +96,7 @@ import * as pendingProgress from '../pending-work-progress.js'
|
|
|
96
96
|
import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
|
|
97
97
|
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
98
98
|
import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
|
|
99
|
+
import { parseVisibleAnswerStreamEnabled } from '../answer-stream-flag.js'
|
|
99
100
|
import { type SessionEvent } from '../session-tail.js'
|
|
100
101
|
import {
|
|
101
102
|
shouldSuppressToolActivity,
|
|
@@ -3738,23 +3739,33 @@ const TURN_FLUSH_SAFETY_ENABLED = isTurnFlushSafetyEnabled()
|
|
|
3738
3739
|
// the cross-turn pending-progress system (#1445/#1669) is the
|
|
3739
3740
|
// canonical surface and DOES ping at the appropriate boundaries.
|
|
3740
3741
|
//
|
|
3741
|
-
// 2026-05-25: default flipped ON after fleet-log audit showed
|
|
3742
|
-
// framework-fallback rate
|
|
3743
|
-
//
|
|
3744
|
-
//
|
|
3745
|
-
//
|
|
3746
|
-
//
|
|
3747
|
-
//
|
|
3748
|
-
//
|
|
3749
|
-
//
|
|
3750
|
-
//
|
|
3751
|
-
|
|
3752
|
-
|
|
3753
|
-
|
|
3754
|
-
|
|
3755
|
-
|
|
3756
|
-
|
|
3757
|
-
|
|
3742
|
+
// 2026-05-25: default flipped ON after a fleet-log audit showed a ~19%
|
|
3743
|
+
// framework-fallback ("still working…") rate — the visible stream gave an
|
|
3744
|
+
// immediate in-timeline signal that suppressed the silence-poke.
|
|
3745
|
+
//
|
|
3746
|
+
// 2026-06-03: default flipped back OFF (operator request). In practice the
|
|
3747
|
+
// visible stream delivered ~none of its intended benefit while imposing a
|
|
3748
|
+
// jarring cost:
|
|
3749
|
+
// - Telegram rate-limits editMessageText to roughly once/second, so real
|
|
3750
|
+
// "watch it type" streaming is impossible; and the model emits almost no
|
|
3751
|
+
// interstitial assistant.text (it thinks → tool → reply), so the
|
|
3752
|
+
// preliminary was a near-empty bubble (observed: 5–13 byte edits).
|
|
3753
|
+
// - On every turn where the model calls the reply tool (≈always), the reply
|
|
3754
|
+
// posts a SEPARATE canonical message and the stream RETRACTS (deletes) its
|
|
3755
|
+
// preliminary — the user sees a raw bubble appear then vanish, replaced by
|
|
3756
|
+
// the formatted reply. In supergroup topics it also mis-routed (preliminary
|
|
3757
|
+
// → General, reply → topic). Net: an unformatted flash + a delete, no
|
|
3758
|
+
// streaming value.
|
|
3759
|
+
// The anti-silence role the visible stream once filled is now covered by the
|
|
3760
|
+
// live ACTIVITY FEED (tool-use streaming, below), the "…typing" chat-action
|
|
3761
|
+
// loop, and `thinking_effort: low` (fast tool-less turns) — so off-by-default
|
|
3762
|
+
// no longer regresses the framework-fallback rate. With the flag off the lane
|
|
3763
|
+
// uses the invisible compose-box draft (the original default, #1664-compatible)
|
|
3764
|
+
// and the reply tool is the single canonical, formatted message.
|
|
3765
|
+
// Opt back IN per agent with SWITCHROOM_VISIBLE_ANSWER_STREAM=1.
|
|
3766
|
+
const ANSWER_STREAM_VISIBLE_ENABLED = parseVisibleAnswerStreamEnabled(
|
|
3767
|
+
process.env.SWITCHROOM_VISIBLE_ANSWER_STREAM,
|
|
3768
|
+
)
|
|
3758
3769
|
|
|
3759
3770
|
// Activity feed. The gateway streams a live "what it's doing" tool-activity
|
|
3760
3771
|
// feed for every turn. The PreToolUse sidecar emits a `tool_label` per tool
|
|
@@ -8426,9 +8437,19 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
8426
8437
|
// tool_use stream (case 'tool_use' above) where the real
|
|
8427
8438
|
// signal lives. assistant.text keeps its visible-message
|
|
8428
8439
|
// home; the reply tool stays the canonical answer.
|
|
8440
|
+
// Flag OFF (default): use the compose-box draft for DMs, and set
|
|
8441
|
+
// minInitialChars effectively-infinite so the lane NEVER opens a
|
|
8442
|
+
// visible chat message. This matters in supergroup TOPICS, where
|
|
8443
|
+
// draft transport is unsupported (gateway.ts:6422) so the lane
|
|
8444
|
+
// would otherwise fall to message transport and post a visible
|
|
8445
|
+
// preview once interstitial text passed the default 50-char gate
|
|
8446
|
+
// — which retract() then deletes (the unformatted flash, marko
|
|
8447
|
+
// General). With the gate unreachable the only posted message is
|
|
8448
|
+
// the canonical reply. (The gate is bypassed for DM draft
|
|
8449
|
+
// transport, so DM draft streaming is unaffected.)
|
|
8429
8450
|
...(ANSWER_STREAM_VISIBLE_ENABLED
|
|
8430
8451
|
? { minInitialChars: 1 }
|
|
8431
|
-
: { sendMessageDraft: sendMessageDraftFn }),
|
|
8452
|
+
: { sendMessageDraft: sendMessageDraftFn, minInitialChars: Number.MAX_SAFE_INTEGER }),
|
|
8432
8453
|
// #1075: route through robustApiCall so flood-wait,
|
|
8433
8454
|
// benign-400, and THREAD_NOT_FOUND are handled uniformly
|
|
8434
8455
|
// instead of crashing the answer-stream loop on a deleted
|
|
@@ -66,9 +66,12 @@ function threadIdNum(turn: Turn): number | undefined {
|
|
|
66
66
|
function promptClause(turn: Turn): string {
|
|
67
67
|
const p = turn.user_prompt_preview?.trim()
|
|
68
68
|
if (!p) return ''
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
69
|
+
// The stored preview is already capped at the first ~200 chars of the user
|
|
70
|
+
// message (TURN_PREVIEW_MAX). Include it verbatim as a hint — do NOT
|
|
71
|
+
// re-truncate (the old 160-char slice dropped instructions that lived in the
|
|
72
|
+
// tail of a longer request). The FULL original is recovered via
|
|
73
|
+
// get_recent_messages; the resume body tells the model to do that.
|
|
74
|
+
return ` The start of the request was: "${p}".`
|
|
72
75
|
}
|
|
73
76
|
|
|
74
77
|
/**
|
|
@@ -117,12 +120,18 @@ export function buildResumeInterruptedInbound(ctx: ResumeInboundContext): Inboun
|
|
|
117
120
|
`You just restarted. Your previous turn was interrupted ${elapsed} ago, ` +
|
|
118
121
|
`before it finished — it was cut off by a restart, not completed.` +
|
|
119
122
|
promptClause(ctx.turn) +
|
|
120
|
-
`
|
|
121
|
-
`
|
|
122
|
-
`
|
|
123
|
-
`
|
|
124
|
-
`
|
|
125
|
-
`
|
|
123
|
+
` That quoted text is only the first ~200 characters of the original ` +
|
|
124
|
+
`request, and you've lost your in-memory context across the restart — so ` +
|
|
125
|
+
`BEFORE you continue, call get_recent_messages for this chat to read your ` +
|
|
126
|
+
`full original message and the surrounding conversation, so you resume the ` +
|
|
127
|
+
`COMPLETE task (including any instructions in the tail of a long request), ` +
|
|
128
|
+
`not just the truncated preview. Then pick that work back up and carry it ` +
|
|
129
|
+
`through to completion. In your first message, briefly let the user know ` +
|
|
130
|
+
`you're resuming what was interrupted (mention roughly how long ago in ` +
|
|
131
|
+
`plain language) so they're not left wondering — then carry on with the ` +
|
|
132
|
+
`actual task. Do not ask whether to resume; just resume. If even after ` +
|
|
133
|
+
`reading the recent messages you genuinely can't tell what the work was, ` +
|
|
134
|
+
`say so and ask.`,
|
|
126
135
|
meta,
|
|
127
136
|
}
|
|
128
137
|
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pin the SWITCHROOM_VISIBLE_ANSWER_STREAM contract: default OFF (2026-06-03),
|
|
3
|
+
* opt-in only on a truthy value. Guards against an accidental flip back to
|
|
4
|
+
* default-on (which would reintroduce the unformatted-preliminary flash +
|
|
5
|
+
* delete-on-every-reply — see the gateway gate comment).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { describe, it, expect } from 'vitest'
|
|
9
|
+
import { parseVisibleAnswerStreamEnabled } from '../answer-stream-flag.js'
|
|
10
|
+
|
|
11
|
+
describe('parseVisibleAnswerStreamEnabled — default OFF, opt-in', () => {
|
|
12
|
+
it('defaults OFF when unset', () => {
|
|
13
|
+
expect(parseVisibleAnswerStreamEnabled(undefined)).toBe(false)
|
|
14
|
+
})
|
|
15
|
+
|
|
16
|
+
it('stays OFF for empty / falsey / unrecognized values', () => {
|
|
17
|
+
for (const v of ['', ' ', '0', 'false', 'off', 'no', 'nope', 'enabled', 'x']) {
|
|
18
|
+
expect(parseVisibleAnswerStreamEnabled(v)).toBe(false)
|
|
19
|
+
}
|
|
20
|
+
})
|
|
21
|
+
|
|
22
|
+
it('opts IN only on explicit truthy values (case/space-insensitive)', () => {
|
|
23
|
+
for (const v of ['1', 'true', 'on', 'yes', ' TRUE ', 'On', 'YES']) {
|
|
24
|
+
expect(parseVisibleAnswerStreamEnabled(v)).toBe(true)
|
|
25
|
+
}
|
|
26
|
+
})
|
|
27
|
+
})
|
|
@@ -68,6 +68,41 @@ describe('#656 — answer-stream retract() at turn_end emits nothing', () => {
|
|
|
68
68
|
expect(deleteMessage).not.toHaveBeenCalled()
|
|
69
69
|
})
|
|
70
70
|
|
|
71
|
+
// Visible-answer-stream OFF (default since 2026-06-03): the gateway passes
|
|
72
|
+
// minInitialChars=MAX_SAFE_INTEGER so a SUPERGROUP turn (message transport —
|
|
73
|
+
// draft is unsupported in forum topics) NEVER opens a visible preview, no
|
|
74
|
+
// matter how much interstitial assistant.text streams. This is the full fix
|
|
75
|
+
// for the marko-General unformatted-flash (the DM path is already covered by
|
|
76
|
+
// draft transport). Without it, message transport opens at the 50-char gate
|
|
77
|
+
// and retract() then deletes it.
|
|
78
|
+
it('flag-off supergroup: huge minInitialChars never opens a message even past the 50-char gate', async () => {
|
|
79
|
+
const sendMessage = vi.fn(async () => ({ message_id: nextMessageId++ }))
|
|
80
|
+
const editMessageText = vi.fn(async () => {})
|
|
81
|
+
const deleteMessage = vi.fn(async () => {})
|
|
82
|
+
|
|
83
|
+
const stream = createAnswerStream({
|
|
84
|
+
chatId: 'supergroup-topic',
|
|
85
|
+
isPrivateChat: false, // supergroup → message transport (no draft)
|
|
86
|
+
threadId: 4,
|
|
87
|
+
minInitialChars: Number.MAX_SAFE_INTEGER,
|
|
88
|
+
throttleMs: 250,
|
|
89
|
+
sendMessage: sendMessage as never,
|
|
90
|
+
editMessageText: editMessageText as never,
|
|
91
|
+
deleteMessage: deleteMessage as never,
|
|
92
|
+
})
|
|
93
|
+
|
|
94
|
+
// Feed 200 chars — well past the default 50-char open gate.
|
|
95
|
+
stream.update('x'.repeat(200))
|
|
96
|
+
vi.advanceTimersByTime(1000)
|
|
97
|
+
await flushMicrotasks()
|
|
98
|
+
expect(sendMessage).not.toHaveBeenCalled() // never opened a preview
|
|
99
|
+
expect(editMessageText).not.toHaveBeenCalled()
|
|
100
|
+
|
|
101
|
+
await stream.retract()
|
|
102
|
+
expect(sendMessage).not.toHaveBeenCalled()
|
|
103
|
+
expect(deleteMessage).not.toHaveBeenCalled() // nothing to delete → no flash
|
|
104
|
+
})
|
|
105
|
+
|
|
71
106
|
it('retract after a preliminary send: deletes the prelim, no fresh sendMessage', async () => {
|
|
72
107
|
const THROTTLE = 1000
|
|
73
108
|
const sendMessage = vi.fn(async () => ({ message_id: nextMessageId++ }))
|
|
@@ -106,12 +106,22 @@ describe('buildResumeInterruptedInbound', () => {
|
|
|
106
106
|
expect(msg.meta.original_prompt).toBe('refactor the auth module')
|
|
107
107
|
})
|
|
108
108
|
|
|
109
|
-
it('
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
expect(
|
|
109
|
+
it('includes the FULL stored preview in the body (no double-truncation below the 200-char storage cap)', () => {
|
|
110
|
+
// The turns table already caps the preview at ~200 chars (TURN_PREVIEW_MAX);
|
|
111
|
+
// the builder must NOT slice it shorter (the old 160-char cut dropped detail
|
|
112
|
+
// from the tail of a longer request). A 180-char preview must appear in full.
|
|
113
|
+
const preview = "step 1 do X; step 2 do Y; step 3 do Z; " + "d".repeat(141) // 180 chars
|
|
114
|
+
expect(preview.length).toBe(180)
|
|
115
|
+
const msg = buildResumeInterruptedInbound({ turn: makeTurn({ user_prompt_preview: preview }) })
|
|
116
|
+
expect(msg.text).toContain(preview) // verbatim, not sliced to 160
|
|
117
|
+
expect(msg.meta.original_prompt).toBe(preview)
|
|
118
|
+
})
|
|
119
|
+
|
|
120
|
+
it('tells the resumed turn to recover the FULL original via get_recent_messages', () => {
|
|
121
|
+
const msg = buildResumeInterruptedInbound({ turn: makeTurn({ user_prompt_preview: 'short task' }) })
|
|
122
|
+
expect(msg.text).toContain('get_recent_messages')
|
|
123
|
+
// Frames the quoted preview as partial so the model knows to fetch the rest.
|
|
124
|
+
expect(msg.text).toMatch(/first ~200 characters|start of the request/i)
|
|
115
125
|
})
|
|
116
126
|
|
|
117
127
|
it('routes to the forum thread when thread_id is numeric', () => {
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD — always-on: a turn interrupted by a restart is RESUMED without
|
|
3
|
+
* re-prompting, and the resume turn runs to completion (it is not silently
|
|
4
|
+
* dropped into a not-ready session). Regression gate for v0.14.50 / #2122.
|
|
5
|
+
*
|
|
6
|
+
* Flow: send a long-running task, let it get in-flight, then restart the agent
|
|
7
|
+
* (--force, NOT --wait, so it interrupts rather than draining). On boot the
|
|
8
|
+
* gateway finds the orphaned turn and injects the `resume_interrupted`
|
|
9
|
+
* synthetic. With #2122 that synthetic carries meta.chat_id + message_id, so the
|
|
10
|
+
* resumed turn (a) builds a currentTurn (progress card + silence-poke), (b)
|
|
11
|
+
* routes its reply to the originating chat, and (c) re-enrols in
|
|
12
|
+
* deliver-until-acked so a drop into a still-booting session is rescued rather
|
|
13
|
+
* than lost.
|
|
14
|
+
*
|
|
15
|
+
* Assertion: a reply arrives after the interrupting restart whose FRAMING shows
|
|
16
|
+
* the agent is resuming ("picking this back up" / "interrupted" / "cut off by a
|
|
17
|
+
* restart"). We deliberately do NOT assert an end-token: the resume synthetic
|
|
18
|
+
* only carries the first ~200 chars of the original prompt (stored-preview
|
|
19
|
+
* truncation), so instructions in the prompt's tail are not guaranteed to
|
|
20
|
+
* survive — the resume FRAMING is the stable, builder-guaranteed signal.
|
|
21
|
+
*
|
|
22
|
+
* Self-skips green without NOPASSWD sudo.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { describe, it, expect } from "vitest";
|
|
26
|
+
import { execSync, spawn } from "node:child_process";
|
|
27
|
+
import { spinUp } from "../harness.js";
|
|
28
|
+
|
|
29
|
+
const AGENT = "test-harness";
|
|
30
|
+
const MID_TURN_MS = 10_000; // let the turn enqueue (become a recorded interrupted turn)
|
|
31
|
+
const RESUME_BUDGET_MS = 180_000; // boot + resume + reply
|
|
32
|
+
|
|
33
|
+
// The resume builder tells the model to "briefly let the user know you're
|
|
34
|
+
// resuming what was interrupted" — so the reply always opens with this framing.
|
|
35
|
+
const RESUME_FRAMING = /resum|picking .*back|interrupted|cut off|just restarted/i;
|
|
36
|
+
|
|
37
|
+
function canShellSudo(): boolean {
|
|
38
|
+
try {
|
|
39
|
+
execSync("sudo -n true", { stdio: "ignore", timeout: 2_000 });
|
|
40
|
+
return true;
|
|
41
|
+
} catch {
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
function kickRestartDetached(name: string): void {
|
|
47
|
+
// --force WITHOUT --wait → recreate now, interrupting the in-flight turn.
|
|
48
|
+
const child = spawn(
|
|
49
|
+
"sudo",
|
|
50
|
+
["-n", "env", `PATH=${process.env.PATH}`, `HOME=${process.env.HOME}`,
|
|
51
|
+
"switchroom", "agent", "restart", name, "--force"],
|
|
52
|
+
{ detached: true, stdio: "ignore" },
|
|
53
|
+
);
|
|
54
|
+
child.unref();
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const sudoOk = canShellSudo();
|
|
58
|
+
|
|
59
|
+
(sudoOk ? describe : describe.skip)("uat: interrupted turn resumes after restart (DM)", () => {
|
|
60
|
+
it(
|
|
61
|
+
"a turn interrupted by a restart is resumed and the resume turn completes",
|
|
62
|
+
async () => {
|
|
63
|
+
const sc = await spinUp({ agent: AGENT, settleMs: 0 });
|
|
64
|
+
try {
|
|
65
|
+
// A task long enough that it's still in-flight at MID_TURN_MS.
|
|
66
|
+
await sc.sendDM(
|
|
67
|
+
`Please write a thorough, detailed ~300-word explanation of how a ` +
|
|
68
|
+
`mechanical typewriter's typebar mechanism works, step by step. Take ` +
|
|
69
|
+
`your time and be complete.`,
|
|
70
|
+
);
|
|
71
|
+
|
|
72
|
+
await new Promise((r) => setTimeout(r, MID_TURN_MS));
|
|
73
|
+
kickRestartDetached(AGENT);
|
|
74
|
+
|
|
75
|
+
// After boot, the resume synthetic should make the agent pick the work
|
|
76
|
+
// back up and say so. (The newest interrupted turn — this one — is the
|
|
77
|
+
// one findLatestTurnIfInterrupted resumes.)
|
|
78
|
+
const reply = await sc.expectMessage((m) => RESUME_FRAMING.test(m.text), {
|
|
79
|
+
from: "bot",
|
|
80
|
+
timeout: RESUME_BUDGET_MS,
|
|
81
|
+
});
|
|
82
|
+
expect(reply.text).toMatch(RESUME_FRAMING);
|
|
83
|
+
expect(reply.text.length).toBeGreaterThan(40); // a substantive resumed reply, not a stub
|
|
84
|
+
} finally {
|
|
85
|
+
await sc.tearDown();
|
|
86
|
+
}
|
|
87
|
+
},
|
|
88
|
+
RESUME_BUDGET_MS + 60_000,
|
|
89
|
+
);
|
|
90
|
+
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD — always-on in a supergroup: a message sent into a forum topic WHILE the
|
|
3
|
+
* agent is restarting must still be answered IN the group (the channel twin of
|
|
4
|
+
* jtbd-message-during-restart-dm). Regression gate for v0.14.48 / #2117, proving
|
|
5
|
+
* the restart-redeliver rescue is keyed per (chat, thread) so DM and supergroup
|
|
6
|
+
* topics recover identically.
|
|
7
|
+
*
|
|
8
|
+
* Self-skips green when SWITCHROOM_UAT_CHAT_ID is unset or the chat isn't a
|
|
9
|
+
* resolvable forum supergroup the driver is in (uat/** is excluded from gating
|
|
10
|
+
* CI). mtcute caveat: no forum-topic create API, so this uses the supergroup's
|
|
11
|
+
* General topic — it proves DM-vs-channel routing, not "correct topic among
|
|
12
|
+
* many" (pinned by the gateway unit thread-assertions).
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { describe, it, expect } from "vitest";
|
|
16
|
+
import { execSync, spawn } from "node:child_process";
|
|
17
|
+
import { spinUp } from "../harness.js";
|
|
18
|
+
import { expectMessage } from "../assertions.js";
|
|
19
|
+
|
|
20
|
+
const AGENT = "test-harness";
|
|
21
|
+
const SUPERGROUP_ID = Number.parseInt(process.env.SWITCHROOM_UAT_CHAT_ID ?? "", 10);
|
|
22
|
+
const BOOT_SEND_DELAY_MS = Number.parseInt(
|
|
23
|
+
process.env.SWITCHROOM_UAT_BOOT_SEND_DELAY_MS ?? "12000",
|
|
24
|
+
10,
|
|
25
|
+
);
|
|
26
|
+
const REPLY_BUDGET_MS = 180_000;
|
|
27
|
+
|
|
28
|
+
function canShellSudo(): boolean {
|
|
29
|
+
try {
|
|
30
|
+
execSync("sudo -n true", { stdio: "ignore", timeout: 2_000 });
|
|
31
|
+
return true;
|
|
32
|
+
} catch {
|
|
33
|
+
return false;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function kickRestartDetached(name: string): void {
|
|
38
|
+
const child = spawn(
|
|
39
|
+
"sudo",
|
|
40
|
+
["-n", "env", `PATH=${process.env.PATH}`, `HOME=${process.env.HOME}`,
|
|
41
|
+
"switchroom", "agent", "restart", name, "--force"],
|
|
42
|
+
{ detached: true, stdio: "ignore" },
|
|
43
|
+
);
|
|
44
|
+
child.unref();
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const sudoOk = canShellSudo();
|
|
48
|
+
|
|
49
|
+
(sudoOk ? describe : describe.skip)("uat: message sent during a restart (supergroup)", () => {
|
|
50
|
+
it(
|
|
51
|
+
"a supergroup-topic message sent DURING the restart boot window is still answered in the group",
|
|
52
|
+
async () => {
|
|
53
|
+
if (!Number.isFinite(SUPERGROUP_ID)) {
|
|
54
|
+
console.warn("[during-restart-channel] SWITCHROOM_UAT_CHAT_ID unset — skipping");
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
const sc = await spinUp({ agent: AGENT, settleMs: 0 });
|
|
58
|
+
try {
|
|
59
|
+
await sc.driver.primeDialogs();
|
|
60
|
+
if (!(await sc.driver.canResolve(SUPERGROUP_ID))) {
|
|
61
|
+
console.warn(
|
|
62
|
+
`[during-restart-channel] supergroup ${SUPERGROUP_ID} not resolvable — skipping ` +
|
|
63
|
+
`(ensure forum supergroup with Topics + driver is a member)`,
|
|
64
|
+
);
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
const nonce = `bootsg-${Date.now().toString(36)}`;
|
|
69
|
+
kickRestartDetached(AGENT);
|
|
70
|
+
await new Promise((r) => setTimeout(r, BOOT_SEND_DELAY_MS));
|
|
71
|
+
|
|
72
|
+
const sendStart = Date.now();
|
|
73
|
+
await sc.driver.sendText(
|
|
74
|
+
SUPERGROUP_ID,
|
|
75
|
+
`You are being restart-tested in this group. Reply in this group with exactly this token and nothing else: ${nonce}`,
|
|
76
|
+
);
|
|
77
|
+
|
|
78
|
+
const reply = await expectMessage(
|
|
79
|
+
sc.driver,
|
|
80
|
+
SUPERGROUP_ID,
|
|
81
|
+
(m) => m.text.includes(nonce),
|
|
82
|
+
{ timeout: REPLY_BUDGET_MS, senderFilter: { notUserId: sc.driverUserId } },
|
|
83
|
+
);
|
|
84
|
+
const ttfo = Date.now() - sendStart;
|
|
85
|
+
console.warn(`[during-restart-channel] answered in ${ttfo}ms (nonce ${nonce})`);
|
|
86
|
+
expect(reply.chatId).toBe(SUPERGROUP_ID);
|
|
87
|
+
expect(reply.fromBot).toBe(true);
|
|
88
|
+
expect(reply.text).toContain(nonce);
|
|
89
|
+
} finally {
|
|
90
|
+
await sc.tearDown();
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
REPLY_BUDGET_MS + 60_000,
|
|
94
|
+
);
|
|
95
|
+
});
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD — always-on: a message sent WHILE the agent is restarting must still be
|
|
3
|
+
* answered (DM). Regression gate for the v0.14.48 / #2117 lost-message incident
|
|
4
|
+
* (clerk/KenGPT, 2026-06-03).
|
|
5
|
+
*
|
|
6
|
+
* The wedge this guards: an inbound that arrives during a restart is buffered +
|
|
7
|
+
* spool-persisted, then redelivered on bridge-up — but `bridge registered` is
|
|
8
|
+
* not the same as `claude` session-ready. If claude is slow to boot (e.g. a
|
|
9
|
+
* Hindsight MCP timeout) the redelivered inject hit a still-booting session and
|
|
10
|
+
* was silently dropped; the 300s silence-poke then ended a phantom turn
|
|
11
|
+
* (`drained_buffered=0/0`) and the message was gone. The fix re-enrols the
|
|
12
|
+
* redelivered inbound in the deliver-until-acked queue so it re-delivers every
|
|
13
|
+
* 5s until claude actually consumes it.
|
|
14
|
+
*
|
|
15
|
+
* Unlike `jtbd-always-on-after-restart-dm` (which sends ~5s AFTER the restart
|
|
16
|
+
* returns, exercising the live path), this sends DURING the boot window so the
|
|
17
|
+
* message goes through the restart-redeliver path the fix patches. The mtcute
|
|
18
|
+
* driver sends straight to Telegram — independent of the agent's bridge — so the
|
|
19
|
+
* message queues server-side and is delivered the moment the new gateway
|
|
20
|
+
* reconnects.
|
|
21
|
+
*
|
|
22
|
+
* Self-skips green without NOPASSWD sudo (can't restart the agent).
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { describe, it, expect } from "vitest";
|
|
26
|
+
import { execSync, spawn } from "node:child_process";
|
|
27
|
+
import { spinUp } from "../harness.js";
|
|
28
|
+
|
|
29
|
+
const AGENT = "test-harness";
|
|
30
|
+
|
|
31
|
+
// After kicking the restart, wait this long before sending — long enough that
|
|
32
|
+
// the reconcile + docker recreate have dropped the bridge, so the message
|
|
33
|
+
// buffers and is drained on bridge-up (the restart-redeliver path). Override
|
|
34
|
+
// for forensics (e.g. 6000 to land it deeper in the not-ready window so the
|
|
35
|
+
// strand-rescue sweep fires).
|
|
36
|
+
const BOOT_SEND_DELAY_MS = Number.parseInt(
|
|
37
|
+
process.env.SWITCHROOM_UAT_BOOT_SEND_DELAY_MS ?? "12000",
|
|
38
|
+
10,
|
|
39
|
+
);
|
|
40
|
+
|
|
41
|
+
// Generous: the fix re-delivers every 5s until claude is ready. The wedge
|
|
42
|
+
// symptom is ≥300s (silence-poke floor) or never — both fail this.
|
|
43
|
+
const REPLY_BUDGET_MS = 180_000;
|
|
44
|
+
|
|
45
|
+
function canShellSudo(): boolean {
|
|
46
|
+
try {
|
|
47
|
+
execSync("sudo -n true", { stdio: "ignore", timeout: 2_000 });
|
|
48
|
+
return true;
|
|
49
|
+
} catch {
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Kick a marker-safe restart and return immediately (detached). */
|
|
55
|
+
function kickRestartDetached(name: string): void {
|
|
56
|
+
const child = spawn(
|
|
57
|
+
"sudo",
|
|
58
|
+
["-n", "env", `PATH=${process.env.PATH}`, `HOME=${process.env.HOME}`,
|
|
59
|
+
"switchroom", "agent", "restart", name, "--force"],
|
|
60
|
+
{ detached: true, stdio: "ignore" },
|
|
61
|
+
);
|
|
62
|
+
child.unref();
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
const sudoOk = canShellSudo();
|
|
66
|
+
|
|
67
|
+
(sudoOk ? describe : describe.skip)("uat: message sent during a restart (DM)", () => {
|
|
68
|
+
it(
|
|
69
|
+
"a DM sent DURING the restart boot window is still answered (not lost)",
|
|
70
|
+
async () => {
|
|
71
|
+
const sc = await spinUp({ agent: AGENT, settleMs: 0 });
|
|
72
|
+
try {
|
|
73
|
+
const nonce = `bootdm-${Date.now().toString(36)}`;
|
|
74
|
+
kickRestartDetached(AGENT);
|
|
75
|
+
await new Promise((r) => setTimeout(r, BOOT_SEND_DELAY_MS));
|
|
76
|
+
|
|
77
|
+
const sendStart = Date.now();
|
|
78
|
+
await sc.sendDM(
|
|
79
|
+
`You are being restart-tested. Reply with exactly this token and nothing else: ${nonce}`,
|
|
80
|
+
);
|
|
81
|
+
|
|
82
|
+
const reply = await sc.expectMessage((m) => m.text.includes(nonce), {
|
|
83
|
+
from: "bot",
|
|
84
|
+
timeout: REPLY_BUDGET_MS,
|
|
85
|
+
});
|
|
86
|
+
const ttfo = Date.now() - sendStart;
|
|
87
|
+
console.warn(`[during-restart-dm] answered in ${ttfo}ms (nonce ${nonce})`);
|
|
88
|
+
expect(reply.text).toContain(nonce);
|
|
89
|
+
} finally {
|
|
90
|
+
await sc.tearDown();
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
REPLY_BUDGET_MS + 60_000,
|
|
94
|
+
);
|
|
95
|
+
});
|