switchroom 0.13.38 → 0.13.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/answer-stream.ts +28 -3
- package/telegram-plugin/dist/gateway/gateway.js +16 -8
- package/telegram-plugin/steering.ts +38 -7
- package/telegram-plugin/tests/answer-stream.test.ts +86 -0
- package/telegram-plugin/tests/steering.test.ts +37 -4
- package/telegram-plugin/uat/scenarios/jtbd-pending-progress-html-dm.test.ts +124 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -47744,8 +47744,8 @@ var {
|
|
|
47744
47744
|
} = import__.default;
|
|
47745
47745
|
|
|
47746
47746
|
// src/build-info.ts
|
|
47747
|
-
var VERSION = "0.13.38";
|
|
47748
|
-
var COMMIT_SHA = "
|
|
47747
|
+
var VERSION = "0.13.39";
|
|
47748
|
+
var COMMIT_SHA = "8681f423";
|
|
47749
47749
|
|
|
47750
47750
|
// src/cli/agent.ts
|
|
47751
47751
|
init_source();
|
package/package.json
CHANGED
|
package/telegram-plugin/answer-stream.ts
CHANGED
|
@@ -244,15 +244,22 @@ export function createAnswerStream(config: AnswerStreamConfig): AnswerStreamHand
|
|
|
244
244
|
* must clear the draft. Best-effort: a failed clear is logged but
|
|
245
245
|
* not re-thrown — the worst case is a transient stale draft that
|
|
246
246
|
* Telegram's own 30 s draft expiry eventually mops up.
|
|
247
|
+
*
|
|
248
|
+
* #1792 — accepts an explicit `targetDraftId` so `forceNewMessage`
|
|
249
|
+
* can clear the OLD id before bumping the closure's `draftId`. The
|
|
250
|
+
* default reads the live closure, which is what stop() / retract()
|
|
251
|
+
* want — clear whatever's current at the time the call lands.
|
|
247
252
|
*/
|
|
248
|
-
async function clearDraftBestEffort(
|
|
249
|
-
|
|
253
|
+
async function clearDraftBestEffort(
|
|
254
|
+
targetDraftId: number | undefined = draftId,
|
|
255
|
+
): Promise<void> {
|
|
256
|
+
if (!usesDraftTransport || draftApi == null || targetDraftId == null) return
|
|
250
257
|
try {
|
|
251
258
|
const params: { message_thread_id?: number } = {}
|
|
252
259
|
if (threadId != null) params.message_thread_id = threadId
|
|
253
260
|
await draftApi(
|
|
254
261
|
chatId,
|
|
255
|
-
|
|
262
|
+
targetDraftId,
|
|
256
263
|
'',
|
|
257
264
|
Object.keys(params).length > 0 ? params : undefined,
|
|
258
265
|
)
|
|
@@ -531,6 +538,18 @@ export function createAnswerStream(config: AnswerStreamConfig): AnswerStreamHand
|
|
|
531
538
|
stopped = false
|
|
532
539
|
materialized = false
|
|
533
540
|
if (usesDraftTransport) {
|
|
541
|
+
// #1792: clear the OLD draftId BEFORE rotating. Otherwise the
|
|
542
|
+
// stale content stays in the user's compose box until the 30 s
|
|
543
|
+
// Telegram draft expiry — the typical caller (gateway.ts mid-
|
|
544
|
+
// turn rapid-steer path: `forceNewMessage(); stop();`) cleans
|
|
545
|
+
// up the prior turn's stream, so the prior draft's content is
|
|
546
|
+
// semantically retracted. Fire-and-forget — forceNewMessage is
|
|
547
|
+
// sync; the worst-case failure mode is the same 30 s expiry
|
|
548
|
+
// we'd have had without the call.
|
|
549
|
+
const staleDraftId = draftId
|
|
550
|
+
if (staleDraftId != null) {
|
|
551
|
+
void clearDraftBestEffort(staleDraftId)
|
|
552
|
+
}
|
|
534
553
|
draftId = allocateDraftId()
|
|
535
554
|
}
|
|
536
555
|
log?.(`answer-stream: forceNewMessage (gen=${generation})`)
|
|
@@ -546,6 +565,10 @@ export function createAnswerStream(config: AnswerStreamConfig): AnswerStreamHand
|
|
|
546
565
|
// #1704: clear the compose-box draft. stop() is sync — fire and
|
|
547
566
|
// forget. A dropped clear falls back on Telegram's own 30 s
|
|
548
567
|
// draft expiry; the worst case is a transient stale preview.
|
|
568
|
+
// (#1792: the stale-id-after-rotation hazard is owned by
|
|
569
|
+
// forceNewMessage itself now — it clears its own draftId before
|
|
570
|
+
// rotating. stop() just clears whatever's current; clearing an
|
|
571
|
+
// already-cleared or never-used id is a harmless no-op.)
|
|
549
572
|
void clearDraftBestEffort()
|
|
550
573
|
},
|
|
551
574
|
|
|
@@ -563,6 +586,8 @@ export function createAnswerStream(config: AnswerStreamConfig): AnswerStreamHand
|
|
|
563
586
|
// draft sitting in the user's input area and blocks them from
|
|
564
587
|
// typing until the 30 s draft expiry. Awaited so a follow-up
|
|
565
588
|
// sendMessage on the same chat doesn't race a stale draft edit.
|
|
589
|
+
// (See #1792 note in stop() — forceNewMessage owns its own stale
|
|
590
|
+
// id cleanup; retract just clears whatever's current.)
|
|
566
591
|
await clearDraftBestEffort()
|
|
567
592
|
// Delete the preliminary message if one was sent and deleteMessage
|
|
568
593
|
// is wired. Best-effort: failures are logged but not re-thrown.
|
|
package/telegram-plugin/dist/gateway/gateway.js
CHANGED
|
@@ -37781,14 +37781,14 @@ function createAnswerStream(config) {
|
|
|
37781
37781
|
scheduledTimer = null;
|
|
37782
37782
|
}
|
|
37783
37783
|
}
|
|
37784
|
-
async function clearDraftBestEffort() {
|
|
37785
|
-
if (!usesDraftTransport || draftApi == null ||
|
|
37784
|
+
async function clearDraftBestEffort(targetDraftId = draftId) {
|
|
37785
|
+
if (!usesDraftTransport || draftApi == null || targetDraftId == null)
|
|
37786
37786
|
return;
|
|
37787
37787
|
try {
|
|
37788
37788
|
const params = {};
|
|
37789
37789
|
if (threadId != null)
|
|
37790
37790
|
params.message_thread_id = threadId;
|
|
37791
|
-
await draftApi(chatId,
|
|
37791
|
+
await draftApi(chatId, targetDraftId, "", Object.keys(params).length > 0 ? params : undefined);
|
|
37792
37792
|
} catch {}
|
|
37793
37793
|
}
|
|
37794
37794
|
async function sendDraft(text) {
|
|
@@ -38008,6 +38008,10 @@ function createAnswerStream(config) {
|
|
|
38008
38008
|
stopped = false;
|
|
38009
38009
|
materialized = false;
|
|
38010
38010
|
if (usesDraftTransport) {
|
|
38011
|
+
const staleDraftId = draftId;
|
|
38012
|
+
if (staleDraftId != null) {
|
|
38013
|
+
clearDraftBestEffort(staleDraftId);
|
|
38014
|
+
}
|
|
38011
38015
|
draftId = allocateDraftId2();
|
|
38012
38016
|
}
|
|
38013
38017
|
log?.(`answer-stream: forceNewMessage (gen=${generation})`);
|
|
@@ -39638,9 +39642,13 @@ function parseSteerPrefix(body) {
|
|
|
39638
39642
|
function escapeXmlAttribute(s) {
|
|
39639
39643
|
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
|
|
39640
39644
|
}
|
|
39645
|
+
function decodeXmlEntities(s) {
|
|
39646
|
+
return s.replace(/&lt;/g, "<").replace(/&gt;/g, ">").replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&nbsp;/g, " ").replace(/&amp;/g, "&");
|
|
39647
|
+
}
|
|
39641
39648
|
function formatPriorAssistantPreview(text, maxChars = 200) {
|
|
39642
39649
|
const stripped = text.replace(/<[^>]*>/g, "");
|
|
39643
|
-
const
|
|
39650
|
+
const decoded = decodeXmlEntities(stripped);
|
|
39651
|
+
const collapsed = decoded.replace(/\s+/g, " ").trim();
|
|
39644
39652
|
const truncated = collapsed.length > maxChars ? collapsed.slice(0, maxChars) : collapsed;
|
|
39645
39653
|
return escapeXmlAttribute(truncated);
|
|
39646
39654
|
}
|
|
@@ -48722,10 +48730,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
48722
48730
|
}
|
|
48723
48731
|
|
|
48724
48732
|
// ../src/build-info.ts
|
|
48725
|
-
var VERSION = "0.13.38";
|
|
48726
|
-
var COMMIT_SHA = "
|
|
48727
|
-
var COMMIT_DATE = "2026-05-
|
|
48728
|
-
var LATEST_PR =
|
|
48733
|
+
var VERSION = "0.13.39";
|
|
48734
|
+
var COMMIT_SHA = "8681f423";
|
|
48735
|
+
var COMMIT_DATE = "2026-05-25T07:06:31Z";
|
|
48736
|
+
var LATEST_PR = 1797;
|
|
48729
48737
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
48730
48738
|
|
|
48731
48739
|
// gateway/boot-version.ts
|
|
package/telegram-plugin/steering.ts
CHANGED
|
@@ -73,22 +73,53 @@ export function escapeXmlAttribute(s: string): string {
|
|
|
73
73
|
.replace(/'/g, '&apos;')
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
/**
|
|
77
|
+
* Decode the small set of HTML / XML entities switchroom emits when it
|
|
78
|
+
* renders model output as Telegram HTML. Pre-#1791 this function did
|
|
79
|
+
* not decode and `formatPriorAssistantPreview` then re-escaped the
|
|
80
|
+
* already-encoded entities, so a turn containing inline `<code>` would
|
|
81
|
+
* surface to the model on the next inbound as `&amp;amp;lt;…&amp;amp;gt;`
|
|
82
|
+
* (triple-encoded). The model had to mentally decode three layers to
|
|
83
|
+
* recover the original characters it wrote — measurably hostile to
|
|
84
|
+
* comprehension on turns with placeholders, JSX, XML, generics, etc.
|
|
85
|
+
*
|
|
86
|
+
* Decoding before re-escape closes that loop: the attribute boundary
|
|
87
|
+
* stays safe because `escapeXmlAttribute` runs unchanged at the tail.
|
|
88
|
+
*
|
|
89
|
+
* Limited to the canonical six entities — there's no general HTML
|
|
90
|
+
* entity table here, which keeps the surface predictable.
|
|
91
|
+
*/
|
|
92
|
+
function decodeXmlEntities(s: string): string {
|
|
93
|
+
return s
|
|
94
|
+
.replace(/&lt;/g, '<')
|
|
95
|
+
.replace(/&gt;/g, '>')
|
|
96
|
+
.replace(/&quot;/g, '"')
|
|
97
|
+
.replace(/&apos;/g, "'")
|
|
98
|
+
.replace(/&nbsp;/g, ' ')
|
|
99
|
+
// `&amp;` last so we don't accidentally re-decode `&amp;lt;` → `<` on
|
|
100
|
+
// a single pass — the order above relies on `&amp;` still being
|
|
101
|
+
// intact during the prior replaces.
|
|
102
|
+
.replace(/&amp;/g, '&')
|
|
103
|
+
}
|
|
104
|
+
|
|
76
105
|
/**
|
|
77
106
|
* Produce a short, safe preview of the last assistant turn for injection
|
|
78
107
|
* as an XML attribute. Strips HTML tags (so `<b>foo</b>` becomes `foo`),
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
* survives as `&amp;` after escape, which is fine: the attribute is
|
|
84
|
-
* for the model's situational awareness, not faithful rendering.
|
|
108
|
+
* decodes the canonical six XML entities so the model sees the original
|
|
109
|
+
* characters (not triple-encoded `&amp;amp;lt;` — see #1791), collapses
|
|
110
|
+
* all whitespace runs into single spaces, truncates to `maxChars` visible
|
|
111
|
+
* characters, then XML-escapes for safe attribute injection.
|
|
85
112
|
*/
|
|
86
113
|
export function formatPriorAssistantPreview(text: string, maxChars = 200): string {
|
|
87
114
|
// Strip HTML tags. Anything angle-bracketed between < and > goes away;
|
|
88
115
|
// this is deliberately liberal (no tag-name whitelist) because the
|
|
89
116
|
// preview is for the model's eyes only.
|
|
90
117
|
const stripped = text.replace(/<[^>]*>/g, '')
|
|
91
|
-
|
|
118
|
+
// #1791: decode entities BEFORE collapse/truncate/re-escape so the
|
|
119
|
+
// model sees the prose it actually wrote. The re-escape at the tail
|
|
120
|
+
// preserves attribute-injection safety.
|
|
121
|
+
const decoded = decodeXmlEntities(stripped)
|
|
122
|
+
const collapsed = decoded.replace(/\s+/g, ' ').trim()
|
|
92
123
|
const truncated = collapsed.length > maxChars ? collapsed.slice(0, maxChars) : collapsed
|
|
93
124
|
return escapeXmlAttribute(truncated)
|
|
94
125
|
}
|
|
package/telegram-plugin/tests/answer-stream.test.ts
CHANGED
|
@@ -527,6 +527,92 @@ describe('answer-stream — clears sendMessageDraft on terminal paths (#1704)',
|
|
|
527
527
|
})
|
|
528
528
|
})
|
|
529
529
|
|
|
530
|
+
// ─── #1792 — forceNewMessage clears the stale draftId before rotating ───
|
|
531
|
+
//
|
|
532
|
+
// Background: `forceNewMessage()` rotates `draftId` to a fresh allocation
|
|
533
|
+
// so the stream can be re-used for a new turn (typical caller: gateway
|
|
534
|
+
// rapid-steer path in `handleSessionEvent` enqueue branch — calls
|
|
535
|
+
// `forceNewMessage(); stop()` on the prior turn's stream before opening
|
|
536
|
+
// the new turn). Pre-#1792, the rotation orphaned the prior turn's
|
|
537
|
+
// draft content in the user's compose box until Telegram's 30 s draft
|
|
538
|
+
// expiry — `stop()`'s fire-and-forget clear closed over the (now-new)
|
|
539
|
+
// `draftId`, so the clear targeted the unused id, not the stale one.
|
|
540
|
+
//
|
|
541
|
+
// Post-fix: `forceNewMessage` itself clears the stale draftId BEFORE
|
|
542
|
+
// rotating. `stop()` continues to clear whatever draftId is current
|
|
543
|
+
// at the time it runs (defensive, also fine: clearing an unused id
|
|
544
|
+
// is a harmless no-op for the user).
|
|
545
|
+
|
|
546
|
+
describe('answer-stream — forceNewMessage clears the stale draft before rotating (#1792)', () => {
|
|
547
|
+
it('clears the pre-rotation draftId when forceNewMessage rotates', async () => {
|
|
548
|
+
const sendMessage = makeSendMessage()
|
|
549
|
+
const editMessageText = makeEditMessageText()
|
|
550
|
+
const sendMessageDraft = makeSendMessageDraft()
|
|
551
|
+
const stream = createAnswerStream({
|
|
552
|
+
chatId: 'chat1',
|
|
553
|
+
isPrivateChat: true,
|
|
554
|
+
throttleMs: 250,
|
|
555
|
+
sendMessage,
|
|
556
|
+
editMessageText,
|
|
557
|
+
sendMessageDraft,
|
|
558
|
+
})
|
|
559
|
+
|
|
560
|
+
// Open the stream — this allocates draftId N and fires sendDraft(N).
|
|
561
|
+
stream.update('first turn thought')
|
|
562
|
+
await flushMicrotasks()
|
|
563
|
+
expect(sendMessageDraft).toHaveBeenCalledTimes(1)
|
|
564
|
+
const staleDraftId = (sendMessageDraft.mock.calls[0] as unknown as [string, number, string, unknown])[1]
|
|
565
|
+
sendMessageDraft.mockClear()
|
|
566
|
+
|
|
567
|
+
// Rotate. forceNewMessage must enqueue a clear against the OLD
|
|
568
|
+
// draftId before bumping to the new allocation — pre-fix the
|
|
569
|
+
// stale content stayed in the compose box for 30 s.
|
|
570
|
+
stream.forceNewMessage()
|
|
571
|
+
await flushMicrotasks()
|
|
572
|
+
|
|
573
|
+
expect(sendMessageDraft).toHaveBeenCalledTimes(1)
|
|
574
|
+
const clearedId = (sendMessageDraft.mock.calls[0] as unknown as [string, number, string, unknown])[1]
|
|
575
|
+
const clearedText = (sendMessageDraft.mock.calls[0] as unknown as [string, number, string, unknown])[2]
|
|
576
|
+
expect(clearedId).toBe(staleDraftId)
|
|
577
|
+
expect(clearedText).toBe('')
|
|
578
|
+
})
|
|
579
|
+
|
|
580
|
+
it('the gateway sequence forceNewMessage(); stop() clears the stale draftId', async () => {
|
|
581
|
+
// Mirrors the only production caller — telegram-plugin/gateway/
|
|
582
|
+
// gateway.ts:6476-6477 cleans up the prior turn's answer-stream
|
|
583
|
+
// before opening a new turn (rapid steer / queue path).
|
|
584
|
+
const sendMessage = makeSendMessage()
|
|
585
|
+
const editMessageText = makeEditMessageText()
|
|
586
|
+
const sendMessageDraft = makeSendMessageDraft()
|
|
587
|
+
const stream = createAnswerStream({
|
|
588
|
+
chatId: 'chat1',
|
|
589
|
+
isPrivateChat: true,
|
|
590
|
+
throttleMs: 250,
|
|
591
|
+
sendMessage,
|
|
592
|
+
editMessageText,
|
|
593
|
+
sendMessageDraft,
|
|
594
|
+
})
|
|
595
|
+
|
|
596
|
+
stream.update('prior turn thought')
|
|
597
|
+
await flushMicrotasks()
|
|
598
|
+
const staleDraftId = (sendMessageDraft.mock.calls[0] as unknown as [string, number, string, unknown])[1]
|
|
599
|
+
sendMessageDraft.mockClear()
|
|
600
|
+
|
|
601
|
+
stream.forceNewMessage()
|
|
602
|
+
stream.stop()
|
|
603
|
+
await flushMicrotasks()
|
|
604
|
+
|
|
605
|
+
// The stale id must have been cleared by ONE of the two calls
|
|
606
|
+
// (forceNewMessage in this design); the new unused id may also
|
|
607
|
+
// be cleared by stop() — harmless. The load-bearing invariant
|
|
608
|
+
// is "the stale id reaches sendMessageDraft('') somewhere".
|
|
609
|
+
const clearedIds = (sendMessageDraft.mock.calls as unknown as Array<[string, number, string, unknown]>)
|
|
610
|
+
.filter(c => c[2] === '')
|
|
611
|
+
.map(c => c[1])
|
|
612
|
+
expect(clearedIds).toContain(staleDraftId)
|
|
613
|
+
})
|
|
614
|
+
})
|
|
615
|
+
|
|
530
616
|
describe('answer-stream — empty / whitespace-only text is a no-op', () => {
|
|
531
617
|
it('update("") does not trigger any transport call', async () => {
|
|
532
618
|
const sendMessage = makeSendMessage()
|
|
package/telegram-plugin/tests/steering.test.ts
CHANGED
|
@@ -138,10 +138,43 @@ describe('formatPriorAssistantPreview', () => {
|
|
|
138
138
|
expect(formatPriorAssistantPreview('a & b < c')).toBe('a &amp; b &lt; c')
|
|
139
139
|
})
|
|
140
140
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
141
|
+
// ─── #1791 — decode entities before re-escape ───────────────────────────
|
|
142
|
+
// Pre-fix this function did NOT decode, so an already-encoded source
|
|
143
|
+
// (e.g. the rendered HTML stored in history) was re-escaped on top of
|
|
144
|
+
// its own encoding. The model saw `&amp;amp;lt;bar&amp;amp;gt;` (triple
|
|
145
|
+
// encoded) instead of `<bar>`. Decoding before the trim/re-escape pass
|
|
146
|
+
// closes that loop; the attribute boundary stays safe because
|
|
147
|
+
// escapeXmlAttribute runs unchanged at the tail.
|
|
148
|
+
|
|
149
|
+
test('decodes &amp; before re-escape (single-pass, not triple) — #1791', () => {
|
|
150
|
+
// Source stored in history as escaped HTML: `a &amp; b`.
|
|
151
|
+
// Pre-fix output: `a &amp;amp; b`. Post-fix: `a &amp; b` (single).
|
|
152
|
+
expect(formatPriorAssistantPreview('a &amp; b')).toBe('a &amp; b')
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
test('decodes &lt; / &gt; inside stripped tags — #1791', () => {
|
|
156
|
+
// The classic #1120 case: model wrote `Path: \`/tmp/foo-<bar>/\``,
|
|
157
|
+
// markdownToHtml stored it as `<code>/tmp/foo-&lt;bar&gt;/</code>`,
|
|
158
|
+
// strip removes the <code> tags, decode brings back the angle
|
|
159
|
+
// brackets, escape re-encodes safely for the attribute.
|
|
160
|
+
expect(formatPriorAssistantPreview('<code>/tmp/foo-&lt;bar&gt;/</code>'))
|
|
161
|
+
.toBe('/tmp/foo-&lt;bar&gt;/')
|
|
162
|
+
})
|
|
163
|
+
|
|
164
|
+
test('decodes &quot; / &apos; / &nbsp; — #1791', () => {
|
|
165
|
+
expect(formatPriorAssistantPreview('say &quot;hi&quot;')).toBe('say &quot;hi&quot;')
|
|
166
|
+
expect(formatPriorAssistantPreview('it&apos;s here')).toBe("it&apos;s here")
|
|
167
|
+
expect(formatPriorAssistantPreview('a&nbsp;b')).toBe('a b')
|
|
168
|
+
})
|
|
169
|
+
|
|
170
|
+
test('does not over-decode: bare `&amp;lt;` decodes to `&lt;`, not `<` — #1791', () => {
|
|
171
|
+
// The decode order (&lt; / &gt; / &quot; / &apos; / &nbsp; first, then
|
|
172
|
+
// &amp;) ensures a single pass doesn't strip two layers of escape.
|
|
173
|
+
// A literal `&amp;lt;` in source (i.e. someone deliberately encoded
|
|
174
|
+
// the word "&lt;") becomes `&lt;` after one decode pass, and then
|
|
175
|
+
// re-escapes back to `&amp;lt;`. Pin this so the order isn't accidentally
|
|
176
|
+
// flipped to a re-decode loop.
|
|
177
|
+
expect(formatPriorAssistantPreview('&amp;lt;')).toBe('&amp;lt;')
|
|
145
178
|
})
|
|
146
179
|
|
|
147
180
|
test('empty string returns empty', () => {
|
|
package/telegram-plugin/uat/scenarios/jtbd-pending-progress-html-dm.test.ts
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UAT — pending-progress edit preserves HTML formatting (#1698 regression gate).
|
|
3
|
+
*
|
|
4
|
+
* Promoted from the one-off `pr1706-pending-progress-html-dm.test.ts`
|
|
5
|
+
* verification scenario per #1793. The pending-progress / silent-anchor
|
|
6
|
+
* / answer-stream code family in `telegram-plugin/` all touch the
|
|
7
|
+
* parse_mode contract on cross-turn edits; the existing UAT suite
|
|
8
|
+
* (`cross-turn-pending-progress-dm.test.ts`, `jtbd-fast-trivial-dm.test.ts`,
|
|
9
|
+
* `jtbd-soft-commit-dm.test.ts`) covers cadence / round-trip / pacing
|
|
10
|
+
* but does NOT pin the parse_mode contract. #1698 shipped to prod and
|
|
11
|
+
* the existing suite went green throughout — this scenario closes that
|
|
12
|
+
* blind spot.
|
|
13
|
+
*
|
|
14
|
+
* Method:
|
|
15
|
+
* 1. Ask the agent to send ONE reply with both <b> and <code> via
|
|
16
|
+
* the reply tool (default html format).
|
|
17
|
+
* 2. Dispatch a background bash so the turn ends with pending async.
|
|
18
|
+
* 3. End turn. Pending-progress activates.
|
|
19
|
+
* 4. After ~60-90s, observe the first edit. Assert text reads back
|
|
20
|
+
* WITHOUT literal `<b>` / `<code>` substrings (Telegram parsed
|
|
21
|
+
* under HTML, formatting moved to entities, mtcute Message.text
|
|
22
|
+
* returns plain prose). Pre-fix, parse_mode was dropped on the
|
|
23
|
+
* edit and the tags would render as literal characters.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import { describe, it, expect } from "vitest";
|
|
27
|
+
import { spinUp, type ObservedMessage } from "../harness.js";
|
|
28
|
+
|
|
29
|
+
const SLEEP_SECONDS = 90;
|
|
30
|
+
const OVERALL_DEADLINE_MS = 4 * 60_000;
|
|
31
|
+
|
|
32
|
+
const PROMPT =
|
|
33
|
+
`Please run \`sleep ${SLEEP_SECONDS}\` in the background using the ` +
|
|
34
|
+
`Bash tool with \`run_in_background: true\`. Send exactly ONE reply, ` +
|
|
35
|
+
`using the reply tool with default html format, containing this text ` +
|
|
36
|
+
`VERBATIM:\n\n` +
|
|
37
|
+
`<b>Worker dispatched.</b> Running <code>sleep ${SLEEP_SECONDS}</code> ` +
|
|
38
|
+
`in background.\n\n` +
|
|
39
|
+
`Do NOT send any other reply until the sleep finishes. Just dispatch ` +
|
|
40
|
+
`the bash, send that one HTML reply, end your turn. When it finishes ` +
|
|
41
|
+
`much later, reply with the single word "done".`;
|
|
42
|
+
|
|
43
|
+
const SUFFIX_RE = /\n\n— still working \(\d+m\)$/;
|
|
44
|
+
|
|
45
|
+
describe("uat: pending-progress edit preserves HTML formatting (#1698 regression gate)", () => {
|
|
46
|
+
it(
|
|
47
|
+
"first pending-progress edit reads back WITHOUT literal HTML tags",
|
|
48
|
+
async () => {
|
|
49
|
+
const sc = await spinUp({ agent: "test-harness" });
|
|
50
|
+
try {
|
|
51
|
+
const startedAt = Date.now();
|
|
52
|
+
await sc.sendDM(PROMPT);
|
|
53
|
+
|
|
54
|
+
let anchorMsgId: number | null = null;
|
|
55
|
+
let editText: string | null = null;
|
|
56
|
+
const deadline = startedAt + OVERALL_DEADLINE_MS;
|
|
57
|
+
|
|
58
|
+
while (Date.now() < deadline) {
|
|
59
|
+
try {
|
|
60
|
+
const msg = await sc.expectMessage(
|
|
61
|
+
(m: ObservedMessage) => m.fromBot,
|
|
62
|
+
{ from: "bot", timeout: deadline - Date.now() },
|
|
63
|
+
);
|
|
64
|
+
const rel = Date.now() - startedAt;
|
|
65
|
+
console.log(
|
|
66
|
+
`[jtbd-pending-progress-html] +${(rel / 1000).toFixed(1)}s ` +
|
|
67
|
+
`${msg.edited ? "EDIT" : "FRESH"} msg=${msg.messageId} ` +
|
|
68
|
+
`${JSON.stringify(msg.text.slice(0, 120))}`,
|
|
69
|
+
);
|
|
70
|
+
if (!msg.edited && anchorMsgId == null) {
|
|
71
|
+
anchorMsgId = msg.messageId;
|
|
72
|
+
continue;
|
|
73
|
+
}
|
|
74
|
+
if (
|
|
75
|
+
msg.edited &&
|
|
76
|
+
anchorMsgId === msg.messageId &&
|
|
77
|
+
SUFFIX_RE.test(msg.text)
|
|
78
|
+
) {
|
|
79
|
+
editText = msg.text;
|
|
80
|
+
break;
|
|
81
|
+
}
|
|
82
|
+
} catch {
|
|
83
|
+
break;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
expect(
|
|
88
|
+
anchorMsgId,
|
|
89
|
+
"agent never sent its initial HTML reply — UAT env issue",
|
|
90
|
+
).not.toBeNull();
|
|
91
|
+
expect(
|
|
92
|
+
editText,
|
|
93
|
+
`no pending-progress edit observed within ${OVERALL_DEADLINE_MS / 1000}s — ` +
|
|
94
|
+
`model may not have dispatched async, or pending-progress is disabled`,
|
|
95
|
+
).not.toBeNull();
|
|
96
|
+
|
|
97
|
+
// ── THE #1698 REGRESSION GATE ─────────────────────────────────
|
|
98
|
+
// mtcute's Message.text returns the parsed text — formatting
|
|
99
|
+
// lives in `entities`. So a working parse_mode=HTML edit shows
|
|
100
|
+
// clean prose with no literal "<b>" / "<code>" substrings.
|
|
101
|
+
// Pre-fix the gateway dropped parse_mode on the cross-turn
|
|
102
|
+
// edit and Telegram stored the tags as plain characters.
|
|
103
|
+
expect(
|
|
104
|
+
editText,
|
|
105
|
+
`#1698 regression: pending-progress edit text contains literal "<b>" — ` +
|
|
106
|
+
`parse_mode was dropped and Telegram is storing the original HTML tags as plain.`,
|
|
107
|
+
).not.toContain("<b>");
|
|
108
|
+
expect(editText).not.toContain("</b>");
|
|
109
|
+
expect(editText).not.toContain("<code>");
|
|
110
|
+
expect(editText).not.toContain("</code>");
|
|
111
|
+
|
|
112
|
+
// Sanity — the model's prose is still visible (without tags).
|
|
113
|
+
expect(editText).toContain("Worker dispatched");
|
|
114
|
+
|
|
115
|
+
// Belt-and-braces — the suffix landed (proves the edit was
|
|
116
|
+
// pending-progress and not some other path).
|
|
117
|
+
expect(editText).toMatch(SUFFIX_RE);
|
|
118
|
+
} finally {
|
|
119
|
+
await sc.tearDown();
|
|
120
|
+
}
|
|
121
|
+
},
|
|
122
|
+
OVERALL_DEADLINE_MS + 30_000,
|
|
123
|
+
);
|
|
124
|
+
});
|