switchroom 0.14.65 → 0.14.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +454 -326
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +56 -18
- package/telegram-plugin/gateway/answer-thread-resolve.test.ts +85 -0
- package/telegram-plugin/gateway/answer-thread-resolve.ts +30 -4
- package/telegram-plugin/gateway/gateway.ts +132 -13
- package/telegram-plugin/silence-poke.ts +25 -0
- package/telegram-plugin/tests/multitopic-routing-wiring.test.ts +4 -2
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +67 -0
- package/telegram-plugin/tests/silence-poke.test.ts +42 -0
- package/telegram-plugin/uat/real-work-prompts.ts +332 -0
- package/telegram-plugin/uat/scenarios/fuzz-real-work-channel.test.ts +82 -0
- package/telegram-plugin/uat/scenarios/fuzz-real-work-dm.test.ts +64 -0
package/package.json
CHANGED
|
@@ -39034,6 +39034,13 @@ function noteOutbound2(key, now) {
|
|
|
39034
39034
|
s.lastOutboundAt = now;
|
|
39035
39035
|
s.fallbackFired = false;
|
|
39036
39036
|
}
|
|
39037
|
+
function noteProduction(key, now) {
|
|
39038
|
+
const s = state2.get(key);
|
|
39039
|
+
if (s == null)
|
|
39040
|
+
return;
|
|
39041
|
+
s.lastOutboundAt = now;
|
|
39042
|
+
s.fallbackFired = false;
|
|
39043
|
+
}
|
|
39037
39044
|
function noteThinking(key, now) {
|
|
39038
39045
|
const s = state2.get(key);
|
|
39039
39046
|
if (s == null)
|
|
@@ -47936,6 +47943,10 @@ function resolveAnswerThreadId(input) {
|
|
|
47936
47943
|
return input.explicitThreadId;
|
|
47937
47944
|
if (input.originResolved)
|
|
47938
47945
|
return input.originThreadId;
|
|
47946
|
+
if (input.liveThreadId != null)
|
|
47947
|
+
return input.liveThreadId;
|
|
47948
|
+
if (input.lastEndedResolvedForChat)
|
|
47949
|
+
return input.lastEndedThreadIdForChat;
|
|
47939
47950
|
return input.liveThreadId;
|
|
47940
47951
|
}
|
|
47941
47952
|
|
|
@@ -52759,11 +52770,11 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
52759
52770
|
}
|
|
52760
52771
|
|
|
52761
52772
|
// ../src/build-info.ts
|
|
52762
|
-
var VERSION = "0.14.
|
|
52763
|
-
var COMMIT_SHA = "
|
|
52764
|
-
var COMMIT_DATE = "2026-06-
|
|
52765
|
-
var LATEST_PR =
|
|
52766
|
-
var COMMITS_AHEAD_OF_TAG =
|
|
52773
|
+
var VERSION = "0.14.67";
|
|
52774
|
+
var COMMIT_SHA = "dcade213";
|
|
52775
|
+
var COMMIT_DATE = "2026-06-05T08:22:01Z";
|
|
52776
|
+
var LATEST_PR = 2171;
|
|
52777
|
+
var COMMITS_AHEAD_OF_TAG = 4;
|
|
52767
52778
|
|
|
52768
52779
|
// gateway/boot-version.ts
|
|
52769
52780
|
function formatRelativeAgo(iso) {
|
|
@@ -54061,6 +54072,33 @@ function findTurnByOriginId(originTurnId) {
|
|
|
54061
54072
|
return currentTurn;
|
|
54062
54073
|
return recentTurnsById.get(originTurnId) ?? null;
|
|
54063
54074
|
}
|
|
54075
|
+
var LATE_REPLY_TOPIC_RECOVERY_ENABLED = process.env.SWITCHROOM_LATE_REPLY_TOPIC_RECOVERY !== "0";
|
|
54076
|
+
function findLatestEndedTurnForChat(chatId) {
|
|
54077
|
+
let latest = null;
|
|
54078
|
+
for (const t of recentTurnsById.values()) {
|
|
54079
|
+
if (t.sessionChatId === chatId)
|
|
54080
|
+
latest = t;
|
|
54081
|
+
}
|
|
54082
|
+
return latest;
|
|
54083
|
+
}
|
|
54084
|
+
function resolveAnswerThreadWithLog(chatId, explicitThreadId, originTurn, liveTurn, surface) {
|
|
54085
|
+
const recovered = LATE_REPLY_TOPIC_RECOVERY_ENABLED && explicitThreadId == null && originTurn == null && liveTurn == null ? findLatestEndedTurnForChat(chatId) : null;
|
|
54086
|
+
const threadId = resolveAnswerThreadId({
|
|
54087
|
+
explicitThreadId,
|
|
54088
|
+
originResolved: originTurn != null,
|
|
54089
|
+
originThreadId: originTurn?.sessionThreadId,
|
|
54090
|
+
liveThreadId: liveTurn?.sessionThreadId,
|
|
54091
|
+
lastEndedResolvedForChat: recovered != null,
|
|
54092
|
+
lastEndedThreadIdForChat: recovered?.sessionThreadId
|
|
54093
|
+
});
|
|
54094
|
+
const via = explicitThreadId != null ? "explicit" : originTurn != null ? "origin" : liveTurn?.sessionThreadId != null ? "live" : recovered != null ? "recovered" : "none";
|
|
54095
|
+
const ownerTurn = originTurn ?? recovered ?? liveTurn;
|
|
54096
|
+
const isSupergroup = chatId.startsWith("-100");
|
|
54097
|
+
const unrouted = isSupergroup && threadId == null;
|
|
54098
|
+
process.stderr.write(`telegram gateway: reply-route surface=${surface} chat=${chatId} resolved_thread=${threadId ?? "-"} via=${via} late=${liveTurn == null} originTurn=${ownerTurn?.turnId ?? "-"} origin_thread=${ownerTurn?.sessionThreadId ?? "-"}` + (via === "recovered" ? " RECOVERED" : "") + (unrouted ? " UNROUTED(supergroup\u2192no-topic)" : "") + `
|
|
54099
|
+
`);
|
|
54100
|
+
return threadId;
|
|
54101
|
+
}
|
|
54064
54102
|
function closeObligationOnSubstantiveReply(args, liveTurn) {
|
|
54065
54103
|
if (!OBLIGATION_LEDGER_ENABLED)
|
|
54066
54104
|
return;
|
|
@@ -55229,6 +55267,7 @@ function parsePositiveMsEnv(name, fallbackMs) {
|
|
|
55229
55267
|
var SILENCE_FALLBACK_MS = parsePositiveMsEnv("SWITCHROOM_SILENCE_FALLBACK_MS", 300000);
|
|
55230
55268
|
var SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv("SWITCHROOM_SILENCE_FALLBACK_HARD_MS", 900000);
|
|
55231
55269
|
var SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === "1";
|
|
55270
|
+
var SILENCE_LIVENESS_PRODUCTION = process.env.SWITCHROOM_SILENCE_LIVENESS_PRODUCTION !== "0";
|
|
55232
55271
|
startTimer({
|
|
55233
55272
|
thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
|
|
55234
55273
|
deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
|
|
@@ -55320,8 +55359,11 @@ startTimer({
|
|
|
55320
55359
|
const sib = silenceMsForKey(siblingKey, fbNow);
|
|
55321
55360
|
return sib == null || sib >= DEFAULT_THRESHOLDS.fallback;
|
|
55322
55361
|
});
|
|
55323
|
-
if (turnMatchesFallback && currentTurn === wedgedTurn)
|
|
55362
|
+
if (turnMatchesFallback && currentTurn === wedgedTurn && wedgedTurn != null) {
|
|
55363
|
+
process.stderr.write(`telegram gateway: ${formatTurnLifecycle("clear", "silence_fallback", wedgedTurn, Date.now())}
|
|
55364
|
+
`);
|
|
55324
55365
|
currentTurn = null;
|
|
55366
|
+
}
|
|
55325
55367
|
try {
|
|
55326
55368
|
clearSilentEndState(fbKey);
|
|
55327
55369
|
} catch {}
|
|
@@ -56291,12 +56333,7 @@ ${url}`;
|
|
|
56291
56333
|
if (TURN_ORIGIN_ROUTING_ENABLED) {
|
|
56292
56334
|
const explicit = args.message_thread_id != null ? Number(args.message_thread_id) : undefined;
|
|
56293
56335
|
const originTurn = findTurnByOriginId(args.origin_turn_id);
|
|
56294
|
-
threadId =
|
|
56295
|
-
explicitThreadId: Number.isFinite(explicit) ? explicit : undefined,
|
|
56296
|
-
originResolved: originTurn != null,
|
|
56297
|
-
originThreadId: originTurn?.sessionThreadId,
|
|
56298
|
-
liveThreadId: turn?.sessionThreadId
|
|
56299
|
-
});
|
|
56336
|
+
threadId = resolveAnswerThreadWithLog(chat_id, Number.isFinite(explicit) ? explicit : undefined, originTurn, turn, "reply");
|
|
56300
56337
|
} else {
|
|
56301
56338
|
threadId = resolveThreadId(chat_id, args.message_thread_id ?? (turn?.sessionThreadId != null ? turn.sessionThreadId : undefined));
|
|
56302
56339
|
}
|
|
@@ -56656,12 +56693,7 @@ async function executeStreamReply(args) {
|
|
|
56656
56693
|
let injected;
|
|
56657
56694
|
if (TURN_ORIGIN_ROUTING_ENABLED) {
|
|
56658
56695
|
const originTurn = findTurnByOriginId(args.origin_turn_id);
|
|
56659
|
-
injected =
|
|
56660
|
-
explicitThreadId: undefined,
|
|
56661
|
-
originResolved: originTurn != null,
|
|
56662
|
-
originThreadId: originTurn?.sessionThreadId,
|
|
56663
|
-
liveThreadId: turn?.sessionThreadId
|
|
56664
|
-
});
|
|
56696
|
+
injected = resolveAnswerThreadWithLog(String(args.chat_id), undefined, originTurn, turn, "stream_reply");
|
|
56665
56697
|
} else {
|
|
56666
56698
|
injected = turn?.sessionThreadId;
|
|
56667
56699
|
}
|
|
@@ -58055,6 +58087,9 @@ function handleSessionEvent(ev) {
|
|
|
58055
58087
|
const rendered = appendActivityLabel(turn.mirrorLines, ev.label);
|
|
58056
58088
|
if (rendered != null) {
|
|
58057
58089
|
turn.lastToolLabelAt = Date.now();
|
|
58090
|
+
if (SILENCE_LIVENESS_PRODUCTION && currentTurn === turn) {
|
|
58091
|
+
noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now());
|
|
58092
|
+
}
|
|
58058
58093
|
turn.activityPendingRender = composeTurnActivity(turn) ?? rendered;
|
|
58059
58094
|
if (turn.activityInFlight == null) {
|
|
58060
58095
|
turn.activityInFlight = drainActivitySummary(turn);
|
|
@@ -58109,6 +58144,9 @@ function handleSessionEvent(ev) {
|
|
|
58109
58144
|
logStreamingEvent(metricEv);
|
|
58110
58145
|
if (currentTurn === turn) {
|
|
58111
58146
|
noteSignal(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now());
|
|
58147
|
+
if (SILENCE_LIVENESS_PRODUCTION) {
|
|
58148
|
+
noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now());
|
|
58149
|
+
}
|
|
58112
58150
|
}
|
|
58113
58151
|
},
|
|
58114
58152
|
checkDedup: (text) => {
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { resolveAnswerThreadId } from './answer-thread-resolve.js'
|
|
3
|
+
|
|
4
|
+
describe('resolveAnswerThreadId — precedence', () => {
|
|
5
|
+
it('(1) explicit model thread wins over everything', () => {
|
|
6
|
+
expect(
|
|
7
|
+
resolveAnswerThreadId({
|
|
8
|
+
explicitThreadId: 7,
|
|
9
|
+
originResolved: true,
|
|
10
|
+
originThreadId: 3,
|
|
11
|
+
liveThreadId: 4,
|
|
12
|
+
lastEndedResolvedForChat: true,
|
|
13
|
+
lastEndedThreadIdForChat: 9,
|
|
14
|
+
}),
|
|
15
|
+
).toBe(7)
|
|
16
|
+
})
|
|
17
|
+
|
|
18
|
+
it('(2) origin turn thread wins over the live turn (the Brevo→Meta fix)', () => {
|
|
19
|
+
expect(
|
|
20
|
+
resolveAnswerThreadId({ originResolved: true, originThreadId: 3, liveThreadId: 4 }),
|
|
21
|
+
).toBe(3)
|
|
22
|
+
})
|
|
23
|
+
|
|
24
|
+
it('(2) a DM origin (resolved, thread undefined) pins to undefined, not the live thread', () => {
|
|
25
|
+
expect(
|
|
26
|
+
resolveAnswerThreadId({ originResolved: true, originThreadId: undefined, liveThreadId: 4 }),
|
|
27
|
+
).toBeUndefined()
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it('(3) no origin → falls back to the live turn thread (legacy #1664)', () => {
|
|
31
|
+
expect(
|
|
32
|
+
resolveAnswerThreadId({ originResolved: false, liveThreadId: 4 }),
|
|
33
|
+
).toBe(4)
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
// ── tier (4): late-reply topic recovery (2026-06-05) ──────────────────────
|
|
37
|
+
it('(4) no explicit, no origin, NO live turn → recovers the most-recent ended turn thread', () => {
|
|
38
|
+
// The marko bug: a reply that fired after the orphaned-reply backstop ended
|
|
39
|
+
// its turn. Pre-fix this returned undefined (General); now it recovers topic 3.
|
|
40
|
+
expect(
|
|
41
|
+
resolveAnswerThreadId({
|
|
42
|
+
originResolved: false,
|
|
43
|
+
liveThreadId: undefined,
|
|
44
|
+
lastEndedResolvedForChat: true,
|
|
45
|
+
lastEndedThreadIdForChat: 3,
|
|
46
|
+
}),
|
|
47
|
+
).toBe(3)
|
|
48
|
+
})
|
|
49
|
+
|
|
50
|
+
it('(4) a recovered DM turn (ended, thread undefined) stays threadless', () => {
|
|
51
|
+
expect(
|
|
52
|
+
resolveAnswerThreadId({
|
|
53
|
+
originResolved: false,
|
|
54
|
+
liveThreadId: undefined,
|
|
55
|
+
lastEndedResolvedForChat: true,
|
|
56
|
+
lastEndedThreadIdForChat: undefined,
|
|
57
|
+
}),
|
|
58
|
+
).toBeUndefined()
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
it('(4) recovery does NOT override a live turn — live thread still wins at tier 3', () => {
|
|
62
|
+
expect(
|
|
63
|
+
resolveAnswerThreadId({
|
|
64
|
+
originResolved: false,
|
|
65
|
+
liveThreadId: 4,
|
|
66
|
+
lastEndedResolvedForChat: true,
|
|
67
|
+
lastEndedThreadIdForChat: 3,
|
|
68
|
+
}),
|
|
69
|
+
).toBe(4)
|
|
70
|
+
})
|
|
71
|
+
|
|
72
|
+
it('(4) no recovery candidate → legacy result (undefined), unchanged', () => {
|
|
73
|
+
expect(
|
|
74
|
+
resolveAnswerThreadId({
|
|
75
|
+
originResolved: false,
|
|
76
|
+
liveThreadId: undefined,
|
|
77
|
+
lastEndedResolvedForChat: false,
|
|
78
|
+
}),
|
|
79
|
+
).toBeUndefined()
|
|
80
|
+
})
|
|
81
|
+
|
|
82
|
+
it('pure DM (every tier undefined) → undefined', () => {
|
|
83
|
+
expect(resolveAnswerThreadId({ originResolved: false })).toBeUndefined()
|
|
84
|
+
})
|
|
85
|
+
})
|
|
@@ -26,10 +26,14 @@
|
|
|
26
26
|
* 3. Else the LIVE turn's thread — but ONLY when the live turn IS the
|
|
27
27
|
* origin turn (no flip happened) OR no origin turn could be resolved
|
|
28
28
|
* at all (origin id absent/unknown; legacy / pre-stamp path).
|
|
29
|
-
* 4. Else (
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
29
|
+
* 4. Else (no explicit, no origin echoed, no live turn) — a LATE reply that
|
|
30
|
+
* fired after its turn already ended (the orphaned-reply backstop case) —
|
|
31
|
+
* recover the origin topic from the most-recently-ended turn for this
|
|
32
|
+
* chat. Without this, such a reply defaults to the main chat (General in a
|
|
33
|
+
* supergroup) and its answer vanishes from the topic the user is reading
|
|
34
|
+
* (the 2026-06-05 marko triage). Still NOT the `chatThreadMap` last-seen
|
|
35
|
+
* heuristic — the recovered turn is the chat's own most-recent turn, not
|
|
36
|
+
* whichever topic last received any message.
|
|
33
37
|
*
|
|
34
38
|
* The `chatThreadMap` last-seen fallback is preserved for NON-answer
|
|
35
39
|
* surfaces (`send_typing`, `forward_message`, `progress_update`) by NOT
|
|
@@ -53,6 +57,20 @@ export interface AnswerThreadInput {
|
|
|
53
57
|
* (no live turn, or a DM live turn). The legacy (#1664) fallback when
|
|
54
58
|
* no origin turn is resolvable. */
|
|
55
59
|
liveThreadId?: number | undefined
|
|
60
|
+
/**
|
|
61
|
+
* Late-reply topic recovery (2026-06-05). Thread of the most-recently-ended
|
|
62
|
+
* turn for THIS chat (from `recentTurnsById`), used as a deterministic
|
|
63
|
+
* fallback when the model echoed no `origin_turn_id` AND there is no live
|
|
64
|
+
* turn — the late-reply-after-turn-end case. Without it, a reply that fires
|
|
65
|
+
* after the orphaned-reply backstop closed its turn defaults to the main chat
|
|
66
|
+
* (General topic in a supergroup), so its answer vanishes from the topic the
|
|
67
|
+
* user is reading. Only consulted at tier (4); a DM origin yields undefined,
|
|
68
|
+
* which is correct.
|
|
69
|
+
*/
|
|
70
|
+
lastEndedThreadIdForChat?: number | undefined
|
|
71
|
+
/** Whether a recently-ended turn exists for this chat — distinguishes
|
|
72
|
+
* "ended turn exists, DM (thread undefined)" from "no ended turn at all". */
|
|
73
|
+
lastEndedResolvedForChat?: boolean
|
|
56
74
|
}
|
|
57
75
|
|
|
58
76
|
/**
|
|
@@ -75,5 +93,13 @@ export function resolveAnswerThreadId(input: AnswerThreadInput): number | undefi
|
|
|
75
93
|
if (input.originResolved) return input.originThreadId
|
|
76
94
|
// (3) no origin resolved (legacy / pre-stamp / evicted) → fall back to
|
|
77
95
|
// the live turn's thread, the existing turn-pinned behaviour (#1664).
|
|
96
|
+
if (input.liveThreadId != null) return input.liveThreadId
|
|
97
|
+
// (4) no explicit, no origin echoed, no live turn — a LATE reply that fired
|
|
98
|
+
// after its turn already ended (the orphaned-reply backstop case).
|
|
99
|
+
// Recover the origin topic from the most-recently-ended turn for this
|
|
100
|
+
// chat so the answer lands in the topic it belongs to instead of
|
|
101
|
+
// defaulting to the main chat (General). When no ended turn is known,
|
|
102
|
+
// fall through to liveThreadId (undefined) — the legacy result.
|
|
103
|
+
if (input.lastEndedResolvedForChat) return input.lastEndedThreadIdForChat
|
|
78
104
|
return input.liveThreadId
|
|
79
105
|
}
|
|
@@ -1888,6 +1888,89 @@ function findTurnByOriginId(originTurnId: string | null | undefined): CurrentTur
|
|
|
1888
1888
|
return recentTurnsById.get(originTurnId) ?? null
|
|
1889
1889
|
}
|
|
1890
1890
|
|
|
1891
|
+
// Late-reply topic recovery (2026-06-05 marko triage). Default ON; kill switch
|
|
1892
|
+
// SWITCHROOM_LATE_REPLY_TOPIC_RECOVERY=0 restores the legacy behaviour (a late
|
|
1893
|
+
// reply with no echoed origin and no live turn defaults to General).
|
|
1894
|
+
const LATE_REPLY_TOPIC_RECOVERY_ENABLED =
|
|
1895
|
+
process.env.SWITCHROOM_LATE_REPLY_TOPIC_RECOVERY !== '0'
|
|
1896
|
+
|
|
1897
|
+
/**
|
|
1898
|
+
* The most-recently-started turn for a chat from the bounded recently-ended
|
|
1899
|
+
* registry — the deterministic fallback for a LATE answer reply when the model
|
|
1900
|
+
* echoed no `origin_turn_id` and `currentTurn` has already cleared. Iterates in
|
|
1901
|
+
* insertion order so the last match is the most recent turn for that chat.
|
|
1902
|
+
* Returns null when the chat has no remembered turn (so the caller keeps the
|
|
1903
|
+
* legacy result). NB: this is the chat's own most-recent TURN, not the
|
|
1904
|
+
* `chatThreadMap` last-seen-any-message heuristic that caused the wrong-topic
|
|
1905
|
+
* bug — a late reply almost always belongs to the turn that just ended.
|
|
1906
|
+
*/
|
|
1907
|
+
function findLatestEndedTurnForChat(chatId: string): CurrentTurn | null {
|
|
1908
|
+
let latest: CurrentTurn | null = null
|
|
1909
|
+
for (const t of recentTurnsById.values()) {
|
|
1910
|
+
if (t.sessionChatId === chatId) latest = t
|
|
1911
|
+
}
|
|
1912
|
+
return latest
|
|
1913
|
+
}
|
|
1914
|
+
|
|
1915
|
+
/**
|
|
1916
|
+
* Resolve the answer-reply thread AND emit `reply-route` telemetry. The
|
|
1917
|
+
* 2026-06-05 triage showed reply routing was the blind spot: `reply: invoked`
|
|
1918
|
+
* logged only chat + char count, so a late reply landing in the wrong topic was
|
|
1919
|
+
* invisible without hand-correlating raw tg-post threads against turn-lifecycle
|
|
1920
|
+
* timestamps. This wrapper logs, per reply: which precedence tier won (`via`),
|
|
1921
|
+
* the resolved thread, the origin turn + its thread, and whether the reply was
|
|
1922
|
+
* late (turn already ended). `via=recovered` marks a late reply this fix saved
|
|
1923
|
+
* from General; `UNROUTED` flags a supergroup reply that still resolved to no
|
|
1924
|
+
* topic (the residual gap to watch).
|
|
1925
|
+
*/
|
|
1926
|
+
function resolveAnswerThreadWithLog(
|
|
1927
|
+
chatId: string,
|
|
1928
|
+
explicitThreadId: number | undefined,
|
|
1929
|
+
originTurn: CurrentTurn | null,
|
|
1930
|
+
liveTurn: CurrentTurn | null,
|
|
1931
|
+
surface: 'reply' | 'stream_reply',
|
|
1932
|
+
): number | undefined {
|
|
1933
|
+
// Recover ONLY for a genuinely LATE reply — no live turn at all. Gating on
|
|
1934
|
+
// `liveTurn?.sessionThreadId == null` (the original) also fired for a
|
|
1935
|
+
// threadless DM that still had a live turn, marking every DM reply
|
|
1936
|
+
// `via=recovered`/RECOVERED in the telemetry (routing result unchanged —
|
|
1937
|
+
// DM → undefined — but it drowned the real supergroup recoveries the marker
|
|
1938
|
+
// exists to surface). `liveTurn == null` is the precise late-reply condition.
|
|
1939
|
+
const recovered =
|
|
1940
|
+
LATE_REPLY_TOPIC_RECOVERY_ENABLED &&
|
|
1941
|
+
explicitThreadId == null &&
|
|
1942
|
+
originTurn == null &&
|
|
1943
|
+
liveTurn == null
|
|
1944
|
+
? findLatestEndedTurnForChat(chatId)
|
|
1945
|
+
: null
|
|
1946
|
+
const threadId = resolveAnswerThreadId({
|
|
1947
|
+
explicitThreadId,
|
|
1948
|
+
originResolved: originTurn != null,
|
|
1949
|
+
originThreadId: originTurn?.sessionThreadId,
|
|
1950
|
+
liveThreadId: liveTurn?.sessionThreadId,
|
|
1951
|
+
lastEndedResolvedForChat: recovered != null,
|
|
1952
|
+
lastEndedThreadIdForChat: recovered?.sessionThreadId,
|
|
1953
|
+
})
|
|
1954
|
+
const via =
|
|
1955
|
+
explicitThreadId != null ? 'explicit'
|
|
1956
|
+
: originTurn != null ? 'origin'
|
|
1957
|
+
: liveTurn?.sessionThreadId != null ? 'live'
|
|
1958
|
+
: recovered != null ? 'recovered'
|
|
1959
|
+
: 'none'
|
|
1960
|
+
const ownerTurn = originTurn ?? recovered ?? liveTurn
|
|
1961
|
+
const isSupergroup = chatId.startsWith('-100')
|
|
1962
|
+
const unrouted = isSupergroup && threadId == null
|
|
1963
|
+
process.stderr.write(
|
|
1964
|
+
`telegram gateway: reply-route surface=${surface} chat=${chatId} ` +
|
|
1965
|
+
`resolved_thread=${threadId ?? '-'} via=${via} late=${liveTurn == null} ` +
|
|
1966
|
+
`originTurn=${ownerTurn?.turnId ?? '-'} origin_thread=${ownerTurn?.sessionThreadId ?? '-'}` +
|
|
1967
|
+
(via === 'recovered' ? ' RECOVERED' : '') +
|
|
1968
|
+
(unrouted ? ' UNROUTED(supergroup→no-topic)' : '') +
|
|
1969
|
+
'\n',
|
|
1970
|
+
)
|
|
1971
|
+
return threadId
|
|
1972
|
+
}
|
|
1973
|
+
|
|
1891
1974
|
/**
|
|
1892
1975
|
* PR2 obligation-ledger CLOSE. Called when a SUBSTANTIVE final answer lands
|
|
1893
1976
|
* (not a bare interim ack — using finalAnswerSubstantive, the #2141 signal): the
|
|
@@ -4596,6 +4679,12 @@ function parsePositiveMsEnv(name: string, fallbackMs: number): number {
|
|
|
4596
4679
|
const SILENCE_FALLBACK_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_MS', 300_000)
|
|
4597
4680
|
const SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_HARD_MS', 900_000)
|
|
4598
4681
|
const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === '1'
|
|
4682
|
+
// Production-liveness (2026-06-05 UAT finding). Count an activity-feed render or
|
|
4683
|
+
// an answer-stream draft update as liveness for the silence clock, so a long
|
|
4684
|
+
// tool/composition turn that's visibly producing doesn't trip the 300s fallback
|
|
4685
|
+
// and null currentTurn mid-work. Default ON; SWITCHROOM_SILENCE_LIVENESS_PRODUCTION=0
|
|
4686
|
+
// restores the legacy "only a real reply resets the clock" behaviour.
|
|
4687
|
+
const SILENCE_LIVENESS_PRODUCTION = process.env.SWITCHROOM_SILENCE_LIVENESS_PRODUCTION !== '0'
|
|
4599
4688
|
|
|
4600
4689
|
silencePoke.startTimer({
|
|
4601
4690
|
thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
|
|
@@ -4812,7 +4901,16 @@ silencePoke.startTimer({
|
|
|
4812
4901
|
// returns null and the regular teardown short-circuits. Without
|
|
4813
4902
|
// this, the late event would re-emit `turn_ended` AND clobber
|
|
4814
4903
|
// whatever fresh turn the next inbound started.
|
|
4815
|
-
if (turnMatchesFallback && currentTurn === wedgedTurn
|
|
4904
|
+
if (turnMatchesFallback && currentTurn === wedgedTurn && wedgedTurn != null) {
|
|
4905
|
+
// Status-surface observability: emit the lifecycle CLEAR for the
|
|
4906
|
+
// silence-poke teardown so a fallback-nulled turn has a turn-lifecycle
|
|
4907
|
+
// line like every other clear path (the framework-fallback line below is
|
|
4908
|
+
// its own format — this makes the dark-out greppable in the same shape).
|
|
4909
|
+
process.stderr.write(
|
|
4910
|
+
`telegram gateway: ${formatTurnLifecycle('clear', 'silence_fallback', wedgedTurn, Date.now())}\n`,
|
|
4911
|
+
)
|
|
4912
|
+
currentTurn = null
|
|
4913
|
+
}
|
|
4816
4914
|
// Best-effort: clear any pending silent-end marker so the Stop hook
|
|
4817
4915
|
// doesn't double-block when claude eventually exits the wedged turn.
|
|
4818
4916
|
try {
|
|
@@ -6522,12 +6620,13 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
6522
6620
|
if (TURN_ORIGIN_ROUTING_ENABLED) {
|
|
6523
6621
|
const explicit = args.message_thread_id != null ? Number(args.message_thread_id) : undefined
|
|
6524
6622
|
const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
|
|
6525
|
-
threadId =
|
|
6526
|
-
|
|
6527
|
-
|
|
6528
|
-
|
|
6529
|
-
|
|
6530
|
-
|
|
6623
|
+
threadId = resolveAnswerThreadWithLog(
|
|
6624
|
+
chat_id,
|
|
6625
|
+
Number.isFinite(explicit as number) ? (explicit as number) : undefined,
|
|
6626
|
+
originTurn,
|
|
6627
|
+
turn,
|
|
6628
|
+
'reply',
|
|
6629
|
+
)
|
|
6531
6630
|
} else {
|
|
6532
6631
|
threadId = resolveThreadId(
|
|
6533
6632
|
chat_id,
|
|
@@ -7178,12 +7277,13 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
7178
7277
|
let injected: number | undefined
|
|
7179
7278
|
if (TURN_ORIGIN_ROUTING_ENABLED) {
|
|
7180
7279
|
const originTurn = findTurnByOriginId(args.origin_turn_id as string | undefined)
|
|
7181
|
-
injected =
|
|
7182
|
-
|
|
7183
|
-
|
|
7184
|
-
|
|
7185
|
-
|
|
7186
|
-
|
|
7280
|
+
injected = resolveAnswerThreadWithLog(
|
|
7281
|
+
String(args.chat_id),
|
|
7282
|
+
undefined,
|
|
7283
|
+
originTurn,
|
|
7284
|
+
turn,
|
|
7285
|
+
'stream_reply',
|
|
7286
|
+
)
|
|
7187
7287
|
} else {
|
|
7188
7288
|
injected = turn?.sessionThreadId
|
|
7189
7289
|
}
|
|
@@ -9373,6 +9473,16 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
9373
9473
|
// the " · Ns" elapsed restarts from this step (and the feed itself just
|
|
9374
9474
|
// advanced, so it isn't stale).
|
|
9375
9475
|
turn.lastToolLabelAt = Date.now()
|
|
9476
|
+
// Production-liveness: a NEW model-driven activity label is genuine
|
|
9477
|
+
// liveness (the model emitted a new step), so reset the silence-poke
|
|
9478
|
+
// clock — this is the safe site, NOT drainActivitySummary, because the
|
|
9479
|
+
// framework feedHeartbeatTick also drains (climbing-elapsed re-renders)
|
|
9480
|
+
// and would falsely reset the clock forever on a hung-mid-tool turn,
|
|
9481
|
+
// reintroducing the #1556 dangling-turn wedge. Only the model emitting a
|
|
9482
|
+
// fresh label reaches here.
|
|
9483
|
+
if (SILENCE_LIVENESS_PRODUCTION && currentTurn === turn) {
|
|
9484
|
+
silencePoke.noteProduction(statusKey(turn.sessionChatId, turn.sessionThreadId), Date.now())
|
|
9485
|
+
}
|
|
9376
9486
|
// Recompose so any active foreground sub-agent's nested block (Model A)
|
|
9377
9487
|
// is preserved when the parent appends its own step. composeTurnActivity
|
|
9378
9488
|
// == the flat render when no foreground sub-agent is active.
|
|
@@ -9533,6 +9643,15 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
9533
9643
|
statusKey(turn.sessionChatId, turn.sessionThreadId),
|
|
9534
9644
|
Date.now(),
|
|
9535
9645
|
)
|
|
9646
|
+
// Production-liveness: a draft update is the agent visibly
|
|
9647
|
+
// composing — reset the silence-poke clock so a long
|
|
9648
|
+
// compose-only turn (no tools, no reply yet) isn't torn down.
|
|
9649
|
+
if (SILENCE_LIVENESS_PRODUCTION) {
|
|
9650
|
+
silencePoke.noteProduction(
|
|
9651
|
+
statusKey(turn.sessionChatId, turn.sessionThreadId),
|
|
9652
|
+
Date.now(),
|
|
9653
|
+
)
|
|
9654
|
+
}
|
|
9536
9655
|
}
|
|
9537
9656
|
},
|
|
9538
9657
|
// #646 — wire the shared outboundDedup into the answer-stream
|
|
@@ -196,6 +196,31 @@ export function noteOutbound(key: string, now: number): void {
|
|
|
196
196
|
s.fallbackFired = false
|
|
197
197
|
}
|
|
198
198
|
|
|
199
|
+
/**
|
|
200
|
+
* Record observable PRODUCTION that isn't a final reply — an activity-feed
|
|
201
|
+
* render (`→/✓` edit-in-place message) or an answer-stream draft update. Resets
|
|
202
|
+
* the silence clock exactly like a reply.
|
|
203
|
+
*
|
|
204
|
+
* Why this exists (2026-06-05): the header's "only a real reply counts; tool
|
|
205
|
+
* churn / the model ripping through 20 tool calls is still SILENT to the user"
|
|
206
|
+
* rule predates the live activity feed (#2162) and the compose draft. Those
|
|
207
|
+
* surfaces ARE user-visible now, so a turn actively rendering them is NOT
|
|
208
|
+
* silent — yet the 300s fallback (which nulls `currentTurn` and kills the very
|
|
209
|
+
* feed/draft the user is watching) still fired on a long tool/composition turn,
|
|
210
|
+
* darkening the live status mid-work. Counting production as liveness makes the
|
|
211
|
+
* fallback fire only on GENUINE silence (no reply, no feed, no draft, no tool
|
|
212
|
+
* events for the window) — a real wedge. A wedged agent produces nothing
|
|
213
|
+
* observable, so its clock is never reset and it still recovers.
|
|
214
|
+
*
|
|
215
|
+
* No-op when the key has no tracked turn. The SWITCHROOM_SILENCE_LIVENESS_PRODUCTION=0 kill switch is checked by the callers, not inside this function.
|
|
216
|
+
*/
|
|
217
|
+
export function noteProduction(key: string, now: number): void {
|
|
218
|
+
const s = state.get(key)
|
|
219
|
+
if (s == null) return
|
|
220
|
+
s.lastOutboundAt = now
|
|
221
|
+
s.fallbackFired = false
|
|
222
|
+
}
|
|
223
|
+
|
|
199
224
|
/**
|
|
200
225
|
* Record a `thinking` session event. Used to pick "still thinking…" vs
|
|
201
226
|
* "still working…" wording for the 300s framework fallback.
|
|
@@ -45,13 +45,15 @@ describe('component 3 — turn-origin reply routing', () => {
|
|
|
45
45
|
const fn = gatewaySrc.split('async function executeReply')[1]?.split('\nasync function ')[0] ?? ''
|
|
46
46
|
expect(fn).toMatch(/TURN_ORIGIN_ROUTING_ENABLED/)
|
|
47
47
|
expect(fn).toMatch(/findTurnByOriginId\(args\.origin_turn_id/)
|
|
48
|
-
|
|
48
|
+
// The resolution + reply-route telemetry go through resolveAnswerThreadWithLog,
|
|
49
|
+
// which calls the pure resolveAnswerThreadId internally (incl. tier-4 recovery).
|
|
50
|
+
expect(fn).toMatch(/resolveAnswerThread\w*\(/)
|
|
49
51
|
})
|
|
50
52
|
|
|
51
53
|
it('executeStreamReply resolves the answer thread via the origin turn too', () => {
|
|
52
54
|
const fn = gatewaySrc.split('async function executeStreamReply')[1]?.split('\nasync function ')[0] ?? ''
|
|
53
55
|
expect(fn).toMatch(/findTurnByOriginId\(args\.origin_turn_id/)
|
|
54
|
-
expect(fn).toMatch(/
|
|
56
|
+
expect(fn).toMatch(/resolveAnswerThread\w*\(/)
|
|
55
57
|
})
|
|
56
58
|
|
|
57
59
|
it('the reply + stream_reply tool schemas expose origin_turn_id to the model', () => {
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Silence-poke production-liveness — heartbeat-safety guard (2026-06-05).
|
|
3
|
+
*
|
|
4
|
+
* The production-liveness fix resets the silence clock on observable production
|
|
5
|
+
* so a long WORKING turn doesn't dark out. The load-bearing constraint: the
|
|
6
|
+
* reset must fire ONLY on MODEL-driven production, NEVER from the framework
|
|
7
|
+
* `feedHeartbeatTick` — a model-INDEPENDENT setInterval that re-renders a
|
|
8
|
+
* climbing " · Ns" elapsed every 6s (defeating the feed's content-dedup). If the
|
|
9
|
+
* reset lived in `drainActivitySummary` (which the heartbeat drains), a
|
|
10
|
+
* hung-but-bridge-connected agent would have its 300s silence clock reset every
|
|
11
|
+
* 6s forever, the load-bearing silence-poke unwedge would NEVER fire, and the
|
|
12
|
+
* conversation would be pinned — the #1556 permanent dangling-turn wedge.
|
|
13
|
+
*
|
|
14
|
+
* An adversarial review panel caught exactly this in an earlier revision. These
|
|
15
|
+
* are STRUCTURAL assertions (the gateway IIFE can't be instantiated in-process —
|
|
16
|
+
* same pattern as multitopic-routing-wiring.test) that pin the reset to the
|
|
17
|
+
* model-driven sites so a refactor can't silently reintroduce the regression.
|
|
18
|
+
* The behavioural counterpart (noteProduction resets; STOP producing → fires)
|
|
19
|
+
* lives in silence-poke.test.ts; this guards the WIRING the heartbeat must not
|
|
20
|
+
* cross.
|
|
21
|
+
*/
|
|
22
|
+
import { describe, it, expect } from 'vitest'
|
|
23
|
+
import { readFileSync } from 'node:fs'
|
|
24
|
+
import { resolve } from 'node:path'
|
|
25
|
+
|
|
26
|
+
const gatewaySrc = readFileSync(resolve(__dirname, '..', 'gateway', 'gateway.ts'), 'utf-8')
|
|
27
|
+
|
|
28
|
+
function between(src: string, startMarker: string, endMarker: string): string {
|
|
29
|
+
const after = src.split(startMarker)[1] ?? ''
|
|
30
|
+
return after.split(endMarker)[0] ?? ''
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
describe('silence-poke production-liveness — heartbeat safety', () => {
  // Patterns shared by the structural checks below. None carries the /g flag,
  // so reusing the same RegExp objects across assertions is stateless.
  const anyResetCall = /noteProduction/
  const qualifiedResetCall = /silencePoke\.noteProduction/
  const liveTurnGuard = /currentTurn === turn/

  it('drainActivitySummary must NOT reset the silence clock (the framework heartbeat drains here)', () => {
    const drainBody = between(
      gatewaySrc,
      'async function drainActivitySummary',
      '\nfunction feedHeartbeatTick',
    )
    // Guard against a vacuous pass: an empty slice would trivially "not match".
    expect(drainBody.length).toBeGreaterThan(100)
    expect(drainBody).not.toMatch(anyResetCall)
  })

  it('feedHeartbeatTick itself must NOT reset the silence clock (model-independent re-render)', () => {
    const tickBody = between(gatewaySrc, 'function feedHeartbeatTick(): void {', '\n}')
    // Same vacuous-pass guard as above, with a smaller floor for a smaller function.
    expect(tickBody.length).toBeGreaterThan(50)
    expect(tickBody).not.toMatch(anyResetCall)
  })

  it('the MODEL-driven tool-label append IS the reset site, gated on the live turn', () => {
    // The slice runs from the appendActivityLabel call to the following return;
    // the reset and its live-turn gate must both appear inside it.
    const appendSite = between(
      gatewaySrc,
      'const rendered = appendActivityLabel(turn.mirrorLines, ev.label)',
      '\n return',
    )
    expect(appendSite).toMatch(qualifiedResetCall)
    expect(appendSite).toMatch(liveTurnGuard)
  })

  it('the answer-stream draft onMetric reset is model-driven and gated on the live turn', () => {
    const metricHandler = between(gatewaySrc, 'onMetric: (metricEv) => {', '\n },')
    expect(metricHandler).toMatch(qualifiedResetCall)
    expect(metricHandler).toMatch(liveTurnGuard)
  })

  it('production-liveness is behind the default-ON SWITCHROOM_SILENCE_LIVENESS_PRODUCTION kill switch', () => {
    // The `!== '0'` comparison is what makes the switch default to ON.
    const killSwitchCheck = /SWITCHROOM_SILENCE_LIVENESS_PRODUCTION !== '0'/
    expect(gatewaySrc).toMatch(killSwitchCheck)
  })
})
|
|
@@ -2,6 +2,7 @@ import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
|
2
2
|
import {
|
|
3
3
|
startTurn,
|
|
4
4
|
noteOutbound,
|
|
5
|
+
noteProduction,
|
|
5
6
|
noteThinking,
|
|
6
7
|
noteToolStart,
|
|
7
8
|
noteToolEnd,
|
|
@@ -136,6 +137,47 @@ describe('silence-poke — outbound resets the silence clock', () => {
|
|
|
136
137
|
})
|
|
137
138
|
})
|
|
138
139
|
|
|
140
|
+
// Production-liveness (2026-06-05): an activity-feed render or draft update is
|
|
141
|
+
// the agent visibly working — it resets the silence clock so a long
|
|
142
|
+
// tool/composition turn isn't torn down mid-work.
|
|
143
|
+
describe('silence-poke — noteProduction resets the silence clock', () => {
  // Timestamps in these cases are milliseconds; this keeps them readable as seconds.
  const sec = (n: number): number => n * 1_000

  it('a feed/draft render at 250s pushes the fallback measurement to it', () => {
    const harness = setupDeps()
    startTurn('k', 0)
    noteProduction('k', sec(250))

    // 50s after the render: nothing may have fired yet.
    __tickForTests(sec(300))
    expect(harness.fallbacks).toHaveLength(0)

    // 300s after the render: the fallback is expected.
    __tickForTests(sec(550))
    expect(harness.fallbacks).toHaveLength(1)
  })

  it('repeated production every 60s keeps a long turn alive indefinitely', () => {
    const harness = setupDeps()
    startTurn('k', 0)

    // One render per minute for ten minutes: 60s, 120s, …, 600s.
    const renderTimes = Array.from({ length: 10 }, (_, i) => sec(60) * (i + 1))
    for (const at of renderTimes) {
      noteProduction('k', at)
      __tickForTests(at)
    }

    // Steady production throughout, so the turn is never torn down.
    expect(harness.fallbacks).toHaveLength(0)
  })

  it('production STOPS → the fallback fires 300s after the last render (genuine wedge)', () => {
    const harness = setupDeps()
    startTurn('k', 0)
    // The only render happens at 100s; nothing is produced afterwards.
    noteProduction('k', sec(100))

    // 290s after the last render: still quiet.
    __tickForTests(sec(390))
    expect(harness.fallbacks).toHaveLength(0)

    // 301s after the last render: the fallback is expected.
    __tickForTests(sec(401))
    expect(harness.fallbacks).toHaveLength(1)
  })

  it('is a no-op for an unknown key (no turn state)', () => {
    setupDeps()
    // No startTurn was issued for this key.
    const noteForUnknownKey = () => noteProduction('nope', 1_000)
    expect(noteForUnknownKey).not.toThrow()
  })
})
|
|
180
|
+
|
|
139
181
|
// Pin the contract the gateway must uphold for ABNORMAL turn-ends:
|
|
140
182
|
// every code path that abandons a turn before turn_end (context-
|
|
141
183
|
// exhaust bail, gateway-side wedge timeout, silent-end recovery)
|