switchroom 0.14.63 → 0.14.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +77 -9
- package/telegram-plugin/gateway/gateway.ts +93 -12
- package/telegram-plugin/gateway/source-message-id.test.ts +32 -0
- package/telegram-plugin/gateway/source-message-id.ts +41 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +98 -0
- package/telegram-plugin/gateway/status-surface-log.ts +102 -0
- package/telegram-plugin/silence-poke.ts +47 -0
- package/telegram-plugin/tests/silence-poke.test.ts +69 -1
- package/telegram-plugin/tests/worker-activity-feed.test.ts +61 -0
- package/telegram-plugin/worker-activity-feed.ts +15 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -49452,8 +49452,8 @@ var {
|
|
|
49452
49452
|
} = import__.default;
|
|
49453
49453
|
|
|
49454
49454
|
// src/build-info.ts
|
|
49455
|
-
var VERSION = "0.14.63";
|
|
49456
|
-
var COMMIT_SHA = "
|
|
49455
|
+
var VERSION = "0.14.65";
|
|
49456
|
+
var COMMIT_SHA = "ff6f75c4";
|
|
49457
49457
|
|
|
49458
49458
|
// src/cli/agent.ts
|
|
49459
49459
|
init_source();
|
package/package.json
CHANGED
|
@@ -32622,6 +32622,7 @@ function createWorkerActivityFeed(opts) {
|
|
|
32622
32622
|
h.messageId = sent.message_id;
|
|
32623
32623
|
h.lastBody = body;
|
|
32624
32624
|
h.lastEditAt = nowFn();
|
|
32625
|
+
log(`worker-feed: paint agent=${h.agentId} chat=${h.chatId} ` + `thread=${h.threadId ?? "-"} msgId=${h.messageId} bytes=${body.length}`);
|
|
32625
32626
|
} catch (err) {
|
|
32626
32627
|
noteRateLimited(h, err, "send");
|
|
32627
32628
|
log(`worker-feed: send failed: ${err.message}`);
|
|
@@ -32636,6 +32637,7 @@ function createWorkerActivityFeed(opts) {
|
|
|
32636
32637
|
await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h));
|
|
32637
32638
|
h.lastBody = body;
|
|
32638
32639
|
h.lastEditAt = nowFn();
|
|
32640
|
+
log(`worker-feed: edit agent=${h.agentId} chat=${h.chatId} ` + `thread=${h.threadId ?? "-"} msgId=${h.messageId} bytes=${body.length}`);
|
|
32639
32641
|
} catch (err) {
|
|
32640
32642
|
noteRateLimited(h, err, "edit");
|
|
32641
32643
|
log(`worker-feed: edit failed, will re-post: ${err.message}`);
|
|
@@ -32656,6 +32658,7 @@ function createWorkerActivityFeed(opts) {
|
|
|
32656
32658
|
await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h));
|
|
32657
32659
|
h.lastBody = body;
|
|
32658
32660
|
h.lastEditAt = nowFn();
|
|
32661
|
+
log(`worker-feed: finish agent=${h.agentId} chat=${h.chatId} ` + `thread=${h.threadId ?? "-"} msgId=${h.messageId} state=${view.state} bytes=${body.length}`);
|
|
32659
32662
|
} catch (err) {
|
|
32660
32663
|
noteRateLimited(h, err, "finish");
|
|
32661
32664
|
log(`worker-feed: finish edit failed: ${err.message}`);
|
|
@@ -32674,6 +32677,7 @@ function createWorkerActivityFeed(opts) {
|
|
|
32674
32677
|
let h = handles.get(agentId);
|
|
32675
32678
|
if (h == null) {
|
|
32676
32679
|
h = {
|
|
32680
|
+
agentId,
|
|
32677
32681
|
chatId,
|
|
32678
32682
|
threadId,
|
|
32679
32683
|
messageId: null,
|
|
@@ -32708,6 +32712,38 @@ function createWorkerActivityFeed(opts) {
|
|
|
32708
32712
|
};
|
|
32709
32713
|
}
|
|
32710
32714
|
|
|
32715
|
+
// gateway/status-surface-log.ts
|
|
32716
|
+
function formatTurnLifecycle(action, reason, t, now) {
|
|
32717
|
+
const ageMs = action === "clear" ? Math.max(0, now - t.startedAt) : 0;
|
|
32718
|
+
return `turn-lifecycle ${action} reason=${reason} turnId=${t.turnId} ` + `chat=${t.sessionChatId} thread=${t.sessionThreadId ?? "-"} ` + `tools=${t.toolCallCount} activityMsgId=${t.activityMessageId ?? "none"} ` + `feedOpened=${t.activityEverOpened} drainFailures=${t.activityDrainFailures} ` + `replyCalled=${t.replyCalled} finalAnswer=${t.finalAnswerDelivered} age_ms=${ageMs}`;
|
|
32719
|
+
}
|
|
32720
|
+
function detectStatusSurfaceDegraded(t) {
|
|
32721
|
+
if (t.toolCallCount === 0)
|
|
32722
|
+
return null;
|
|
32723
|
+
if (t.activityEverOpened)
|
|
32724
|
+
return null;
|
|
32725
|
+
if (t.activityDrainFailures === 0)
|
|
32726
|
+
return null;
|
|
32727
|
+
return {
|
|
32728
|
+
reason: "feed-never-opened",
|
|
32729
|
+
detail: `tools=${t.toolCallCount} drainFailures=${t.activityDrainFailures} ` + `activityMsgId=none \u2014 the live activity feed failed every send this turn ` + `(card was dark despite tool work)`
|
|
32730
|
+
};
|
|
32731
|
+
}
|
|
32732
|
+
|
|
32733
|
+
// gateway/source-message-id.ts
|
|
32734
|
+
var MAX_TELEGRAM_MESSAGE_ID = 2 ** 31;
|
|
32735
|
+
function parseSourceMessageId(raw) {
|
|
32736
|
+
if (raw == null)
|
|
32737
|
+
return null;
|
|
32738
|
+
const s = String(raw);
|
|
32739
|
+
if (!/^\d+$/.test(s))
|
|
32740
|
+
return null;
|
|
32741
|
+
const n = Number(s);
|
|
32742
|
+
if (!Number.isSafeInteger(n) || n <= 0 || n >= MAX_TELEGRAM_MESSAGE_ID)
|
|
32743
|
+
return null;
|
|
32744
|
+
return n;
|
|
32745
|
+
}
|
|
32746
|
+
|
|
32711
32747
|
// tool-names.ts
|
|
32712
32748
|
var TELEGRAM_TOOL_PREFIX_RE = /^mcp__[^_].*?telegram__/;
|
|
32713
32749
|
function stripPrefix(toolName) {
|
|
@@ -39067,6 +39103,9 @@ function tick(now) {
|
|
|
39067
39103
|
if (silence < 0)
|
|
39068
39104
|
continue;
|
|
39069
39105
|
if (!s.fallbackFired && silence >= thresholds.fallback) {
|
|
39106
|
+
if (activeDeps.deferFallbackWhileToolInFlight === true && s.inFlightTools.size > 0 && silence < (thresholds.fallbackHardCeiling ?? Number.POSITIVE_INFINITY)) {
|
|
39107
|
+
continue;
|
|
39108
|
+
}
|
|
39070
39109
|
s.fallbackFired = true;
|
|
39071
39110
|
const { chatId, threadId } = parseKey(key);
|
|
39072
39111
|
const recentThinking = s.lastThinkingAt != null && now - s.lastThinkingAt < 30000;
|
|
@@ -52720,10 +52759,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
52720
52759
|
}
|
|
52721
52760
|
|
|
52722
52761
|
// ../src/build-info.ts
|
|
52723
|
-
var VERSION = "0.14.63";
|
|
52724
|
-
var COMMIT_SHA = "
|
|
52725
|
-
var COMMIT_DATE = "2026-06-
|
|
52726
|
-
var LATEST_PR =
|
|
52762
|
+
var VERSION = "0.14.65";
|
|
52763
|
+
var COMMIT_SHA = "ff6f75c4";
|
|
52764
|
+
var COMMIT_DATE = "2026-06-05T04:09:09Z";
|
|
52765
|
+
var LATEST_PR = 2165;
|
|
52727
52766
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
52728
52767
|
|
|
52729
52768
|
// gateway/boot-version.ts
|
|
@@ -53921,7 +53960,7 @@ var _deliveryTimeoutParsed = _deliveryTimeoutRaw != null && _deliveryTimeoutRaw
|
|
|
53921
53960
|
var DELIVERY_CONFIRM_TIMEOUT_MS = Number.isFinite(_deliveryTimeoutParsed) && _deliveryTimeoutParsed > 0 ? _deliveryTimeoutParsed : 15000;
|
|
53922
53961
|
var DELIVERY_CONFIRM_SWEEP_MS = 5000;
|
|
53923
53962
|
var deliveryQueue = createDeliveryQueue();
|
|
53924
|
-
var OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER
|
|
53963
|
+
var OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER !== "0";
|
|
53925
53964
|
var OBLIGATION_REPRESENT_MAX = 2;
|
|
53926
53965
|
var OBLIGATION_SWEEP_MS = 5000;
|
|
53927
53966
|
var OBLIGATION_ESCALATE_MAX = 3;
|
|
@@ -53933,7 +53972,7 @@ var OBLIGATION_ESCALATE_GRACE_MS = (() => {
|
|
|
53933
53972
|
const n = Number(raw);
|
|
53934
53973
|
return Number.isFinite(n) && n >= 0 ? n : 45000;
|
|
53935
53974
|
})();
|
|
53936
|
-
var AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW
|
|
53975
|
+
var AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW !== "0";
|
|
53937
53976
|
var lastAgentOutputAt = new Map;
|
|
53938
53977
|
var LAST_OUTPUT_MAX_KEYS = 512;
|
|
53939
53978
|
function noteAgentOutputAt(key, ts) {
|
|
@@ -54267,6 +54306,13 @@ function endCurrentTurnAtomic(turn) {
|
|
|
54267
54306
|
if (currentTurn !== turn)
|
|
54268
54307
|
return;
|
|
54269
54308
|
currentTurn = null;
|
|
54309
|
+
process.stderr.write(`telegram gateway: ${formatTurnLifecycle("clear", "turn_end", turn, Date.now())}
|
|
54310
|
+
`);
|
|
54311
|
+
const degraded = detectStatusSurfaceDegraded(turn);
|
|
54312
|
+
if (degraded != null) {
|
|
54313
|
+
process.stderr.write(`telegram gateway: status-surface DEGRADED reason=${degraded.reason} turnId=${turn.turnId} chat=${turn.sessionChatId} thread=${turn.sessionThreadId ?? "-"} ${degraded.detail}
|
|
54314
|
+
`);
|
|
54315
|
+
}
|
|
54270
54316
|
if (OBLIGATION_LEDGER_ENABLED) {
|
|
54271
54317
|
if (turn.finalAnswerDelivered) {
|
|
54272
54318
|
obligationLedger.close(turn.turnId);
|
|
@@ -54344,7 +54390,11 @@ async function postCompactCard(occ, cap) {
|
|
|
54344
54390
|
const chatId = loadAccess().allowFrom[0];
|
|
54345
54391
|
if (!chatId)
|
|
54346
54392
|
return;
|
|
54347
|
-
const threadId =
|
|
54393
|
+
const threadId = topicForRecipient({
|
|
54394
|
+
recipientChatId: chatId,
|
|
54395
|
+
resolvedTopic: resolveAgentOutboundTopic({ kind: "compact-watchdog" }) ?? chatThreadMap.get(chatId),
|
|
54396
|
+
supergroupChatId: resolveAgentSupergroupChatId()
|
|
54397
|
+
});
|
|
54348
54398
|
const text = `\uD83D\uDDDC\uFE0F <b>Context compaction</b>
|
|
54349
54399
|
` + `Working context hit ~${occ.toLocaleString()} tokens (cap ${cap.toLocaleString()}) \u2014 running <code>/compact</code>. ` + `Older detail moves to Hindsight; I'll confirm here once the context has shrunk (may take a turn or two).`;
|
|
54350
54400
|
const sent = await swallowingApiCall(() => bot.api.sendMessage(chatId, text, {
|
|
@@ -55169,7 +55219,19 @@ function ensureIssuesCard(chatId, threadId) {
|
|
|
55169
55219
|
}
|
|
55170
55220
|
}
|
|
55171
55221
|
var inFlightUpdate = null;
|
|
55222
|
+
function parsePositiveMsEnv(name, fallbackMs) {
|
|
55223
|
+
const raw = process.env[name];
|
|
55224
|
+
if (raw == null || raw === "")
|
|
55225
|
+
return fallbackMs;
|
|
55226
|
+
const n = Number(raw);
|
|
55227
|
+
return Number.isFinite(n) && n > 0 ? Math.floor(n) : fallbackMs;
|
|
55228
|
+
}
|
|
55229
|
+
var SILENCE_FALLBACK_MS = parsePositiveMsEnv("SWITCHROOM_SILENCE_FALLBACK_MS", 300000);
|
|
55230
|
+
var SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv("SWITCHROOM_SILENCE_FALLBACK_HARD_MS", 900000);
|
|
55231
|
+
var SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === "1";
|
|
55172
55232
|
startTimer({
|
|
55233
|
+
thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
|
|
55234
|
+
deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
|
|
55173
55235
|
emitMetric: (event) => {
|
|
55174
55236
|
emitRuntimeMetric(event);
|
|
55175
55237
|
},
|
|
@@ -57760,6 +57822,7 @@ async function drainActivitySummary(turn) {
|
|
|
57760
57822
|
...replyAnchor
|
|
57761
57823
|
}), { chat_id: chat, ...thread != null ? { threadId: thread } : {}, verb: "activity-summary.send" });
|
|
57762
57824
|
turn.activityMessageId = sent.message_id;
|
|
57825
|
+
turn.activityEverOpened = true;
|
|
57763
57826
|
} else {
|
|
57764
57827
|
const id = turn.activityMessageId;
|
|
57765
57828
|
await robustApiCall(() => bot.api.editMessageText(chat, id, html, { parse_mode: "HTML" }), { chat_id: chat, ...thread != null ? { threadId: thread } : {}, verb: "activity-summary.edit" });
|
|
@@ -57768,7 +57831,8 @@ async function drainActivitySummary(turn) {
|
|
|
57768
57831
|
} catch (err) {
|
|
57769
57832
|
const msg = err instanceof Error ? err.message : String(err);
|
|
57770
57833
|
if (!msg.includes("message is not modified")) {
|
|
57771
|
-
|
|
57834
|
+
turn.activityDrainFailures += 1;
|
|
57835
|
+
process.stderr.write(`telegram gateway: activity-summary drain failed: ${msg} (chat=${chat} thread=${thread ?? "-"} replyAnchor=${turn.sourceMessageId ?? "none"} everOpened=${turn.activityEverOpened} failures=${turn.activityDrainFailures})
|
|
57772
57836
|
`);
|
|
57773
57837
|
}
|
|
57774
57838
|
turn.activityLastSentRender = target;
|
|
@@ -57855,7 +57919,7 @@ function handleSessionEvent(ev) {
|
|
|
57855
57919
|
const next = {
|
|
57856
57920
|
sessionChatId: ev.chatId,
|
|
57857
57921
|
sessionThreadId: enqThreadIdNum,
|
|
57858
|
-
sourceMessageId:
|
|
57922
|
+
sourceMessageId: parseSourceMessageId(ev.messageId),
|
|
57859
57923
|
startedAt,
|
|
57860
57924
|
gatewayReceiveAt: startedAt,
|
|
57861
57925
|
replyCalled: false,
|
|
@@ -57876,12 +57940,16 @@ function handleSessionEvent(ev) {
|
|
|
57876
57940
|
activityInFlight: null,
|
|
57877
57941
|
activityPendingRender: null,
|
|
57878
57942
|
activityLastSentRender: null,
|
|
57943
|
+
activityEverOpened: false,
|
|
57944
|
+
activityDrainFailures: 0,
|
|
57879
57945
|
mirrorLines: [],
|
|
57880
57946
|
foregroundSubAgents: new Map,
|
|
57881
57947
|
answerStream: null,
|
|
57882
57948
|
isDm: isDmChatId(ev.chatId)
|
|
57883
57949
|
};
|
|
57884
57950
|
currentTurn = next;
|
|
57951
|
+
process.stderr.write(`telegram gateway: ${formatTurnLifecycle("set", "enqueue", next, startedAt)}
|
|
57952
|
+
`);
|
|
57885
57953
|
rememberRecentTurn(next);
|
|
57886
57954
|
promoteQueuedStatus(ev.chatId, enqThreadIdNum);
|
|
57887
57955
|
if (DELIVERY_CONFIRM_ENABLED) {
|
|
@@ -65,6 +65,8 @@ import {
|
|
|
65
65
|
import { StatusReactionController } from '../status-reactions.js'
|
|
66
66
|
import { DeferredDoneReactions } from '../reaction-defer.js'
|
|
67
67
|
import { createWorkerActivityFeed, isWorkerActivityFeedEnabled } from '../worker-activity-feed.js'
|
|
68
|
+
import { formatTurnLifecycle, detectStatusSurfaceDegraded } from './status-surface-log.js'
|
|
69
|
+
import { parseSourceMessageId } from './source-message-id.js'
|
|
68
70
|
import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
|
|
69
71
|
import { appendActivityLabel, renderActivityFeedWithNested } from '../tool-activity-summary.js'
|
|
70
72
|
import { toolLabel } from '../tool-labels.js'
|
|
@@ -1400,9 +1402,12 @@ const deliveryQueue = createDeliveryQueue<InboundMessage>()
|
|
|
1400
1402
|
// re-presented (bounded) until it closes, so a message the model read but never
|
|
1401
1403
|
// answered (the marko 715 drop) cannot be silently lost. ADDITIVE + flagged: it
|
|
1402
1404
|
// runs ALONGSIDE the existing acks/spool/buffer (PR3 retires the redundant
|
|
1403
|
-
// pieces).
|
|
1404
|
-
//
|
|
1405
|
-
|
|
1405
|
+
// pieces). DEFAULT ON (graduated from canary 2026-06-04 after the hang-fix
|
|
1406
|
+
// (#2152, total-proof), the escalate-grace (#2156, kills the fuzz-found
|
|
1407
|
+
// over-escalation), and interrupt-cancel (#2157) — proven on marko (supergroup)
|
|
1408
|
+
// + test-harness for days with 0 false cards). Kill switch:
|
|
1409
|
+
// SWITCHROOM_OBLIGATION_LEDGER=0 → every hook below is a no-op → zero change.
|
|
1410
|
+
const OBLIGATION_LEDGER_ENABLED = process.env.SWITCHROOM_OBLIGATION_LEDGER !== '0'
|
|
1406
1411
|
const OBLIGATION_REPRESENT_MAX = 2
|
|
1407
1412
|
const OBLIGATION_SWEEP_MS = 5_000
|
|
1408
1413
|
// Bound on escalation SEND attempts. The escalation now closes only AFTER a
|
|
@@ -1449,8 +1454,10 @@ const OBLIGATION_ESCALATE_GRACE_MS = (() => {
|
|
|
1449
1454
|
// ms_since_out) but the behaviour is UNCHANGED (still queue) — to gather the
|
|
1450
1455
|
// real-world distribution (how often mid-turn messages are same-topic
|
|
1451
1456
|
// continuations vs cross-topic, and the recency spread) before any action flips
|
|
1452
|
-
// on.
|
|
1453
|
-
|
|
1457
|
+
// on. DEFAULT ON fleet-wide (data-gathering: zero behaviour change — only logs +
|
|
1458
|
+
// a bounded recency map). This is a TEMPORARY default; when auto-steer ships it
|
|
1459
|
+
// supersedes shadow. Kill switch: SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW=0.
|
|
1460
|
+
const AUTOCLASSIFY_MIDTURN_SHADOW = process.env.SWITCHROOM_AUTOCLASSIFY_MIDTURN_SHADOW !== '0'
|
|
1454
1461
|
// Per-(chat,thread) wall-clock ms of the agent's LAST visible output — the
|
|
1455
1462
|
// recency clock the classifier uses (NOT turn age: a long actively-narrating
|
|
1456
1463
|
// worker turn must not read "stale"). Stamped beside signalTracker.noteOutbound.
|
|
@@ -1793,6 +1800,14 @@ type CurrentTurn = {
|
|
|
1793
1800
|
activityInFlight: Promise<void> | null
|
|
1794
1801
|
activityPendingRender: string | null
|
|
1795
1802
|
activityLastSentRender: string | null
|
|
1803
|
+
// Status-surface observability. `activityEverOpened` is sticky-true once the
|
|
1804
|
+
// feed posts its first message — unlike `activityMessageId`, it is NOT nulled
|
|
1805
|
+
// by `clearActivitySummary`, so the turn-end DEGRADED check can tell "feed
|
|
1806
|
+
// never opened" (the resume-400 signature) from "feed finalized + cleared".
|
|
1807
|
+
// `activityDrainFailures` counts real activity-feed send/edit failures this
|
|
1808
|
+
// turn (429s + "message is not modified" excluded). Both reset per turn.
|
|
1809
|
+
activityEverOpened: boolean
|
|
1810
|
+
activityDrainFailures: number
|
|
1796
1811
|
// Wall-clock anchor for the newest in-progress feed step — set each time a
|
|
1797
1812
|
// tool_label re-renders the feed. The heartbeat (`feedHeartbeatTick`) reads
|
|
1798
1813
|
// it to show a climbing " · Ns" elapsed on the live line so a long single
|
|
@@ -2483,6 +2498,20 @@ function releaseTurnBufferGate(key: string, endingTurn?: CurrentTurn): void {
|
|
|
2483
2498
|
function endCurrentTurnAtomic(turn: CurrentTurn): void {
|
|
2484
2499
|
if (currentTurn !== turn) return
|
|
2485
2500
|
currentTurn = null
|
|
2501
|
+
// Status-surface observability: one line at every turn CLEAR (with how far
|
|
2502
|
+
// the turn got), plus a DEGRADED warning when the turn did tool work but the
|
|
2503
|
+
// live feed never opened because its sends failed (the resume-400 signature).
|
|
2504
|
+
process.stderr.write(
|
|
2505
|
+
`telegram gateway: ${formatTurnLifecycle('clear', 'turn_end', turn, Date.now())}\n`,
|
|
2506
|
+
)
|
|
2507
|
+
const degraded = detectStatusSurfaceDegraded(turn)
|
|
2508
|
+
if (degraded != null) {
|
|
2509
|
+
process.stderr.write(
|
|
2510
|
+
`telegram gateway: status-surface DEGRADED reason=${degraded.reason} ` +
|
|
2511
|
+
`turnId=${turn.turnId} chat=${turn.sessionChatId} ` +
|
|
2512
|
+
`thread=${turn.sessionThreadId ?? '-'} ${degraded.detail}\n`,
|
|
2513
|
+
)
|
|
2514
|
+
}
|
|
2486
2515
|
// PR2 obligation-ledger CLOSE-at-turn-end. Close the ended turn's obligation
|
|
2487
2516
|
// when it delivered a final answer. finalAnswerDelivered is the right signal
|
|
2488
2517
|
// HERE (not isSubstantiveFinalReply at reply-time): a SHORT genuine answer
|
|
@@ -2653,9 +2682,18 @@ async function postCompactCard(occ: number, cap: number): Promise<void> {
|
|
|
2653
2682
|
// instead of conversation lanes. Fleet/DM agents fall through to
|
|
2654
2683
|
// the existing chatThreadMap last-seen-thread fallback (no
|
|
2655
2684
|
// observable change).
|
|
2656
|
-
|
|
2657
|
-
|
|
2658
|
-
|
|
2685
|
+
// The compact-watchdog topic is valid ONLY in the agent's supergroup;
|
|
2686
|
+
// attaching it to an operator DM recipient 400s "message thread not found"
|
|
2687
|
+
// and the notice silently vanishes (the marko #2096 class — proactiveCompact
|
|
2688
|
+
// was the one operator-send still missing this guard, 2026-06-05). DM
|
|
2689
|
+
// recipients get a thread-less send; the supergroup owner keeps the lane.
|
|
2690
|
+
const threadId = topicForRecipient({
|
|
2691
|
+
recipientChatId: chatId,
|
|
2692
|
+
resolvedTopic:
|
|
2693
|
+
resolveAgentOutboundTopic({ kind: 'compact-watchdog' })
|
|
2694
|
+
?? chatThreadMap.get(chatId),
|
|
2695
|
+
supergroupChatId: resolveAgentSupergroupChatId(),
|
|
2696
|
+
});
|
|
2659
2697
|
const text =
|
|
2660
2698
|
`🗜️ <b>Context compaction</b>\n` +
|
|
2661
2699
|
`Working context hit ~${occ.toLocaleString()} tokens ` +
|
|
@@ -4541,7 +4579,27 @@ function ensureIssuesCard(chatId: string, threadId: number | undefined): void {
|
|
|
4541
4579
|
// incident fix. In-memory only; a gateway recreate naturally resets it.
|
|
4542
4580
|
let inFlightUpdate: { requestId: string; startedAt: number } | null = null
|
|
4543
4581
|
|
|
4582
|
+
// Fix A — silence-fallback tuning (status-surface darkening, 2026-06-05). A long
|
|
4583
|
+
// quiet tool stretch (foreground sub-agent / big research) crossed the 300s
|
|
4584
|
+
// fallback and nulled currentTurn, darkening the live activity feed mid-work.
|
|
4585
|
+
// SWITCHROOM_SILENCE_FALLBACK_MS — base threshold (default 300000)
|
|
4586
|
+
// SWITCHROOM_SILENCE_FALLBACK_HARD_MS — hard ceiling for the in-flight-tool
|
|
4587
|
+
// defer (default 900000 = 15min)
|
|
4588
|
+
// SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1 — enable the defer (default OFF;
|
|
4589
|
+
// canary on marko against #2162 telemetry)
|
|
4590
|
+
function parsePositiveMsEnv(name: string, fallbackMs: number): number {
|
|
4591
|
+
const raw = process.env[name]
|
|
4592
|
+
if (raw == null || raw === '') return fallbackMs
|
|
4593
|
+
const n = Number(raw)
|
|
4594
|
+
return Number.isFinite(n) && n > 0 ? Math.floor(n) : fallbackMs
|
|
4595
|
+
}
|
|
4596
|
+
const SILENCE_FALLBACK_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_MS', 300_000)
|
|
4597
|
+
const SILENCE_FALLBACK_HARD_MS = parsePositiveMsEnv('SWITCHROOM_SILENCE_FALLBACK_HARD_MS', 900_000)
|
|
4598
|
+
const SILENCE_DEFER_INFLIGHT_TOOLS = process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS === '1'
|
|
4599
|
+
|
|
4544
4600
|
silencePoke.startTimer({
|
|
4601
|
+
thresholdsMs: { fallback: SILENCE_FALLBACK_MS, fallbackHardCeiling: SILENCE_FALLBACK_HARD_MS },
|
|
4602
|
+
deferFallbackWhileToolInFlight: SILENCE_DEFER_INFLIGHT_TOOLS,
|
|
4545
4603
|
emitMetric: (event) => {
|
|
4546
4604
|
// Re-emit through the unified runtime-metrics fan-out (PostHog + JSONL).
|
|
4547
4605
|
emitRuntimeMetric(event)
|
|
@@ -8845,6 +8903,7 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
|
|
|
8845
8903
|
{ chat_id: chat, ...(thread != null ? { threadId: thread } : {}), verb: 'activity-summary.send' },
|
|
8846
8904
|
)
|
|
8847
8905
|
turn.activityMessageId = sent.message_id
|
|
8906
|
+
turn.activityEverOpened = true
|
|
8848
8907
|
} else {
|
|
8849
8908
|
const id = turn.activityMessageId
|
|
8850
8909
|
await robustApiCall(
|
|
@@ -8856,7 +8915,18 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
|
|
|
8856
8915
|
} catch (err) {
|
|
8857
8916
|
const msg = err instanceof Error ? err.message : String(err)
|
|
8858
8917
|
if (!msg.includes('message is not modified')) {
|
|
8859
|
-
|
|
8918
|
+
turn.activityDrainFailures += 1
|
|
8919
|
+
// Surface the failing anchor + topic: the resume-400 bug fed a
|
|
8920
|
+
// fabricated 13-digit message_id as the reply anchor here, so every
|
|
8921
|
+
// send 400'd and the feed never opened. Logging the anchor +
|
|
8922
|
+
// everOpened makes a feed-blanking send self-explanatory (and the
|
|
8923
|
+
// turn-end DEGRADED line aggregates it).
|
|
8924
|
+
process.stderr.write(
|
|
8925
|
+
`telegram gateway: activity-summary drain failed: ${msg} ` +
|
|
8926
|
+
`(chat=${chat} thread=${thread ?? '-'} ` +
|
|
8927
|
+
`replyAnchor=${turn.sourceMessageId ?? 'none'} ` +
|
|
8928
|
+
`everOpened=${turn.activityEverOpened} failures=${turn.activityDrainFailures})\n`,
|
|
8929
|
+
)
|
|
8860
8930
|
}
|
|
8861
8931
|
// Mark as sent so we don't infinite-loop on a stuck render.
|
|
8862
8932
|
turn.activityLastSentRender = target
|
|
@@ -9014,9 +9084,13 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
9014
9084
|
const next: CurrentTurn = {
|
|
9015
9085
|
sessionChatId: ev.chatId,
|
|
9016
9086
|
sessionThreadId: enqThreadIdNum,
|
|
9017
|
-
|
|
9018
|
-
|
|
9019
|
-
|
|
9087
|
+
// Accept the inbound id as a reply anchor only when it is a plausible
|
|
9088
|
+
// Telegram message id. Synthetic boot-resume inbounds fabricate a
|
|
9089
|
+
// 13-digit Date.now() message_id (for ack-tracking); if that reached
|
|
9090
|
+
// the activity-feed reply anchor it 400'd every feed send and darkened
|
|
9091
|
+
// the live feed for the whole resume turn (2026-06-05). The ack-queue
|
|
9092
|
+
// still keys on ev.messageId independently — only the anchor is gated.
|
|
9093
|
+
sourceMessageId: parseSourceMessageId(ev.messageId),
|
|
9020
9094
|
startedAt,
|
|
9021
9095
|
gatewayReceiveAt: startedAt,
|
|
9022
9096
|
replyCalled: false,
|
|
@@ -9037,12 +9111,19 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
9037
9111
|
activityInFlight: null,
|
|
9038
9112
|
activityPendingRender: null,
|
|
9039
9113
|
activityLastSentRender: null,
|
|
9114
|
+
activityEverOpened: false,
|
|
9115
|
+
activityDrainFailures: 0,
|
|
9040
9116
|
mirrorLines: [],
|
|
9041
9117
|
foregroundSubAgents: new Map(),
|
|
9042
9118
|
answerStream: null,
|
|
9043
9119
|
isDm: isDmChatId(ev.chatId),
|
|
9044
9120
|
}
|
|
9045
9121
|
currentTurn = next
|
|
9122
|
+
// Status-surface observability: one line at every turn SET so a later
|
|
9123
|
+
// dark card is traceable to which turn/topic key it belonged to.
|
|
9124
|
+
process.stderr.write(
|
|
9125
|
+
`telegram gateway: ${formatTurnLifecycle('set', 'enqueue', next, startedAt)}\n`,
|
|
9126
|
+
)
|
|
9046
9127
|
// Component 3 — retain in the bounded recently-ended registry so a
|
|
9047
9128
|
// LATE reply (landing after currentTurn flips to a successor) can
|
|
9048
9129
|
// still resolve THIS turn's origin thread by its turnId.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { parseSourceMessageId, MAX_TELEGRAM_MESSAGE_ID } from './source-message-id.js'
|
|
3
|
+
|
|
4
|
+
describe('parseSourceMessageId', () => {
|
|
5
|
+
it('accepts a plausible Telegram message id (string or number)', () => {
|
|
6
|
+
expect(parseSourceMessageId('903')).toBe(903)
|
|
7
|
+
expect(parseSourceMessageId(905)).toBe(905)
|
|
8
|
+
expect(parseSourceMessageId('1')).toBe(1)
|
|
9
|
+
})
|
|
10
|
+
|
|
11
|
+
it('REJECTS a fabricated 13-digit Date.now() timestamp (the resume-dark-feed bug)', () => {
|
|
12
|
+
// 2026-06-04T23:34:21.578Z — the exact value that 400'd every feed send.
|
|
13
|
+
expect(parseSourceMessageId('1780616061578')).toBeNull()
|
|
14
|
+
expect(parseSourceMessageId(1_780_616_061_578)).toBeNull()
|
|
15
|
+
})
|
|
16
|
+
|
|
17
|
+
it('rejects anything at or above the Telegram message-id ceiling (2^31)', () => {
|
|
18
|
+
expect(parseSourceMessageId(MAX_TELEGRAM_MESSAGE_ID)).toBeNull()
|
|
19
|
+
expect(parseSourceMessageId(MAX_TELEGRAM_MESSAGE_ID - 1)).toBe(MAX_TELEGRAM_MESSAGE_ID - 1)
|
|
20
|
+
})
|
|
21
|
+
|
|
22
|
+
it('rejects null / undefined / empty / non-numeric / non-positive', () => {
|
|
23
|
+
expect(parseSourceMessageId(null)).toBeNull()
|
|
24
|
+
expect(parseSourceMessageId(undefined)).toBeNull()
|
|
25
|
+
expect(parseSourceMessageId('')).toBeNull()
|
|
26
|
+
expect(parseSourceMessageId('12a')).toBeNull()
|
|
27
|
+
expect(parseSourceMessageId('-5')).toBeNull() // leading "-" fails the digit test
|
|
28
|
+
expect(parseSourceMessageId(0)).toBeNull()
|
|
29
|
+
expect(parseSourceMessageId(-5)).toBeNull()
|
|
30
|
+
expect(parseSourceMessageId('3.5')).toBeNull()
|
|
31
|
+
})
|
|
32
|
+
})
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Guard for the per-turn reply anchor (`turn.sourceMessageId`).
|
|
3
|
+
*
|
|
4
|
+
* Telegram Bot API message ids are positive integers that fit within a signed
|
|
5
|
+
* 32-bit int; `reply_parameters.message_id` HARD-rejects anything larger with
|
|
6
|
+
* 400 "field 'message_id' must be a valid Number" (and `allow_sending_without_reply`
|
|
7
|
+
* does NOT bypass that range check).
|
|
8
|
+
*
|
|
9
|
+
* Synthetic boot-resume inbounds (`resume-inbound-builder.ts`) fabricate a
|
|
10
|
+
* `message_id` from `Date.now()` (~1.78e12) so the deliver-until-acked queue can
|
|
11
|
+
* ack the synthetic by its own enqueue id. That round-trip is fine on its own —
|
|
12
|
+
* but the enqueue handler also turns `ev.messageId` into `turn.sourceMessageId`,
|
|
13
|
+
* which `drainActivitySummary` sends as the activity-feed reply anchor. A
|
|
14
|
+
* fabricated 13-digit timestamp there 400s EVERY feed send for the whole turn,
|
|
15
|
+
* so the live status feed is dark for the entire first post-restart turn (the
|
|
16
|
+
* resume-dark-feed incident, 2026-06-05).
|
|
17
|
+
*
|
|
18
|
+
* This guard accepts a value as a real anchor ONLY when it is a plausible
|
|
19
|
+
* Telegram message id; anything non-numeric or out of range yields null, so the
|
|
20
|
+
* feed posts UNANCHORED (still correct — the anchor is a nicety, not required).
|
|
21
|
+
* The synthetic's ack-tracking is unaffected: it keys on the enqueue event's own
|
|
22
|
+
* id, never on this anchor.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/** Telegram message ids fit within a signed 32-bit int for reply anchoring;
|
|
26
|
+
* anything at/above this is not a real message id (e.g. a wall-clock ms ts). */
|
|
27
|
+
export const MAX_TELEGRAM_MESSAGE_ID = 2 ** 31
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Parse an inbound's `messageId` into a usable reply anchor, or null when it is
|
|
31
|
+
* not a plausible Telegram message id (non-numeric, non-positive, non-integer,
|
|
32
|
+
* or out of the reply-anchor range — e.g. a fabricated `Date.now()` timestamp).
|
|
33
|
+
*/
|
|
34
|
+
export function parseSourceMessageId(raw: string | number | undefined | null): number | null {
|
|
35
|
+
if (raw == null) return null
|
|
36
|
+
const s = String(raw)
|
|
37
|
+
if (!/^\d+$/.test(s)) return null
|
|
38
|
+
const n = Number(s)
|
|
39
|
+
if (!Number.isSafeInteger(n) || n <= 0 || n >= MAX_TELEGRAM_MESSAGE_ID) return null
|
|
40
|
+
return n
|
|
41
|
+
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
formatTurnLifecycle,
|
|
4
|
+
detectStatusSurfaceDegraded,
|
|
5
|
+
type StatusSurfaceTurnView,
|
|
6
|
+
} from './status-surface-log.js'
|
|
7
|
+
|
|
8
|
+
function turn(overrides: Partial<StatusSurfaceTurnView> = {}): StatusSurfaceTurnView {
|
|
9
|
+
return {
|
|
10
|
+
turnId: '-100123:_#1780000000000',
|
|
11
|
+
sessionChatId: '-100123',
|
|
12
|
+
sessionThreadId: undefined,
|
|
13
|
+
startedAt: 1_780_000_000_000,
|
|
14
|
+
toolCallCount: 0,
|
|
15
|
+
activityMessageId: null,
|
|
16
|
+
activityEverOpened: false,
|
|
17
|
+
activityDrainFailures: 0,
|
|
18
|
+
replyCalled: false,
|
|
19
|
+
finalAnswerDelivered: false,
|
|
20
|
+
...overrides,
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// Pins the exact key=value fragments of the lifecycle line so log greps stay stable.
describe('formatTurnLifecycle', () => {
  it('renders a set line with no age and a "-" thread for General', () => {
    const rendered = formatTurnLifecycle('set', 'enqueue', turn(), 1_780_000_005_000)
    const expectedFragments = [
      'turn-lifecycle set reason=enqueue',
      'turnId=-100123:_#1780000000000',
      'chat=-100123',
      'thread=-',
      'age_ms=0', // set never reports age
    ]
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment)
    }
  })

  it('renders a clear line with the turn age and live state', () => {
    const finishedTurn = turn({
      sessionThreadId: 3,
      toolCallCount: 5,
      activityMessageId: 42,
      activityEverOpened: true,
      replyCalled: true,
      finalAnswerDelivered: true,
    })
    const clearedAt = 1_780_000_300_000 // +300s after the fixture's startedAt
    const rendered = formatTurnLifecycle('clear', 'turn_end', finishedTurn, clearedAt)
    const expectedFragments = [
      'turn-lifecycle clear reason=turn_end',
      'thread=3',
      'tools=5',
      'activityMsgId=42',
      'feedOpened=true',
      'replyCalled=true',
      'finalAnswer=true',
      'age_ms=300000',
    ]
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment)
    }
  })

  it('never emits a negative age even if startedAt is in the future (clock skew)', () => {
    const futureTurn = turn({ startedAt: 2_000_000_000_000 })
    const rendered = formatTurnLifecycle('clear', 'turn_end', futureTurn, 1_780_000_000_000)
    expect(rendered).toContain('age_ms=0')
  })

  it('carries no prefix or trailing newline — the caller owns transport', () => {
    const rendered = formatTurnLifecycle('set', 'enqueue', turn(), 0)
    expect(rendered.startsWith('telegram gateway:')).toBe(false)
    expect(rendered.endsWith('\n')).toBe(false)
  })
})
|
|
62
|
+
|
|
63
|
+
// One positive case (the degraded signature) and three negative cases, one per
// early-return branch of the detector.
describe('detectStatusSurfaceDegraded', () => {
  it('flags a turn that did tool work but never opened the feed due to send failures (the resume-400 signature)', () => {
    const darkTurn = turn({ toolCallCount: 3, activityEverOpened: false, activityDrainFailures: 10 })
    const verdict = detectStatusSurfaceDegraded(darkTurn)
    expect(verdict).not.toBeNull()
    expect(verdict!.reason).toBe('feed-never-opened')
    expect(verdict!.detail).toContain('drainFailures=10')
  })

  it('does NOT flag a healthy turn where the feed opened, even if later cleared (activityMessageId nulled)', () => {
    // The healthy finalize path nulls activityMessageId asynchronously; the
    // sticky activityEverOpened flag is what prevents a false positive here.
    const healthyTurn = turn({
      toolCallCount: 4,
      activityMessageId: null,
      activityEverOpened: true,
      activityDrainFailures: 0,
    })
    expect(detectStatusSurfaceDegraded(healthyTurn)).toBeNull()
  })

  it('does NOT flag a turn that never attempted a feed send (e.g. ack-first suppression)', () => {
    const suppressedTurn = turn({ toolCallCount: 2, activityEverOpened: false, activityDrainFailures: 0 })
    expect(detectStatusSurfaceDegraded(suppressedTurn)).toBeNull()
  })

  it('does NOT flag a turn with no tool work (nothing to surface)', () => {
    const idleTurn = turn({ toolCallCount: 0, activityEverOpened: false, activityDrainFailures: 3 })
    expect(detectStatusSurfaceDegraded(idleTurn)).toBeNull()
  })
})
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Status-surface observability — pure formatters for the gateway's live-status
|
|
3
|
+
* lane (progress card / activity feed / typing indicator).
|
|
4
|
+
*
|
|
5
|
+
* Why a dedicated module: when an agent's live status went dark (marko,
|
|
6
|
+
* 2026-06-05), the lane was nearly silent in the logs — `currentTurn` (the
|
|
7
|
+
* variable that drives the card/feed/typing) was nulled with no breadcrumb, and
|
|
8
|
+
* the activity feed failed every send with no turn-level signal. Two latent
|
|
9
|
+
* bugs were invisible for days: a 300s silence-poke teardown that nulled the
|
|
10
|
+
* card mid-work, and a resume-synthetic whose fabricated 13-digit message_id
|
|
11
|
+
* made every feed send 400. Neither left a greppable "the card went dark and
|
|
12
|
+
* here's why" line.
|
|
13
|
+
*
|
|
14
|
+
* These pure functions give the gateway exactly that: ONE structured line per
|
|
15
|
+
* currentTurn lifecycle transition, and a single DEGRADED warning when a turn
|
|
16
|
+
* did tool work but the feed never opened because its sends failed. Pure
|
|
17
|
+
* formatters + injected transport (the caller owns `process.stderr.write`),
|
|
18
|
+
* mirroring `silence-poke.ts` / `worker-activity-feed.ts`, so they're
|
|
19
|
+
* unit-testable without a live gateway.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
 * The subset of `currentTurn` fields that the status-surface log helpers read.
 *
 * The gateway's own `CurrentTurn` atom satisfies this shape structurally, so
 * the gateway can hand its turn object over as-is and this module never has
 * to import from `gateway.ts` (which would create an import cycle).
 */
export interface StatusSurfaceTurnView {
  turnId: string
  sessionChatId: string
  /** Forum-topic thread id; `undefined` is rendered as `-` in log lines. */
  sessionThreadId: number | undefined
  /** Turn start timestamp, on the same clock as the `now` passed to the formatter. */
  startedAt: number
  toolCallCount: number
  /** Id of the live activity-feed message; stays null until a first send captures it. */
  activityMessageId: number | null
  /**
   * Sticky flag: flips to true the first time the activity feed opens a
   * message during this turn and is never reset afterwards. This differs from
   * `activityMessageId`, which `clearActivitySummary` nulls asynchronously on
   * the healthy finalize path — so a turn that DID surface the feed cannot be
   * misreported as degraded at turn-end.
   */
  activityEverOpened: boolean
  /**
   * Number of genuine activity-feed send/edit failures in this turn. 429s and
   * "message is not modified" responses are not counted.
   */
  activityDrainFailures: number
  replyCalled: boolean
  finalAnswerDelivered: boolean
}
|
|
48
|
+
|
|
49
|
+
export type TurnLifecycleAction = 'set' | 'clear'

/**
 * Builds the single structured log line emitted whenever `currentTurn` is set
 * or cleared. Because `currentTurn` drives the progress card, activity feed
 * and typing indicator, recording every transition — topic key, how far the
 * turn progressed, and why it ended — lets a dark card be explained afterwards.
 *
 * The result carries neither the `telegram gateway: ` prefix nor a trailing
 * newline: the caller owns transport, and tests assert on the bare body.
 *
 * @param action whether the turn is being installed (`set`) or torn down (`clear`)
 * @param reason caller-supplied label for the transition, emitted verbatim
 * @param t the turn whose state is being reported
 * @param now current timestamp; only read for `clear`, ignored for `set`
 * @returns the space-separated `key=value` log line
 */
export function formatTurnLifecycle(
  action: TurnLifecycleAction,
  reason: string,
  t: StatusSurfaceTurnView,
  now: number,
): string {
  // Age is only reported when the turn ends. Clamp at zero so a startedAt that
  // lies ahead of `now` (clock skew) never produces a negative age.
  let ageMs = 0
  if (action === 'clear') {
    ageMs = Math.max(0, now - t.startedAt)
  }

  const fields = [
    `turn-lifecycle ${action}`,
    `reason=${reason}`,
    `turnId=${t.turnId}`,
    `chat=${t.sessionChatId}`,
    `thread=${t.sessionThreadId ?? '-'}`,
    `tools=${t.toolCallCount}`,
    `activityMsgId=${t.activityMessageId ?? 'none'}`,
    `feedOpened=${t.activityEverOpened}`,
    `drainFailures=${t.activityDrainFailures}`,
    `replyCalled=${t.replyCalled}`,
    `finalAnswer=${t.finalAnswerDelivered}`,
    `age_ms=${ageMs}`,
  ]
  return fields.join(' ')
}
|
|
75
|
+
|
|
76
|
+
/**
 * Turn-end health check for the live status surface. Answers: did this turn do
 * tool work yet never get a feed message on screen BECAUSE its sends failed?
 * That is the signature of the resume-400 bug, where every activity-summary
 * send throws and the feed therefore never opens.
 *
 * A turn is treated as healthy or legitimately silent (result `null`) when any
 * of the following holds:
 *   - it did no tool work, so there was nothing to surface;
 *   - the feed opened at some point (`activityEverOpened`);
 *   - no send failure was recorded (`activityDrainFailures === 0`), e.g. an
 *     ack-first turn whose feed was deliberately suppressed — not sending is
 *     not the same as failing to send.
 *
 * @param t the turn being finalized
 * @returns `null` for a healthy/silent turn, otherwise a `reason` tag plus a
 *   human-readable `detail` string for the DEGRADED warning
 */
export function detectStatusSurfaceDegraded(
  t: StatusSurfaceTurnView,
): { reason: string; detail: string } | null {
  const didToolWork = t.toolCallCount !== 0
  const feedOpened = t.activityEverOpened
  const sendsFailed = t.activityDrainFailures !== 0

  // Degraded only when all three line up: work happened, nothing was ever
  // shown, and at least one real send failure was counted.
  if (!didToolWork || feedOpened || !sendsFailed) {
    return null
  }

  const counters = `tools=${t.toolCallCount} drainFailures=${t.activityDrainFailures} `
  const explanation =
    `activityMsgId=none — the live activity feed failed every send this turn ` +
    `(card was dark despite tool work)`
  return { reason: 'feed-never-opened', detail: counters + explanation }
}
|
|
@@ -20,6 +20,14 @@
|
|
|
20
20
|
* edits, and tool churn DO NOT reset the silence clock — the model could
|
|
21
21
|
* be ripping through 20 tool calls and still be "silent" to the user.
|
|
22
22
|
*
|
|
23
|
+
* Fix A caveat (opt-in, `deferFallbackWhileToolInFlight`): tool churn still
|
|
24
|
+
* doesn't reset the *clock*, but when the threshold is crossed WITH a parent
|
|
25
|
+
* tool genuinely in flight, the terminal unwedge is DEFERRED (not skipped) up to
|
|
26
|
+
* `fallbackHardCeiling`. Since #2162 the live activity feed renders that tool
|
|
27
|
+
* work, so the "still silent to the user" premise no longer holds while a tool
|
|
28
|
+
* is visibly running; nulling `currentTurn` there would darken the very feed the
|
|
29
|
+
* user is watching. A turn with no in-flight tool is unaffected.
|
|
30
|
+
*
|
|
23
31
|
* Terminal action, once per turn:
|
|
24
32
|
*
|
|
25
33
|
* t=0 startTurn() — silence clock starts at turnStartedAt
|
|
@@ -81,6 +89,16 @@ export interface ThresholdsMs {
|
|
|
81
89
|
/** Silence (since last outbound, or turn start) after which the
|
|
82
90
|
* framework sends the user-visible fallback AND unwedges the turn. */
|
|
83
91
|
fallback: number
|
|
92
|
+
/**
|
|
93
|
+
* Fix A — hard ceiling for the in-flight-tool defer. When
|
|
94
|
+
* `deferFallbackWhileToolInFlight` is on, the fallback is held back while a
|
|
95
|
+
* parent tool is genuinely in flight (the agent is demonstrably working and
|
|
96
|
+
* the live activity feed is showing it). This bounds that defer: once silence
|
|
97
|
+
* crosses the ceiling the fallback fires REGARDLESS of an in-flight tool, so a
|
|
98
|
+
* hung-mid-tool turn can't pin the conversation forever. Ignored unless the
|
|
99
|
+
* defer is on; defaults to no ceiling (Infinity) when omitted.
|
|
100
|
+
*/
|
|
101
|
+
fallbackHardCeiling?: number
|
|
84
102
|
}
|
|
85
103
|
|
|
86
104
|
export const DEFAULT_THRESHOLDS: ThresholdsMs = {
|
|
@@ -122,6 +140,21 @@ export interface SilencePokeDeps {
|
|
|
122
140
|
thresholdsMs?: ThresholdsMs
|
|
123
141
|
/** Poll interval (tests). */
|
|
124
142
|
pollIntervalMs?: number
|
|
143
|
+
/**
|
|
144
|
+
* Fix A — when true, the 300s framework fallback is DEFERRED while a parent
|
|
145
|
+
* tool is genuinely in flight (`inFlightTools` non-empty): the agent is
|
|
146
|
+
* demonstrably working, and since #2162 the live activity feed shows that
|
|
147
|
+
* work, so nulling `currentTurn` (which the fallback does) would darken a feed
|
|
148
|
+
* the user is actively watching. The defer is bounded by
|
|
149
|
+
* `thresholdsMs.fallbackHardCeiling` so a hung-mid-tool turn still unwedges; a
|
|
150
|
+
* turn with NO in-flight tool fires at the base threshold exactly as before.
|
|
151
|
+
* Default false (legacy behaviour) — enable per-agent to canary.
|
|
152
|
+
*
|
|
153
|
+
* A crashed agent is recovered independently by the bridge-disconnect sweep
|
|
154
|
+
* (`onDanglingTurnsSwept`), so deferring here does not reintroduce the #1556
|
|
155
|
+
* dangling-turn wedge for the crash case.
|
|
156
|
+
*/
|
|
157
|
+
deferFallbackWhileToolInFlight?: boolean
|
|
125
158
|
}
|
|
126
159
|
|
|
127
160
|
const state = new Map<string, SilencePokeState>()
|
|
@@ -366,6 +399,20 @@ function tick(now: number): void {
|
|
|
366
399
|
if (silence < 0) continue
|
|
367
400
|
|
|
368
401
|
if (!s.fallbackFired && silence >= thresholds.fallback) {
|
|
402
|
+
// Fix A: defer the unwedge while a parent tool is genuinely in flight —
|
|
403
|
+
// the agent is demonstrably working and the live activity feed is showing
|
|
404
|
+
// it, so firing here (which nulls currentTurn) would darken that feed
|
|
405
|
+
// mid-work. Bounded by the hard ceiling so a hung-mid-tool turn still
|
|
406
|
+
// unwedges. `continue` WITHOUT setting fallbackFired so the next tick
|
|
407
|
+
// re-checks — once the tool ends and the turn stays silent past the base
|
|
408
|
+
// threshold, or the ceiling is crossed, it fires normally.
|
|
409
|
+
if (
|
|
410
|
+
activeDeps.deferFallbackWhileToolInFlight === true &&
|
|
411
|
+
s.inFlightTools.size > 0 &&
|
|
412
|
+
silence < (thresholds.fallbackHardCeiling ?? Number.POSITIVE_INFINITY)
|
|
413
|
+
) {
|
|
414
|
+
continue
|
|
415
|
+
}
|
|
369
416
|
s.fallbackFired = true
|
|
370
417
|
const { chatId, threadId } = parseKey(key)
|
|
371
418
|
const recentThinking = s.lastThinkingAt != null
|
|
@@ -26,7 +26,10 @@ interface TestFixtures {
|
|
|
26
26
|
fallbacks: FrameworkFallbackContext[]
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
function setupDeps(opts?: {
|
|
29
|
+
function setupDeps(opts?: {
|
|
30
|
+
thresholds?: Partial<typeof DEFAULT_THRESHOLDS> & { fallbackHardCeiling?: number }
|
|
31
|
+
deferFallbackWhileToolInFlight?: boolean
|
|
32
|
+
}): TestFixtures {
|
|
30
33
|
const fixtures: TestFixtures = { emitted: [], fallbacks: [] }
|
|
31
34
|
__setDepsForTests({
|
|
32
35
|
emitMetric: (e) => fixtures.emitted.push(e),
|
|
@@ -35,6 +38,9 @@ function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }):
|
|
|
35
38
|
...DEFAULT_THRESHOLDS,
|
|
36
39
|
...(opts?.thresholds ?? {}),
|
|
37
40
|
},
|
|
41
|
+
...(opts?.deferFallbackWhileToolInFlight != null
|
|
42
|
+
? { deferFallbackWhileToolInFlight: opts.deferFallbackWhileToolInFlight }
|
|
43
|
+
: {}),
|
|
38
44
|
})
|
|
39
45
|
return fixtures
|
|
40
46
|
}
|
|
@@ -528,3 +534,65 @@ describe('silence-poke — performance', () => {
|
|
|
528
534
|
expect(elapsed).toBeLessThan(50)
|
|
529
535
|
})
|
|
530
536
|
})
|
|
537
|
+
|
|
538
|
+
// ─── Fix A: defer the unwedge while a parent tool is genuinely in flight ──────
// Background: a long, quiet tool stretch (foreground sub-agent / big research)
// used to cross the 300s fallback and null currentTurn, which darkened the live
// activity feed in the middle of the work. With the opt-in defer the turn stays
// alive for as long as a tool is in flight; a hard ceiling bounds the defer so
// a turn that hangs mid-tool is still unwedged eventually.
describe('silence-poke — Fix A: in-flight-tool defer', () => {
  const TOPIC = 'c:0'
  const TOOL_ID = 't1'
  const HARD_CEILING_MS = 900_000

  // Fresh deps with the defer enabled and a 900s ceiling, as most cases need.
  const setupWithDefer = () =>
    setupDeps({
      deferFallbackWhileToolInFlight: true,
      thresholds: { fallbackHardCeiling: HARD_CEILING_MS },
    })

  it('legacy default (defer OFF): fires at 300s even with a tool in flight', () => {
    const fx = setupDeps() // flag left unset → defer is off
    startTurn(TOPIC, 0)
    noteToolStart(TOPIC, TOOL_ID, 'Bash', 'long audit', 10_000)
    __tickForTests(300_000)
    expect(fx.fallbacks).toHaveLength(1) // legacy behaviour is untouched
  })

  it('defer ON: does NOT fire at 300s while a tool is in flight', () => {
    const fx = setupWithDefer()
    startTurn(TOPIC, 0)
    noteToolStart(TOPIC, TOOL_ID, 'Bash', 'long audit', 10_000)
    __tickForTests(300_000)
    __tickForTests(450_000) // tool still running, agent still working
    expect(fx.fallbacks).toHaveLength(0) // held back — the live feed survives
  })

  it('defer ON: fires once the tool ends and the turn stays silent past threshold', () => {
    const fx = setupWithDefer()
    startTurn(TOPIC, 0)
    noteToolStart(TOPIC, TOOL_ID, 'Bash', null, 10_000)
    __tickForTests(300_000)
    expect(fx.fallbacks).toHaveLength(0) // held back while the tool runs
    noteToolEnd(TOPIC, TOOL_ID, 400_000) // tool finishes and no reply follows
    __tickForTests(400_001) // silence since turn start is already far beyond 300s
    expect(fx.fallbacks).toHaveLength(1) // unwedges on the very next tick
  })

  it('defer ON: fires at the hard ceiling even with a tool still in flight (hung-mid-tool)', () => {
    const fx = setupWithDefer()
    startTurn(TOPIC, 0)
    noteToolStart(TOPIC, TOOL_ID, 'Bash', 'wedged tool', 10_000)
    __tickForTests(300_000)
    expect(fx.fallbacks).toHaveLength(0) // held back
    __tickForTests(900_000) // silence reaches the hard ceiling
    expect(fx.fallbacks).toHaveLength(1) // the defer is bounded
  })

  it('defer ON: a turn with NO in-flight tool fires at the base threshold (genuine silence)', () => {
    const fx = setupWithDefer()
    startTurn(TOPIC, 0)
    // No tool is ever started: the turn is genuinely silent/wedged.
    __tickForTests(300_000)
    expect(fx.fallbacks).toHaveLength(1) // the defer has no effect here
  })

  it('defer ON without a hard ceiling: defers indefinitely while the tool stays in flight', () => {
    const fx = setupDeps({ deferFallbackWhileToolInFlight: true }) // ceiling omitted → Infinity
    startTurn(TOPIC, 0)
    noteToolStart(TOPIC, TOOL_ID, 'Bash', null, 10_000)
    __tickForTests(300_000)
    __tickForTests(3_600_000) // one hour into the turn
    expect(fx.fallbacks).toHaveLength(0)
  })
})
|
|
@@ -440,3 +440,64 @@ describe('createWorkerActivityFeed', () => {
|
|
|
440
440
|
expect(bot.sent[0].opts?.message_thread_id).toBe(42)
|
|
441
441
|
})
|
|
442
442
|
})
|
|
443
|
+
|
|
444
|
+
// ─── log sink: success-path observability ────────────────────────────────────
// The feed used to log only on FAILURE, which meant a feed that rendered
// correctly left no trace in the gateway log — the gap that made the marko
// status-dark incident hard to triage. These cases assert that paint, edit and
// finish each produce a structured, greppable line naming the worker, chat,
// thread and message id.
describe('createWorkerActivityFeed — log sink', () => {
  it('logs paint on first send, edit on each in-place update, and finish on terminal', async () => {
    const fakeBot = makeFakeBot()
    const captured: string[] = []
    let nowMs = 10_000
    const feed = createWorkerActivityFeed({
      bot: fakeBot,
      now: () => nowMs,
      minEditIntervalMs: 0,
      log: (line) => captured.push(line),
    })
    const firstLineWith = (prefix: string) => captured.find((line) => line.startsWith(prefix))

    await feed.update('w-research', 'chat-9', view({ toolCount: 1, latestSummary: 'first' }), 7)
    nowMs = 11_000
    await feed.update('w-research', 'chat-9', view({ toolCount: 2, latestSummary: 'second' }), 7)
    nowMs = 12_000
    await feed.finish('w-research', view({ state: 'done', toolCount: 2 }))

    const paintLine = firstLineWith('worker-feed: paint')
    const editLine = firstLineWith('worker-feed: edit')
    const finishLine = firstLineWith('worker-feed: finish')

    expect(paintLine).toBeDefined()
    expect(paintLine).toContain('agent=w-research')
    expect(paintLine).toContain('chat=chat-9')
    expect(paintLine).toContain('thread=7')
    expect(paintLine).toMatch(/msgId=\d+/)
    expect(paintLine).toMatch(/bytes=\d+/)

    expect(editLine).toBeDefined()
    expect(editLine).toContain('agent=w-research')

    expect(finishLine).toBeDefined()
    expect(finishLine).toContain('state=done')
  })

  it('renders thread=- in the log line when no forum topic is set', async () => {
    const fakeBot = makeFakeBot()
    const captured: string[] = []
    let nowMs = 10_000
    const feed = createWorkerActivityFeed({
      bot: fakeBot,
      now: () => nowMs,
      log: (line) => captured.push(line),
    })
    await feed.update('w1', 'chat', view()) // threadId deliberately omitted
    const paintLine = captured.find((line) => line.startsWith('worker-feed: paint'))
    expect(paintLine).toContain('thread=-')
  })

  it('does not log a paint when the worker stays below firstPaintMin (still silent)', async () => {
    const fakeBot = makeFakeBot()
    const captured: string[] = []
    let nowMs = 0
    const feed = createWorkerActivityFeed({
      bot: fakeBot,
      now: () => nowMs,
      firstPaintMinMs: 8000,
      log: (line) => captured.push(line),
    })
    nowMs = 3000
    await feed.update('w1', 'chat', view({ elapsedMs: 3000 }))
    const sawPaint = captured.some((line) => line.startsWith('worker-feed: paint'))
    expect(sawPaint).toBe(false)
  })
})
|
|
@@ -208,6 +208,8 @@ export interface WorkerActivityFeedOpts {
|
|
|
208
208
|
}
|
|
209
209
|
|
|
210
210
|
interface WorkerHandle {
|
|
211
|
+
/** jsonl agent id — carried so success/failure log lines can name the worker. */
|
|
212
|
+
agentId: string
|
|
211
213
|
chatId: string
|
|
212
214
|
threadId?: number
|
|
213
215
|
messageId: number | null
|
|
@@ -309,6 +311,10 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
|
|
|
309
311
|
h.messageId = sent.message_id
|
|
310
312
|
h.lastBody = body
|
|
311
313
|
h.lastEditAt = nowFn()
|
|
314
|
+
log(
|
|
315
|
+
`worker-feed: paint agent=${h.agentId} chat=${h.chatId} ` +
|
|
316
|
+
`thread=${h.threadId ?? '-'} msgId=${h.messageId} bytes=${body.length}`,
|
|
317
|
+
)
|
|
312
318
|
} catch (err) {
|
|
313
319
|
noteRateLimited(h, err, 'send')
|
|
314
320
|
log(`worker-feed: send failed: ${(err as Error).message}`)
|
|
@@ -324,6 +330,10 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
|
|
|
324
330
|
await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h))
|
|
325
331
|
h.lastBody = body
|
|
326
332
|
h.lastEditAt = nowFn()
|
|
333
|
+
log(
|
|
334
|
+
`worker-feed: edit agent=${h.agentId} chat=${h.chatId} ` +
|
|
335
|
+
`thread=${h.threadId ?? '-'} msgId=${h.messageId} bytes=${body.length}`,
|
|
336
|
+
)
|
|
327
337
|
} catch (err) {
|
|
328
338
|
noteRateLimited(h, err, 'edit')
|
|
329
339
|
// Stale message_id (manually deleted / edit window gone). Re-post
|
|
@@ -351,6 +361,10 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
|
|
|
351
361
|
await opts.bot.editMessageText(h.chatId, h.messageId, body, sendOptsFor(h))
|
|
352
362
|
h.lastBody = body
|
|
353
363
|
h.lastEditAt = nowFn()
|
|
364
|
+
log(
|
|
365
|
+
`worker-feed: finish agent=${h.agentId} chat=${h.chatId} ` +
|
|
366
|
+
`thread=${h.threadId ?? '-'} msgId=${h.messageId} state=${view.state} bytes=${body.length}`,
|
|
367
|
+
)
|
|
354
368
|
} catch (err) {
|
|
355
369
|
noteRateLimited(h, err, 'finish')
|
|
356
370
|
log(`worker-feed: finish edit failed: ${(err as Error).message}`)
|
|
@@ -371,6 +385,7 @@ export function createWorkerActivityFeed(opts: WorkerActivityFeedOpts): WorkerAc
|
|
|
371
385
|
let h = handles.get(agentId)
|
|
372
386
|
if (h == null) {
|
|
373
387
|
h = {
|
|
388
|
+
agentId,
|
|
374
389
|
chatId,
|
|
375
390
|
threadId,
|
|
376
391
|
messageId: null,
|