switchroom 0.13.55 → 0.13.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +80 -80
- package/dist/auth-broker/index.js +80 -80
- package/dist/cli/ack-first-pretool.mjs +75 -0
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/notion-write-pretool.mjs +90 -84
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +367 -358
- package/dist/host-control/main.js +148 -148
- package/dist/vault/approvals/kernel-server.js +82 -82
- package/dist/vault/broker/server.js +83 -83
- package/package.json +1 -1
- package/skills/notion/SKILL.md +13 -9
- package/telegram-plugin/ack-flag.ts +66 -0
- package/telegram-plugin/dist/bridge/bridge.js +112 -112
- package/telegram-plugin/dist/gateway/gateway.js +544 -440
- package/telegram-plugin/dist/server.js +160 -160
- package/telegram-plugin/gateway/gateway.ts +80 -1
- package/telegram-plugin/runtime-metrics.ts +17 -0
- package/telegram-plugin/silence-poke.ts +82 -0
- package/telegram-plugin/tests/ack-flag.test.ts +65 -0
- package/telegram-plugin/tests/post-fallback-outbound-count.test.ts +78 -0
- package/telegram-plugin/tests/silence-poke.test.ts +117 -7
|
@@ -80,6 +80,7 @@ import { classifyInbound } from '../inbound-classifier.js'
|
|
|
80
80
|
import * as silencePoke from '../silence-poke.js'
|
|
81
81
|
import * as pendingProgress from '../pending-work-progress.js'
|
|
82
82
|
import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
|
|
83
|
+
import { markAckSent, clearAckSent } from '../ack-flag.js'
|
|
83
84
|
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
84
85
|
import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
|
|
85
86
|
import { type SessionEvent } from '../session-tail.js'
|
|
@@ -3321,6 +3322,43 @@ silencePoke.startTimer({
|
|
|
3321
3322
|
// Re-emit through the unified runtime-metrics fan-out (PostHog + JSONL).
|
|
3322
3323
|
emitRuntimeMetric(event)
|
|
3323
3324
|
},
|
|
3325
|
+
onAwarenessPing: async (ctx) => {
|
|
3326
|
+
// Early framework-owned awareness signal (~60s) so the user never
|
|
3327
|
+
// faces a silent chat while the model is busy / held / thinking.
|
|
3328
|
+
// Distinct from the 300s onFrameworkFallback: fires earlier, sends
|
|
3329
|
+
// a SILENT message (disable_notification: true — ambient liveness,
|
|
3330
|
+
// not a device buzz), and is bounded to ONE per turn by the silence-
|
|
3331
|
+
// poke module's `awarenessPingFired` flag. Reuses
|
|
3332
|
+
// `formatFrameworkFallbackText` so the wording stays consistent and
|
|
3333
|
+
// in-flight tools are named when known. If the model has been
|
|
3334
|
+
// silent long enough to cross 300s, the heavier framework_fallback
|
|
3335
|
+
// escalates with a notification.
|
|
3336
|
+
//
|
|
3337
|
+
// Late-fire guard mirrors the framework_fallback handler: skip if
|
|
3338
|
+
// the turn ended cleanly between the silence-poke arming and this
|
|
3339
|
+
// timer-fired handler so we don't talk over a clean response.
|
|
3340
|
+
if (activeTurnStartedAt.get(ctx.key) == null && currentTurn == null) {
|
|
3341
|
+
return
|
|
3342
|
+
}
|
|
3343
|
+
const text = silencePoke.formatFrameworkFallbackText(
|
|
3344
|
+
ctx.fallbackKind,
|
|
3345
|
+
ctx.silenceMs,
|
|
3346
|
+
ctx.inFlightTools,
|
|
3347
|
+
)
|
|
3348
|
+
try {
|
|
3349
|
+
await robustApiCall(
|
|
3350
|
+
() => bot.api.sendMessage(ctx.chatId, text, {
|
|
3351
|
+
...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
|
|
3352
|
+
disable_notification: true,
|
|
3353
|
+
}),
|
|
3354
|
+
{ chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
|
|
3355
|
+
)
|
|
3356
|
+
} catch (err) {
|
|
3357
|
+
process.stderr.write(
|
|
3358
|
+
`silence-poke awareness-ping sendMessage failed chat=${ctx.chatId} thread=${ctx.threadId}: ${err}\n`,
|
|
3359
|
+
)
|
|
3360
|
+
}
|
|
3361
|
+
},
|
|
3324
3362
|
onFrameworkFallback: async (ctx) => {
|
|
3325
3363
|
// Late-fire short-circuit (2026-05-23 audit finding). The fallback
|
|
3326
3364
|
// can race a clean turn-end: the model's actual reply lands inside
|
|
@@ -3429,7 +3467,18 @@ silencePoke.startTimer({
|
|
|
3429
3467
|
longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
|
|
3430
3468
|
ended_via: 'framework_fallback',
|
|
3431
3469
|
})
|
|
3432
|
-
signalTracker.clear(fbKey)
|
|
3470
|
+
// #1892-follow-up: do NOT clear signalTracker state here. When the
|
|
3471
|
+
// model recovers post-fallback (the framework's user-visible
|
|
3472
|
+
// "still working" message is the load-bearing unwedge primitive —
|
|
3473
|
+
// see `project_silence_poke_broken_and_cross_turn_fix`), its late
|
|
3474
|
+
// reply calls fire `signalTracker.noteOutbound(fbKey, ...)`. If
|
|
3475
|
+
// state is already cleared, that's a silent no-op and the late
|
|
3476
|
+
// reply is invisible to outbound_count + ttfo metrics — the
|
|
3477
|
+
// canonical session-end path (silent-marker line 7407 or normal
|
|
3478
|
+
// turn-end line 7502) emits turn_ended a second time, reading
|
|
3479
|
+
// the empty state and reporting outbound_count=0 even though
|
|
3480
|
+
// replies landed. Defer clear to the canonical paths so
|
|
3481
|
+
// post-fallback recoveries are correctly counted.
|
|
3433
3482
|
}
|
|
3434
3483
|
// Stamp the turn-DB end row as `timeout` so the wedged turn doesn't
|
|
3435
3484
|
// stay open until a SIGTERM/restart relabels it (false-negative for
|
|
@@ -4896,6 +4945,16 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4896
4945
|
// silence-poke clock so the next poke is measured from this send.
|
|
4897
4946
|
signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
4898
4947
|
silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
4948
|
+
// Ack-first gate (`reference/conversational-pacing.md` beat 1):
|
|
4949
|
+
// touch the state-dir flag so the ack-first-pretool hook lets
|
|
4950
|
+
// subsequent non-reply tool calls through this turn. Cleared at
|
|
4951
|
+
// turn start (the `enqueue` session event). Best-effort — a write failure shouldn't break
|
|
4952
|
+
// reply, and the hook is kill-switched anyway.
|
|
4953
|
+
try {
|
|
4954
|
+
markAckSent()
|
|
4955
|
+
} catch (err) {
|
|
4956
|
+
process.stderr.write(`telegram gateway: markAckSent failed: ${err}\n`)
|
|
4957
|
+
}
|
|
4899
4958
|
// #1741 — only clear silent-end state on a plausibly-final reply.
|
|
4900
4959
|
// An interim ack (disable_notification:true, short text, no done)
|
|
4901
4960
|
// must NOT clear the state file; otherwise a turn that ends with
|
|
@@ -5491,6 +5550,13 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
5491
5550
|
const sKey = statusKey(streamChatId, streamThreadId)
|
|
5492
5551
|
signalTracker.noteOutbound(sKey, Date.now())
|
|
5493
5552
|
silencePoke.noteOutbound(sKey, Date.now())
|
|
5553
|
+
// Ack-first gate: stream_reply's first emit also unlocks subsequent
|
|
5554
|
+
// tool calls. See ack-flag.ts + ack-first-pretool.ts.
|
|
5555
|
+
try {
|
|
5556
|
+
markAckSent()
|
|
5557
|
+
} catch (err) {
|
|
5558
|
+
process.stderr.write(`telegram gateway: markAckSent (stream_reply) failed: ${err}\n`)
|
|
5559
|
+
}
|
|
5494
5560
|
// #1741 — see executeReply for the rationale: only a plausibly-
|
|
5495
5561
|
// final stream_reply clears the silent-end state. An interim
|
|
5496
5562
|
// ack via stream_reply must NOT clear; the Stop hook needs
|
|
@@ -6725,6 +6791,14 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6725
6791
|
statusKey(ev.chatId, enqThreadId),
|
|
6726
6792
|
'handback',
|
|
6727
6793
|
)
|
|
6794
|
+
// Ack-first gate (`reference/conversational-pacing.md` beat 1):
|
|
6795
|
+
// wipe the prior turn's `ack-sent.flag` so the ack-first-
|
|
6796
|
+
// pretool hook re-arms for this fresh turn. Centralised HERE
|
|
6797
|
+
// (not in handleInbound) because `enqueue` is the single
|
|
6798
|
+
// canonical fresh-turn atom — fires for real inbounds, cron
|
|
6799
|
+
// fires, subagent-handback channel wakes, vault-grant resumes,
|
|
6800
|
+
// and restart markers alike. Best-effort — see ack-flag.ts.
|
|
6801
|
+
clearAckSent()
|
|
6728
6802
|
}
|
|
6729
6803
|
if (ev.chatId) {
|
|
6730
6804
|
// Issue #195: if a previous turn left an answer-lane stream open
|
|
@@ -8985,6 +9059,11 @@ async function handleInbound(
|
|
|
8985
9059
|
// the framework can nudge the model if it goes quiet past the
|
|
8986
9060
|
// soft / firm thresholds.
|
|
8987
9061
|
silencePoke.startTurn(statusKey(chat_id, messageThreadId), Date.now())
|
|
9062
|
+
// Ack-first gate clear is centralised in handleSessionEvent's
|
|
9063
|
+
// `enqueue` branch — that fires for EVERY fresh turn atom
|
|
9064
|
+
// (real inbound, cron, subagent-handback, vault-grant wake,
|
|
9065
|
+
// restart marker) so cron/handback turns also re-arm the gate.
|
|
9066
|
+
// See the call site under `case 'enqueue'` (~line 6794).
|
|
8988
9067
|
// #1445 cross-turn pending-async ambient. A new turn starting
|
|
8989
9068
|
// (user inbound, synthesised wake, or handback channel) is the
|
|
8990
9069
|
// signal that the model is about to re-engage — clear any
|
|
@@ -104,6 +104,23 @@ export type RuntimeMetricEvent =
|
|
|
104
104
|
fallback_kind: 'working' | 'thinking'
|
|
105
105
|
silence_ms: number
|
|
106
106
|
}
|
|
107
|
+
/**
|
|
108
|
+
* Awareness ping (~60s, default): framework-owned user-visible
|
|
109
|
+
* "still working… / still thinking…" message sent BEFORE the 300s
|
|
110
|
+
* fallback so the user never faces a silent chat for the full 5
|
|
111
|
+
* minutes. Silent (no device ping); one-shot per turn; suppressed
|
|
112
|
+
* by any outbound or sub-agent dispatch. A high rate is the
|
|
113
|
+
* diagnostic signal that frequent silences exist (held-inbound,
|
|
114
|
+
* extended-thinking, slow startup), and the rate of the heavier
|
|
115
|
+
* silence_fallback_sent that still follows tells us how many of
|
|
116
|
+
* those escalate all the way to 5 min.
|
|
117
|
+
*/
|
|
118
|
+
| {
|
|
119
|
+
kind: 'awareness_ping_sent'
|
|
120
|
+
key: string
|
|
121
|
+
fallback_kind: 'working' | 'thinking'
|
|
122
|
+
silence_ms: number
|
|
123
|
+
}
|
|
107
124
|
/**
|
|
108
125
|
* #1445 cross-turn pending-async ambient lifecycle. `started` fires
|
|
109
126
|
* when a turn ends with a captured anchor AND a pending Agent/Task/
|
|
@@ -80,6 +80,13 @@ export interface SilencePokeState {
|
|
|
80
80
|
* the ack nudge is specifically about the *first* outbound, so it
|
|
81
81
|
* never re-arms even after the model later goes quiet again. */
|
|
82
82
|
ackPokeFired: boolean
|
|
83
|
+
/** True once the early awareness-ping has fired this turn. One-shot:
|
|
84
|
+
* the user only needs one "we know it's slow" cue before the heavier
|
|
85
|
+
* 300s fallback escalates. Independent of the ack-poke (which targets
|
|
86
|
+
* the model via piggyback) — awareness-ping targets the user directly
|
|
87
|
+
* via Telegram, so it lands even during pure-thinking or held-inbound
|
|
88
|
+
* silences when no tool_result is available to piggyback on. */
|
|
89
|
+
awarenessPingFired: boolean
|
|
83
90
|
/** Wall-clock ms of last poke fire — used for poke-success latency. */
|
|
84
91
|
lastPokeFiredAt: number | null
|
|
85
92
|
/** #1292: in-flight tool calls keyed by toolUseId. Populated by
|
|
@@ -101,6 +108,14 @@ export interface ThresholdsMs {
|
|
|
101
108
|
* 75s `soft` threshold, which measures silence-since-last-outbound
|
|
102
109
|
* and is the wrong instrument for "you never said hello." */
|
|
103
110
|
ack: number
|
|
111
|
+
/** Awareness ping: if NO outbound has landed this many ms after turn
|
|
112
|
+
* start, send a framework-owned user-visible "still working…" status
|
|
113
|
+
* message directly via Telegram. Sits between the model-targeted ack
|
|
114
|
+
* poke (10s) and the 300s framework_fallback. Lands even during pure
|
|
115
|
+
* extended-thinking or held-inbound silences when no tool_result is
|
|
116
|
+
* available to piggyback the model-targeted pokes onto. One-shot per
|
|
117
|
+
* turn — the 300s fallback handles further escalation if still silent. */
|
|
118
|
+
awarenessPing: number
|
|
104
119
|
soft: number
|
|
105
120
|
firm: number
|
|
106
121
|
fallback: number
|
|
@@ -112,6 +127,7 @@ export interface ThresholdsMs {
|
|
|
112
127
|
|
|
113
128
|
export const DEFAULT_THRESHOLDS: ThresholdsMs = {
|
|
114
129
|
ack: 10_000,
|
|
130
|
+
awarenessPing: 60_000,
|
|
115
131
|
soft: 75_000,
|
|
116
132
|
firm: 180_000,
|
|
117
133
|
fallback: 300_000,
|
|
@@ -145,6 +161,7 @@ export type SilencePokeMetric =
|
|
|
145
161
|
| { kind: 'silence_poke_fired'; key: string; level: PokeLevel; silence_ms: number; subagent_wait: boolean }
|
|
146
162
|
| { kind: 'silence_poke_succeeded'; key: string; level: PokeLevel; latency_ms: number }
|
|
147
163
|
| { kind: 'silence_fallback_sent'; key: string; fallback_kind: 'working' | 'thinking'; silence_ms: number }
|
|
164
|
+
| { kind: 'awareness_ping_sent'; key: string; fallback_kind: 'working' | 'thinking'; silence_ms: number }
|
|
148
165
|
|
|
149
166
|
export interface SilencePokeDeps {
|
|
150
167
|
/** Called when the 300s fallback fires. Caller sends the user-visible
|
|
@@ -153,6 +170,12 @@ export interface SilencePokeDeps {
|
|
|
153
170
|
* not a model-sourced one, and we want pokes to continue (well, no,
|
|
154
171
|
* fallbackFired ensures only one per turn anyway). */
|
|
155
172
|
onFrameworkFallback: (ctx: FrameworkFallbackContext) => Promise<void> | void
|
|
173
|
+
/** Called when the awareness ping fires (default 60s). Caller sends
|
|
174
|
+
* a user-visible "still working…" message — silent (no device ping),
|
|
175
|
+
* framework-owned, one-shot per turn. Reuses the same context shape
|
|
176
|
+
* as onFrameworkFallback so the gateway can reuse `formatFrameworkFallbackText`
|
|
177
|
+
* and the in-flight-tool enrichment. */
|
|
178
|
+
onAwarenessPing: (ctx: FrameworkFallbackContext) => Promise<void> | void
|
|
156
179
|
/** Telemetry sink for poke events. */
|
|
157
180
|
emitMetric: (event: SilencePokeMetric) => void
|
|
158
181
|
/** Threshold overrides (tests). */
|
|
@@ -188,6 +211,7 @@ export function startTurn(key: string, now: number): void {
|
|
|
188
211
|
lastThinkingAt: null,
|
|
189
212
|
fallbackFired: false,
|
|
190
213
|
ackPokeFired: false,
|
|
214
|
+
awarenessPingFired: false,
|
|
191
215
|
lastPokeFiredAt: null,
|
|
192
216
|
inFlightTools: new Map(),
|
|
193
217
|
})
|
|
@@ -485,6 +509,64 @@ function tick(now: number): void {
|
|
|
485
509
|
continue
|
|
486
510
|
}
|
|
487
511
|
|
|
512
|
+
// Awareness ping — framework-owned user-visible status BEFORE the
|
|
513
|
+
// 300s heavy fallback. Lands at ~60s even when the model is in pure
|
|
514
|
+
// extended-thinking or the inbound is held (#1892 follow-up), since
|
|
515
|
+
// it's delivered directly via the gateway → Telegram, not
|
|
516
|
+
// piggybacked on tool_result. One-shot per turn; suppressed if any
|
|
517
|
+
// outbound has happened. The 300s fallback is unchanged and
|
|
518
|
+
// escalates further if silence persists.
|
|
519
|
+
//
|
|
520
|
+
// Independent of pokesFired so soft/firm/fallback still escalate on
|
|
521
|
+
// their own schedule. Independent of ackPokeFired so a long-running
|
|
522
|
+
// turn that already received the ack-poke (but has still sent no outbound)
|
|
523
|
+
// still gets the user-facing awareness ping.
|
|
524
|
+
if (
|
|
525
|
+
!s.awarenessPingFired
|
|
526
|
+
&& s.lastOutboundAt == null
|
|
527
|
+
&& !s.subagentDispatchActive
|
|
528
|
+
&& silence >= thresholds.awarenessPing
|
|
529
|
+
) {
|
|
530
|
+
s.awarenessPingFired = true
|
|
531
|
+
const { chatId, threadId } = parseKey(key)
|
|
532
|
+
const recentThinking = s.lastThinkingAt != null
|
|
533
|
+
&& (now - s.lastThinkingAt) < 30_000
|
|
534
|
+
const fallbackKind: 'working' | 'thinking' = recentThinking ? 'thinking' : 'working'
|
|
535
|
+
const inFlightTools: ToolSnapshot[] = Array.from(s.inFlightTools.values())
|
|
536
|
+
.sort((a, b) => a.startedAt - b.startedAt)
|
|
537
|
+
.map(t => ({
|
|
538
|
+
name: t.name,
|
|
539
|
+
label: t.label,
|
|
540
|
+
durationMs: now - t.startedAt,
|
|
541
|
+
}))
|
|
542
|
+
activeDeps.emitMetric({
|
|
543
|
+
kind: 'awareness_ping_sent',
|
|
544
|
+
key,
|
|
545
|
+
fallback_kind: fallbackKind,
|
|
546
|
+
silence_ms: silence,
|
|
547
|
+
})
|
|
548
|
+
try {
|
|
549
|
+
const ret = activeDeps.onAwarenessPing({
|
|
550
|
+
key,
|
|
551
|
+
chatId,
|
|
552
|
+
threadId,
|
|
553
|
+
fallbackKind,
|
|
554
|
+
silenceMs: silence,
|
|
555
|
+
inFlightTools,
|
|
556
|
+
})
|
|
557
|
+
if (ret != null && typeof (ret as Promise<unknown>).then === 'function') {
|
|
558
|
+
;(ret as Promise<unknown>).catch(err => {
|
|
559
|
+
process.stderr.write(`silence-poke: awareness-ping handler rejected: ${err}\n`)
|
|
560
|
+
})
|
|
561
|
+
}
|
|
562
|
+
} catch (err) {
|
|
563
|
+
process.stderr.write(`silence-poke: awareness-ping handler threw: ${err}\n`)
|
|
564
|
+
}
|
|
565
|
+
// Don't `continue` — soft/firm/fallback can still arm in the same tick
|
|
566
|
+
// if their thresholds have also been crossed. Awareness-ping is a
|
|
567
|
+
// sibling signal, not part of the ladder.
|
|
568
|
+
}
|
|
569
|
+
|
|
488
570
|
if (s.pokesFired === 0 && silence >= softThreshold) {
|
|
489
571
|
s.pokeArmed = { level: 'soft' }
|
|
490
572
|
s.pokesFired = 1
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
2
|
+
import { mkdtempSync, rmSync, existsSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
|
|
6
|
+
import { markAckSent, clearAckSent, ACK_SENT_MARKER } from "../ack-flag.js";
|
|
7
|
+
|
|
8
|
+
describe("ack-flag — gateway-side state file for ack-first hook", () => {
|
|
9
|
+
let stateDir: string;
|
|
10
|
+
let prevEnv: string | undefined;
|
|
11
|
+
|
|
12
|
+
beforeEach(() => {
|
|
13
|
+
stateDir = mkdtempSync(join(tmpdir(), "ack-flag-"));
|
|
14
|
+
prevEnv = process.env.TELEGRAM_STATE_DIR;
|
|
15
|
+
process.env.TELEGRAM_STATE_DIR = stateDir;
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
afterEach(() => {
|
|
19
|
+
if (prevEnv != null) process.env.TELEGRAM_STATE_DIR = prevEnv;
|
|
20
|
+
else delete process.env.TELEGRAM_STATE_DIR;
|
|
21
|
+
rmSync(stateDir, { recursive: true, force: true });
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("markAckSent creates the marker file", () => {
|
|
25
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
26
|
+
markAckSent();
|
|
27
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("markAckSent is idempotent (second call is a no-op)", () => {
|
|
31
|
+
markAckSent();
|
|
32
|
+
expect(() => markAckSent()).not.toThrow();
|
|
33
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
it("clearAckSent removes the marker file", () => {
|
|
37
|
+
markAckSent();
|
|
38
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
39
|
+
clearAckSent();
|
|
40
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
it("clearAckSent is idempotent (works when marker doesn't exist)", () => {
|
|
44
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
45
|
+
expect(() => clearAckSent()).not.toThrow();
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("round-trip: mark → clear → mark cycles work across turns", () => {
|
|
49
|
+
// turn 1
|
|
50
|
+
markAckSent();
|
|
51
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
52
|
+
// turn end / next turn start
|
|
53
|
+
clearAckSent();
|
|
54
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
55
|
+
// turn 2 first reply
|
|
56
|
+
markAckSent();
|
|
57
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it("noop when TELEGRAM_STATE_DIR is unset (best-effort)", () => {
|
|
61
|
+
delete process.env.TELEGRAM_STATE_DIR;
|
|
62
|
+
expect(() => markAckSent()).not.toThrow();
|
|
63
|
+
expect(() => clearAckSent()).not.toThrow();
|
|
64
|
+
});
|
|
65
|
+
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
reset,
|
|
4
|
+
noteOutbound,
|
|
5
|
+
getOutboundMetrics,
|
|
6
|
+
clear,
|
|
7
|
+
__resetAllForTests,
|
|
8
|
+
} from '../turn-signal-tracker.js'
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* switchroom#1892-follow-up — regression guard for the post-fallback
|
|
12
|
+
* outbound under-count.
|
|
13
|
+
*
|
|
14
|
+
* Scenario reproduced from clerk's 2026-05-27 wedge:
|
|
15
|
+
* 1. Turn starts (signalTracker.reset)
|
|
16
|
+
* 2. Model goes silent for 300s — silence-poke ladder fires, then
|
|
17
|
+
* framework_fallback fires + emits turn_ended with outbound_count=0
|
|
18
|
+
* 3. Model recovers — sends 2 late reply tool calls → executeReply
|
|
19
|
+
* calls signalTracker.noteOutbound
|
|
20
|
+
* 4. Canonical silent-marker turn-end path runs → reads metrics
|
|
21
|
+
* again → emits final turn_ended
|
|
22
|
+
*
|
|
23
|
+
* Pre-fix (b6cd7e5...): framework_fallback called signalTracker.clear
|
|
24
|
+
* after emitting its turn_ended. Late noteOutbound calls hit cleared
|
|
25
|
+
* state and no-op. Silent-marker path re-read the empty default and
|
|
26
|
+
* reported outbound_count=0 again — late replies invisible to KPIs.
|
|
27
|
+
*
|
|
28
|
+
* Post-fix: framework_fallback no longer clears; only the canonical
|
|
29
|
+
* paths do. Late replies accumulate; the canonical turn_ended carries
|
|
30
|
+
* the correct count.
|
|
31
|
+
*/
|
|
32
|
+
beforeEach(() => {
|
|
33
|
+
__resetAllForTests()
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
describe('signal-tracker survives framework-fallback for late-reply accounting (#1892)', () => {
|
|
37
|
+
it('late noteOutbound after fallback (without clear) increments outbound_count', () => {
|
|
38
|
+
const key = 'chat:thread'
|
|
39
|
+
const turnStart = 1_000_000
|
|
40
|
+
reset(key, turnStart)
|
|
41
|
+
|
|
42
|
+
// Simulate framework_fallback at +300s — emits turn_ended with
|
|
43
|
+
// outbound_count=0 but DOES NOT clear (the fix).
|
|
44
|
+
const beforeFallback = getOutboundMetrics(key)
|
|
45
|
+
expect(beforeFallback.outboundCount).toBe(0)
|
|
46
|
+
expect(beforeFallback.ttfoMs).toBeNull()
|
|
47
|
+
|
|
48
|
+
// Model recovers post-fallback and sends two late replies.
|
|
49
|
+
noteOutbound(key, turnStart + 312_000) // first late reply at +312s
|
|
50
|
+
noteOutbound(key, turnStart + 358_000) // second late reply at +358s
|
|
51
|
+
|
|
52
|
+
// Canonical silent-marker turn-end reads metrics again and clears.
|
|
53
|
+
const afterRecovery = getOutboundMetrics(key)
|
|
54
|
+
expect(afterRecovery.outboundCount).toBe(2)
|
|
55
|
+
expect(afterRecovery.ttfoMs).toBe(312_000)
|
|
56
|
+
|
|
57
|
+
clear(key)
|
|
58
|
+
expect(getOutboundMetrics(key).outboundCount).toBe(0)
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
it('regression guard: clearing state mid-turn would silently lose late outbounds', () => {
|
|
62
|
+
// The buggy pre-fix sequence — kept here as the inverse case so a
|
|
63
|
+
// future change that re-introduces clear-at-fallback fails this
|
|
64
|
+
// test loudly with the late-reply count going to zero.
|
|
65
|
+
const key = 'chat:thread'
|
|
66
|
+
reset(key, 1_000_000)
|
|
67
|
+
clear(key) // simulates the pre-fix framework_fallback clear
|
|
68
|
+
|
|
69
|
+
noteOutbound(key, 1_000_312_000) // late reply after the bogus clear
|
|
70
|
+
noteOutbound(key, 1_000_358_000)
|
|
71
|
+
|
|
72
|
+
const metrics = getOutboundMetrics(key)
|
|
73
|
+
// This is the pre-fix bug: late replies are invisible because
|
|
74
|
+
// noteOutbound is a no-op when state is missing.
|
|
75
|
+
expect(metrics.outboundCount).toBe(0)
|
|
76
|
+
expect(metrics.ttfoMs).toBeNull()
|
|
77
|
+
})
|
|
78
|
+
})
|
|
@@ -26,20 +26,27 @@ const ORIGINAL_KILL_SWITCH = process.env.SWITCHROOM_DISABLE_SILENCE_POKE
|
|
|
26
26
|
interface TestFixtures {
|
|
27
27
|
emitted: SilencePokeMetric[]
|
|
28
28
|
fallbacks: FrameworkFallbackContext[]
|
|
29
|
+
awarenessPings: FrameworkFallbackContext[]
|
|
29
30
|
}
|
|
30
31
|
|
|
31
32
|
function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }): TestFixtures {
|
|
32
|
-
const fixtures: TestFixtures = { emitted: [], fallbacks: [] }
|
|
33
|
+
const fixtures: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
|
|
33
34
|
__setDepsForTests({
|
|
34
35
|
emitMetric: (e) => fixtures.emitted.push(e),
|
|
35
36
|
onFrameworkFallback: (ctx) => { fixtures.fallbacks.push(ctx) },
|
|
37
|
+
onAwarenessPing: (ctx) => { fixtures.awarenessPings.push(ctx) },
|
|
36
38
|
// The ack budget (a new poke that fires *earlier* than `soft`) is
|
|
37
39
|
// disabled by default in this fixture so the soft/firm/fallback
|
|
38
40
|
// ladder tests stay isolated from it. The 'ack budget' describe
|
|
39
41
|
// block opts back in with a real value.
|
|
42
|
+
//
|
|
43
|
+
// The 60s awarenessPing is also disabled by default so the existing
|
|
44
|
+
// soft/firm/fallback ladder tests don't see the new sibling event;
|
|
45
|
+
// the 'awareness ping' describe block opts back in.
|
|
40
46
|
thresholdsMs: {
|
|
41
47
|
...DEFAULT_THRESHOLDS,
|
|
42
48
|
ack: Number.MAX_SAFE_INTEGER,
|
|
49
|
+
awarenessPing: Number.MAX_SAFE_INTEGER,
|
|
43
50
|
...(opts?.thresholds ?? {}),
|
|
44
51
|
},
|
|
45
52
|
})
|
|
@@ -733,13 +740,18 @@ describe('silence-poke — independence across turns', () => {
|
|
|
733
740
|
|
|
734
741
|
describe('silence-poke — fallback handler errors do not break timer', () => {
|
|
735
742
|
it('continues to function if onFrameworkFallback throws', () => {
|
|
736
|
-
const fx: TestFixtures = { emitted: [], fallbacks: [] }
|
|
743
|
+
const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
|
|
737
744
|
__setDepsForTests({
|
|
738
745
|
emitMetric: (e) => fx.emitted.push(e),
|
|
739
746
|
onFrameworkFallback: () => { throw new Error('oh no') },
|
|
740
|
-
|
|
747
|
+
onAwarenessPing: () => {},
|
|
748
|
+
// ack + awareness-ping out of the way — this test exercises the
|
|
741
749
|
// soft/firm/fallback ladder under a throwing fallback handler.
|
|
742
|
-
thresholdsMs: {
|
|
750
|
+
thresholdsMs: {
|
|
751
|
+
...DEFAULT_THRESHOLDS,
|
|
752
|
+
ack: Number.MAX_SAFE_INTEGER,
|
|
753
|
+
awarenessPing: Number.MAX_SAFE_INTEGER,
|
|
754
|
+
},
|
|
743
755
|
})
|
|
744
756
|
startTurn('k', 0)
|
|
745
757
|
expect(() => {
|
|
@@ -752,12 +764,17 @@ describe('silence-poke — fallback handler errors do not break timer', () => {
|
|
|
752
764
|
})
|
|
753
765
|
|
|
754
766
|
it('continues to function if onFrameworkFallback returns a rejected promise', async () => {
|
|
755
|
-
const fx: TestFixtures = { emitted: [], fallbacks: [] }
|
|
767
|
+
const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
|
|
756
768
|
__setDepsForTests({
|
|
757
769
|
emitMetric: (e) => fx.emitted.push(e),
|
|
758
770
|
onFrameworkFallback: () => Promise.reject(new Error('async fail')),
|
|
759
|
-
|
|
760
|
-
|
|
771
|
+
onAwarenessPing: () => {},
|
|
772
|
+
// ack + awareness-ping out of the way — see the throwing-handler test above.
|
|
773
|
+
thresholdsMs: {
|
|
774
|
+
...DEFAULT_THRESHOLDS,
|
|
775
|
+
ack: Number.MAX_SAFE_INTEGER,
|
|
776
|
+
awarenessPing: Number.MAX_SAFE_INTEGER,
|
|
777
|
+
},
|
|
761
778
|
})
|
|
762
779
|
startTurn('k', 0)
|
|
763
780
|
__tickForTests(75_000)
|
|
@@ -860,3 +877,96 @@ describe('silence-poke — performance', () => {
|
|
|
860
877
|
expect(elapsed).toBeLessThan(50)
|
|
861
878
|
})
|
|
862
879
|
})
|
|
880
|
+
|
|
881
|
+
describe('silence-poke — awareness ping (early framework-owned user-visible status)', () => {
|
|
882
|
+
it('fires once at 60s when no outbound has happened', () => {
|
|
883
|
+
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
884
|
+
startTurn('k', 0)
|
|
885
|
+
__tickForTests(59_000)
|
|
886
|
+
expect(fx.awarenessPings.length).toBe(0)
|
|
887
|
+
__tickForTests(60_000)
|
|
888
|
+
expect(fx.awarenessPings.length).toBe(1)
|
|
889
|
+
expect(fx.awarenessPings[0]!.silenceMs).toBeGreaterThanOrEqual(60_000)
|
|
890
|
+
expect(fx.emitted.some(e => e.kind === 'awareness_ping_sent')).toBe(true)
|
|
891
|
+
})
|
|
892
|
+
|
|
893
|
+
it('is one-shot per turn — does not re-fire as silence continues', () => {
|
|
894
|
+
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
895
|
+
startTurn('k', 0)
|
|
896
|
+
__tickForTests(60_000)
|
|
897
|
+
__tickForTests(120_000)
|
|
898
|
+
__tickForTests(180_000)
|
|
899
|
+
expect(fx.awarenessPings.length).toBe(1)
|
|
900
|
+
})
|
|
901
|
+
|
|
902
|
+
it('is suppressed by an early outbound', () => {
|
|
903
|
+
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
904
|
+
startTurn('k', 0)
|
|
905
|
+
noteOutbound('k', 30_000)
|
|
906
|
+
__tickForTests(90_000)
|
|
907
|
+
expect(fx.awarenessPings.length).toBe(0)
|
|
908
|
+
})
|
|
909
|
+
|
|
910
|
+
it('is suppressed when subagentDispatchActive is true', () => {
|
|
911
|
+
// Sub-agent dispatch already widens soft to 300s; the awareness-ping
|
|
912
|
+
// should also defer so we don't pre-empt the sub-agent's natural
|
|
913
|
+
// progress signal.
|
|
914
|
+
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
915
|
+
startTurn('k', 0)
|
|
916
|
+
noteSubagentDispatch('k')
|
|
917
|
+
__tickForTests(120_000)
|
|
918
|
+
expect(fx.awarenessPings.length).toBe(0)
|
|
919
|
+
})
|
|
920
|
+
|
|
921
|
+
it('does NOT advance the soft/firm/fallback ladder', () => {
|
|
922
|
+
// Awareness ping is a sibling signal; soft/firm/fallback continue
|
|
923
|
+
// to escalate on their own schedule (and the model-targeted ack-poke
|
|
924
|
+
// similarly remains independent).
|
|
925
|
+
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
926
|
+
startTurn('k', 0)
|
|
927
|
+
__tickForTests(60_000) // awareness fires
|
|
928
|
+
__tickForTests(75_000) // soft fires
|
|
929
|
+
__tickForTests(180_000) // firm fires
|
|
930
|
+
__tickForTests(300_000) // fallback fires
|
|
931
|
+
expect(fx.awarenessPings.length).toBe(1)
|
|
932
|
+
expect(fx.fallbacks.length).toBe(1)
|
|
933
|
+
expect(fx.emitted.filter(e => e.kind === 'silence_poke_fired').map(e => (e as { level: string }).level))
|
|
934
|
+
.toEqual(['soft', 'firm'])
|
|
935
|
+
expect(fx.emitted.some(e => e.kind === 'silence_fallback_sent')).toBe(true)
|
|
936
|
+
})
|
|
937
|
+
|
|
938
|
+
it('carries fallbackKind=thinking when a recent thinking event landed', () => {
|
|
939
|
+
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
940
|
+
startTurn('k', 0)
|
|
941
|
+
noteThinking('k', 45_000)
|
|
942
|
+
__tickForTests(60_000)
|
|
943
|
+
expect(fx.awarenessPings.length).toBe(1)
|
|
944
|
+
expect(fx.awarenessPings[0]!.fallbackKind).toBe('thinking')
|
|
945
|
+
})
|
|
946
|
+
|
|
947
|
+
it('does not fire if turn ends before the threshold', () => {
|
|
948
|
+
const fx = setupDeps({ thresholds: { awarenessPing: 60_000 } })
|
|
949
|
+
startTurn('k', 0)
|
|
950
|
+
endTurn('k')
|
|
951
|
+
__tickForTests(120_000)
|
|
952
|
+
expect(fx.awarenessPings.length).toBe(0)
|
|
953
|
+
})
|
|
954
|
+
|
|
955
|
+
it('handler errors do not break the timer', () => {
|
|
956
|
+
const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
|
|
957
|
+
__setDepsForTests({
|
|
958
|
+
emitMetric: (e) => fx.emitted.push(e),
|
|
959
|
+
onFrameworkFallback: () => {},
|
|
960
|
+
onAwarenessPing: () => { throw new Error('awareness handler boom') },
|
|
961
|
+
thresholdsMs: {
|
|
962
|
+
...DEFAULT_THRESHOLDS,
|
|
963
|
+
ack: Number.MAX_SAFE_INTEGER,
|
|
964
|
+
awarenessPing: 60_000,
|
|
965
|
+
},
|
|
966
|
+
})
|
|
967
|
+
startTurn('k', 0)
|
|
968
|
+
expect(() => __tickForTests(60_000)).not.toThrow()
|
|
969
|
+
// Telemetry still emitted
|
|
970
|
+
expect(fx.emitted.some(e => e.kind === 'awareness_ping_sent')).toBe(true)
|
|
971
|
+
})
|
|
972
|
+
})
|