switchroom 0.13.55 → 0.13.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +80 -80
- package/dist/auth-broker/index.js +80 -80
- package/dist/cli/ack-first-pretool.mjs +75 -0
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/notion-write-pretool.mjs +90 -84
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +367 -358
- package/dist/host-control/main.js +148 -148
- package/dist/vault/approvals/kernel-server.js +82 -82
- package/dist/vault/broker/server.js +83 -83
- package/package.json +1 -1
- package/skills/notion/SKILL.md +13 -9
- package/telegram-plugin/ack-flag.ts +66 -0
- package/telegram-plugin/dist/bridge/bridge.js +112 -112
- package/telegram-plugin/dist/gateway/gateway.js +991 -601
- package/telegram-plugin/dist/server.js +160 -160
- package/telegram-plugin/gateway/gateway.ts +151 -1
- package/telegram-plugin/runtime-metrics.ts +17 -0
- package/telegram-plugin/silence-poke.ts +82 -0
- package/telegram-plugin/tests/ack-flag.test.ts +65 -0
- package/telegram-plugin/tests/post-fallback-outbound-count.test.ts +78 -0
- package/telegram-plugin/tests/silence-poke.test.ts +117 -7
- package/telegram-plugin/tests/tool-intent-surface.test.ts +128 -0
- package/telegram-plugin/tool-intent-surface.ts +155 -0
|
@@ -53,6 +53,7 @@ import { OutboundDedupCache } from '../recent-outbound-dedup.js'
|
|
|
53
53
|
import { createInboundCoalescer, inboundCoalesceKey } from './inbound-coalesce.js'
|
|
54
54
|
import { StatusReactionController } from '../status-reactions.js'
|
|
55
55
|
import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
|
|
56
|
+
import { deriveIntentSurface } from '../tool-intent-surface.js'
|
|
56
57
|
import { toolLabel } from '../tool-labels.js'
|
|
57
58
|
import { createTypingWrapper } from '../typing-wrap.js'
|
|
58
59
|
import { type DraftStreamHandle } from '../draft-stream.js'
|
|
@@ -80,6 +81,7 @@ import { classifyInbound } from '../inbound-classifier.js'
|
|
|
80
81
|
import * as silencePoke from '../silence-poke.js'
|
|
81
82
|
import * as pendingProgress from '../pending-work-progress.js'
|
|
82
83
|
import { writeSilentEndState, clearSilentEndState, recordUndeliveredTurnEnd } from '../silent-end.js'
|
|
84
|
+
import { markAckSent, clearAckSent } from '../ack-flag.js'
|
|
83
85
|
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
84
86
|
import { createAnswerStream, type AnswerStreamHandle } from '../answer-stream.js'
|
|
85
87
|
import { type SessionEvent } from '../session-tail.js'
|
|
@@ -1290,6 +1292,15 @@ type CurrentTurn = {
|
|
|
1290
1292
|
// Phase 1 of #332: count of tool_use events in the current turn, for
|
|
1291
1293
|
// the tool_call_count column in the turns registry.
|
|
1292
1294
|
toolCallCount: number
|
|
1295
|
+
// Tool-intent surface (the human-feel UX follow-up to #1921's
|
|
1296
|
+
// PreToolUse gate). When the model emits its first non-reply tool_use
|
|
1297
|
+
// of a turn AND no outbound has happened yet, the gateway lifts the
|
|
1298
|
+
// tool's already-formed intent (name + input → `toolLabel()`) into a
|
|
1299
|
+
// user-visible "<i>running</i>: ls -la /var/log" message. One-shot
|
|
1300
|
+
// per turn — subsequent tool_use events stay quiet so a multi-tool
|
|
1301
|
+
// turn doesn't spam. The model never has to call reply just to ack;
|
|
1302
|
+
// its own intent stream IS the ack source.
|
|
1303
|
+
intentSurfaceFired: boolean
|
|
1293
1304
|
// Issue #195 — answer-lane streaming. Lazily created on the first text
|
|
1294
1305
|
// event of a turn (once enough text has accumulated, the stream itself
|
|
1295
1306
|
// gates on minInitialChars). Materialized and cleared at turn_end.
|
|
@@ -3321,6 +3332,43 @@ silencePoke.startTimer({
|
|
|
3321
3332
|
// Re-emit through the unified runtime-metrics fan-out (PostHog + JSONL).
|
|
3322
3333
|
emitRuntimeMetric(event)
|
|
3323
3334
|
},
|
|
3335
|
+
onAwarenessPing: async (ctx) => {
|
|
3336
|
+
// Early framework-owned awareness signal (~60s) so the user never
|
|
3337
|
+
// faces a silent chat while the model is busy / held / thinking.
|
|
3338
|
+
// Distinct from the 300s onFrameworkFallback: fires earlier, sends
|
|
3339
|
+
// a SILENT message (disable_notification: true — ambient liveness,
|
|
3340
|
+
// not a device buzz), and is bounded to ONE per turn by the silence-
|
|
3341
|
+
// poke module's `awarenessPingFired` flag. Reuses
|
|
3342
|
+
// `formatFrameworkFallbackText` so the wording stays consistent and
|
|
3343
|
+
// in-flight tools are named when known. If the model has been
|
|
3344
|
+
// silent long enough to cross 300s, the heavier framework_fallback
|
|
3345
|
+
// escalates with a notification.
|
|
3346
|
+
//
|
|
3347
|
+
// Late-fire guard mirrors the framework_fallback handler: skip if
|
|
3348
|
+
// the turn ended cleanly between the silence-poke arming and this
|
|
3349
|
+
// timer-fired handler so we don't talk over a clean response.
|
|
3350
|
+
if (activeTurnStartedAt.get(ctx.key) == null && currentTurn == null) {
|
|
3351
|
+
return
|
|
3352
|
+
}
|
|
3353
|
+
const text = silencePoke.formatFrameworkFallbackText(
|
|
3354
|
+
ctx.fallbackKind,
|
|
3355
|
+
ctx.silenceMs,
|
|
3356
|
+
ctx.inFlightTools,
|
|
3357
|
+
)
|
|
3358
|
+
try {
|
|
3359
|
+
await robustApiCall(
|
|
3360
|
+
() => bot.api.sendMessage(ctx.chatId, text, {
|
|
3361
|
+
...(ctx.threadId != null ? { message_thread_id: ctx.threadId } : {}),
|
|
3362
|
+
disable_notification: true,
|
|
3363
|
+
}),
|
|
3364
|
+
{ chat_id: ctx.chatId, ...(ctx.threadId != null ? { threadId: ctx.threadId } : {}) },
|
|
3365
|
+
)
|
|
3366
|
+
} catch (err) {
|
|
3367
|
+
process.stderr.write(
|
|
3368
|
+
`silence-poke awareness-ping sendMessage failed chat=${ctx.chatId} thread=${ctx.threadId}: ${err}\n`,
|
|
3369
|
+
)
|
|
3370
|
+
}
|
|
3371
|
+
},
|
|
3324
3372
|
onFrameworkFallback: async (ctx) => {
|
|
3325
3373
|
// Late-fire short-circuit (2026-05-23 audit finding). The fallback
|
|
3326
3374
|
// can race a clean turn-end: the model's actual reply lands inside
|
|
@@ -3429,7 +3477,18 @@ silencePoke.startTimer({
|
|
|
3429
3477
|
longest_silent_gap_ms: outboundMetrics.longestOutboundGapMs,
|
|
3430
3478
|
ended_via: 'framework_fallback',
|
|
3431
3479
|
})
|
|
3432
|
-
signalTracker.clear(fbKey)
|
|
3480
|
+
// #1892-follow-up: do NOT clear signalTracker state here. When the
|
|
3481
|
+
// model recovers post-fallback (the framework's user-visible
|
|
3482
|
+
// "still working" message is the load-bearing unwedge primitive —
|
|
3483
|
+
// see `project_silence_poke_broken_and_cross_turn_fix`), its late
|
|
3484
|
+
// reply calls fire `signalTracker.noteOutbound(fbKey, ...)`. If
|
|
3485
|
+
// state is already cleared, that's a silent no-op and the late
|
|
3486
|
+
// reply is invisible to outbound_count + ttfo metrics — the
|
|
3487
|
+
// canonical session-end path (silent-marker line 7407 or normal
|
|
3488
|
+
// turn-end line 7502) emits turn_ended a second time, reading
|
|
3489
|
+
// the empty state and reporting outbound_count=0 even though
|
|
3490
|
+
// replies landed. Defer clear to the canonical paths so
|
|
3491
|
+
// post-fallback recoveries are correctly counted.
|
|
3433
3492
|
}
|
|
3434
3493
|
// Stamp the turn-DB end row as `timeout` so the wedged turn doesn't
|
|
3435
3494
|
// stay open until a SIGTERM/restart relabels it (false-negative for
|
|
@@ -4896,6 +4955,16 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4896
4955
|
// silence-poke clock so the next poke is measured from this send.
|
|
4897
4956
|
signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
4898
4957
|
silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
4958
|
+
// Ack-first gate (`reference/conversational-pacing.md` beat 1):
|
|
4959
|
+
// touch the state-dir flag so the ack-first-pretool hook lets
|
|
4960
|
+
// subsequent non-reply tool calls through this turn. Cleared at
|
|
4961
|
+
// the next turn's `enqueue` event. Best-effort — a write failure shouldn't break
|
|
4962
|
+
// reply, and the hook is kill-switched anyway.
|
|
4963
|
+
try {
|
|
4964
|
+
markAckSent()
|
|
4965
|
+
} catch (err) {
|
|
4966
|
+
process.stderr.write(`telegram gateway: markAckSent failed: ${err}\n`)
|
|
4967
|
+
}
|
|
4899
4968
|
// #1741 — only clear silent-end state on a plausibly-final reply.
|
|
4900
4969
|
// An interim ack (disable_notification:true, short text, no done)
|
|
4901
4970
|
// must NOT clear the state file; otherwise a turn that ends with
|
|
@@ -5491,6 +5560,13 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
5491
5560
|
const sKey = statusKey(streamChatId, streamThreadId)
|
|
5492
5561
|
signalTracker.noteOutbound(sKey, Date.now())
|
|
5493
5562
|
silencePoke.noteOutbound(sKey, Date.now())
|
|
5563
|
+
// Ack-first gate: stream_reply's first emit also unlocks subsequent
|
|
5564
|
+
// tool calls. See ack-flag.ts + ack-first-pretool.ts.
|
|
5565
|
+
try {
|
|
5566
|
+
markAckSent()
|
|
5567
|
+
} catch (err) {
|
|
5568
|
+
process.stderr.write(`telegram gateway: markAckSent (stream_reply) failed: ${err}\n`)
|
|
5569
|
+
}
|
|
5494
5570
|
// #1741 — see executeReply for the rationale: only a plausibly-
|
|
5495
5571
|
// final stream_reply clears the silent-end state. An interim
|
|
5496
5572
|
// ack via stream_reply must NOT clear; the Stop hook needs
|
|
@@ -6725,6 +6801,14 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6725
6801
|
statusKey(ev.chatId, enqThreadId),
|
|
6726
6802
|
'handback',
|
|
6727
6803
|
)
|
|
6804
|
+
// Ack-first gate (`reference/conversational-pacing.md` beat 1):
|
|
6805
|
+
// wipe the prior turn's `ack-sent.flag` so the ack-first-
|
|
6806
|
+
// pretool hook re-arms for this fresh turn. Centralised HERE
|
|
6807
|
+
// (not in handleInbound) because `enqueue` is the single
|
|
6808
|
+
// canonical fresh-turn atom — fires for real inbounds, cron
|
|
6809
|
+
// fires, subagent-handback channel wakes, vault-grant resumes,
|
|
6810
|
+
// and restart markers alike. Best-effort — see ack-flag.ts.
|
|
6811
|
+
clearAckSent()
|
|
6728
6812
|
}
|
|
6729
6813
|
if (ev.chatId) {
|
|
6730
6814
|
// Issue #195: if a previous turn left an answer-lane stream open
|
|
@@ -6758,6 +6842,7 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6758
6842
|
lastAssistantMsgId: null,
|
|
6759
6843
|
lastAssistantDone: false,
|
|
6760
6844
|
toolCallCount: 0,
|
|
6845
|
+
intentSurfaceFired: false,
|
|
6761
6846
|
answerStream: null,
|
|
6762
6847
|
isDm: isDmChatId(ev.chatId),
|
|
6763
6848
|
}
|
|
@@ -6875,6 +6960,66 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
6875
6960
|
turn.orphanedReplyTimeoutId = null
|
|
6876
6961
|
}
|
|
6877
6962
|
}
|
|
6963
|
+
// Tool-intent surface — companion to the PreToolUse ack-first gate
|
|
6964
|
+
// (#1921). On the FIRST non-reply tool_use of a turn AND only when
|
|
6965
|
+
// no outbound has happened yet, the gateway lifts the model's tool
|
|
6966
|
+
// intent (name + input → `toolLabel()`) into a brief framework-voice
|
|
6967
|
+
// status: `<i>running:</i> ls -la /var/log`. The model never has to
|
|
6968
|
+
// call reply just to ack — its own intent stream IS the ack. The
|
|
6969
|
+
// gate continues to fire IN PARALLEL: if it produces a model-voice
|
|
6970
|
+
// ack first (`replyCalled=true`), the surface stays quiet by the
|
|
6971
|
+
// condition below. One-shot per turn.
|
|
6972
|
+
if (
|
|
6973
|
+
!turn.replyCalled
|
|
6974
|
+
&& !turn.intentSurfaceFired
|
|
6975
|
+
&& !isTelegramSurfaceTool(name)
|
|
6976
|
+
) {
|
|
6977
|
+
turn.intentSurfaceFired = true
|
|
6978
|
+
// `ev.input` is the canonical SessionEvent property
|
|
6979
|
+
// (`telegram-plugin/session-tail.ts:95`). All other tool_use
|
|
6980
|
+
// sites in this file use `ev.input` — keep that consistent.
|
|
6981
|
+
const surface = deriveIntentSurface(name, ev.input, ev.precomputedLabel)
|
|
6982
|
+
if (surface.text != null) {
|
|
6983
|
+
// Mark the ack-flag synchronously BEFORE the async send so a
|
|
6984
|
+
// PreToolUse ack-first hook (#1921) firing concurrently for this
|
|
6985
|
+
// same tool call sees the flag already present and allows the
|
|
6986
|
+
// tool through. The Telegram send is fire-and-forget; failure
|
|
6987
|
+
// is logged but does not block the model.
|
|
6988
|
+
try {
|
|
6989
|
+
markAckSent()
|
|
6990
|
+
} catch (err) {
|
|
6991
|
+
process.stderr.write(`telegram gateway: intent-surface markAckSent failed: ${err}\n`)
|
|
6992
|
+
}
|
|
6993
|
+
const surfaceChat = turn.sessionChatId
|
|
6994
|
+
const surfaceThread = turn.sessionThreadId
|
|
6995
|
+
const surfaceText = surface.text
|
|
6996
|
+
void (async () => {
|
|
6997
|
+
try {
|
|
6998
|
+
await robustApiCall(
|
|
6999
|
+
() => bot.api.sendMessage(surfaceChat, surfaceText, {
|
|
7000
|
+
...(surfaceThread != null ? { message_thread_id: surfaceThread } : {}),
|
|
7001
|
+
parse_mode: 'HTML',
|
|
7002
|
+
// Framework-narrating beat — silent, ambient, not a
|
|
7003
|
+
// device buzz. The user is meant to glance and know
|
|
7004
|
+
// the model is alive + on-task.
|
|
7005
|
+
disable_notification: true,
|
|
7006
|
+
}),
|
|
7007
|
+
{ chat_id: surfaceChat, ...(surfaceThread != null ? { threadId: surfaceThread } : {}), verb: 'intent-surface' },
|
|
7008
|
+
)
|
|
7009
|
+
// Deliberately NOT calling signalTracker.noteOutbound /
|
|
7010
|
+
// silencePoke.noteOutbound here — framework-owned
|
|
7011
|
+
// ambient messages are not model-author outbounds, so
|
|
7012
|
+
// they should not reset the TTFO clock or short-circuit
|
|
7013
|
+
// the silence-poke ladder. Mirrors the sibling
|
|
7014
|
+
// `onAwarenessPing` handler (silence-poke.ts:169
|
|
7015
|
+
// contract: "Caller must NOT call back into noteOutbound
|
|
7016
|
+
// for this — it's a framework-sourced message").
|
|
7017
|
+
} catch (err) {
|
|
7018
|
+
process.stderr.write(`telegram gateway: intent-surface send failed: ${err}\n`)
|
|
7019
|
+
}
|
|
7020
|
+
})()
|
|
7021
|
+
}
|
|
7022
|
+
}
|
|
6878
7023
|
if (!ctrl) return
|
|
6879
7024
|
if (isTelegramSurfaceTool(name)) return
|
|
6880
7025
|
ctrl.setTool(name)
|
|
@@ -8985,6 +9130,11 @@ async function handleInbound(
|
|
|
8985
9130
|
// the framework can nudge the model if it goes quiet past the
|
|
8986
9131
|
// soft / firm thresholds.
|
|
8987
9132
|
silencePoke.startTurn(statusKey(chat_id, messageThreadId), Date.now())
|
|
9133
|
+
// Ack-first gate clear is centralised in handleSessionEvent's
|
|
9134
|
+
// `enqueue` branch — that fires for EVERY fresh turn atom
|
|
9135
|
+
// (real inbound, cron, subagent-handback, vault-grant wake,
|
|
9136
|
+
// restart marker) so cron/handback turns also re-arm the gate.
|
|
9137
|
+
// See the `clearAckSent()` call site under `case 'enqueue'` in handleSessionEvent (~line 6811).
|
|
8988
9138
|
// #1445 cross-turn pending-async ambient. A new turn starting
|
|
8989
9139
|
// (user inbound, synthesised wake, or handback channel) is the
|
|
8990
9140
|
// signal that the model is about to re-engage — clear any
|
|
@@ -104,6 +104,23 @@ export type RuntimeMetricEvent =
|
|
|
104
104
|
fallback_kind: 'working' | 'thinking'
|
|
105
105
|
silence_ms: number
|
|
106
106
|
}
|
|
107
|
+
/**
|
|
108
|
+
* Awareness ping (~60s, default): framework-owned user-visible
|
|
109
|
+
* "still working… / still thinking…" message sent BEFORE the 300s
|
|
110
|
+
* fallback so the user never faces a silent chat for the full 5
|
|
111
|
+
* minutes. Silent (no device ping); one-shot per turn; suppressed
|
|
112
|
+
* by any outbound or sub-agent dispatch. A high rate is the
|
|
113
|
+
* diagnostic signal that frequent silences exist (held-inbound,
|
|
114
|
+
* extended-thinking, slow startup), and the rate of the heavier
|
|
115
|
+
* silence_fallback_sent that still follows tells us how many of
|
|
116
|
+
* those escalate all the way to 5 min.
|
|
117
|
+
*/
|
|
118
|
+
| {
|
|
119
|
+
kind: 'awareness_ping_sent'
|
|
120
|
+
key: string
|
|
121
|
+
fallback_kind: 'working' | 'thinking'
|
|
122
|
+
silence_ms: number
|
|
123
|
+
}
|
|
107
124
|
/**
|
|
108
125
|
* #1445 cross-turn pending-async ambient lifecycle. `started` fires
|
|
109
126
|
* when a turn ends with a captured anchor AND a pending Agent/Task/
|
|
@@ -80,6 +80,13 @@ export interface SilencePokeState {
|
|
|
80
80
|
* the ack nudge is specifically about the *first* outbound, so it
|
|
81
81
|
* never re-arms even after the model later goes quiet again. */
|
|
82
82
|
ackPokeFired: boolean
|
|
83
|
+
/** True once the early awareness-ping has fired this turn. One-shot:
|
|
84
|
+
* the user only needs one "we know it's slow" cue before the heavier
|
|
85
|
+
* 300s fallback escalates. Independent of the ack-poke (which targets
|
|
86
|
+
* the model via piggyback) — awareness-ping targets the user directly
|
|
87
|
+
* via Telegram, so it lands even during pure-thinking or held-inbound
|
|
88
|
+
* silences when no tool_result is available to piggyback on. */
|
|
89
|
+
awarenessPingFired: boolean
|
|
83
90
|
/** Wall-clock ms of last poke fire — used for poke-success latency. */
|
|
84
91
|
lastPokeFiredAt: number | null
|
|
85
92
|
/** #1292: in-flight tool calls keyed by toolUseId. Populated by
|
|
@@ -101,6 +108,14 @@ export interface ThresholdsMs {
|
|
|
101
108
|
* 75s `soft` threshold, which measures silence-since-last-outbound
|
|
102
109
|
* and is the wrong instrument for "you never said hello." */
|
|
103
110
|
ack: number
|
|
111
|
+
/** Awareness ping: if NO outbound has landed this many ms after turn
|
|
112
|
+
* start, send a framework-owned user-visible "still working…" status
|
|
113
|
+
* message directly via Telegram. Sits between the model-targeted ack
|
|
114
|
+
* poke (10s) and the 300s framework_fallback. Lands even during pure
|
|
115
|
+
* extended-thinking or held-inbound silences when no tool_result is
|
|
116
|
+
* available to piggyback the model-targeted pokes onto. One-shot per
|
|
117
|
+
* turn — the 300s fallback handles further escalation if still silent. */
|
|
118
|
+
awarenessPing: number
|
|
104
119
|
soft: number
|
|
105
120
|
firm: number
|
|
106
121
|
fallback: number
|
|
@@ -112,6 +127,7 @@ export interface ThresholdsMs {
|
|
|
112
127
|
|
|
113
128
|
export const DEFAULT_THRESHOLDS: ThresholdsMs = {
|
|
114
129
|
ack: 10_000,
|
|
130
|
+
awarenessPing: 60_000,
|
|
115
131
|
soft: 75_000,
|
|
116
132
|
firm: 180_000,
|
|
117
133
|
fallback: 300_000,
|
|
@@ -145,6 +161,7 @@ export type SilencePokeMetric =
|
|
|
145
161
|
| { kind: 'silence_poke_fired'; key: string; level: PokeLevel; silence_ms: number; subagent_wait: boolean }
|
|
146
162
|
| { kind: 'silence_poke_succeeded'; key: string; level: PokeLevel; latency_ms: number }
|
|
147
163
|
| { kind: 'silence_fallback_sent'; key: string; fallback_kind: 'working' | 'thinking'; silence_ms: number }
|
|
164
|
+
| { kind: 'awareness_ping_sent'; key: string; fallback_kind: 'working' | 'thinking'; silence_ms: number }
|
|
148
165
|
|
|
149
166
|
export interface SilencePokeDeps {
|
|
150
167
|
/** Called when the 300s fallback fires. Caller sends the user-visible
|
|
@@ -153,6 +170,12 @@ export interface SilencePokeDeps {
|
|
|
153
170
|
* not a model-sourced one, and we want pokes to continue (well, no,
|
|
154
171
|
* fallbackFired ensures only one per turn anyway). */
|
|
155
172
|
onFrameworkFallback: (ctx: FrameworkFallbackContext) => Promise<void> | void
|
|
173
|
+
/** Called when the awareness ping fires (default 60s). Caller sends
|
|
174
|
+
* a user-visible "still working…" message — silent (no device ping),
|
|
175
|
+
* framework-owned, one-shot per turn. Reuses the same context shape
|
|
176
|
+
* as onFrameworkFallback so the gateway can reuse `formatFrameworkFallbackText`
|
|
177
|
+
* and the in-flight-tool enrichment. */
|
|
178
|
+
onAwarenessPing: (ctx: FrameworkFallbackContext) => Promise<void> | void
|
|
156
179
|
/** Telemetry sink for poke events. */
|
|
157
180
|
emitMetric: (event: SilencePokeMetric) => void
|
|
158
181
|
/** Threshold overrides (tests). */
|
|
@@ -188,6 +211,7 @@ export function startTurn(key: string, now: number): void {
|
|
|
188
211
|
lastThinkingAt: null,
|
|
189
212
|
fallbackFired: false,
|
|
190
213
|
ackPokeFired: false,
|
|
214
|
+
awarenessPingFired: false,
|
|
191
215
|
lastPokeFiredAt: null,
|
|
192
216
|
inFlightTools: new Map(),
|
|
193
217
|
})
|
|
@@ -485,6 +509,64 @@ function tick(now: number): void {
|
|
|
485
509
|
continue
|
|
486
510
|
}
|
|
487
511
|
|
|
512
|
+
// Awareness ping — framework-owned user-visible status BEFORE the
|
|
513
|
+
// 300s heavy fallback. Lands at ~60s even when the model is in pure
|
|
514
|
+
// extended-thinking or the inbound is held (#1892 follow-up), since
|
|
515
|
+
// it's delivered directly via the gateway → Telegram, not
|
|
516
|
+
// piggybacked on tool_result. One-shot per turn; suppressed if any
|
|
517
|
+
// outbound has happened. The 300s fallback is unchanged and
|
|
518
|
+
// escalates further if silence persists.
|
|
519
|
+
//
|
|
520
|
+
// Independent of pokesFired so soft/firm/fallback still escalate on
|
|
521
|
+
// their own schedule. Independent of ackPokeFired so a long-running
|
|
522
|
+
// turn that already received the ack-poke (but has still sent no outbound)
|
|
523
|
+
// still gets the user-facing awareness ping.
|
|
524
|
+
if (
|
|
525
|
+
!s.awarenessPingFired
|
|
526
|
+
&& s.lastOutboundAt == null
|
|
527
|
+
&& !s.subagentDispatchActive
|
|
528
|
+
&& silence >= thresholds.awarenessPing
|
|
529
|
+
) {
|
|
530
|
+
s.awarenessPingFired = true
|
|
531
|
+
const { chatId, threadId } = parseKey(key)
|
|
532
|
+
const recentThinking = s.lastThinkingAt != null
|
|
533
|
+
&& (now - s.lastThinkingAt) < 30_000
|
|
534
|
+
const fallbackKind: 'working' | 'thinking' = recentThinking ? 'thinking' : 'working'
|
|
535
|
+
const inFlightTools: ToolSnapshot[] = Array.from(s.inFlightTools.values())
|
|
536
|
+
.sort((a, b) => a.startedAt - b.startedAt)
|
|
537
|
+
.map(t => ({
|
|
538
|
+
name: t.name,
|
|
539
|
+
label: t.label,
|
|
540
|
+
durationMs: now - t.startedAt,
|
|
541
|
+
}))
|
|
542
|
+
activeDeps.emitMetric({
|
|
543
|
+
kind: 'awareness_ping_sent',
|
|
544
|
+
key,
|
|
545
|
+
fallback_kind: fallbackKind,
|
|
546
|
+
silence_ms: silence,
|
|
547
|
+
})
|
|
548
|
+
try {
|
|
549
|
+
const ret = activeDeps.onAwarenessPing({
|
|
550
|
+
key,
|
|
551
|
+
chatId,
|
|
552
|
+
threadId,
|
|
553
|
+
fallbackKind,
|
|
554
|
+
silenceMs: silence,
|
|
555
|
+
inFlightTools,
|
|
556
|
+
})
|
|
557
|
+
if (ret != null && typeof (ret as Promise<unknown>).then === 'function') {
|
|
558
|
+
;(ret as Promise<unknown>).catch(err => {
|
|
559
|
+
process.stderr.write(`silence-poke: awareness-ping handler rejected: ${err}\n`)
|
|
560
|
+
})
|
|
561
|
+
}
|
|
562
|
+
} catch (err) {
|
|
563
|
+
process.stderr.write(`silence-poke: awareness-ping handler threw: ${err}\n`)
|
|
564
|
+
}
|
|
565
|
+
// Don't `continue` — soft/firm/fallback can still arm in the same tick
|
|
566
|
+
// if their thresholds have also been crossed. Awareness-ping is a
|
|
567
|
+
// sibling signal, not part of the ladder.
|
|
568
|
+
}
|
|
569
|
+
|
|
488
570
|
if (s.pokesFired === 0 && silence >= softThreshold) {
|
|
489
571
|
s.pokeArmed = { level: 'soft' }
|
|
490
572
|
s.pokesFired = 1
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
2
|
+
import { mkdtempSync, rmSync, existsSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
|
|
6
|
+
import { markAckSent, clearAckSent, ACK_SENT_MARKER } from "../ack-flag.js";
|
|
7
|
+
|
|
8
|
+
describe("ack-flag — gateway-side state file for ack-first hook", () => {
|
|
9
|
+
let stateDir: string;
|
|
10
|
+
let prevEnv: string | undefined;
|
|
11
|
+
|
|
12
|
+
beforeEach(() => {
|
|
13
|
+
stateDir = mkdtempSync(join(tmpdir(), "ack-flag-"));
|
|
14
|
+
prevEnv = process.env.TELEGRAM_STATE_DIR;
|
|
15
|
+
process.env.TELEGRAM_STATE_DIR = stateDir;
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
afterEach(() => {
|
|
19
|
+
if (prevEnv != null) process.env.TELEGRAM_STATE_DIR = prevEnv;
|
|
20
|
+
else delete process.env.TELEGRAM_STATE_DIR;
|
|
21
|
+
rmSync(stateDir, { recursive: true, force: true });
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it("markAckSent creates the marker file", () => {
|
|
25
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
26
|
+
markAckSent();
|
|
27
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("markAckSent is idempotent (second call is a no-op)", () => {
|
|
31
|
+
markAckSent();
|
|
32
|
+
expect(() => markAckSent()).not.toThrow();
|
|
33
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
it("clearAckSent removes the marker file", () => {
|
|
37
|
+
markAckSent();
|
|
38
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
39
|
+
clearAckSent();
|
|
40
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
it("clearAckSent is idempotent (works when marker doesn't exist)", () => {
|
|
44
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
45
|
+
expect(() => clearAckSent()).not.toThrow();
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("round-trip: mark → clear → mark cycles work across turns", () => {
|
|
49
|
+
// turn 1
|
|
50
|
+
markAckSent();
|
|
51
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
52
|
+
// turn end / next turn start
|
|
53
|
+
clearAckSent();
|
|
54
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(false);
|
|
55
|
+
// turn 2 first reply
|
|
56
|
+
markAckSent();
|
|
57
|
+
expect(existsSync(join(stateDir, ACK_SENT_MARKER))).toBe(true);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it("noop when TELEGRAM_STATE_DIR is unset (best-effort)", () => {
|
|
61
|
+
delete process.env.TELEGRAM_STATE_DIR;
|
|
62
|
+
expect(() => markAckSent()).not.toThrow();
|
|
63
|
+
expect(() => clearAckSent()).not.toThrow();
|
|
64
|
+
});
|
|
65
|
+
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
reset,
|
|
4
|
+
noteOutbound,
|
|
5
|
+
getOutboundMetrics,
|
|
6
|
+
clear,
|
|
7
|
+
__resetAllForTests,
|
|
8
|
+
} from '../turn-signal-tracker.js'
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* switchroom#1892-follow-up — regression guard for the post-fallback
|
|
12
|
+
* outbound under-count.
|
|
13
|
+
*
|
|
14
|
+
* Scenario reproduced from clerk's 2026-05-27 wedge:
|
|
15
|
+
* 1. Turn starts (signalTracker.reset)
|
|
16
|
+
* 2. Model goes silent for 300s — silence-poke ladder fires, then
|
|
17
|
+
* framework_fallback fires + emits turn_ended with outbound_count=0
|
|
18
|
+
* 3. Model recovers — sends 2 late reply tool calls → executeReply
|
|
19
|
+
* calls signalTracker.noteOutbound
|
|
20
|
+
* 4. Canonical silent-marker turn-end path runs → reads metrics
|
|
21
|
+
* again → emits final turn_ended
|
|
22
|
+
*
|
|
23
|
+
* Pre-fix (b6cd7e5...): framework_fallback called signalTracker.clear
|
|
24
|
+
* after emitting its turn_ended. Late noteOutbound calls hit cleared
|
|
25
|
+
* state and no-op. Silent-marker path re-read the empty default and
|
|
26
|
+
* reported outbound_count=0 again — late replies invisible to KPIs.
|
|
27
|
+
*
|
|
28
|
+
* Post-fix: framework_fallback no longer clears; only the canonical
|
|
29
|
+
* paths do. Late replies accumulate; the canonical turn_ended carries
|
|
30
|
+
* the correct count.
|
|
31
|
+
*/
|
|
32
|
+
beforeEach(() => {
|
|
33
|
+
__resetAllForTests()
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
describe('signal-tracker survives framework-fallback for late-reply accounting (#1892)', () => {
|
|
37
|
+
it('late noteOutbound after fallback (without clear) increments outbound_count', () => {
|
|
38
|
+
const key = 'chat:thread'
|
|
39
|
+
const turnStart = 1_000_000
|
|
40
|
+
reset(key, turnStart)
|
|
41
|
+
|
|
42
|
+
// Simulate framework_fallback at +300s — emits turn_ended with
|
|
43
|
+
// outbound_count=0 but DOES NOT clear (the fix).
|
|
44
|
+
const beforeFallback = getOutboundMetrics(key)
|
|
45
|
+
expect(beforeFallback.outboundCount).toBe(0)
|
|
46
|
+
expect(beforeFallback.ttfoMs).toBeNull()
|
|
47
|
+
|
|
48
|
+
// Model recovers post-fallback and sends two late replies.
|
|
49
|
+
noteOutbound(key, turnStart + 312_000) // first late reply at +312s
|
|
50
|
+
noteOutbound(key, turnStart + 358_000) // second late reply at +358s
|
|
51
|
+
|
|
52
|
+
// Canonical silent-marker turn-end reads metrics again and clears.
|
|
53
|
+
const afterRecovery = getOutboundMetrics(key)
|
|
54
|
+
expect(afterRecovery.outboundCount).toBe(2)
|
|
55
|
+
expect(afterRecovery.ttfoMs).toBe(312_000)
|
|
56
|
+
|
|
57
|
+
clear(key)
|
|
58
|
+
expect(getOutboundMetrics(key).outboundCount).toBe(0)
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
it('regression guard: clearing state mid-turn would silently lose late outbounds', () => {
|
|
62
|
+
// The buggy pre-fix sequence — kept here as the inverse case so a
|
|
63
|
+
// future change that re-introduces clear-at-fallback fails this
|
|
64
|
+
// test loudly with the late-reply count going to zero.
|
|
65
|
+
const key = 'chat:thread'
|
|
66
|
+
reset(key, 1_000_000)
|
|
67
|
+
clear(key) // simulates the pre-fix framework_fallback clear
|
|
68
|
+
|
|
69
|
+
noteOutbound(key, 1_000_312_000) // late reply after the bogus clear
|
|
70
|
+
noteOutbound(key, 1_000_358_000)
|
|
71
|
+
|
|
72
|
+
const metrics = getOutboundMetrics(key)
|
|
73
|
+
// This is the pre-fix bug: late replies are invisible because
|
|
74
|
+
// noteOutbound is a no-op when state is missing.
|
|
75
|
+
expect(metrics.outboundCount).toBe(0)
|
|
76
|
+
expect(metrics.ttfoMs).toBeNull()
|
|
77
|
+
})
|
|
78
|
+
})
|