switchroom 0.13.4 → 0.13.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -45
- package/dist/agent-scheduler/index.js +85 -80
- package/dist/auth-broker/index.js +85 -80
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +430 -360
- package/dist/host-control/main.js +501 -126
- package/dist/vault/approvals/kernel-server.js +88 -83
- package/dist/vault/broker/server.js +89 -84
- package/package.json +1 -1
- package/profiles/_shared/telegram-style.md.hbs +1 -1
- package/profiles/_shared/vault-protocol.md.hbs +12 -0
- package/telegram-plugin/dist/bridge/bridge.js +136 -112
- package/telegram-plugin/dist/gateway/gateway.js +255 -195
- package/telegram-plugin/dist/server.js +184 -160
- package/telegram-plugin/gateway/gateway.ts +46 -1
- package/telegram-plugin/model-unavailable.ts +4 -0
- package/telegram-plugin/runtime-metrics.ts +14 -8
- package/telegram-plugin/session-tail.ts +53 -0
- package/telegram-plugin/silence-poke.ts +49 -1
- package/telegram-plugin/tests/model-unavailable.test.ts +9 -0
- package/telegram-plugin/tests/operator-events-session-tail.test.ts +43 -0
- package/telegram-plugin/tests/silence-poke.test.ts +135 -3
- package/telegram-plugin/uat/scenarios/jtbd-fast-ack-dm.test.ts +217 -0
- package/telegram-plugin/uat/scenarios/jtbd-soft-commit-dm.test.ts +16 -11
|
@@ -264,7 +264,7 @@ import { createInboundSpool } from './inbound-spool.js'
|
|
|
264
264
|
import { purgeStaleTurnsForChat } from './turn-state-purge.js'
|
|
265
265
|
import { decideInboundDelivery } from './inbound-delivery-gate.js'
|
|
266
266
|
import { createPendingPermissionBuffer } from './pending-permission-decisions.js'
|
|
267
|
-
import { chatKey, chatKeyWithSuffix } from './chat-key.js'
|
|
267
|
+
import { chatKey, chatKeyWithSuffix, chatIdOfChatKey } from './chat-key.js'
|
|
268
268
|
// Phase 2b PR 2 — shadow mode. Each event-site below calls shadowEmit()
|
|
269
269
|
// to record what the InboundDeliveryStateMachine PREDICTS the gateway
|
|
270
270
|
// should do. Behavior unchanged in this PR — the imperative code below
|
|
@@ -1310,6 +1310,13 @@ function purgeReactionTracking(key: string, endingTurn?: CurrentTurn): void {
|
|
|
1310
1310
|
activeStatusReactions.delete(key)
|
|
1311
1311
|
activeReactionMsgIds.delete(key)
|
|
1312
1312
|
activeTurnStartedAt.delete(key)
|
|
1313
|
+
// Human-feel UX: stop the turn-long `typing…` indicator started in
|
|
1314
|
+
// the turn-start block. `purgeReactionTracking` is the canonical
|
|
1315
|
+
// turn-end, so this is the single owner of the stop. (If an abnormal
|
|
1316
|
+
// abort skips purge, the stray loop self-heals: the next turn on this
|
|
1317
|
+
// chat calls `startTurnTypingLoop`, which stops the old interval
|
|
1318
|
+
// first.)
|
|
1319
|
+
stopTurnTypingLoop(chatIdOfChatKey(key as _ChatKey))
|
|
1313
1320
|
if (msgInfo) {
|
|
1314
1321
|
const agentDir = resolveAgentDirFromEnv()
|
|
1315
1322
|
if (agentDir != null) removeActiveReaction(agentDir, msgInfo.chatId, msgInfo.messageId)
|
|
@@ -1781,6 +1788,32 @@ function stopTypingLoop(chat_id: string): void {
|
|
|
1781
1788
|
if (retry) { clearTimeout(retry); typingRetryTimers.delete(chat_id) }
|
|
1782
1789
|
}
|
|
1783
1790
|
|
|
1791
|
+
// Turn-level `typing…` indicator. Deliberately a SEPARATE interval map
|
|
1792
|
+
// from `typingIntervals` (which the reply handler and the tool-use
|
|
1793
|
+
// typing wrapper share and freely stop). If the turn loop lived in the
|
|
1794
|
+
// shared map, a mid-turn reply's `finally { stopTypingLoop }` would
|
|
1795
|
+
// kill it and the chat would go dark for the rest of the turn — the
|
|
1796
|
+
// exact black-box gap this is here to close. A dedicated map makes the
|
|
1797
|
+
// turn loop structurally immune to those stops: only `stopTurnTypingLoop`
|
|
1798
|
+
// (called at the canonical turn-end) clears it. The redundant `typing`
|
|
1799
|
+
// pings while a reply is mid-flight are harmless — same action, and
|
|
1800
|
+
// sendChatAction is cheap.
|
|
1801
|
+
/** Live turn-level typing timers, keyed by chat id (at most one per chat). */
const turnTypingIntervals = new Map<string, ReturnType<typeof setInterval>>()

/**
 * Begin the turn-level `typing` indicator for a chat.
 *
 * Any timer already registered for `chat_id` is stopped first, so calling
 * this repeatedly never stacks intervals. One `typing` chat action is sent
 * immediately and then again every 4000 ms until `stopTurnTypingLoop` is
 * called for the same chat.
 *
 * @param chat_id Chat to show the indicator in; also the map key.
 */
function startTurnTypingLoop(chat_id: string): void {
  stopTurnTypingLoop(chat_id)

  function pingTyping(): void {
    // Best-effort: a rejected sendChatAction is deliberately ignored.
    void bot.api.sendChatAction(chat_id, 'typing').catch(() => {})
  }

  pingTyping()
  const timer = setInterval(pingTyping, 4000)
  turnTypingIntervals.set(chat_id, timer)
}

/**
 * Stop the turn-level `typing` indicator for a chat, if one is running.
 * A chat with no registered timer is a no-op.
 *
 * @param chat_id Chat whose timer should be cleared and forgotten.
 */
function stopTurnTypingLoop(chat_id: string): void {
  const timer = turnTypingIntervals.get(chat_id)
  if (!timer) return
  clearInterval(timer)
  turnTypingIntervals.delete(chat_id)
}
|
|
1816
|
+
|
|
1784
1817
|
const typingWrapper = createTypingWrapper({
|
|
1785
1818
|
startTypingLoop,
|
|
1786
1819
|
stopTypingLoop,
|
|
@@ -7563,6 +7596,16 @@ async function handleInbound(
|
|
|
7563
7596
|
// the framework can nudge the model if it goes quiet past the
|
|
7564
7597
|
// soft / firm thresholds.
|
|
7565
7598
|
silencePoke.startTurn(statusKey(chat_id, messageThreadId), Date.now())
|
|
7599
|
+
// Human-feel UX: hold a continuous `typing…` indicator for the
|
|
7600
|
+
// WHOLE turn, not just the split-second a reply is transmitted.
|
|
7601
|
+
// A person you message shows as typing the entire time they
|
|
7602
|
+
// compose; switchroom used to fire only one-shot ~5s pings, so
|
|
7603
|
+
// any turn that read a file or thought for a moment went dark
|
|
7604
|
+
// after 5s. Self-renews every 4s; stopped at the canonical
|
|
7605
|
+
// turn-end (`purgeReactionTracking → stopTurnTypingLoop`).
|
|
7606
|
+
// Deterministic, framework-owned, no prose — the mechanical
|
|
7607
|
+
// ambient layer of the pacing contract.
|
|
7608
|
+
startTurnTypingLoop(chat_id)
|
|
7566
7609
|
// #1122 KPI: emit turn_started so dashboards can compute funnel
|
|
7567
7610
|
// start counts + correlate to turn_ended for duration / TTFO.
|
|
7568
7611
|
emitRuntimeMetric({
|
|
@@ -14111,6 +14154,8 @@ async function shutdown(signal: string): Promise<void> {
|
|
|
14111
14154
|
|
|
14112
14155
|
for (const iv of [...typingIntervals.values()]) clearInterval(iv)
|
|
14113
14156
|
typingIntervals.clear()
|
|
14157
|
+
for (const iv of [...turnTypingIntervals.values()]) clearInterval(iv)
|
|
14158
|
+
turnTypingIntervals.clear()
|
|
14114
14159
|
for (const t of [...typingRetryTimers.values()]) clearTimeout(t)
|
|
14115
14160
|
typingRetryTimers.clear()
|
|
14116
14161
|
|
|
@@ -80,6 +80,10 @@ export function detectModelUnavailable(
|
|
|
80
80
|
'quota_exhausted',
|
|
81
81
|
'plan limit',
|
|
82
82
|
'subscription limit',
|
|
83
|
+
// Claude Code v2.1.x usage-limit wording: "You've hit your limit ·
|
|
84
|
+
// resets 8:50am (Australia/Melbourne)".
|
|
85
|
+
'hit your limit',
|
|
86
|
+
'hit the limit',
|
|
83
87
|
]
|
|
84
88
|
if (quotaSignals.some(s => lower.includes(s))) {
|
|
85
89
|
const resetAt = parseResetTime(sample)
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
import { existsSync, mkdirSync, appendFileSync } from 'node:fs'
|
|
20
20
|
import { dirname, join } from 'node:path'
|
|
21
21
|
import { captureEvent } from './analytics-posthog.js'
|
|
22
|
+
import type { PokeLevel } from './silence-poke.js'
|
|
22
23
|
|
|
23
24
|
export type RuntimeMetricEvent =
|
|
24
25
|
/**
|
|
@@ -62,28 +63,33 @@ export type RuntimeMetricEvent =
|
|
|
62
63
|
ended_via: 'reply' | 'stream_reply_done' | 'silent' | 'forced' | 'framework_fallback'
|
|
63
64
|
}
|
|
64
65
|
/**
|
|
65
|
-
* Framework safety-net: a silence-poke was armed
|
|
66
|
-
*
|
|
67
|
-
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
66
|
+
* Framework safety-net: a silence-poke was armed. `ack` is the early
|
|
67
|
+
* (~10s) ack-budget poke — the model has sent NOTHING this turn and is
|
|
68
|
+
* leaving the user on a silent chat. `soft` (75s) / `firm` (180s) are
|
|
69
|
+
* the silence-since-last-outbound ladder. The system-reminder appended
|
|
70
|
+
* to the next tool result nudges the model to send an update. Doubles
|
|
71
|
+
* as a design-health signal — if these fire frequently, the
|
|
72
|
+
* conversational-pacing prompt isn't doing its job.
|
|
70
73
|
*/
|
|
71
74
|
| {
|
|
72
75
|
kind: 'silence_poke_fired'
|
|
73
76
|
key: string
|
|
74
|
-
level:
|
|
77
|
+
level: PokeLevel
|
|
75
78
|
silence_ms: number
|
|
76
79
|
subagent_wait: boolean
|
|
77
80
|
}
|
|
78
81
|
/**
|
|
79
82
|
* The model sent an outbound message within the success window
|
|
80
83
|
* (default 15s) after a poke fired. Pair with `silence_poke_fired`
|
|
81
|
-
* to compute success rate — the design target is >80%.
|
|
84
|
+
* to compute success rate — the design target is >80%. (`ack`-level
|
|
85
|
+
* success is not currently emitted — the ack poke sits outside the
|
|
86
|
+
* `pokesFired` ladder noteOutbound measures against; the type admits
|
|
87
|
+
* `ack` only so the silence-poke metric union stays assignable.)
|
|
82
88
|
*/
|
|
83
89
|
| {
|
|
84
90
|
kind: 'silence_poke_succeeded'
|
|
85
91
|
key: string
|
|
86
|
-
level:
|
|
92
|
+
level: PokeLevel
|
|
87
93
|
latency_ms: number
|
|
88
94
|
}
|
|
89
95
|
/**
|
|
@@ -423,6 +423,33 @@ export function detectErrorInTranscriptLine(
|
|
|
423
423
|
|
|
424
424
|
const type = obj.type as string | undefined
|
|
425
425
|
|
|
426
|
+
// Claude Code (v2.1.x) records a usage-limit / API error as a
|
|
427
|
+
// SYNTHETIC ASSISTANT MESSAGE, not an api_error / error line:
|
|
428
|
+
// { type: "assistant",
|
|
429
|
+
// message: { role: "assistant",
|
|
430
|
+
// content: [{ type: "text", text: "You've hit your limit · resets …" }] },
|
|
431
|
+
// error: "rate_limit", isApiErrorMessage: true, apiErrorStatus: 429 }
|
|
432
|
+
// It has no `api_error`/`error` top-type and no nested error OBJECT
|
|
433
|
+
// (`error` is a bare string), so the structured checks below miss it
|
|
434
|
+
// entirely. That silent miss is what kept fleet auto-fallback from
|
|
435
|
+
// ever firing on a quota hit — the exhaustion signal never reached
|
|
436
|
+
// the operator-event path. Detect this shape explicitly.
|
|
437
|
+
if (obj.isApiErrorMessage === true) {
|
|
438
|
+
const status =
|
|
439
|
+
typeof obj.apiErrorStatus === 'number' ? obj.apiErrorStatus : null
|
|
440
|
+
const errStr = typeof obj.error === 'string' ? obj.error : ''
|
|
441
|
+
const text = extractAssistantText(obj)
|
|
442
|
+
// A 429 in this shape is a subscription usage-limit hit (it carries
|
|
443
|
+
// a reset time) — classify it quota-exhausted so the operator event
|
|
444
|
+
// resolves to an auto-fallback-eligible kind. Other statuses fall
|
|
445
|
+
// through to the shared classifier.
|
|
446
|
+
const kind: OperatorEventKind =
|
|
447
|
+
status === 429
|
|
448
|
+
? 'quota-exhausted'
|
|
449
|
+
: classifyClaudeError({ type: errStr, status, message: text })
|
|
450
|
+
return { kind, raw: obj, detail: text || errStr || 'api error' }
|
|
451
|
+
}
|
|
452
|
+
|
|
426
453
|
// Explicit error line types from Claude Code JSONL
|
|
427
454
|
const isErrorLine = type === 'api_error' || type === 'error'
|
|
428
455
|
|
|
@@ -454,6 +481,32 @@ function extractDetailMessage(obj: Record<string, unknown> | null): string | nul
|
|
|
454
481
|
return typeof msg === 'string' && msg.length > 0 ? msg : null
|
|
455
482
|
}
|
|
456
483
|
|
|
484
|
+
/**
 * Collect the text carried by a transcript line's assistant message.
 *
 * Reads `obj.message.content` and keeps the `text` of every entry that is
 * an object with `type === 'text'` and a string `text`. The kept strings
 * are joined with a single space and the result is trimmed.
 *
 * Used for the `isApiErrorMessage` line shape, where the user-facing error
 * string sits inside the assistant message instead of an `error` object.
 *
 * @param obj Parsed transcript line.
 * @returns The joined text, or `''` when `message` is not an object,
 *   `content` is not an array, or no text entries are present.
 */
function extractAssistantText(obj: Record<string, unknown>): string {
  const { message } = obj
  if (message == null || typeof message !== 'object') return ''

  const { content } = message as Record<string, unknown>
  if (!Array.isArray(content)) return ''

  return content
    .filter(
      (entry): entry is Record<string, unknown> =>
        entry != null
        && typeof entry === 'object'
        && (entry as Record<string, unknown>).type === 'text',
    )
    .map((entry) => entry.text)
    .filter((text): text is string => typeof text === 'string')
    .join(' ')
    .trim()
}
|
|
509
|
+
|
|
457
510
|
// ─── The tail watcher ─────────────────────────────────────────────────────
|
|
458
511
|
|
|
459
512
|
/** Emitted to onOperatorEvent when the tail detects a Claude API error. */
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
* pacing prompt still applies; only the framework safety net is off.
|
|
44
44
|
*/
|
|
45
45
|
|
|
46
|
-
export type PokeLevel = 'soft' | 'firm'
|
|
46
|
+
/**
 * Severity of a silence poke.
 * - `ack`: early ack-budget poke, armed when nothing at all has been sent
 *   this turn (default threshold 10s).
 * - `soft` / `firm`: the silence-since-last-outbound ladder (default
 *   thresholds 75s and 180s).
 */
export type PokeLevel =
  | 'ack'
  | 'soft'
  | 'firm'
|
|
47
47
|
|
|
48
48
|
/** #1292: snapshot of an in-flight tool call, surfaced in the 300s
|
|
49
49
|
* framework-fallback message so the user sees the actual observable
|
|
@@ -76,6 +76,10 @@ export interface SilencePokeState {
|
|
|
76
76
|
lastThinkingAt: number | null
|
|
77
77
|
/** True once the 300s framework fallback has fired this turn. */
|
|
78
78
|
fallbackFired: boolean
|
|
79
|
+
/** True once the early ack-budget poke has fired this turn. One-shot:
|
|
80
|
+
* the ack nudge is specifically about the *first* outbound, so it
|
|
81
|
+
* never re-arms even after the model later goes quiet again. */
|
|
82
|
+
ackPokeFired: boolean
|
|
79
83
|
/** Wall-clock ms of last poke fire — used for poke-success latency. */
|
|
80
84
|
lastPokeFiredAt: number | null
|
|
81
85
|
/** #1292: in-flight tool calls keyed by toolUseId. Populated by
|
|
@@ -91,6 +95,12 @@ export interface SilencePokeState {
|
|
|
91
95
|
}
|
|
92
96
|
|
|
93
97
|
export interface ThresholdsMs {
|
|
98
|
+
/** Ack budget: if NO outbound at all has landed this many ms after
|
|
99
|
+
* turn start, arm an 'ack' poke. This is the framework enforcing the
|
|
100
|
+
* human-baseline "acknowledge within a beat" — far tighter than the
|
|
101
|
+
* 75s `soft` threshold, which measures silence-since-last-outbound
|
|
102
|
+
* and is the wrong instrument for "you never said hello." */
|
|
103
|
+
ack: number
|
|
94
104
|
soft: number
|
|
95
105
|
firm: number
|
|
96
106
|
fallback: number
|
|
@@ -101,6 +111,7 @@ export interface ThresholdsMs {
|
|
|
101
111
|
}
|
|
102
112
|
|
|
103
113
|
export const DEFAULT_THRESHOLDS: ThresholdsMs = {
|
|
114
|
+
ack: 10_000,
|
|
104
115
|
soft: 75_000,
|
|
105
116
|
firm: 180_000,
|
|
106
117
|
fallback: 300_000,
|
|
@@ -176,6 +187,7 @@ export function startTurn(key: string, now: number): void {
|
|
|
176
187
|
subagentDispatchActive: false,
|
|
177
188
|
lastThinkingAt: null,
|
|
178
189
|
fallbackFired: false,
|
|
190
|
+
ackPokeFired: false,
|
|
179
191
|
lastPokeFiredAt: null,
|
|
180
192
|
inFlightTools: new Map(),
|
|
181
193
|
})
|
|
@@ -340,6 +352,16 @@ export function endTurn(key: string): void {
|
|
|
340
352
|
|
|
341
353
|
/** Verbatim poke text. Wording is load-bearing — see issue #1122 design. */
|
|
342
354
|
export function formatPokeText(level: PokeLevel): string {
|
|
355
|
+
if (level === 'ack') {
|
|
356
|
+
return (
|
|
357
|
+
"[silence-poke] You haven't sent the user anything yet this turn — "
|
|
358
|
+
+ 'they are looking at a silent chat. Send a short, human one-line '
|
|
359
|
+
+ 'acknowledgement now via `reply` (e.g. "on it — checking"), in your '
|
|
360
|
+
+ "persona's voice, before you do any more work. A good colleague "
|
|
361
|
+
+ "answers in a beat; don't leave the message hanging while you think. "
|
|
362
|
+
+ 'If the full answer is genuinely seconds away, send that instead.'
|
|
363
|
+
)
|
|
364
|
+
}
|
|
343
365
|
if (level === 'soft') {
|
|
344
366
|
return (
|
|
345
367
|
"[silence-poke] You've been silent to the user for 75s. If you're "
|
|
@@ -437,6 +459,32 @@ function tick(now: number): void {
|
|
|
437
459
|
? thresholds.subagentSoft
|
|
438
460
|
: thresholds.soft
|
|
439
461
|
|
|
462
|
+
// Ack budget — the framework enforcing the human-baseline "answer
|
|
463
|
+
// in a beat." Fires once, only when NOTHING has been sent this turn
|
|
464
|
+
// (`lastOutboundAt == null`), well before the 75s `soft` threshold.
|
|
465
|
+
// `soft` measures silence-since-last-outbound and is the wrong
|
|
466
|
+
// instrument for "you never acknowledged me." Independent of the
|
|
467
|
+
// soft/firm/fallback ladder: if the model never acks, it still
|
|
468
|
+
// escalates soft → firm → fallback on schedule after this.
|
|
469
|
+
if (
|
|
470
|
+
!s.ackPokeFired
|
|
471
|
+
&& s.lastOutboundAt == null
|
|
472
|
+
&& s.pokesFired === 0
|
|
473
|
+
&& silence >= thresholds.ack
|
|
474
|
+
) {
|
|
475
|
+
s.pokeArmed = { level: 'ack' }
|
|
476
|
+
s.ackPokeFired = true
|
|
477
|
+
s.lastPokeFiredAt = now
|
|
478
|
+
activeDeps.emitMetric({
|
|
479
|
+
kind: 'silence_poke_fired',
|
|
480
|
+
key,
|
|
481
|
+
level: 'ack',
|
|
482
|
+
silence_ms: silence,
|
|
483
|
+
subagent_wait: s.subagentDispatchActive,
|
|
484
|
+
})
|
|
485
|
+
continue
|
|
486
|
+
}
|
|
487
|
+
|
|
440
488
|
if (s.pokesFired === 0 && silence >= softThreshold) {
|
|
441
489
|
s.pokeArmed = { level: 'soft' }
|
|
442
490
|
s.pokesFired = 1
|
|
@@ -42,6 +42,15 @@ describe('detectModelUnavailable — quota / billing strings', () => {
|
|
|
42
42
|
it('classifies "quota exhausted" verbatim', () => {
|
|
43
43
|
expect(detectModelUnavailable('quota exhausted on slot main')?.kind).toBe('quota_exhausted')
|
|
44
44
|
})
|
|
45
|
+
|
|
46
|
+
it("classifies Claude Code v2.1.x 'You've hit your limit' wording", () => {
|
|
47
|
+
// The exact text claude writes inside the synthetic
|
|
48
|
+
// isApiErrorMessage assistant message on a subscription quota hit.
|
|
49
|
+
const d = detectModelUnavailable(
|
|
50
|
+
"You've hit your limit · resets 8:50am (Australia/Melbourne)",
|
|
51
|
+
)
|
|
52
|
+
expect(d?.kind).toBe('quota_exhausted')
|
|
53
|
+
})
|
|
45
54
|
})
|
|
46
55
|
|
|
47
56
|
describe('detectModelUnavailable — overload / 429 / 5xx strings', () => {
|
|
@@ -70,6 +70,49 @@ describe('detectErrorInTranscriptLine — error detection', () => {
|
|
|
70
70
|
expect(detectErrorInTranscriptLine(line)).toBeNull()
|
|
71
71
|
})
|
|
72
72
|
|
|
73
|
+
// Regression — the fleet-auto-failover dead-zone. Claude Code v2.1.x
|
|
74
|
+
// records a usage-limit hit as a synthetic assistant message with
|
|
75
|
+
// isApiErrorMessage:true (no api_error type, no nested error OBJECT).
|
|
76
|
+
// Pre-fix, detectErrorInTranscriptLine missed it entirely → the
|
|
77
|
+
// operator-event path never ran → fleet auto-fallback never fired.
|
|
78
|
+
it('detects the v2.1.x synthetic-assistant-message usage-limit shape', () => {
|
|
79
|
+
// The exact on-disk line shape, verbatim from a real quota hit.
|
|
80
|
+
const line = JSON.stringify({
|
|
81
|
+
type: 'assistant',
|
|
82
|
+
message: {
|
|
83
|
+
role: 'assistant',
|
|
84
|
+
model: '<synthetic>',
|
|
85
|
+
content: [
|
|
86
|
+
{
|
|
87
|
+
type: 'text',
|
|
88
|
+
text: "You've hit your limit · resets 8:50am (Australia/Melbourne)",
|
|
89
|
+
},
|
|
90
|
+
],
|
|
91
|
+
},
|
|
92
|
+
error: 'rate_limit',
|
|
93
|
+
isApiErrorMessage: true,
|
|
94
|
+
apiErrorStatus: 429,
|
|
95
|
+
})
|
|
96
|
+
const result = detectErrorInTranscriptLine(line)
|
|
97
|
+
expect(result).not.toBeNull()
|
|
98
|
+
// A 429 in this shape is a subscription usage-limit hit → must
|
|
99
|
+
// classify quota-exhausted so the operator event resolves to an
|
|
100
|
+
// auto-fallback-eligible kind.
|
|
101
|
+
expect(result!.kind).toBe('quota-exhausted')
|
|
102
|
+
// The user-facing text must survive into `detail` (the model-
|
|
103
|
+
// unavailable card + the text-pattern path both rely on it).
|
|
104
|
+
expect(result!.detail).toContain('hit your limit')
|
|
105
|
+
})
|
|
106
|
+
|
|
107
|
+
it('still returns null for a normal (non-error) assistant message', () => {
|
|
108
|
+
// No isApiErrorMessage flag → must NOT be treated as an error.
|
|
109
|
+
const line = JSON.stringify({
|
|
110
|
+
type: 'assistant',
|
|
111
|
+
message: { role: 'assistant', content: [{ type: 'text', text: 'Done.' }] },
|
|
112
|
+
})
|
|
113
|
+
expect(detectErrorInTranscriptLine(line)).toBeNull()
|
|
114
|
+
})
|
|
115
|
+
|
|
73
116
|
it('returns null for lines with null error field', () => {
|
|
74
117
|
const line = JSON.stringify({ type: 'assistant', error: null })
|
|
75
118
|
expect(detectErrorInTranscriptLine(line)).toBeNull()
|
|
@@ -33,7 +33,15 @@ function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }):
|
|
|
33
33
|
__setDepsForTests({
|
|
34
34
|
emitMetric: (e) => fixtures.emitted.push(e),
|
|
35
35
|
onFrameworkFallback: (ctx) => { fixtures.fallbacks.push(ctx) },
|
|
36
|
-
|
|
36
|
+
// The ack budget (a new poke that fires *earlier* than `soft`) is
|
|
37
|
+
// disabled by default in this fixture so the soft/firm/fallback
|
|
38
|
+
// ladder tests stay isolated from it. The 'ack budget' describe
|
|
39
|
+
// block opts back in with a real value.
|
|
40
|
+
thresholdsMs: {
|
|
41
|
+
...DEFAULT_THRESHOLDS,
|
|
42
|
+
ack: Number.MAX_SAFE_INTEGER,
|
|
43
|
+
...(opts?.thresholds ?? {}),
|
|
44
|
+
},
|
|
37
45
|
})
|
|
38
46
|
return fixtures
|
|
39
47
|
}
|
|
@@ -139,6 +147,127 @@ describe('silence-poke — escalation ladder', () => {
|
|
|
139
147
|
})
|
|
140
148
|
})
|
|
141
149
|
|
|
150
|
+
// PR1 (human-feel UX epic): the ack budget. A person you message
|
|
151
|
+
// answers in a beat — the framework enforces that baseline by arming an
|
|
152
|
+
// 'ack' poke if NOTHING has been sent within `thresholds.ack` of turn
|
|
153
|
+
// start. It is a one-shot nudge (the model still authors every word),
|
|
154
|
+
// deliberately OUTSIDE the soft/firm/fallback `pokesFired` ladder: if
|
|
155
|
+
// the model never acks, the ladder still escalates on its own schedule.
|
|
156
|
+
// See `reference/conversational-pacing.md` and the "Open with an
|
|
157
|
+
// acknowledgement" bullet in `profiles/_shared/telegram-style.md.hbs`.
|
|
158
|
+
//
|
|
159
|
+
// NB: `setupDeps` disables the ack budget by default (ack = MAX_SAFE);
|
|
160
|
+
// every test here opts back in with a real `ack` threshold.
|
|
161
|
+
describe('silence-poke — ack budget (PR1 human-feel UX)', () => {
  // Every case opts back in to the ack budget with this threshold.
  const ACK_MS = 10_000
  const CHAT = 'chat:0'

  type Fixtures = ReturnType<typeof setupDeps>

  /** `silence_poke_fired` events recorded so far at the given level. */
  const pokesFiredAt = (fx: Fixtures, level: 'ack' | 'soft') =>
    fx.emitted.filter((e) => e.kind === 'silence_poke_fired' && e.level === level)

  /** Fresh fixture with the ack budget enabled and a turn started at t=0. */
  function beginTurn(key: string = CHAT): Fixtures {
    const fx = setupDeps({ thresholds: { ack: ACK_MS } })
    startTurn(key, 0)
    return fx
  }

  it('arms an ack poke at the ack threshold when nothing has been sent', () => {
    const fx = beginTurn()

    // One second short of the budget: nothing armed, nothing emitted.
    __tickForTests(9_000)
    expect(consumeArmedPoke()).toBeNull()
    expect(fx.emitted).toHaveLength(0)

    // Exactly at the budget: a single ack-level fire is recorded.
    __tickForTests(ACK_MS)
    expect(fx.emitted).toEqual([
      expect.objectContaining({ kind: 'silence_poke_fired', level: 'ack' }),
    ])
    const armed = consumeArmedPoke()
    expect(armed).toContain('[silence-poke]')
    expect(armed).toContain('reply')
  })

  it('does NOT arm an ack poke if an outbound landed before the budget', () => {
    const fx = beginTurn()
    // The model acknowledged at 3s, well inside the budget.
    noteOutbound(CHAT, 3_000)
    for (const at of [10_000, 20_000]) __tickForTests(at)
    expect(consumeArmedPoke()).toBeNull()
    expect(pokesFiredAt(fx, 'ack')).toHaveLength(0)
  })

  it('is one-shot — never re-arms even if the model goes quiet again', () => {
    const fx = beginTurn()
    __tickForTests(ACK_MS) // ack fires
    consumeArmedPoke() // drain it
    noteOutbound(CHAT, 12_000) // the model finally acknowledges
    // Quiet again afterwards: the ack poke concerns only the FIRST
    // outbound, so it must not fire a second time.
    __tickForTests(40_000)
    expect(pokesFiredAt(fx, 'ack')).toHaveLength(1)
  })

  it('ackPokeFired resets across turns even when endTurn was skipped (CC-5 invariant)', () => {
    // `ackPokeFired` is turn-scoped. With no endTurn between the two turns,
    // this pins that startTurn alone resets the flag.
    beginTurn('k')
    __tickForTests(ACK_MS) // ack fires
    expect(__getStateForTests('k')?.ackPokeFired).toBe(true)
    // Second turn on the same key, no endTurn in between.
    startTurn('k', 1_000_000)
    expect(__getStateForTests('k')?.ackPokeFired).toBe(false)
  })

  it('does not advance the ladder — soft still requires a full 75s of silence', () => {
    // After the ack poke fires, soft must still wait for its own threshold.
    const fx = beginTurn()
    __tickForTests(ACK_MS) // ack
    consumeArmedPoke()
    __tickForTests(70_000) // 70s total, under the 75s soft threshold
    expect(pokesFiredAt(fx, 'soft')).toHaveLength(0)
    __tickForTests(75_000)
    expect(pokesFiredAt(fx, 'soft')).toHaveLength(1)
  })

  it('still escalates ack -> soft -> firm -> fallback on a turn that never acks', () => {
    const fx = beginTurn()
    // ack, soft, firm: tick to each threshold and drain the armed poke.
    for (const at of [10_000, 75_000, 180_000]) {
      __tickForTests(at)
      consumeArmedPoke()
    }
    __tickForTests(300_000) // fallback

    const trail = fx.emitted.map((e) => {
      switch (e.kind) {
        case 'silence_poke_fired':
          return `poke:${e.level}`
        case 'silence_fallback_sent':
          return `fallback:${e.fallback_kind}`
        default:
          return e.kind
      }
    })
    expect(trail).toEqual([
      'poke:ack',
      'poke:soft',
      'poke:firm',
      'fallback:working',
    ])
  })

  it('formatPokeText("ack") nudges for a human acknowledgement via reply', () => {
    const ackText = formatPokeText('ack')
    expect(ackText).toContain('[silence-poke]')
    expect(ackText.toLowerCase()).toContain('acknowledg')
    expect(ackText).toContain('reply')
  })
})
|
|
270
|
+
|
|
142
271
|
describe('silence-poke — outbound resets clock + success measurement', () => {
|
|
143
272
|
it('noteOutbound resets the silence clock', () => {
|
|
144
273
|
setupDeps()
|
|
@@ -608,7 +737,9 @@ describe('silence-poke — fallback handler errors do not break timer', () => {
|
|
|
608
737
|
__setDepsForTests({
|
|
609
738
|
emitMetric: (e) => fx.emitted.push(e),
|
|
610
739
|
onFrameworkFallback: () => { throw new Error('oh no') },
|
|
611
|
-
|
|
740
|
+
// ack budget out of the way — this test exercises the
|
|
741
|
+
// soft/firm/fallback ladder under a throwing fallback handler.
|
|
742
|
+
thresholdsMs: { ...DEFAULT_THRESHOLDS, ack: Number.MAX_SAFE_INTEGER },
|
|
612
743
|
})
|
|
613
744
|
startTurn('k', 0)
|
|
614
745
|
expect(() => {
|
|
@@ -625,7 +756,8 @@ describe('silence-poke — fallback handler errors do not break timer', () => {
|
|
|
625
756
|
__setDepsForTests({
|
|
626
757
|
emitMetric: (e) => fx.emitted.push(e),
|
|
627
758
|
onFrameworkFallback: () => Promise.reject(new Error('async fail')),
|
|
628
|
-
|
|
759
|
+
// ack budget out of the way — see the throwing-handler test above.
|
|
760
|
+
thresholdsMs: { ...DEFAULT_THRESHOLDS, ack: Number.MAX_SAFE_INTEGER },
|
|
629
761
|
})
|
|
630
762
|
startTurn('k', 0)
|
|
631
763
|
__tickForTests(75_000)
|