switchroom 0.15.45 → 0.16.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +56 -15
- package/dist/auth-broker/index.js +383 -97
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +7 -4
- package/dist/cli/notion-write-pretool.mjs +35 -4
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/switchroom.js +2894 -841
- package/dist/host-control/main.js +2685 -207
- package/dist/vault/approvals/kernel-server.js +7453 -7413
- package/dist/vault/broker/server.js +11428 -11388
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +97 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +0 -19
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +55 -12
- package/telegram-plugin/dist/gateway/gateway.js +2938 -977
- package/telegram-plugin/dist/server.js +55 -12
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1857 -292
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/model-command.ts +115 -4
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-command.test.ts +134 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
@@ -0,0 +1,526 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* feed-survival.test.ts — unit tests for the feed-survival primitive.
|
|
3
|
+
*
|
|
4
|
+
* Tests the three gaps identified in the audit:
|
|
5
|
+
*
|
|
6
|
+
* Gap 1 — detached background work (Bash run_in_background, Agent/Task)
|
|
7
|
+
* empties inFlight on near-instant tool_result, but the work is
|
|
8
|
+
* still running. Both orphaned-reply timer and silence-poke must
|
|
9
|
+
* not tear down the feed while hasPendingAsyncDispatch is true.
|
|
10
|
+
*
|
|
11
|
+
* Gap 2 — silence-poke defer was gated on SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=1
|
|
12
|
+
* (default OFF). The new isLegitimatelyWorking callback makes the
|
|
13
|
+
* defer the DEFAULT when wired, without requiring the env var.
|
|
14
|
+
*
|
|
15
|
+
* Gap 3 — ask_user with TTL >10min hit ORPHANED_REPLY_MAX_REARMS and was
|
|
16
|
+
* force-closed mid-human-wait. Now exempt from the cap.
|
|
17
|
+
*
|
|
18
|
+
* Regression: a truly idle turn (no work) still backstops/tears down as before.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
22
|
+
import {
|
|
23
|
+
startTurn,
|
|
24
|
+
noteOutbound,
|
|
25
|
+
noteToolStart,
|
|
26
|
+
noteToolEnd,
|
|
27
|
+
endTurn,
|
|
28
|
+
__tickForTests,
|
|
29
|
+
__setDepsForTests,
|
|
30
|
+
__getStateForTests,
|
|
31
|
+
__resetAllForTests,
|
|
32
|
+
DEFAULT_THRESHOLDS,
|
|
33
|
+
type SilencePokeMetric,
|
|
34
|
+
type FrameworkFallbackContext,
|
|
35
|
+
} from '../silence-poke.js'
|
|
36
|
+
import {
|
|
37
|
+
noteAsyncDispatch,
|
|
38
|
+
hasPendingAsyncDispatch,
|
|
39
|
+
noteOutbound as ppNoteOutbound,
|
|
40
|
+
noteTurnEnd as ppNoteTurnEnd,
|
|
41
|
+
startTurn as ppStartTurn,
|
|
42
|
+
clearPending,
|
|
43
|
+
__resetAllForTests as ppReset,
|
|
44
|
+
__setDepsForTests as ppSetDeps,
|
|
45
|
+
} from '../pending-work-progress.js'
|
|
46
|
+
import { ToolFlightTracker } from '../gateway/interrupt-defer.js'
|
|
47
|
+
import {
|
|
48
|
+
ORPHANED_REPLY_TIMEOUT_MS,
|
|
49
|
+
ORPHANED_REPLY_MAX_REARMS,
|
|
50
|
+
} from '../context-exhaustion.js'
|
|
51
|
+
|
|
52
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
53
|
+
|
|
54
|
+
/** Recording sinks filled by the test dependencies that setupSilenceDeps installs. */
interface SilenceFixtures {
  /** Every value handed to the injected `emitMetric` callback, in call order. */
  emitted: Array<SilencePokeMetric>
  /** Every context handed to the injected `onFrameworkFallback` callback, in call order. */
  fallbacks: Array<FrameworkFallbackContext>
}
|
|
58
|
+
|
|
59
|
+
/**
 * Installs recording test dependencies into the silence-poke module via
 * `__setDepsForTests` and returns the arrays those dependencies record into.
 *
 * @param opts.thresholds            Overrides layered on top of DEFAULT_THRESHOLDS.
 * @param opts.isLegitimatelyWorking Optional predicate. It is forwarded only when
 *                                   supplied; otherwise the key is omitted from
 *                                   the installed deps altogether.
 * @returns the `emitted` / `fallbacks` sinks the installed callbacks push to.
 */
function setupSilenceDeps(opts?: {
  thresholds?: Partial<typeof DEFAULT_THRESHOLDS> & { fallbackHardCeiling?: number }
  isLegitimatelyWorking?: (key: string) => boolean
}): SilenceFixtures {
  const emitted: SilencePokeMetric[] = []
  const fallbacks: FrameworkFallbackContext[] = []

  // Built separately so the key is absent (not `undefined`) when no callback is given.
  const workingDep =
    opts?.isLegitimatelyWorking != null
      ? { isLegitimatelyWorking: opts.isLegitimatelyWorking }
      : {}

  __setDepsForTests({
    emitMetric: (metric) => emitted.push(metric),
    onFrameworkFallback: (context) => {
      fallbacks.push(context)
    },
    thresholdsMs: {
      ...DEFAULT_THRESHOLDS,
      ...(opts?.thresholds ?? {}),
    },
    ...workingDep,
  })

  return { emitted, fallbacks }
}
|
|
77
|
+
|
|
78
|
+
// Every test must start from pristine module state AND a pristine environment.
// SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS alters the defer behaviour under test
// (one case in this file sets it to '0' to force-disable the defer), so a value
// inherited from the developer's shell or CI must be cleared before the test
// body runs — not only after it.
beforeEach(() => {
  delete process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS
  __resetAllForTests()
  ppReset()
})

// Mirror of the setup: drop module state and any env override a test installed.
afterEach(() => {
  __resetAllForTests()
  ppReset()
  delete process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS
})
|
|
88
|
+
|
|
89
|
+
// ─── hasPendingAsyncDispatch ──────────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
describe('hasPendingAsyncDispatch', () => {
  const chatKey = 'chat:0'

  // Install a no-op editMessage dependency before each case.
  beforeEach(() => {
    ppSetDeps({ editMessage: async () => {} })
  })

  it('returns false before any dispatch is noted', () => {
    ppStartTurn(chatKey)

    const pending = hasPendingAsyncDispatch(chatKey)

    expect(pending).toBe(false)
  })

  it('returns true after noteAsyncDispatch (Bash run_in_background / Agent / Task)', () => {
    ppStartTurn(chatKey)
    noteAsyncDispatch(chatKey)

    const pending = hasPendingAsyncDispatch(chatKey)

    expect(pending).toBe(true)
  })

  it('returns false after clearPending (inbound clears the flag)', () => {
    ppStartTurn(chatKey)
    noteAsyncDispatch(chatKey)
    clearPending(chatKey, 'inbound')

    const pending = hasPendingAsyncDispatch(chatKey)

    expect(pending).toBe(false)
  })

  it('returns false for an unknown key', () => {
    const pending = hasPendingAsyncDispatch('never-started')

    expect(pending).toBe(false)
  })

  it('returns true during active turn after dispatch, false after turn-end clears', () => {
    ppStartTurn(chatKey)
    noteAsyncDispatch(chatKey)
    const pendingDuringTurn = hasPendingAsyncDispatch(chatKey)
    expect(pendingDuringTurn).toBe(true)

    // No outbound anchor was captured, so ending the turn drops the pending state.
    ppNoteTurnEnd(chatKey)
    const pendingAfterTurn = hasPendingAsyncDispatch(chatKey)
    expect(pendingAfterTurn).toBe(false)
  })

  it('cross-turn: dispatch + outbound anchor + turn-end → pending PERSISTS (the core feed-survival path)', () => {
    // Production scenario this guards: detached background work is dispatched
    // (run_in_background Bash / Agent / Task), a reply is posted (which
    // captures an anchor), and then the turn ends while that work is still
    // running. With an anchor present the pending state survives turn_end, so
    // hasPendingAsyncDispatch keeps reporting true and the teardown timers
    // keep deferring.
    const anchor = { messageId: 4242, text: 'kicked off the build, polling…', parseMode: 'HTML' }

    ppStartTurn(chatKey)
    noteAsyncDispatch(chatKey)
    ppNoteOutbound(chatKey, anchor)
    ppNoteTurnEnd(chatKey)

    expect(hasPendingAsyncDispatch(chatKey)).toBe(true)
  })
})
|
|
143
|
+
|
|
144
|
+
// ─── Silence-poke: isLegitimatelyWorking callback defer ──────────────────────
|
|
145
|
+
|
|
146
|
+
describe('silence-poke — isLegitimatelyWorking callback (default-on defer)', () => {
  // Same thresholds for every case: fallback at 300s, hard ceiling at 900s.
  const thresholds = { fallback: 300_000, fallbackHardCeiling: 900_000 } as const

  it('defers the 300s fallback when the callback returns true', () => {
    const signal = { working: true }
    const f = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => signal.working,
    })
    startTurn('chat:0', 0)

    // Fallback threshold reached — without the callback this tick would fire.
    __tickForTests(300_000)
    expect(f.fallbacks).toHaveLength(0)

    // Work is still reported as ongoing.
    __tickForTests(500_000)
    expect(f.fallbacks).toHaveLength(0)

    // Work finishes; silence is already past the threshold, so the next tick fires.
    signal.working = false
    __tickForTests(500_001)
    expect(f.fallbacks).toHaveLength(1)
  })

  it('does NOT defer when the callback returns false (genuinely idle turn)', () => {
    const f = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => false,
    })
    startTurn('chat:0', 0)

    __tickForTests(300_000)

    // Nothing is working, so the fallback fires right at the threshold.
    expect(f.fallbacks).toHaveLength(1)
  })

  it('fires at the hard ceiling even when callback keeps returning true (hung-signal)', () => {
    const f = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => true, // the signal never clears
    })
    startTurn('chat:0', 0)

    __tickForTests(300_000)
    expect(f.fallbacks).toHaveLength(0) // deferred

    __tickForTests(900_000) // reaches the ceiling
    expect(f.fallbacks).toHaveLength(1) // the defer is bounded, so the turn still unwedges
  })

  it('wired callback returns false → fallback fires even with inFlightTools non-empty (callback supersedes legacy flag)', () => {
    // With isLegitimatelyWorking wired, the callback is what gets consulted;
    // the legacy flag plays no part on this path. A tool is left in flight
    // while the callback answers "no" — the fallback must fire regardless.
    const f = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => false,
    })
    startTurn('chat:0', 0)
    noteToolStart('chat:0', 't1', 'Bash', 'audit', 10_000)

    __tickForTests(300_000)

    expect(f.fallbacks).toHaveLength(1)
  })

  it('SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS=0 force-disables the defer even with callback wired', () => {
    process.env.SWITCHROOM_SILENCE_DEFER_INFLIGHT_TOOLS = '0'
    const f = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: () => true,
    })
    startTurn('chat:0', 0)

    __tickForTests(300_000)

    // Defer is force-disabled, so the fallback fires despite the callback saying "working".
    expect(f.fallbacks).toHaveLength(1)
  })
})
|
|
214
|
+
|
|
215
|
+
// ─── Silence-poke: detached background work (Gap 1) ──────────────────────────
|
|
216
|
+
|
|
217
|
+
describe('silence-poke — detached Bash run_in_background keeps feed alive', () => {
  const thresholds = { fallback: 300_000, fallbackHardCeiling: 900_000 } as const

  // In this suite "working" is defined purely by the pending-async-dispatch flag.
  const pendingDispatchSignal = (key: string): boolean => hasPendingAsyncDispatch(key)

  beforeEach(() => {
    ppSetDeps({ editMessage: async () => {} })
  })

  it('defers the 300s fallback while hasPendingAsyncDispatch is true', () => {
    // Scenario: the model calls Bash(run_in_background:true) and immediately
    // receives a handle as the tool_result; the turn ends while the
    // background process keeps running, leaving the pending flag set.
    ppStartTurn('chat:0')
    noteAsyncDispatch('chat:0') // the Bash dispatch

    const f = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: pendingDispatchSignal,
    })
    startTurn('chat:0', 0)

    // No tool is in flight, but background work is pending.
    __tickForTests(300_000)
    expect(f.fallbacks).toHaveLength(0)

    // Still pending.
    __tickForTests(500_000)
    expect(f.fallbacks).toHaveLength(0)

    // Background work completes: the pending flag is cleared, so the next tick fires.
    clearPending('chat:0', 'inbound')
    __tickForTests(500_001)
    expect(f.fallbacks).toHaveLength(1)
  })

  it('a truly idle turn (no background work, no tool in flight) still fires at 300s', () => {
    const f = setupSilenceDeps({
      thresholds,
      isLegitimatelyWorking: pendingDispatchSignal,
    })
    startTurn('chat:0', 0)

    __tickForTests(300_000)

    // No work at all: the backstop still applies.
    expect(f.fallbacks).toHaveLength(1)
  })
})
|
|
256
|
+
|
|
257
|
+
// ─── Orphaned-reply: ask_user exemption from ORPHANED_REPLY_MAX_REARMS ───────
|
|
258
|
+
|
|
259
|
+
/**
 * Pure decision function mirroring the orphaned-reply guard logic in gateway.ts,
 * extracted so the ask_user-exempt path can be tested without the full gateway.
 *
 * Decision table:
 *   - humanWaiting                       → 'rearm' (exempt from the cap)
 *   - working and rearmCount < maxRearms → 'rearm'
 *   - anything else                      → 'fire'
 *
 * @param opts.isLegitimatelyWorking whether non-human work is still in progress
 * @param opts.humanWaiting          whether an ask_user prompt is still open
 * @param opts.rearmCount            how many times the timer has already been re-armed
 * @param opts.maxRearms             the re-arm cap applied to non-human work
 * @returns 'rearm' to keep waiting, 'fire' to let the backstop run
 */
function shouldRearmOrphanedReply(opts: {
  isLegitimatelyWorking: boolean
  humanWaiting: boolean
  rearmCount: number
  maxRearms: number
}): 'rearm' | 'fire' {
  // An open ask_user wait is never capped: keep re-arming while it lasts.
  if (opts.humanWaiting) {
    return 'rearm'
  }

  // Nothing is working and nobody is waiting: fire straight away.
  if (!opts.isLegitimatelyWorking) {
    return 'fire'
  }

  // Other work honours the cap, so a genuinely hung tool eventually surfaces.
  return opts.rearmCount < opts.maxRearms ? 'rearm' : 'fire'
}
|
|
280
|
+
|
|
281
|
+
describe('orphaned-reply: ask_user exempt from ORPHANED_REPLY_MAX_REARMS', () => {
  // Every case uses the production cap; only the three varying inputs are spelled out.
  const decide = (working: boolean, humanWaiting: boolean, rearmCount: number) =>
    shouldRearmOrphanedReply({
      isLegitimatelyWorking: working,
      humanWaiting,
      rearmCount,
      maxRearms: ORPHANED_REPLY_MAX_REARMS,
    })

  it('re-arms indefinitely while ask_user is open (humanWaiting=true)', () => {
    // Sweep from zero to well past the cap: an open human wait always re-arms.
    const lastCount = ORPHANED_REPLY_MAX_REARMS + 5
    for (let count = 0; count <= lastCount; count++) {
      expect(decide(false, true, count)).toBe('rearm')
    }
  })

  it('fires once ask_user closes (humanWaiting=false, no other work)', () => {
    expect(decide(false, false, 0)).toBe('fire')
  })

  it('standard foreground tool: still bound by ORPHANED_REPLY_MAX_REARMS', () => {
    const justUnderCap = decide(true, false, ORPHANED_REPLY_MAX_REARMS - 1)
    expect(justUnderCap).toBe('rearm')

    const atCap = decide(true, false, ORPHANED_REPLY_MAX_REARMS)
    expect(atCap).toBe('fire')
  })

  it('ask_user combined with other work: cap still bypassed (humanWaiting wins)', () => {
    const farPastCap = ORPHANED_REPLY_MAX_REARMS + 100

    expect(decide(true, true, farPastCap)).toBe('rearm')
  })
})
|
|
329
|
+
|
|
330
|
+
// ─── Orphaned-reply: detached work (Gap 1) ───────────────────────────────────
|
|
331
|
+
|
|
332
|
+
describe('orphaned-reply: detached background work keeps feed alive past 30s', () => {
  it('re-arms while isLegitimatelyWorking=true (bg work in flight)', () => {
    // Positional shorthand over the mirrored gateway guard, pinned to the production cap.
    const rearmDecision = (working: boolean, humanWaiting: boolean, count: number): 'rearm' | 'fire' =>
      shouldRearmOrphanedReply({
        isLegitimatelyWorking: working,
        humanWaiting,
        rearmCount: count,
        maxRearms: ORPHANED_REPLY_MAX_REARMS,
      })

    // Bash run_in_background: nothing in flight, but an async dispatch is pending.
    const firstCheck = rearmDecision(true, false, 0)
    const lastAllowedCheck = rearmDecision(true, false, ORPHANED_REPLY_MAX_REARMS - 1)
    expect(firstCheck).toBe('rearm')
    expect(lastAllowedCheck).toBe('rearm')

    // Cap reached: fire (the background process must have been stuck for 10min+).
    const cappedCheck = rearmDecision(true, false, ORPHANED_REPLY_MAX_REARMS)
    expect(cappedCheck).toBe('fire')
  })

  it('fires immediately when no work at all (truly idle turn)', () => {
    const decision = shouldRearmOrphanedReply({
      isLegitimatelyWorking: false,
      humanWaiting: false,
      rearmCount: 0,
      maxRearms: ORPHANED_REPLY_MAX_REARMS,
    })

    expect(decision).toBe('fire')
  })
})
|
|
360
|
+
|
|
361
|
+
// ─── Synthetic turn_end suppressor with isLegitimatelyWorking ─────────────────
|
|
362
|
+
|
|
363
|
+
/**
 * Mirrors the DEFENSIVE FIX in the gateway.ts turn_end handler, which now keys
 * off isLegitimatelyWorking rather than isMidToolCall() alone.
 *
 * @param opts.durationMs            duration reported on the turn_end; exactly -1
 *                                   is treated as the synthetic marker
 * @param opts.isLegitimatelyWorking whether work is still in progress
 * @returns true only for a synthetic turn_end that arrives while work is ongoing
 */
function shouldSuppressSyntheticTurnEnd(opts: {
  durationMs: number
  isLegitimatelyWorking: boolean
}): boolean {
  const { durationMs, isLegitimatelyWorking } = opts

  // Any duration other than exactly -1 is never suppressed.
  if (durationMs !== -1) {
    return false
  }

  return isLegitimatelyWorking
}
|
|
373
|
+
|
|
374
|
+
describe('DEFENSIVE FIX: synthetic turn_end suppressor (extended predicate)', () => {
  // Positional shorthand for the predicate under test.
  const suppresses = (durationMs: number, isLegitimatelyWorking: boolean): boolean =>
    shouldSuppressSyntheticTurnEnd({ durationMs, isLegitimatelyWorking })

  it('suppresses when durationMs=-1 and bg work is pending (detached Bash)', () => {
    // "working" stands in for hasPendingAsyncDispatch having returned true.
    const bgWorkPending = true

    expect(suppresses(-1, bgWorkPending)).toBe(true)
  })

  it('suppresses when durationMs=-1 and ask_user is open (human-wait)', () => {
    // "working" stands in for pendingAskUser holding an entry for the chat.
    const askUserOpen = true

    expect(suppresses(-1, askUserOpen)).toBe(true)
  })

  it('does NOT suppress when durationMs=-1 but no work (genuinely idle)', () => {
    expect(suppresses(-1, false)).toBe(false)
  })

  it('does NOT suppress a REAL turn_end (durationMs >= 0)', () => {
    const realTurnEnds: Array<[number, boolean]> = [
      [0, true],
      [1, true],
      [12345, true],
      [0, false],
    ]

    for (const [durationMs, working] of realTurnEnds) {
      expect(suppresses(durationMs, working)).toBe(false)
    }
  })

  it('only durationMs === -1 is the synthetic discriminator (near-miss values)', () => {
    for (const nearMiss of [-2, -0.5]) {
      expect(suppresses(nearMiss, true)).toBe(false)
    }
  })
})
|
|
408
|
+
|
|
409
|
+
// ─── Regression: idle turns still backstop as before ─────────────────────────
|
|
410
|
+
|
|
411
|
+
describe('REGRESSION: idle turn (no work) still backstops', () => {
  it('silence-poke fires at 300s for a genuinely idle turn (callback wired)', () => {
    const nothingWorking = () => false
    const f = setupSilenceDeps({
      thresholds: { fallback: 300_000, fallbackHardCeiling: 900_000 },
      isLegitimatelyWorking: nothingWorking,
    })
    startTurn('chat:0', 0)

    __tickForTests(300_000)

    expect(f.fallbacks).toHaveLength(1)
  })

  it('silence-poke fires at 300s for an idle turn (no callback, no legacy defer)', () => {
    // Default deps only: no callback and no threshold overrides.
    const f = setupSilenceDeps()
    startTurn('chat:0', 0)

    __tickForTests(300_000)

    expect(f.fallbacks).toHaveLength(1)
  })

  it('orphaned-reply fires for an idle turn (no work, no human wait)', () => {
    const decision = shouldRearmOrphanedReply({
      isLegitimatelyWorking: false,
      humanWaiting: false,
      rearmCount: 0,
      maxRearms: ORPHANED_REPLY_MAX_REARMS,
    })

    expect(decision).toBe('fire')
  })

  it('ToolFlightTracker empty → not legitimately working via isMidToolCall', () => {
    const freshTracker = new ToolFlightTracker()

    expect(freshTracker.isMidToolCall()).toBe(false)
  })

  it('ToolFlightTracker with tool in flight → legitimately working', () => {
    const toolUseId = 'bg-bash'
    const tracker = new ToolFlightTracker()

    tracker.onEvent({ kind: 'tool_use', toolUseId })
    expect(tracker.isMidToolCall()).toBe(true)

    // The tool_result arrives (for run_in_background this is the instant
    // handle return), which takes the tool out of flight again.
    tracker.onEvent({ kind: 'tool_result', toolUseId })
    expect(tracker.isMidToolCall()).toBe(false)
    // hasPendingAsyncDispatch is the signal expected to cover the still-running
    // background work from here on; that is exercised in other suites.
  })
})
|
|
453
|
+
|
|
454
|
+
// ─── Combined: isLegitimatelyWorking integrates all three signals ─────────────
|
|
455
|
+
|
|
456
|
+
describe('isLegitimatelyWorking — all three signals', () => {
  const CHAT = 'chat:0'

  /**
   * Test-local mirror of the combined predicate: a turn counts as working when
   * ANY of the three signals is set — a tool in flight, a pending async
   * dispatch for the chat, or an open ask_user wait.
   */
  const isWorking = (tracker: ToolFlightTracker, askUserOpen: boolean): boolean =>
    tracker.isMidToolCall() || hasPendingAsyncDispatch(CHAT) || askUserOpen

  beforeEach(() => {
    ppSetDeps({ editMessage: async () => {} })
  })

  it('true when only isMidToolCall (foreground tool)', () => {
    const tracker = new ToolFlightTracker()
    tracker.onEvent({ kind: 'tool_use', toolUseId: 't1' })

    // Pin the "only": the other two signals are clear, so the tracker alone decides.
    expect(tracker.isMidToolCall()).toBe(true)
    expect(hasPendingAsyncDispatch(CHAT)).toBe(false)
    expect(isWorking(tracker, false)).toBe(true)
  })

  it('true when only hasPendingAsyncDispatch (Bash run_in_background)', () => {
    const tracker = new ToolFlightTracker()
    // tool_use followed at once by its tool_result: nothing remains in flight.
    tracker.onEvent({ kind: 'tool_use', toolUseId: 'bash1' })
    tracker.onEvent({ kind: 'tool_result', toolUseId: 'bash1' })
    ppStartTurn(CHAT)
    noteAsyncDispatch(CHAT)

    expect(tracker.isMidToolCall()).toBe(false) // in-flight set is empty again
    expect(hasPendingAsyncDispatch(CHAT)).toBe(true) // background work is pending
    expect(isWorking(tracker, false)).toBe(true)
  })

  it('true when only human-wait (ask_user open)', () => {
    const tracker = new ToolFlightTracker()
    // Normally the ask_user tool_use would itself be in flight while waiting.
    // This covers the defence-in-depth path where the in-flight set was
    // cleared unexpectedly and only the ask_user signal remains.
    const askUserInFlight = true // stands in for pendingAskUser having an entry for this chat

    // Pin the "only": without these two checks the assertion below is a
    // tautology (`… || true`) that could never fail.
    expect(tracker.isMidToolCall()).toBe(false)
    expect(hasPendingAsyncDispatch(CHAT)).toBe(false)
    expect(isWorking(tracker, askUserInFlight)).toBe(true)
  })

  it('false when all three signals are clear (genuinely idle)', () => {
    const tracker = new ToolFlightTracker()

    expect(isWorking(tracker, false)).toBe(false)
  })
})
|
|
497
|
+
|
|
498
|
+
// ─── Silence-poke: hard ceiling is correctly applied ─────────────────────────
|
|
499
|
+
|
|
500
|
+
describe('silence-poke — hard ceiling bounds the defer', () => {
  // Fallback at 300s with a tighter 600s ceiling than the other suites use.
  const thresholds = { fallback: 300_000, fallbackHardCeiling: 600_000 } as const
  const alwaysWorking = () => true // a signal that never clears

  it('fires at ceiling even when isLegitimatelyWorking stays true (leak-guard)', () => {
    const f = setupSilenceDeps({ thresholds, isLegitimatelyWorking: alwaysWorking })
    startTurn('chat:0', 0)

    // At the fallback threshold and just below the ceiling: still deferred.
    __tickForTests(300_000)
    expect(f.fallbacks).toHaveLength(0)
    __tickForTests(599_000)
    expect(f.fallbacks).toHaveLength(0)

    // Exactly at the ceiling: fires despite the signal.
    __tickForTests(600_000)
    expect(f.fallbacks).toHaveLength(1)
  })

  it('fallbackFired is true after ceiling fire (no double-fire)', () => {
    const f = setupSilenceDeps({ thresholds, isLegitimatelyWorking: alwaysWorking })
    startTurn('chat:0', 0)

    __tickForTests(600_000)
    __tickForTests(700_000) // one more tick beyond the ceiling

    expect(f.fallbacks).toHaveLength(1) // fired exactly once
    const state = __getStateForTests('chat:0')!
    expect(state.fallbackFired).toBe(true)
  })
})
|