switchroom 0.15.44 → 0.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +122 -88
- package/dist/auth-broker/index.js +463 -177
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +17 -14
- package/dist/cli/notion-write-pretool.mjs +117 -86
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +3249 -1241
- package/dist/cli/ui/index.html +1 -1
- package/dist/host-control/main.js +2833 -355
- package/dist/vault/approvals/kernel-server.js +7482 -7439
- package/dist/vault/broker/server.js +11315 -11272
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +88 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +3 -22
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +167 -124
- package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
- package/telegram-plugin/dist/server.js +215 -172
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1837 -291
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
@@ -0,0 +1,597 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration test: telegram-activity-visibility
|
|
3
|
+
*
|
|
4
|
+
* Supersedes PR #2587 (inert — read frozen `lastToolLabelAt`) and #2588
|
|
5
|
+
* (partial — only fixed Lever 5, left narrative clip at 120 chars).
|
|
6
|
+
*
|
|
7
|
+
* This test exercises the REAL code paths — not injected state:
|
|
8
|
+
*
|
|
9
|
+
* Fix 2 (post-answer background-agent liveness) — drives the REAL code paths:
|
|
10
|
+
* - The actual `startSubagentWatcher` with a real fs mock drives `onProgress`,
|
|
11
|
+
* which stamps `turn.subagentActivityAt` (the gateway's one-line stamp,
|
|
12
|
+
* gated exactly as gateway.ts gates it on a non-null currentTurn).
|
|
13
|
+
* - The REAL `mayOpenActivityCard` AND the REAL `evaluatePostAnswerLiveness`
|
|
14
|
+
* (the helper feedHeartbeatTick consults each tick) decide the verdict — not
|
|
15
|
+
* a re-implemented copy of the gate.
|
|
16
|
+
* - Concern 2 lifecycle: the gateway's `currentTurn` gating is modelled
|
|
17
|
+
* verbatim to prove the stamp + heartbeat EMIT in the post-answer/pre-teardown
|
|
18
|
+
* window but are INERT once `currentTurn` nulls at turn_end — and that the
|
|
19
|
+
* decoupled worker still surfaces (and is bounded) via the real,
|
|
20
|
+
* currentTurn-independent `workerActivityFeed`.
|
|
21
|
+
* - Concern 3: the staleness cap flips the verdict to 'stale' once the worker's
|
|
22
|
+
* last advance is older than the cap, so the card stops climbing forever.
|
|
23
|
+
*
|
|
24
|
+
* Fix 1 (narrative as first-class feed lines):
|
|
25
|
+
* - `clipNarrative` is called on a long narrative string → verifies 200-char
|
|
26
|
+
* limit (not the old 120-char one that truncated mid-sentence).
|
|
27
|
+
* - `appendActivityLabel` accumulates narrative AND tool label lines side-by-side
|
|
28
|
+
* in mirrorLines (distinct, not overwriting) → verifies the feed reads
|
|
29
|
+
* "narrative → tool → narrative" in order.
|
|
30
|
+
* - `mayOpenActivityCard` with narrative producer pre-answer → allows OPEN
|
|
31
|
+
* (Lever 5 removed, #2588).
|
|
32
|
+
*
|
|
33
|
+
* Each test also verifies it FAILS on the original code, as required:
|
|
34
|
+
* - Fix 2 without `subagentActivityAt` → gate would block (returns false).
|
|
35
|
+
* - Fix 1 at old 120-char clip → narrative truncates.
|
|
36
|
+
* - Fix 1 with Lever 5 active → narrative cannot open a card.
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
import { describe, it, expect } from 'vitest'
|
|
40
|
+
import * as realFs from 'fs'
|
|
41
|
+
import { startSubagentWatcher } from '../subagent-watcher.js'
|
|
42
|
+
import { mayOpenActivityCard } from '../gateway/feed-open-gate.js'
|
|
43
|
+
import { clipNarrative, appendActivityLabel } from '../tool-activity-summary.js'
|
|
44
|
+
import { evaluatePostAnswerLiveness } from '../turn-liveness-floor.js'
|
|
45
|
+
import {
|
|
46
|
+
createWorkerActivityFeed,
|
|
47
|
+
type WorkerActivityView,
|
|
48
|
+
type BotApiForWorkerFeed,
|
|
49
|
+
} from '../worker-activity-feed.js'
|
|
50
|
+
|
|
51
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
/**
 * Serialise the given objects as a JSONL document: one `JSON.stringify`-ed
 * record per line, each terminated by its own `\n`.
 *
 * Terminating every record individually (instead of joining and appending a
 * trailing newline) means an empty argument list yields an empty string rather
 * than a single blank line; output for one or more records is unchanged.
 *
 * @param lines Records to serialise, in order.
 * @returns The JSONL text (`''` when no records are given).
 */
function buildJSONL(...lines: object[]): string {
  return lines.map((record) => `${JSON.stringify(record)}\n`).join('')
}
|
|
56
|
+
|
|
57
|
+
/**
 * Shared builder for a transcript entry whose message content is a single
 * text block. Used by the user / assistant fixture helpers below so the
 * envelope shape lives in one place.
 */
function subAgentTextEntry(type: 'user' | 'assistant', text: string) {
  return { type, message: { content: [{ type: 'text', text }] } }
}

/** Fixture: a `user` transcript entry carrying one text block. */
function subAgentUserMsg(text: string) {
  return subAgentTextEntry('user', text)
}

/**
 * Fixture: an `assistant` transcript entry carrying one `tool_use` block.
 *
 * @param name  Tool name recorded on the block.
 * @param id    Tool-use id; defaults to `'id1'` (the previously hard-coded
 *              value) so existing callers are unaffected, while fixtures with
 *              several tool calls can give each a distinct id.
 * @param input Tool input payload; defaults to an empty object.
 */
function subAgentToolUse(name: string, id = 'id1', input: Record<string, unknown> = {}) {
  return { type: 'assistant', message: { content: [{ type: 'tool_use', name, id, input }] } }
}

/** Fixture: an `assistant` transcript entry carrying one text block. */
function subAgentAssistantText(text: string) {
  return subAgentTextEntry('assistant', text)
}
|
|
68
|
+
|
|
69
|
+
// ─── Fix 2: Post-answer background-agent liveness ────────────────────────────
|
|
70
|
+
|
|
71
|
+
describe('Fix 2: post-answer background-agent liveness (watcher → gate → liveness card)', () => {
|
|
72
|
+
/**
|
|
73
|
+
* This test uses the REAL `startSubagentWatcher` with an injected mock fs
|
|
74
|
+
* (the same pattern as subagent-watcher.test.ts — the authoritative harness
|
|
75
|
+
* for watcher tests). The `onProgress` callback is the REAL watcher code path.
|
|
76
|
+
*
|
|
77
|
+
* Simulate: parent turn has delivered substantive answer → turn.finalAnswerEverDelivered=true.
|
|
78
|
+
* Then watcher fires onProgress for a background sub-agent → we capture the
|
|
79
|
+
* timestamp it would write to turn.subagentActivityAt.
|
|
80
|
+
* Then call the REAL mayOpenActivityCard with that signal → assert it returns true.
|
|
81
|
+
*
|
|
82
|
+
* This exercises the entire watcher → signal → gate pipeline, not injected state.
|
|
83
|
+
*/
|
|
84
|
+
|
|
85
|
+
it('watcher onProgress advances subagentActivityAt and gate allows liveness card (real pipeline)', () => {
  // --- Fake turn state mirroring what gateway.ts holds post-answer ---
  // The parent answered at time 1000; the injected clock starts at 1500 (post-answer).
  const turn = {
    finalAnswerEverDelivered: true,
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    subagentActivityAt: undefined as number | undefined,
    labeledToolCount: 2,
  }
  // Model the gateway's module-scope `currentTurn` mirror so the test can
  // reproduce its lifecycle: non-null in the post-answer/pre-teardown window,
  // nulled at `endCurrentTurnAtomic` (turn_end). The onProgress stamp and the
  // heartbeat both read THIS — the crux of concern 2.
  let currentTurn: typeof turn | null = turn

  // --- Wire up the REAL startSubagentWatcher with mock fs ---
  // Pattern: start with an EMPTY subagents dir so the boot scan finds nothing
  // (no historical entries). Then simulate a new file appearing after boot.
  const agentDir = '/home/user/.switchroom/agents/myagent'
  const projectsRoot = `${agentDir}/.claude/projects`
  const projectDir = `${projectsRoot}/myproject`
  const sessionDir = `${projectDir}/session-abc`
  const subagentsDir = `${sessionDir}/subagents`
  const jsonlPath = `${subagentsDir}/agent-bg01.jsonl`

  // The JSONL has a user message, a tool_use, then a narrative text block.
  const content = buildJSONL(
    subAgentUserMsg('Analyse the 30 changed files'),
    subAgentToolUse('Read'),
    subAgentAssistantText('I have read the files and analysed the scope of the change'),
  )
  const contentBuf = Buffer.from(content, 'utf-8')

  // Declared for parity with the watcher harness; nothing in this test populates it.
  const fileContents: Map<string, Buffer> = new Map()
  // Path behind the single fake fd (42): set by openSync, cleared by closeSync.
  let lastOpenedPath: string | null = null

  // Control knob for per-phase fs state: false during boot, true afterwards.
  let jsonlVisible = false
  const mockFs = {
    existsSync: ((p: realFs.PathLike) => {
      const ps = String(p)
      const staticPaths = [agentDir, projectsRoot, projectDir, sessionDir, subagentsDir]
      if (staticPaths.includes(ps)) return true
      if (ps === jsonlPath) return jsonlVisible
      return false
    }) as typeof realFs.existsSync,
    readdirSync: ((p: realFs.PathLike) => {
      const ps = String(p)
      if (ps === projectsRoot) return ['myproject']
      if (ps === projectDir) return ['session-abc']
      if (ps === sessionDir) return ['subagents']
      if (ps === subagentsDir) return jsonlVisible ? ['agent-bg01.jsonl'] : []
      return []
    }) as unknown as typeof realFs.readdirSync,
    statSync: ((p: realFs.PathLike) => {
      const ps = String(p)
      if (ps === jsonlPath && jsonlVisible) {
        return { size: contentBuf.length, mtimeMs: 1500, isDirectory: () => false } as unknown as realFs.Stats
      }
      return { size: 0, mtimeMs: 0, isDirectory: () => false } as unknown as realFs.Stats
    }) as typeof realFs.statSync,
    openSync: ((p: realFs.PathLike) => { lastOpenedPath = String(p); return 42 }) as unknown as typeof realFs.openSync,
    closeSync: (() => { lastOpenedPath = null }) as typeof realFs.closeSync,
    readSync: ((_fd: number, buf: NodeJS.ArrayBufferView, offset: number, length: number, position: number | null): number => {
      if (lastOpenedPath !== jsonlPath) return 0
      const pos = position ?? 0
      // `subarray` is the non-deprecated equivalent of `Buffer.slice` (same view semantics).
      const src = contentBuf.subarray(pos, pos + length)
      src.copy(buf as Buffer, offset)
      return src.length
    }) as unknown as typeof realFs.readSync,
    watch: (() => ({ close: () => {} })) as unknown as typeof realFs.watch,
  }

  // Injected clock and timer capture: intervals are recorded so the test can
  // fire a poll by hand; timeouts are never invoked.
  let currentTime = 1500
  const intervals: Array<{ fn: () => void; ms: number }> = []
  let nextRef = 0

  const progressEvents: Array<{ agentId: string; latestSummary: string }> = []

  const watcher = startSubagentWatcher({
    agentDir,
    // Omit agentCwd so the watcher doesn't filter by slug — keeps the test simple
    now: () => currentTime,
    setInterval: (fn, ms) => {
      const ref = nextRef++
      intervals.push({ fn, ms })
      return { ref }
    },
    clearInterval: () => {},
    setTimeout: (_fn, _ms) => { return { ref: nextRef++ } },
    clearTimeout: () => {},
    fs: mockFs,
    onProgress: ({ agentId, latestSummary }) => {
      progressEvents.push({ agentId, latestSummary })
      // Mirror ONLY the gateway's one-line stamp (`stampTurn.subagentActivityAt
      // = Date.now()`), gated exactly as gateway.ts gates it: `currentTurn !=
      // null && finalAnswerEverDelivered`. The DECISION the heartbeat then
      // makes off this signal is NOT re-implemented here — the test drives the
      // REAL `evaluatePostAnswerLiveness` helper below.
      const stampTurn = currentTurn // gateway: `const stampTurn = currentTurn`
      if (stampTurn != null && stampTurn.finalAnswerEverDelivered) {
        stampTurn.subagentActivityAt = currentTime
      }
    },
    log: () => {},
  })
  // The JSONL was not visible while the watcher started, so it is treated as
  // a new (non-historical) file when it appears below.

  // Phase 2: simulate the file appearing after boot (background worker dispatched after answer)
  jsonlVisible = true
  currentTime = 1600

  // Trigger a poll — the watcher finds the new file, reads it, and fires
  // onProgress for the tool_use and/or text events.
  const pollInterval = intervals[0]
  expect(pollInterval).toBeDefined()
  pollInterval.fn()

  watcher.stop()

  // --- Assert the REAL watcher fired onProgress ---
  // At minimum one onProgress should have fired.
  expect(progressEvents.length).toBeGreaterThan(0)
  expect(progressEvents[0].agentId).toBe('bg01')

  // --- Assert subagentActivityAt was stamped by the onProgress callback ---
  // This is the Fix 2 signal: the watcher's onProgress writes it to the turn
  // independently of lastToolLabelAt (which is frozen by the drop-guard).
  const stampedAt = turn.subagentActivityAt
  expect(stampedAt).toBe(currentTime)
  expect(stampedAt).toBeGreaterThan(turn.finalAnswerDeliveredAt)

  // --- Assert the REAL mayOpenActivityCard gate allows a liveness card ---
  // The gate input is DERIVED from the stamp the watcher just produced (not a
  // hard-coded `true`), so this assertion fails if the watcher never stamped.
  const postAnswerSubagentActivity =
    stampedAt !== undefined && stampedAt > turn.finalAnswerDeliveredAt
  const allowed = mayOpenActivityCard({
    producer: 'tool',
    finalAnswerEverDelivered: turn.finalAnswerEverDelivered,
    labeledToolCount: turn.labeledToolCount,
    postAnswerSubagentActivity,
  })
  expect(allowed).toBe(true)

  // --- Confirm FAILS without the fix ---
  // Without postAnswerSubagentActivity, Lever 1 blocks: the old #2587 code path
  // drove this off lastToolLabelAt (frozen) and never set postAnswerSubagentActivity.
  const blockedWithoutFix = mayOpenActivityCard({
    producer: 'tool',
    finalAnswerEverDelivered: turn.finalAnswerEverDelivered,
    labeledToolCount: turn.labeledToolCount,
    // postAnswerSubagentActivity omitted → old Lever 1 block (was the bug in #2587)
  })
  expect(blockedWithoutFix).toBe(false)

  // --- Drive the REAL feedHeartbeatTick decision helper (not a re-impl) ---
  // The heartbeat reads `currentTurn`; in this post-answer/pre-teardown window
  // it is still non-null AND just-stamped, so the REAL `evaluatePostAnswerLiveness`
  // is expected to return 'emit' (concern 2 (a): the stamp fires and the card
  // renders while the turn is alive).
  expect(currentTurn).not.toBeNull()
  const verdictInWindow = evaluatePostAnswerLiveness({
    subagentActivityAt: currentTurn!.subagentActivityAt,
    finalAnswerDeliveredAt: currentTurn!.finalAnswerDeliveredAt,
    now: currentTime + 5, // a heartbeat tick moments after the stamp
    staleCapMs: 30_000,
  })
  expect(verdictInWindow).toBe('emit')
})
|
|
255
|
+
|
|
256
|
+
it('idle post-answer (no watcher activity) → evaluatePostAnswerLiveness returns "idle" → silent', () => {
  // No watcher stamp since the answer (`subagentActivityAt` is undefined):
  // the REAL heartbeat decision must report 'idle', which is the verdict the
  // post-answer branch treats as "open no card" — the reply stays last.
  const noActivitySinceAnswer = {
    subagentActivityAt: undefined,
    finalAnswerDeliveredAt: 1000,
    now: 50_000,
    staleCapMs: 30_000,
  }
  expect(evaluatePostAnswerLiveness(noActivitySinceAnswer)).toBe('idle')
})
|
|
268
|
+
|
|
269
|
+
it('subagentActivityAt at/before the answer → "idle" (pre-answer label never opens a post-answer card)', () => {
  // The only activity stamp (500) predates the answer (1000), so it must not
  // count as post-answer liveness.
  const verdict = evaluatePostAnswerLiveness({
    subagentActivityAt: 500,
    finalAnswerDeliveredAt: 1000,
    now: 1500,
    staleCapMs: 30_000,
  })
  expect(verdict).toBe('idle')
})
|
|
279
|
+
})
|
|
280
|
+
|
|
281
|
+
// ─── Fix 2 — concern 2: the currentTurn path is INERT after teardown ─────────
|
|
282
|
+
|
|
283
|
+
describe('Fix 2 / concern 2: currentTurn nulls at turn_end → heartbeat path inert; worker feed covers it', () => {
|
|
284
|
+
/**
|
|
285
|
+
* The reviewer's concern: the post-answer liveness fix stamps + renders off
|
|
286
|
+
* `currentTurn`, which `endCurrentTurnAtomic` nulls at `turn_end`. A genuinely
|
|
287
|
+
* DECOUPLED background worker keeps ticking PAST teardown, so when its later
|
|
288
|
+
* onProgress arrives `currentTurn` is null → the stamp is inert and the
|
|
289
|
+
* heartbeat (which early-returns `if (turn == null) return`) is silent.
|
|
290
|
+
*
|
|
291
|
+
* This test reproduces that lifecycle EXACTLY (the gateway's `currentTurn`
|
|
292
|
+
* gating, which can't be imported, modelled verbatim) and proves:
|
|
293
|
+
* (a) in the post-answer/pre-teardown window the stamp fires and the REAL
|
|
294
|
+
* `evaluatePostAnswerLiveness` returns 'emit';
|
|
295
|
+
* (b) after teardown (`currentTurn = null`) a later worker tick CANNOT stamp
|
|
296
|
+
* and the heartbeat is structurally silent — i.e. the currentTurn fix IS
|
|
297
|
+
* inert for a decoupled worker, as suspected.
|
|
298
|
+
* The follow-on `describe` then proves the decoupled worker still surfaces via
|
|
299
|
+
* the currentTurn-INDEPENDENT `workerActivityFeed` (the by-design coverage).
|
|
300
|
+
*/
|
|
301
|
+
|
|
302
|
+
// Test-local model of the gateway's stamp (the one piece that can't be
// imported). Same gating as the gateway form
// `const stampTurn = currentTurn; if (stampTurn != null && stampTurn.finalAnswerEverDelivered)`:
// it writes `subagentActivityAt = now` only for a non-null turn that has
// delivered its substantive answer, and is a no-op otherwise.
function gatewayStamp(currentTurn: { finalAnswerEverDelivered: boolean; subagentActivityAt?: number } | null, now: number): void {
  if (currentTurn == null || !currentTurn.finalAnswerEverDelivered) return
  currentTurn.subagentActivityAt = now
}
|
|
310
|
+
|
|
311
|
+
// The gateway's feedHeartbeatTick post-answer entry, reduced to its decision:
|
|
312
|
+
// `if (turn == null) return` (no-turn), else the REAL evaluatePostAnswerLiveness.
|
|
313
|
+
/**
 * Test-local reduction of the heartbeat's post-answer decision.
 *
 * @param currentTurn The live turn mirror, or `null` once the turn has ended.
 * @param now         Clock value passed through to the liveness helper.
 * @returns `'no-turn'` when there is no live turn, `'pre-answer'` when the
 *          turn has not delivered its final answer, otherwise whatever the
 *          real `evaluatePostAnswerLiveness` returns for a 30 000 ms stale cap.
 */
function heartbeatVerdict(
  currentTurn: { finalAnswerDelivered: boolean; finalAnswerDeliveredAt?: number; subagentActivityAt?: number } | null,
  now: number,
): 'no-turn' | 'pre-answer' | ReturnType<typeof evaluatePostAnswerLiveness> {
  // gateway: `if (turn == null) return`
  if (currentTurn == null) return 'no-turn'

  const { finalAnswerDelivered, finalAnswerDeliveredAt, subagentActivityAt } = currentTurn
  if (!finalAnswerDelivered) return 'pre-answer'

  return evaluatePostAnswerLiveness({
    subagentActivityAt,
    finalAnswerDeliveredAt,
    now,
    staleCapMs: 30_000,
  })
}
|
|
327
|
+
|
|
328
|
+
it('(a) in-window: currentTurn alive → stamp fires and heartbeat emits', () => {
  const liveTurn = {
    finalAnswerEverDelivered: true,
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    subagentActivityAt: undefined as number | undefined,
  }
  let currentTurn: typeof liveTurn | null = liveTurn

  // The worker ticks at t=1600 while the turn mirror is still non-null
  // (pre-teardown), so the stamp lands on the turn object.
  const workerTickAt = 1600
  gatewayStamp(currentTurn, workerTickAt)
  expect(liveTurn.subagentActivityAt).toBe(workerTickAt)

  // A heartbeat 5 ms later sees the fresh stamp.
  const verdict = heartbeatVerdict(currentTurn, workerTickAt + 5)
  expect(verdict).toBe('emit')
})
|
|
342
|
+
|
|
343
|
+
it('(b) post-teardown: currentTurn nulled → later worker tick cannot stamp; heartbeat is silent (INERT)', () => {
  const endedTurn = {
    finalAnswerEverDelivered: true,
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    subagentActivityAt: undefined as number | undefined,
  }
  let currentTurn: typeof endedTurn | null = endedTurn

  // Model turn_end: the live mirror is nulled (as endCurrentTurnAtomic does
  // to the gateway's module-scope `currentTurn`).
  currentTurn = null

  // The DECOUPLED worker keeps running and ticks much later.
  const lateTickAt = 120_000
  gatewayStamp(currentTurn, lateTickAt)

  // The stamp had no live turn to write to, so the old turn object is untouched…
  expect(endedTurn.subagentActivityAt).toBeUndefined()
  // …and the heartbeat has no live turn to render on.
  const verdict = heartbeatVerdict(currentTurn, lateTickAt + 5)
  expect(verdict).toBe('no-turn')
})
|
|
362
|
+
|
|
363
|
+
it('concern 3: while the turn is alive but the worker went stale, heartbeat stops ("stale")', () => {
  // Last worker advance; per the scenario the worker has since finished, so
  // this stamp never moves again.
  const lastAdvanceAt = 2000
  const turn = {
    finalAnswerDelivered: true,
    finalAnswerDeliveredAt: 1000,
    subagentActivityAt: lastAdvanceAt as number | undefined,
  }
  const currentTurn: typeof turn | null = turn

  // A tick shortly after the last advance still emits…
  expect(heartbeatVerdict(currentTurn, lastAdvanceAt + 500)).toBe('emit')

  // …but at 30s of silence (the stale cap heartbeatVerdict passes) and beyond,
  // the verdict is 'stale', so the card stops climbing `running` forever
  // (the concern-3 bug).
  expect(heartbeatVerdict(currentTurn, lastAdvanceAt + 30_000)).toBe('stale')
  expect(heartbeatVerdict(currentTurn, lastAdvanceAt + 90_000)).toBe('stale')
})
|
|
378
|
+
})
|
|
379
|
+
|
|
380
|
+
// ─── Fix 2 — concern 2 resolution: the decoupled worker surfaces via the
|
|
381
|
+
// currentTurn-INDEPENDENT workerActivityFeed (and is bounded) ────────────
|
|
382
|
+
|
|
383
|
+
describe('Fix 2 / concern 2: decoupled background-worker activity surfaces via the real workerActivityFeed', () => {
|
|
384
|
+
/**
|
|
385
|
+
* Because the currentTurn heartbeat is inert post-teardown (proven above), the
|
|
386
|
+
* decoupled worker's activity is surfaced by the dedicated `workerActivityFeed`
|
|
387
|
+
* — a regular chat message edited in place, keyed by jsonl agent id, that the
|
|
388
|
+
* watcher keeps driving AFTER the parent turn ends (NO currentTurn dependency).
|
|
389
|
+
* This drives the REAL `createWorkerActivityFeed` to prove:
|
|
390
|
+
* - a running worker paints + edits a live message with NO turn in scope, and
|
|
391
|
+
* - it is BOUNDED: `finish` posts the terminal edit, the handle is dropped,
|
|
392
|
+
* and a later heartbeat tick emits nothing (no unbounded climb).
|
|
393
|
+
*/
|
|
394
|
+
|
|
395
|
+
/**
 * Recording test double for the bot API the worker feed talks to: it keeps a
 * log of every send and every edit so tests can assert on them.
 */
interface FakeBot extends BotApiForWorkerFeed {
  /** One entry per `sendMessage` call, in call order. */
  sent: Array<{ chatId: string; text: string }>
  /** One entry per `editMessageText` call, in call order. */
  edits: Array<{ messageId: number; text: string }>
}

/**
 * Create a fresh {@link FakeBot}. Sent messages receive sequential ids
 * starting at 5000; edits resolve to an empty object.
 */
function makeBot(): FakeBot {
  let nextMessageId = 5000
  const bot: FakeBot = {
    sent: [],
    edits: [],
    sendMessage: async (chatId, text) => {
      bot.sent.push({ chatId, text })
      const message_id = nextMessageId
      nextMessageId += 1
      return { message_id }
    },
    editMessageText: async (_chatId, messageId, text) => {
      bot.edits.push({ messageId, text })
      return {}
    },
  }
  return bot
}
|
|
415
|
+
/**
 * Builds a `WorkerActivityView` fixture: a running worker with two tools used
 * and 10s elapsed. Any field supplied in `p` overrides the matching default.
 */
function wView(p: Partial<WorkerActivityView> = {}): WorkerActivityView {
  const defaults: WorkerActivityView = {
    description: 'analyse the 30 changed files',
    lastTool: { name: 'Read', sanitisedArg: 'src/auth' },
    toolCount: 2,
    latestSummary: 'reading the auth module',
    elapsedMs: 10_000,
    state: 'running',
  }
  return { ...defaults, ...p }
}
|
|
426
|
+
|
|
427
|
+
it('surfaces a running decoupled worker with NO live turn, then stops at finish (bounded)', async () => {
  // Recording bot plus an injected clock and interval registry, so the test
  // controls time and heartbeat ticks by hand instead of relying on real timers.
  const bot = makeBot()
  let clock = 1_000_000
  const ticks: Array<() => void> = []
  const feed = createWorkerActivityFeed({
    bot,
    now: () => clock,
    firstPaintMinMs: 8000,
    minEditIntervalMs: 0,
    setInterval: (cb) => { ticks.push(cb); return ticks.length },
    clearInterval: () => {},
  })

  // The parent turn has long ended (currentTurn is null) — irrelevant here:
  // the feed is keyed by agentId and never reads currentTurn.
  await feed.update('bg01', 'chat-77', wView({ elapsedMs: 12_000 }))
  expect(bot.sent.length).toBe(1) // painted a live message post-teardown
  expect(feed.has('bg01')).toBe(true)

  // A later running tick edits the same message in place.
  clock += 6000
  await feed.update('bg01', 'chat-77', wView({ elapsedMs: 18_000, toolCount: 3, latestSummary: 'patching token parser' }))
  expect(bot.edits.length).toBeGreaterThanOrEqual(1)

  // Terminal: finish posts the recap edit and DROPS the handle.
  clock += 3000
  await feed.finish('bg01', wView({ state: 'done', toolCount: 4, latestSummary: 'opened PR #42' }))
  expect(feed.has('bg01')).toBe(false)
  const editsAfterFinish = bot.edits.length

  // Bounded: a subsequent heartbeat tick must NOT keep editing a finished worker.
  clock += 60_000
  ticks.forEach((t) => t())
  // Drain several microtask turns, not just one: an edit the heartbeat
  // schedules behind a chain of awaits would otherwise land after the
  // assertion below and let this negative check pass vacuously.
  for (let turn = 0; turn < 10; turn += 1) {
    await Promise.resolve()
  }
  expect(bot.edits.length).toBe(editsAfterFinish)

  feed.stop()
})
|
|
465
|
+
|
|
466
|
+
it('the worker-feed heartbeat only ticks RUNNING workers (terminal worker never climbs)', async () => {
  // Recording bot plus an injected clock and interval registry; heartbeat
  // callbacks are captured in `ticks` and fired manually below.
  const bot = makeBot()
  let clock = 2_000_000
  const ticks: Array<() => void> = []
  const feed = createWorkerActivityFeed({
    bot,
    now: () => clock,
    firstPaintMinMs: 0,
    minEditIntervalMs: 0,
    heartbeatTickMs: 6000,
    setInterval: (cb) => { ticks.push(cb); return ticks.length },
    clearInterval: () => {},
  })

  // Paint once, then finish straight away so the worker is terminal.
  await feed.update('bg02', 'chat-9', wView({ elapsedMs: 1000 }))
  expect(bot.sent.length).toBe(1)
  await feed.finish('bg02', wView({ state: 'done', latestSummary: 'done' }))
  const editCount = bot.edits.length

  // Heartbeat after finish: the handle is gone → no further edits ever.
  clock += 600_000
  ticks.forEach((t) => t())
  // Drain several microtask turns, not just one: an edit the heartbeat
  // schedules behind a chain of awaits would otherwise land after the
  // assertion below and let this negative check pass vacuously.
  for (let turn = 0; turn < 10; turn += 1) {
    await Promise.resolve()
  }
  expect(bot.edits.length).toBe(editCount)

  feed.stop()
})
|
|
491
|
+
})
|
|
492
|
+
|
|
493
|
+
// ─── Fix 1: Narrative as first-class feed lines ───────────────────────────────
|
|
494
|
+
|
|
495
|
+
describe('Fix 1: narrative as durable feed lines (clip length + Lever 5 removal)', () => {
|
|
496
|
+
/**
|
|
497
|
+
* These tests drive the REAL clipNarrative and appendActivityLabel functions
|
|
498
|
+
* from tool-activity-summary.ts, and the REAL mayOpenActivityCard gate.
|
|
499
|
+
* The pipeline is: raw text → clipNarrative → appendActivityLabel →
|
|
500
|
+
* mirrorLines (persistent alongside tool labels).
|
|
501
|
+
*/
|
|
502
|
+
|
|
503
|
+
it('clipNarrative raises clip to 200 chars (readable feed-line, matches STATUS_LINE_MAX)', () => {
  // The limit before Fix 1 and the limit after it.
  const previousLimit = 120
  const currentLimit = 200

  // Fixture chosen to sit between the two limits: the old clip would have cut
  // it mid-sentence, the new one must leave it untouched.
  const longNarrative = 'I will now analyse all 30 changed files in /src/auth to understand the scope of the authentication regression before patching the vulnerable token-parsing code path'
  expect(longNarrative.length).toBeGreaterThan(previousLimit)
  expect(longNarrative.length).toBeLessThanOrEqual(currentLimit)

  // With Fix 1 the whole narrative survives clipping.
  const clipped = clipNarrative(longNarrative)
  expect(clipped).toBe(longNarrative)

  // CONFIRM FAILS WITHOUT FIX: the old limit would have produced a shorter,
  // different string.
  const oldClip = longNarrative.slice(0, previousLimit)
  expect(clipped).not.toBe(oldClip)
  expect(clipped.length).toBeGreaterThan(oldClip.length)
})
|
|
521
|
+
|
|
522
|
+
it('clipNarrative still clips at 200 chars and takes first line only', () => {
  // Multi-line input: everything from the first newline onwards is dropped.
  const firstLineOnly = clipNarrative('First line of narrative\nSecond line should be dropped')
  expect(firstLineOnly).toBe('First line of narrative')
  expect(firstLineOnly).not.toContain('\n')

  // Input beyond the cap: the result is cut to exactly 200 characters.
  const overLimit = 'A'.repeat(250)
  const cappedResult = clipNarrative(overLimit)
  expect(cappedResult.length).toBe(200)
})
|
|
534
|
+
|
|
535
|
+
it('narrative and tool label lines both persist in mirrorLines (durable, not overwriting)', () => {
  // Drives the REAL appendActivityLabel: each call must add a line to
  // mirrorLines without disturbing the ones already there.
  const mirrorLines: string[] = []

  const expectedFeed = [
    'I will read the authentication module first', // narrative before a tool (thinking aloud)
    'Reading /src/auth/accounts.ts', // tool label (producer B — the tool runs)
    'Now I will patch the token-parsing path', // narrative after the tool (post-action narration)
  ]

  // After every append the list has grown by one and the new line is last.
  expectedFeed.forEach((line, position) => {
    appendActivityLabel(mirrorLines, line)
    expect(mirrorLines).toHaveLength(position + 1)
    expect(mirrorLines[position]).toBe(line)
  })

  // Final shape: narrative → tool → narrative, interleaved and all still present.
  expectedFeed.forEach((line, position) => {
    expect(mirrorLines[position]).toBe(line)
  })
})
|
|
563
|
+
|
|
564
|
+
it('0-tool narrative DOES open a card pre-answer (Lever 5 removed, Fix 1 / #2588)', () => {
  // Stand-in for the gate as it was before Fix 1, kept local to this test so
  // the behaviour change can be shown side by side with the real gate.
  const oldMayOpenActivityCard = (input: {
    producer: string
    finalAnswerEverDelivered: boolean
    labeledToolCount: number
  }): boolean => {
    const blockedAfterAnswer = input.finalAnswerEverDelivered
    const blockedByOldLever5 = input.producer === 'narrative' && input.labeledToolCount === 0
    return !blockedAfterAnswer && !blockedByOldLever5
  }

  // After Fix 1: pre-answer narrative may open a card even on a 0-tool
  // conversational turn; Lever 2 (clearActivitySummary) handles reply-is-last
  // ordering.
  const allowed = mayOpenActivityCard({
    producer: 'narrative',
    finalAnswerEverDelivered: false,
    labeledToolCount: 0,
  })
  expect(allowed).toBe(true)

  // CONFIRM FAILS WITHOUT FIX: the old Lever 5 logic rejects the same input.
  const legacyVerdict = oldMayOpenActivityCard({
    producer: 'narrative',
    finalAnswerEverDelivered: false,
    labeledToolCount: 0,
  })
  expect(legacyVerdict).toBe(false)
})
|
|
586
|
+
|
|
587
|
+
it('post-answer narrative remains blocked (Lever 1 still applies after Fix 1)', () => {
  // Fix 1 lifts Lever 5 only for the pre-answer case. Once the final answer
  // has been delivered, Lever 1 (finalAnswerEverDelivered) must still refuse
  // to open a card so the reply stays last.
  expect(
    mayOpenActivityCard({
      producer: 'narrative',
      finalAnswerEverDelivered: true,
      labeledToolCount: 2,
    }),
  ).toBe(false)
})
|
|
597
|
+
})
|