switchroom 0.14.18 → 0.14.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +6 -1
- package/dist/auth-broker/index.js +6 -1
- package/dist/cli/notion-write-pretool.mjs +6 -1
- package/dist/cli/switchroom.js +48 -3
- package/dist/host-control/main.js +6 -1
- package/dist/vault/approvals/kernel-server.js +6 -1
- package/dist/vault/broker/server.js +6 -1
- package/package.json +1 -1
- package/telegram-plugin/README.md +7 -3
- package/telegram-plugin/bridge/bridge.ts +1 -1
- package/telegram-plugin/dist/bridge/bridge.js +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +381 -153
- package/telegram-plugin/dist/server.js +1 -1
- package/telegram-plugin/gateway/coalesce-attachments.ts +70 -0
- package/telegram-plugin/gateway/gateway.ts +296 -37
- package/telegram-plugin/gateway/interrupt-defer.ts +100 -0
- package/telegram-plugin/gateway/pending-inbound-buffer.ts +21 -4
- package/telegram-plugin/status-reactions.ts +18 -0
- package/telegram-plugin/tests/coalesce-attachments.test.ts +152 -0
- package/telegram-plugin/tests/interrupt-defer.test.ts +147 -0
- package/telegram-plugin/tests/pending-inbound-buffer.test.ts +36 -0
- package/telegram-plugin/tests/status-reactions.test.ts +69 -0
- package/telegram-plugin/tests/worker-activity-feed.test.ts +113 -0
- package/telegram-plugin/uat/scenarios/jtbd-forwarded-burst-dm.test.ts +158 -0
- package/telegram-plugin/worker-activity-feed.ts +54 -4
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
// Problem B — deferred safe-boundary interrupt.
|
|
2
|
+
//
|
|
3
|
+
// A `!`-prefix interrupt SIGINTs the agent's in-flight turn (tmux C-c) and
|
|
4
|
+
// then resumes with the replacement body as a fresh turn. Firing the SIGINT
|
|
5
|
+
// the instant `!` arrives can land mid-tool-call — a C-c during a Write or a
|
|
6
|
+
// Bash leaves the tool's work half-done. `reference/steer-or-queue-mid-flight.md`
|
|
7
|
+
// names this exact anti-pattern: "Mid-tool-call is not 'amend time.'"
|
|
8
|
+
//
|
|
9
|
+
// We can't pause claude's internal loop (the unmodified-CLI constraint — the
|
|
10
|
+
// only levers are SIGINT via tmux and observing the session JSONL). But we CAN
|
|
11
|
+
// observe when a tool call starts and finishes, and defer the SIGINT to the
|
|
12
|
+
// next clean boundary. This module is the pure, deterministic core of that
|
|
13
|
+
// decision so it can be unit-tested without the gateway's IPC / timers.
|
|
14
|
+
|
|
15
|
+
/**
 * Minimal view of a session event as consumed by the in-flight tool tracker.
 * Declared structurally (a subset of the gateway's `SessionEvent`) so unit
 * tests can build events without the full union.
 */
export interface FlightEvent {
  /** Event discriminator, e.g. `tool_use`, `tool_result`, `turn_end`, `enqueue`. */
  kind: string
  /** Id that pairs a `tool_use` with its `tool_result`; may be absent or null. */
  toolUseId?: string | null
}
|
|
21
|
+
|
|
22
|
+
/**
 * Records which top-level tool calls of the CURRENT turn are still open.
 *
 * Bookkeeping is keyed by `toolUseId`:
 *   - `tool_use`             → the id joins the open set
 *   - `tool_result`          → the matching id leaves the open set
 *   - `turn_end` / `enqueue` → the whole set is emptied (a new turn starts
 *     clean, and a killed turn may never emit its trailing `tool_result`)
 *
 * Every other event kind is a no-op. That deliberately includes the
 * `sub_agent_*` family: the parent's `Task` tool_use already sits in the set
 * and stands for the wait the user observes, so a sub-agent's own tool calls
 * do not gate the boundary separately. Telegram-surface tools are NOT
 * excluded — treating any open tool as "unsafe to C-c" is the conservative
 * call, and the max-wait bound keeps a stuck reply tool from stranding the
 * interrupt.
 */
export class ToolFlightTracker {
  private readonly openToolUseIds = new Set<string>()

  /** Fold one session event into the open-call set. */
  onEvent(ev: FlightEvent): void {
    const { kind, toolUseId } = ev

    if (kind === 'turn_end' || kind === 'enqueue') {
      this.openToolUseIds.clear()
      return
    }

    // dequeue / thinking / text / tool_label / sub_agent_* — no effect.
    if (kind !== 'tool_use' && kind !== 'tool_result') return

    // Without a non-empty string id there is nothing to open or to match.
    if (typeof toolUseId !== 'string' || toolUseId.length === 0) return

    if (kind === 'tool_use') {
      this.openToolUseIds.add(toolUseId)
    } else {
      this.openToolUseIds.delete(toolUseId)
    }
  }

  /** True when at least one top-level tool call is open (unsafe boundary). */
  isMidToolCall(): boolean {
    return this.openToolUseIds.size !== 0
  }

  /** Count of in-flight tool calls — exposed for diagnostics/logging. */
  inFlightCount(): number {
    return this.openToolUseIds.size
  }

  /** Forget every open call unconditionally. */
  clear(): void {
    this.openToolUseIds.clear()
  }
}
|
|
74
|
+
|
|
75
|
+
/** Outcome of the interrupt-timing decision. */
export type InterruptTiming = 'fire-now' | 'defer'

/**
 * Choose between firing a `!` interrupt right away and parking it until the
 * next safe boundary. Pure: the gateway supplies the live flag and the
 * tracker reading.
 *
 *   flag | tool in flight | result
 *   -----+----------------+----------------------------------------------
 *   off  | any            | fire-now (historical synchronous behaviour)
 *   on   | no             | fire-now (already at a clean boundary)
 *   on   | yes            | defer    (wait for tool_result / turn_end)
 */
export function decideInterruptTiming(opts: {
  safeBoundaryEnabled: boolean
  midToolCall: boolean
}): InterruptTiming {
  // Deferral needs both conditions; anything else fires immediately.
  const mustWait = opts.safeBoundaryEnabled && opts.midToolCall
  return mustWait ? 'defer' : 'fire-now'
}
|
|
92
|
+
|
|
93
|
+
/**
 * Default max-wait, in milliseconds, for a deferred interrupt. Used whenever
 * the configured value is absent or unusable, so the wait is always bounded.
 */
export const DEFAULT_INTERRUPT_MAX_WAIT_MS = 8000

/**
 * Resolve the max-wait (ms) applied to a parked interrupt.
 *
 * @param configured Configured wait in milliseconds, if any.
 * @returns `configured` when it is a finite number greater than zero;
 *   otherwise {@link DEFAULT_INTERRUPT_MAX_WAIT_MS}.
 *
 * `Infinity` is rejected together with `NaN`, zero and negative values: a
 * bare `> 0` check lets `Infinity` through, which contradicts the "never wait
 * forever" guarantee.
 * NOTE(review): if the result is passed to `setTimeout`, Node treats delays
 * above 2^31-1 as 1 ms — confirm whether very large finite values also need
 * a cap at the call site.
 */
export function resolveInterruptMaxWaitMs(configured: number | undefined): number {
  const usable =
    typeof configured === 'number' && Number.isFinite(configured) && configured > 0
  return usable ? configured : DEFAULT_INTERRUPT_MAX_WAIT_MS
}
|
|
@@ -184,10 +184,16 @@ export function planBufferedRedelivery(
|
|
|
184
184
|
return out
|
|
185
185
|
}
|
|
186
186
|
|
|
187
|
+
/**
 * Prefix test for meta keys that describe an attachment. Anchored at the
 * start of the key, it accepts:
 *   - the primary keys: `image_path` and anything beginning `attachment_`
 *   - the A2 numbered siblings: `image_path_2`, `attachment_file_id_2`, …
 *   - `attachment_count`
 */
const ATTACHMENT_META_RE = /^(image_path|attachment_)/
|
|
191
|
+
|
|
187
192
|
/** Collapse a >1 run into a single turn. The newest message anchors the
|
|
188
193
|
* turn (its messageId/ts/user/meta); texts join in arrival order; the
|
|
189
|
-
*
|
|
190
|
-
*
|
|
194
|
+
* attachment(s) (if any) ride along from whichever message carried them.
|
|
195
|
+
* Caller guarantees the run is mergeable + has at most one media-bearing
|
|
196
|
+
* entry. */
|
|
191
197
|
function mergeRun(run: InboundMessage[]): InboundMessage {
|
|
192
198
|
const last = run[run.length - 1]!
|
|
193
199
|
const mediaEntry = run.find(inboundHasMedia)
|
|
@@ -195,10 +201,21 @@ function mergeRun(run: InboundMessage[]): InboundMessage {
|
|
|
195
201
|
...last,
|
|
196
202
|
text: run.map((m) => m.text).join('\n'),
|
|
197
203
|
}
|
|
198
|
-
// Re-seat the
|
|
199
|
-
//
|
|
204
|
+
// Re-seat the attachment/imagePath from the entry that owns it (which may
|
|
205
|
+
// not be `last`), or strip them if the run is text-only.
|
|
200
206
|
delete merged.imagePath
|
|
201
207
|
delete merged.attachment
|
|
208
|
+
if (mediaEntry != null && mediaEntry !== last) {
|
|
209
|
+
// The media-bearing entry isn't the anchor, so `last.meta` lacks the
|
|
210
|
+
// attachment fields the agent reads (image_path / attachment_* and the
|
|
211
|
+
// A2 numbered siblings). Splice the owning entry's attachment meta keys
|
|
212
|
+
// into the merged meta so the agent still sees every attachment.
|
|
213
|
+
const splicedMeta: Record<string, string> = { ...merged.meta }
|
|
214
|
+
for (const [k, v] of Object.entries(mediaEntry.meta)) {
|
|
215
|
+
if (ATTACHMENT_META_RE.test(k)) splicedMeta[k] = v
|
|
216
|
+
}
|
|
217
|
+
merged.meta = splicedMeta
|
|
218
|
+
}
|
|
202
219
|
if (mediaEntry?.imagePath != null) merged.imagePath = mediaEntry.imagePath
|
|
203
220
|
if (mediaEntry?.attachment != null) merged.attachment = mediaEntry.attachment
|
|
204
221
|
return merged
|
|
@@ -53,6 +53,7 @@ export type ReactionState =
|
|
|
53
53
|
| 'web'
|
|
54
54
|
| 'tool'
|
|
55
55
|
| 'compacting'
|
|
56
|
+
| 'awaiting'
|
|
56
57
|
| 'done'
|
|
57
58
|
| 'error'
|
|
58
59
|
| 'stallSoft'
|
|
@@ -78,6 +79,7 @@ export const REACTION_VARIANTS: Record<ReactionState, string[]> = {
|
|
|
78
79
|
coding: ['👨💻', '✍', '⚡'], // WORKING: writing / running code
|
|
79
80
|
web: ['⚡', '🤔', '👌'], // WORKING: lookup in motion
|
|
80
81
|
compacting:['✍', '🤔', '👀'],
|
|
82
|
+
awaiting: ['🙏', '🤔', '👀'], // BLOCKED ON HUMAN: parked on a permission card
|
|
81
83
|
done: ['👍', '💯', '🎉'], // FINISHED: turn_end fired
|
|
82
84
|
error: ['😱', '😨', '🤯'], // NON-TERMINAL — recovery allowed
|
|
83
85
|
stallSoft: ['🥱', '😴', '🤔'],
|
|
@@ -180,6 +182,22 @@ export class StatusReactionController {
|
|
|
180
182
|
this.scheduleState('compacting')
|
|
181
183
|
}
|
|
182
184
|
|
|
185
|
+
/**
 * 🙏 — park the reaction while the turn waits on a human decision (a
 * permission card is up and the operator has not yet tapped Allow/Deny).
 *
 * Ignored once the controller has finished. Otherwise the `awaiting` state
 * is scheduled with `immediate` (debounce bypassed, so 🙏 lands as soon as
 * the card is posted) and `skipStallReset`, and the stall timers are then
 * cleared. A turn blocked on the operator is not stalled, so it must NOT
 * promote to 🥱/😨 while the card sits unanswered. The next working
 * transition (setTool / setThinking, fired when the verdict resumes the
 * turn) re-arms the watchdog normally. Non-terminal.
 */
setAwaiting(): void {
  if (!this.finished) {
    this.scheduleState('awaiting', { immediate: true, skipStallReset: true })
    // Suspend the stall watchdog for as long as the card is pending.
    this.clearStallTimers()
  }
}
|
|
200
|
+
|
|
183
201
|
/**
|
|
184
202
|
* 😱 — non-terminal error indicator. Paints the error emoji but does
|
|
185
203
|
* NOT end the controller — recovery to a working state is permitted
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the A2 multi-attachment helpers
|
|
3
|
+
* (telegram-plugin/gateway/coalesce-attachments.ts).
|
|
4
|
+
*
|
|
5
|
+
* These pin the two pure pieces of the multi-attachment fold-in that live
|
|
6
|
+
* outside gateway.ts so they can be exercised without loadAccess()/IPC:
|
|
7
|
+
* 1. splitCoalescedAttachments — primary + capped extras, arrival order.
|
|
8
|
+
* 2. buildExtraAttachmentMeta — numbered meta fields starting at _2.
|
|
9
|
+
*
|
|
10
|
+
* The default cap (1) MUST reproduce the historical single-attachment shape:
|
|
11
|
+
* primary only, no extras, no numbered meta.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, expect, it } from 'vitest'
|
|
15
|
+
import {
|
|
16
|
+
splitCoalescedAttachments,
|
|
17
|
+
buildExtraAttachmentMeta,
|
|
18
|
+
type ResolvedExtraAttachment,
|
|
19
|
+
} from '../gateway/coalesce-attachments.js'
|
|
20
|
+
|
|
21
|
+
/** Minimal stand-in for a coalesced entry: text plus an optional attachment tag. */
interface Entry {
  text: string
  att?: string
}

/** Attachment predicate passed to `splitCoalescedAttachments`: true when `att` is set. */
function has(entry: Entry): boolean {
  return entry.att !== undefined && entry.att !== null
}
|
|
27
|
+
|
|
28
|
+
// splitCoalescedAttachments: the first attachment-bearing entry becomes the
// primary; later ones become extras, in arrival order, up to the cap.
describe('splitCoalescedAttachments', () => {
  /** Project entries onto their attachment tags for compact assertions. */
  const attsOf = (list: ReadonlyArray<{ att?: string }>) => list.map((entry) => entry.att)

  it('default cap 1: keeps only the first attachment as primary, no extras', () => {
    const burst: Entry[] = [
      { text: 'a', att: 'photo-1' },
      { text: 'b', att: 'photo-2' },
    ]
    const split = splitCoalescedAttachments(burst, has, 1)
    expect(split.primary).toEqual({ text: 'a', att: 'photo-1' })
    expect(split.extras).toEqual([])
  })

  it('picks the FIRST attachment-bearing entry as primary even when text-only entries precede it', () => {
    const burst: Entry[] = [
      { text: 'look' },
      { text: 'at this', att: 'photo-1' },
      { text: 'and this', att: 'photo-2' },
    ]
    const split = splitCoalescedAttachments(burst, has, 3)
    expect(split.primary?.att).toBe('photo-1')
    expect(attsOf(split.extras)).toEqual(['photo-2'])
  })

  it('preserves arrival order of extras', () => {
    const burst: Entry[] = [
      { text: '1', att: 'a' },
      { text: '2', att: 'b' },
      { text: '3', att: 'c' },
    ]
    const split = splitCoalescedAttachments(burst, has, 5)
    expect(split.primary?.att).toBe('a')
    expect(attsOf(split.extras)).toEqual(['b', 'c'])
  })

  it('caps extras at maxAttachments (overflow dropped here; bypassed upstream)', () => {
    const burst: Entry[] = [
      { text: '1', att: 'a' },
      { text: '2', att: 'b' },
      { text: '3', att: 'c' },
      { text: '4', att: 'd' },
    ]
    const split = splitCoalescedAttachments(burst, has, 2)
    expect(split.primary?.att).toBe('a')
    // primary + one extra = the cap of 2
    expect(attsOf(split.extras)).toEqual(['b'])
  })

  it('returns undefined primary when no entry carries an attachment', () => {
    const textOnly: Entry[] = [{ text: 'just' }, { text: 'text' }]
    const split = splitCoalescedAttachments(textOnly, has, 3)
    expect(split.primary).toBeUndefined()
    expect(split.extras).toEqual([])
  })

  it('floors a cap of 0 / negative at 1 so the only attachment is never stripped', () => {
    const burst: Entry[] = [{ text: '1', att: 'a' }, { text: '2', att: 'b' }]
    for (const cap of [0, -5]) {
      expect(splitCoalescedAttachments(burst, has, cap).primary?.att).toBe('a')
    }
    expect(splitCoalescedAttachments(burst, has, 0).extras).toEqual([])
  })
})
|
|
87
|
+
|
|
88
|
+
// buildExtraAttachmentMeta: extras are flattened into numbered meta fields,
// with the first extra numbered _2.
describe('buildExtraAttachmentMeta', () => {
  it('returns an empty object for no extras (default single-attachment turn)', () => {
    const meta = buildExtraAttachmentMeta([])
    expect(meta).toEqual({})
  })

  it('numbers a single photo extra as _2', () => {
    const extras: ResolvedExtraAttachment[] = [{ imagePath: '/inbox/p2.jpg' }]
    const meta = buildExtraAttachmentMeta(extras)
    expect(meta).toEqual({ image_path_2: '/inbox/p2.jpg' })
  })

  it('numbers multiple extras incrementally from _2', () => {
    const extras: ResolvedExtraAttachment[] = [
      { imagePath: '/inbox/p2.jpg' },
      { imagePath: '/inbox/p3.jpg' },
    ]
    const meta = buildExtraAttachmentMeta(extras)
    expect(meta).toEqual({
      image_path_2: '/inbox/p2.jpg',
      image_path_3: '/inbox/p3.jpg',
    })
  })

  it('emits full attachment metadata fields for a document extra', () => {
    const pdf: ResolvedExtraAttachment = {
      attachment: {
        kind: 'document',
        file_id: 'FID2',
        size: 1234,
        mime: 'application/pdf',
        name: 'spec.pdf',
      },
    }
    const meta = buildExtraAttachmentMeta([pdf])
    expect(meta).toEqual({
      attachment_kind_2: 'document',
      attachment_file_id_2: 'FID2',
      attachment_size_2: '1234',
      attachment_mime_2: 'application/pdf',
      attachment_name_2: 'spec.pdf',
    })
  })

  it('omits optional metadata fields that are absent', () => {
    const voice: ResolvedExtraAttachment = { attachment: { kind: 'voice', file_id: 'FID2' } }
    const meta = buildExtraAttachmentMeta([voice])
    expect(meta).toEqual({
      attachment_kind_2: 'voice',
      attachment_file_id_2: 'FID2',
    })
  })

  it('handles a mix of photo and document extras with correct numbering', () => {
    const extras: ResolvedExtraAttachment[] = [
      { imagePath: '/inbox/p2.jpg' },
      { attachment: { kind: 'document', file_id: 'FID3', mime: 'text/plain' } },
    ]
    const meta = buildExtraAttachmentMeta(extras)
    expect(meta).toEqual({
      image_path_2: '/inbox/p2.jpg',
      attachment_kind_3: 'document',
      attachment_file_id_3: 'FID3',
      attachment_mime_3: 'text/plain',
    })
  })
})
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the deferred safe-boundary interrupt core (Problem B).
|
|
3
|
+
*
|
|
4
|
+
* The gateway-side wiring (timer, SIGINT-via-tmux, sendToAgent resume,
|
|
5
|
+
* coalescing) is exercised by integration; these pin the pure decision:
|
|
6
|
+
* - ToolFlightTracker correctly tracks open tool calls by toolUseId and
|
|
7
|
+
* clears on turn_end / a fresh enqueue.
|
|
8
|
+
* - decideInterruptTiming returns fire-now unless the flag is on AND a tool
|
|
9
|
+
* is in flight.
|
|
10
|
+
* - resolveInterruptMaxWaitMs never yields a non-positive / forever wait.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect } from 'vitest'
|
|
14
|
+
import {
|
|
15
|
+
ToolFlightTracker,
|
|
16
|
+
decideInterruptTiming,
|
|
17
|
+
resolveInterruptMaxWaitMs,
|
|
18
|
+
DEFAULT_INTERRUPT_MAX_WAIT_MS,
|
|
19
|
+
} from '../gateway/interrupt-defer.js'
|
|
20
|
+
|
|
21
|
+
// ToolFlightTracker: open tool calls are tracked by toolUseId and the slate
// is wiped on turn_end / enqueue / clear().
describe('ToolFlightTracker', () => {
  /** Build a tracker that has already seen a `tool_use` for each given id. */
  const trackerWithOpen = (...ids: string[]): ToolFlightTracker => {
    const tracker = new ToolFlightTracker()
    for (const toolUseId of ids) tracker.onEvent({ kind: 'tool_use', toolUseId })
    return tracker
  }

  /** Report the matching `tool_result` for each id, in the order given. */
  const finish = (tracker: ToolFlightTracker, ...ids: string[]): void => {
    for (const toolUseId of ids) tracker.onEvent({ kind: 'tool_result', toolUseId })
  }

  it('starts at a safe boundary (no tools in flight)', () => {
    const tracker = trackerWithOpen()
    expect(tracker.isMidToolCall()).toBe(false)
    expect(tracker.inFlightCount()).toBe(0)
  })

  it('a tool_use opens an unsafe boundary; its tool_result closes it', () => {
    const tracker = trackerWithOpen('tu_1')
    expect(tracker.isMidToolCall()).toBe(true)
    finish(tracker, 'tu_1')
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('stays unsafe while ANY of several parallel tools is open', () => {
    const tracker = trackerWithOpen('a', 'b', 'c')
    expect(tracker.inFlightCount()).toBe(3)
    finish(tracker, 'b', 'a')
    expect(tracker.isMidToolCall()).toBe(true) // c still open
    finish(tracker, 'c')
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('turn_end clears any residual in-flight tools', () => {
    const tracker = trackerWithOpen('tu_1', 'tu_2')
    tracker.onEvent({ kind: 'turn_end' })
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('a fresh enqueue clears the slate (new turn starts clean)', () => {
    const tracker = trackerWithOpen('tu_1')
    tracker.onEvent({ kind: 'enqueue' })
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('ignores sub-agent and non-tool events', () => {
    const tracker = new ToolFlightTracker()
    const ignored = [
      { kind: 'sub_agent_tool_use', toolUseId: 'sub_1' },
      { kind: 'thinking' },
      { kind: 'text' },
      { kind: 'tool_label', toolUseId: 'tu_x' },
    ]
    for (const ev of ignored) tracker.onEvent(ev)
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('ignores tool_use with a missing / empty toolUseId', () => {
    const tracker = new ToolFlightTracker()
    const unusable = [
      { kind: 'tool_use' },
      { kind: 'tool_use', toolUseId: null },
      { kind: 'tool_use', toolUseId: '' },
    ]
    for (const ev of unusable) tracker.onEvent(ev)
    expect(tracker.isMidToolCall()).toBe(false)
  })

  it('tool_result for an unknown id is a harmless no-op', () => {
    const tracker = trackerWithOpen('real')
    finish(tracker, 'never-opened')
    expect(tracker.isMidToolCall()).toBe(true) // 'real' still open
  })

  it('clear() resets the tracker', () => {
    const tracker = trackerWithOpen('tu_1')
    tracker.clear()
    expect(tracker.isMidToolCall()).toBe(false)
  })
})
|
|
95
|
+
|
|
96
|
+
// decideInterruptTiming: 'defer' needs the flag on AND a tool in flight;
// every other combination fires immediately.
describe('decideInterruptTiming', () => {
  /** Positional shorthand for the options object. */
  const decide = (safeBoundaryEnabled: boolean, midToolCall: boolean) =>
    decideInterruptTiming({ safeBoundaryEnabled, midToolCall })

  it('fires now when the flag is off, even mid-tool-call', () => {
    expect(decide(false, true)).toBe('fire-now')
  })

  it('fires now when the flag is on but no tool is in flight', () => {
    expect(decide(true, false)).toBe('fire-now')
  })

  it('defers only when the flag is on AND a tool is in flight', () => {
    expect(decide(true, true)).toBe('defer')
  })

  it('fires now in the fully-off case', () => {
    expect(decide(false, false)).toBe('fire-now')
  })
})
|
|
121
|
+
|
|
122
|
+
// resolveInterruptMaxWaitMs: a positive configured value wins; anything else
// falls back to the default. The last case walks the tracker + decision
// lifecycle end to end.
describe('resolveInterruptMaxWaitMs', () => {
  it('uses the configured value when positive', () => {
    const resolved = resolveInterruptMaxWaitMs(3000)
    expect(resolved).toBe(3000)
  })

  it('falls back to the default when undefined', () => {
    const resolved = resolveInterruptMaxWaitMs(undefined)
    expect(resolved).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
  })

  it('never returns a non-positive wait (no forever-wait)', () => {
    for (const unusable of [0, -1]) {
      expect(resolveInterruptMaxWaitMs(unusable)).toBe(DEFAULT_INTERRUPT_MAX_WAIT_MS)
    }
  })

  it('models the lifecycle: open tool → defer; tool_result → safe → fire', () => {
    const tracker = new ToolFlightTracker()
    tracker.onEvent({ kind: 'tool_use', toolUseId: 'w1' })

    // `!` lands here, flag on:
    const timing = decideInterruptTiming({
      safeBoundaryEnabled: true,
      midToolCall: tracker.isMidToolCall(),
    })
    expect(timing).toBe('defer')

    // tool completes:
    tracker.onEvent({ kind: 'tool_result', toolUseId: 'w1' })
    // gateway fires the parked interrupt here
    expect(tracker.isMidToolCall()).toBe(false)
  })
})
|
|
@@ -450,6 +450,42 @@ describe('planBufferedRedelivery — merge-on-drain (forwarded-burst across a tu
|
|
|
450
450
|
expect(plan[0]!.merged.imagePath).toBe('/tmp/p.jpg')
|
|
451
451
|
})
|
|
452
452
|
|
|
453
|
+
it('splices attachment meta from the media entry when it is NOT the anchor (A2 numbered fields survive)', () => {
  // Scenario: a coalesced multi-attachment message is buffered and a
  // text-only follow-up arrives after it. mergeRun anchors on the last
  // message (the text), whose meta carries no attachment fields, so the
  // owning entry's image_path, its numbered siblings and attachment_count
  // must be spliced into the merged meta — otherwise the agent would never
  // see the photos.
  const withPhotos = userMsg({ text: 'look', ts: 1, imagePath: '/tmp/a.jpg' })
  withPhotos.meta = {
    image_path: '/tmp/a.jpg',
    image_path_2: '/tmp/b.jpg',
    attachment_count: '2',
    user: 'alice',
  }
  const followUp = userMsg({ text: 'at these', ts: 2 })
  followUp.meta = { user: 'alice' }

  const plan = planBufferedRedelivery([withPhotos, followUp])
  expect(plan).toHaveLength(1)

  const merged = plan[0]!.merged
  expect(merged.meta.image_path).toBe('/tmp/a.jpg')
  expect(merged.meta.image_path_2).toBe('/tmp/b.jpg')
  expect(merged.meta.attachment_count).toBe('2')
  // Top-level primary still re-seated for inboundHasMedia detection.
  expect(plan[0]!.merged.imagePath).toBe('/tmp/a.jpg')
})
|
|
477
|
+
|
|
478
|
+
it('does not need a meta splice when the media entry IS the anchor', () => {
  const intro = userMsg({ text: 'intro', ts: 1 })
  intro.meta = { user: 'alice' }
  const picture = userMsg({ text: 'pic', ts: 2, imagePath: '/tmp/p.jpg' })
  picture.meta = { image_path: '/tmp/p.jpg', user: 'alice' }

  const plan = planBufferedRedelivery([intro, picture])
  expect(plan).toHaveLength(1)

  // The photo is the anchor, so its meta is inherited verbatim.
  const mergedMeta = plan[0]!.merged.meta
  expect(mergedMeta.image_path).toBe('/tmp/p.jpg')
})
|
|
488
|
+
|
|
453
489
|
it('preserves the run total — sum of originals equals input length (lossless)', () => {
|
|
454
490
|
const msgs = [
|
|
455
491
|
userMsg({ text: 'a', ts: 1 }),
|
|
@@ -341,6 +341,75 @@ describe('StatusReactionController', () => {
|
|
|
341
341
|
expect(calls).toEqual(['👀'])
|
|
342
342
|
})
|
|
343
343
|
|
|
344
|
+
// setAwaiting(): the reaction parks on 🙏 while a permission card waits for
// the operator. A turn blocked on a human is NOT stalled, so the watchdog
// has to stay quiet — and it re-arms once the verdict resumes work.
describe('setAwaiting() — park on a human permission decision', () => {
  /** Fresh controller wired to a recording emitter. */
  const startTurn = () => {
    const { emit, calls } = makeEmitter()
    return { calls, ctrl: new StatusReactionController(emit) }
  }

  it('emits 🙏 immediately (bypasses debounce)', async () => {
    const { ctrl, calls } = startTurn()
    ctrl.setQueued()
    await flush()
    ctrl.setAwaiting()
    await flush()
    expect(calls).toEqual(['👀', '🙏'])
  })

  it('suppresses stall promotion (no 🥱/😨) while the card sits unanswered', async () => {
    const { ctrl, calls } = startTurn()
    ctrl.setQueued()
    ctrl.setTool('Bash') // enter a working state first
    vi.advanceTimersByTime(3500)
    await flush()
    ctrl.setAwaiting()
    await flush()
    // Well past both stall thresholds — awaiting must not yawn or panic.
    vi.advanceTimersByTime(120000)
    await flush()
    expect(calls).not.toContain('🥱')
    expect(calls).not.toContain('😨')
    expect(calls[calls.length - 1]).toBe('🙏')
  })

  it('re-arms the stall watchdog once a working transition resumes the turn', async () => {
    const { ctrl, calls } = startTurn()
    ctrl.setQueued()
    await flush()
    ctrl.setAwaiting()
    await flush()
    // Long human wait — no stall.
    vi.advanceTimersByTime(120000)
    await flush()
    expect(calls).toEqual(['👀', '🙏'])

    // Verdict dispatched → gateway calls setThinking() to un-park.
    ctrl.setThinking()
    vi.advanceTimersByTime(3500)
    await flush()
    expect(calls).toEqual(['👀', '🙏', '🤔'])

    // A genuine post-approval hang must still promote to 🥱 — the watchdog
    // was re-armed by the resuming transition.
    vi.advanceTimersByTime(30000)
    await flush()
    expect(calls).toContain('🥱')
  })

  it('is a no-op after finalize (cannot resurrect a finished controller)', async () => {
    const { ctrl, calls } = startTurn()
    ctrl.setQueued()
    ctrl.finalize('done')
    await flush()
    const beforeAwaiting = [...calls]
    ctrl.setAwaiting()
    vi.advanceTimersByTime(5000)
    await flush()
    expect(calls).toEqual(beforeAwaiting)
  })
})
|
|
412
|
+
|
|
344
413
|
// hold(): freeze on a WORKING glyph while background sub-agent workers
|
|
345
414
|
// outlive the parent turn, deferring the terminal 👍 (worker-reaction fix).
|
|
346
415
|
describe('hold() — defer 👍 while a background worker runs', () => {
|