switchroom 0.14.43 → 0.14.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +80 -80
- package/dist/auth-broker/index.js +80 -80
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/notion-write-pretool.mjs +82 -82
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +357 -357
- package/dist/host-control/main.js +148 -148
- package/dist/vault/approvals/kernel-server.js +82 -82
- package/dist/vault/broker/server.js +83 -83
- package/package.json +1 -1
- package/telegram-plugin/dist/bridge/bridge.js +112 -112
- package/telegram-plugin/dist/gateway/gateway.js +220 -198
- package/telegram-plugin/dist/server.js +160 -160
- package/telegram-plugin/gateway/gateway.ts +42 -18
- package/telegram-plugin/gateway/turn-state-purge.ts +14 -0
- package/telegram-plugin/silence-poke.ts +26 -0
- package/telegram-plugin/status-reactions.ts +14 -0
- package/telegram-plugin/tests/silence-poke.test.ts +36 -0
- package/telegram-plugin/tests/status-reactions.test.ts +16 -0
- package/telegram-plugin/tests/turn-state-purge.test.ts +28 -0
- package/telegram-plugin/uat/scenarios/fuzz-supergroup-channel.test.ts +11 -6
- package/telegram-plugin/uat/scenarios/jtbd-foreground-subagent-activity-channel.test.ts +104 -0
- package/telegram-plugin/uat/scenarios/jtbd-supergroup-handback-channel.test.ts +77 -0
- package/telegram-plugin/uat/scenarios/jtbd-worker-activity-feed-channel.test.ts +114 -0
|
@@ -3912,10 +3912,18 @@ silencePoke.startTimer({
|
|
|
3912
3912
|
// (CC-4 in `docs/status-ask-cause-classes.md`). Derives "N min" suffix
|
|
3913
3913
|
// from `ctx.silenceMs` so the wording stays honest if the 300s
|
|
3914
3914
|
// threshold is tuned.
|
|
3915
|
+
// Honesty: if the turn is parked on an approval card (the dominant
|
|
3916
|
+
// benign "wedge" class — claude is alive, waiting on the operator's
|
|
3917
|
+
// tap), say so instead of "still working…". The reaction controller
|
|
3918
|
+
// already tracks this (setAwaiting on the permission-request park).
|
|
3919
|
+
const blockedOnApproval = activeStatusReactions
|
|
3920
|
+
.get(statusKey(ctx.chatId, ctx.threadId))
|
|
3921
|
+
?.isAwaiting() ?? false
|
|
3915
3922
|
text = silencePoke.formatFrameworkFallbackText(
|
|
3916
3923
|
ctx.fallbackKind,
|
|
3917
3924
|
ctx.silenceMs,
|
|
3918
3925
|
ctx.inFlightTools,
|
|
3926
|
+
blockedOnApproval,
|
|
3919
3927
|
)
|
|
3920
3928
|
}
|
|
3921
3929
|
try {
|
|
@@ -4033,16 +4041,29 @@ silencePoke.startTimer({
|
|
|
4033
4041
|
// SAME chat (different threads, or a `null` vs `undefined`-thread
|
|
4034
4042
|
// variant left over from a normal turn-end path that nulled
|
|
4035
4043
|
// currentTurn without invoking purgeReactionTracking — the
|
|
4036
|
-
// gymbro/klanker held-mid-turn symptom, 2026-05-20)
|
|
4037
|
-
//
|
|
4038
|
-
// (the chat has been silent ≥5 min); sweep them via the same
|
|
4039
|
-
// purger. Multi-chat-safe — only touches keys for fbChatId, so
|
|
4044
|
+
// gymbro/klanker held-mid-turn symptom, 2026-05-20); sweep them via
|
|
4045
|
+
// the same purger. Multi-chat-safe — only touches keys for fbChatId, so
|
|
4040
4046
|
// #1546's intentional cross-chat safety guard is preserved.
|
|
4047
|
+
//
|
|
4048
|
+
// BUT a sibling is NOT "by definition stale": in one-agent-owns-supergroup
|
|
4049
|
+
// every forum topic shares fbChatId, so a chatId-only sweep would purge a
|
|
4050
|
+
// LIVE sibling topic's reaction controller + typing loop when THIS topic's
|
|
4051
|
+
// poke fires. Gate each sibling on its OWN silence clock — purge only those
|
|
4052
|
+
// also silent ≥ the fallback threshold (their own poke would fire too),
|
|
4053
|
+
// sparing topics that are actively mid-turn. Use silence, not turn-start
|
|
4054
|
+
// age, so a long-but-narrating turn isn't mistaken for stale.
|
|
4041
4055
|
// See turn-state-purge.ts.
|
|
4056
|
+
const fbNow = Date.now()
|
|
4042
4057
|
const fbExtraPurge = purgeStaleTurnsForChat(
|
|
4043
4058
|
fbChatId,
|
|
4044
4059
|
activeTurnStartedAt.keys(),
|
|
4045
4060
|
purgeReactionTracking,
|
|
4061
|
+
(siblingKey) => {
|
|
4062
|
+
if (siblingKey === fbKey) return true // the firing key is genuinely stale
|
|
4063
|
+
const sib = silencePoke.silenceMsForKey(siblingKey, fbNow)
|
|
4064
|
+
// No silence-poke state → dangling (turn ended, key not purged) → stale.
|
|
4065
|
+
return sib == null || sib >= silencePoke.DEFAULT_THRESHOLDS.fallback
|
|
4066
|
+
},
|
|
4046
4067
|
)
|
|
4047
4068
|
// Null `currentTurn` if it's still pointing at the wedged turn —
|
|
4048
4069
|
// when claude eventually fires a late `turn_end` for this session
|
|
@@ -8154,17 +8175,19 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
8154
8175
|
// Phase tracking removed in #553 PR 5 — phases only fed the
|
|
8155
8176
|
// placeholder-heartbeat label, which has been retired.
|
|
8156
8177
|
if (isTelegramReplyTool(name)) {
|
|
8157
|
-
const wasFirstReply = !turn.replyCalled
|
|
8158
8178
|
turn.replyCalled = true
|
|
8159
8179
|
if (turn.orphanedReplyTimeoutId != null) {
|
|
8160
8180
|
clearTimeout(turn.orphanedReplyTimeoutId)
|
|
8161
8181
|
turn.orphanedReplyTimeoutId = null
|
|
8162
8182
|
}
|
|
8163
|
-
//
|
|
8164
|
-
//
|
|
8165
|
-
//
|
|
8166
|
-
//
|
|
8167
|
-
|
|
8183
|
+
// Delete the activity feed only when the FINAL answer has landed —
|
|
8184
|
+
// NOT on an ack-first interim reply ("On it"). Gating on the first
|
|
8185
|
+
// reply deleted the feed on the ack, so the post-ack work
|
|
8186
|
+
// (sub-agents/tools) rendered into nothing — the "agent went silent
|
|
8187
|
+
// after On it" gap. `finalAnswerDelivered` is set by executeReply
|
|
8188
|
+
// (isFinalAnswerReply) before this tool_use event fires; turn_end
|
|
8189
|
+
// (below) clears unconditionally as the idempotent no-reply / race net.
|
|
8190
|
+
if (turn.finalAnswerDelivered) {
|
|
8168
8191
|
clearActivitySummary(turn)
|
|
8169
8192
|
}
|
|
8170
8193
|
}
|
|
@@ -8195,15 +8218,16 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
8195
8218
|
// Surface tools (reply/stream_reply/react) are the conversation, not
|
|
8196
8219
|
// activity — the hook labels them ("Replying"), so filter by name.
|
|
8197
8220
|
if (isTelegramSurfaceTool(ev.toolName)) return
|
|
8198
|
-
// Stop feeding once the
|
|
8199
|
-
//
|
|
8200
|
-
//
|
|
8201
|
-
//
|
|
8202
|
-
//
|
|
8221
|
+
// Stop feeding once the FINAL answer has landed — the hand-off where
|
|
8222
|
+
// `clearActivitySummary` deletes the feed so the answer is the
|
|
8223
|
+
// authoritative surface. Gating on `replyCalled` (any reply) killed the
|
|
8224
|
+
// feed on an ack-first interim "On it", so the post-ack work had no live
|
|
8225
|
+
// surface; gate on `finalAnswerDelivered` so the feed keeps narrating
|
|
8226
|
+
// between the ack and the real answer. Without this a tool called after
|
|
8227
|
+
// the FINAL answer would re-`sendMessage` a fresh feed below it (flicker).
|
|
8203
8228
|
// Safe ordering: `tool_label` is real-time (PreToolUse, ~250ms) while
|
|
8204
|
-
// `
|
|
8205
|
-
|
|
8206
|
-
if (turn.replyCalled) return
|
|
8229
|
+
// `finalAnswerDelivered` is set from executeReply on the final answer.
|
|
8230
|
+
if (turn.finalAnswerDelivered) return
|
|
8207
8231
|
const rendered = appendActivityLabel(turn.mirrorLines, ev.label)
|
|
8208
8232
|
if (rendered != null) {
|
|
8209
8233
|
// Recompose so any active foreground sub-agent's nested block (Model A)
|
|
@@ -50,6 +50,19 @@ export function purgeStaleTurnsForChat(
|
|
|
50
50
|
chatId: string,
|
|
51
51
|
keys: Iterable<string>,
|
|
52
52
|
purger: (key: string) => void,
|
|
53
|
+
/**
|
|
54
|
+
* Per-sibling staleness gate. A sibling key for `chatId` is purged only when
|
|
55
|
+
* this returns true. CRITICAL for one-agent-owns-supergroup: all of an
|
|
56
|
+
* agent's forum topics share the SAME chatId, so a chatId-only match would
|
|
57
|
+
* purge a LIVE sibling topic's reaction controller + typing loop when ANOTHER
|
|
58
|
+
* topic's 300s silence-poke fires (the gymbro/klanker wedge class). The
|
|
59
|
+
* caller passes a predicate true only for siblings themselves silent ≥ the
|
|
60
|
+
* fallback threshold (their own poke would also fire) — preserving the #1556
|
|
61
|
+
* dangling-key cleanup while sparing live siblings. Defaults to always-stale
|
|
62
|
+
* for back-compat (DM / single-topic callers, where every sibling is
|
|
63
|
+
* genuinely dangling).
|
|
64
|
+
*/
|
|
65
|
+
isStale: (key: string) => boolean = () => true,
|
|
53
66
|
): PurgeStaleTurnsResult {
|
|
54
67
|
if (!chatId) return { purged: [] }
|
|
55
68
|
const purged: string[] = []
|
|
@@ -64,6 +77,7 @@ export function purgeStaleTurnsForChat(
|
|
|
64
77
|
if (sep < 0) continue // malformed / non-statusKey shape — skip
|
|
65
78
|
const keyChat = key.slice(0, sep)
|
|
66
79
|
if (keyChat !== chatId) continue
|
|
80
|
+
if (!isStale(key)) continue // live sibling topic — leave its turn state intact
|
|
67
81
|
purger(key)
|
|
68
82
|
purged.push(key)
|
|
69
83
|
}
|
|
@@ -244,6 +244,23 @@ export function endTurn(key: string): void {
|
|
|
244
244
|
state.delete(key)
|
|
245
245
|
}
|
|
246
246
|
|
|
247
|
+
/**
|
|
248
|
+
* Current silence duration (ms) for a key — `now - (lastOutboundAt ??
|
|
249
|
+
* turnStartedAt)`, the same clock `tick()` uses to decide the 300s fallback —
|
|
250
|
+
* or null when no turn state exists for the key. Lets the sibling-topic purge
|
|
251
|
+
* distinguish a STALE/wedged sibling (silent ≥ the fallback threshold, so its
|
|
252
|
+
* own poke would also fire) from a LIVE one mid-turn (recent outbound, low
|
|
253
|
+
* silence), so a silence-poke on one supergroup topic doesn't purge a live
|
|
254
|
+
* sibling topic's reaction controller + typing loop. NB: this is silence, NOT
|
|
255
|
+
* turn-start age — a long but actively-narrating turn has low silence and must
|
|
256
|
+
* not be treated as stale.
|
|
257
|
+
*/
|
|
258
|
+
export function silenceMsForKey(key: string, now: number): number | null {
|
|
259
|
+
const s = state.get(key)
|
|
260
|
+
if (s == null) return null
|
|
261
|
+
return now - (s.lastOutboundAt ?? s.turnStartedAt)
|
|
262
|
+
}
|
|
263
|
+
|
|
247
264
|
/**
|
|
248
265
|
* Verbatim framework-fallback text — the user-visible "still working / still
|
|
249
266
|
* thinking" message the gateway sends at the 300s threshold when the model
|
|
@@ -264,8 +281,17 @@ export function formatFrameworkFallbackText(
|
|
|
264
281
|
fallbackKind: 'working' | 'thinking',
|
|
265
282
|
silenceMs: number,
|
|
266
283
|
inFlightTools: ToolSnapshot[] = [],
|
|
284
|
+
blockedOnApproval = false,
|
|
267
285
|
): string {
|
|
268
286
|
const minutes = Math.max(1, Math.round(silenceMs / 60_000))
|
|
287
|
+
// The turn isn't stalled — it's parked on an approval card waiting for YOUR
|
|
288
|
+
// tap (the dominant live "wedge" class is benign approval-latency, not a
|
|
289
|
+
// hang). Saying "still working…" here actively lies; name the real blocker so
|
|
290
|
+
// the operator knows the ball is in their court. Takes precedence over the
|
|
291
|
+
// in-flight-tool framing (a tool awaiting approval isn't "running").
|
|
292
|
+
if (blockedOnApproval) {
|
|
293
|
+
return `waiting for your approval — tap Approve or Deny on the card above (${minutes} min)`
|
|
294
|
+
}
|
|
269
295
|
const suffix = `(no update from agent in ${minutes} min)`
|
|
270
296
|
// #1292 case (a): tools in flight. Name the longest-running one
|
|
271
297
|
// (entry[0] — caller pre-sorts by startedAt ascending). Avoid the
|
|
@@ -144,6 +144,10 @@ export class StatusReactionController {
|
|
|
144
144
|
private stallHardTimer: ReturnType<typeof setTimeout> | null = null
|
|
145
145
|
private finished = false
|
|
146
146
|
private held = false
|
|
147
|
+
// True while parked on the awaiting-approval state (🙏): the turn is blocked
|
|
148
|
+
// on the operator's tap, not stalled. Read by the silence-poke fallback so it
|
|
149
|
+
// says "waiting for your approval" instead of the dishonest "still working…".
|
|
150
|
+
private awaitingApproval = false
|
|
147
151
|
private readonly debounceMs: number
|
|
148
152
|
private readonly stallSoftMs: number
|
|
149
153
|
private readonly stallHardMs: number
|
|
@@ -272,11 +276,21 @@ export class StatusReactionController {
|
|
|
272
276
|
|
|
273
277
|
// ──────────────────────────────────────────────────────────────────────
|
|
274
278
|
|
|
279
|
+
/** True while the turn is parked awaiting the operator's approval tap (🙏).
|
|
280
|
+
* The silence-poke fallback reads this to phrase its 300s message honestly
|
|
281
|
+
* ("waiting for your approval") instead of "still working…". */
|
|
282
|
+
isAwaiting(): boolean {
|
|
283
|
+
return this.awaitingApproval && !this.finished
|
|
284
|
+
}
|
|
285
|
+
|
|
275
286
|
private scheduleState(
|
|
276
287
|
state: ReactionState,
|
|
277
288
|
opts: { immediate?: boolean; skipStallReset?: boolean } = {},
|
|
278
289
|
): void {
|
|
279
290
|
if (this.finished) return
|
|
291
|
+
// Track the awaiting-approval state for isAwaiting(). Any non-awaiting
|
|
292
|
+
// state transition (setThinking/setTool/… on verdict resume) clears it.
|
|
293
|
+
this.awaitingApproval = state === 'awaiting'
|
|
280
294
|
const emoji = this.resolveEmoji(state)
|
|
281
295
|
if (emoji == null) {
|
|
282
296
|
if (!opts.skipStallReset) this.resetStallTimers()
|
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
noteToolEnd,
|
|
8
8
|
noteToolLabel,
|
|
9
9
|
endTurn,
|
|
10
|
+
silenceMsForKey,
|
|
10
11
|
silencePokeEnabled,
|
|
11
12
|
formatFrameworkFallbackText,
|
|
12
13
|
__tickForTests,
|
|
@@ -275,6 +276,26 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
275
276
|
).toBe('still working… (no update from agent in 5 min)')
|
|
276
277
|
})
|
|
277
278
|
|
|
279
|
+
it('blockedOnApproval names the real blocker instead of the dishonest "still working…"', () => {
|
|
280
|
+
expect(
|
|
281
|
+
formatFrameworkFallbackText('working', 305_000, [], true),
|
|
282
|
+
).toBe('waiting for your approval — tap Approve or Deny on the card above (5 min)')
|
|
283
|
+
})
|
|
284
|
+
|
|
285
|
+
it('blockedOnApproval takes precedence over an in-flight tool (a tool awaiting approval is not "running")', () => {
|
|
286
|
+
expect(
|
|
287
|
+
formatFrameworkFallbackText('working', 305_000, [
|
|
288
|
+
{ name: 'Bash', label: 'rm -rf build', durationMs: 305_000 },
|
|
289
|
+
], true),
|
|
290
|
+
).toBe('waiting for your approval — tap Approve or Deny on the card above (5 min)')
|
|
291
|
+
})
|
|
292
|
+
|
|
293
|
+
it('blockedOnApproval=false keeps the existing wording (default, back-compat)', () => {
|
|
294
|
+
expect(
|
|
295
|
+
formatFrameworkFallbackText('working', 305_000, [], false),
|
|
296
|
+
).toBe('still working… (no update from agent in 5 min)')
|
|
297
|
+
})
|
|
298
|
+
|
|
278
299
|
it('tool-aware wording wins over "thinking" — the actual observable beats the inferred kind', () => {
|
|
279
300
|
const text = formatFrameworkFallbackText('thinking', 305_000, [
|
|
280
301
|
{ name: 'Grep', label: '"foo"', durationMs: 305_000 },
|
|
@@ -340,6 +361,21 @@ describe('silence-poke — #1292 tool-aware framework fallback', () => {
|
|
|
340
361
|
expect(fx.fallbacks).toHaveLength(1)
|
|
341
362
|
})
|
|
342
363
|
|
|
364
|
+
it('silenceMsForKey reports silence from last outbound (or turn start), null when unknown', () => {
|
|
365
|
+
setupDeps()
|
|
366
|
+
startTurn('k', 1_000)
|
|
367
|
+
// No outbound yet → silence measured from turnStartedAt.
|
|
368
|
+
expect(silenceMsForKey('k', 1_000 + 120_000)).toBe(120_000)
|
|
369
|
+
noteOutbound('k', 1_000 + 50_000)
|
|
370
|
+
// After an outbound → silence measured from lastOutboundAt.
|
|
371
|
+
expect(silenceMsForKey('k', 1_000 + 120_000)).toBe(70_000)
|
|
372
|
+
// Unknown key / ended turn → null (used by the sibling purge to treat a
|
|
373
|
+
// dangling key as stale).
|
|
374
|
+
expect(silenceMsForKey('never-started', 999_999)).toBeNull()
|
|
375
|
+
endTurn('k')
|
|
376
|
+
expect(silenceMsForKey('k', 999_999)).toBeNull()
|
|
377
|
+
})
|
|
378
|
+
|
|
343
379
|
it('Task tool populates inFlightTools so the fallback names it as the observable', () => {
|
|
344
380
|
const fx = setupDeps()
|
|
345
381
|
startTurn('k', 0)
|
|
@@ -94,6 +94,22 @@ describe('StatusReactionController', () => {
|
|
|
94
94
|
expect(calls).toEqual(['👀'])
|
|
95
95
|
})
|
|
96
96
|
|
|
97
|
+
it('isAwaiting() tracks the awaiting-approval state (for the honest silence-poke copy)', async () => {
|
|
98
|
+
const { emit } = makeEmitter()
|
|
99
|
+
const ctrl = new StatusReactionController(emit)
|
|
100
|
+
expect(ctrl.isAwaiting()).toBe(false)
|
|
101
|
+
ctrl.setAwaiting()
|
|
102
|
+
expect(ctrl.isAwaiting()).toBe(true)
|
|
103
|
+
// The verdict resume (setThinking) un-parks → no longer awaiting.
|
|
104
|
+
ctrl.setThinking()
|
|
105
|
+
expect(ctrl.isAwaiting()).toBe(false)
|
|
106
|
+
// Re-park, then finish → isAwaiting is false once the turn ends.
|
|
107
|
+
ctrl.setAwaiting()
|
|
108
|
+
expect(ctrl.isAwaiting()).toBe(true)
|
|
109
|
+
ctrl.finalize()
|
|
110
|
+
expect(ctrl.isAwaiting()).toBe(false)
|
|
111
|
+
})
|
|
112
|
+
|
|
97
113
|
it('setThinking is debounced by 3500ms (#1713)', async () => {
|
|
98
114
|
const { emit, calls } = makeEmitter()
|
|
99
115
|
const ctrl = new StatusReactionController(emit)
|
|
@@ -106,4 +106,32 @@ describe('purgeStaleTurnsForChat', () => {
|
|
|
106
106
|
expect(r.purged.sort()).toEqual(['123:7', '123:_'])
|
|
107
107
|
expect([...map.keys()]).toEqual(['999:_']) // multi-chat safety preserved
|
|
108
108
|
})
|
|
109
|
+
|
|
110
|
+
// #2 supergroup sibling-topic fix: one agent owns the supergroup, so all
|
|
111
|
+
// forum topics share the chatId. A 300s poke on topic A must NOT purge a
|
|
112
|
+
// LIVE sibling topic B's turn state — only siblings that are themselves stale.
|
|
113
|
+
it('isStale predicate spares live sibling topics (the supergroup fix)', () => {
|
|
114
|
+
const purged: string[] = []
|
|
115
|
+
const live = new Set(['-100:7']) // topic 7 is actively mid-turn
|
|
116
|
+
const r = purgeStaleTurnsForChat(
|
|
117
|
+
'-100',
|
|
118
|
+
['-100:4', '-100:7', '999:_'],
|
|
119
|
+
(k) => purged.push(k),
|
|
120
|
+
(k) => !live.has(k), // stale iff not live
|
|
121
|
+
)
|
|
122
|
+
expect(r.purged).toEqual(['-100:4']) // only the stale topic purged
|
|
123
|
+
expect(purged).toEqual(['-100:4']) // live topic 7 + other chat untouched
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
it('isStale=false for every sibling purges nothing (all topics live)', () => {
|
|
127
|
+
const purged: string[] = []
|
|
128
|
+
purgeStaleTurnsForChat('-100', ['-100:4', '-100:7'], (k) => purged.push(k), () => false)
|
|
129
|
+
expect(purged).toEqual([])
|
|
130
|
+
})
|
|
131
|
+
|
|
132
|
+
it('default isStale (omitted) purges every chatId match — back-compat', () => {
|
|
133
|
+
const purged: string[] = []
|
|
134
|
+
const r = purgeStaleTurnsForChat('123', ['123:_', '123:7', '999:_'], (k) => purged.push(k))
|
|
135
|
+
expect(r.purged.sort()).toEqual(['123:7', '123:_'])
|
|
136
|
+
})
|
|
109
137
|
})
|
|
@@ -121,12 +121,17 @@ describe("uat: supergroup human-style fuzz — JTBD invariants in a channel", ()
|
|
|
121
121
|
const meaningful = isMeaningfulReply(reply.text);
|
|
122
122
|
expect(meaningful.ok, `[sg-fuzz] ${fc.name}: ${meaningful.reason}`).toBe(true);
|
|
123
123
|
|
|
124
|
-
// Invariant 4 (
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
124
|
+
// Invariant 4 (SOFT): shape match when predictable. Like the DM
|
|
125
|
+
// fuzz, this is a "did the model engage the topic at all" diagnostic,
|
|
126
|
+
// NOT a correctness gate — different runs produce different valid
|
|
127
|
+
// wording (e.g. a clarifying question, or "use the package manager"
|
|
128
|
+
// without the literal "apt"). Log and continue; the load-bearing
|
|
129
|
+
// invariants are 1-3 (meaningful, leak-free, in the supergroup).
|
|
130
|
+
if (fc.expectMatch && !fc.expectMatch.test(reply.text)) {
|
|
131
|
+
console.warn(
|
|
132
|
+
`[sg-fuzz] ${fc.name}: reply didn't match ${fc.expectMatch} (soft) — ` +
|
|
133
|
+
`preview: ${JSON.stringify(reply.text.slice(0, 200))}`,
|
|
134
|
+
);
|
|
130
135
|
}
|
|
131
136
|
} finally {
|
|
132
137
|
await sc.tearDown();
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Foreground sub-agent live activity nesting in a SUPERGROUP (#2032 + #2099) — UAT.
|
|
3
|
+
*
|
|
4
|
+
* Channel twin of `jtbd-foreground-subagent-activity-dm`. A FOREGROUND
|
|
5
|
+
* sub-agent (Agent/Task `run_in_background:false`) dispatched from a supergroup
|
|
6
|
+
* — after an ack-first "On it" reply — must nest its live steps into the
|
|
7
|
+
* parent's activity-summary feed IN the supergroup. Proves the foreground
|
|
8
|
+
* sub-agent status surface has DM/channel parity (and exercises #2099's
|
|
9
|
+
* tool-step nesting + #2032's render-regardless-of-replyCalled in a channel).
|
|
10
|
+
*
|
|
11
|
+
* Asserts the load-bearing proof: an activity-summary feed message carrying the
|
|
12
|
+
* nested "↳" marker appears IN the supergroup AFTER the ack, then the turn
|
|
13
|
+
* completes cleanly. Self-skips when no test supergroup is wired. Uses the
|
|
14
|
+
* General topic (mtcute here has no forum-topic create API). NOT a draft —
|
|
15
|
+
* the activity-summary feed is a real sendMessage/editMessageText, so mtcute
|
|
16
|
+
* can observe it.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { describe, expect, it } from "vitest";
|
|
20
|
+
import { spinUp } from "../harness.js";
|
|
21
|
+
import { expectMessage } from "../assertions.js";
|
|
22
|
+
|
|
23
|
+
const AGENT = "test-harness";
|
|
24
|
+
const SUPERGROUP_ID = Number.parseInt(process.env.SWITCHROOM_UAT_CHAT_ID ?? "", 10);
|
|
25
|
+
|
|
26
|
+
const FG_DISPATCH_PROMPT =
|
|
27
|
+
`First, immediately send me a one-line acknowledgement that you're starting ` +
|
|
28
|
+
`(just "On it — running a check now."). Then use the Agent tool with ` +
|
|
29
|
+
`subagent_type "general-purpose" and run_in_background: false (a FOREGROUND ` +
|
|
30
|
+
`sub-agent) with this exact task: "Do eight steps, ONE AT A TIME, k = 1 ` +
|
|
31
|
+
`through 8. Before each step write a brief one-sentence narration of what ` +
|
|
32
|
+
`you are about to do, then run \`sleep 2\` via the Bash tool, then run ` +
|
|
33
|
+
`\`echo step-k\` via the Bash tool (substitute the real number for k). Run ` +
|
|
34
|
+
`every sleep and every echo as its OWN separate Bash call — never batch or ` +
|
|
35
|
+
`chain them with && — and narrate before each so progress surfaces ` +
|
|
36
|
+
`incrementally. Do not stop early; complete all eight steps, then return a ` +
|
|
37
|
+
`one-line summary." Wait for the foreground sub-agent to finish, then send ` +
|
|
38
|
+
`me a brief reply telling me it's done.`;
|
|
39
|
+
|
|
40
|
+
const NESTED_RE = /↳/;
|
|
41
|
+
|
|
42
|
+
describe("uat: foreground sub-agent activity nesting in a supergroup (#2032/#2099 channel parity)", () => {
|
|
43
|
+
it(
|
|
44
|
+
"surfaces nested foreground activity in the feed IN the supergroup, after the ack",
|
|
45
|
+
async () => {
|
|
46
|
+
if (!Number.isFinite(SUPERGROUP_ID)) {
|
|
47
|
+
console.warn("[fg-activity channel UAT] SWITCHROOM_UAT_CHAT_ID unset — skipping");
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
const sc = await spinUp({ agent: AGENT, settleMs: 0 });
|
|
51
|
+
try {
|
|
52
|
+
await sc.driver.primeDialogs();
|
|
53
|
+
if (!(await sc.driver.canResolve(SUPERGROUP_ID))) {
|
|
54
|
+
console.warn(`[fg-activity channel UAT] supergroup ${SUPERGROUP_ID} not resolvable — skipping`);
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
await sc.driver.sendText(SUPERGROUP_ID, FG_DISPATCH_PROMPT);
|
|
58
|
+
|
|
59
|
+
// Ack-first reply in the supergroup — sets replyCalled=true before the
|
|
60
|
+
// foreground sub-agent runs (the condition that broke #2027).
|
|
61
|
+
const ack = await expectMessage(sc.driver, SUPERGROUP_ID, /.+/, {
|
|
62
|
+
timeout: 60_000,
|
|
63
|
+
senderFilter: { notUserId: sc.driverUserId },
|
|
64
|
+
});
|
|
65
|
+
console.log(`[fg-activity channel UAT] ack-first reply: ${JSON.stringify(ack.text)}`);
|
|
66
|
+
|
|
67
|
+
// The activity-summary feed carrying the NESTED foreground narrative —
|
|
68
|
+
// must land IN the supergroup. Its presence after the ack is the proof.
|
|
69
|
+
const feed = await expectMessage(sc.driver, SUPERGROUP_ID, NESTED_RE, {
|
|
70
|
+
timeout: 90_000,
|
|
71
|
+
senderFilter: { notUserId: sc.driverUserId },
|
|
72
|
+
});
|
|
73
|
+
console.log(
|
|
74
|
+
`[fg-activity channel UAT] nested feed paint (id=${feed.messageId}, chat=${feed.chatId}): ${JSON.stringify(feed.text)}`,
|
|
75
|
+
);
|
|
76
|
+
expect(feed.chatId).toBe(SUPERGROUP_ID); // parity proof: nested feed in the channel
|
|
77
|
+
expect(feed.fromBot).toBe(true);
|
|
78
|
+
expect(feed.text).toMatch(NESTED_RE);
|
|
79
|
+
|
|
80
|
+
// Live edit: re-fetch the SAME message after a few sub-agent steps.
|
|
81
|
+
const before = feed.text;
|
|
82
|
+
await new Promise((r) => setTimeout(r, 10_000));
|
|
83
|
+
const mid = await sc.driver.getMessage(SUPERGROUP_ID, feed.messageId);
|
|
84
|
+
console.log(
|
|
85
|
+
`[fg-activity channel UAT] same feed after 10s (id=${feed.messageId}): ${JSON.stringify(mid?.text ?? null)}`,
|
|
86
|
+
);
|
|
87
|
+
|
|
88
|
+
// Final answer — parent resumes after the foreground sub-agent returns.
|
|
89
|
+
const done = await expectMessage(sc.driver, SUPERGROUP_ID, /done|complete|finished|step-8|wrapped/i, {
|
|
90
|
+
timeout: 120_000,
|
|
91
|
+
senderFilter: { notUserId: sc.driverUserId },
|
|
92
|
+
});
|
|
93
|
+
console.log(`[fg-activity channel UAT] final answer: ${JSON.stringify(done.text)}`);
|
|
94
|
+
expect(done.text.length).toBeGreaterThan(0);
|
|
95
|
+
if (mid?.text != null) {
|
|
96
|
+
console.log(`[fg-activity channel UAT] body moved in-flight: ${mid.text !== before}`);
|
|
97
|
+
}
|
|
98
|
+
} finally {
|
|
99
|
+
await sc.tearDown();
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
300_000,
|
|
103
|
+
);
|
|
104
|
+
});
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD scenario — background-worker HANDBACK lands in the supergroup (#2098).
|
|
3
|
+
*
|
|
4
|
+
* This is the live validation of the headline channel fix: when a background
|
|
5
|
+
* sub-agent (Task/Agent `run_in_background:true`) dispatched from a supergroup
|
|
6
|
+
* finishes, the agent's in-voice "here's what the worker found" handback (beat
|
|
7
|
+
* 4) must land IN the supergroup — not the operator DM (the pre-#2098 bug,
|
|
8
|
+
* where the synthesized handback inbound was thread-blind and the reply fell
|
|
9
|
+
* back to the chat's last-seen topic / owner DM).
|
|
10
|
+
*
|
|
11
|
+
* Mechanism exercised: dispatch a bg worker that returns a unique token →
|
|
12
|
+
* onFinish → buildSubagentHandbackInbound (now carrying the origin topic) →
|
|
13
|
+
* the parent relays the token. We assert the token appears in a BOT message
|
|
14
|
+
* IN the supergroup. Pre-#2098 that handback would not have been threaded to
|
|
15
|
+
* the supergroup; post-#2098 it is.
|
|
16
|
+
*
|
|
17
|
+
* Best-effort + model-dependent: the agent must actually background the task
|
|
18
|
+
* (not inline it) and relay the worker's token. The prompt is explicit. Self-
|
|
19
|
+
* skips when no test supergroup is wired. Uses the General topic (mtcute here
|
|
20
|
+
* has no forum-topic create API); topic-among-many routing is pinned by the
|
|
21
|
+
* #2098 unit thread-assertions.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { describe, it, expect } from "vitest";
|
|
25
|
+
import { spinUp } from "../harness.js";
|
|
26
|
+
import { expectMessage } from "../assertions.js";
|
|
27
|
+
|
|
28
|
+
const AGENT = "test-harness";
|
|
29
|
+
const SUPERGROUP_ID = Number.parseInt(process.env.SWITCHROOM_UAT_CHAT_ID ?? "", 10);
|
|
30
|
+
|
|
31
|
+
/** Worker dispatch + run + onFinish + handback relay — generous budget. */
|
|
32
|
+
const HANDBACK_TIMEOUT_MS = 150_000;
|
|
33
|
+
|
|
34
|
+
describe("uat: supergroup background-worker handback (#2098)", () => {
|
|
35
|
+
it("a dispatched background worker's result is handed back IN the supergroup", async () => {
|
|
36
|
+
if (!Number.isFinite(SUPERGROUP_ID)) {
|
|
37
|
+
console.warn("[uat] SWITCHROOM_UAT_CHAT_ID unset — skipping handback channel scenario");
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
40
|
+
const sc = await spinUp({ agent: AGENT, settleMs: 0 });
|
|
41
|
+
try {
|
|
42
|
+
await sc.driver.primeDialogs();
|
|
43
|
+
if (!(await sc.driver.canResolve(SUPERGROUP_ID))) {
|
|
44
|
+
console.warn(`[uat] supergroup ${SUPERGROUP_ID} not resolvable — skipping handback channel scenario`);
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Unique token the worker must echo back, so the handback relay is
|
|
49
|
+
// unambiguous and can't latch onto unrelated chatter.
|
|
50
|
+
const token = `HBK${Date.now().toString(36).toUpperCase()}`;
|
|
51
|
+
await sc.driver.sendText(
|
|
52
|
+
SUPERGROUP_ID,
|
|
53
|
+
`Dispatch a BACKGROUND worker (a Task with run_in_background:true) that ` +
|
|
54
|
+
`runs a shell sleep of about 8 seconds and then returns exactly the ` +
|
|
55
|
+
`token ${token}. Do NOT do it inline — it must be a background Task so ` +
|
|
56
|
+
`you can acknowledge first. Acknowledge now, and when the worker reports ` +
|
|
57
|
+
`back, relay its token (${token}) here in this group.`,
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
// The handback relay — a bot message in the supergroup carrying the
|
|
61
|
+
// worker's token. (The interim "on it" ack may arrive first; we wait
|
|
62
|
+
// for the message that actually carries the token, which is the
|
|
63
|
+
// post-handback relay.)
|
|
64
|
+
const relay = await expectMessage(
|
|
65
|
+
sc.driver,
|
|
66
|
+
SUPERGROUP_ID,
|
|
67
|
+
(m) => m.text.includes(token),
|
|
68
|
+
{ timeout: HANDBACK_TIMEOUT_MS, senderFilter: { notUserId: sc.driverUserId } },
|
|
69
|
+
);
|
|
70
|
+
|
|
71
|
+
expect(relay.chatId).toBe(SUPERGROUP_ID);
|
|
72
|
+
expect(relay.fromBot).toBe(true);
|
|
73
|
+
} finally {
|
|
74
|
+
await sc.tearDown();
|
|
75
|
+
}
|
|
76
|
+
}, HANDBACK_TIMEOUT_MS + 30_000);
|
|
77
|
+
});
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live worker-activity feed in a SUPERGROUP (#2000 + #2098 routing) — UAT.
|
|
3
|
+
*
|
|
4
|
+
* Channel twin of `jtbd-worker-activity-feed-dm`. A background sub-agent
|
|
5
|
+
* (Agent/Task `run_in_background:true`) dispatched from a supergroup must
|
|
6
|
+
* surface its live `🛠 Worker · …` feed message IN the supergroup — not the
|
|
7
|
+
* operator DM (the pre-v0.14.32 "always route to DM" bug). Proves the
|
|
8
|
+
* background-worker status surface has DM/channel parity.
|
|
9
|
+
*
|
|
10
|
+
* Asserts: (1) a worker-feed message appears IN the supergroup, from the bot;
|
|
11
|
+
* (2) it edits in place while work is in flight; (3) it finalizes to the
|
|
12
|
+
* terminal recap; (4) no raw Markdown leaks (#94-class guard).
|
|
13
|
+
*
|
|
14
|
+
* Self-skips when no test supergroup is wired. Uses the General topic (mtcute
|
|
15
|
+
* here has no forum-topic create API). Same paced-narration dispatch as the
|
|
16
|
+
* DM version so the worker's jsonl ticks under the test-harness 5s stall floor.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { describe, expect, it } from "vitest";
|
|
20
|
+
import { spinUp } from "../harness.js";
|
|
21
|
+
import { expectMessage } from "../assertions.js";
|
|
22
|
+
|
|
23
|
+
const AGENT = "test-harness";
|
|
24
|
+
const SUPERGROUP_ID = Number.parseInt(process.env.SWITCHROOM_UAT_CHAT_ID ?? "", 10);
|
|
25
|
+
|
|
26
|
+
const BG_DISPATCH_PROMPT =
|
|
27
|
+
`Use the Agent tool with subagent_type "general-purpose" and ` +
|
|
28
|
+
`run_in_background: true to dispatch a worker with this exact task: ` +
|
|
29
|
+
`"Do ten steps, ONE AT A TIME, k = 1 through 10. Before each step ` +
|
|
30
|
+
`write a brief one-sentence narration of what you are about to do, ` +
|
|
31
|
+
`then run \`sleep 2\` via the Bash tool, then run \`echo step-k\` via ` +
|
|
32
|
+
`the Bash tool (substitute the real number for k). Run every sleep and ` +
|
|
33
|
+
`every echo as its OWN separate Bash call — never batch or chain them ` +
|
|
34
|
+
`with && — and narrate before each so progress surfaces incrementally. ` +
|
|
35
|
+
`Do not stop early; complete all ten steps." After dispatching, send a ` +
|
|
36
|
+
`brief reply saying you've kicked off the background worker so I can ` +
|
|
37
|
+
`watch its progress.`;
|
|
38
|
+
|
|
39
|
+
const WORKER_FEED_RE = /🛠\s*Worker|running\s*·|finished\s*·/i;
|
|
40
|
+
const WORKER_DONE_RE = /finished\s*·\s*(completed|failed)/i;
|
|
41
|
+
|
|
42
|
+
describe("uat: live worker-activity feed in a supergroup (#2000 channel parity)", () => {
|
|
43
|
+
it(
|
|
44
|
+
"surfaces a background worker as a live, editing message IN the supergroup",
|
|
45
|
+
async () => {
|
|
46
|
+
if (!Number.isFinite(SUPERGROUP_ID)) {
|
|
47
|
+
console.warn("[worker-feed channel UAT] SWITCHROOM_UAT_CHAT_ID unset — skipping");
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
const sc = await spinUp({ agent: AGENT, settleMs: 0 });
|
|
51
|
+
try {
|
|
52
|
+
await sc.driver.primeDialogs();
|
|
53
|
+
if (!(await sc.driver.canResolve(SUPERGROUP_ID))) {
|
|
54
|
+
console.warn(`[worker-feed channel UAT] supergroup ${SUPERGROUP_ID} not resolvable — skipping`);
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
await sc.driver.sendText(SUPERGROUP_ID, BG_DISPATCH_PROMPT);
|
|
58
|
+
|
|
59
|
+
// Parent ack in the supergroup so we know the parent turn closed.
|
|
60
|
+
const ack = await expectMessage(sc.driver, SUPERGROUP_ID, /.+/, {
|
|
61
|
+
timeout: 45_000,
|
|
62
|
+
senderFilter: { notUserId: sc.driverUserId },
|
|
63
|
+
});
|
|
64
|
+
console.log(`[worker-feed channel UAT] parent ack: ${JSON.stringify(ack.text)}`);
|
|
65
|
+
|
|
66
|
+
// The worker-feed message — must land IN the supergroup.
|
|
67
|
+
const feed = await expectMessage(sc.driver, SUPERGROUP_ID, WORKER_FEED_RE, {
|
|
68
|
+
timeout: 75_000,
|
|
69
|
+
senderFilter: { notUserId: sc.driverUserId },
|
|
70
|
+
});
|
|
71
|
+
console.log(
|
|
72
|
+
`[worker-feed channel UAT] first feed paint (id=${feed.messageId}, chat=${feed.chatId}): ${JSON.stringify(feed.text)}`,
|
|
73
|
+
);
|
|
74
|
+
expect(feed.chatId).toBe(SUPERGROUP_ID); // parity proof: in the channel, not the DM
|
|
75
|
+
expect(feed.fromBot).toBe(true);
|
|
76
|
+
expect(feed.messageId).toBeGreaterThan(0);
|
|
77
|
+
|
|
78
|
+
// Live edit: re-fetch the SAME message after the throttle. Only its survival is asserted here; the text change is asserted against the terminal recap below.
|
|
79
|
+
const before = feed.text;
|
|
80
|
+
await new Promise((r) => setTimeout(r, 12_000));
|
|
81
|
+
const mid = await sc.driver.getMessage(SUPERGROUP_ID, feed.messageId);
|
|
82
|
+
console.log(
|
|
83
|
+
`[worker-feed channel UAT] after 12s (id=${feed.messageId}): ${JSON.stringify(mid?.text ?? null)}`,
|
|
84
|
+
);
|
|
85
|
+
expect(mid, "worker-feed message vanished mid-flight").not.toBeNull();
|
|
86
|
+
|
|
87
|
+
// Terminal recap — poll the same message every 5s (up to 120s) until it reads "finished · completed" or "finished · failed".
|
|
88
|
+
let doneText: string | null = null;
|
|
89
|
+
const deadline = Date.now() + 120_000;
|
|
90
|
+
while (Date.now() < deadline) {
|
|
91
|
+
const m = await sc.driver.getMessage(SUPERGROUP_ID, feed.messageId);
|
|
92
|
+
if (m != null && WORKER_DONE_RE.test(m.text)) {
|
|
93
|
+
doneText = m.text;
|
|
94
|
+
break;
|
|
95
|
+
}
|
|
96
|
+
await new Promise((r) => setTimeout(r, 5_000));
|
|
97
|
+
}
|
|
98
|
+
console.log(
|
|
99
|
+
`[worker-feed channel UAT] terminal (id=${feed.messageId}): ${JSON.stringify(doneText)}`,
|
|
100
|
+
);
|
|
101
|
+
expect(doneText, "worker-feed never reached a terminal recap").not.toBeNull();
|
|
102
|
+
expect(doneText!).toMatch(/tools?|tool ·/i);
|
|
103
|
+
expect(doneText).not.toBe(before);
|
|
104
|
+
// #94-class regression guard: no raw Markdown in the native card.
|
|
105
|
+
expect(doneText!, "raw ** leaked into the card").not.toMatch(/\*\*/);
|
|
106
|
+
expect(doneText!, "raw backtick leaked into the card").not.toContain("`");
|
|
107
|
+
expect(doneText!, "raw --- rule leaked into the card").not.toMatch(/(^|\n)\s*-{3,}\s*(\n|$)/);
|
|
108
|
+
} finally {
|
|
109
|
+
await sc.tearDown();
|
|
110
|
+
}
|
|
111
|
+
},
|
|
112
|
+
240_000,
|
|
113
|
+
);
|
|
114
|
+
});
|