switchroom 0.15.45 → 0.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +122 -88
- package/dist/auth-broker/index.js +463 -177
- package/dist/cli/autoaccept-poll.js +4842 -35
- package/dist/cli/drive-write-pretool.mjs +17 -14
- package/dist/cli/notion-write-pretool.mjs +117 -86
- package/dist/cli/self-improve-apply-guard-pretool.mjs +626 -0
- package/dist/cli/self-improve-stop.mjs +428 -0
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +3158 -1178
- package/dist/host-control/main.js +2833 -355
- package/dist/vault/approvals/kernel-server.js +7479 -7439
- package/dist/vault/broker/server.js +11312 -11272
- package/examples/minimal.yaml +1 -0
- package/examples/switchroom.yaml +1 -0
- package/package.json +3 -3
- package/profiles/_base/start.sh.hbs +88 -1
- package/profiles/_shared/execution-discipline.md.hbs +18 -0
- package/profiles/default/CLAUDE.md.hbs +0 -19
- package/telegram-plugin/.claude-plugin/plugin.json +2 -2
- package/telegram-plugin/answer-stream-flag.ts +12 -49
- package/telegram-plugin/answer-stream.ts +5 -150
- package/telegram-plugin/auth-snapshot-format.ts +280 -48
- package/telegram-plugin/auto-fallback-fleet.ts +44 -1
- package/telegram-plugin/context-exhaustion.ts +12 -0
- package/telegram-plugin/demo-mask.ts +154 -0
- package/telegram-plugin/dist/bridge/bridge.js +167 -124
- package/telegram-plugin/dist/gateway/gateway.js +3039 -1159
- package/telegram-plugin/dist/server.js +215 -172
- package/telegram-plugin/docs/waiting-ux-spec.md +2 -2
- package/telegram-plugin/draft-stream.ts +47 -410
- package/telegram-plugin/final-answer-detect.ts +17 -12
- package/telegram-plugin/fleet-fallback-resume.ts +131 -0
- package/telegram-plugin/format.ts +56 -19
- package/telegram-plugin/gateway/auth-add-flow.ts +332 -127
- package/telegram-plugin/gateway/auth-broker-client.ts +2 -2
- package/telegram-plugin/gateway/auth-command.ts +70 -14
- package/telegram-plugin/gateway/clean-shutdown-marker.ts +44 -0
- package/telegram-plugin/gateway/config-approval-handler.test.ts +91 -4
- package/telegram-plugin/gateway/config-approval-handler.ts +94 -13
- package/telegram-plugin/gateway/current-turn-map.ts +188 -0
- package/telegram-plugin/gateway/disconnect-flush.ts +3 -1
- package/telegram-plugin/gateway/effort-command.ts +8 -3
- package/telegram-plugin/gateway/emission-authority.ts +369 -0
- package/telegram-plugin/gateway/feed-open-gate.ts +292 -0
- package/telegram-plugin/gateway/gateway.ts +1837 -291
- package/telegram-plugin/gateway/inject-handler.test.ts +2 -1
- package/telegram-plugin/gateway/ms365-write-approval.test.ts +4 -4
- package/telegram-plugin/gateway/represent-guard.ts +72 -0
- package/telegram-plugin/gateway/status-surface-log.test.ts +5 -4
- package/telegram-plugin/gateway/status-surface-log.ts +14 -3
- package/telegram-plugin/history.ts +33 -11
- package/telegram-plugin/hooks/repo-context-pretool.mjs +26 -0
- package/telegram-plugin/hooks/subagent-tracker-posttool.mjs +5 -0
- package/telegram-plugin/hooks/subagent-tracker-pretool.mjs +8 -0
- package/telegram-plugin/hooks/tool-label-pretool.mjs +39 -15
- package/telegram-plugin/issues-card.ts +4 -0
- package/telegram-plugin/model-unavailable.ts +124 -0
- package/telegram-plugin/narrative-dedup.ts +69 -0
- package/telegram-plugin/over-ping-safety-net.ts +70 -4
- package/telegram-plugin/package.json +3 -3
- package/telegram-plugin/pending-work-progress.ts +12 -0
- package/telegram-plugin/permission-rule.ts +32 -5
- package/telegram-plugin/permission-title.ts +152 -9
- package/telegram-plugin/quota-check.ts +13 -0
- package/telegram-plugin/quota-watch.ts +135 -7
- package/telegram-plugin/registry/turns-schema.test.ts +24 -0
- package/telegram-plugin/registry/turns-schema.ts +9 -0
- package/telegram-plugin/runtime-metrics.ts +13 -0
- package/telegram-plugin/session-tail.ts +96 -11
- package/telegram-plugin/silence-poke.ts +170 -24
- package/telegram-plugin/slot-banner-driver.ts +3 -0
- package/telegram-plugin/status-no-truncate.ts +44 -0
- package/telegram-plugin/status-reactions.ts +20 -3
- package/telegram-plugin/stream-controller.ts +4 -23
- package/telegram-plugin/stream-reply-handler.ts +6 -24
- package/telegram-plugin/streaming-metrics.ts +91 -0
- package/telegram-plugin/subagent-watcher.ts +212 -66
- package/telegram-plugin/tests/activity-ever-opened-sticky.test.ts +47 -0
- package/telegram-plugin/tests/answer-stream-dedup.test.ts +9 -26
- package/telegram-plugin/tests/answer-stream-flag.test.ts +25 -58
- package/telegram-plugin/tests/answer-stream-silent-markers.test.ts +41 -51
- package/telegram-plugin/tests/answer-stream.test.ts +2 -411
- package/telegram-plugin/tests/auth-add-flow.test.ts +488 -253
- package/telegram-plugin/tests/auth-command-format2.test.ts +71 -1
- package/telegram-plugin/tests/auth-snapshot-format.test.ts +376 -6
- package/telegram-plugin/tests/auto-fallback-fleet.test.ts +120 -0
- package/telegram-plugin/tests/cross-turn-card-gate.test.ts +424 -0
- package/telegram-plugin/tests/demo-mask.test.ts +127 -0
- package/telegram-plugin/tests/draft-stream.test.ts +0 -827
- package/telegram-plugin/tests/emission-authority-card-drain-gate.test.ts +236 -0
- package/telegram-plugin/tests/emission-authority-facade.test.ts +488 -0
- package/telegram-plugin/tests/emission-authority-open-gate.test.ts +179 -0
- package/telegram-plugin/tests/emission-authority-ping-gate.test.ts +395 -0
- package/telegram-plugin/tests/emission-determinism-wiring.test.ts +177 -0
- package/telegram-plugin/tests/feed-heartbeat-liveness-open.test.ts +146 -0
- package/telegram-plugin/tests/feed-open-gate.test.ts +259 -0
- package/telegram-plugin/tests/feed-survival.test.ts +526 -0
- package/telegram-plugin/tests/fleet-fallback-resume.test.ts +197 -0
- package/telegram-plugin/tests/gateway-clean-shutdown-marker.test.ts +117 -0
- package/telegram-plugin/tests/gateway-no-reply-single-emit.test.ts +4 -11
- package/telegram-plugin/tests/history.test.ts +60 -0
- package/telegram-plugin/tests/model-unavailable.test.ts +118 -0
- package/telegram-plugin/tests/narrative-dedup.test.ts +118 -0
- package/telegram-plugin/tests/orphaned-reply-rearm.test.ts +285 -0
- package/telegram-plugin/tests/over-ping-final-answer-decoupling.test.ts +194 -0
- package/telegram-plugin/tests/over-ping-safety-net.test.ts +2 -2
- package/telegram-plugin/tests/per-topic-current-turn.test.ts +373 -0
- package/telegram-plugin/tests/permission-card-origin-kill-switch.test.ts +42 -0
- package/telegram-plugin/tests/permission-rule.test.ts +17 -0
- package/telegram-plugin/tests/permission-title.test.ts +206 -17
- package/telegram-plugin/tests/quota-watch.test.ts +252 -9
- package/telegram-plugin/tests/reply-terminal-reaction.test.ts +6 -1
- package/telegram-plugin/tests/repo-context-pretool.test.ts +62 -0
- package/telegram-plugin/tests/represent-guard.test.ts +162 -0
- package/telegram-plugin/tests/session-tail.test.ts +147 -3
- package/telegram-plugin/tests/silence-liveness-wiring.test.ts +18 -0
- package/telegram-plugin/tests/status-card-budget-parity.test.ts +72 -0
- package/telegram-plugin/tests/status-surface-log.test.ts +146 -0
- package/telegram-plugin/tests/subagent-watcher-clip-narrative.test.ts +58 -0
- package/telegram-plugin/tests/subagent-watcher-parent-turn-key.test.ts +102 -0
- package/telegram-plugin/tests/subagent-watcher-workflow-visibility.test.ts +225 -0
- package/telegram-plugin/tests/subagent-watcher.test.ts +147 -0
- package/telegram-plugin/tests/telegram-activity-visibility-integration.test.ts +597 -0
- package/telegram-plugin/tests/telegram-format.test.ts +101 -6
- package/telegram-plugin/tests/tool-activity-summary.test.ts +550 -15
- package/telegram-plugin/tests/tool-label-pretool.test.ts +73 -0
- package/telegram-plugin/tests/tool-label-sidecar.test.ts +44 -0
- package/telegram-plugin/tests/tool-labels.test.ts +67 -0
- package/telegram-plugin/tests/turn-liveness-floor.test.ts +196 -0
- package/telegram-plugin/tests/turn-liveness-invariant.test.ts +340 -0
- package/telegram-plugin/tests/welcome-text.test.ts +32 -3
- package/telegram-plugin/tests/worker-activity-feed.test.ts +470 -22
- package/telegram-plugin/tool-activity-summary.ts +375 -58
- package/telegram-plugin/turn-liveness-floor.ts +240 -0
- package/telegram-plugin/uat/assertions.ts +115 -0
- package/telegram-plugin/uat/driver.ts +68 -0
- package/telegram-plugin/uat/scenarios/bg-sub-agent-dispatch-dm.test.ts +119 -133
- package/telegram-plugin/uat/scenarios/jtbd-answer-pings.test.ts +94 -0
- package/telegram-plugin/uat/scenarios/jtbd-cross-turn-card-dm.test.ts +109 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-thinkgap-dm.test.ts +478 -0
- package/telegram-plugin/uat/scenarios/jtbd-foreground-feed-visibility-dm.test.ts +396 -0
- package/telegram-plugin/uat/scenarios/jtbd-liveness-feed-open-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/jtbd-reply-is-last-dm.test.ts +202 -0
- package/telegram-plugin/uat/scenarios/reactions-dm.test.ts +93 -87
- package/telegram-plugin/welcome-text.ts +13 -1
- package/telegram-plugin/worker-activity-feed.ts +157 -82
- package/telegram-plugin/draft-transport.ts +0 -122
- package/telegram-plugin/tests/draft-retirement-wiring.test.ts +0 -82
- package/telegram-plugin/tests/draft-transport.test.ts +0 -211
|
package/telegram-plugin/tests/repo-context-pretool.test.ts
@@ -420,6 +420,68 @@ describe('repo-context-pretool e2e', () => {
|
|
|
420
420
|
expect(res.stdout ?? '').toBe('')
|
|
421
421
|
})
|
|
422
422
|
|
|
423
|
+
it('suppresses the agent own CLAUDE.md when SWITCHROOM_AGENT_START_CWD is set', () => {
|
|
424
|
+
// The agent's own CLAUDE.md is already in the system prompt — re-injecting
|
|
425
|
+
// it wastes ~30KB per session. The marker-path guard must exit 0 silently.
|
|
426
|
+
const agentDir = join(sb.root, '.switchroom', 'agents', 'myagent')
|
|
427
|
+
const agentWorkspace = join(agentDir, 'workspace')
|
|
428
|
+
mkdirSync(agentWorkspace, { recursive: true })
|
|
429
|
+
writeFileSync(join(agentDir, 'CLAUDE.md'), '# own agent claude md')
|
|
430
|
+
writeFileSync(join(agentWorkspace, 'note.md'), 'x')
|
|
431
|
+
|
|
432
|
+
const sid = `e2e-own-marker-${Date.now()}`
|
|
433
|
+
repoStateDir(sid)
|
|
434
|
+
const res = runHook(
|
|
435
|
+
{
|
|
436
|
+
session_id: sid,
|
|
437
|
+
cwd: agentDir,
|
|
438
|
+
hook_event_name: 'PreToolUse',
|
|
439
|
+
tool_name: 'Read',
|
|
440
|
+
tool_input: { file_path: join(agentDir, 'workspace', 'note.md') },
|
|
441
|
+
},
|
|
442
|
+
{
|
|
443
|
+
home: sb.root,
|
|
444
|
+
agent: 'myagent',
|
|
445
|
+
env: { SWITCHROOM_AGENT_START_CWD: agentDir },
|
|
446
|
+
},
|
|
447
|
+
)
|
|
448
|
+
expect(res.code).toBe(0)
|
|
449
|
+
// Own CLAUDE.md must NOT be injected
|
|
450
|
+
expect(res.stdout).toBe('')
|
|
451
|
+
})
|
|
452
|
+
|
|
453
|
+
it('still injects a worktree repo CLAUDE.md even when SWITCHROOM_AGENT_START_CWD is set', () => {
|
|
454
|
+
// A repo checked out outside the agent start dir must still inject.
|
|
455
|
+
const agentDir = join(sb.root, '.switchroom', 'agents', 'myagent')
|
|
456
|
+
mkdirSync(agentDir, { recursive: true })
|
|
457
|
+
writeFileSync(join(agentDir, 'CLAUDE.md'), '# own agent')
|
|
458
|
+
const worktree = join(sb.root, 'workspace', 'myrepo')
|
|
459
|
+
mkdirSync(worktree, { recursive: true })
|
|
460
|
+
writeFileSync(join(worktree, 'CLAUDE.md'), '# worktree repo')
|
|
461
|
+
writeFileSync(join(worktree, 'src.ts'), 'x')
|
|
462
|
+
|
|
463
|
+
const sid = `e2e-worktree-${Date.now()}`
|
|
464
|
+
repoStateDir(sid)
|
|
465
|
+
const res = runHook(
|
|
466
|
+
{
|
|
467
|
+
session_id: sid,
|
|
468
|
+
cwd: worktree,
|
|
469
|
+
hook_event_name: 'PreToolUse',
|
|
470
|
+
tool_name: 'Read',
|
|
471
|
+
tool_input: { file_path: join(worktree, 'src.ts') },
|
|
472
|
+
},
|
|
473
|
+
{
|
|
474
|
+
home: sb.root,
|
|
475
|
+
agent: 'myagent',
|
|
476
|
+
env: { SWITCHROOM_AGENT_START_CWD: agentDir },
|
|
477
|
+
},
|
|
478
|
+
)
|
|
479
|
+
expect(res.code).toBe(0)
|
|
480
|
+
expect(res.stdout.length).toBeGreaterThan(0)
|
|
481
|
+
const parsed = JSON.parse(res.stdout)
|
|
482
|
+
expect(parsed.hookSpecificOutput.additionalContext).toContain('# worktree repo')
|
|
483
|
+
})
|
|
484
|
+
|
|
423
485
|
it('no marker → no output', () => {
|
|
424
486
|
// tmpdir, no CLAUDE.md anywhere up the chain
|
|
425
487
|
const file = join(sb.root, 'nothing.ts')
|
|
package/telegram-plugin/tests/represent-guard.test.ts
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
shouldSuppressRepresent,
|
|
4
|
+
type RepresentGuardObligation,
|
|
5
|
+
} from "../gateway/represent-guard.js";
|
|
6
|
+
import { ObligationLedger } from "../gateway/obligation-ledger.js";
|
|
7
|
+
|
|
8
|
+
// Executable verification of the #2472 fix: obligation_represent must NOT re-fire
|
|
9
|
+
// for an origin_turn_id that has already been answered by a reply since the last
|
|
10
|
+
// represent (the satisfied-but-misdetected case that produced the near-identical
|
|
11
|
+
// duplicate), while the genuine "plain text, never replied" case still represents
|
|
12
|
+
// ONCE and the represent_count cap is honored.
|
|
13
|
+
|
|
14
|
+
const CHAT = "12345";
|
|
15
|
+
const ORIGIN = "12345:_#10605";
|
|
16
|
+
|
|
17
|
+
function obligation(over: Partial<RepresentGuardObligation> = {}): RepresentGuardObligation {
|
|
18
|
+
return { originTurnId: ORIGIN, chatId: CHAT, ...over };
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/** A hasOutboundDeliveredSince stub that returns true only for queries whose
|
|
22
|
+
* cutoff falls at/after `replyTs` — modelling a reply delivered at replyTs. */
|
|
23
|
+
function replyDeliveredAt(replyTs: number) {
|
|
24
|
+
return (_chat: string, sinceMs: number) => replyTs >= sinceMs;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
describe("shouldSuppressRepresent — #2472 duplicate-represent guard", () => {
|
|
28
|
+
it("suppresses the SECOND represent once a reply landed since the FIRST represent", () => {
|
|
29
|
+
// The exact #2472 sequence: represent_count=1 fired at t=1000, the agent
|
|
30
|
+
// answered with a reply at t=1500, the sweep is about to fire count=2.
|
|
31
|
+
const o = obligation({ lastRepresentedAt: 1000 });
|
|
32
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
33
|
+
historyEnabled: true,
|
|
34
|
+
hasOutboundDeliveredSince: replyDeliveredAt(1500),
|
|
35
|
+
});
|
|
36
|
+
expect(suppress).toBe(true); // do NOT re-fire → no duplicate 10609
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("does NOT suppress the FIRST represent — genuine plain-text-no-reply still represents once", () => {
|
|
40
|
+
// First represent: lastRepresentedAt is undefined. Even though an assistant
|
|
41
|
+
// message (the original plain-text answer) exists in history, the single
|
|
42
|
+
// re-ask must still fire — the agent never called the reply tool.
|
|
43
|
+
const o = obligation({ lastRepresentedAt: undefined });
|
|
44
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
45
|
+
historyEnabled: true,
|
|
46
|
+
// history WOULD report an outbound exists, but the first represent ignores it
|
|
47
|
+
hasOutboundDeliveredSince: () => true,
|
|
48
|
+
});
|
|
49
|
+
expect(suppress).toBe(false); // represent fires exactly once
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it("does NOT suppress a later represent when NO reply landed since the last one", () => {
|
|
53
|
+
// count=1 fired at t=1000, nothing answered it → count=2 must still fire.
|
|
54
|
+
const o = obligation({ lastRepresentedAt: 1000 });
|
|
55
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
56
|
+
historyEnabled: true,
|
|
57
|
+
hasOutboundDeliveredSince: () => false,
|
|
58
|
+
});
|
|
59
|
+
expect(suppress).toBe(false);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it("a reply that PREDATES the last represent does not count (cutoff is lastRepresentedAt, not openedAt)", () => {
|
|
63
|
+
// The original plain-text answer landed at t=500, before the represent at
|
|
64
|
+
// t=1000. That is not evidence the represent itself was answered → fire.
|
|
65
|
+
const o = obligation({ lastRepresentedAt: 1000 });
|
|
66
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
67
|
+
historyEnabled: true,
|
|
68
|
+
hasOutboundDeliveredSince: replyDeliveredAt(500),
|
|
69
|
+
});
|
|
70
|
+
expect(suppress).toBe(false);
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it("never suppresses when history is unavailable (safe: re-ask rather than silently drop)", () => {
|
|
74
|
+
const o = obligation({ lastRepresentedAt: 1000 });
|
|
75
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
76
|
+
historyEnabled: false,
|
|
77
|
+
hasOutboundDeliveredSince: () => true,
|
|
78
|
+
});
|
|
79
|
+
expect(suppress).toBe(false);
|
|
80
|
+
});
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
// #2474 follow-up — the terse-reply gap. PR #2474 suppressed the duplicate
|
|
84
|
+
// represent only when the satisfied-check saw a >=200-char "substantive" reply
|
|
85
|
+
// (the 200-char proxy borrowed from the ESCALATE branch). A GENUINE but SHORT
|
|
86
|
+
// reply (e.g. "Yes — done.") therefore did NOT suppress the duplicate, leaving
|
|
87
|
+
// the #2472 duplicate-message bug alive for terse answers. The guard itself is
|
|
88
|
+
// pure: the gateway now binds hasOutboundDeliveredSince with a LOW minChars so a
|
|
89
|
+
// terse-but-real reply reports true. These tests model the wired predicate's
|
|
90
|
+
// behavior at the guard boundary.
|
|
91
|
+
describe("represent guard — terse genuine reply suppresses the duplicate (#2474 follow-up)", () => {
|
|
92
|
+
/** Models the gateway-wired predicate AFTER the fix: reports true for ANY real
|
|
93
|
+
* reply at/after the cutoff regardless of length (minChars=1 inside history).
|
|
94
|
+
* `replyChars` is the length of the terse reply; included to make the intent
|
|
95
|
+
* explicit — the predicate no longer gates on length. */
|
|
96
|
+
function terseReplyDeliveredAt(replyTs: number, replyChars: number) {
|
|
97
|
+
expect(replyChars).toBeGreaterThan(0); // a real, non-empty reply
|
|
98
|
+
return (_chat: string, sinceMs: number) => replyTs >= sinceMs;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
it("suppresses the duplicate when a SHORT genuine reply landed since the last represent", () => {
|
|
102
|
+
// count=1 fired at t=1000; the agent answered with a terse 11-char reply
|
|
103
|
+
// ("Yes — done.") at t=1500. The duplicate (count=2) must now be suppressed —
|
|
104
|
+
// before the fix the 200-char gate let it through.
|
|
105
|
+
const o = obligation({ lastRepresentedAt: 1000 });
|
|
106
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
107
|
+
historyEnabled: true,
|
|
108
|
+
hasOutboundDeliveredSince: terseReplyDeliveredAt(1500, "Yes — done.".length),
|
|
109
|
+
});
|
|
110
|
+
expect(suppress).toBe(true);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it("does NOT suppress when only framework noise occurred since the last represent", () => {
|
|
114
|
+
// Typing indicators and progress-card edits never call recordOutbound, so no
|
|
115
|
+
// assistant row exists for them → the wired predicate reports false. A real
|
|
116
|
+
// answer never landed, so the represent SHOULD still fire (no false suppress).
|
|
117
|
+
const o = obligation({ lastRepresentedAt: 1000 });
|
|
118
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
119
|
+
historyEnabled: true,
|
|
120
|
+
// No assistant row for typing/progress edits → predicate is false.
|
|
121
|
+
hasOutboundDeliveredSince: () => false,
|
|
122
|
+
});
|
|
123
|
+
expect(suppress).toBe(false);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
it("does NOT suppress when the terse reply PREDATES the last represent", () => {
|
|
127
|
+
// A terse reply at t=500 answered an EARLIER ask, not the represent at t=1000.
|
|
128
|
+
// The cutoff is lastRepresentedAt, so a pre-cutoff terse reply must not count.
|
|
129
|
+
const o = obligation({ lastRepresentedAt: 1000 });
|
|
130
|
+
const suppress = shouldSuppressRepresent(o, {
|
|
131
|
+
historyEnabled: true,
|
|
132
|
+
hasOutboundDeliveredSince: terseReplyDeliveredAt(500, "ok".length),
|
|
133
|
+
});
|
|
134
|
+
expect(suppress).toBe(false);
|
|
135
|
+
});
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
describe("represent_count cap is honored by the ledger — a misdetected obligation cannot loop", () => {
|
|
139
|
+
it("escalates (stops re-presenting) once representCount reaches maxRepresents", () => {
|
|
140
|
+
const L = new ObligationLedger(2); // maxRepresents = 2
|
|
141
|
+
L.openIfAbsent({
|
|
142
|
+
originTurnId: ORIGIN,
|
|
143
|
+
chatId: CHAT,
|
|
144
|
+
messageId: 10605,
|
|
145
|
+
text: "Check there was a bug raised…",
|
|
146
|
+
openedAt: 0,
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
// count 0 -> represent
|
|
150
|
+
expect(L.decideAtIdle().action).toBe("represent");
|
|
151
|
+
L.markRepresented(ORIGIN, 1000);
|
|
152
|
+
// count 1 -> represent
|
|
153
|
+
expect(L.decideAtIdle().action).toBe("represent");
|
|
154
|
+
L.markRepresented(ORIGIN, 2000);
|
|
155
|
+
// count 2 == cap -> escalate, NOT another represent
|
|
156
|
+
expect(L.decideAtIdle().action).toBe("escalate");
|
|
157
|
+
|
|
158
|
+
// and the ladder terminates: closing on escalate ends it
|
|
159
|
+
L.close(ORIGIN);
|
|
160
|
+
expect(L.decideAtIdle().action).toBe("none");
|
|
161
|
+
});
|
|
162
|
+
});
|
|
package/telegram-plugin/tests/session-tail.test.ts
@@ -5,6 +5,7 @@ import { join } from 'path'
|
|
|
5
5
|
import {
|
|
6
6
|
projectTranscriptLine,
|
|
7
7
|
projectSubagentLine,
|
|
8
|
+
projectAssistantTextBlocks,
|
|
8
9
|
sanitizeCwdToProjectName,
|
|
9
10
|
getProjectsDirForCwd,
|
|
10
11
|
startSessionTail,
|
|
@@ -208,10 +209,67 @@ describe('projectTranscriptLine', () => {
|
|
|
208
209
|
},
|
|
209
210
|
})
|
|
210
211
|
expect(projectTranscriptLine(line)).toEqual([
|
|
211
|
-
|
|
212
|
+
// Shared narrative contract: a lone text block (no tool_use after it)
|
|
213
|
+
// is lastInMessage:true at blockIndex 0.
|
|
214
|
+
{ kind: 'text', text: 'Replied with comparison', blockIndex: 0, lastInMessage: true },
|
|
212
215
|
])
|
|
213
216
|
})
|
|
214
217
|
|
|
218
|
+
it('drops empty/whitespace-only text blocks (shared narrative contract)', () => {
|
|
219
|
+
const line = JSON.stringify({
|
|
220
|
+
type: 'assistant',
|
|
221
|
+
message: {
|
|
222
|
+
content: [
|
|
223
|
+
{ type: 'text', text: ' \n ' },
|
|
224
|
+
{ type: 'tool_use', id: 'toolu_a', name: 'Read', input: { file_path: '/a' } },
|
|
225
|
+
],
|
|
226
|
+
},
|
|
227
|
+
})
|
|
228
|
+
// The empty text block is dropped; only the tool_use survives.
|
|
229
|
+
expect(projectTranscriptLine(line)).toEqual([
|
|
230
|
+
{ kind: 'tool_use', toolName: 'Read', toolUseId: 'toolu_a', input: { file_path: '/a' } },
|
|
231
|
+
])
|
|
232
|
+
})
|
|
233
|
+
|
|
234
|
+
it('text block preceding a tool_use in the same message is lastInMessage:false', () => {
|
|
235
|
+
const line = JSON.stringify({
|
|
236
|
+
type: 'assistant',
|
|
237
|
+
message: {
|
|
238
|
+
content: [
|
|
239
|
+
{ type: 'text', text: 'Found both:' },
|
|
240
|
+
{ type: 'tool_use', id: 'toolu_a', name: 'Read', input: { file_path: '/a' } },
|
|
241
|
+
],
|
|
242
|
+
},
|
|
243
|
+
})
|
|
244
|
+
const events = projectTranscriptLine(line)
|
|
245
|
+
expect(events[0]).toEqual({ kind: 'text', text: 'Found both:', blockIndex: 0, lastInMessage: false })
|
|
246
|
+
})
|
|
247
|
+
|
|
248
|
+
it('text block AFTER the last tool_use is lastInMessage:true', () => {
|
|
249
|
+
const line = JSON.stringify({
|
|
250
|
+
type: 'assistant',
|
|
251
|
+
message: {
|
|
252
|
+
content: [
|
|
253
|
+
{ type: 'tool_use', id: 'toolu_a', name: 'Read', input: { file_path: '/a' } },
|
|
254
|
+
{ type: 'text', text: 'Done.' },
|
|
255
|
+
],
|
|
256
|
+
},
|
|
257
|
+
})
|
|
258
|
+
const events = projectTranscriptLine(line)
|
|
259
|
+
const textEv = events.find((e) => e.kind === 'text')
|
|
260
|
+
expect(textEv).toEqual({ kind: 'text', text: 'Done.', blockIndex: 1, lastInMessage: true })
|
|
261
|
+
})
|
|
262
|
+
|
|
263
|
+
it('the dead sub_agent_narrative kind is never emitted', () => {
|
|
264
|
+
// The union member was removed; nothing in either projector emits it.
|
|
265
|
+
const subLine = JSON.stringify({
|
|
266
|
+
type: 'assistant',
|
|
267
|
+
message: { content: [{ type: 'text', text: 'prose' }] },
|
|
268
|
+
})
|
|
269
|
+
const events = projectSubagentLine(subLine, 'X', { hasEmittedStart: true })
|
|
270
|
+
expect(events.some((e) => (e as { kind: string }).kind === 'sub_agent_narrative')).toBe(false)
|
|
271
|
+
})
|
|
272
|
+
|
|
215
273
|
it('parses assistant message with multiple blocks (thinking + tool_use)', () => {
|
|
216
274
|
const line = JSON.stringify({
|
|
217
275
|
type: 'assistant',
|
|
@@ -471,7 +529,8 @@ describe('projectSubagentLine', () => {
|
|
|
471
529
|
})
|
|
472
530
|
const events = projectSubagentLine(line, 'X', st)
|
|
473
531
|
expect(events).toEqual([
|
|
474
|
-
|
|
532
|
+
// text precedes a tool_use in the same message → lastInMessage:false
|
|
533
|
+
{ kind: 'sub_agent_text', agentId: 'X', text: 'Reading the reducer', blockIndex: 0, lastInMessage: false },
|
|
475
534
|
{
|
|
476
535
|
kind: 'sub_agent_tool_use',
|
|
477
536
|
agentId: 'X',
|
|
@@ -511,8 +570,9 @@ describe('projectSubagentLine', () => {
|
|
|
511
570
|
st,
|
|
512
571
|
)
|
|
513
572
|
// Text first (so the final summary still renders), turn_end last.
|
|
573
|
+
// A lone trailing text block (no tool_use after it) is lastInMessage:true.
|
|
514
574
|
expect(events).toEqual([
|
|
515
|
-
{ kind: 'sub_agent_text', agentId: 'X', text: 'Done. Fixed the bug.' },
|
|
575
|
+
{ kind: 'sub_agent_text', agentId: 'X', text: 'Done. Fixed the bug.', blockIndex: 0, lastInMessage: true },
|
|
516
576
|
{ kind: 'sub_agent_turn_end', agentId: 'X' },
|
|
517
577
|
])
|
|
518
578
|
})
|
|
@@ -565,3 +625,87 @@ describe('idle sub-tail reap (MEM2)', () => {
|
|
|
565
625
|
expect(src).toMatch(/rescanSubagents\(\)\s*[\s\S]*?reapIdleSubTails\(\)/)
|
|
566
626
|
})
|
|
567
627
|
})
|
|
628
|
+
|
|
629
|
+
describe('projectAssistantTextBlocks (shared text→narrative kernel)', () => {
|
|
630
|
+
// Direct unit coverage for the now-live projection kernel. Both
|
|
631
|
+
// projectTranscriptLine and projectSubagentLine derive their text events
|
|
632
|
+
// through this one function; these tests pin its contract independently of
|
|
633
|
+
// either caller. The `make` adapter here uses the main-agent `text` kind.
|
|
634
|
+
const makeText = (text: string, blockIndex: number, lastInMessage: boolean): SessionEvent => ({
|
|
635
|
+
kind: 'text',
|
|
636
|
+
text,
|
|
637
|
+
blockIndex,
|
|
638
|
+
lastInMessage,
|
|
639
|
+
})
|
|
640
|
+
|
|
641
|
+
it('emits one narration event per non-empty text block, keyed by source index', () => {
|
|
642
|
+
const out = projectAssistantTextBlocks(
|
|
643
|
+
[
|
|
644
|
+
{ type: 'text', text: 'hello' },
|
|
645
|
+
{ type: 'tool_use', name: 'Read', id: 't1' },
|
|
646
|
+
{ type: 'text', text: 'world' },
|
|
647
|
+
],
|
|
648
|
+
makeText,
|
|
649
|
+
)
|
|
650
|
+
expect(out.size).toBe(2)
|
|
651
|
+
expect(out.get(0)).toEqual({ kind: 'text', text: 'hello', blockIndex: 0, lastInMessage: false })
|
|
652
|
+
expect(out.get(2)).toEqual({ kind: 'text', text: 'world', blockIndex: 2, lastInMessage: true })
|
|
653
|
+
})
|
|
654
|
+
|
|
655
|
+
it('drops empty and whitespace-only text blocks', () => {
|
|
656
|
+
const out = projectAssistantTextBlocks(
|
|
657
|
+
[
|
|
658
|
+
{ type: 'text', text: '' },
|
|
659
|
+
{ type: 'text', text: ' \n\t ' },
|
|
660
|
+
{ type: 'text', text: 'kept' },
|
|
661
|
+
],
|
|
662
|
+
makeText,
|
|
663
|
+
)
|
|
664
|
+
expect(out.size).toBe(1)
|
|
665
|
+
expect(out.has(0)).toBe(false)
|
|
666
|
+
expect(out.has(1)).toBe(false)
|
|
667
|
+
expect(out.get(2)).toEqual({ kind: 'text', text: 'kept', blockIndex: 2, lastInMessage: true })
|
|
668
|
+
})
|
|
669
|
+
|
|
670
|
+
it('lastInMessage is false when a tool_use follows the text block in the same message', () => {
|
|
671
|
+
const out = projectAssistantTextBlocks(
|
|
672
|
+
[
|
|
673
|
+
{ type: 'text', text: 'preamble' },
|
|
674
|
+
{ type: 'tool_use', name: 'Bash', id: 't1' },
|
|
675
|
+
],
|
|
676
|
+
makeText,
|
|
677
|
+
)
|
|
678
|
+
expect(out.get(0)).toEqual({ kind: 'text', text: 'preamble', blockIndex: 0, lastInMessage: false })
|
|
679
|
+
})
|
|
680
|
+
|
|
681
|
+
it('lastInMessage is true for a text block after the last tool_use (trailing narration)', () => {
|
|
682
|
+
const out = projectAssistantTextBlocks(
|
|
683
|
+
[
|
|
684
|
+
{ type: 'tool_use', name: 'Bash', id: 't1' },
|
|
685
|
+
{ type: 'text', text: 'done' },
|
|
686
|
+
],
|
|
687
|
+
makeText,
|
|
688
|
+
)
|
|
689
|
+
expect(out.get(1)).toEqual({ kind: 'text', text: 'done', blockIndex: 1, lastInMessage: true })
|
|
690
|
+
})
|
|
691
|
+
|
|
692
|
+
it('the make adapter controls the wire kind (sub_agent_text tier)', () => {
|
|
693
|
+
const out = projectAssistantTextBlocks(
|
|
694
|
+
[{ type: 'text', text: 'sub preamble' }],
|
|
695
|
+
(text, blockIndex, lastInMessage): SessionEvent => ({
|
|
696
|
+
kind: 'sub_agent_text',
|
|
697
|
+
agentId: 'A',
|
|
698
|
+
text,
|
|
699
|
+
blockIndex,
|
|
700
|
+
lastInMessage,
|
|
701
|
+
}),
|
|
702
|
+
)
|
|
703
|
+
expect(out.get(0)).toEqual({
|
|
704
|
+
kind: 'sub_agent_text',
|
|
705
|
+
agentId: 'A',
|
|
706
|
+
text: 'sub preamble',
|
|
707
|
+
blockIndex: 0,
|
|
708
|
+
lastInMessage: true,
|
|
709
|
+
})
|
|
710
|
+
})
|
|
711
|
+
})
|
|
package/telegram-plugin/tests/silence-liveness-wiring.test.ts
@@ -64,4 +64,22 @@ describe('silence-poke production-liveness — heartbeat safety', () => {
|
|
|
64
64
|
it('production-liveness is behind the default-ON SWITCHROOM_SILENCE_LIVENESS_PRODUCTION kill switch', () => {
|
|
65
65
|
expect(gatewaySrc).toMatch(/SWITCHROOM_SILENCE_LIVENESS_PRODUCTION !== '0'/)
|
|
66
66
|
})
|
|
67
|
+
|
|
68
|
+
it('the 300s fallback send gates disable_notification on blockedOnApproval (approval re-ping pings, liveness stays silent)', () => {
|
|
69
|
+
// The 300s fallback is normally a pure-liveness "still working…" status
|
|
70
|
+
// notice and stays SILENT. But the SAME send carries a user-gating re-ping
|
|
71
|
+
// ("waiting for your approval — tap Approve or Deny …") when the turn is
|
|
72
|
+
// parked on an approval card — that must PING. The send site must therefore
|
|
73
|
+
// gate disable_notification on blockedOnApproval, NOT hard-code `true`.
|
|
74
|
+
// Structural guard so a refactor can't silently re-silence the re-ping
|
|
75
|
+
// (the gateway IIFE can't be instantiated in-process — same pattern as the
|
|
76
|
+
// heartbeat-safety assertions above).
|
|
77
|
+
const block = between(gatewaySrc, 'onFrameworkFallback: async (ctx) => {', '\nfunction trackRedeliveredInbound')
|
|
78
|
+
expect(block.length).toBeGreaterThan(100) // sanity: slice found the handler body
|
|
79
|
+
// The signal is derived once, hoisted above the update-status branch so it's
|
|
80
|
+
// in scope at the send site.
|
|
81
|
+
expect(block).toMatch(/const blockedOnApproval = activeStatusReactions/)
|
|
82
|
+
// The send gates on it rather than hard-coding silent.
|
|
83
|
+
expect(block).toMatch(/disable_notification: blockedOnApproval \? false : true/)
|
|
84
|
+
})
|
|
67
85
|
})
|
|
package/telegram-plugin/tests/status-card-budget-parity.test.ts
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { renderStatusCard } from '../tool-activity-summary.js'
|
|
3
|
+
import { STATUS_CARD_CHAR_BUDGET } from '../status-no-truncate.js'
|
|
4
|
+
|
|
5
|
+
// Both status surfaces (🤖 agent + 🛠 worker) render through the single
|
|
6
|
+
// `renderStatusCard` primitive. Given the SAME raw steps, the rendered step
|
|
7
|
+
// BODY (everything below the two-line header) must be byte-identical regardless
|
|
8
|
+
// of the header emoji/label — the truncation pipeline, rolling window, and
|
|
9
|
+
// char-budget backstop are surface-agnostic. This pins that parity so the two
|
|
10
|
+
// surfaces can never drift apart again.
|
|
11
|
+
|
|
12
|
+
/**
 * Return the text that follows the first two lines of a rendered card (the
 * header), i.e. the step/result body. A null card yields the empty string.
 */
function body(card: string | null): string {
  // `??` maps a missing card to '' so the split below yields no body lines.
  const [, , ...bodyLines] = (card ?? '').split('\n')
  return bodyLines.join('\n')
}
|
|
18
|
+
|
|
19
|
+
/**
 * Render a status card with the 🤖 "Agent" header for the given raw steps.
 * `final` selects the 'done' header state (otherwise 'running') and is also
 * forwarded to the renderer.
 */
function agent(steps: string[], final = false) {
  const state = final ? 'done' : 'running'
  const header = { emoji: '🤖', label: 'Agent', elapsedMs: 12_000, toolCount: 4, state }
  return renderStatusCard({ header, steps, final })
}
|
|
26
|
+
|
|
27
|
+
/**
 * Render a status card with the 🛠 "Worker" header (description 'a task') for
 * the given raw steps. `final` selects the 'done' header state (otherwise
 * 'running') and is also forwarded to the renderer.
 */
function worker(steps: string[], final = false) {
  const state = final ? 'done' : 'running'
  const header = {
    emoji: '🛠',
    label: 'Worker',
    description: 'a task',
    elapsedMs: 12_000,
    toolCount: 4,
    state,
  }
  return renderStatusCard({ header, steps, final })
}
|
|
34
|
+
|
|
35
|
+
describe('status-card body byte-parity: agent vs worker', () => {
  it('identical raw steps → identical body length and bytes (running)', () => {
    const rawSteps = ['read the brief', 'scanned vendor A', 'scanned vendor B']
    const agentBody = body(agent(rawSteps))
    const workerBody = body(worker(rawSteps))

    expect(agentBody).toBe(workerBody)
    expect(agentBody.length).toBe(workerBody.length)
  })

  it('identical raw steps → identical body (final)', () => {
    const rawSteps = ['compiled', 'linked', 'shipped']
    const agentBody = body(agent(rawSteps, true))
    const workerBody = body(worker(rawSteps, true))

    expect(agentBody).toBe(workerBody)
  })

  it('overflow window → identical body + same +N earlier marker', () => {
    // Eleven zero-padded steps: 'step 01' … 'step 11'.
    const rawSteps: string[] = []
    for (let n = 1; n <= 11; n += 1) {
      rawSteps.push(`step ${String(n).padStart(2, '0')}`)
    }
    const agentBody = body(agent(rawSteps))
    const workerBody = body(worker(rawSteps))

    expect(agentBody).toBe(workerBody)
    expect(agentBody).toContain('earlier…')
  })

  it('char-budget backstop → identical body length under the fitter', () => {
    // Six steps of 900 characters each.
    const oversized = 'z'.repeat(900)
    const rawSteps = new Array<string>(6).fill(oversized)
    const agentCard = agent(rawSteps)!
    const workerCard = worker(rawSteps)!

    expect(body(agentCard).length).toBe(body(workerCard).length)
    // Each whole card (header included) must fit the wire budget.
    expect(agentCard.length).toBeLessThanOrEqual(STATUS_CARD_CHAR_BUDGET)
    expect(workerCard.length).toBeLessThanOrEqual(STATUS_CARD_CHAR_BUDGET)
  })

  it('escape parity: special chars escape identically on both surfaces', () => {
    const rawSteps = ['run build && deploy <prod> & verify']
    const agentBody = body(agent(rawSteps))
    const workerBody = body(worker(rawSteps))

    expect(agentBody).toBe(workerBody)
  })
})
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { formatTurnLifecycle, detectStatusSurfaceDegraded } from '../gateway/status-surface-log.js'
|
|
3
|
+
import type { StatusSurfaceTurnView } from '../gateway/status-surface-log.js'
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Tests for status-surface-log.ts (issue #2461).
|
|
7
|
+
*
|
|
8
|
+
* Verifies that:
|
|
9
|
+
* - `formatTurnLifecycle` uses `labeledToolCount` (not `toolCallCount`) for
|
|
10
|
+
* the `tools=` lifecycle log field — so the log reflects the accurate
|
|
11
|
+
* surfaced count, not the raw tool_use count that includes surface tools.
|
|
12
|
+
* - `detectStatusSurfaceDegraded` uses `labeledToolCount` to determine
|
|
13
|
+
* whether to check for the feed-never-opened degraded state.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * Build a StatusSurfaceTurnView fixture. Starts from a fixed baseline and lets
 * any field in `overrides` replace the baseline value.
 */
function makeTurn(overrides: Partial<StatusSurfaceTurnView> = {}): StatusSurfaceTurnView {
  const baseline: StatusSurfaceTurnView = {
    turnId: 'turn-abc',
    sessionChatId: '123456',
    sessionThreadId: undefined,
    startedAt: 1000,
    // Raw tool_use count — includes surface tools.
    toolCallCount: 10,
    // Surfaced count — only non-surface, non-suppressed tools.
    labeledToolCount: 5,
    activityMessageId: null,
    activityEverOpened: false,
    activityDrainFailures: 0,
    replyCalled: false,
    finalAnswerDelivered: false,
  }
  return { ...baseline, ...overrides }
}
|
|
32
|
+
|
|
33
|
+
describe('formatTurnLifecycle — tools= field uses labeledToolCount (#2461)', () => {
  it('set action: tools= reports labeledToolCount, not toolCallCount', () => {
    const turn = makeTurn({ toolCallCount: 10, labeledToolCount: 5 })
    const logLine = formatTurnLifecycle('set', 'enqueue', turn, 1000)

    expect(logLine).toContain('tools=5')
    // The raw toolCallCount (10) must not leak into the tools= field.
    expect(logLine).not.toMatch(/tools=10/)
  })

  it('clear action: tools= also uses labeledToolCount', () => {
    const turn = makeTurn({ toolCallCount: 7, labeledToolCount: 3 })
    const logLine = formatTurnLifecycle('clear', 'turn_end', turn, 2000)

    expect(logLine).toContain('tools=3')
    expect(logLine).not.toMatch(/tools=7/)
  })

  it('clear action: age_ms is computed from startedAt', () => {
    // 3500 (now) - 1000 (startedAt) = 2500.
    const turn = makeTurn({ startedAt: 1000 })
    const logLine = formatTurnLifecycle('clear', 'turn_end', turn, 3500)

    expect(logLine).toContain('age_ms=2500')
  })

  it('set action: age_ms is always 0', () => {
    const turn = makeTurn({ startedAt: 1000 })
    const logLine = formatTurnLifecycle('set', 'enqueue', turn, 9999)

    expect(logLine).toContain('age_ms=0')
  })

  it('includes turnId, chat, thread, feedOpened, drainFailures, replyCalled, finalAnswer', () => {
    const turn = makeTurn({
      turnId: 'turn-xyz',
      sessionChatId: '777',
      sessionThreadId: 42,
      activityMessageId: 99,
      activityEverOpened: true,
      activityDrainFailures: 2,
      replyCalled: true,
      finalAnswerDelivered: true,
    })
    const logLine = formatTurnLifecycle('clear', 'turn_end', turn, 2000)

    // Checked in this order, one assertion per expected field.
    const expectedFields = [
      'turnId=turn-xyz',
      'chat=777',
      'thread=42',
      'activityMsgId=99',
      'feedOpened=true',
      'drainFailures=2',
      'replyCalled=true',
      'finalAnswer=true',
    ]
    for (const field of expectedFields) {
      expect(logLine).toContain(field)
    }
  })

  it('thread=- when sessionThreadId is undefined', () => {
    const turn = makeTurn({ sessionThreadId: undefined })
    const logLine = formatTurnLifecycle('set', 'enqueue', turn, 1000)

    expect(logLine).toContain('thread=-')
  })

  it('labeledToolCount=0 reports tools=0', () => {
    const turn = makeTurn({ labeledToolCount: 0, toolCallCount: 3 })
    const logLine = formatTurnLifecycle('set', 'enqueue', turn, 1000)

    expect(logLine).toContain('tools=0')
  })
})
|
|
95
|
+
|
|
96
|
+
describe('detectStatusSurfaceDegraded — uses labeledToolCount (#2461)', () => {
  it('returns null when labeledToolCount=0 (no surfaced tool work this turn)', () => {
    // A non-zero raw toolCallCount (surface-only tools such as reply/react)
    // does not matter: with labeledToolCount=0 there is nothing to surface.
    const turn = makeTurn({ labeledToolCount: 0, toolCallCount: 5 })
    const verdict = detectStatusSurfaceDegraded(turn)

    expect(verdict).toBeNull()
  })

  it('H-2: reply-only turn — all tools surface-suppressed, drain failures present — no false-positive DEGRADED', () => {
    // Scenario: the agent replied, but all 5 tool_uses were surface tools
    // (reply/react/etc), so labeledToolCount=0. Three activity-feed drain
    // failures happened (e.g. Telegram 400s from a stale anchor). Because
    // nothing needed surfacing, DEGRADED must NOT fire. This covers the
    // `labeledToolCount === 0` short-circuit in detectStatusSurfaceDegraded
    // in the presence of drain failures.
    const turn = makeTurn({
      toolCallCount: 5,
      labeledToolCount: 0,
      activityDrainFailures: 3,
      activityEverOpened: false,
      replyCalled: true,
    })
    const verdict = detectStatusSurfaceDegraded(turn)

    expect(verdict).toBeNull()
  })

  it('returns null when feed opened fine (activityEverOpened=true)', () => {
    const turn = makeTurn({ labeledToolCount: 3, activityEverOpened: true, activityDrainFailures: 2 })
    const verdict = detectStatusSurfaceDegraded(turn)

    expect(verdict).toBeNull()
  })

  it('returns null when no drain failures (feed never tried to open — not degraded)', () => {
    const turn = makeTurn({ labeledToolCount: 3, activityEverOpened: false, activityDrainFailures: 0 })
    const verdict = detectStatusSurfaceDegraded(turn)

    expect(verdict).toBeNull()
  })

  it('returns degraded when labeledToolCount>0, feed never opened, and drain failures>0', () => {
    const turn = makeTurn({
      labeledToolCount: 4,
      toolCallCount: 6,
      activityEverOpened: false,
      activityDrainFailures: 3,
    })
    const verdict = detectStatusSurfaceDegraded(turn)

    expect(verdict).not.toBeNull()
    const { reason, detail } = verdict!
    expect(reason).toBe('feed-never-opened')
    // The detail reports labeledToolCount (4), not the raw toolCallCount (6).
    expect(detail).toContain('tools=4')
    expect(detail).not.toMatch(/tools=6/)
    expect(detail).toContain('drainFailures=3')
  })
})
|