switchroom 0.13.33 → 0.13.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/timezone-hook.sh +1 -1
- package/dist/agent-scheduler/index.js +8 -1
- package/dist/auth-broker/index.js +8 -1
- package/dist/cli/switchroom.js +86 -21
- package/dist/host-control/main.js +5163 -192
- package/dist/vault/approvals/kernel-server.js +9 -2
- package/dist/vault/broker/server.js +9 -2
- package/package.json +1 -1
- package/profiles/default/CLAUDE.md.hbs +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +80 -9
- package/telegram-plugin/docs/waiting-ux-spec.md +40 -0
- package/telegram-plugin/gateway/error-envelope-card.ts +64 -0
- package/telegram-plugin/gateway/gateway.ts +70 -15
- package/telegram-plugin/gateway/unhandled-rejection-policy.ts +46 -1
- package/telegram-plugin/tests/boot-clears-clean-shutdown-marker.test.ts +75 -0
- package/telegram-plugin/tests/error-envelope-unlock-card.test.ts +79 -0
- package/telegram-plugin/tests/silent-end-integration.test.ts +268 -0
- package/telegram-plugin/tests/silent-end.test.ts +105 -0
- package/telegram-plugin/tests/unhandled-rejection-policy.test.ts +51 -6
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression guard for the marker-stale crash banner class.
|
|
3
|
+
*
|
|
4
|
+
* Pre-2026-05-25 the boot path read the clean-shutdown marker but
|
|
5
|
+
* never cleared it. A marker from a graceful shutdown 11 hours ago
|
|
6
|
+
* sat on disk untouched; subsequent boots after an unhandledRejection
|
|
7
|
+
* crash (which explicitly SKIPS writing a new marker, per
|
|
8
|
+
* gateway.ts:15107) read the stale marker, classified the age as
|
|
9
|
+
* >5min, and fired `boot.clean_shutdown_marker_stale age=39976s` →
|
|
10
|
+
* `reason=crash` → `agent-crashed` operator-event banner posted to
|
|
11
|
+
* the user's chat.
|
|
12
|
+
*
|
|
13
|
+
* That misclassified the user-visible state ("clerk seems to be
|
|
14
|
+
* crashing") because the banner detail included the stale-marker
|
|
15
|
+
* artifact rather than just naming the actual crash.
|
|
16
|
+
*
|
|
17
|
+
* Fix: clear the marker after every successful boot reads it. The
|
|
18
|
+
* marker now describes the IMMEDIATELY PRECEDING shutdown only;
|
|
19
|
+
* a subsequent crash with no marker write leaves an empty marker
|
|
20
|
+
* file, and boot-reason.ts:84 correctly classifies via the
|
|
21
|
+
* sessionMarker fallback.
|
|
22
|
+
*
|
|
23
|
+
* The gateway IIFE is too entangled to instantiate in-process; this
|
|
24
|
+
* is a source-level pin matching the pattern used by
|
|
25
|
+
* `reply-terminal-reaction.test.ts` and `buffer-gate-broadened.test.ts`.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { describe, it, expect } from 'vitest'
|
|
29
|
+
import { readFileSync } from 'node:fs'
|
|
30
|
+
import { resolve } from 'node:path'
|
|
31
|
+
|
|
32
|
+
const gatewaySrc = readFileSync(
|
|
33
|
+
resolve(__dirname, '..', 'gateway', 'gateway.ts'),
|
|
34
|
+
'utf-8',
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
describe('boot path clears the clean-shutdown marker after reading it', () => {
|
|
38
|
+
it('imports clearCleanShutdownMarker (no longer the "intentionally not imported" comment)', () => {
|
|
39
|
+
// Pre-fix the import block had a `clearCleanShutdownMarker is
|
|
40
|
+
// intentionally NOT imported here` block-comment, with a rationale
|
|
41
|
+
// that was wrong for the unhandledRejection edge case. If a future
|
|
42
|
+
// commit re-removes the import (and re-adds the wrong comment),
|
|
43
|
+
// this test trips.
|
|
44
|
+
expect(gatewaySrc).toMatch(/^\s*clearCleanShutdownMarker,$/m)
|
|
45
|
+
// The old "intentionally NOT imported" comment must be gone.
|
|
46
|
+
expect(gatewaySrc).not.toMatch(/clearCleanShutdownMarker is intentionally NOT imported/)
|
|
47
|
+
})
|
|
48
|
+
|
|
49
|
+
it('calls clearCleanShutdownMarker inside the marker-read block at boot', () => {
|
|
50
|
+
// Slice the marker-read block (between the boot.clean_shutdown_*
|
|
51
|
+
// diagnostic logs and the next `if (marker)` line — approximated here by a 4000-char window). The clear call
|
|
52
|
+
// MUST appear inside this block, not later in the boot flow —
|
|
53
|
+
// future readers should see the read and clear together.
|
|
54
|
+
const anchor = gatewaySrc.indexOf('boot.clean_shutdown_detected')
|
|
55
|
+
expect(anchor).toBeGreaterThan(-1)
|
|
56
|
+
const slice = gatewaySrc.slice(anchor, anchor + 4000)
|
|
57
|
+
expect(slice).toMatch(/clearCleanShutdownMarker\(GATEWAY_CLEAN_SHUTDOWN_MARKER_PATH\)/)
|
|
58
|
+
})
|
|
59
|
+
|
|
60
|
+
it('clear comment explains the unhandledRejection edge case', () => {
|
|
61
|
+
// Future maintainers MUST understand why the clear is here.
|
|
62
|
+
// The comment block above the call references the
|
|
63
|
+
// unhandledRejection / "crash path" semantics so the next
|
|
64
|
+
// engineer doesn't remove it as cleanup.
|
|
65
|
+
const callIdx = gatewaySrc.indexOf(
|
|
66
|
+
'clearCleanShutdownMarker(GATEWAY_CLEAN_SHUTDOWN_MARKER_PATH)',
|
|
67
|
+
)
|
|
68
|
+
expect(callIdx).toBeGreaterThan(-1)
|
|
69
|
+
// The 1500 chars immediately before the call should mention the
|
|
70
|
+
// failure mode this fixes (the comment block sits right above
|
|
71
|
+
// the call and is ~1100 chars at current writing).
|
|
72
|
+
const lead = gatewaySrc.slice(Math.max(0, callIdx - 1500), callIdx)
|
|
73
|
+
expect(lead).toMatch(/unhandledRejection|crash path/)
|
|
74
|
+
})
|
|
75
|
+
})
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Telegram bridge unlock-card safety (#1758 Phase 1).
|
|
3
|
+
*
|
|
4
|
+
* The bridge MUST validate `flip_yaml_flag.yaml_path` against the
|
|
5
|
+
* config-edit-validator allowlist before rendering a one-tap approval
|
|
6
|
+
* card. A malformed or hostile envelope from any backend could
|
|
7
|
+
* otherwise nudge the operator into approving an arbitrary flag flip.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, expect } from "vitest";
|
|
11
|
+
import { renderErrorEnvelopeCard } from "../gateway/error-envelope-card.js";
|
|
12
|
+
import type { HostdResponse } from "../../src/host-control/protocol.js";
|
|
13
|
+
|
|
14
|
+
function mkResp(fix: HostdResponse["error_envelope"]["fix"]): HostdResponse {
|
|
15
|
+
return {
|
|
16
|
+
v: 1,
|
|
17
|
+
request_id: "r-1",
|
|
18
|
+
result: "error",
|
|
19
|
+
exit_code: null,
|
|
20
|
+
duration_ms: 0,
|
|
21
|
+
error: "E_FOO: foo",
|
|
22
|
+
error_envelope: {
|
|
23
|
+
v: 1,
|
|
24
|
+
code: "E_FOO",
|
|
25
|
+
human: "foo",
|
|
26
|
+
fix,
|
|
27
|
+
request_id: "r-1",
|
|
28
|
+
},
|
|
29
|
+
} as HostdResponse;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
describe("renderErrorEnvelopeCard — allowlist guard", () => {
|
|
33
|
+
it("renders an approval card for an allowlisted yaml_path", () => {
|
|
34
|
+
const resp = mkResp({
|
|
35
|
+
kind: "flip_yaml_flag",
|
|
36
|
+
yaml_path: "hostd.config_edit_enabled",
|
|
37
|
+
to: true,
|
|
38
|
+
});
|
|
39
|
+
const out = renderErrorEnvelopeCard(resp, "klanker", "a".repeat(32));
|
|
40
|
+
expect(out.kind).toBe("card");
|
|
41
|
+
if (out.kind === "card") {
|
|
42
|
+
expect(out.yaml_path).toBe("hostd.config_edit_enabled");
|
|
43
|
+
expect(out.to).toBe(true);
|
|
44
|
+
expect(out.card.text).toContain("klanker");
|
|
45
|
+
}
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("falls back to plain-text for a NON-allowlisted yaml_path", () => {
|
|
49
|
+
const resp = mkResp({
|
|
50
|
+
kind: "flip_yaml_flag",
|
|
51
|
+
yaml_path: "hostd.evil_backdoor_flag",
|
|
52
|
+
to: true,
|
|
53
|
+
});
|
|
54
|
+
const out = renderErrorEnvelopeCard(resp, "klanker", "a".repeat(32));
|
|
55
|
+
expect(out).toEqual({ kind: "plain-text" });
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it("falls back to plain-text for request_vault_grant (Phase 2 scope)", () => {
|
|
59
|
+
const resp = mkResp({
|
|
60
|
+
kind: "request_vault_grant",
|
|
61
|
+
vault_key: "openai/api-key",
|
|
62
|
+
});
|
|
63
|
+
const out = renderErrorEnvelopeCard(resp, "klanker", "a".repeat(32));
|
|
64
|
+
expect(out).toEqual({ kind: "plain-text" });
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
it("falls back to plain-text when no envelope is present", () => {
|
|
68
|
+
const resp: HostdResponse = {
|
|
69
|
+
v: 1,
|
|
70
|
+
request_id: "r-1",
|
|
71
|
+
result: "error",
|
|
72
|
+
exit_code: null,
|
|
73
|
+
duration_ms: 0,
|
|
74
|
+
error: "legacy string",
|
|
75
|
+
};
|
|
76
|
+
const out = renderErrorEnvelopeCard(resp, "klanker", "a".repeat(32));
|
|
77
|
+
expect(out).toEqual({ kind: "plain-text" });
|
|
78
|
+
});
|
|
79
|
+
});
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* silent-end-integration.test.ts — #1744 follow-up.
|
|
3
|
+
*
|
|
4
|
+
* The existing silent-end.test.ts (#1741 block, L304-407) exercises the
|
|
5
|
+
* gate as a pure unit via `simulateReplyAtGateway`, which mirrors the
|
|
6
|
+
* `isFinalAnswerReply ? clearSilentEndState(...)` contract used at the
|
|
7
|
+
* `executeReply` send-site (gateway.ts:4609). That's a fine unit test
|
|
8
|
+
* for the `executeReply` path, but it does NOT model the `stream_reply`
|
|
9
|
+
* state machine where:
|
|
10
|
+
*
|
|
11
|
+
* - The FIRST stream emit is gated by `!activeDraftStreams.has(sKey)`
|
|
12
|
+
* (gateway.ts:5178) — only the first emit per stream considers
|
|
13
|
+
* clearing the silent-end state.
|
|
14
|
+
* - LATER emits in the same stream (subsequent calls that edit the
|
|
15
|
+
* same Telegram message) do NOT re-enter that first-emit branch.
|
|
16
|
+
*
|
|
17
|
+
* Pre-fix, a stream whose first emit was ack-shaped (short, silent, no
|
|
18
|
+
* done) and whose LATER emit carried `done=true` or substantive text
|
|
19
|
+
* would skip the clear at the first-emit gate and never re-attempt it,
|
|
20
|
+
* leaving the silent-end state file behind even though the model HAS
|
|
21
|
+
* now delivered its final answer. The Stop hook would then see a stale
|
|
22
|
+
* state file and fire a spurious re-prompt on the next turn end.
|
|
23
|
+
*
|
|
24
|
+
* The fix in gateway.ts:5343 adds a second clear at the
|
|
25
|
+
* `finalAnswerDelivered = true` site (which fires on EVERY emit that
|
|
26
|
+
* qualifies as a final answer, not just the first). This test walks
|
|
27
|
+
* the full ack-then-final stream sequence and asserts the state file's
|
|
28
|
+
* lifecycle matches the contract.
|
|
29
|
+
*
|
|
30
|
+
* KNOWN GAP — true end-to-end coverage of `executeStreamReply` would
|
|
31
|
+
* require importing gateway.ts (a multi-thousand-line module with
|
|
32
|
+
* heavy startup-time side effects: bot creation, IPC server bind, MCP
|
|
33
|
+
* registration, etc.). Neither `executeReply` nor `executeStreamReply`
|
|
34
|
+
* is exported. Instead, this test reproduces the EXACT call sequence
|
|
35
|
+
* the gateway makes at each send-site — same predicate
|
|
36
|
+
* (`isFinalAnswerReply`), same state-file API (`writeSilentEndState` /
|
|
37
|
+
* `clearSilentEndState`), same first-emit gating logic — by walking a
|
|
38
|
+
* tracked `activeDraftStreams` set to model the first-vs-later-emit
|
|
39
|
+
* branching that's the load-bearing detail of the bug. A future
|
|
40
|
+
* refactor that drops the second clear at L5343 would fail the
|
|
41
|
+
* `ack first emit then final later emit` test below, because the
|
|
42
|
+
* state file would never get cleared on the path that no longer
|
|
43
|
+
* passes through the first-emit branch.
|
|
44
|
+
*
|
|
45
|
+
* If the gateway is ever refactored so `executeReply`/`executeStreamReply`
|
|
46
|
+
* become testable in isolation (or get extracted into a thin
|
|
47
|
+
* dispatch shim around a pure handler), prefer wiring those real
|
|
48
|
+
* functions over this faithful-shape reproduction.
|
|
49
|
+
*/
|
|
50
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
51
|
+
import { mkdtempSync, rmSync } from 'node:fs'
|
|
52
|
+
import { tmpdir } from 'node:os'
|
|
53
|
+
import { join } from 'node:path'
|
|
54
|
+
import {
|
|
55
|
+
writeSilentEndState,
|
|
56
|
+
clearSilentEndState,
|
|
57
|
+
readSilentEndState,
|
|
58
|
+
} from '../silent-end.js'
|
|
59
|
+
import { isFinalAnswerReply } from '../final-answer-detect.js'
|
|
60
|
+
|
|
61
|
+
let stateDir: string
|
|
62
|
+
const ORIG_ENV = process.env.TELEGRAM_STATE_DIR
|
|
63
|
+
|
|
64
|
+
beforeEach(() => {
|
|
65
|
+
stateDir = mkdtempSync(join(tmpdir(), 'silent-end-integration-test-'))
|
|
66
|
+
process.env.TELEGRAM_STATE_DIR = stateDir
|
|
67
|
+
})
|
|
68
|
+
|
|
69
|
+
afterEach(() => {
|
|
70
|
+
rmSync(stateDir, { recursive: true, force: true })
|
|
71
|
+
if (ORIG_ENV != null) process.env.TELEGRAM_STATE_DIR = ORIG_ENV
|
|
72
|
+
else delete process.env.TELEGRAM_STATE_DIR
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Shape-accurate reproduction of the gateway's `executeReply` clear
|
|
77
|
+
* site (gateway.ts:4599-4611). The `reply` tool runs this on every
|
|
78
|
+
* call — no first-emit gating, because `reply` always produces a
|
|
79
|
+
* fresh outbound message.
|
|
80
|
+
*/
|
|
81
|
+
function simulateExecuteReply(
|
|
82
|
+
reply: { text: string; disableNotification: boolean },
|
|
83
|
+
turnKey: string,
|
|
84
|
+
): { finalAnswerDelivered: boolean } {
|
|
85
|
+
const final = isFinalAnswerReply(reply)
|
|
86
|
+
if (final) clearSilentEndState(turnKey)
|
|
87
|
+
return { finalAnswerDelivered: final }
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Shape-accurate reproduction of the gateway's `executeStreamReply`
|
|
92
|
+
* call (gateway.ts:5172-5344). The stream has TWO clear sites:
|
|
93
|
+
*
|
|
94
|
+
* 1. First-emit-only branch (L5178-5195) — fires once per stream,
|
|
95
|
+
* regardless of whether this emit is the final answer. Clears
|
|
96
|
+
* iff this first emit is a final-answer-shaped reply.
|
|
97
|
+
* 2. Final-answer site (L5335-5358, added in #1744 follow-up) —
|
|
98
|
+
* fires on EVERY emit that qualifies as the final answer,
|
|
99
|
+
* including later emits in a stream whose first emit was an ack.
|
|
100
|
+
* This is the load-bearing addition: without it, the ack-first-
|
|
101
|
+
* then-final-later case leaks the state file.
|
|
102
|
+
*
|
|
103
|
+
* `activeDraftStreams` is a Set-like state the gateway carries across
|
|
104
|
+
* calls within the same turn; this test threads it through explicitly.
|
|
105
|
+
*/
|
|
106
|
+
function simulateExecuteStreamReply(
|
|
107
|
+
emit: { text: string; disableNotification: boolean; done?: boolean },
|
|
108
|
+
turnKey: string,
|
|
109
|
+
state: { activeDraftStreams: Set<string>; finalAnswerDelivered: boolean },
|
|
110
|
+
): { finalAnswerDelivered: boolean } {
|
|
111
|
+
// Site 1 — first-emit-only branch.
|
|
112
|
+
const isFirstEmit = !state.activeDraftStreams.has(turnKey)
|
|
113
|
+
if (isFirstEmit) {
|
|
114
|
+
if (isFinalAnswerReply(emit)) {
|
|
115
|
+
clearSilentEndState(turnKey)
|
|
116
|
+
}
|
|
117
|
+
// Mark the stream active for subsequent emits.
|
|
118
|
+
state.activeDraftStreams.add(turnKey)
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
// ... draft / send-message work happens here in the real gateway ...
|
|
122
|
+
|
|
123
|
+
// Site 2 — final-answer site (#1744 follow-up at gateway.ts:5343).
|
|
124
|
+
if (isFinalAnswerReply(emit)) {
|
|
125
|
+
state.finalAnswerDelivered = true
|
|
126
|
+
clearSilentEndState(turnKey)
|
|
127
|
+
}
|
|
128
|
+
return { finalAnswerDelivered: state.finalAnswerDelivered }
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
describe('#1744 — silent-end state-file lifecycle through real call paths', () => {
|
|
132
|
+
it('executeReply: ack reply does not clear, then final-answer reply clears', () => {
|
|
133
|
+
// Turn-end writer fires from a prior turn that ended undelivered,
|
|
134
|
+
// OR the framework re-prompted and the state still persists.
|
|
135
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
136
|
+
|
|
137
|
+
// Interim ack via `reply` — must NOT clear.
|
|
138
|
+
const r1 = simulateExecuteReply({ text: 'On it', disableNotification: true }, 'c:_')
|
|
139
|
+
expect(r1.finalAnswerDelivered).toBe(false)
|
|
140
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
141
|
+
|
|
142
|
+
// Final answer via `reply` (pings) — clears.
|
|
143
|
+
const r2 = simulateExecuteReply(
|
|
144
|
+
{ text: "Here's the result.", disableNotification: false },
|
|
145
|
+
'c:_',
|
|
146
|
+
)
|
|
147
|
+
expect(r2.finalAnswerDelivered).toBe(true)
|
|
148
|
+
expect(readSilentEndState()).toBeNull()
|
|
149
|
+
})
|
|
150
|
+
|
|
151
|
+
it('executeStreamReply: stream that opens with a final-answer first emit clears at first-emit site', () => {
|
|
152
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
153
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
154
|
+
|
|
155
|
+
// First emit is substantive (>=200 chars) — qualifies as final.
|
|
156
|
+
simulateExecuteStreamReply(
|
|
157
|
+
{ text: 'x'.repeat(250), disableNotification: true },
|
|
158
|
+
'c:_',
|
|
159
|
+
state,
|
|
160
|
+
)
|
|
161
|
+
expect(state.finalAnswerDelivered).toBe(true)
|
|
162
|
+
expect(readSilentEndState()).toBeNull()
|
|
163
|
+
})
|
|
164
|
+
|
|
165
|
+
it('executeStreamReply ack-then-final edge case: first emit is an ack (no clear at first-emit gate), later emit is the final answer (must clear at the new L5343 site)', () => {
|
|
166
|
+
// This is the regression the #1744 follow-up fixes. Pre-fix, the
|
|
167
|
+
// first-emit gate would skip the clear (ack-shaped first emit),
|
|
168
|
+
// and the later final-answer emit had NO second clear site —
|
|
169
|
+
// state file leaked.
|
|
170
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
171
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
172
|
+
|
|
173
|
+
// First emit — ack-shaped. First-emit gate fires but the
|
|
174
|
+
// isFinalAnswerReply predicate returns false → no clear here.
|
|
175
|
+
simulateExecuteStreamReply(
|
|
176
|
+
{ text: 'thinking...', disableNotification: true, done: false },
|
|
177
|
+
'c:_',
|
|
178
|
+
state,
|
|
179
|
+
)
|
|
180
|
+
expect(state.finalAnswerDelivered).toBe(false)
|
|
181
|
+
// CONTRACT: state file MUST still be present after the ack first emit.
|
|
182
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
183
|
+
|
|
184
|
+
// Second emit — same stream (activeDraftStreams already has the key),
|
|
185
|
+
// so the first-emit branch is skipped. This emit carries done=true
|
|
186
|
+
// (the real final-answer signal). Without the L5343 clear, the
|
|
187
|
+
// state file would persist and the Stop hook would fire a spurious
|
|
188
|
+
// re-prompt on the next turn.
|
|
189
|
+
simulateExecuteStreamReply(
|
|
190
|
+
{ text: 'done', disableNotification: true, done: true },
|
|
191
|
+
'c:_',
|
|
192
|
+
state,
|
|
193
|
+
)
|
|
194
|
+
expect(state.finalAnswerDelivered).toBe(true)
|
|
195
|
+
// CONTRACT: state file MUST be cleared after the final-answer
|
|
196
|
+
// emit, even though the first-emit branch was skipped.
|
|
197
|
+
expect(readSilentEndState()).toBeNull()
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
it('executeStreamReply: late emit that flips disable_notification=false also clears', () => {
|
|
201
|
+
// Variant of the edge case — final-answer signal is the pacing
|
|
202
|
+
// contract flag rather than done=true.
|
|
203
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
204
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
205
|
+
|
|
206
|
+
simulateExecuteStreamReply(
|
|
207
|
+
{ text: 'one sec', disableNotification: true, done: false },
|
|
208
|
+
'c:_',
|
|
209
|
+
state,
|
|
210
|
+
)
|
|
211
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
212
|
+
|
|
213
|
+
// Later emit drops the disable_notification flag — the pacing
|
|
214
|
+
// contract's "final answer" signal — but not done yet.
|
|
215
|
+
simulateExecuteStreamReply(
|
|
216
|
+
{ text: "Here's what I found.", disableNotification: false, done: false },
|
|
217
|
+
'c:_',
|
|
218
|
+
state,
|
|
219
|
+
)
|
|
220
|
+
expect(state.finalAnswerDelivered).toBe(true)
|
|
221
|
+
expect(readSilentEndState()).toBeNull()
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
it('idempotency: clearSilentEndState is safe to call when the file is already gone', () => {
|
|
225
|
+
// The L5343 clear is gated only on isFinalAnswerReply — it
|
|
226
|
+
// fires even when the first-emit gate already cleared. The
|
|
227
|
+
// clear must be a no-op in that case so it can't accidentally
|
|
228
|
+
// unlink a fresh state file written for a DIFFERENT later turn.
|
|
229
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
230
|
+
const state = { activeDraftStreams: new Set<string>(), finalAnswerDelivered: false }
|
|
231
|
+
|
|
232
|
+
// First emit is final — clears via the first-emit site.
|
|
233
|
+
simulateExecuteStreamReply(
|
|
234
|
+
{ text: 'answer', disableNotification: false },
|
|
235
|
+
'c:_',
|
|
236
|
+
state,
|
|
237
|
+
)
|
|
238
|
+
expect(readSilentEndState()).toBeNull()
|
|
239
|
+
|
|
240
|
+
// A second final-shaped emit on the same stream re-enters the
|
|
241
|
+
// L5343 clear. State is already gone — must be a no-op.
|
|
242
|
+
expect(() => {
|
|
243
|
+
simulateExecuteStreamReply(
|
|
244
|
+
{ text: 'addendum', disableNotification: false },
|
|
245
|
+
'c:_',
|
|
246
|
+
state,
|
|
247
|
+
)
|
|
248
|
+
}).not.toThrow()
|
|
249
|
+
expect(readSilentEndState()).toBeNull()
|
|
250
|
+
})
|
|
251
|
+
|
|
252
|
+
it('cross-turn safety: clearSilentEndState on turnKey A does NOT clear state for turnKey B', () => {
|
|
253
|
+
// The clear is keyed on turnKey via the writer's stored value —
|
|
254
|
+
// a clear call for a DIFFERENT turn must not unlink a state file
|
|
255
|
+
// written for the in-flight turn. This guards against the L5343
|
|
256
|
+
// clear accidentally racing a turn-end writer for a newer turn.
|
|
257
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:turn-B' })
|
|
258
|
+
// A stale clear for turn-A — silent-end.ts only unlinks when the
|
|
259
|
+
// stored turnKey matches.
|
|
260
|
+
clearSilentEndState('c:turn-A')
|
|
261
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
262
|
+
expect(readSilentEndState()!.turnKey).toBe('c:turn-B')
|
|
263
|
+
|
|
264
|
+
// The matching clear works.
|
|
265
|
+
clearSilentEndState('c:turn-B')
|
|
266
|
+
expect(readSilentEndState()).toBeNull()
|
|
267
|
+
})
|
|
268
|
+
})
|
|
@@ -301,6 +301,111 @@ describe('recordUndeliveredTurnEnd — #1664 extended trigger', () => {
|
|
|
301
301
|
})
|
|
302
302
|
})
|
|
303
303
|
|
|
304
|
+
describe('#1741 — ack reply must not clear silent-end state', () => {
|
|
305
|
+
// The gateway gates `clearSilentEndState` at the reply send-site on
|
|
306
|
+
// `isFinalAnswerReply`. These tests reproduce that gate as a unit:
|
|
307
|
+
// simulate a turn's reply sequence by calling the same predicate the
|
|
308
|
+
// gateway uses, and assert state-file persistence matches the contract.
|
|
309
|
+
//
|
|
310
|
+
// Why this matters: if `turn_end` never lands (Claude Code's
|
|
311
|
+
// `turn_duration` system event is unreliable for trivial-prompt
|
|
312
|
+
// turns), the only line of defence between an undelivered turn and
|
|
313
|
+
// the Stop hook is the persistence of `silent-end-pending.json`.
|
|
314
|
+
// Pre-fix, an ack reply cleared the file unconditionally — so the
|
|
315
|
+
// Stop hook found no state and allowed the stop on every ack-then-
|
|
316
|
+
// tool-then-silent shape. Post-fix, only a plausibly-final reply
|
|
317
|
+
// clears it.
|
|
318
|
+
|
|
319
|
+
function simulateReplyAtGateway(
|
|
320
|
+
reply: { text: string; disableNotification: boolean; done?: boolean },
|
|
321
|
+
turnKey: string,
|
|
322
|
+
): void {
|
|
323
|
+
// The gateway calls clearSilentEndState ONLY when isFinalAnswerReply
|
|
324
|
+
// is true. Mirror that gate exactly.
|
|
325
|
+
if (isFinalAnswerReply(reply)) {
|
|
326
|
+
clearSilentEndState(turnKey)
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
it('ack reply (disable_notification, short, no done) does NOT clear pending state', () => {
|
|
331
|
+
// A prior turn-end already wrote state (or a re-prompt round wrote it).
|
|
332
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
333
|
+
// Agent sends an interim ack.
|
|
334
|
+
simulateReplyAtGateway(
|
|
335
|
+
{ text: 'On it', disableNotification: true },
|
|
336
|
+
'c:_',
|
|
337
|
+
)
|
|
338
|
+
// State must persist — the Stop hook still needs to be able to
|
|
339
|
+
// catch a subsequent silent end.
|
|
340
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
341
|
+
expect(readSilentEndState()!.turnKey).toBe('c:_')
|
|
342
|
+
})
|
|
343
|
+
|
|
344
|
+
it('final-answer reply (disable_notification=false) clears the state', () => {
|
|
345
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
346
|
+
simulateReplyAtGateway(
|
|
347
|
+
{ text: "Done — here's the result.", disableNotification: false },
|
|
348
|
+
'c:_',
|
|
349
|
+
)
|
|
350
|
+
expect(readSilentEndState()).toBeNull()
|
|
351
|
+
})
|
|
352
|
+
|
|
353
|
+
it('stream_reply done=true clears the state even with disable_notification=true', () => {
|
|
354
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
355
|
+
simulateReplyAtGateway(
|
|
356
|
+
{ text: 'ok', disableNotification: true, done: true },
|
|
357
|
+
'c:_',
|
|
358
|
+
)
|
|
359
|
+
expect(readSilentEndState()).toBeNull()
|
|
360
|
+
})
|
|
361
|
+
|
|
362
|
+
it('long ack-shaped reply (>=200 chars) is treated as final and clears the state', () => {
|
|
363
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
364
|
+
simulateReplyAtGateway(
|
|
365
|
+
{ text: 'x'.repeat(250), disableNotification: true },
|
|
366
|
+
'c:_',
|
|
367
|
+
)
|
|
368
|
+
expect(readSilentEndState()).toBeNull()
|
|
369
|
+
})
|
|
370
|
+
|
|
371
|
+
it('ack-then-silent end-to-end: ack does not clear, Stop hook still blocks', () => {
|
|
372
|
+
// 1. The previous undelivered turn-end wrote state, OR the turn
|
|
373
|
+
// starts fresh and only the Stop hook will see this state file
|
|
374
|
+
// once the gateway re-writes it. Simulate the gateway's writer
|
|
375
|
+
// firing at turn-end with finalAnswerDelivered=false (no
|
|
376
|
+
// qualifying reply happened this turn).
|
|
377
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
378
|
+
// 2. Mid-turn ack reply lands. Pre-fix this would unlink the
|
|
379
|
+
// state file; post-fix it must persist.
|
|
380
|
+
simulateReplyAtGateway(
|
|
381
|
+
{ text: 'On it, working on it…', disableNotification: true },
|
|
382
|
+
'c:_',
|
|
383
|
+
)
|
|
384
|
+
// 3. Stop hook fires (separately tested below): it must still
|
|
385
|
+
// find the state file and decide to block. Verify the file is
|
|
386
|
+
// intact at the path the hook reads.
|
|
387
|
+
const state = readSilentEndState()
|
|
388
|
+
expect(state).not.toBeNull()
|
|
389
|
+
expect(state!.turnKey).toBe('c:_')
|
|
390
|
+
})
|
|
391
|
+
|
|
392
|
+
it('ack-then-final: ack does not clear, final clears', () => {
|
|
393
|
+
writeSilentEndState({ chatId: 'c', threadId: null, turnKey: 'c:_' })
|
|
394
|
+
// Interim ack — state persists.
|
|
395
|
+
simulateReplyAtGateway(
|
|
396
|
+
{ text: 'On it', disableNotification: true },
|
|
397
|
+
'c:_',
|
|
398
|
+
)
|
|
399
|
+
expect(readSilentEndState()).not.toBeNull()
|
|
400
|
+
// Final answer — state cleared.
|
|
401
|
+
simulateReplyAtGateway(
|
|
402
|
+
{ text: 'Done — the answer is 42.', disableNotification: false },
|
|
403
|
+
'c:_',
|
|
404
|
+
)
|
|
405
|
+
expect(readSilentEndState()).toBeNull()
|
|
406
|
+
})
|
|
407
|
+
})
|
|
408
|
+
|
|
304
409
|
describe('silent-end-interrupt-stop hook — integration', () => {
|
|
305
410
|
const hookPath = join(__dirname, '..', 'hooks', 'silent-end-interrupt-stop.mjs')
|
|
306
411
|
|
|
@@ -103,11 +103,6 @@ describe('classifyRejection — genuine errors still crash', () => {
|
|
|
103
103
|
expect(classifyRejection(err)).toBe('shutdown')
|
|
104
104
|
})
|
|
105
105
|
|
|
106
|
-
it('returns "shutdown" for GrammyError 429 (rate limit) — should be retried not masked', () => {
|
|
107
|
-
const err = grammyError(429, 'Too Many Requests: retry after 5')
|
|
108
|
-
expect(classifyRejection(err)).toBe('shutdown')
|
|
109
|
-
})
|
|
110
|
-
|
|
111
106
|
it('returns "shutdown" for GrammyError 400 with NEW unknown description', () => {
|
|
112
107
|
// Use a description that's not in the benign list — the policy is
|
|
113
108
|
// intentionally narrow so genuinely new bug categories still crash
|
|
@@ -115,9 +110,59 @@ describe('classifyRejection — genuine errors still crash', () => {
|
|
|
115
110
|
const err = grammyError(400, 'Bad Request: PHOTO_INVALID_DIMENSIONS')
|
|
116
111
|
expect(classifyRejection(err)).toBe('shutdown')
|
|
117
112
|
})
|
|
113
|
+
})
|
|
114
|
+
|
|
115
|
+
describe('classifyRejection — transient API errors (2026-05-25 follow-up)', () => {
|
|
116
|
+
// The pre-fix policy crashed on 429 + 5xx + network errors. That's
|
|
117
|
+
// the wrong shape: those errors are already handled by retry-api-call.ts
|
|
118
|
+
// (3 attempts with backoff). When one leaks past the retry policy —
|
|
119
|
+
// either because the caller didn't wrap, or because 3 sustained
|
|
120
|
+
// attempts all failed — crashing the gateway is the worst outcome
|
|
121
|
+
// (one bad packet = visible "agent-crashed" banner + restart loop
|
|
122
|
+
// risk). log_only matches the daemon-stays-up posture.
|
|
123
|
+
//
|
|
124
|
+
// Surfaced 2026-05-25 on clerk: a sendMessage hit 429 after exhausting
|
|
125
|
+
// retries, the rejection bubbled to the unhandledRejection handler,
|
|
126
|
+
// shutdown fired, operator-event banner posted, "clerk seems to be
|
|
127
|
+
// crashing" user-visible.
|
|
128
|
+
|
|
129
|
+
it('returns "log_only" for GrammyError 429 (rate limit) — already handled by retry-api-call', () => {
|
|
130
|
+
const err = grammyError(429, 'Too Many Requests: retry after 5')
|
|
131
|
+
expect(classifyRejection(err)).toBe('log_only')
|
|
132
|
+
})
|
|
118
133
|
|
|
119
|
-
it('returns "shutdown" for GrammyError 500 (server error)', () => {
|
|
134
|
+
it('returns "log_only" for GrammyError 500 (server error)', () => {
|
|
120
135
|
const err = grammyError(500, 'Internal Server Error')
|
|
136
|
+
expect(classifyRejection(err)).toBe('log_only')
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
it('returns "log_only" for GrammyError 502 (Bad Gateway)', () => {
|
|
140
|
+
const err = grammyError(502, 'Bad Gateway')
|
|
141
|
+
expect(classifyRejection(err)).toBe('log_only')
|
|
142
|
+
})
|
|
143
|
+
|
|
144
|
+
it('returns "log_only" for GrammyError 503 (Service Unavailable)', () => {
|
|
145
|
+
const err = grammyError(503, 'Service Unavailable')
|
|
146
|
+
expect(classifyRejection(err)).toBe('log_only')
|
|
147
|
+
})
|
|
148
|
+
|
|
149
|
+
it('returns "log_only" for HttpError (network failure) via injected detector', () => {
|
|
150
|
+
// Real grammy HttpError surfaces as e.g. "Network request for
|
|
151
|
+
// 'sendMessage' failed!" wrapping ECONNRESET / ETIMEDOUT / fetch
|
|
152
|
+
// failed. We inject the detector so this test doesn't depend on
|
|
153
|
+
// grammy internals; the production path uses `err instanceof
|
|
154
|
+
// HttpError`.
|
|
155
|
+
const fakeHttp = new Error("Network request for 'sendMessage' failed!")
|
|
156
|
+
expect(
|
|
157
|
+
classifyRejection(fakeHttp, { isHttpError: () => true }),
|
|
158
|
+
).toBe('log_only')
|
|
159
|
+
})
|
|
160
|
+
|
|
161
|
+
it('still returns "shutdown" for GrammyError 403 (forbidden) — must not be masked', () => {
|
|
162
|
+
// 401 (covered above at line 101) and 403 are NOT transient — they
|
|
163
|
+
// indicate a genuine configuration bug (revoked bot token, removed
|
|
164
|
+
// from chat) and must crash so systemd surfaces the failure.
|
|
165
|
+
const err = grammyError(403, 'Forbidden: bot was blocked by the user')
|
|
121
166
|
expect(classifyRejection(err)).toBe('shutdown')
|
|
122
167
|
})
|
|
123
168
|
})
|