switchroom 0.14.29 → 0.14.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +98 -11
- package/dist/host-control/main.js +87 -9
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +144 -24
- package/telegram-plugin/gateway/gateway.ts +27 -0
- package/telegram-plugin/hooks/hooks.json +9 -0
- package/telegram-plugin/hooks/sentinel-reply-guard-pretool.mjs +114 -0
- package/telegram-plugin/hooks/silent-end-scan.mjs +61 -5
- package/telegram-plugin/pending-work-progress.ts +10 -3
- package/telegram-plugin/secret-detect/generic-entropy.ts +87 -0
- package/telegram-plugin/secret-detect/index.ts +42 -23
- package/telegram-plugin/secret-detect/patterns.ts +64 -2
- package/telegram-plugin/secret-detect/redact.ts +10 -1
- package/telegram-plugin/tests/pending-work-progress.test.ts +22 -4
- package/telegram-plugin/tests/secret-detect-generic-entropy.test.ts +94 -0
- package/telegram-plugin/tests/secret-detect-providers.test.ts +74 -0
- package/telegram-plugin/tests/secret-detect-secretlint.test.ts +8 -4
- package/telegram-plugin/tests/sentinel-reply-guard-pretool.test.ts +109 -0
- package/telegram-plugin/tests/silent-end-interrupt-stop-scan.test.ts +118 -0
- package/telegram-plugin/tests/turn-flush-safety.test.ts +41 -0
- package/telegram-plugin/turn-flush-safety.ts +41 -0
- package/telegram-plugin/uat/scenarios/cross-turn-pending-progress-dm.test.ts +2 -1
- package/telegram-plugin/uat/scenarios/jtbd-pending-progress-html-dm.test.ts +2 -1
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { detectSecrets } from '../secret-detect/index.js'
|
|
3
|
+
import { scanGenericSecrets, GENERIC_MIN_DISTINCT } from '../secret-detect/generic-entropy.js'
|
|
4
|
+
import { redact } from '../secret-detect/redact.js'
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Generic bare-high-entropy fallback (#1) — the long-tail detector for
|
|
8
|
+
* standalone tokens that no prefix/KV rule matches (the Sanctum class).
|
|
9
|
+
* Emitted at `ambiguous` confidence: the inbound gate ASKS ("stash to
|
|
10
|
+
* vault or ignore?") rather than auto-deleting, so recall can be generous.
|
|
11
|
+
*
|
|
12
|
+
* Fixtures built by concatenation (no contiguous secret-shaped literals).
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
// 32 varied base62 chars → high entropy (~5 bits/char).
|
|
16
|
+
const HIGH_ENTROPY = 'q7Wm2Zx9' + 'Lk4Rp1Vn' + '8Bs3Yt6H' + 'd5Gj0Fc7'
|
|
17
|
+
// 33 chars but only 3 distinct → low entropy (< 4), must NOT flag.
|
|
18
|
+
const LOW_ENTROPY = 'abc'.repeat(11) // 33 chars, entropy ~1.6
|
|
19
|
+
|
|
20
|
+
describe('generic high-entropy detector', () => {
|
|
21
|
+
it('flags a standalone high-entropy token as ambiguous', () => {
|
|
22
|
+
const hits = detectSecrets(`the value is ${HIGH_ENTROPY} ok`)
|
|
23
|
+
const hit = hits.find((d) => d.rule_id === 'generic_high_entropy')
|
|
24
|
+
expect(hit).toBeDefined()
|
|
25
|
+
expect(hit!.matched_text).toBe(HIGH_ENTROPY)
|
|
26
|
+
expect(hit!.confidence).toBe('ambiguous') // asks, never auto-deletes
|
|
27
|
+
})
|
|
28
|
+
|
|
29
|
+
it('redact() does NOT mask a generic-flagged token (the #2059 outbound-corruption regression)', () => {
|
|
30
|
+
// HIGH_ENTROPY flags as generic_high_entropy (ambiguous). redact() — the
|
|
31
|
+
// chokepoint for the outbound reply mask + history + issues — must leave
|
|
32
|
+
// it intact; masking it would corrupt agent replies. This is the exact
|
|
33
|
+
// BLOCK that shipped to review; pin it.
|
|
34
|
+
const text = `use ${HIGH_ENTROPY} for the deploy`
|
|
35
|
+
expect(redact(text)).toBe(text)
|
|
36
|
+
})
|
|
37
|
+
|
|
38
|
+
it('respects the distinct-char floor (repetitive long strings do not flag)', () => {
|
|
39
|
+
expect(scanGenericSecrets(LOW_ENTROPY).length).toBe(0) // 3 distinct < 18
|
|
40
|
+
expect(GENERIC_MIN_DISTINCT).toBe(18)
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('caps hits on pathological input (bounds the O(n²) overlap-dedup)', () => {
|
|
44
|
+
// 100 distinct high-entropy tokens; the scanner must not return all 100.
|
|
45
|
+
const blob = Array.from({ length: 100 }, (_, i) =>
|
|
46
|
+
('q7Wm2Zx9Lk4Rp1Vn8Bs3Yt6H' + 'd5Gj0Fc7') + String(i).padStart(3, '0'),
|
|
47
|
+
).join(' ')
|
|
48
|
+
expect(scanGenericSecrets(blob).length).toBeLessThanOrEqual(20)
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
it('respects the length floor (short tokens do not flag)', () => {
|
|
52
|
+
const short = 'q7Wm2Zx9Lk4Rp1Vn' // 16 chars
|
|
53
|
+
expect(scanGenericSecrets(short).length).toBe(0)
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
it('does NOT downgrade a recognized high-confidence token', () => {
|
|
57
|
+
// A ghp_ token is matched by the anchored pattern (high). The generic
|
|
58
|
+
// pass must not swallow/downgrade it to ambiguous.
|
|
59
|
+
const ghp = 'ghp_' + 'A1b2C3d4E5'.repeat(3) // ghp_ + 30
|
|
60
|
+
const hits = detectSecrets(`token ${ghp} here`)
|
|
61
|
+
const ghpHit = hits.find((d) => d.matched_text === ghp || d.rule_id === 'github_pat_classic')
|
|
62
|
+
expect(ghpHit).toBeDefined()
|
|
63
|
+
expect(ghpHit!.confidence).toBe('high')
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
describe('false-positive guards — benign high-entropy shapes do NOT flag', () => {
|
|
67
|
+
const BENIGN: Array<[string, string]> = [
|
|
68
|
+
['a UUID', '550e8400-e29b-41d4-a716-446655440000'],
|
|
69
|
+
['a git SHA (40 hex)', 'a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0'],
|
|
70
|
+
['a sha256 (64 hex)', 'e3b0c44298fc1c149afbf4c8996fb924' + '27ae41e4649b934ca495991b7852b855'],
|
|
71
|
+
['an md5 (32 hex)', 'd41d8cd98f00b204' + 'e9800998ecf8427e'],
|
|
72
|
+
['a long digit run', '123456789012345678901234567890'],
|
|
73
|
+
['plain prose', 'the quick brown fox jumps over the lazy dog repeatedly today'],
|
|
74
|
+
['a file path', '/usr/local/lib/python3.11/site-packages/somepackage/internal/module.py'],
|
|
75
|
+
// Dense technical identifiers — the FP shapes the reviewer flagged.
|
|
76
|
+
// CamelCase-no-digit → killed by the digit requirement; separator
|
|
77
|
+
// styles (snake/kebab/npm/slug) → broken into sub-28 runs by the
|
|
78
|
+
// charset (no `_ - / .`).
|
|
79
|
+
['a CamelCase class name', 'AbstractSingletonProxyFactoryBeanGenerator'],
|
|
80
|
+
['a snake_case symbol', 'get_user_profile_by_organization_identifier'],
|
|
81
|
+
['a kebab-case slug', 'how-to-configure-kubernetes-ingress-with-cert-manager'],
|
|
82
|
+
['an npm package path', '@babel/plugin-transform-modules-commonjs'],
|
|
83
|
+
['a CSS class string (has a digit)', 'flex-row-justify-between-items-center-gap-4'],
|
|
84
|
+
['a long CamelCase phrase', 'TheQuickBrownFoxJumpsOverTheLazyDogToday'],
|
|
85
|
+
['a 32-char base62 with NO digit', 'AbcdefGhijkLmnopQrstuVwxyzABCDEFG'],
|
|
86
|
+
]
|
|
87
|
+
for (const [label, text] of BENIGN) {
|
|
88
|
+
it(`${label} does not flag generic_high_entropy`, () => {
|
|
89
|
+
const hits = detectSecrets(text).filter((d) => d.rule_id === 'generic_high_entropy')
|
|
90
|
+
expect(hits, `unexpected: ${JSON.stringify(hits.map((h) => h.matched_text))}`).toHaveLength(0)
|
|
91
|
+
})
|
|
92
|
+
}
|
|
93
|
+
})
|
|
94
|
+
})
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { detectSecrets } from '../secret-detect/index.js'
|
|
3
|
+
import { PROVIDER_PATTERNS } from '../secret-detect/patterns.js'
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* High-precision provider ruleset (#2 — "use a comprehensive, GitHub-style
|
|
7
|
+
* curated set instead of 22 hand-rolled patterns"). Each fixture token is
|
|
8
|
+
* built by concatenation so the source never holds a contiguous secret-shaped
|
|
9
|
+
* literal (repo Push Protection / no-pii lint).
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
// [rule_id, sample token] — token built to match the rule's regex exactly.
|
|
13
|
+
const HEX10 = 'a1b2c3d4e5'
|
|
14
|
+
const ALNUM10 = 'A1b2C3d4E5'
|
|
15
|
+
const CASES: Array<[string, string]> = [
|
|
16
|
+
['slack_webhook', 'https://hooks.slack.com/services/' + 'T00000000/B00000000/' + 'XXXXXXXXXXXXXXXXXXXXXXXX'],
|
|
17
|
+
['stripe_live_secret', 'sk_' + 'live_' + ALNUM10.repeat(2) + 'ABcd'], // 24 alnum
|
|
18
|
+
['stripe_restricted', 'rk_' + 'live_' + ALNUM10.repeat(2) + 'ABcd'],
|
|
19
|
+
['sendgrid_api_key', 'SG' + '.' + (ALNUM10 + ALNUM10 + 'AB') + '.' + 'a'.repeat(43)], // 22 . 43
|
|
20
|
+
['gitlab_pat', 'glpat-' + ALNUM10.repeat(2)], // 20
|
|
21
|
+
['huggingface_token', 'hf_' + 'a'.repeat(34)],
|
|
22
|
+
['twilio_api_key', 'SK' + HEX10.repeat(3) + 'ab'], // 32 hex
|
|
23
|
+
['mailgun_key', 'key-' + HEX10.repeat(3) + 'ab'],
|
|
24
|
+
['digitalocean_pat', 'dop_v1_' + HEX10.repeat(6) + 'abcd'], // 64 hex
|
|
25
|
+
['linear_api_key', 'lin_api_' + ALNUM10.repeat(4)], // 40
|
|
26
|
+
['shopify_access_token', 'shpat_' + HEX10.repeat(3) + 'ab'],
|
|
27
|
+
['square_access_token', 'sq0atp-' + ALNUM10.repeat(2) + 'AB'], // 22
|
|
28
|
+
['newrelic_key', 'NRAK-' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0'], // 27 A-Z0-9
|
|
29
|
+
['notion_token', 'ntn_' + ALNUM10.repeat(4) + 'A1b2C3'], // 46
|
|
30
|
+
['atlassian_token', 'ATATT' + ALNUM10.repeat(2)],
|
|
31
|
+
['supabase_service_key', 'sbp_' + HEX10.repeat(4)], // 40 hex
|
|
32
|
+
['databricks_token', 'dapi' + HEX10.repeat(3) + 'ab'],
|
|
33
|
+
['aws_temp_access_key', 'ASIA' + 'ABCDEFGHIJKLMNOP'], // 16 A-Z0-9
|
|
34
|
+
['gcp_oauth_token', 'ya29' + '.' + 'a'.repeat(40)],
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
describe('high-precision provider patterns', () => {
|
|
38
|
+
for (const [ruleId, tok] of CASES) {
|
|
39
|
+
it(`detects ${ruleId} as a high-confidence hit`, () => {
|
|
40
|
+
const hits = detectSecrets(`here's the credential: ${tok} — use it`)
|
|
41
|
+
const hit = hits.find((d) => d.rule_id === ruleId)
|
|
42
|
+
expect(hit, `expected a ${ruleId} hit for ${tok.slice(0, 8)}…`).toBeDefined()
|
|
43
|
+
expect(hit!.matched_text).toBe(tok)
|
|
44
|
+
expect(hit!.confidence).toBe('high')
|
|
45
|
+
expect(hit!.suppressed).toBe(false)
|
|
46
|
+
})
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
it('exposes the provider rules through ALL_PATTERNS (so detectSecrets uses them)', () => {
|
|
50
|
+
expect(PROVIDER_PATTERNS.length).toBeGreaterThanOrEqual(25)
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
describe('false-positive guards — ordinary strings must NOT match any provider rule', () => {
|
|
54
|
+
const providerIds = new Set(PROVIDER_PATTERNS.map((p) => p.rule_id))
|
|
55
|
+
const BENIGN: Array<[string, string]> = [
|
|
56
|
+
['a UUID', '550e8400-e29b-41d4-a716-446655440000'],
|
|
57
|
+
['a git SHA', 'a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0'],
|
|
58
|
+
// MD5 split so the SOURCE never holds a contiguous `<32hex>-usN`
|
|
59
|
+
// (which is the Mailchimp shape — GitHub Push Protection flags it).
|
|
60
|
+
['a bare md5 hash', 'checksum ' + ('d41d8cd98f00b204' + 'e9800998ecf8427e') + ' ok'],
|
|
61
|
+
['an md5 + -usN (mailchimp look-alike, must NOT auto-delete)', 'ETag ' + ('d41d8cd98f00b204' + 'e9800998ecf8427e') + '-us' + '1 cached'],
|
|
62
|
+
['plain prose', 'the quick brown fox jumps over the lazy dog 12 times'],
|
|
63
|
+
['a base64 data blob', 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAA'],
|
|
64
|
+
['a hex color + number', 'background #ff00aa width 1024px margin 32'],
|
|
65
|
+
['a long file path', '/usr/local/lib/python3.11/site-packages/somepkg/module.py'],
|
|
66
|
+
]
|
|
67
|
+
for (const [label, text] of BENIGN) {
|
|
68
|
+
it(`${label} triggers no provider rule`, () => {
|
|
69
|
+
const providerHits = detectSecrets(text).filter((d) => providerIds.has(d.rule_id))
|
|
70
|
+
expect(providerHits, `unexpected provider hits: ${JSON.stringify(providerHits.map((h) => h.rule_id))}`).toHaveLength(0)
|
|
71
|
+
})
|
|
72
|
+
}
|
|
73
|
+
})
|
|
74
|
+
})
|
|
@@ -80,14 +80,18 @@ describe('detectSecretsAsync merge', () => {
|
|
|
80
80
|
expect(slackHits[0]!.rule_id).toBe('slack_token')
|
|
81
81
|
})
|
|
82
82
|
|
|
83
|
-
it('
|
|
84
|
-
// Shopify is
|
|
85
|
-
// vendored
|
|
83
|
+
it('detects a Shopify token via the async (Secretlint-augmented) path', async () => {
|
|
84
|
+
// Shopify is now ALSO a vendored PROVIDER_PATTERN (shopify_shared_secret),
|
|
85
|
+
// so on this span the merge prefers the vendored high hit over the
|
|
86
|
+
// Secretlint one — both are valid Shopify classifications. Secretlint
|
|
87
|
+
// remains the fallback for the long tail of providers we don't vendor;
|
|
88
|
+
// this asserts the async path still detects + classifies the token.
|
|
86
89
|
const text = 'SHOPIFY=shpss_1234567890abcdef1234567890abcdef and go'
|
|
87
90
|
const hits = await detectSecretsAsync(text)
|
|
88
91
|
const shopify = hits.find((h) => h.matched_text.startsWith('shpss_'))
|
|
89
92
|
expect(shopify).toBeDefined()
|
|
90
|
-
expect(shopify!.rule_id).toMatch(/
|
|
93
|
+
expect(shopify!.rule_id).toMatch(/shopify/)
|
|
94
|
+
expect(shopify!.confidence).toBe('high')
|
|
91
95
|
})
|
|
92
96
|
|
|
93
97
|
it('produces unique slugs across the merged detection list', async () => {
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for telegram-plugin/hooks/sentinel-reply-guard-pretool.mjs (#2053).
|
|
3
|
+
*
|
|
4
|
+
* Defense-in-depth guard on the reply path: a reply / stream_reply call
|
|
5
|
+
* whose entire payload is only the silent sentinel (NO_REPLY /
|
|
6
|
+
* HEARTBEAT_OK) must be DROPPED before it reaches the Telegram chat,
|
|
7
|
+
* regardless of any nag-loop behaviour upstream.
|
|
8
|
+
*
|
|
9
|
+
* Two layers of coverage:
|
|
10
|
+
* - the pure `isSentinelOnly` predicate (exact-trim match, never a
|
|
11
|
+
* substring of genuine prose);
|
|
12
|
+
* - the hook end-to-end as a child process, asserting the PreToolUse
|
|
13
|
+
* block/allow protocol (decision JSON on stdout).
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { describe, it, expect } from 'vitest'
|
|
17
|
+
import { spawnSync } from 'node:child_process'
|
|
18
|
+
import { resolve } from 'node:path'
|
|
19
|
+
|
|
20
|
+
import { isSentinelOnly } from '../hooks/sentinel-reply-guard-pretool.mjs'
|
|
21
|
+
|
|
22
|
+
const HOOK_PATH = resolve(__dirname, '..', 'hooks', 'sentinel-reply-guard-pretool.mjs')
|
|
23
|
+
|
|
24
|
+
function runHook(event: unknown): { stdout: string; decision?: { decision?: string; reason?: string } } {
|
|
25
|
+
const res = spawnSync('node', [HOOK_PATH], {
|
|
26
|
+
input: JSON.stringify(event),
|
|
27
|
+
encoding: 'utf8',
|
|
28
|
+
})
|
|
29
|
+
const stdout = res.stdout.trim()
|
|
30
|
+
let decision: { decision?: string; reason?: string } | undefined
|
|
31
|
+
if (stdout) {
|
|
32
|
+
try {
|
|
33
|
+
decision = JSON.parse(stdout)
|
|
34
|
+
} catch {
|
|
35
|
+
decision = undefined
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
return { stdout, decision }
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
const REPLY = 'mcp__switchroom-telegram__reply'
|
|
42
|
+
const STREAM_REPLY = 'mcp__switchroom-telegram__stream_reply'
|
|
43
|
+
|
|
44
|
+
describe('isSentinelOnly — exact-trim, never substring (#2053)', () => {
|
|
45
|
+
it('bare NO_REPLY / HEARTBEAT_OK → true', () => {
|
|
46
|
+
expect(isSentinelOnly('NO_REPLY')).toBe(true)
|
|
47
|
+
expect(isSentinelOnly('HEARTBEAT_OK')).toBe(true)
|
|
48
|
+
expect(isSentinelOnly(' NO_REPLY ')).toBe(true)
|
|
49
|
+
expect(isSentinelOnly('NO_REPLY.')).toBe(true)
|
|
50
|
+
})
|
|
51
|
+
it('repeats of the sentinel → true', () => {
|
|
52
|
+
expect(isSentinelOnly('NO_REPLY\nNO_REPLY')).toBe(true)
|
|
53
|
+
expect(isSentinelOnly('NO_REPLY\nHEARTBEAT_OK\nNO_REPLY')).toBe(true)
|
|
54
|
+
})
|
|
55
|
+
it('(d) genuine prose containing "NO_REPLY" as a substring → false', () => {
|
|
56
|
+
expect(isSentinelOnly('Reply with exactly NO_REPLY if there is nothing to add.')).toBe(false)
|
|
57
|
+
expect(isSentinelOnly('The hook expects NO_REPLY on idle turns — here is your answer.')).toBe(false)
|
|
58
|
+
})
|
|
59
|
+
it('prose then a trailing NO_REPLY line → false (has non-marker content)', () => {
|
|
60
|
+
// The guard is the LAST line of defense and only drops PURE sentinel
|
|
61
|
+
// payloads. A prose+trailing-NO_REPLY blob is handled upstream by the
|
|
62
|
+
// flush gate / Stop-hook scan; if it somehow reaches the reply tool it
|
|
63
|
+
// still carries real prose the user might need, so the guard lets it
|
|
64
|
+
// through rather than silently eating content.
|
|
65
|
+
expect(isSentinelOnly('Here is the summary.\nNO_REPLY')).toBe(false)
|
|
66
|
+
})
|
|
67
|
+
it('non-strings / empty → false', () => {
|
|
68
|
+
expect(isSentinelOnly(undefined as unknown as string)).toBe(false)
|
|
69
|
+
expect(isSentinelOnly('')).toBe(false)
|
|
70
|
+
expect(isSentinelOnly(' ')).toBe(false)
|
|
71
|
+
})
|
|
72
|
+
})
|
|
73
|
+
|
|
74
|
+
describe('sentinel-reply-guard hook — end-to-end (#2053)', () => {
|
|
75
|
+
it('(c) DROPS a sentinel-only reply payload', () => {
|
|
76
|
+
const { decision } = runHook({ tool_name: REPLY, tool_input: { text: 'NO_REPLY' } })
|
|
77
|
+
expect(decision?.decision).toBe('block')
|
|
78
|
+
expect(decision?.reason).toMatch(/sentinel|NO_REPLY/i)
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
it('DROPS a sentinel-only stream_reply payload (repeated markers)', () => {
|
|
82
|
+
const { decision } = runHook({ tool_name: STREAM_REPLY, tool_input: { text: 'NO_REPLY\nNO_REPLY' } })
|
|
83
|
+
expect(decision?.decision).toBe('block')
|
|
84
|
+
})
|
|
85
|
+
|
|
86
|
+
it('(d) ALLOWS a real reply that mentions NO_REPLY inside prose', () => {
|
|
87
|
+
const { stdout } = runHook({
|
|
88
|
+
tool_name: REPLY,
|
|
89
|
+
tool_input: { text: 'If nothing is pending, reply with exactly NO_REPLY — otherwise summarise.' },
|
|
90
|
+
})
|
|
91
|
+
expect(stdout).toBe('')
|
|
92
|
+
})
|
|
93
|
+
|
|
94
|
+
it('ALLOWS an ordinary reply', () => {
|
|
95
|
+
const { stdout } = runHook({ tool_name: REPLY, tool_input: { text: 'Done — the build is green.' } })
|
|
96
|
+
expect(stdout).toBe('')
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
it('ignores non-reply tools entirely', () => {
|
|
100
|
+
const { stdout } = runHook({ tool_name: 'Bash', tool_input: { command: 'NO_REPLY' } })
|
|
101
|
+
expect(stdout).toBe('')
|
|
102
|
+
})
|
|
103
|
+
|
|
104
|
+
it('fails open on malformed / empty stdin', () => {
|
|
105
|
+
const res = spawnSync('node', [HOOK_PATH], { input: '', encoding: 'utf8' })
|
|
106
|
+
expect(res.status).toBe(0)
|
|
107
|
+
expect(res.stdout.trim()).toBe('')
|
|
108
|
+
})
|
|
109
|
+
})
|
|
@@ -22,6 +22,7 @@ import { describe, it, expect } from 'vitest'
|
|
|
22
22
|
import {
|
|
23
23
|
scanTurnForFinalReply,
|
|
24
24
|
isFinalAnswerReply,
|
|
25
|
+
endsWithSilentMarker,
|
|
25
26
|
} from '../hooks/silent-end-scan.mjs'
|
|
26
27
|
|
|
27
28
|
// ── Fixture builders ────────────────────────────────────────────────
|
|
@@ -32,6 +33,13 @@ const ENQUEUE = JSON.stringify({
|
|
|
32
33
|
content: '<channel source="switchroom-telegram" chat_id="111" message_id="42">hi</channel>',
|
|
33
34
|
})
|
|
34
35
|
|
|
36
|
+
// Cron-fired turn: the enqueue envelope carries `source="cron"` (#2053).
|
|
37
|
+
const ENQUEUE_CRON = JSON.stringify({
|
|
38
|
+
type: 'queue-operation',
|
|
39
|
+
operation: 'enqueue',
|
|
40
|
+
content: '<channel source="cron" chat_id="111" message_thread_id="7">Time for the digest</channel>',
|
|
41
|
+
})
|
|
42
|
+
|
|
35
43
|
function assistantToolUse(name: string, input: Record<string, unknown>, opts: { isSidechain?: boolean } = {}) {
|
|
36
44
|
const base = {
|
|
37
45
|
type: 'assistant',
|
|
@@ -312,3 +320,113 @@ describe('scanTurnForFinalReply — malformed input tolerance', () => {
|
|
|
312
320
|
expect(scanTurnForFinalReply(text).decided).toBe('block')
|
|
313
321
|
})
|
|
314
322
|
})
|
|
323
|
+
|
|
324
|
+
// ── #2053 — endsWithSilentMarker helper ─────────────────────────────
|
|
325
|
+
|
|
326
|
+
describe('endsWithSilentMarker (#2053)', () => {
|
|
327
|
+
it('bare marker (whole string) → true', () => {
|
|
328
|
+
expect(endsWithSilentMarker('NO_REPLY')).toBe(true)
|
|
329
|
+
expect(endsWithSilentMarker('HEARTBEAT_OK')).toBe(true)
|
|
330
|
+
})
|
|
331
|
+
it('prose then trailing bare NO_REPLY → true', () => {
|
|
332
|
+
expect(endsWithSilentMarker('Nothing actionable in the digest today.\nNO_REPLY')).toBe(true)
|
|
333
|
+
expect(endsWithSilentMarker('Long\nmulti-line\nsummary.\nHEARTBEAT_OK')).toBe(true)
|
|
334
|
+
})
|
|
335
|
+
it('trailing marker with stray punctuation → true', () => {
|
|
336
|
+
expect(endsWithSilentMarker('done reviewing.\nNO_REPLY.')).toBe(true)
|
|
337
|
+
})
|
|
338
|
+
it('marker buried mid-output with real content after → false', () => {
|
|
339
|
+
expect(endsWithSilentMarker('NO_REPLY\nThe answer is 42.')).toBe(false)
|
|
340
|
+
})
|
|
341
|
+
// Documents the intentional divergence from the TS-side helper: this
|
|
342
|
+
// .mjs uses SILENT_MARKER_RE directly (unlimited trailing punctuation),
|
|
343
|
+
// whereas turn-flush-safety.ts delegates to the length-capped,
|
|
344
|
+
// single-punct isSilentFlushMarker. This side is deliberately the more
|
|
345
|
+
// permissive of the two — extra leniency only ever suppresses more.
|
|
346
|
+
it('trailing marker with multiple punctuation chars → true (more permissive than TS side)', () => {
|
|
347
|
+
expect(endsWithSilentMarker('all quiet.\nNO_REPLY...')).toBe(true)
|
|
348
|
+
expect(endsWithSilentMarker('NO_REPLY!!!')).toBe(true)
|
|
349
|
+
expect(endsWithSilentMarker('NO_REPLY?!')).toBe(true)
|
|
350
|
+
})
|
|
351
|
+
it('genuine prose mentioning NO_REPLY as a substring → false', () => {
|
|
352
|
+
expect(endsWithSilentMarker('reply with exactly NO_REPLY if there is nothing to add')).toBe(false)
|
|
353
|
+
})
|
|
354
|
+
it('non-strings / empty → false', () => {
|
|
355
|
+
expect(endsWithSilentMarker(undefined)).toBe(false)
|
|
356
|
+
expect(endsWithSilentMarker('')).toBe(false)
|
|
357
|
+
expect(endsWithSilentMarker(' \n ')).toBe(false)
|
|
358
|
+
})
|
|
359
|
+
})
|
|
360
|
+
|
|
361
|
+
// ── #2053 — prose-then-trailing-NO_REPLY recognised as silent ───────
|
|
362
|
+
|
|
363
|
+
describe('scanTurnForFinalReply — trailing NO_REPLY is a valid silent end (#2053)', () => {
|
|
364
|
+
it('(a) plain assistant TEXT ending with a trailing bare NO_REPLY → allow', () => {
|
|
365
|
+
// The exact #2053 leak shape: the model wrote prose then a bare
|
|
366
|
+
// NO_REPLY as plain transcript text (NOT through the reply tool).
|
|
367
|
+
// Pre-fix this matched nothing → block → nag → sentinel leak.
|
|
368
|
+
const text = jsonl(
|
|
369
|
+
ENQUEUE,
|
|
370
|
+
assistantText("Reviewed the overnight digest — nothing needs your attention.\nNO_REPLY"),
|
|
371
|
+
)
|
|
372
|
+
const r = scanTurnForFinalReply(text)
|
|
373
|
+
expect(r.decided).toBe('allow')
|
|
374
|
+
expect(r.reason).toBe('silent-marker-text')
|
|
375
|
+
})
|
|
376
|
+
|
|
377
|
+
it('reply-tool payload of prose+trailing NO_REPLY → allow (silent-marker)', () => {
|
|
378
|
+
const text = jsonl(
|
|
379
|
+
ENQUEUE,
|
|
380
|
+
assistantToolUse('mcp__switchroom-telegram__reply', {
|
|
381
|
+
text: 'Checked the build — all green.\nNO_REPLY',
|
|
382
|
+
disable_notification: true,
|
|
383
|
+
}),
|
|
384
|
+
)
|
|
385
|
+
const r = scanTurnForFinalReply(text)
|
|
386
|
+
expect(r.decided).toBe('allow')
|
|
387
|
+
expect(r.reason).toBe('silent-marker')
|
|
388
|
+
})
|
|
389
|
+
|
|
390
|
+
it('plain text NOT ending with a marker → still block', () => {
|
|
391
|
+
const text = jsonl(
|
|
392
|
+
ENQUEUE,
|
|
393
|
+
assistantText('Here is my real answer that I forgot to send via the reply tool.'),
|
|
394
|
+
)
|
|
395
|
+
expect(scanTurnForFinalReply(text).decided).toBe('block')
|
|
396
|
+
})
|
|
397
|
+
})
|
|
398
|
+
|
|
399
|
+
// ── #2053 — cron-source turns skip the nag ──────────────────────────
|
|
400
|
+
|
|
401
|
+
describe('scanTurnForFinalReply — cron-source turns skip the nag (#2053)', () => {
|
|
402
|
+
it('(b) cron turn with no qualifying reply → allow (cron-source), not block', () => {
|
|
403
|
+
const text = jsonl(
|
|
404
|
+
ENQUEUE_CRON,
|
|
405
|
+
assistantText('Ran the scheduled check. Nothing to report.'),
|
|
406
|
+
)
|
|
407
|
+
const r = scanTurnForFinalReply(text)
|
|
408
|
+
expect(r.decided).toBe('allow')
|
|
409
|
+
expect(r.reason).toBe('cron-source')
|
|
410
|
+
})
|
|
411
|
+
|
|
412
|
+
it('cron turn that DID send a real reply → allow (final-reply), reply still wins', () => {
|
|
413
|
+
const text = jsonl(
|
|
414
|
+
ENQUEUE_CRON,
|
|
415
|
+
assistantToolUse('mcp__switchroom-telegram__reply', {
|
|
416
|
+
text: 'Daily digest: 3 PRs merged, 1 incident.',
|
|
417
|
+
disable_notification: false,
|
|
418
|
+
}),
|
|
419
|
+
)
|
|
420
|
+
const r = scanTurnForFinalReply(text)
|
|
421
|
+
expect(r.decided).toBe('allow')
|
|
422
|
+
expect(r.reason).toBe('final-reply')
|
|
423
|
+
})
|
|
424
|
+
|
|
425
|
+
it('non-cron (telegram) turn with no reply → still blocks (cron carve-out scoped)', () => {
|
|
426
|
+
const text = jsonl(
|
|
427
|
+
ENQUEUE,
|
|
428
|
+
assistantText('I forgot to send my answer.'),
|
|
429
|
+
)
|
|
430
|
+
expect(scanTurnForFinalReply(text).decided).toBe('block')
|
|
431
|
+
})
|
|
432
|
+
})
|
|
@@ -18,6 +18,7 @@ import {
|
|
|
18
18
|
decideTurnFlush,
|
|
19
19
|
isSilentFlushMarker,
|
|
20
20
|
isCompositeSilentNoise,
|
|
21
|
+
endsWithSilentMarker,
|
|
21
22
|
isTurnFlushSafetyEnabled,
|
|
22
23
|
} from '../turn-flush-safety.js'
|
|
23
24
|
|
|
@@ -68,6 +69,46 @@ describe('decideTurnFlush — composite silent noise is skipped, not leaked', ()
|
|
|
68
69
|
})
|
|
69
70
|
})
|
|
70
71
|
|
|
72
|
+
describe('endsWithSilentMarker — prose+trailing-sentinel recognition (#2053)', () => {
|
|
73
|
+
it('recognises prose followed by a trailing bare NO_REPLY line', () => {
|
|
74
|
+
expect(endsWithSilentMarker('Nothing actionable in the digest.\nNO_REPLY')).toBe(true)
|
|
75
|
+
expect(endsWithSilentMarker('Build is green.\nHEARTBEAT_OK')).toBe(true)
|
|
76
|
+
})
|
|
77
|
+
it('tolerates a single trailing punctuation on the marker', () => {
|
|
78
|
+
expect(endsWithSilentMarker('done.\nNO_REPLY.')).toBe(true)
|
|
79
|
+
})
|
|
80
|
+
it('does NOT match when real content follows the marker', () => {
|
|
81
|
+
expect(endsWithSilentMarker('NO_REPLY\nThe answer is 42.')).toBe(false)
|
|
82
|
+
})
|
|
83
|
+
it('does NOT match a marker mentioned inside genuine prose', () => {
|
|
84
|
+
expect(endsWithSilentMarker('reply with exactly NO_REPLY when nothing to add')).toBe(false)
|
|
85
|
+
})
|
|
86
|
+
it('handles non-strings / empty safely', () => {
|
|
87
|
+
expect(endsWithSilentMarker(undefined)).toBe(false)
|
|
88
|
+
expect(endsWithSilentMarker('')).toBe(false)
|
|
89
|
+
expect(endsWithSilentMarker(' \n ')).toBe(false)
|
|
90
|
+
})
|
|
91
|
+
})
|
|
92
|
+
|
|
93
|
+
describe('decideTurnFlush — prose+trailing-sentinel is suppressed, not leaked (#2053)', () => {
|
|
94
|
+
it('skips a cron-style "prose\\nNO_REPLY" blob (the #2053 leak)', () => {
|
|
95
|
+
const d = decideTurnFlush({
|
|
96
|
+
chatId: '12345',
|
|
97
|
+
replyCalled: false,
|
|
98
|
+
capturedText: ['Reviewed the overnight digest — nothing needs your attention.', 'NO_REPLY'],
|
|
99
|
+
})
|
|
100
|
+
expect(d).toEqual({ kind: 'skip', reason: 'silent-marker' })
|
|
101
|
+
})
|
|
102
|
+
it('still flushes a real answer whose last line is NOT a sentinel', () => {
|
|
103
|
+
const d = decideTurnFlush({
|
|
104
|
+
chatId: '12345',
|
|
105
|
+
replyCalled: false,
|
|
106
|
+
capturedText: ['Here is the summary.', 'Three stories, all low priority.'],
|
|
107
|
+
})
|
|
108
|
+
expect(d.kind).toBe('flush')
|
|
109
|
+
})
|
|
110
|
+
})
|
|
111
|
+
|
|
71
112
|
describe('decideTurnFlush', () => {
|
|
72
113
|
it('(a) does NOT flush when the reply tool was called', () => {
|
|
73
114
|
const decision = decideTurnFlush({
|
|
@@ -92,6 +92,42 @@ export function isCompositeSilentNoise(text: string | undefined): boolean {
|
|
|
92
92
|
return lines.every(l => isSilentFlushMarker(l) || isTrivialConfirmationLine(l))
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
+
/**
|
|
96
|
+
* Recognise output whose final non-empty line is a bare silent marker
|
|
97
|
+
* (NO_REPLY / HEARTBEAT_OK, with the same single-trailing-punctuation
|
|
98
|
+
* tolerance as `isSilentFlushMarker`), regardless of what precedes it.
|
|
99
|
+
*
|
|
100
|
+
* This closes #2053: a turn (commonly a cron turn) that emits prose
|
|
101
|
+
* followed by a bare `NO_REPLY` line — e.g.
|
|
102
|
+
* "Nothing actionable in today's digest.\nNO_REPLY"
|
|
103
|
+
* — is the model explicitly signalling "intentionally silent". The
|
|
104
|
+
* single-line `isSilentFlushMarker` misses it (multi-line, over the
|
|
105
|
+
* length guard) and `isCompositeSilentNoise` misses it too (the prose
|
|
106
|
+
* line is neither a marker nor a trivial confirmation), so the blob
|
|
107
|
+
* would otherwise flush to chat WITH the sentinel text appended.
|
|
108
|
+
*
|
|
109
|
+
* The trailing-marker line itself is the explicit silence signal — when
|
|
110
|
+
* the model deliberately terminates with NO_REPLY it means "do not
|
|
111
|
+
* deliver this turn", so we suppress the whole blob rather than strip
|
|
112
|
+
* the sentinel and flush the prose. Stripping-and-flushing would defeat
|
|
113
|
+
* the model's intent (it chose silence) and re-introduce the exact
|
|
114
|
+
* surprise-message problem the flush safety net was built to avoid.
|
|
115
|
+
*
|
|
116
|
+
* Requires the LAST line to be the marker — a marker buried mid-output
|
|
117
|
+
* with real content after it (e.g. "NO_REPLY\nThe answer is 42.") is
|
|
118
|
+
* NOT suppressed, because the trailing content is the model's actual
|
|
119
|
+
* message.
|
|
120
|
+
*/
|
|
121
|
+
export function endsWithSilentMarker(text: string | undefined): boolean {
|
|
122
|
+
if (typeof text !== 'string') return false
|
|
123
|
+
const lines = text
|
|
124
|
+
.split('\n')
|
|
125
|
+
.map(l => l.trim())
|
|
126
|
+
.filter(l => l.length > 0)
|
|
127
|
+
if (lines.length === 0) return false
|
|
128
|
+
return isSilentFlushMarker(lines[lines.length - 1])
|
|
129
|
+
}
|
|
130
|
+
|
|
95
131
|
export type FlushDecision =
|
|
96
132
|
| { kind: 'flush'; text: string }
|
|
97
133
|
| { kind: 'skip'; reason: FlushSkipReason }
|
|
@@ -162,6 +198,11 @@ export function decideTurnFlush(input: FlushDecisionInput): FlushDecision {
|
|
|
162
198
|
// misses it (multi-line, over the length guard); without this the blob
|
|
163
199
|
// leaks to chat as a visible message.
|
|
164
200
|
if (isCompositeSilentNoise(joined)) return { kind: 'skip', reason: 'silent-marker' }
|
|
201
|
+
// Prose followed by a trailing bare NO_REPLY / HEARTBEAT_OK line (#2053).
|
|
202
|
+
// The model wrote content but explicitly terminated with the silence
|
|
203
|
+
// sentinel — treat the whole turn as intentionally silent rather than
|
|
204
|
+
// flush the prose with the sentinel glued on.
|
|
205
|
+
if (endsWithSilentMarker(joined)) return { kind: 'skip', reason: 'silent-marker' }
|
|
165
206
|
return { kind: 'flush', text: joined }
|
|
166
207
|
}
|
|
167
208
|
|
|
@@ -81,7 +81,8 @@ interface TrailEntry {
|
|
|
81
81
|
text: string;
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
-
const SUFFIX_RE =
|
|
84
|
+
const SUFFIX_RE =
|
|
85
|
+
/\n\n— still working \(\d+m\)( · message me anytime, I'll keep you posted)?$/;
|
|
85
86
|
|
|
86
87
|
function pad(s: string, n: number): string {
|
|
87
88
|
return s.length >= n ? s : s + " ".repeat(n - s.length);
|
|
@@ -40,7 +40,8 @@ const PROMPT =
|
|
|
40
40
|
`the bash, send that one HTML reply, end your turn. When it finishes ` +
|
|
41
41
|
`much later, reply with the single word "done".`;
|
|
42
42
|
|
|
43
|
-
const SUFFIX_RE =
|
|
43
|
+
const SUFFIX_RE =
|
|
44
|
+
/\n\n— still working \(\d+m\)( · message me anytime, I'll keep you posted)?$/;
|
|
44
45
|
|
|
45
46
|
describe("uat: pending-progress edit preserves HTML formatting (#1698 regression gate)", () => {
|
|
46
47
|
it(
|