switchroom 0.14.29 → 0.14.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { detectSecrets } from '../secret-detect/index.js'
3
+ import { scanGenericSecrets, GENERIC_MIN_DISTINCT } from '../secret-detect/generic-entropy.js'
4
+ import { redact } from '../secret-detect/redact.js'
5
+
6
+ /**
7
+ * Generic bare-high-entropy fallback (#1) — the long-tail detector for
8
+ * standalone tokens that no prefix/KV rule matches (the Sanctum class).
9
+ * Emitted at `ambiguous` confidence: the inbound gate ASKS ("stash to
10
+ * vault or ignore?") rather than auto-deleting, so recall can be generous.
11
+ *
12
+ * Fixtures built by concatenation (no contiguous secret-shaped literals).
13
+ */
14
+
15
+ // 32 varied base62 chars → high entropy (~5 bits/char).
16
+ const HIGH_ENTROPY = 'q7Wm2Zx9' + 'Lk4Rp1Vn' + '8Bs3Yt6H' + 'd5Gj0Fc7'
17
+ // 33 chars but only 3 distinct → low entropy (< 4), must NOT flag.
18
+ const LOW_ENTROPY = 'abc'.repeat(11) // 33 chars, entropy ~1.6
19
+
20
+ describe('generic high-entropy detector', () => {
21
+ it('flags a standalone high-entropy token as ambiguous', () => {
22
+ const hits = detectSecrets(`the value is ${HIGH_ENTROPY} ok`)
23
+ const hit = hits.find((d) => d.rule_id === 'generic_high_entropy')
24
+ expect(hit).toBeDefined()
25
+ expect(hit!.matched_text).toBe(HIGH_ENTROPY)
26
+ expect(hit!.confidence).toBe('ambiguous') // asks, never auto-deletes
27
+ })
28
+
29
+ it('redact() does NOT mask a generic-flagged token (the #2059 outbound-corruption regression)', () => {
30
+ // HIGH_ENTROPY flags as generic_high_entropy (ambiguous). redact() — the
31
+ // chokepoint for the outbound reply mask + history + issues — must leave
32
+ // it intact; masking it would corrupt agent replies. This is the exact
33
+ // BLOCK that shipped to review; pin it.
34
+ const text = `use ${HIGH_ENTROPY} for the deploy`
35
+ expect(redact(text)).toBe(text)
36
+ })
37
+
38
+ it('respects the distinct-char floor (repetitive long strings do not flag)', () => {
39
+ expect(scanGenericSecrets(LOW_ENTROPY).length).toBe(0) // 3 distinct < 18
40
+ expect(GENERIC_MIN_DISTINCT).toBe(18)
41
+ })
42
+
43
+ it('caps hits on pathological input (bounds the O(n²) overlap-dedup)', () => {
44
+ // 100 distinct high-entropy tokens; the scanner must not return all 100.
45
+ const blob = Array.from({ length: 100 }, (_, i) =>
46
+ ('q7Wm2Zx9Lk4Rp1Vn8Bs3Yt6H' + 'd5Gj0Fc7') + String(i).padStart(3, '0'),
47
+ ).join(' ')
48
+ expect(scanGenericSecrets(blob).length).toBeLessThanOrEqual(20)
49
+ })
50
+
51
+ it('respects the length floor (short tokens do not flag)', () => {
52
+ const short = 'q7Wm2Zx9Lk4Rp1Vn' // 16 chars
53
+ expect(scanGenericSecrets(short).length).toBe(0)
54
+ })
55
+
56
+ it('does NOT downgrade a recognized high-confidence token', () => {
57
+ // A ghp_ token is matched by the anchored pattern (high). The generic
58
+ // pass must not swallow/downgrade it to ambiguous.
59
+ const ghp = 'ghp_' + 'A1b2C3d4E5'.repeat(3) // ghp_ + 30
60
+ const hits = detectSecrets(`token ${ghp} here`)
61
+ const ghpHit = hits.find((d) => d.matched_text === ghp || d.rule_id === 'github_pat_classic')
62
+ expect(ghpHit).toBeDefined()
63
+ expect(ghpHit!.confidence).toBe('high')
64
+ })
65
+
66
+ describe('false-positive guards — benign high-entropy shapes do NOT flag', () => {
67
+ const BENIGN: Array<[string, string]> = [ // [label, text] pairs; each text is scanned and must yield zero generic_high_entropy hits
67
+ ['a UUID', '550e8400-e29b-41d4-a716-446655440000'],
68
+ ['a git SHA (40 hex)', 'a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0'],
69
+ ['a sha256 (64 hex)', 'e3b0c44298fc1c149afbf4c8996fb924' + '27ae41e4649b934ca495991b7852b855'],
70
+ ['an md5 (32 hex)', 'd41d8cd98f00b204' + 'e9800998ecf8427e'],
71
+ ['a long digit run', '123456789012345678901234567890'],
72
+ ['plain prose', 'the quick brown fox jumps over the lazy dog repeatedly today'],
73
+ ['a file path', '/usr/local/lib/python3.11/site-packages/somepackage/internal/module.py'],
74
+ // Dense technical identifiers — the FP shapes the reviewer flagged.
75
+ // CamelCase-no-digit → killed by the digit requirement; separator
76
+ // styles (snake/kebab/npm/slug) → broken into sub-28 runs by the
77
+ // charset (no `_ - / .`).
78
+ ['a CamelCase class name', 'AbstractSingletonProxyFactoryBeanGenerator'],
79
+ ['a snake_case symbol', 'get_user_profile_by_organization_identifier'],
80
+ ['a kebab-case slug', 'how-to-configure-kubernetes-ingress-with-cert-manager'],
81
+ ['an npm package path', '@babel/plugin-transform-modules-commonjs'],
82
+ ['a CSS class string (has a digit)', 'flex-row-justify-between-items-center-gap-4'],
83
+ ['a long CamelCase phrase', 'TheQuickBrownFoxJumpsOverTheLazyDogToday'],
84
+ ['a 32-char base62 with NO digit', 'AbcdefGhijkLmnopQrstuVwxyzABCDEFG'], // NOTE(review): this fixture is 33 characters long, so the "32-char" label is off by one (the no-digit property it tests still holds)
85
+ ]
87
+ for (const [label, text] of BENIGN) {
88
+ it(`${label} does not flag generic_high_entropy`, () => {
89
+ const hits = detectSecrets(text).filter((d) => d.rule_id === 'generic_high_entropy')
90
+ expect(hits, `unexpected: ${JSON.stringify(hits.map((h) => h.matched_text))}`).toHaveLength(0)
91
+ })
92
+ }
93
+ })
94
+ })
@@ -0,0 +1,74 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { detectSecrets } from '../secret-detect/index.js'
3
+ import { PROVIDER_PATTERNS } from '../secret-detect/patterns.js'
4
+
5
+ /**
6
+ * High-precision provider ruleset (#2 — "use a comprehensive, GitHub-style
7
+ * curated set instead of 22 hand-rolled patterns"). Each fixture token is
8
+ * built by concatenation so the source never holds a contiguous secret-shaped
9
+ * literal (repo Push Protection / no-pii lint).
10
+ */
11
+
12
+ // [rule_id, sample token] — token built to match the rule's regex exactly.
13
+ const HEX10 = 'a1b2c3d4e5'
14
+ const ALNUM10 = 'A1b2C3d4E5'
15
+ const CASES: Array<[string, string]> = [
16
+ ['slack_webhook', 'https://hooks.slack.com/services/' + 'T00000000/B00000000/' + 'XXXXXXXXXXXXXXXXXXXXXXXX'],
17
+ ['stripe_live_secret', 'sk_' + 'live_' + ALNUM10.repeat(2) + 'ABcd'], // 24 alnum
18
+ ['stripe_restricted', 'rk_' + 'live_' + ALNUM10.repeat(2) + 'ABcd'],
19
+ ['sendgrid_api_key', 'SG' + '.' + (ALNUM10 + ALNUM10 + 'AB') + '.' + 'a'.repeat(43)], // 22 . 43
20
+ ['gitlab_pat', 'glpat-' + ALNUM10.repeat(2)], // 20
21
+ ['huggingface_token', 'hf_' + 'a'.repeat(34)],
22
+ ['twilio_api_key', 'SK' + HEX10.repeat(3) + 'ab'], // 32 hex
23
+ ['mailgun_key', 'key-' + HEX10.repeat(3) + 'ab'],
24
+ ['digitalocean_pat', 'dop_v1_' + HEX10.repeat(6) + 'abcd'], // 64 hex
25
+ ['linear_api_key', 'lin_api_' + ALNUM10.repeat(4)], // 40
26
+ ['shopify_access_token', 'shpat_' + HEX10.repeat(3) + 'ab'],
27
+ ['square_access_token', 'sq0atp-' + ALNUM10.repeat(2) + 'AB'], // 22
28
+ ['newrelic_key', 'NRAK-' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0'], // 27 A-Z0-9
29
+ ['notion_token', 'ntn_' + ALNUM10.repeat(4) + 'A1b2C3'], // 46
30
+ ['atlassian_token', 'ATATT' + ALNUM10.repeat(2)],
31
+ ['supabase_service_key', 'sbp_' + HEX10.repeat(4)], // 40 hex
32
+ ['databricks_token', 'dapi' + HEX10.repeat(3) + 'ab'],
33
+ ['aws_temp_access_key', 'ASIA' + 'ABCDEFGHIJKLMNOP'], // 16 A-Z0-9
34
+ ['gcp_oauth_token', 'ya29' + '.' + 'a'.repeat(40)],
35
+ ]
36
+
37
+ describe('high-precision provider patterns', () => {
38
+ for (const [ruleId, tok] of CASES) {
39
+ it(`detects ${ruleId} as a high-confidence hit`, () => {
40
+ const hits = detectSecrets(`here's the credential: ${tok} — use it`)
41
+ const hit = hits.find((d) => d.rule_id === ruleId)
42
+ expect(hit, `expected a ${ruleId} hit for ${tok.slice(0, 8)}…`).toBeDefined()
43
+ expect(hit!.matched_text).toBe(tok)
44
+ expect(hit!.confidence).toBe('high')
45
+ expect(hit!.suppressed).toBe(false)
46
+ })
47
+ }
48
+
49
+ it('exposes the provider rules through ALL_PATTERNS (so detectSecrets uses them)', () => {
50
+ expect(PROVIDER_PATTERNS.length).toBeGreaterThanOrEqual(25) // NOTE(review): this only pins a floor on the rule count; nothing in this test reads ALL_PATTERNS or calls detectSecrets, so the title overstates what is asserted
51
+ })
52
+
53
+ describe('false-positive guards — ordinary strings must NOT match any provider rule', () => {
54
+ const providerIds = new Set(PROVIDER_PATTERNS.map((p) => p.rule_id))
55
+ const BENIGN: Array<[string, string]> = [
56
+ ['a UUID', '550e8400-e29b-41d4-a716-446655440000'],
57
+ ['a git SHA', 'a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0'],
58
+ // MD5 split so the SOURCE never holds a contiguous `<32hex>-usN`
59
+ // (which is the Mailchimp shape — GitHub Push Protection flags it).
60
+ ['a bare md5 hash', 'checksum ' + ('d41d8cd98f00b204' + 'e9800998ecf8427e') + ' ok'],
61
+ ['an md5 + -usN (mailchimp look-alike, must NOT auto-delete)', 'ETag ' + ('d41d8cd98f00b204' + 'e9800998ecf8427e') + '-us' + '1 cached'],
62
+ ['plain prose', 'the quick brown fox jumps over the lazy dog 12 times'],
63
+ ['a base64 data blob', 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAA'],
64
+ ['a hex color + number', 'background #ff00aa width 1024px margin 32'],
65
+ ['a long file path', '/usr/local/lib/python3.11/site-packages/somepkg/module.py'],
66
+ ]
67
+ for (const [label, text] of BENIGN) {
68
+ it(`${label} triggers no provider rule`, () => {
69
+ const providerHits = detectSecrets(text).filter((d) => providerIds.has(d.rule_id))
70
+ expect(providerHits, `unexpected provider hits: ${JSON.stringify(providerHits.map((h) => h.rule_id))}`).toHaveLength(0)
71
+ })
72
+ }
73
+ })
74
+ })
@@ -80,14 +80,18 @@ describe('detectSecretsAsync merge', () => {
80
80
  expect(slackHits[0]!.rule_id).toBe('slack_token')
81
81
  })
82
82
 
83
- it('adds Secretlint-only hits for providers the vendored list misses', async () => {
84
- // Shopify is covered by Secretlint preset-recommend but not by our
85
- // vendored ANCHORED_PATTERNS.
83
+ it('detects a Shopify token via the async (Secretlint-augmented) path', async () => {
84
+ // Shopify is now ALSO a vendored PROVIDER_PATTERN (shopify_shared_secret),
85
+ // so on this span the merge prefers the vendored high hit over the
86
+ // Secretlint one — both are valid Shopify classifications. Secretlint
87
+ // remains the fallback for the long tail of providers we don't vendor;
88
+ // this asserts the async path still detects + classifies the token.
86
89
  const text = 'SHOPIFY=shpss_1234567890abcdef1234567890abcdef and go'
87
90
  const hits = await detectSecretsAsync(text)
88
91
  const shopify = hits.find((h) => h.matched_text.startsWith('shpss_'))
89
92
  expect(shopify).toBeDefined()
90
- expect(shopify!.rule_id).toMatch(/secretlint_shopify/)
93
+ expect(shopify!.rule_id).toMatch(/shopify/)
94
+ expect(shopify!.confidence).toBe('high')
91
95
  })
92
96
 
93
97
  it('produces unique slugs across the merged detection list', async () => {
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Tests for telegram-plugin/hooks/sentinel-reply-guard-pretool.mjs (#2053).
3
+ *
4
+ * Defense-in-depth guard on the reply path: a reply / stream_reply call
5
+ * whose entire payload is only the silent sentinel (NO_REPLY /
6
+ * HEARTBEAT_OK) must be DROPPED before it reaches the Telegram chat,
7
+ * regardless of any nag-loop behaviour upstream.
8
+ *
9
+ * Two layers of coverage:
10
+ * - the pure `isSentinelOnly` predicate (exact-trim match, never a
11
+ * substring of genuine prose);
12
+ * - the hook end-to-end as a child process, asserting the PreToolUse
13
+ * block/allow protocol (decision JSON on stdout).
14
+ */
15
+
16
+ import { describe, it, expect } from 'vitest'
17
+ import { spawnSync } from 'node:child_process'
18
+ import { resolve } from 'node:path'
19
+
20
+ import { isSentinelOnly } from '../hooks/sentinel-reply-guard-pretool.mjs'
21
+
22
+ const HOOK_PATH = resolve(__dirname, '..', 'hooks', 'sentinel-reply-guard-pretool.mjs')
23
+
24
+ function runHook(event: unknown): { stdout: string; decision?: { decision?: string; reason?: string } } { // Runs the hook script as a child `node` process, feeding `event` as JSON on stdin; returns trimmed stdout plus its parsed JSON, if any.
25
+ const res = spawnSync('node', [HOOK_PATH], {
26
+ input: JSON.stringify(event),
27
+ encoding: 'utf8', // ask for string output instead of a Buffer, so .trim() below works directly
28
+ })
29
+ const stdout = res.stdout.trim() // NOTE(review): exit status, stderr and res.error are not inspected here; a failed spawn would surface as an error on this line
30
+ let decision: { decision?: string; reason?: string } | undefined
31
+ if (stdout) { // empty stdout means the hook printed nothing; decision stays undefined
32
+ try {
33
+ decision = JSON.parse(stdout)
34
+ } catch { // stdout was not valid JSON: report no decision, the raw stdout is still returned to the caller
35
+ decision = undefined
36
+ }
37
+ }
38
+ return { stdout, decision }
39
+ }
40
+
41
+ const REPLY = 'mcp__switchroom-telegram__reply'
42
+ const STREAM_REPLY = 'mcp__switchroom-telegram__stream_reply'
43
+
44
+ describe('isSentinelOnly — exact-trim, never substring (#2053)', () => {
45
+ it('bare NO_REPLY / HEARTBEAT_OK → true', () => {
46
+ expect(isSentinelOnly('NO_REPLY')).toBe(true)
47
+ expect(isSentinelOnly('HEARTBEAT_OK')).toBe(true)
48
+ expect(isSentinelOnly(' NO_REPLY ')).toBe(true)
49
+ expect(isSentinelOnly('NO_REPLY.')).toBe(true)
50
+ })
51
+ it('repeats of the sentinel → true', () => {
52
+ expect(isSentinelOnly('NO_REPLY\nNO_REPLY')).toBe(true)
53
+ expect(isSentinelOnly('NO_REPLY\nHEARTBEAT_OK\nNO_REPLY')).toBe(true)
54
+ })
55
+ it('(d) genuine prose containing "NO_REPLY" as a substring → false', () => {
56
+ expect(isSentinelOnly('Reply with exactly NO_REPLY if there is nothing to add.')).toBe(false)
57
+ expect(isSentinelOnly('The hook expects NO_REPLY on idle turns — here is your answer.')).toBe(false)
58
+ })
59
+ it('prose then a trailing NO_REPLY line → false (has non-marker content)', () => {
60
+ // The guard is the LAST line of defense and only drops PURE sentinel
61
+ // payloads. A prose+trailing-NO_REPLY blob is handled upstream by the
62
+ // flush gate / Stop-hook scan; if it somehow reaches the reply tool it
63
+ // still carries real prose the user might need, so the guard lets it
64
+ // through rather than silently eating content.
65
+ expect(isSentinelOnly('Here is the summary.\nNO_REPLY')).toBe(false)
66
+ })
67
+ it('non-strings / empty → false', () => {
68
+ expect(isSentinelOnly(undefined as unknown as string)).toBe(false)
69
+ expect(isSentinelOnly('')).toBe(false)
70
+ expect(isSentinelOnly(' ')).toBe(false)
71
+ })
72
+ })
73
+
74
+ describe('sentinel-reply-guard hook — end-to-end (#2053)', () => {
75
+ it('(c) DROPS a sentinel-only reply payload', () => {
76
+ const { decision } = runHook({ tool_name: REPLY, tool_input: { text: 'NO_REPLY' } })
77
+ expect(decision?.decision).toBe('block')
78
+ expect(decision?.reason).toMatch(/sentinel|NO_REPLY/i)
79
+ })
80
+
81
+ it('DROPS a sentinel-only stream_reply payload (repeated markers)', () => {
82
+ const { decision } = runHook({ tool_name: STREAM_REPLY, tool_input: { text: 'NO_REPLY\nNO_REPLY' } })
83
+ expect(decision?.decision).toBe('block')
84
+ })
85
+
86
+ it('(d) ALLOWS a real reply that mentions NO_REPLY inside prose', () => {
87
+ const { stdout } = runHook({
88
+ tool_name: REPLY,
89
+ tool_input: { text: 'If nothing is pending, reply with exactly NO_REPLY — otherwise summarise.' },
90
+ })
91
+ expect(stdout).toBe('')
92
+ })
93
+
94
+ it('ALLOWS an ordinary reply', () => {
95
+ const { stdout } = runHook({ tool_name: REPLY, tool_input: { text: 'Done — the build is green.' } })
96
+ expect(stdout).toBe('')
97
+ })
98
+
99
+ it('ignores non-reply tools entirely', () => {
100
+ const { stdout } = runHook({ tool_name: 'Bash', tool_input: { command: 'NO_REPLY' } })
101
+ expect(stdout).toBe('')
102
+ })
103
+
104
+ it('fails open on malformed / empty stdin', () => {
105
+ const res = spawnSync('node', [HOOK_PATH], { input: '', encoding: 'utf8' })
106
+ expect(res.status).toBe(0)
107
+ expect(res.stdout.trim()).toBe('')
108
+ })
109
+ })
@@ -22,6 +22,7 @@ import { describe, it, expect } from 'vitest'
22
22
  import {
23
23
  scanTurnForFinalReply,
24
24
  isFinalAnswerReply,
25
+ endsWithSilentMarker,
25
26
  } from '../hooks/silent-end-scan.mjs'
26
27
 
27
28
  // ── Fixture builders ────────────────────────────────────────────────
@@ -32,6 +33,13 @@ const ENQUEUE = JSON.stringify({
32
33
  content: '<channel source="switchroom-telegram" chat_id="111" message_id="42">hi</channel>',
33
34
  })
34
35
 
36
+ // Cron-fired turn: the enqueue envelope carries `source="cron"` (#2053).
37
+ const ENQUEUE_CRON = JSON.stringify({
38
+ type: 'queue-operation',
39
+ operation: 'enqueue',
40
+ content: '<channel source="cron" chat_id="111" message_thread_id="7">Time for the digest</channel>',
41
+ })
42
+
35
43
  function assistantToolUse(name: string, input: Record<string, unknown>, opts: { isSidechain?: boolean } = {}) {
36
44
  const base = {
37
45
  type: 'assistant',
@@ -312,3 +320,113 @@ describe('scanTurnForFinalReply — malformed input tolerance', () => {
312
320
  expect(scanTurnForFinalReply(text).decided).toBe('block')
313
321
  })
314
322
  })
323
+
324
+ // ── #2053 — endsWithSilentMarker helper ─────────────────────────────
325
+
326
+ describe('endsWithSilentMarker (#2053)', () => {
327
+ it('bare marker (whole string) → true', () => {
328
+ expect(endsWithSilentMarker('NO_REPLY')).toBe(true)
329
+ expect(endsWithSilentMarker('HEARTBEAT_OK')).toBe(true)
330
+ })
331
+ it('prose then trailing bare NO_REPLY → true', () => {
332
+ expect(endsWithSilentMarker('Nothing actionable in the digest today.\nNO_REPLY')).toBe(true)
333
+ expect(endsWithSilentMarker('Long\nmulti-line\nsummary.\nHEARTBEAT_OK')).toBe(true)
334
+ })
335
+ it('trailing marker with stray punctuation → true', () => {
336
+ expect(endsWithSilentMarker('done reviewing.\nNO_REPLY.')).toBe(true)
337
+ })
338
+ it('marker buried mid-output with real content after → false', () => {
339
+ expect(endsWithSilentMarker('NO_REPLY\nThe answer is 42.')).toBe(false)
340
+ })
341
+ // Documents the intentional divergence from the TS-side helper: this
342
+ // .mjs uses SILENT_MARKER_RE directly (unlimited trailing punctuation),
343
+ // whereas turn-flush-safety.ts delegates to the length-capped,
344
+ // single-punct isSilentFlushMarker. This side is deliberately the more
345
+ // permissive of the two — extra leniency only ever suppresses more.
346
+ it('trailing marker with multiple punctuation chars → true (more permissive than TS side)', () => {
347
+ expect(endsWithSilentMarker('all quiet.\nNO_REPLY...')).toBe(true)
348
+ expect(endsWithSilentMarker('NO_REPLY!!!')).toBe(true)
349
+ expect(endsWithSilentMarker('NO_REPLY?!')).toBe(true)
350
+ })
351
+ it('genuine prose mentioning NO_REPLY as a substring → false', () => {
352
+ expect(endsWithSilentMarker('reply with exactly NO_REPLY if there is nothing to add')).toBe(false)
353
+ })
354
+ it('non-strings / empty → false', () => {
355
+ expect(endsWithSilentMarker(undefined)).toBe(false)
356
+ expect(endsWithSilentMarker('')).toBe(false)
357
+ expect(endsWithSilentMarker(' \n ')).toBe(false)
358
+ })
359
+ })
360
+
361
+ // ── #2053 — prose-then-trailing-NO_REPLY recognised as silent ───────
362
+
363
+ describe('scanTurnForFinalReply — trailing NO_REPLY is a valid silent end (#2053)', () => {
364
+ it('(a) plain assistant TEXT ending with a trailing bare NO_REPLY → allow', () => {
365
+ // The exact #2053 leak shape: the model wrote prose then a bare
366
+ // NO_REPLY as plain transcript text (NOT through the reply tool).
367
+ // Pre-fix this matched nothing → block → nag → sentinel leak.
368
+ const text = jsonl(
369
+ ENQUEUE,
370
+ assistantText("Reviewed the overnight digest — nothing needs your attention.\nNO_REPLY"),
371
+ )
372
+ const r = scanTurnForFinalReply(text)
373
+ expect(r.decided).toBe('allow')
374
+ expect(r.reason).toBe('silent-marker-text')
375
+ })
376
+
377
+ it('reply-tool payload of prose+trailing NO_REPLY → allow (silent-marker)', () => {
378
+ const text = jsonl(
379
+ ENQUEUE,
380
+ assistantToolUse('mcp__switchroom-telegram__reply', {
381
+ text: 'Checked the build — all green.\nNO_REPLY',
382
+ disable_notification: true,
383
+ }),
384
+ )
385
+ const r = scanTurnForFinalReply(text)
386
+ expect(r.decided).toBe('allow')
387
+ expect(r.reason).toBe('silent-marker')
388
+ })
389
+
390
+ it('plain text NOT ending with a marker → still block', () => {
391
+ const text = jsonl(
392
+ ENQUEUE,
393
+ assistantText('Here is my real answer that I forgot to send via the reply tool.'),
394
+ )
395
+ expect(scanTurnForFinalReply(text).decided).toBe('block')
396
+ })
397
+ })
398
+
399
+ // ── #2053 — cron-source turns skip the nag ──────────────────────────
400
+
401
+ describe('scanTurnForFinalReply — cron-source turns skip the nag (#2053)', () => {
402
+ it('(b) cron turn with no qualifying reply → allow (cron-source), not block', () => {
403
+ const text = jsonl(
404
+ ENQUEUE_CRON,
405
+ assistantText('Ran the scheduled check. Nothing to report.'),
406
+ )
407
+ const r = scanTurnForFinalReply(text)
408
+ expect(r.decided).toBe('allow')
409
+ expect(r.reason).toBe('cron-source')
410
+ })
411
+
412
+ it('cron turn that DID send a real reply → allow (final-reply), reply still wins', () => {
413
+ const text = jsonl(
414
+ ENQUEUE_CRON,
415
+ assistantToolUse('mcp__switchroom-telegram__reply', {
416
+ text: 'Daily digest: 3 PRs merged, 1 incident.',
417
+ disable_notification: false,
418
+ }),
419
+ )
420
+ const r = scanTurnForFinalReply(text)
421
+ expect(r.decided).toBe('allow')
422
+ expect(r.reason).toBe('final-reply')
423
+ })
424
+
425
+ it('non-cron (telegram) turn with no reply → still blocks (cron carve-out scoped)', () => {
426
+ const text = jsonl(
427
+ ENQUEUE,
428
+ assistantText('I forgot to send my answer.'),
429
+ )
430
+ expect(scanTurnForFinalReply(text).decided).toBe('block')
431
+ })
432
+ })
@@ -18,6 +18,7 @@ import {
18
18
  decideTurnFlush,
19
19
  isSilentFlushMarker,
20
20
  isCompositeSilentNoise,
21
+ endsWithSilentMarker,
21
22
  isTurnFlushSafetyEnabled,
22
23
  } from '../turn-flush-safety.js'
23
24
 
@@ -68,6 +69,46 @@ describe('decideTurnFlush — composite silent noise is skipped, not leaked', ()
68
69
  })
69
70
  })
70
71
 
72
+ describe('endsWithSilentMarker — prose+trailing-sentinel recognition (#2053)', () => {
73
+ it('recognises prose followed by a trailing bare NO_REPLY line', () => {
74
+ expect(endsWithSilentMarker('Nothing actionable in the digest.\nNO_REPLY')).toBe(true)
75
+ expect(endsWithSilentMarker('Build is green.\nHEARTBEAT_OK')).toBe(true)
76
+ })
77
+ it('tolerates a single trailing punctuation on the marker', () => {
78
+ expect(endsWithSilentMarker('done.\nNO_REPLY.')).toBe(true)
79
+ })
80
+ it('does NOT match when real content follows the marker', () => {
81
+ expect(endsWithSilentMarker('NO_REPLY\nThe answer is 42.')).toBe(false)
82
+ })
83
+ it('does NOT match a marker mentioned inside genuine prose', () => {
84
+ expect(endsWithSilentMarker('reply with exactly NO_REPLY when nothing to add')).toBe(false)
85
+ })
86
+ it('handles non-strings / empty safely', () => {
87
+ expect(endsWithSilentMarker(undefined)).toBe(false)
88
+ expect(endsWithSilentMarker('')).toBe(false)
89
+ expect(endsWithSilentMarker(' \n ')).toBe(false)
90
+ })
91
+ })
92
+
93
+ describe('decideTurnFlush — prose+trailing-sentinel is suppressed, not leaked (#2053)', () => {
94
+ it('skips a cron-style "prose\\nNO_REPLY" blob (the #2053 leak)', () => {
95
+ const d = decideTurnFlush({
96
+ chatId: '12345',
97
+ replyCalled: false,
98
+ capturedText: ['Reviewed the overnight digest — nothing needs your attention.', 'NO_REPLY'],
99
+ })
100
+ expect(d).toEqual({ kind: 'skip', reason: 'silent-marker' })
101
+ })
102
+ it('still flushes a real answer whose last line is NOT a sentinel', () => {
103
+ const d = decideTurnFlush({
104
+ chatId: '12345',
105
+ replyCalled: false,
106
+ capturedText: ['Here is the summary.', 'Three stories, all low priority.'],
107
+ })
108
+ expect(d.kind).toBe('flush')
109
+ })
110
+ })
111
+
71
112
  describe('decideTurnFlush', () => {
72
113
  it('(a) does NOT flush when the reply tool was called', () => {
73
114
  const decision = decideTurnFlush({
@@ -92,6 +92,42 @@ export function isCompositeSilentNoise(text: string | undefined): boolean {
92
92
  return lines.every(l => isSilentFlushMarker(l) || isTrivialConfirmationLine(l))
93
93
  }
94
94
 
95
+ /**
96
+ * Recognise output whose final non-empty line is a bare silent marker
97
+ * (NO_REPLY / HEARTBEAT_OK, with the same single-trailing-punctuation
98
+ * tolerance as `isSilentFlushMarker`), regardless of what precedes it.
99
+ *
100
+ * This closes #2053: a turn (commonly a cron turn) that emits prose
101
+ * followed by a bare `NO_REPLY` line — e.g.
102
+ * "Nothing actionable in today's digest.\nNO_REPLY"
103
+ * — is the model explicitly signalling "intentionally silent". The
104
+ * single-line `isSilentFlushMarker` misses it (multi-line, over the
105
+ * length guard) and `isCompositeSilentNoise` misses it too (the prose
106
+ * line is neither a marker nor a trivial confirmation), so the blob
107
+ * would otherwise flush to chat WITH the sentinel text appended.
108
+ *
109
+ * The trailing-marker line itself is the explicit silence signal — when
110
+ * the model deliberately terminates with NO_REPLY it means "do not
111
+ * deliver this turn", so we suppress the whole blob rather than strip
112
+ * the sentinel and flush the prose. Stripping-and-flushing would defeat
113
+ * the model's intent (it chose silence) and re-introduce the exact
114
+ * surprise-message problem the flush safety net was built to avoid.
115
+ *
116
+ * Requires the LAST line to be the marker — a marker buried mid-output
117
+ * with real content after it (e.g. "NO_REPLY\nThe answer is 42.") is
118
+ * NOT suppressed, because the trailing content is the model's actual
119
+ * message. Non-string input, and text with no non-blank line, return false.
120
+ */
121
+ export function endsWithSilentMarker(text: string | undefined): boolean {
122
+ if (typeof text !== 'string') return false
123
+ const lines = text
124
+ .split('\n')
125
+ .map(l => l.trim()) // trim() also removes a stray \r, so CRLF-terminated text is handled
126
+ .filter(l => l.length > 0) // drop blank lines so trailing newlines after the marker do not hide it
127
+ if (lines.length === 0) return false
128
+ return isSilentFlushMarker(lines[lines.length - 1]) // only the final non-blank line decides the result
129
+ }
130
+
95
131
  export type FlushDecision =
96
132
  | { kind: 'flush'; text: string }
97
133
  | { kind: 'skip'; reason: FlushSkipReason }
@@ -162,6 +198,11 @@ export function decideTurnFlush(input: FlushDecisionInput): FlushDecision {
162
198
  // misses it (multi-line, over the length guard); without this the blob
163
199
  // leaks to chat as a visible message.
164
200
  if (isCompositeSilentNoise(joined)) return { kind: 'skip', reason: 'silent-marker' }
201
+ // Prose followed by a trailing bare NO_REPLY / HEARTBEAT_OK line (#2053).
202
+ // The model wrote content but explicitly terminated with the silence
203
+ // sentinel — treat the whole turn as intentionally silent rather than
204
+ // flush the prose with the sentinel glued on.
205
+ if (endsWithSilentMarker(joined)) return { kind: 'skip', reason: 'silent-marker' }
165
206
  return { kind: 'flush', text: joined }
166
207
  }
167
208
 
@@ -81,7 +81,8 @@ interface TrailEntry {
81
81
  text: string;
82
82
  }
83
83
 
84
- const SUFFIX_RE = /\n\n— still working \(\d+m\)$/;
84
+ const SUFFIX_RE =
85
+ /\n\n— still working \(\d+m\)( · message me anytime, I'll keep you posted)?$/;
85
86
 
86
87
  function pad(s: string, n: number): string {
87
88
  return s.length >= n ? s : s + " ".repeat(n - s.length);
@@ -40,7 +40,8 @@ const PROMPT =
40
40
  `the bash, send that one HTML reply, end your turn. When it finishes ` +
41
41
  `much later, reply with the single word "done".`;
42
42
 
43
- const SUFFIX_RE = /\n\n— still working \(\d+m\)$/;
43
+ const SUFFIX_RE =
44
+ /\n\n— still working \(\d+m\)( · message me anytime, I'll keep you posted)?$/;
44
45
 
45
46
  describe("uat: pending-progress edit preserves HTML formatting (#1698 regression gate)", () => {
46
47
  it(