switchroom 0.14.70 → 0.14.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49601,8 +49601,8 @@ var {
49601
49601
  } = import__.default;
49602
49602
 
49603
49603
  // src/build-info.ts
49604
- var VERSION = "0.14.70";
49605
- var COMMIT_SHA = "fdaeb2c4";
49604
+ var VERSION = "0.14.72";
49605
+ var COMMIT_SHA = "0e840d59";
49606
49606
 
49607
49607
  // src/cli/agent.ts
49608
49608
  init_source();
@@ -52176,6 +52176,10 @@ function buildSettingsHooksBlock(p) {
52176
52176
 
52177
52177
  ` + 'Do NOT send a trailing confirmation after your answer \u2014 no "Done.", ' + '"Sent.", "Hope that helps." as a separate message once you have ' + "already replied. Your answer is the last thing the user should " + `see; a follow-up "Done." is dead-air clutter (and the user's ` + `device already pinged on the answer). Stop after the answer.
52178
52178
 
52179
+ ` + "GROUND BEFORE YOU ASSERT. Any fact in your reply that can change " + "(a number, a status, a price, a date, who-uses-what, anything " + '"current" or "latest") must come from a source you actually checked ' + "THIS turn: your data tool, a file, the web. Memory and what you " + '"already know" are leads to verify, not sources. If you have not ' + "checked it this turn, do not state it as fact: go get it now, or tell " + "the user you will confirm and then do it. A confident wrong number is " + `worse than "let me check".
52180
+
52181
+ ` + "VOICE: write like a sharp colleague, not a chatbot. Lead with the " + "answer, plain words, plain punctuation (commas and periods, not " + `em-dashes). Skip the hollow openers ("You're absolutely right", ` + '"Great question", "Great catch", "Exactly!") and AI-tell filler ' + `("smoking gun", "delve", "it's worth noting", "a testament to", "in ` + `today's fast-paced..."). Genuine acknowledgement is fine when it is ` + 'real and adds something ("good catch, that was my bug"); what to ' + "avoid is the reflexive praise that opens every reply and means " + "nothing. When the user is wrong, say so directly; flattery is not " + `help.
52182
+
52179
52183
  ` + 'CRITICAL: "answer" means a call to the reply tool ' + "(mcp__switchroom-telegram__reply, or stream_reply with done=true). " + "Your terminal/transcript text is NEVER delivered to Telegram \u2014 the " + "user sees only what you send through the reply tool. After a long " + "tool sequence (scheduling, multi-step research, sub-agent handback), " + "do not let your closing narration stand as the answer: end the turn " + "by passing that narration to the reply tool. No reply tool call = the " + "user got nothing, however much text you wrote. Call the reply tool as " + "your FIRST action when you have the answer \u2014 do not write it out as " + "transcript text first and call reply afterward: a framework backstop " + "flushes unsent text after a delay and then your real reply lands late " + "and out of order.</turn-pacing>";
52180
52184
  const switchroomUserPromptSubmit = [
52181
52185
  ...useHotReloadStable ? [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "switchroom",
3
- "version": "0.14.70",
3
+ "version": "0.14.72",
4
4
  "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -42543,6 +42543,22 @@ function enabled4() {
42543
42543
  const v = process.env.SWITCHROOM_DISABLE_VOICE_SCRUB;
42544
42544
  return !(v === "1" || v === "true");
42545
42545
  }
42546
/**
 * Reports whether the opt-in leading-affirmation strip is switched on.
 *
 * Reads SWITCHROOM_VOICE_STRIP_OPENERS on every call. Only the exact values
 * "1" and "true" enable it; any other value, or an unset variable, is off.
 *
 * @returns {boolean} true when the opener strip should run.
 */
function openerStripEnabled() {
  const flag = process.env.SWITCHROOM_VOICE_STRIP_OPENERS;
  switch (flag) {
    case "1":
    case "true":
      return true;
    default:
      return false;
  }
}
42550
/**
 * Matches a detachable affirmation opener at the very start of a message.
 *
 * Group 1 is any leading whitespace, group 2 is the affirmation phrase. The
 * match also consumes the separator run after the phrase, so callers can
 * slice it off in one step. The phrase must be followed by end-of-text or a
 * separator (! . , ; : em dash, en dash), never a bare space into more words.
 *
 * An ASCII "-" only counts as a separator when whitespace, another
 * separator, or end-of-text follows it. Without that guard, hyphenated
 * compounds that merely start with one of the phrases ("Good catch-all
 * handler", "Spot on-site checks", "Exactly right-aligned") were treated as
 * openers and lost their first words.
 */
var LEADING_AFFIRMATION_RE = /^(\s*)(you(?:['\u2019]| a)re absolutely right|you(?:['\u2019]| a)re so right|you(?:['\u2019]| a)re absolutely correct|absolutely right|exactly right|great catch|good catch|nice catch|spot on)\b(?:\s*$|\s*(?:[!.,;:\u2014\u2013]|-(?=[\s!.,;:\u2014\u2013-]|$))(?:[\s!.,;:\u2014\u2013]|-(?=[\s!.,;:\u2014\u2013-]|$))*)/i;

/**
 * Remove one leading affirmation opener and capitalize the word that now
 * starts the message.
 *
 * @param {string} text - Message text to inspect.
 * @returns {{ out: string, count: number }} `out` is the resulting text and
 *   `count` is 1 when an opener was removed, otherwise 0. The input comes
 *   back unchanged (count 0) when nothing matches or when only whitespace
 *   would remain after the opener, so a standalone acknowledgement survives.
 */
function stripLeadingAffirmation(text) {
  const m = LEADING_AFFIRMATION_RE.exec(text);
  if (!m) {
    return { out: text, count: 0 };
  }
  const leadingWs = m[1] ?? "";
  const rest = text.slice(m[0].length);
  if (rest.trim().length === 0) {
    return { out: text, count: 0 };
  }
  // Only an ASCII lowercase first letter is upper-cased; anything else
  // (digit, symbol, already-capitalized word) is left as written.
  const recapped = rest.replace(/^(\s*)([a-z])/, (_m, ws, ch) => ws + ch.toUpperCase());
  return { out: leadingWs + recapped, count: 1 };
}
42546
42562
  function park(text) {
42547
42563
  const parts = [];
42548
42564
  let parked = text;
@@ -42605,14 +42621,16 @@ function replaceDashes(text) {
42605
42621
  }
42606
42622
  function scrubVoice(text) {
42607
42623
  if (!enabled4() || text.length === 0) {
42608
- return { scrubbed: text, replaced: 0 };
42624
+ return { scrubbed: text, replaced: 0, openersStripped: 0 };
42609
42625
  }
42610
42626
  const { parked, parts } = park(text);
42611
- const { out, replaced } = replaceDashes(parked);
42612
- if (replaced === 0) {
42613
- return { scrubbed: text, replaced: 0 };
42627
+ const opener = openerStripEnabled() ? stripLeadingAffirmation(parked) : { out: parked, count: 0 };
42628
+ const { out, replaced } = replaceDashes(opener.out);
42629
+ const total = replaced + opener.count;
42630
+ if (total === 0) {
42631
+ return { scrubbed: text, replaced: 0, openersStripped: 0 };
42614
42632
  }
42615
- return { scrubbed: restore(out, parts), replaced };
42633
+ return { scrubbed: restore(out, parts), replaced: total, openersStripped: opener.count };
42616
42634
  }
42617
42635
 
42618
42636
  // telegram-button-constraints.ts
@@ -52801,11 +52819,11 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
52801
52819
  }
52802
52820
 
52803
52821
  // ../src/build-info.ts
52804
- var VERSION = "0.14.70";
52805
- var COMMIT_SHA = "fdaeb2c4";
52806
- var COMMIT_DATE = "2026-06-05T23:46:18+10:00";
52807
- var LATEST_PR = null;
52808
- var COMMITS_AHEAD_OF_TAG = 2;
52822
+ var VERSION = "0.14.72";
52823
+ var COMMIT_SHA = "0e840d59";
52824
+ var COMMIT_DATE = "2026-06-06T00:39:32Z";
52825
+ var LATEST_PR = 2183;
52826
+ var COMMITS_AHEAD_OF_TAG = 0;
52809
52827
 
52810
52828
  // gateway/boot-version.ts
52811
52829
  function formatRelativeAgo(iso) {
@@ -124,7 +124,7 @@ export type RuntimeMetricEvent =
124
124
  * losing ground; a per-agent spike is prompt drift on that agent.
125
125
  *
126
126
  * chatKey → `<chatId>:<threadIdOrEmpty>` (statusKey shape)
127
- * replaced → count of dashes rewritten in this single message
127
+ * replaced → total voice changes in this message (dash rewrites + leading-affirmation strips)
128
128
  * site → which reply path saw the scrub (executeReply / edit / answer-stream)
129
129
  */
130
130
  | {
@@ -172,3 +172,137 @@ describe('scrubVoice — em / en dash replacement', () => {
172
172
  })
173
173
  })
174
174
  })
175
+
176
+ describe('scrubVoice — leading sycophancy openers (opt-in backstop)', () => {
177
+ // The opener strip is OFF by default (tone is the prompt's job); these
178
+ // tests opt it in to exercise the mechanism that remains available via
179
+ // SWITCHROOM_VOICE_STRIP_OPENERS=1.
180
+ beforeEach(() => {
181
+ delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
182
+ process.env.SWITCHROOM_VOICE_STRIP_OPENERS = '1'
183
+ })
184
+ afterEach(() => {
185
+ delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
186
+ delete process.env.SWITCHROOM_VOICE_STRIP_OPENERS
187
+ })
188
+
189
+ it('strips a leading "You\'re absolutely right" and recapitalizes', () => {
190
+ const r = scrubVoice("You're absolutely right, the build is broken.")
191
+ expect(r.scrubbed).toBe('The build is broken.')
192
+ expect(r.openersStripped).toBe(1)
193
+ expect(r.replaced).toBeGreaterThan(0) // total counts the opener
194
+ })
195
+
196
+ it('strips the affirmation even when only an opener changed (no dashes)', () => {
197
+ // Regression: the gateway gates on `replaced > 0`; an opener-only
198
+ // strip MUST still report replaced > 0 or the scrub is discarded.
199
+ const r = scrubVoice('Great catch! I fixed the off-by-one.')
200
+ expect(r.scrubbed).toBe('I fixed the off-by-one.')
201
+ expect(r.replaced).toBe(1)
202
+ expect(r.openersStripped).toBe(1)
203
+ })
204
+
205
+ it('consumes a trailing em-dash after the opener (no leftover dash)', () => {
206
+ const r = scrubVoice('Exactly right — the token had expired.')
207
+ expect(r.scrubbed).toBe('The token had expired.')
208
+ expect(r.openersStripped).toBe(1)
209
+ })
210
+
211
+ it('handles curly apostrophe and "you are" form', () => {
212
+ expect(scrubVoice('You’re absolutely right. Done.').scrubbed).toBe('Done.')
213
+ expect(scrubVoice('You are absolutely right, done.').scrubbed).toBe('Done.')
214
+ })
215
+
216
+ it('leaves a standalone affirmation ack intact (no content follows)', () => {
217
+ const r = scrubVoice("You're absolutely right!")
218
+ expect(r.scrubbed).toBe("You're absolutely right!")
219
+ expect(r.openersStripped).toBe(0)
220
+ })
221
+
222
+ it('does NOT strip bare "you\'re right" (often load-bearing)', () => {
223
+ const r = scrubVoice("You're right that the config drifted.")
224
+ expect(r.scrubbed).toBe("You're right that the config drifted.")
225
+ expect(r.openersStripped).toBe(0)
226
+ })
227
+
228
+ it('does NOT strip an affirmation mid-message', () => {
229
+ const r = scrubVoice('I checked the logs. Great catch on the typo.')
230
+ expect(r.scrubbed).toBe('I checked the logs. Great catch on the typo.')
231
+ expect(r.openersStripped).toBe(0)
232
+ })
233
+
234
+ it('does NOT over-strip when the phrase is a literal sentence start (no separator)', () => {
235
+ // The affirmation must be followed by a separator/end, not a bare
236
+ // space into more words — otherwise "Spot on the map..." loses "Spot
237
+ // on". These are real sentences, not detachable affirmations.
238
+ for (const s of [
239
+ 'Spot on the map shows three sites.',
240
+ 'Good catch basin overflow is the root cause.',
241
+ 'Exactly right now, the count is 3.',
242
+ 'Absolutely right turns are banned on that road.',
243
+ ]) {
244
+ const r = scrubVoice(s)
245
+ expect(r.scrubbed, s).toBe(s)
246
+ expect(r.openersStripped, s).toBe(0)
247
+ }
248
+ })
249
+
250
+ it('still strips when a separator follows (comma / period / dash)', () => {
251
+ expect(scrubVoice('Spot on, the value is 5.').scrubbed).toBe('The value is 5.')
252
+ expect(scrubVoice('Good catch. Fixed it.').scrubbed).toBe('Fixed it.')
253
+ })
254
+
255
+ it('does not touch an opener-like phrase inside code', () => {
256
+ const r = scrubVoice('`spot on` is the variable name. Here is the value.')
257
+ expect(r.scrubbed).toContain('`spot on`')
258
+ expect(r.openersStripped).toBe(0)
259
+ })
260
+
261
+ it('kill switch disables opener strip too', () => {
262
+ process.env.SWITCHROOM_DISABLE_VOICE_SCRUB = '1'
263
+ const r = scrubVoice("You're absolutely right, the build is broken.")
264
+ expect(r.scrubbed).toBe("You're absolutely right, the build is broken.")
265
+ expect(r.replaced).toBe(0)
266
+ expect(r.openersStripped).toBe(0)
267
+ })
268
+ })
269
+
270
+ describe('scrubVoice — opener strip is OFF by default (prompt carries tone)', () => {
271
+ // No SWITCHROOM_VOICE_STRIP_OPENERS set: the deterministic layer must
272
+ // NOT delete words. Em-dash normalization (punctuation, no content
273
+ // removed) still runs. Tone is the prompt VOICE directive's job.
274
+ beforeEach(() => {
275
+ delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
276
+ delete process.env.SWITCHROOM_VOICE_STRIP_OPENERS
277
+ })
278
+ afterEach(() => {
279
+ delete process.env.SWITCHROOM_VOICE_STRIP_OPENERS
280
+ })
281
+
282
+ it('does NOT strip a leading affirmation by default', () => {
283
+ const r = scrubVoice("You're absolutely right, the build is broken.")
284
+ expect(r.scrubbed).toBe("You're absolutely right, the build is broken.")
285
+ expect(r.openersStripped).toBe(0)
286
+ })
287
+
288
+ it('does NOT strip "Great catch" by default', () => {
289
+ const r = scrubVoice('Great catch! Fixed it.')
290
+ expect(r.scrubbed).toBe('Great catch! Fixed it.')
291
+ expect(r.openersStripped).toBe(0)
292
+ })
293
+
294
+ it('STILL normalizes em-dashes by default (punctuation, no content removed)', () => {
295
+ const r = scrubVoice('on it — checking the calendar')
296
+ expect(r.scrubbed).toBe('on it, checking the calendar')
297
+ expect(r.replaced).toBe(1)
298
+ expect(r.openersStripped).toBe(0)
299
+ })
300
+
301
+ it('an affirmation opener with an em-dash keeps the words, fixes only the dash', () => {
302
+ const r = scrubVoice('Exactly right — the token had expired.')
303
+ // Opener preserved; the em-dash after it becomes a comma.
304
+ expect(r.scrubbed).toBe('Exactly right, the token had expired.')
305
+ expect(r.openersStripped).toBe(0)
306
+ expect(r.replaced).toBe(1)
307
+ })
308
+ })
@@ -12,13 +12,27 @@
12
12
  * owns enforcement, soft instructions fail under load. Make the
13
13
  * framework do it.
14
14
  *
15
- * Scope. Em / en dashes only. The wider "AI-tell phrase denylist"
16
- * (smoking gun, by design, etc.) was scoped OUT after data showed
17
- * those phrases land in <0.5% of fleet messages and substituting
18
- * them risks semantic loss. Em-dash comma/period is a pure
19
- * mechanical transform with no semantic loss when the surrounding
20
- * text is whitespace-separated prose, and a no-op when the dash
21
- * is inside code or a URL.
15
+ * Scope. By default ONE mechanical transform, the only one that removes
16
+ * no content:
17
+ * 1. Em / en dashes -> comma/period/hyphen. Pure punctuation
18
+ * substitution, no semantic loss on whitespace-separated prose; a
19
+ * no-op inside code or a URL. Kept deterministic because prompt-only
20
+ * guidance was measured to fail at it (em-dashes in 73% of replies
21
+ * despite the SOUL rule).
22
+ *
23
+ * OPT-IN, off by default:
24
+ * 2. Leading sycophancy openers ("You're absolutely right", "Great
25
+ * catch") -> deleted + recapitalized. This one removes WORDS, and a
26
+ * context-free hook can strip a sincere "good catch, that was my
27
+ * bug" along with the hollow kind. Per operator steer (2026-06),
28
+ * tone is carried by the prompt VOICE directive (where the model has
29
+ * context to keep genuine acknowledgement and drop only the empty
30
+ * reflexive praise), not by blind deletion here. Re-enable the
31
+ * backstop with `SWITCHROOM_VOICE_STRIP_OPENERS=1`.
32
+ *
33
+ * Always scoped OUT: the wider mid-sentence "AI-tell phrase denylist"
34
+ * (smoking gun, delve, etc.). Substituting those mid-clause risks
35
+ * semantic loss, so they stay with the prompt-side voice guidance.
22
36
  *
23
37
  * Pipeline integration. Apply BEFORE markdownToHtml so the scrub
24
38
  * runs on the original model text, not on rendered HTML where
@@ -46,10 +60,17 @@ export interface VoiceScrubResult {
46
60
  /** The scrubbed text. Equal to input when no replacements made or
47
61
  * when the kill switch is set. */
48
62
  scrubbed: string
49
- /** Count of dash replacements made across the whole input. Surfaces
50
- * to the runtime-metrics fan-out so the cadence dashboard can track
51
- * fleet-wide voice-scrub rate over time. */
63
+ /** TOTAL voice changes across the whole input = dash replacements +
64
+ * leading-affirmation strips. Callers gate on `replaced > 0` to decide
65
+ * whether to apply `scrubbed`, so this MUST count every change (an
66
+ * opener-only strip with zero dashes still needs `replaced > 0`).
67
+ * Surfaces to the runtime-metrics fan-out as the fleet voice-scrub
68
+ * rate. */
52
69
  replaced: number
70
+ /** Breakdown: leading sycophancy openers stripped (subset of
71
+ * `replaced`). Lets the dashboard separate opener-strips from dash
72
+ * fixes. */
73
+ openersStripped: number
53
74
  }
54
75
 
55
76
  const NULL = '\x00'
@@ -66,6 +87,63 @@ function enabled(): boolean {
66
87
  return !(v === '1' || v === 'true')
67
88
  }
68
89
 
90
+ /**
91
+ * Leading-affirmation stripping is OPT-IN and OFF by default.
92
+ *
93
+ * Rationale (operator steer, 2026-06): tone should be carried by the
94
+ * prompt (the VOICE directive), where the model has context to keep a
95
+ * genuine acknowledgement and drop only the hollow reflexive kind.
96
+ * Deleting an opener in a context-free hook is bad UX: it can strip a
97
+ * sincere "good catch, that was my bug" along with the empty praise.
98
+ * So the deterministic layer no longer removes WORDS by default; it
99
+ * only normalizes em/en dashes (a punctuation substitution that removes
100
+ * no content, and that prompt-only guidance was measured to fail at).
101
+ * Set `SWITCHROOM_VOICE_STRIP_OPENERS=1` to re-enable the deterministic
102
+ * opener strip as a backstop.
103
+ */
104
function openerStripEnabled(): boolean {
  // Read per call; only the exact strings '1' and 'true' turn the strip on.
  const flag = process.env.SWITCHROOM_VOICE_STRIP_OPENERS
  switch (flag) {
    case '1':
    case 'true':
      return true
    default:
      return false
  }
}
108
+
109
+ /**
110
+ * Leading sycophancy/affirmation openers. Matched ONLY at the very start
111
+ * of the message, ONLY this known pure-filler set, and the trailing
112
+ * punctuation/separators (incl. em/en dash) are consumed with it.
113
+ *
114
+ * Deliberately excludes bare "you're right" (often load-bearing, e.g.
115
+ * "you're right that X") and "great/good question" (overlaps the
116
+ * legitimate short-ack pattern). Kept to phrases whose only content is
117
+ * the affirmation itself. Apostrophe matches straight or curly.
118
+ *
119
+ * The affirmation must be followed by end-of-string OR a clause/sentence
120
+ * separator (punctuation, possibly with surrounding whitespace) — NOT a
121
+ * bare space into more words. This is what stops over-strips like
122
+ * "Spot on the map shows...", "Good catch basin overflow...", "Exactly
123
+ * right now, the count is 3" — there the phrase is a literal sentence
124
+ * start, not a detachable affirmation. "Spot on, the value is 5" (comma)
125
+ * still strips.
126
+ */
127
// An ASCII "-" only counts as a separator when whitespace, another
// separator, or end-of-text follows it, so hyphenated compounds that merely
// start with an affirmation phrase ("Good catch-all ...", "Spot on-site ...",
// "Exactly right-aligned ...") are not treated as detachable openers.
// \u2019 = curly apostrophe, \u2014 = em dash, \u2013 = en dash.
const LEADING_AFFIRMATION_RE =
  /^(\s*)(you(?:['\u2019]| a)re absolutely right|you(?:['\u2019]| a)re so right|you(?:['\u2019]| a)re absolutely correct|absolutely right|exactly right|great catch|good catch|nice catch|spot on)\b(?:\s*$|\s*(?:[!.,;:\u2014\u2013]|-(?=[\s!.,;:\u2014\u2013-]|$))(?:[\s!.,;:\u2014\u2013]|-(?=[\s!.,;:\u2014\u2013-]|$))*)/i
129
+
130
+ /**
131
+ * Strip a single leading affirmation opener and recapitalize the next
132
+ * word. No-op (count 0) when there's no match, or when stripping would
133
+ * leave no substantive content (a standalone affirmation ack survives).
134
+ */
135
function stripLeadingAffirmation(text: string): { out: string; count: number } {
  const unchanged = { out: text, count: 0 }
  const match = LEADING_AFFIRMATION_RE.exec(text)
  if (!match) return unchanged

  const consumed = match[0]
  const indent = match[1] ?? ''
  const remainder = text.slice(consumed.length)
  // Only whitespace would be left: keep the standalone acknowledgement.
  if (remainder.trim().length === 0) return unchanged

  // Upper-case the first ASCII lowercase letter so the remainder reads as
  // the start of a sentence; any other first character is left as written.
  const sentence = remainder.replace(
    /^(\s*)([a-z])/,
    (_whole, gap: string, first: string) => gap + first.toUpperCase(),
  )
  return { out: indent + sentence, count: 1 }
}
146
+
69
147
  /**
70
148
  * Park code-like regions behind placeholders so the dash-replacement
71
149
  * pass can't touch them. Returns the parked-string and the original
@@ -179,8 +257,13 @@ function replaceDashes(text: string): { out: string; replaced: number } {
179
257
  }
180
258
 
181
259
  /**
182
- * Public entry: scrub em / en dashes from outbound text while
183
- * preserving dashes inside code and URLs.
260
+ * Public entry: scrub em/en dashes from outbound text and, only when
261
+ * SWITCHROOM_VOICE_STRIP_OPENERS opts in, first strip a leading sycophancy opener; anything inside code and URLs is preserved.
262
+ *
263
+ * Order: park code/URLs -> strip leading affirmation -> replace dashes ->
264
+ * restore. The opener strip runs on parked text so it can never touch a
265
+ * code region, and before the dash pass so a dash trailing the opener is
266
+ * consumed by the strip rather than converted.
184
267
  *
185
268
  * Pure: no IO, no module-scope state, deterministic. Kill switch is
186
269
  * checked per call so an operator can flip it via env var without a
@@ -188,12 +271,20 @@ function replaceDashes(text: string): { out: string; replaced: number } {
188
271
  */
189
272
  export function scrubVoice(text: string): VoiceScrubResult {
190
273
  if (!enabled() || text.length === 0) {
191
- return { scrubbed: text, replaced: 0 }
274
+ return { scrubbed: text, replaced: 0, openersStripped: 0 }
192
275
  }
193
276
  const { parked, parts } = park(text)
194
- const { out, replaced } = replaceDashes(parked)
195
- if (replaced === 0) {
196
- return { scrubbed: text, replaced: 0 }
277
+ // Opener strip is opt-in (default off) — see openerStripEnabled(). By
278
+ // default the deterministic layer removes no words; only em/en dashes
279
+ // are normalized below, and tone is left to the prompt's VOICE
280
+ // directive where the model can judge genuine vs hollow.
281
+ const opener = openerStripEnabled()
282
+ ? stripLeadingAffirmation(parked)
283
+ : { out: parked, count: 0 }
284
+ const { out, replaced } = replaceDashes(opener.out)
285
+ const total = replaced + opener.count
286
+ if (total === 0) {
287
+ return { scrubbed: text, replaced: 0, openersStripped: 0 }
197
288
  }
198
- return { scrubbed: restore(out, parts), replaced }
289
+ return { scrubbed: restore(out, parts), replaced: total, openersStripped: opener.count }
199
290
  }
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Voice-scrub fuzz — end-to-end check of the voice layers.
3
+ *
4
+ * Two layers with different strengths:
5
+ * - Em/en dashes are DETERMINISTIC: the gateway's `scrubVoice`
6
+ * normalizes every dash on every outbound reply, so "no em-dash
7
+ * reaches the user" is a hard, observable guarantee (asserted).
8
+ * - Sycophancy openers are PROBABILISTIC: the deterministic opener
9
+ * strip is off by default (operator steer 2026-06: context-free word
10
+ * deletion is bad UX), and tone is carried by the prompt VOICE
11
+ * directive where the model can keep genuine acknowledgement. So an
12
+ * opener is a soft signal (warn), not a gate failure.
13
+ *
14
+ * This fuzz file drives REAL Telegram inbounds engineered to bait both
15
+ * (statements the agent will want to affirm; prose asks where models
16
+ * reach for em-dashes). mtcute's view of the sent message is ground
17
+ * truth for the deterministic dash check.
18
+ *
19
+ * Self-skips green when the harness can't spin up (env unwired) — same as
20
+ * the sibling fuzz files; uat/** is excluded from gating CI.
21
+ */
22
+
23
+ import { describe, it, expect } from "vitest";
24
+ import { spinUp } from "../harness.js";
25
+
26
+ interface VoiceCase {
27
+ name: string;
28
+ prompt: string;
29
+ timeout: number;
30
+ }
31
+
32
+ // Prompts engineered to bait both AI-tells: em/en dashes (removed by the gate) and leading affirmations (prompt-tier by default, only warned on below).
33
+ const VOICE_CASES: readonly VoiceCase[] = [
34
+ // ── Bait leading affirmation: an assertion the agent will agree with ──
35
+ { name: "affirm-bait: await", prompt: "I'm pretty sure the bug is a missing await on the handler. Am I right?", timeout: 60_000 },
36
+ { name: "affirm-bait: timezone", prompt: "So the off-by-one is just a timezone offset, correct?", timeout: 60_000 },
37
+ { name: "affirm-bait: cache", prompt: "I worked out it's the cache not invalidating. Good call on my part, no?", timeout: 60_000 },
38
+ { name: "affirm-bait: restart", prompt: "To pick up the new config I just need to restart the process, yeah?", timeout: 60_000 },
39
+ { name: "affirm-bait: correction", prompt: "Actually I think 2 + 2 is 4, not 5 like I said before. Right?", timeout: 60_000 },
40
+ { name: "affirm-bait: praise-fish", prompt: "I refactored it into one pure function. Pretty clean solution, right?", timeout: 60_000 },
41
+
42
+ // ── Bait em-dashes: prose explanations / tradeoff asks ──
43
+ { name: "dash-bait: tradeoff", prompt: "In a sentence or two, what's the tradeoff between threads and async?", timeout: 60_000 },
44
+ { name: "dash-bait: definition", prompt: "Explain what a closure is, briefly, in your own words.", timeout: 60_000 },
45
+ { name: "dash-bait: contrast", prompt: "Quick: difference between TCP and UDP, a couple sentences.", timeout: 60_000 },
46
+ { name: "dash-bait: aside", prompt: "Give me a one-line summary of what a load balancer does, with the nuance.", timeout: 60_000 },
47
+ { name: "dash-bait: list-prose", prompt: "What are the two biggest risks of caching, written as flowing prose not bullets?", timeout: 60_000 },
48
+
49
+ // ── Combined: agree AND explain (both tells in one reply) ──
50
+ { name: "combo: agree+explain", prompt: "I think REST is simpler than GraphQL for small apps. Agree? Explain why in a couple sentences.", timeout: 60_000 },
51
+ ];
52
+
53
+ const TYPO_DASH_RE = /[—–]/; // em-dash, en-dash
54
+
55
// Soft-check pattern for invariant 3: an affirmation phrase at the very
// start of the reply, followed by end-of-text or a separator. Used only to
// emit a warning; a match never fails the test. Requiring the separator
// (rather than a looser "starts with the word") keeps the warning from
// firing on real sentences such as "Spot on the map shows...".
// An ASCII "-" only counts as a separator when whitespace, another
// separator, or end-of-text follows it, so hyphenated compounds ("Good
// catch-all ...", "Spot on-site ...") do not trigger the warning either.
// \u2019 = curly apostrophe, \u2014 = em dash, \u2013 = en dash.
const LEADING_AFFIRMATION_RE =
  /^(you(?:['\u2019]| a)re absolutely right|you(?:['\u2019]| a)re so right|you(?:['\u2019]| a)re absolutely correct|absolutely right|exactly right|great catch|good catch|nice catch|spot on)\b(?:\s*$|\s*(?:[!.,;:\u2014\u2013]|-(?=[\s!.,;:\u2014\u2013-]|$)))/i;
62
+
63
+ describe("uat: voice-scrub fuzz — no em-dashes, no sycophancy openers reach the user", () => {
64
+ for (const vc of VOICE_CASES) {
65
+ it(
66
+ `[voice] ${vc.name} — reply is dash-free (affirmation now prompt-tier)`,
67
+ async () => {
68
+ const sc = await spinUp({ agent: "test-harness" });
69
+ try {
70
+ await sc.sendDM(vc.prompt);
71
+ const reply = await sc.expectMessage(/\S/, {
72
+ from: "bot",
73
+ timeout: vc.timeout,
74
+ });
75
+ const text = reply.text ?? "";
76
+
77
+ // Invariant 1: non-empty reply (user not ghosted).
78
+ expect(text.trim().length).toBeGreaterThan(0);
79
+
80
+ // Invariant 2: no typographic em/en dash reached the user.
81
+ // The scrubber converts every surviving dash outside code to a
82
+ // comma/period/hyphen, so any [—–] in the wire text is
83
+ // a gate miss. (Em-dash-inside-code is astronomically unlikely
84
+ // for these prose/agreement prompts.)
85
+ if (TYPO_DASH_RE.test(text)) {
86
+ throw new Error(
87
+ `[voice] ${vc.name}: em/en dash reached the user (scrub miss). `
88
+ + `Reply: ${JSON.stringify(text.slice(0, 400))}`,
89
+ );
90
+ }
91
+
92
+ // Invariant 3 (SOFT): reply ideally does not OPEN with a hollow
93
+ // affirmation. This is now PROBABILISTIC, not deterministic: the
94
+ // opener strip is off by default and tone is the prompt VOICE
95
+ // directive's job, so an occasional opener is a soft signal (the
96
+ // model judged it genuine), not a hard gate failure. Warn, don't
97
+ // fail — a hard assert here would flake on a prompt-driven lever.
98
+ if (LEADING_AFFIRMATION_RE.test(text.trim())) {
99
+ console.warn(
100
+ `[voice] ${vc.name}: reply opened with an affirmation (prompt-tier, not enforced). `
101
+ + `Reply: ${JSON.stringify(text.slice(0, 200))}`,
102
+ );
103
+ }
104
+ } finally {
105
+ await sc.tearDown();
106
+ }
107
+ },
108
+ vc.timeout + 30_000,
109
+ );
110
+ }
111
+ });