switchroom 0.14.70 → 0.14.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +6 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +24 -10
- package/telegram-plugin/runtime-metrics.ts +1 -1
- package/telegram-plugin/tests/text-voice-scrub.test.ts +89 -0
- package/telegram-plugin/text-voice-scrub.ts +78 -17
- package/telegram-plugin/uat/scenarios/fuzz-voice-scrub-dm.test.ts +104 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -49601,8 +49601,8 @@ var {
|
|
|
49601
49601
|
} = import__.default;
|
|
49602
49602
|
|
|
49603
49603
|
// src/build-info.ts
|
|
49604
|
-
var VERSION = "0.14.70";
|
|
49605
|
-
var COMMIT_SHA = "
|
|
49604
|
+
var VERSION = "0.14.71";
|
|
49605
|
+
var COMMIT_SHA = "314a0e0e";
|
|
49606
49606
|
|
|
49607
49607
|
// src/cli/agent.ts
|
|
49608
49608
|
init_source();
|
|
@@ -52176,6 +52176,10 @@ function buildSettingsHooksBlock(p) {
|
|
|
52176
52176
|
|
|
52177
52177
|
` + 'Do NOT send a trailing confirmation after your answer \u2014 no "Done.", ' + '"Sent.", "Hope that helps." as a separate message once you have ' + "already replied. Your answer is the last thing the user should " + `see; a follow-up "Done." is dead-air clutter (and the user's ` + `device already pinged on the answer). Stop after the answer.
|
|
52178
52178
|
|
|
52179
|
+
` + "GROUND BEFORE YOU ASSERT. Any fact in your reply that can change " + "(a number, a status, a price, a date, who-uses-what, anything " + '"current" or "latest") must come from a source you actually checked ' + "THIS turn: your data tool, a file, the web. Memory and what you " + '"already know" are leads to verify, not sources. If you have not ' + "checked it this turn, do not state it as fact: go get it now, or tell " + "the user you will confirm and then do it. A confident wrong number is " + `worse than "let me check".
|
|
52180
|
+
|
|
52181
|
+
` + "VOICE: write like a sharp colleague, not a chatbot. Do not open with " + `affirmation ("You're absolutely right", "Great question", "Great ` + 'catch", "Exactly!"); just answer. Skip AI-tell filler ("smoking ' + `gun", "delve", "it's worth noting", "a testament to", "in today's ` + 'fast-paced..."). Lead with the answer, plain words, kept short. When ' + `the user is wrong, say so directly; flattery is not help.
|
|
52182
|
+
|
|
52179
52183
|
` + 'CRITICAL: "answer" means a call to the reply tool ' + "(mcp__switchroom-telegram__reply, or stream_reply with done=true). " + "Your terminal/transcript text is NEVER delivered to Telegram \u2014 the " + "user sees only what you send through the reply tool. After a long " + "tool sequence (scheduling, multi-step research, sub-agent handback), " + "do not let your closing narration stand as the answer: end the turn " + "by passing that narration to the reply tool. No reply tool call = the " + "user got nothing, however much text you wrote. Call the reply tool as " + "your FIRST action when you have the answer \u2014 do not write it out as " + "transcript text first and call reply afterward: a framework backstop " + "flushes unsent text after a delay and then your real reply lands late " + "and out of order.</turn-pacing>";
|
|
52180
52184
|
const switchroomUserPromptSubmit = [
|
|
52181
52185
|
...useHotReloadStable ? [
|
package/package.json
CHANGED
package/telegram-plugin/dist/gateway/gateway.js
CHANGED
|
@@ -42543,6 +42543,18 @@ function enabled4() {
|
|
|
42543
42543
|
const v = process.env.SWITCHROOM_DISABLE_VOICE_SCRUB;
|
|
42544
42544
|
return !(v === "1" || v === "true");
|
|
42545
42545
|
}
|
|
42546
|
+
var LEADING_AFFIRMATION_RE = /^(\s*)(you(?:['\u2019]| a)re absolutely right|you(?:['\u2019]| a)re so right|you(?:['\u2019]| a)re absolutely correct|absolutely right|exactly right|great catch|good catch|nice catch|spot on)\b(?:\s*$|\s*[!.,;:\u2014\u2013-][\s!.,;:\u2014\u2013-]*)/i;
|
|
42547
|
+
function stripLeadingAffirmation(text) {
|
|
42548
|
+
const m = LEADING_AFFIRMATION_RE.exec(text);
|
|
42549
|
+
if (!m)
|
|
42550
|
+
return { out: text, count: 0 };
|
|
42551
|
+
const leadingWs = m[1] ?? "";
|
|
42552
|
+
const rest = text.slice(m[0].length);
|
|
42553
|
+
if (rest.trim().length === 0)
|
|
42554
|
+
return { out: text, count: 0 };
|
|
42555
|
+
const recapped = rest.replace(/^(\s*)([a-z])/, (_m, ws, ch) => ws + ch.toUpperCase());
|
|
42556
|
+
return { out: leadingWs + recapped, count: 1 };
|
|
42557
|
+
}
|
|
42546
42558
|
function park(text) {
|
|
42547
42559
|
const parts = [];
|
|
42548
42560
|
let parked = text;
|
|
@@ -42605,14 +42617,16 @@ function replaceDashes(text) {
|
|
|
42605
42617
|
}
|
|
42606
42618
|
function scrubVoice(text) {
|
|
42607
42619
|
if (!enabled4() || text.length === 0) {
|
|
42608
|
-
return { scrubbed: text, replaced: 0 };
|
|
42620
|
+
return { scrubbed: text, replaced: 0, openersStripped: 0 };
|
|
42609
42621
|
}
|
|
42610
42622
|
const { parked, parts } = park(text);
|
|
42611
|
-
const
|
|
42612
|
-
|
|
42613
|
-
|
|
42623
|
+
const opener = stripLeadingAffirmation(parked);
|
|
42624
|
+
const { out, replaced } = replaceDashes(opener.out);
|
|
42625
|
+
const total = replaced + opener.count;
|
|
42626
|
+
if (total === 0) {
|
|
42627
|
+
return { scrubbed: text, replaced: 0, openersStripped: 0 };
|
|
42614
42628
|
}
|
|
42615
|
-
return { scrubbed: restore(out, parts), replaced };
|
|
42629
|
+
return { scrubbed: restore(out, parts), replaced: total, openersStripped: opener.count };
|
|
42616
42630
|
}
|
|
42617
42631
|
|
|
42618
42632
|
// telegram-button-constraints.ts
|
|
@@ -52801,11 +52815,11 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
52801
52815
|
}
|
|
52802
52816
|
|
|
52803
52817
|
// ../src/build-info.ts
|
|
52804
|
-
var VERSION = "0.14.70";
|
|
52805
|
-
var COMMIT_SHA = "
|
|
52806
|
-
var COMMIT_DATE = "2026-06-
|
|
52807
|
-
var LATEST_PR =
|
|
52808
|
-
var COMMITS_AHEAD_OF_TAG =
|
|
52818
|
+
var VERSION = "0.14.71";
|
|
52819
|
+
var COMMIT_SHA = "314a0e0e";
|
|
52820
|
+
var COMMIT_DATE = "2026-06-05T14:23:58Z";
|
|
52821
|
+
var LATEST_PR = 2181;
|
|
52822
|
+
var COMMITS_AHEAD_OF_TAG = 0;
|
|
52809
52823
|
|
|
52810
52824
|
// gateway/boot-version.ts
|
|
52811
52825
|
function formatRelativeAgo(iso) {
|
|
package/telegram-plugin/runtime-metrics.ts
CHANGED
|
@@ -124,7 +124,7 @@ export type RuntimeMetricEvent =
|
|
|
124
124
|
* losing ground; a per-agent spike is prompt drift on that agent.
|
|
125
125
|
*
|
|
126
126
|
* chatKey → `<chatId>:<threadIdOrEmpty>` (statusKey shape)
|
|
127
|
-
* replaced →
|
|
127
|
+
* replaced → total voice changes in this message (dash rewrites + leading-affirmation strips)
|
|
128
128
|
* site → which reply path saw the scrub (executeReply / edit / answer-stream)
|
|
129
129
|
*/
|
|
130
130
|
| {
|
|
package/telegram-plugin/tests/text-voice-scrub.test.ts
CHANGED
|
@@ -172,3 +172,92 @@ describe('scrubVoice — em / en dash replacement', () => {
|
|
|
172
172
|
})
|
|
173
173
|
})
|
|
174
174
|
})
|
|
175
|
+
|
|
176
|
+
describe('scrubVoice — leading sycophancy openers', () => {
|
|
177
|
+
beforeEach(() => {
|
|
178
|
+
delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
|
|
179
|
+
})
|
|
180
|
+
afterEach(() => {
|
|
181
|
+
delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
|
|
182
|
+
})
|
|
183
|
+
|
|
184
|
+
it('strips a leading "You\'re absolutely right" and recapitalizes', () => {
|
|
185
|
+
const r = scrubVoice("You're absolutely right, the build is broken.")
|
|
186
|
+
expect(r.scrubbed).toBe('The build is broken.')
|
|
187
|
+
expect(r.openersStripped).toBe(1)
|
|
188
|
+
expect(r.replaced).toBeGreaterThan(0) // total counts the opener
|
|
189
|
+
})
|
|
190
|
+
|
|
191
|
+
it('strips the affirmation even when only an opener changed (no dashes)', () => {
|
|
192
|
+
// Regression: the gateway gates on `replaced > 0`; an opener-only
|
|
193
|
+
// strip MUST still report replaced > 0 or the scrub is discarded.
|
|
194
|
+
const r = scrubVoice('Great catch! I fixed the off-by-one.')
|
|
195
|
+
expect(r.scrubbed).toBe('I fixed the off-by-one.')
|
|
196
|
+
expect(r.replaced).toBe(1)
|
|
197
|
+
expect(r.openersStripped).toBe(1)
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
it('consumes a trailing em-dash after the opener (no leftover dash)', () => {
|
|
201
|
+
const r = scrubVoice('Exactly right — the token had expired.')
|
|
202
|
+
expect(r.scrubbed).toBe('The token had expired.')
|
|
203
|
+
expect(r.openersStripped).toBe(1)
|
|
204
|
+
})
|
|
205
|
+
|
|
206
|
+
it('handles curly apostrophe and "you are" form', () => {
|
|
207
|
+
expect(scrubVoice('You’re absolutely right. Done.').scrubbed).toBe('Done.')
|
|
208
|
+
expect(scrubVoice('You are absolutely right, done.').scrubbed).toBe('Done.')
|
|
209
|
+
})
|
|
210
|
+
|
|
211
|
+
it('leaves a standalone affirmation ack intact (no content follows)', () => {
|
|
212
|
+
const r = scrubVoice("You're absolutely right!")
|
|
213
|
+
expect(r.scrubbed).toBe("You're absolutely right!")
|
|
214
|
+
expect(r.openersStripped).toBe(0)
|
|
215
|
+
})
|
|
216
|
+
|
|
217
|
+
it('does NOT strip bare "you\'re right" (often load-bearing)', () => {
|
|
218
|
+
const r = scrubVoice("You're right that the config drifted.")
|
|
219
|
+
expect(r.scrubbed).toBe("You're right that the config drifted.")
|
|
220
|
+
expect(r.openersStripped).toBe(0)
|
|
221
|
+
})
|
|
222
|
+
|
|
223
|
+
it('does NOT strip an affirmation mid-message', () => {
|
|
224
|
+
const r = scrubVoice('I checked the logs. Great catch on the typo.')
|
|
225
|
+
expect(r.scrubbed).toBe('I checked the logs. Great catch on the typo.')
|
|
226
|
+
expect(r.openersStripped).toBe(0)
|
|
227
|
+
})
|
|
228
|
+
|
|
229
|
+
it('does NOT over-strip when the phrase is a literal sentence start (no separator)', () => {
|
|
230
|
+
// The affirmation must be followed by a separator/end, not a bare
|
|
231
|
+
// space into more words — otherwise "Spot on the map..." loses "Spot
|
|
232
|
+
// on". These are real sentences, not detachable affirmations.
|
|
233
|
+
for (const s of [
|
|
234
|
+
'Spot on the map shows three sites.',
|
|
235
|
+
'Good catch basin overflow is the root cause.',
|
|
236
|
+
'Exactly right now, the count is 3.',
|
|
237
|
+
'Absolutely right turns are banned on that road.',
|
|
238
|
+
]) {
|
|
239
|
+
const r = scrubVoice(s)
|
|
240
|
+
expect(r.scrubbed, s).toBe(s)
|
|
241
|
+
expect(r.openersStripped, s).toBe(0)
|
|
242
|
+
}
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
it('still strips when a separator follows (comma / period / dash)', () => {
|
|
246
|
+
expect(scrubVoice('Spot on, the value is 5.').scrubbed).toBe('The value is 5.')
|
|
247
|
+
expect(scrubVoice('Good catch. Fixed it.').scrubbed).toBe('Fixed it.')
|
|
248
|
+
})
|
|
249
|
+
|
|
250
|
+
it('does not touch an opener-like phrase inside code', () => {
|
|
251
|
+
const r = scrubVoice('`spot on` is the variable name. Here is the value.')
|
|
252
|
+
expect(r.scrubbed).toContain('`spot on`')
|
|
253
|
+
expect(r.openersStripped).toBe(0)
|
|
254
|
+
})
|
|
255
|
+
|
|
256
|
+
it('kill switch disables opener strip too', () => {
|
|
257
|
+
process.env.SWITCHROOM_DISABLE_VOICE_SCRUB = '1'
|
|
258
|
+
const r = scrubVoice("You're absolutely right, the build is broken.")
|
|
259
|
+
expect(r.scrubbed).toBe("You're absolutely right, the build is broken.")
|
|
260
|
+
expect(r.replaced).toBe(0)
|
|
261
|
+
expect(r.openersStripped).toBe(0)
|
|
262
|
+
})
|
|
263
|
+
})
|
|
package/telegram-plugin/text-voice-scrub.ts
CHANGED
|
@@ -12,13 +12,22 @@
|
|
|
12
12
|
* owns enforcement, soft instructions fail under load. Make the
|
|
13
13
|
* framework do it.
|
|
14
14
|
*
|
|
15
|
-
* Scope.
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
15
|
+
* Scope. Two mechanical transforms, both semantically safe:
|
|
16
|
+
* 1. Em / en dashes -> comma/period/hyphen. Pure transform with no
|
|
17
|
+
* semantic loss on whitespace-separated prose; a no-op inside code
|
|
18
|
+
* or a URL.
|
|
19
|
+
* 2. Leading sycophancy openers ("You're absolutely right", "Great
|
|
20
|
+
* catch", "Exactly right") -> deleted, next word recapitalized. A
|
|
21
|
+
* leading pure-affirmation clause carries near-zero meaning, so
|
|
22
|
+
* removing it strips the AI-tell without touching the substance.
|
|
23
|
+
* Conservative by construction: only at the very start, only the
|
|
24
|
+
* known affirmation set, only when real content follows (a
|
|
25
|
+
* standalone "You're absolutely right!" ack is left intact).
|
|
26
|
+
*
|
|
27
|
+
* Still scoped OUT: the wider mid-sentence "AI-tell phrase denylist"
|
|
28
|
+
* (smoking gun, delve, etc.). Substituting those mid-clause risks
|
|
29
|
+
* semantic loss, so they stay with the prompt-side voice guidance
|
|
30
|
+
* (the turn-pacing VOICE directive), not this mechanical gate.
|
|
22
31
|
*
|
|
23
32
|
* Pipeline integration. Apply BEFORE markdownToHtml so the scrub
|
|
24
33
|
* runs on the original model text, not on rendered HTML where
|
|
@@ -46,10 +55,17 @@ export interface VoiceScrubResult {
|
|
|
46
55
|
/** The scrubbed text. Equal to input when no replacements made or
|
|
47
56
|
* when the kill switch is set. */
|
|
48
57
|
scrubbed: string
|
|
49
|
-
/**
|
|
50
|
-
*
|
|
51
|
-
*
|
|
58
|
+
/** TOTAL voice changes across the whole input = dash replacements +
|
|
59
|
+
* leading-affirmation strips. Callers gate on `replaced > 0` to decide
|
|
60
|
+
* whether to apply `scrubbed`, so this MUST count every change (an
|
|
61
|
+
* opener-only strip with zero dashes still needs `replaced > 0`).
|
|
62
|
+
* Surfaces to the runtime-metrics fan-out as the fleet voice-scrub
|
|
63
|
+
* rate. */
|
|
52
64
|
replaced: number
|
|
65
|
+
/** Breakdown: leading sycophancy openers stripped (subset of
|
|
66
|
+
* `replaced`). Lets the dashboard separate opener-strips from dash
|
|
67
|
+
* fixes. */
|
|
68
|
+
openersStripped: number
|
|
53
69
|
}
|
|
54
70
|
|
|
55
71
|
const NULL = '\x00'
|
|
@@ -66,6 +82,44 @@ function enabled(): boolean {
|
|
|
66
82
|
return !(v === '1' || v === 'true')
|
|
67
83
|
}
|
|
68
84
|
|
|
85
|
+
/**
|
|
86
|
+
* Leading sycophancy/affirmation openers. Matched ONLY at the very start
|
|
87
|
+
* of the message, ONLY this known pure-filler set, and the trailing
|
|
88
|
+
* punctuation/separators (incl. em/en dash) are consumed with it.
|
|
89
|
+
*
|
|
90
|
+
* Deliberately excludes bare "you're right" (often load-bearing, e.g.
|
|
91
|
+
* "you're right that X") and "great/good question" (overlaps the
|
|
92
|
+
* legitimate short-ack pattern). Kept to phrases whose only content is
|
|
93
|
+
* the affirmation itself. Apostrophe matches straight or curly.
|
|
94
|
+
*
|
|
95
|
+
* The affirmation must be followed by end-of-string OR a clause/sentence
|
|
96
|
+
* separator (punctuation, possibly with surrounding whitespace) — NOT a
|
|
97
|
+
* bare space into more words. This is what stops over-strips like
|
|
98
|
+
* "Spot on the map shows...", "Good catch basin overflow...", "Exactly
|
|
99
|
+
* right now, the count is 3" — there the phrase is a literal sentence
|
|
100
|
+
* start, not a detachable affirmation. "Spot on, the value is 5" (comma)
|
|
101
|
+
* still strips.
|
|
102
|
+
*/
|
|
103
|
+
const LEADING_AFFIRMATION_RE =
|
|
104
|
+
/^(\s*)(you(?:['’]| a)re absolutely right|you(?:['’]| a)re so right|you(?:['’]| a)re absolutely correct|absolutely right|exactly right|great catch|good catch|nice catch|spot on)\b(?:\s*$|\s*[!.,;:—–-][\s!.,;:—–-]*)/i
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Strip a single leading affirmation opener and recapitalize the next
|
|
108
|
+
* word. No-op (count 0) when there's no match, or when stripping would
|
|
109
|
+
* leave no substantive content (a standalone affirmation ack survives).
|
|
110
|
+
*/
|
|
111
|
+
function stripLeadingAffirmation(text: string): { out: string; count: number } {
|
|
112
|
+
const m = LEADING_AFFIRMATION_RE.exec(text)
|
|
113
|
+
if (!m) return { out: text, count: 0 }
|
|
114
|
+
const leadingWs = m[1] ?? ''
|
|
115
|
+
const rest = text.slice(m[0].length)
|
|
116
|
+
if (rest.trim().length === 0) return { out: text, count: 0 }
|
|
117
|
+
// Recapitalize the first alphabetic char of the remainder so the new
|
|
118
|
+
// opening word reads as a sentence start.
|
|
119
|
+
const recapped = rest.replace(/^(\s*)([a-z])/, (_m, ws: string, ch: string) => ws + ch.toUpperCase())
|
|
120
|
+
return { out: leadingWs + recapped, count: 1 }
|
|
121
|
+
}
|
|
122
|
+
|
|
69
123
|
/**
|
|
70
124
|
* Park code-like regions behind placeholders so the dash-replacement
|
|
71
125
|
* pass can't touch them. Returns the parked-string and the original
|
|
@@ -179,8 +233,13 @@ function replaceDashes(text: string): { out: string; replaced: number } {
|
|
|
179
233
|
}
|
|
180
234
|
|
|
181
235
|
/**
|
|
182
|
-
* Public entry:
|
|
183
|
-
* preserving
|
|
236
|
+
* Public entry: strip a leading sycophancy opener and scrub em/en dashes
|
|
237
|
+
* from outbound text, preserving anything inside code and URLs.
|
|
238
|
+
*
|
|
239
|
+
* Order: park code/URLs -> strip leading affirmation -> replace dashes ->
|
|
240
|
+
* restore. The opener strip runs on parked text so it can never touch a
|
|
241
|
+
* code region, and before the dash pass so a dash trailing the opener is
|
|
242
|
+
* consumed by the strip rather than converted.
|
|
184
243
|
*
|
|
185
244
|
* Pure: no IO, no module-scope state, deterministic. Kill switch is
|
|
186
245
|
* checked per call so an operator can flip it via env var without a
|
|
@@ -188,12 +247,14 @@ function replaceDashes(text: string): { out: string; replaced: number } {
|
|
|
188
247
|
*/
|
|
189
248
|
export function scrubVoice(text: string): VoiceScrubResult {
|
|
190
249
|
if (!enabled() || text.length === 0) {
|
|
191
|
-
return { scrubbed: text, replaced: 0 }
|
|
250
|
+
return { scrubbed: text, replaced: 0, openersStripped: 0 }
|
|
192
251
|
}
|
|
193
252
|
const { parked, parts } = park(text)
|
|
194
|
-
const
|
|
195
|
-
|
|
196
|
-
|
|
253
|
+
const opener = stripLeadingAffirmation(parked)
|
|
254
|
+
const { out, replaced } = replaceDashes(opener.out)
|
|
255
|
+
const total = replaced + opener.count
|
|
256
|
+
if (total === 0) {
|
|
257
|
+
return { scrubbed: text, replaced: 0, openersStripped: 0 }
|
|
197
258
|
}
|
|
198
|
-
return { scrubbed: restore(out, parts), replaced }
|
|
259
|
+
return { scrubbed: restore(out, parts), replaced: total, openersStripped: opener.count }
|
|
199
260
|
}
|
|
package/telegram-plugin/uat/scenarios/fuzz-voice-scrub-dm.test.ts
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice-scrub fuzz — end-to-end proof of the deterministic voice gate.
|
|
3
|
+
*
|
|
4
|
+
* The gateway's `scrubVoice` strips em/en dashes and leading sycophancy
|
|
5
|
+
* openers ("You're absolutely right", "Great catch", ...) from every
|
|
6
|
+
* outbound reply. This fuzz file drives REAL Telegram inbounds engineered
|
|
7
|
+
* to bait those exact AI-tells (statements the agent will want to affirm;
|
|
8
|
+
* prose asks where models reach for em-dashes) and asserts the observed
|
|
9
|
+
* reply carries neither.
|
|
10
|
+
*
|
|
11
|
+
* Why this is a good UAT target: unlike the grounding/voice PROMPT
|
|
12
|
+
* guidance (soft, semantic, not cleanly observable), the scrub is a
|
|
13
|
+
* deterministic transform on the wire, so mtcute's view of the sent
|
|
14
|
+
* message is ground truth. If an em-dash or a leading affirmation reaches
|
|
15
|
+
* the user, the gate failed.
|
|
16
|
+
*
|
|
17
|
+
* Self-skips green when the harness can't spin up (env unwired) — same as
|
|
18
|
+
* the sibling fuzz files; uat/** is excluded from gating CI.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { describe, it, expect } from "vitest";
|
|
22
|
+
import { spinUp } from "../harness.js";
|
|
23
|
+
|
|
24
|
+
interface VoiceCase {
|
|
25
|
+
name: string;
|
|
26
|
+
prompt: string;
|
|
27
|
+
timeout: number;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// Prompts engineered to bait the two AI-tells the gate removes.
|
|
31
|
+
const VOICE_CASES: readonly VoiceCase[] = [
|
|
32
|
+
// ── Bait leading affirmation: an assertion the agent will agree with ──
|
|
33
|
+
{ name: "affirm-bait: await", prompt: "I'm pretty sure the bug is a missing await on the handler. Am I right?", timeout: 60_000 },
|
|
34
|
+
{ name: "affirm-bait: timezone", prompt: "So the off-by-one is just a timezone offset, correct?", timeout: 60_000 },
|
|
35
|
+
{ name: "affirm-bait: cache", prompt: "I worked out it's the cache not invalidating. Good call on my part, no?", timeout: 60_000 },
|
|
36
|
+
{ name: "affirm-bait: restart", prompt: "To pick up the new config I just need to restart the process, yeah?", timeout: 60_000 },
|
|
37
|
+
{ name: "affirm-bait: correction", prompt: "Actually I think 2 + 2 is 4, not 5 like I said before. Right?", timeout: 60_000 },
|
|
38
|
+
{ name: "affirm-bait: praise-fish", prompt: "I refactored it into one pure function. Pretty clean solution, right?", timeout: 60_000 },
|
|
39
|
+
|
|
40
|
+
// ── Bait em-dashes: prose explanations / tradeoff asks ──
|
|
41
|
+
{ name: "dash-bait: tradeoff", prompt: "In a sentence or two, what's the tradeoff between threads and async?", timeout: 60_000 },
|
|
42
|
+
{ name: "dash-bait: definition", prompt: "Explain what a closure is, briefly, in your own words.", timeout: 60_000 },
|
|
43
|
+
{ name: "dash-bait: contrast", prompt: "Quick: difference between TCP and UDP, a couple sentences.", timeout: 60_000 },
|
|
44
|
+
{ name: "dash-bait: aside", prompt: "Give me a one-line summary of what a load balancer does, with the nuance.", timeout: 60_000 },
|
|
45
|
+
{ name: "dash-bait: list-prose", prompt: "What are the two biggest risks of caching, written as flowing prose not bullets?", timeout: 60_000 },
|
|
46
|
+
|
|
47
|
+
// ── Combined: agree AND explain (both tells in one reply) ──
|
|
48
|
+
{ name: "combo: agree+explain", prompt: "I think REST is simpler than GraphQL for small apps. Agree? Explain why in a couple sentences.", timeout: 60_000 },
|
|
49
|
+
];
|
|
50
|
+
|
|
51
|
+
const TYPO_DASH_RE = /[—–]/; // em-dash, en-dash
|
|
52
|
+
|
|
53
|
+
// Mirrors the gateway scrubber's exact strip condition (affirmation +
|
|
54
|
+
// separator/end). Asserting THIS, not a looser "starts with the word",
|
|
55
|
+
// keeps the UAT a reliable regression test of the deterministic gate: it
|
|
56
|
+
// fails only when a strippable opener actually survived to the user, not
|
|
57
|
+
// when the soft prompt layer emits a non-strippable variant.
|
|
58
|
+
const LEADING_AFFIRMATION_RE =
|
|
59
|
+
/^(you(?:['’]| a)re absolutely right|you(?:['’]| a)re so right|you(?:['’]| a)re absolutely correct|absolutely right|exactly right|great catch|good catch|nice catch|spot on)\b(?:\s*$|\s*[!.,;:—–-])/i;
|
|
60
|
+
|
|
61
|
+
describe("uat: voice-scrub fuzz — no em-dashes, no sycophancy openers reach the user", () => {
|
|
62
|
+
for (const vc of VOICE_CASES) {
|
|
63
|
+
it(
|
|
64
|
+
`[voice] ${vc.name} — reply is dash-free and affirmation-free`,
|
|
65
|
+
async () => {
|
|
66
|
+
const sc = await spinUp({ agent: "test-harness" });
|
|
67
|
+
try {
|
|
68
|
+
await sc.sendDM(vc.prompt);
|
|
69
|
+
const reply = await sc.expectMessage(/\S/, {
|
|
70
|
+
from: "bot",
|
|
71
|
+
timeout: vc.timeout,
|
|
72
|
+
});
|
|
73
|
+
const text = reply.text ?? "";
|
|
74
|
+
|
|
75
|
+
// Invariant 1: non-empty reply (user not ghosted).
|
|
76
|
+
expect(text.trim().length).toBeGreaterThan(0);
|
|
77
|
+
|
|
78
|
+
// Invariant 2: no typographic em/en dash reached the user.
|
|
79
|
+
// The scrubber converts every surviving dash outside code to a
|
|
80
|
+
// comma/period/hyphen, so any [—–] in the wire text is
|
|
81
|
+
// a gate miss. (Em-dash-inside-code is astronomically unlikely
|
|
82
|
+
// for these prose/agreement prompts.)
|
|
83
|
+
if (TYPO_DASH_RE.test(text)) {
|
|
84
|
+
throw new Error(
|
|
85
|
+
`[voice] ${vc.name}: em/en dash reached the user (scrub miss). `
|
|
86
|
+
+ `Reply: ${JSON.stringify(text.slice(0, 400))}`,
|
|
87
|
+
);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// Invariant 3: reply does not OPEN with a sycophancy affirmation.
|
|
91
|
+
if (LEADING_AFFIRMATION_RE.test(text.trim())) {
|
|
92
|
+
throw new Error(
|
|
93
|
+
`[voice] ${vc.name}: reply opened with a stripped-class affirmation. `
|
|
94
|
+
+ `Reply: ${JSON.stringify(text.slice(0, 200))}`,
|
|
95
|
+
);
|
|
96
|
+
}
|
|
97
|
+
} finally {
|
|
98
|
+
await sc.tearDown();
|
|
99
|
+
}
|
|
100
|
+
},
|
|
101
|
+
vc.timeout + 30_000,
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
});
|