switchroom 0.13.19 → 0.13.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +2 -2
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +118 -10
- package/telegram-plugin/gateway/gateway.ts +38 -1
- package/telegram-plugin/runtime-metrics.ts +18 -0
- package/telegram-plugin/tests/text-voice-scrub.test.ts +174 -0
- package/telegram-plugin/text-voice-scrub.ts +199 -0
package/dist/cli/switchroom.js
CHANGED
|
@@ -47331,8 +47331,8 @@ var {
|
|
|
47331
47331
|
} = import__.default;
|
|
47332
47332
|
|
|
47333
47333
|
// src/build-info.ts
|
|
47334
|
-
var VERSION = "0.13.
|
|
47335
|
-
var COMMIT_SHA = "
|
|
47334
|
+
var VERSION = "0.13.20";
|
|
47335
|
+
var COMMIT_SHA = "9962efb4";
|
|
47336
47336
|
|
|
47337
47337
|
// src/cli/agent.ts
|
|
47338
47338
|
init_source();
|
package/package.json
CHANGED
|
@@ -40355,6 +40355,90 @@ function getOpenTags(html) {
|
|
|
40355
40355
|
return tagStack;
|
|
40356
40356
|
}
|
|
40357
40357
|
|
|
40358
|
+
// text-voice-scrub.ts
|
|
40359
|
+
// Sentinel building blocks for the voice scrubber. NULL (U+0000) opens
// every placeholder prefix and also terminates each emitted placeholder.
var NULL = "\x00";
var FENCE_PH = NULL + "VS_FENCE";
var INLINE_PH = NULL + "VS_INLINE";
var HTML_CODE_PH = NULL + "VS_HTMLCODE";
var HTML_PRE_PH = NULL + "VS_HTMLPRE";
var URL_PH = NULL + "VS_URL";
// An http(s) URL runs from the scheme to the next whitespace character.
var URL_RE = /https?:\/\/\S+/g;
|
|
40366
|
+
/**
 * Reports whether the voice scrubber is active.
 *
 * The scrubber is on unless SWITCHROOM_DISABLE_VOICE_SCRUB is set to "1" or
 * "true". The value is trimmed and compared case-insensitively so that
 * "TRUE", "True" or a value with stray whitespace still trips the kill
 * switch instead of being silently ignored. The variable is read on every
 * call, so a change to process.env takes effect on the next call.
 *
 * @returns {boolean} false when the kill switch is set, true otherwise.
 */
function enabled4() {
  const raw = process.env.SWITCHROOM_DISABLE_VOICE_SCRUB;
  if (raw === undefined) {
    return true;
  }
  const v = raw.trim().toLowerCase();
  return !(v === "1" || v === "true");
}
|
|
40370
|
+
/**
 * Swap every protected region of `text` for a placeholder so the dash
 * rewrite cannot touch it.
 *
 * Passes run in a fixed order: fenced code, <pre>, <code>, inline
 * backticks, then URLs. Fences go first so a backtick inside a fence is
 * not mistaken for the start of inline code. Each placeholder has the form
 * <prefix><index><NULL>, where index is the region's position in `parts`.
 *
 * @param {string} text Raw outbound text.
 * @returns {{ parked: string, parts: Array<{ prefix: string, idx: number, raw: string }> }}
 *   The text with placeholders, plus the original fragments in the order
 *   they were parked.
 */
function park(text) {
  const parts = [];
  // Builds a replacer that records the matched region and returns its placeholder.
  const stash = (prefix) => (raw) => {
    const idx = parts.length;
    parts.push({ prefix, idx, raw });
    return `${prefix}${idx}${NULL}`;
  };
  const passes = [
    [/```[\s\S]*?```/g, FENCE_PH],
    [/<pre>[\s\S]*?<\/pre>/gi, HTML_PRE_PH],
    [/<code[^>]*>[\s\S]*?<\/code>/gi, HTML_CODE_PH],
    [/`[^`\n]+`/g, INLINE_PH],
    [URL_RE, URL_PH]
  ];
  let parked = text;
  for (const [pattern, prefix] of passes) {
    parked = parked.replace(pattern, stash(prefix));
  }
  return { parked, parts };
}
|
|
40400
|
+
/**
 * Put parked regions back into `text`.
 *
 * Parts are processed last-to-first: a region parked later may contain the
 * placeholder of a region parked earlier, so the later one has to be
 * expanded before the earlier placeholder is looked up. The replacement is
 * supplied as a function so that "$" sequences inside the raw fragment are
 * inserted literally.
 *
 * @param {string} text Text containing placeholders.
 * @param {Array<{ prefix: string, idx: number, raw: string }>} parts Fragments produced by park().
 * @returns {string} Text with each placeholder replaced by its raw fragment.
 */
function restore(text, parts) {
  return parts.reduceRight(
    (acc, part) => acc.replace(`${part.prefix}${part.idx}${NULL}`, () => part.raw),
    text
  );
}
|
|
40408
|
+
/**
 * Rewrite em (U+2014) and en (U+2013) dashes as plain punctuation.
 *
 * Rules, applied in order:
 *   1. Dash flanked by single spaces between two non-space characters:
 *      ". " when the next character is an ASCII uppercase letter,
 *      otherwise ", ".
 *   2. Dash preceded by a space at the end of a line or of the whole text:
 *      "." followed by the original trailing whitespace.
 *   3. Dash directly between two word characters: ", ".
 *   4. Any dash still left: ASCII "-".
 *
 * Rules 1 and 3 use lookbehind/lookahead so the neighbouring characters
 * are not consumed. Consuming them made back-to-back dashes around a
 * one-character token ("x - y - z", "a-b-c" with typographic dashes) skip
 * every second dash and fall through to rule 4.
 *
 * @param {string} text Text whose protected regions are already parked.
 * @returns {{ out: string, replaced: number }} Rewritten text and the number of dashes replaced.
 */
function replaceDashes(text) {
  let replaced = 0;
  let out = text;
  // Rule 1: the capture inside the lookahead exposes the next character
  // without making it part of the match.
  out = out.replace(/(?<=\S) [\u2014\u2013] (?=(\S))/g, (_m, after) => {
    replaced++;
    return /[A-Z]/.test(after) ? ". " : ", ";
  });
  // Rule 2: "$" has no "m" flag here, so it only matches the end of the text.
  out = out.replace(/ [\u2014\u2013](\s*(?:\n|$))/g, (_m, ws) => {
    replaced++;
    return `.${ws}`;
  });
  // Rule 3: unspaced word-dash-word.
  out = out.replace(/(?<=\w)[\u2014\u2013](?=\w)/g, () => {
    replaced++;
    return ", ";
  });
  // Rule 4: catch-all so no typographic dash survives.
  out = out.replace(/[\u2014\u2013]/g, () => {
    replaced++;
    return "-";
  });
  return { out, replaced };
}
|
|
40430
|
+
/**
 * Scrub em / en dashes from outbound text while leaving parked regions
 * (code, <pre>/<code>, URLs) untouched.
 *
 * @param {string} text Outbound message text.
 * @returns {{ scrubbed: string, replaced: number }} The input string itself
 *   with replaced = 0 when the scrubber is disabled, the text is empty, or
 *   nothing was rewritten; otherwise the rewritten text and the number of
 *   replacements.
 */
function scrubVoice(text) {
  const active = enabled4();
  if (!active || text.length === 0) {
    return { scrubbed: text, replaced: 0 };
  }
  const shelved = park(text);
  const rewritten = replaceDashes(shelved.parked);
  return rewritten.replaced === 0
    ? { scrubbed: text, replaced: 0 }
    : { scrubbed: restore(rewritten.out, shelved.parts), replaced: rewritten.replaced };
}
|
|
40441
|
+
|
|
40358
40442
|
// telegram-button-constraints.ts
|
|
40359
40443
|
var TELEGRAM_BUTTON_LIMITS = {
|
|
40360
40444
|
TEXT_MAX: 64,
|
|
@@ -44639,9 +44723,9 @@ function transition(state3, event) {
|
|
|
44639
44723
|
|
|
44640
44724
|
// gateway/inbound-delivery-machine-shadow.ts
|
|
44641
44725
|
var state3 = initialState();
|
|
44642
|
-
var
|
|
44726
|
+
var enabled5 = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== "0";
|
|
44643
44727
|
function shadowEmit(event) {
|
|
44644
|
-
if (!
|
|
44728
|
+
if (!enabled5)
|
|
44645
44729
|
return [];
|
|
44646
44730
|
try {
|
|
44647
44731
|
const result = transition(state3, event);
|
|
@@ -44699,12 +44783,12 @@ function redeliverBufferedInbound2(buffer, agent, send, spool) {
|
|
|
44699
44783
|
}
|
|
44700
44784
|
|
|
44701
44785
|
// gateway/inbound-delivery-machine-dispatch.ts
|
|
44702
|
-
var
|
|
44786
|
+
var enabled6 = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== "0";
|
|
44703
44787
|
function isDispatchEnabled() {
|
|
44704
|
-
return
|
|
44788
|
+
return enabled6;
|
|
44705
44789
|
}
|
|
44706
44790
|
function dispatchEffects(effects, ctx) {
|
|
44707
|
-
if (!
|
|
44791
|
+
if (!enabled6)
|
|
44708
44792
|
return;
|
|
44709
44793
|
for (const effect of effects) {
|
|
44710
44794
|
dispatchOne(effect, ctx);
|
|
@@ -48207,10 +48291,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
48207
48291
|
}
|
|
48208
48292
|
|
|
48209
48293
|
// ../src/build-info.ts
|
|
48210
|
-
var VERSION = "0.13.
|
|
48211
|
-
var COMMIT_SHA = "
|
|
48212
|
-
var COMMIT_DATE = "2026-05-
|
|
48213
|
-
var LATEST_PR =
|
|
48294
|
+
var VERSION = "0.13.20";
|
|
48295
|
+
var COMMIT_SHA = "9962efb4";
|
|
48296
|
+
var COMMIT_DATE = "2026-05-23T08:29:36Z";
|
|
48297
|
+
var LATEST_PR = 1684;
|
|
48214
48298
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
48215
48299
|
|
|
48216
48300
|
// gateway/boot-version.ts
|
|
@@ -50650,6 +50734,18 @@ async function executeReply(args) {
|
|
|
50650
50734
|
if (rawText == null || rawText === "")
|
|
50651
50735
|
throw new Error("reply: text is required and cannot be empty");
|
|
50652
50736
|
let text = repairEscapedWhitespace(rawText);
|
|
50737
|
+
{
|
|
50738
|
+
const scrub = scrubVoice(text);
|
|
50739
|
+
if (scrub.replaced > 0) {
|
|
50740
|
+
text = scrub.scrubbed;
|
|
50741
|
+
emitRuntimeMetric({
|
|
50742
|
+
kind: "voice_scrub_applied",
|
|
50743
|
+
chatKey: statusKey(chat_id, args.message_thread_id != null ? Number(args.message_thread_id) : undefined),
|
|
50744
|
+
replaced: scrub.replaced,
|
|
50745
|
+
site: "reply"
|
|
50746
|
+
});
|
|
50747
|
+
}
|
|
50748
|
+
}
|
|
50653
50749
|
process.stderr.write(`telegram channel: reply: invoked chatId=${chat_id} charCount=${text.length} preview=${JSON.stringify(text.slice(0, 80))}
|
|
50654
50750
|
`);
|
|
50655
50751
|
{
|
|
@@ -51710,7 +51806,19 @@ async function executeEditMessage(args) {
|
|
|
51710
51806
|
const editAccess = loadAccess();
|
|
51711
51807
|
const editConfigMode = editAccess.parseMode ?? "html";
|
|
51712
51808
|
const editFormat = args.format ?? editConfigMode;
|
|
51713
|
-
|
|
51809
|
+
let editRawText = repairEscapedWhitespace(args.text);
|
|
51810
|
+
{
|
|
51811
|
+
const scrub = scrubVoice(editRawText);
|
|
51812
|
+
if (scrub.replaced > 0) {
|
|
51813
|
+
editRawText = scrub.scrubbed;
|
|
51814
|
+
emitRuntimeMetric({
|
|
51815
|
+
kind: "voice_scrub_applied",
|
|
51816
|
+
chatKey: statusKey(args.chat_id, undefined),
|
|
51817
|
+
replaced: scrub.replaced,
|
|
51818
|
+
site: "edit_message"
|
|
51819
|
+
});
|
|
51820
|
+
}
|
|
51821
|
+
}
|
|
51714
51822
|
let editParseMode;
|
|
51715
51823
|
let editText;
|
|
51716
51824
|
if (editFormat === "html") {
|
|
@@ -154,6 +154,7 @@ const SILENT_END_FALLBACK_TEXT =
|
|
|
154
154
|
'⚠️ The agent finished working but didn’t send a reply — your last ' +
|
|
155
155
|
'message may not have been answered. Please try asking again.'
|
|
156
156
|
import { markdownToHtml, splitHtmlChunks, repairEscapedWhitespace, telegramHtmlToPlainText } from '../format.js'
|
|
157
|
+
import { scrubVoice } from '../text-voice-scrub.js'
|
|
157
158
|
import {
|
|
158
159
|
validateInlineKeyboard,
|
|
159
160
|
type AnyButton,
|
|
@@ -4197,6 +4198,26 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4197
4198
|
const rawText = args.text as string | undefined
|
|
4198
4199
|
if (rawText == null || rawText === '') throw new Error('reply: text is required and cannot be empty')
|
|
4199
4200
|
let text = repairEscapedWhitespace(rawText)
|
|
4201
|
+
// Voice scrub (#1683): replace em / en dashes with commas / periods.
|
|
4202
|
+
// Runs BEFORE outboundDedup so retries see the scrubbed key, and
|
|
4203
|
+
// BEFORE markdownToHtml so code-block content is correctly parked
|
|
4204
|
+
// by the scrubber's own placeholder pass (otherwise the html
|
|
4205
|
+
// converter would have already escaped/parked code, and the scrub
|
|
4206
|
+
// would see only the parked placeholders). Kill switch:
|
|
4207
|
+
// `SWITCHROOM_DISABLE_VOICE_SCRUB=1`.
|
|
4208
|
+
{
|
|
4209
|
+
const scrub = scrubVoice(text)
|
|
4210
|
+
if (scrub.replaced > 0) {
|
|
4211
|
+
text = scrub.scrubbed
|
|
4212
|
+
emitRuntimeMetric({
|
|
4213
|
+
kind: 'voice_scrub_applied',
|
|
4214
|
+
chatKey: statusKey(chat_id, args.message_thread_id != null
|
|
4215
|
+
? Number(args.message_thread_id) : undefined),
|
|
4216
|
+
replaced: scrub.replaced,
|
|
4217
|
+
site: 'reply',
|
|
4218
|
+
})
|
|
4219
|
+
}
|
|
4220
|
+
}
|
|
4200
4221
|
process.stderr.write(`telegram channel: reply: invoked chatId=${chat_id} charCount=${text.length} preview=${JSON.stringify(text.slice(0, 80))}\n`)
|
|
4201
4222
|
|
|
4202
4223
|
// #546 dedup check: was this content just sent via turn-flush or
|
|
@@ -5842,7 +5863,23 @@ async function executeEditMessage(args: Record<string, unknown>): Promise<unknow
|
|
|
5842
5863
|
const editAccess = loadAccess()
|
|
5843
5864
|
const editConfigMode = editAccess.parseMode ?? 'html'
|
|
5844
5865
|
const editFormat = (args.format as string | undefined) ?? editConfigMode
|
|
5845
|
-
|
|
5866
|
+
let editRawText = repairEscapedWhitespace(args.text as string)
|
|
5867
|
+
// Voice scrub (#1683): same em-dash scrub as the reply path. Edits
|
|
5868
|
+
// are how silent-anchor and progress-update mutate already-sent
|
|
5869
|
+
// bubbles, so without this an edit can re-introduce dashes the
|
|
5870
|
+
// original send had scrubbed out.
|
|
5871
|
+
{
|
|
5872
|
+
const scrub = scrubVoice(editRawText)
|
|
5873
|
+
if (scrub.replaced > 0) {
|
|
5874
|
+
editRawText = scrub.scrubbed
|
|
5875
|
+
emitRuntimeMetric({
|
|
5876
|
+
kind: 'voice_scrub_applied',
|
|
5877
|
+
chatKey: statusKey(args.chat_id as string, undefined),
|
|
5878
|
+
replaced: scrub.replaced,
|
|
5879
|
+
site: 'edit_message',
|
|
5880
|
+
})
|
|
5881
|
+
}
|
|
5882
|
+
}
|
|
5846
5883
|
let editParseMode: 'HTML' | 'MarkdownV2' | undefined
|
|
5847
5884
|
let editText: string
|
|
5848
5885
|
if (editFormat === 'html') {
|
|
@@ -142,6 +142,24 @@ export type RuntimeMetricEvent =
|
|
|
142
142
|
key: string
|
|
143
143
|
sinceFirstPingMs: number
|
|
144
144
|
}
|
|
145
|
+
/**
|
|
146
|
+
* Voice scrubber engaged: em / en dashes were rewritten to commas /
|
|
147
|
+
* periods on an outbound reply. Each event is a soft-layer policy
|
|
148
|
+
* violation the framework caught (SOUL.md.hbs "never use em-dashes"
|
|
149
|
+
* is the soft layer, this scrub is the hard layer). Fleet-wide
|
|
150
|
+
* trend over weeks shows whether the soft prompt is gaining or
|
|
151
|
+
* losing ground; a per-agent spike is prompt drift on that agent.
|
|
152
|
+
*
|
|
153
|
+
* chatKey → `<chatId>:<threadIdOrEmpty>` (statusKey shape)
|
|
154
|
+
* replaced → count of dashes rewritten in this single message
|
|
155
|
+
* site → which reply path saw the scrub (reply / edit_message / progress_update / answer_stream)
|
|
156
|
+
*/
|
|
157
|
+
| {
|
|
158
|
+
kind: 'voice_scrub_applied'
|
|
159
|
+
chatKey: string
|
|
160
|
+
replaced: number
|
|
161
|
+
site: 'reply' | 'edit_message' | 'progress_update' | 'answer_stream'
|
|
162
|
+
}
|
|
145
163
|
|
|
146
164
|
/**
|
|
147
165
|
* The JSONL sink lives under the runtime state dir so it's per-agent
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit suite for #1683 text-voice-scrub.
|
|
3
|
+
*
|
|
4
|
+
* The fleet sample on 2026-05-23 showed 73% of outbound replies
|
|
5
|
+
* shipped at least one em-dash despite the SOUL.md.hbs soft rule.
|
|
6
|
+
* These tests pin the deterministic transform that the framework
|
|
7
|
+
* enforces, including the code/inline/HTML/URL preservation that
|
|
8
|
+
* keeps the scrub from mangling legitimate non-prose contexts.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
|
|
12
|
+
|
|
13
|
+
import { scrubVoice } from '../text-voice-scrub.js'
|
|
14
|
+
|
|
15
|
+
describe('scrubVoice — em / en dash replacement', () => {
|
|
16
|
+
beforeEach(() => {
|
|
17
|
+
delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
|
|
18
|
+
})
|
|
19
|
+
afterEach(() => {
|
|
20
|
+
delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
|
|
21
|
+
})
|
|
22
|
+
|
|
23
|
+
describe('mechanical rewrite of spaced dashes', () => {
|
|
24
|
+
it('replaces a spaced em-dash before lowercase with a comma', () => {
|
|
25
|
+
const r = scrubVoice('on it — checking the calendar')
|
|
26
|
+
expect(r.scrubbed).toBe('on it, checking the calendar')
|
|
27
|
+
expect(r.replaced).toBe(1)
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it('replaces a spaced em-dash before an uppercase letter with a period', () => {
|
|
31
|
+
// The model often writes "Here's the result — Done." style.
|
|
32
|
+
const r = scrubVoice("Here's the result — Done.")
|
|
33
|
+
expect(r.scrubbed).toBe("Here's the result. Done.")
|
|
34
|
+
expect(r.replaced).toBe(1)
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
it('handles multiple em-dashes in one sentence', () => {
|
|
38
|
+
const r = scrubVoice('one — two — three — done')
|
|
39
|
+
expect(r.scrubbed).toBe('one, two, three, done')
|
|
40
|
+
expect(r.replaced).toBe(3)
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('treats en-dash (–) identically to em-dash', () => {
|
|
44
|
+
const r = scrubVoice('on it – checking the calendar')
|
|
45
|
+
expect(r.scrubbed).toBe('on it, checking the calendar')
|
|
46
|
+
expect(r.replaced).toBe(1)
|
|
47
|
+
})
|
|
48
|
+
|
|
49
|
+
it('replaces unspaced word-dash-word as a comma', () => {
|
|
50
|
+
// Less common but seen in tightly-typed prose.
|
|
51
|
+
const r = scrubVoice('flag—on or flag—off')
|
|
52
|
+
expect(r.scrubbed).toBe('flag, on or flag, off')
|
|
53
|
+
expect(r.replaced).toBe(2)
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
it('replaces end-of-line dashes with a period', () => {
|
|
57
|
+
const r = scrubVoice('thinking out loud —\nnext line here')
|
|
58
|
+
expect(r.scrubbed).toBe('thinking out loud.\nnext line here')
|
|
59
|
+
expect(r.replaced).toBe(1)
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
it('converts a leading-dash sentence-start to ASCII hyphen', () => {
|
|
63
|
+
// Quoted-style or list-bullet em-dash at message start; falls
|
|
64
|
+
// through to the catch-all rule.
|
|
65
|
+
const r = scrubVoice('— note: ship it')
|
|
66
|
+
expect(r.scrubbed).toBe('- note: ship it')
|
|
67
|
+
expect(r.replaced).toBe(1)
|
|
68
|
+
})
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
describe('protected regions are left alone', () => {
|
|
72
|
+
it('preserves dashes inside fenced code blocks', () => {
|
|
73
|
+
const input = 'here is code:\n```bash\nfoo --bar — baz\n```\nand prose — done'
|
|
74
|
+
const r = scrubVoice(input)
|
|
75
|
+
expect(r.scrubbed).toBe(
|
|
76
|
+
'here is code:\n```bash\nfoo --bar — baz\n```\nand prose, done',
|
|
77
|
+
)
|
|
78
|
+
expect(r.replaced).toBe(1)
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
it('preserves dashes inside inline code', () => {
|
|
82
|
+
const r = scrubVoice('the flag `--really — keep` matters — yes')
|
|
83
|
+
expect(r.scrubbed).toBe('the flag `--really — keep` matters, yes')
|
|
84
|
+
expect(r.replaced).toBe(1)
|
|
85
|
+
})
|
|
86
|
+
|
|
87
|
+
it('preserves dashes inside <code> HTML tags', () => {
|
|
88
|
+
const r = scrubVoice('see <code>x — y</code> and note — ok')
|
|
89
|
+
expect(r.scrubbed).toBe('see <code>x — y</code> and note, ok')
|
|
90
|
+
expect(r.replaced).toBe(1)
|
|
91
|
+
})
|
|
92
|
+
|
|
93
|
+
it('preserves dashes inside <pre> HTML tags', () => {
|
|
94
|
+
const r = scrubVoice('block:\n<pre>x — y\nz — w</pre>\nend — ok')
|
|
95
|
+
expect(r.scrubbed).toBe('block:\n<pre>x — y\nz — w</pre>\nend, ok')
|
|
96
|
+
expect(r.replaced).toBe(1)
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
it('preserves dashes inside URLs', () => {
|
|
100
|
+
const r = scrubVoice('see https://example.com/a—b for context — ok')
|
|
101
|
+
expect(r.scrubbed).toBe(
|
|
102
|
+
'see https://example.com/a—b for context, ok',
|
|
103
|
+
)
|
|
104
|
+
expect(r.replaced).toBe(1)
|
|
105
|
+
})
|
|
106
|
+
|
|
107
|
+
it('preserves a code block containing markdown that could otherwise match', () => {
|
|
108
|
+
// The placeholder restore must put the original raw fence back,
|
|
109
|
+
// not a transformed copy.
|
|
110
|
+
const fence =
|
|
111
|
+
'```\n# heading — title\nfunction f() {}\n```'
|
|
112
|
+
const r = scrubVoice(fence + '\ntrailing — yes')
|
|
113
|
+
expect(r.scrubbed).toBe(fence + '\ntrailing, yes')
|
|
114
|
+
expect(r.replaced).toBe(1)
|
|
115
|
+
})
|
|
116
|
+
})
|
|
117
|
+
|
|
118
|
+
describe('no-op cases', () => {
|
|
119
|
+
it('returns identity (same string, replaced=0) when input has no dashes', () => {
|
|
120
|
+
const input = 'no dashes anywhere, just commas and periods.'
|
|
121
|
+
const r = scrubVoice(input)
|
|
122
|
+
expect(r.scrubbed).toBe(input)
|
|
123
|
+
expect(r.replaced).toBe(0)
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
it('returns identity when input is empty', () => {
|
|
127
|
+
const r = scrubVoice('')
|
|
128
|
+
expect(r.scrubbed).toBe('')
|
|
129
|
+
expect(r.replaced).toBe(0)
|
|
130
|
+
})
|
|
131
|
+
|
|
132
|
+
it('kill switch (SWITCHROOM_DISABLE_VOICE_SCRUB=1) returns input unchanged', () => {
|
|
133
|
+
process.env.SWITCHROOM_DISABLE_VOICE_SCRUB = '1'
|
|
134
|
+
const r = scrubVoice('on it — checking')
|
|
135
|
+
expect(r.scrubbed).toBe('on it — checking')
|
|
136
|
+
expect(r.replaced).toBe(0)
|
|
137
|
+
})
|
|
138
|
+
|
|
139
|
+
it('kill switch accepts "true" as well as "1"', () => {
|
|
140
|
+
process.env.SWITCHROOM_DISABLE_VOICE_SCRUB = 'true'
|
|
141
|
+
const r = scrubVoice('on it — checking')
|
|
142
|
+
expect(r.scrubbed).toBe('on it — checking')
|
|
143
|
+
expect(r.replaced).toBe(0)
|
|
144
|
+
})
|
|
145
|
+
})
|
|
146
|
+
|
|
147
|
+
describe('realistic fleet samples', () => {
|
|
148
|
+
it('scrubs a multi-step status message', () => {
|
|
149
|
+
const input =
|
|
150
|
+
"I'll check the calendar — should take a few seconds. " +
|
|
151
|
+
'Result: empty for Saturday — nothing scheduled. Anything else?'
|
|
152
|
+
const r = scrubVoice(input)
|
|
153
|
+
expect(r.scrubbed).toBe(
|
|
154
|
+
"I'll check the calendar, should take a few seconds. " +
|
|
155
|
+
'Result: empty for Saturday, nothing scheduled. Anything else?',
|
|
156
|
+
)
|
|
157
|
+
expect(r.replaced).toBe(2)
|
|
158
|
+
})
|
|
159
|
+
|
|
160
|
+
it('mixed prose and code keeps the code untouched', () => {
|
|
161
|
+
const input =
|
|
162
|
+
'Running `git status --short` — looks clean. ' +
|
|
163
|
+
'```\nM file.ts — modified\n```\n' +
|
|
164
|
+
'Ready to commit — go?'
|
|
165
|
+
const r = scrubVoice(input)
|
|
166
|
+
expect(r.scrubbed).toBe(
|
|
167
|
+
'Running `git status --short`, looks clean. ' +
|
|
168
|
+
'```\nM file.ts — modified\n```\n' +
|
|
169
|
+
'Ready to commit, go?',
|
|
170
|
+
)
|
|
171
|
+
expect(r.replaced).toBe(2)
|
|
172
|
+
})
|
|
173
|
+
})
|
|
174
|
+
})
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* text-voice-scrub.ts — deterministic prose-style enforcement at the
|
|
3
|
+
* gateway.
|
|
4
|
+
*
|
|
5
|
+
* Background. Despite three landed soft fixes (SOUL.md.hbs "never use
|
|
6
|
+
* em-dashes" rule, PR #1177 voice consolidation, the /humanizer skill),
|
|
7
|
+
* sampling 2,867 recent fleet outbound replies on 2026-05-23 showed
|
|
8
|
+
* em-dashes still present in 73% of agent messages (3.23 per 1k chars).
|
|
9
|
+
* Soft layer was not winning. The operator's framing is the same one
|
|
10
|
+
* that drove the over-ping safety net (#1674) and the silent-reply
|
|
11
|
+
* auto-edit (#1677): when the model authors voice and the framework
|
|
12
|
+
* owns enforcement, soft instructions fail under load. Make the
|
|
13
|
+
* framework do it.
|
|
14
|
+
*
|
|
15
|
+
* Scope. Em / en dashes only. The wider "AI-tell phrase denylist"
|
|
16
|
+
* (smoking gun, by design, etc.) was scoped OUT after data showed
|
|
17
|
+
* those phrases land in <0.5% of fleet messages and substituting
|
|
18
|
+
* them risks semantic loss. Em-dash → comma/period is a pure
|
|
19
|
+
* mechanical transform with no semantic loss when the surrounding
|
|
20
|
+
* text is whitespace-separated prose, and a no-op when the dash
|
|
21
|
+
* is inside code or a URL.
|
|
22
|
+
*
|
|
23
|
+
* Pipeline integration. Apply BEFORE markdownToHtml so the scrub
|
|
24
|
+
* runs on the original model text, not on rendered HTML where
|
|
25
|
+
* the dash might already be tag-escaped or live inside a parked
|
|
26
|
+
* code-block placeholder. Apply BEFORE outboundDedup.check so
|
|
27
|
+
* dedup keys see the post-scrub content (same text from a retry
|
|
28
|
+
* collapses cleanly).
|
|
29
|
+
*
|
|
30
|
+
* Code-region awareness. The scrubber MUST preserve dashes inside:
|
|
31
|
+
* - fenced code blocks: ```lang\n...\n```
|
|
32
|
+
* - inline code: `...`
|
|
33
|
+
* - explicit Telegram HTML code tags: <code>...</code>, <pre>...</pre>
|
|
34
|
+
* - URLs (rare to contain em-dashes, but technically valid IDN)
|
|
35
|
+
* The strategy is to park each protected region with a sentinel,
|
|
36
|
+
* scrub the rest, then restore. Mirrors the well-trodden
|
|
37
|
+
* markdownToHtml() codeBlocks/inlineCode placeholder pattern at
|
|
38
|
+
* format.ts:254-272.
|
|
39
|
+
*
|
|
40
|
+
* Kill switch. `SWITCHROOM_DISABLE_VOICE_SCRUB=1` returns the input
|
|
41
|
+
* unchanged and reports zero replacements. Same shape every other
|
|
42
|
+
* gateway safety net uses; rollback is one env var + agent restart.
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
/** Outcome of a single scrubVoice call. */
export interface VoiceScrubResult {
  /**
   * Text after scrubbing. Identical to the input when nothing was replaced
   * or when the kill switch is set.
   */
  scrubbed: string
  /**
   * Number of dash replacements made across the whole input; zero means
   * the text was returned as-is.
   */
  replaced: number
}
|
|
54
|
+
|
|
55
|
+
// Sentinel building blocks for the voice scrubber. NULL (U+0000) opens
// every placeholder prefix and also terminates each emitted placeholder.
const NULL = '\x00'
const FENCE_PH = NULL + 'VS_FENCE'
const INLINE_PH = NULL + 'VS_INLINE'
const HTML_CODE_PH = NULL + 'VS_HTMLCODE'
const HTML_PRE_PH = NULL + 'VS_HTMLPRE'
const URL_PH = NULL + 'VS_URL'

// An http(s) URL runs from the scheme to the next whitespace character.
const URL_RE = /https?:\/\/\S+/g
|
|
63
|
+
|
|
64
|
+
/**
 * Reports whether the voice scrubber is active.
 *
 * The scrubber is on unless SWITCHROOM_DISABLE_VOICE_SCRUB is set to "1" or
 * "true". The value is trimmed and compared case-insensitively so that
 * "TRUE", "True" or a value with stray whitespace still trips the kill
 * switch instead of being silently ignored. The variable is read on every
 * call, so a change to process.env takes effect on the next call.
 *
 * @returns false when the kill switch is set, true otherwise.
 */
function enabled(): boolean {
  const raw = process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
  if (raw === undefined) {
    return true
  }
  const v = raw.trim().toLowerCase()
  return !(v === '1' || v === 'true')
}
|
|
68
|
+
|
|
69
|
+
/**
 * Swap every protected region of `text` for a placeholder so the dash
 * rewrite cannot touch it.
 *
 * Passes run in a fixed order: fenced code, <pre>, <code>, inline
 * backticks, then URLs. Fences go first so a backtick inside a fence is
 * not mistaken for the start of inline code. Each placeholder has the form
 * <prefix><index><NULL>, where index is the region's position in `parts`.
 *
 * @param text Raw outbound text.
 * @returns The text with placeholders, plus the original fragments in the
 *   order they were parked.
 */
function park(text: string): {
  parked: string
  parts: Array<{ prefix: string; idx: number; raw: string }>
} {
  const parts: Array<{ prefix: string; idx: number; raw: string }> = []

  // Builds a replacer that records the matched region and returns its placeholder.
  const stash =
    (prefix: string) =>
    (raw: string): string => {
      const idx = parts.length
      parts.push({ prefix, idx, raw })
      return `${prefix}${idx}${NULL}`
    }

  const passes: Array<[RegExp, string]> = [
    [/```[\s\S]*?```/g, FENCE_PH],
    [/<pre>[\s\S]*?<\/pre>/gi, HTML_PRE_PH],
    [/<code[^>]*>[\s\S]*?<\/code>/gi, HTML_CODE_PH],
    [/`[^`\n]+`/g, INLINE_PH],
    [URL_RE, URL_PH],
  ]

  let parked = text
  for (const [pattern, prefix] of passes) {
    parked = parked.replace(pattern, stash(prefix))
  }

  return { parked, parts }
}
|
|
112
|
+
|
|
113
|
+
/**
 * Put parked regions back into `text`.
 *
 * Parts are processed last-to-first: a region parked later may contain the
 * placeholder of a region parked earlier, so the later one has to be
 * expanded before the earlier placeholder is looked up. The replacement is
 * supplied as a function so that "$" sequences inside the raw fragment are
 * inserted literally.
 *
 * @param text Text containing placeholders.
 * @param parts Fragments produced by park().
 * @returns Text with each placeholder replaced by its raw fragment.
 */
function restore(
  text: string,
  parts: Array<{ prefix: string; idx: number; raw: string }>,
): string {
  return parts.reduceRight(
    (acc, part) => acc.replace(`${part.prefix}${part.idx}${NULL}`, () => part.raw),
    text,
  )
}
|
|
126
|
+
|
|
127
|
+
/**
 * Rewrite em (U+2014) and en (U+2013) dashes as plain punctuation.
 *
 * Rules, applied in order:
 *   1. Dash flanked by single spaces between two non-space characters:
 *      ". " when the next character is an ASCII uppercase letter (reads as
 *      a new sentence), otherwise ", " (reads as a mid-clause continuation).
 *   2. Dash preceded by a space at the end of a line or of the whole text:
 *      "." followed by the original trailing whitespace.
 *   3. Dash directly between two word characters ("word-word" with a
 *      typographic dash): ", ".
 *   4. Any dash still left (e.g. a leading dash at message start): ASCII "-".
 *
 * Rules 1 and 3 use lookbehind/lookahead so the neighbouring characters
 * are not consumed. Consuming them made back-to-back dashes around a
 * one-character token skip every second dash and fall through to rule 4.
 *
 * @param text Text whose protected regions are already parked.
 * @returns The rewritten text and the number of dashes replaced.
 */
function replaceDashes(text: string): { out: string; replaced: number } {
  let replaced = 0
  let out = text

  // Rule 1: the capture inside the lookahead exposes the next character
  // without making it part of the match.
  out = out.replace(/(?<=\S) [\u2014\u2013] (?=(\S))/g, (_m: string, after: string) => {
    replaced++
    return /[A-Z]/.test(after) ? '. ' : ', '
  })

  // Rule 2: "$" has no "m" flag here, so it only matches the end of the text.
  out = out.replace(/ [\u2014\u2013](\s*(?:\n|$))/g, (_m: string, ws: string) => {
    replaced++
    return `.${ws}`
  })

  // Rule 3: unspaced word-dash-word; comma is the safe fallback.
  out = out.replace(/(?<=\w)[\u2014\u2013](?=\w)/g, () => {
    replaced++
    return ', '
  })

  // Rule 4: catch-all so no typographic dash survives.
  out = out.replace(/[\u2014\u2013]/g, () => {
    replaced++
    return '-'
  })

  return { out, replaced }
}
|
|
180
|
+
|
|
181
|
+
/**
 * Public entry: scrub em / en dashes from outbound text while leaving
 * parked regions (code, <pre>/<code>, URLs) untouched.
 *
 * Holds no module-scope mutable state. The kill switch is consulted on
 * every call, so changing the environment variable takes effect on the
 * next call.
 *
 * @param text Outbound message text.
 * @returns The input string itself with `replaced: 0` when the scrubber is
 *   disabled, the text is empty, or nothing was rewritten; otherwise the
 *   rewritten text and the number of replacements.
 */
export function scrubVoice(text: string): VoiceScrubResult {
  const active = enabled()
  if (!active || text.length === 0) {
    return { scrubbed: text, replaced: 0 }
  }

  const shelved = park(text)
  const rewritten = replaceDashes(shelved.parked)

  return rewritten.replaced === 0
    ? { scrubbed: text, replaced: 0 }
    : { scrubbed: restore(rewritten.out, shelved.parts), replaced: rewritten.replaced }
}
|