switchroom 0.13.19 → 0.13.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47331,8 +47331,8 @@ var {
47331
47331
  } = import__.default;
47332
47332
 
47333
47333
  // src/build-info.ts
47334
- var VERSION = "0.13.19";
47335
- var COMMIT_SHA = "de154395";
47334
+ var VERSION = "0.13.20";
47335
+ var COMMIT_SHA = "9962efb4";
47336
47336
 
47337
47337
  // src/cli/agent.ts
47338
47338
  init_source();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "switchroom",
3
- "version": "0.13.19",
3
+ "version": "0.13.20",
4
4
  "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -40355,6 +40355,90 @@ function getOpenTags(html) {
40355
40355
  return tagStack;
40356
40356
  }
40357
40357
 
40358
+ // text-voice-scrub.ts
40359
// text-voice-scrub.ts
// Placeholder vocabulary used while protected regions are parked.
// Every placeholder is `<prefix><index>\x00`; NUL brackets the token so
// it cannot be confused with ordinary message text.
var NULL = "\x00";
var FENCE_PH = NULL + "VS_FENCE";
var INLINE_PH = NULL + "VS_INLINE";
var HTML_CODE_PH = NULL + "VS_HTMLCODE";
var HTML_PRE_PH = NULL + "VS_HTMLPRE";
var URL_PH = NULL + "VS_URL";
// An http(s) URL runs up to the next whitespace character.
var URL_RE = /https?:\/\/\S+/g;
40366
// Kill-switch check: the scrub is on unless
// SWITCHROOM_DISABLE_VOICE_SCRUB is exactly "1" or "true".
function enabled4() {
  const flag = process.env.SWITCHROOM_DISABLE_VOICE_SCRUB;
  return flag !== "1" && flag !== "true";
}
40370
// Swap every protected region for a `<prefix><index>\x00` placeholder
// and remember the original fragment so restore() can put it back.
// Passes run in a fixed order: fenced code, <pre>, <code>, inline
// backticks, URLs. Fences go first so a backtick inside a fence is not
// read as the start of inline code.
function park(text) {
  const parts = [];
  const passes = [
    [/```[\s\S]*?```/g, FENCE_PH],
    [/<pre>[\s\S]*?<\/pre>/gi, HTML_PRE_PH],
    [/<code[^>]*>[\s\S]*?<\/code>/gi, HTML_CODE_PH],
    [/`[^`\n]+`/g, INLINE_PH],
    [URL_RE, URL_PH]
  ];
  let parked = text;
  for (const [pattern, prefix] of passes) {
    parked = parked.replace(pattern, (raw) => {
      // The index is global across passes, so each placeholder is unique.
      const idx = parts.length;
      parts.push({ prefix, idx, raw });
      return `${prefix}${idx}${NULL}`;
    });
  }
  return { parked, parts };
}
40400
// Put parked fragments back, newest first. A later pass can capture an
// earlier placeholder inside its own raw text (e.g. a URL that swallowed
// an inline-code placeholder); walking backwards re-exposes that inner
// placeholder before its own turn comes.
function restore(text, parts) {
  return parts.reduceRight(
    // Function replacer: `$` sequences in the raw fragment stay literal.
    (acc, part) => acc.replace(`${part.prefix}${part.idx}${NULL}`, () => part.raw),
    text
  );
}
40408
// Replace em / en dashes with ASCII punctuation. Rules, in order:
//   1. ` — ` between two non-space characters -> `. ` when the next
//      character is an ASCII uppercase letter, otherwise `, `.
//   2. ` —` at end of line or end of input -> `.` (whitespace kept).
//   3. Unspaced dash between two digits ("10–20") -> `-`; it is a
//      numeric range and a comma would change its meaning.
//   4. Unspaced dash between two word characters -> `, `.
//   5. Any dash still standing -> `-`.
// Rules 1, 3 and 4 inspect the following character with a lookahead
// rather than consuming it, so the same character can be the left side
// of the next dash ("a — b — c", "x—y—z").
// Returns { out, replaced } where `replaced` counts rewritten dashes.
function replaceDashes(text) {
  let replaced = 0;
  let out = text;
  out = out.replace(/(\S) [\u2014\u2013] (?=(\S))/g, (_m, before, after) => {
    replaced++;
    return /[A-Z]/.test(after) ? `${before}. ` : `${before}, `;
  });
  out = out.replace(/ [\u2014\u2013](\s*(?:\n|$))/g, (_m, ws) => {
    replaced++;
    return `.${ws}`;
  });
  out = out.replace(/(\d)[\u2014\u2013](?=\d)/g, (_m, before) => {
    replaced++;
    return `${before}-`;
  });
  out = out.replace(/(\w)[\u2014\u2013](?=\w)/g, (_m, before) => {
    replaced++;
    return `${before}, `;
  });
  out = out.replace(/[\u2014\u2013]/g, () => {
    replaced++;
    return "-";
  });
  return { out, replaced };
}
40430
// Entry point: park protected regions, rewrite dashes in what is left,
// then restore. Returns the input untouched (replaced: 0) when the kill
// switch is set, the input is empty, or no dash was rewritten.
function scrubVoice(text) {
  if (!enabled4() || text.length === 0) {
    return { scrubbed: text, replaced: 0 };
  }
  const shielded = park(text);
  const rewrite = replaceDashes(shielded.parked);
  return rewrite.replaced === 0
    ? { scrubbed: text, replaced: 0 }
    : { scrubbed: restore(rewrite.out, shielded.parts), replaced: rewrite.replaced };
}
40441
+
40358
40442
  // telegram-button-constraints.ts
40359
40443
  var TELEGRAM_BUTTON_LIMITS = {
40360
40444
  TEXT_MAX: 64,
@@ -44639,9 +44723,9 @@ function transition(state3, event) {
44639
44723
 
44640
44724
  // gateway/inbound-delivery-machine-shadow.ts
44641
44725
  var state3 = initialState();
44642
- var enabled4 = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== "0";
44726
+ var enabled5 = process.env.SWITCHROOM_DELIVERY_MACHINE_SHADOW !== "0";
44643
44727
  function shadowEmit(event) {
44644
- if (!enabled4)
44728
+ if (!enabled5)
44645
44729
  return [];
44646
44730
  try {
44647
44731
  const result = transition(state3, event);
@@ -44699,12 +44783,12 @@ function redeliverBufferedInbound2(buffer, agent, send, spool) {
44699
44783
  }
44700
44784
 
44701
44785
  // gateway/inbound-delivery-machine-dispatch.ts
44702
- var enabled5 = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== "0";
44786
+ var enabled6 = process.env.SWITCHROOM_DELIVERY_MACHINE_CUTOVER !== "0";
44703
44787
  function isDispatchEnabled() {
44704
- return enabled5;
44788
+ return enabled6;
44705
44789
  }
44706
44790
  function dispatchEffects(effects, ctx) {
44707
- if (!enabled5)
44791
+ if (!enabled6)
44708
44792
  return;
44709
44793
  for (const effect of effects) {
44710
44794
  dispatchOne(effect, ctx);
@@ -48207,10 +48291,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
48207
48291
  }
48208
48292
 
48209
48293
  // ../src/build-info.ts
48210
- var VERSION = "0.13.19";
48211
- var COMMIT_SHA = "de154395";
48212
- var COMMIT_DATE = "2026-05-23T07:08:03Z";
48213
- var LATEST_PR = 1682;
48294
+ var VERSION = "0.13.20";
48295
+ var COMMIT_SHA = "9962efb4";
48296
+ var COMMIT_DATE = "2026-05-23T08:29:36Z";
48297
+ var LATEST_PR = 1684;
48214
48298
  var COMMITS_AHEAD_OF_TAG = 0;
48215
48299
 
48216
48300
  // gateway/boot-version.ts
@@ -50650,6 +50734,18 @@ async function executeReply(args) {
50650
50734
  if (rawText == null || rawText === "")
50651
50735
  throw new Error("reply: text is required and cannot be empty");
50652
50736
  let text = repairEscapedWhitespace(rawText);
50737
+ {
50738
+ const scrub = scrubVoice(text);
50739
+ if (scrub.replaced > 0) {
50740
+ text = scrub.scrubbed;
50741
+ emitRuntimeMetric({
50742
+ kind: "voice_scrub_applied",
50743
+ chatKey: statusKey(chat_id, args.message_thread_id != null ? Number(args.message_thread_id) : undefined),
50744
+ replaced: scrub.replaced,
50745
+ site: "reply"
50746
+ });
50747
+ }
50748
+ }
50653
50749
  process.stderr.write(`telegram channel: reply: invoked chatId=${chat_id} charCount=${text.length} preview=${JSON.stringify(text.slice(0, 80))}
50654
50750
  `);
50655
50751
  {
@@ -51710,7 +51806,19 @@ async function executeEditMessage(args) {
51710
51806
  const editAccess = loadAccess();
51711
51807
  const editConfigMode = editAccess.parseMode ?? "html";
51712
51808
  const editFormat = args.format ?? editConfigMode;
51713
- const editRawText = repairEscapedWhitespace(args.text);
51809
+ let editRawText = repairEscapedWhitespace(args.text);
51810
+ {
51811
+ const scrub = scrubVoice(editRawText);
51812
+ if (scrub.replaced > 0) {
51813
+ editRawText = scrub.scrubbed;
51814
+ emitRuntimeMetric({
51815
+ kind: "voice_scrub_applied",
51816
+ chatKey: statusKey(args.chat_id, undefined),
51817
+ replaced: scrub.replaced,
51818
+ site: "edit_message"
51819
+ });
51820
+ }
51821
+ }
51714
51822
  let editParseMode;
51715
51823
  let editText;
51716
51824
  if (editFormat === "html") {
@@ -154,6 +154,7 @@ const SILENT_END_FALLBACK_TEXT =
154
154
  '⚠️ The agent finished working but didn’t send a reply — your last ' +
155
155
  'message may not have been answered. Please try asking again.'
156
156
  import { markdownToHtml, splitHtmlChunks, repairEscapedWhitespace, telegramHtmlToPlainText } from '../format.js'
157
+ import { scrubVoice } from '../text-voice-scrub.js'
157
158
  import {
158
159
  validateInlineKeyboard,
159
160
  type AnyButton,
@@ -4197,6 +4198,26 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
4197
4198
  const rawText = args.text as string | undefined
4198
4199
  if (rawText == null || rawText === '') throw new Error('reply: text is required and cannot be empty')
4199
4200
  let text = repairEscapedWhitespace(rawText)
4201
+ // Voice scrub (#1683): replace em / en dashes with commas / periods.
4202
+ // Runs BEFORE outboundDedup so retries see the scrubbed key, and
4203
+ // BEFORE markdownToHtml so code-block content is correctly parked
4204
+ // by the scrubber's own placeholder pass (otherwise the html
4205
+ // converter would have already escaped/parked code, and the scrub
4206
+ // would see only the parked placeholders). Kill switch:
4207
+ // `SWITCHROOM_DISABLE_VOICE_SCRUB=1`.
4208
+ {
4209
+ const scrub = scrubVoice(text)
4210
+ if (scrub.replaced > 0) {
4211
+ text = scrub.scrubbed
4212
+ emitRuntimeMetric({
4213
+ kind: 'voice_scrub_applied',
4214
+ chatKey: statusKey(chat_id, args.message_thread_id != null
4215
+ ? Number(args.message_thread_id) : undefined),
4216
+ replaced: scrub.replaced,
4217
+ site: 'reply',
4218
+ })
4219
+ }
4220
+ }
4200
4221
  process.stderr.write(`telegram channel: reply: invoked chatId=${chat_id} charCount=${text.length} preview=${JSON.stringify(text.slice(0, 80))}\n`)
4201
4222
 
4202
4223
  // #546 dedup check: was this content just sent via turn-flush or
@@ -5842,7 +5863,23 @@ async function executeEditMessage(args: Record<string, unknown>): Promise<unknow
5842
5863
  const editAccess = loadAccess()
5843
5864
  const editConfigMode = editAccess.parseMode ?? 'html'
5844
5865
  const editFormat = (args.format as string | undefined) ?? editConfigMode
5845
- const editRawText = repairEscapedWhitespace(args.text as string)
5866
+ let editRawText = repairEscapedWhitespace(args.text as string)
5867
+ // Voice scrub (#1683): same em-dash scrub as the reply path. Edits
5868
+ // are how silent-anchor and progress-update mutate already-sent
5869
+ // bubbles, so without this an edit can re-introduce dashes the
5870
+ // original send had scrubbed out.
5871
+ {
5872
+ const scrub = scrubVoice(editRawText)
5873
+ if (scrub.replaced > 0) {
5874
+ editRawText = scrub.scrubbed
5875
+ emitRuntimeMetric({
5876
+ kind: 'voice_scrub_applied',
5877
+ chatKey: statusKey(args.chat_id as string, undefined),
5878
+ replaced: scrub.replaced,
5879
+ site: 'edit_message',
5880
+ })
5881
+ }
5882
+ }
5846
5883
  let editParseMode: 'HTML' | 'MarkdownV2' | undefined
5847
5884
  let editText: string
5848
5885
  if (editFormat === 'html') {
@@ -142,6 +142,24 @@ export type RuntimeMetricEvent =
142
142
  key: string
143
143
  sinceFirstPingMs: number
144
144
  }
145
+ /**
146
+ * Voice scrubber engaged: em / en dashes were rewritten to commas /
147
+ * periods on an outbound reply. Each event is a soft-layer policy
148
+ * violation the framework caught (SOUL.md.hbs "never use em-dashes"
149
+ * is the soft layer, this scrub is the hard layer). Fleet-wide
150
+ * trend over weeks shows whether the soft prompt is gaining or
151
+ * losing ground; a per-agent spike is prompt drift on that agent.
152
+ *
153
+ * chatKey → `<chatId>:<threadIdOrEmpty>` (statusKey shape)
154
+ * replaced → count of dashes rewritten in this single message
155
+ * site → which outbound path ran the scrub: 'reply', 'edit_message', 'progress_update', or 'answer_stream'
156
+ */
157
+ | {
158
+ kind: 'voice_scrub_applied'
159
+ chatKey: string
160
+ replaced: number
161
+ site: 'reply' | 'edit_message' | 'progress_update' | 'answer_stream'
162
+ }
145
163
 
146
164
  /**
147
165
  * The JSONL sink lives under the runtime state dir so it's per-agent
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Unit suite for #1683 text-voice-scrub.
3
+ *
4
+ * The fleet sample on 2026-05-23 showed 73% of outbound replies
5
+ * shipped at least one em-dash despite the SOUL.md.hbs soft rule.
6
+ * These tests pin the deterministic transform that the framework
7
+ * enforces, including the code/inline/HTML/URL preservation that
8
+ * keeps the scrub from mangling legitimate non-prose contexts.
9
+ */
10
+
11
+ import { afterEach, beforeEach, describe, expect, it } from 'vitest'
12
+
13
+ import { scrubVoice } from '../text-voice-scrub.js'
14
+
15
+ describe('scrubVoice — em / en dash replacement', () => {
16
+ beforeEach(() => {
17
+ delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
18
+ })
19
+ afterEach(() => {
20
+ delete process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
21
+ })
22
+
23
+ describe('mechanical rewrite of spaced dashes', () => {
24
+ it('replaces a spaced em-dash before lowercase with a comma', () => {
25
+ const r = scrubVoice('on it — checking the calendar')
26
+ expect(r.scrubbed).toBe('on it, checking the calendar')
27
+ expect(r.replaced).toBe(1)
28
+ })
29
+
30
+ it('replaces a spaced em-dash before an uppercase letter with a period', () => {
31
+ // The model often writes "Here's the result — Done." style.
32
+ const r = scrubVoice("Here's the result — Done.")
33
+ expect(r.scrubbed).toBe("Here's the result. Done.")
34
+ expect(r.replaced).toBe(1)
35
+ })
36
+
37
+ it('handles multiple em-dashes in one sentence', () => {
38
+ const r = scrubVoice('one — two — three — done')
39
+ expect(r.scrubbed).toBe('one, two, three, done')
40
+ expect(r.replaced).toBe(3)
41
+ })
42
+
43
+ it('treats en-dash (–) identically to em-dash', () => {
44
+ const r = scrubVoice('on it – checking the calendar')
45
+ expect(r.scrubbed).toBe('on it, checking the calendar')
46
+ expect(r.replaced).toBe(1)
47
+ })
48
+
49
+ it('replaces unspaced word-dash-word as a comma', () => {
50
+ // Less common but seen in tightly-typed prose.
51
+ const r = scrubVoice('flag—on or flag—off')
52
+ expect(r.scrubbed).toBe('flag, on or flag, off')
53
+ expect(r.replaced).toBe(2)
54
+ })
55
+
56
+ it('replaces end-of-line dashes with a period', () => {
57
+ const r = scrubVoice('thinking out loud —\nnext line here')
58
+ expect(r.scrubbed).toBe('thinking out loud.\nnext line here')
59
+ expect(r.replaced).toBe(1)
60
+ })
61
+
62
+ it('converts a leading-dash sentence-start to ASCII hyphen', () => {
63
+ // Quoted-style or list-bullet em-dash at message start; falls
64
+ // through to the catch-all rule.
65
+ const r = scrubVoice('— note: ship it')
66
+ expect(r.scrubbed).toBe('- note: ship it')
67
+ expect(r.replaced).toBe(1)
68
+ })
69
+ })
70
+
71
+ describe('protected regions are left alone', () => {
72
+ it('preserves dashes inside fenced code blocks', () => {
73
+ const input = 'here is code:\n```bash\nfoo --bar — baz\n```\nand prose — done'
74
+ const r = scrubVoice(input)
75
+ expect(r.scrubbed).toBe(
76
+ 'here is code:\n```bash\nfoo --bar — baz\n```\nand prose, done',
77
+ )
78
+ expect(r.replaced).toBe(1)
79
+ })
80
+
81
+ it('preserves dashes inside inline code', () => {
82
+ const r = scrubVoice('the flag `--really — keep` matters — yes')
83
+ expect(r.scrubbed).toBe('the flag `--really — keep` matters, yes')
84
+ expect(r.replaced).toBe(1)
85
+ })
86
+
87
+ it('preserves dashes inside <code> HTML tags', () => {
88
+ const r = scrubVoice('see <code>x — y</code> and note — ok')
89
+ expect(r.scrubbed).toBe('see <code>x — y</code> and note, ok')
90
+ expect(r.replaced).toBe(1)
91
+ })
92
+
93
+ it('preserves dashes inside <pre> HTML tags', () => {
94
+ const r = scrubVoice('block:\n<pre>x — y\nz — w</pre>\nend — ok')
95
+ expect(r.scrubbed).toBe('block:\n<pre>x — y\nz — w</pre>\nend, ok')
96
+ expect(r.replaced).toBe(1)
97
+ })
98
+
99
+ it('preserves dashes inside URLs', () => {
100
+ const r = scrubVoice('see https://example.com/a—b for context — ok')
101
+ expect(r.scrubbed).toBe(
102
+ 'see https://example.com/a—b for context, ok',
103
+ )
104
+ expect(r.replaced).toBe(1)
105
+ })
106
+
107
+ it('preserves a code block containing markdown that could otherwise match', () => {
108
+ // The placeholder restore must put the original raw fence back,
109
+ // not a transformed copy.
110
+ const fence =
111
+ '```\n# heading — title\nfunction f() {}\n```'
112
+ const r = scrubVoice(fence + '\ntrailing — yes')
113
+ expect(r.scrubbed).toBe(fence + '\ntrailing, yes')
114
+ expect(r.replaced).toBe(1)
115
+ })
116
+ })
117
+
118
+ describe('no-op cases', () => {
119
+ it('returns identity (same string, replaced=0) when input has no dashes', () => {
120
+ const input = 'no dashes anywhere, just commas and periods.'
121
+ const r = scrubVoice(input)
122
+ expect(r.scrubbed).toBe(input)
123
+ expect(r.replaced).toBe(0)
124
+ })
125
+
126
+ it('returns identity when input is empty', () => {
127
+ const r = scrubVoice('')
128
+ expect(r.scrubbed).toBe('')
129
+ expect(r.replaced).toBe(0)
130
+ })
131
+
132
+ it('kill switch (SWITCHROOM_DISABLE_VOICE_SCRUB=1) returns input unchanged', () => {
133
+ process.env.SWITCHROOM_DISABLE_VOICE_SCRUB = '1'
134
+ const r = scrubVoice('on it — checking')
135
+ expect(r.scrubbed).toBe('on it — checking')
136
+ expect(r.replaced).toBe(0)
137
+ })
138
+
139
+ it('kill switch accepts "true" as well as "1"', () => {
140
+ process.env.SWITCHROOM_DISABLE_VOICE_SCRUB = 'true'
141
+ const r = scrubVoice('on it — checking')
142
+ expect(r.scrubbed).toBe('on it — checking')
143
+ expect(r.replaced).toBe(0)
144
+ })
145
+ })
146
+
147
+ describe('realistic fleet samples', () => {
148
+ it('scrubs a multi-step status message', () => {
149
+ const input =
150
+ "I'll check the calendar — should take a few seconds. " +
151
+ 'Result: empty for Saturday — nothing scheduled. Anything else?'
152
+ const r = scrubVoice(input)
153
+ expect(r.scrubbed).toBe(
154
+ "I'll check the calendar, should take a few seconds. " +
155
+ 'Result: empty for Saturday, nothing scheduled. Anything else?',
156
+ )
157
+ expect(r.replaced).toBe(2)
158
+ })
159
+
160
+ it('mixed prose and code keeps the code untouched', () => {
161
+ const input =
162
+ 'Running `git status --short` — looks clean. ' +
163
+ '```\nM file.ts — modified\n```\n' +
164
+ 'Ready to commit — go?'
165
+ const r = scrubVoice(input)
166
+ expect(r.scrubbed).toBe(
167
+ 'Running `git status --short`, looks clean. ' +
168
+ '```\nM file.ts — modified\n```\n' +
169
+ 'Ready to commit, go?',
170
+ )
171
+ expect(r.replaced).toBe(2)
172
+ })
173
+ })
174
+ })
@@ -0,0 +1,199 @@
1
+ /**
2
+ * text-voice-scrub.ts — deterministic prose-style enforcement at the
3
+ * gateway.
4
+ *
5
+ * Background. Despite three landed soft fixes (SOUL.md.hbs "never use
6
+ * em-dashes" rule, PR #1177 voice consolidation, the /humanizer skill),
7
+ * sampling 2,867 recent fleet outbound replies on 2026-05-23 showed
8
+ * em-dashes still present in 73% of agent messages (3.23 per 1k chars).
9
+ * Soft layer was not winning. The operator's framing is the same one
10
+ * that drove the over-ping safety net (#1674) and the silent-reply
11
+ * auto-edit (#1677): when the model authors voice and the framework
12
+ * owns enforcement, soft instructions fail under load. Make the
13
+ * framework do it.
14
+ *
15
+ * Scope. Em / en dashes only. The wider "AI-tell phrase denylist"
16
+ * (smoking gun, by design, etc.) was scoped OUT after data showed
17
+ * those phrases land in <0.5% of fleet messages and substituting
18
+ * them risks semantic loss. Em-dash → comma/period is a pure
19
+ * mechanical transform with no semantic loss when the surrounding
20
+ * text is whitespace-separated prose, and a no-op when the dash
21
+ * is inside code or a URL.
22
+ *
23
+ * Pipeline integration. Apply BEFORE markdownToHtml so the scrub
24
+ * runs on the original model text, not on rendered HTML where
25
+ * the dash might already be tag-escaped or live inside a parked
26
+ * code-block placeholder. Apply BEFORE outboundDedup.check so
27
+ * dedup keys see the post-scrub content (same text from a retry
28
+ * collapses cleanly).
29
+ *
30
+ * Code-region awareness. The scrubber MUST preserve dashes inside:
31
+ * - fenced code blocks: ```lang\n...\n```
32
+ * - inline code: `...`
33
+ * - explicit Telegram HTML code tags: <code>...</code>, <pre>...</pre>
34
+ * - URLs (rare to contain em-dashes, but technically valid IDN)
35
+ * The strategy is to park each protected region with a sentinel,
36
+ * scrub the rest, then restore. Mirrors the well-trodden
37
+ * markdownToHtml() codeBlocks/inlineCode placeholder pattern at
38
+ * format.ts:254-272.
39
+ *
40
+ * Kill switch. `SWITCHROOM_DISABLE_VOICE_SCRUB=1` returns the input
41
+ * unchanged and reports zero replacements. Same shape every other
42
+ * gateway safety net uses; rollback is one env var + agent restart.
43
+ */
44
+
45
/** Outcome of one `scrubVoice` call. */
export interface VoiceScrubResult {
  /**
   * Text after scrubbing. Identical to the input when nothing was
   * rewritten or when the kill switch is set.
   */
  scrubbed: string
  /**
   * Number of dashes rewritten in this input. Callers forward it as the
   * `replaced` field of the `voice_scrub_applied` runtime metric.
   */
  replaced: number
}
54
+
55
// Placeholder vocabulary used while protected regions are parked.
// Every placeholder is `<prefix><index>\x00`; NUL brackets the token so
// it cannot be confused with ordinary message text.
const NULL = '\x00'
const FENCE_PH = NULL + 'VS_FENCE'
const INLINE_PH = NULL + 'VS_INLINE'
const HTML_CODE_PH = NULL + 'VS_HTMLCODE'
const HTML_PRE_PH = NULL + 'VS_HTMLPRE'
const URL_PH = NULL + 'VS_URL'

// An http(s) URL runs up to the next whitespace character.
const URL_RE = /https?:\/\/\S+/g
63
+
64
/**
 * Kill-switch check, evaluated on every call.
 *
 * @returns `false` only when `SWITCHROOM_DISABLE_VOICE_SCRUB` is exactly
 *   `'1'` or `'true'`; `true` for any other value or when unset.
 */
function enabled(): boolean {
  const flag = process.env.SWITCHROOM_DISABLE_VOICE_SCRUB
  return flag !== '1' && flag !== 'true'
}
68
+
69
/**
 * Swap every protected region for a `<prefix><index>\x00` placeholder so
 * the dash-replacement pass cannot touch it.
 *
 * Passes run in a fixed order: fenced code, `<pre>`, `<code>`, inline
 * backticks, URLs. Fences go first so a backtick inside a fence is not
 * read as the start of inline code.
 *
 * @param text - Raw outbound text.
 * @returns `parked`, the text with placeholders in place, and `parts`,
 *   the original fragments in the order they were parked (each `idx`
 *   equals the fragment's position in the array).
 */
function park(text: string): {
  parked: string
  parts: Array<{ prefix: string; idx: number; raw: string }>
} {
  const parts: Array<{ prefix: string; idx: number; raw: string }> = []
  const passes: Array<[RegExp, string]> = [
    [/```[\s\S]*?```/g, FENCE_PH],
    [/<pre>[\s\S]*?<\/pre>/gi, HTML_PRE_PH],
    [/<code[^>]*>[\s\S]*?<\/code>/gi, HTML_CODE_PH],
    [/`[^`\n]+`/g, INLINE_PH],
    [URL_RE, URL_PH],
  ]

  let parked = text
  for (const [pattern, prefix] of passes) {
    parked = parked.replace(pattern, (raw) => {
      // The index is global across passes, so each placeholder is unique.
      const idx = parts.length
      parts.push({ prefix, idx, raw })
      return `${prefix}${idx}${NULL}`
    })
  }

  return { parked, parts }
}
112
+
113
/**
 * Put parked fragments back in place of their placeholders.
 *
 * Fragments are restored newest first. A later pass can capture an
 * earlier placeholder inside its own raw text (e.g. a URL that swallowed
 * an inline-code placeholder); walking backwards re-exposes that inner
 * placeholder before its own turn comes.
 *
 * @param text - Scrubbed text still containing placeholders.
 * @param parts - Fragments recorded by `park`, in parking order.
 * @returns The text with every placeholder replaced by its raw fragment.
 */
function restore(
  text: string,
  parts: Array<{ prefix: string; idx: number; raw: string }>,
): string {
  return parts.reduceRight(
    // Function replacer: `$` sequences in the raw fragment stay literal.
    (acc, part) => acc.replace(`${part.prefix}${part.idx}${NULL}`, () => part.raw),
    text,
  )
}
126
+
127
/**
 * Replace em / en dashes with context-appropriate ASCII punctuation.
 *
 * Every typographic dash in `text` is rewritten, so protected regions
 * (code, URLs) must already be parked behind placeholders.
 *
 * Rules, applied in order:
 *   1. ` — ` / ` – ` flanked by single spaces between two non-space
 *      characters → `. ` when the next character is an ASCII uppercase
 *      letter (reads as a new sentence), otherwise `, ` (mid-clause
 *      continuation).
 *   2. Dash preceded by a space at end of line or end of input
 *      (` —\n`, trailing ` —`) → `.`, keeping the trailing whitespace.
 *   3. Unspaced dash between two digits ("10–20") → `-`. This is a
 *      numeric range; a comma would change its meaning.
 *   4. Unspaced dash between two word characters ("word—word") → `, `.
 *   5. Any surviving dash (e.g. a leading "— note") → `-`.
 *
 * Rules 1, 3 and 4 inspect the following character with a lookahead
 * instead of consuming it, so that character can also be the left-hand
 * side of the next dash ("a — b — c", "x—y—z").
 *
 * @param text - Prose to rewrite.
 * @returns `out`, the rewritten text, and `replaced`, the number of
 *   dashes rewritten.
 */
function replaceDashes(text: string): { out: string; replaced: number } {
  let replaced = 0
  let out = text

  // #1: spaced dash mid-prose. The lookahead captures the next character
  // for the uppercase test without consuming it.
  out = out.replace(/(\S) [\u2014\u2013] (?=(\S))/g, (_m, before: string, after: string) => {
    replaced++
    return /[A-Z]/.test(after) ? `${before}. ` : `${before}, `
  })

  // #2: dash at end of line or end of input. Treat as a full stop.
  out = out.replace(/ [\u2014\u2013](\s*(?:\n|$))/g, (_m, ws: string) => {
    replaced++
    return `.${ws}`
  })

  // #3: numeric range. Keep it a range with an ASCII hyphen.
  out = out.replace(/(\d)[\u2014\u2013](?=\d)/g, (_m, before: string) => {
    replaced++
    return `${before}-`
  })

  // #4: bare dash between word characters. Missing-space form of #1.
  out = out.replace(/(\w)[\u2014\u2013](?=\w)/g, (_m, before: string) => {
    replaced++
    return `${before}, `
  })

  // #5: anything still standing becomes an ASCII hyphen so no
  // typographic dash escapes the gate.
  out = out.replace(/[\u2014\u2013]/g, () => {
    replaced++
    return '-'
  })

  return { out, replaced }
}
180
+
181
/**
 * Public entry point: rewrite em / en dashes in outbound text while
 * leaving code regions and URLs byte-for-byte intact.
 *
 * The kill switch (`SWITCHROOM_DISABLE_VOICE_SCRUB`) is read on every
 * call, so changing the variable takes effect on the next call.
 *
 * @param text - Outbound message text.
 * @returns The scrubbed text and the number of dashes rewritten. The
 *   input is returned unchanged with `replaced: 0` when the kill switch
 *   is set, the input is empty, or no dash was rewritten.
 */
export function scrubVoice(text: string): VoiceScrubResult {
  if (!enabled() || text.length === 0) {
    return { scrubbed: text, replaced: 0 }
  }
  const shielded = park(text)
  const rewrite = replaceDashes(shielded.parked)
  return rewrite.replaced === 0
    ? { scrubbed: text, replaced: 0 }
    : { scrubbed: restore(rewrite.out, shielded.parts), replaced: rewrite.replaced }
}