@iinm/plain-agent 1.11.9 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/iinm/plain-agent)
4
4
  [![npm version](https://img.shields.io/npm/v/@iinm/plain-agent)](https://www.npmjs.com/package/@iinm/plain-agent)
5
5
  [![install size](https://packagephobia.com/badge?p=@iinm/plain-agent)](https://packagephobia.com/result?p=@iinm/plain-agent)
6
- [![Socket Badge](https://badge.socket.dev/npm/package/@iinm/plain-agent/1.11.9)](https://socket.dev/npm/package/@iinm/plain-agent)
6
+ [![Socket Badge](https://badge.socket.dev/npm/package/@iinm/plain-agent/1.12.0)](https://socket.dev/npm/package/@iinm/plain-agent)
7
7
  [![CodeQL](https://github.com/iinm/plain-agent/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/iinm/plain-agent/actions/workflows/github-code-scanning/codeql)
8
8
 
9
9
  A lightweight terminal-based coding agent focused on safety and low token cost
@@ -25,7 +25,6 @@ A lightweight terminal-based coding agent focused on safety and low token cost
25
25
  - [Prompts](#prompts)
26
26
  - [Subagents](#subagents)
27
27
  - [Claude Code Plugin Support](#claude-code-plugin-support)
28
- - [Voice Input](#voice-input)
29
28
  - [Appendix: Creating Least-Privilege Users for Cloud Providers](#appendix-creating-least-privilege-users-for-cloud-providers)
30
29
  - [Developer Notes](#developer-notes)
31
30
 
@@ -848,52 +847,6 @@ Example:
848
847
  plain install-claude-code-plugins
849
848
  ```
850
849
 
851
- ## Voice Input
852
-
853
- Press **Ctrl-O** to start recording, then press it again to stop. Partial transcripts are inserted into the prompt as you speak, so you can edit and send them like regular text.
854
-
855
- ### Requirements
856
-
857
- - A recording command on `PATH`: `arecord`, `sox`, or `ffmpeg`.
858
- - An API key for the chosen provider.
859
- - Your host must have microphone access.
860
-
861
- ### Providers
862
-
863
- **OpenAI Realtime**
864
-
865
- ```js
866
- // ~/.config/plain-agent/config.local.json
867
- {
868
- "voiceInput": {
869
- "provider": "openai",
870
- "apiKey": "<OPENAI_API_KEY>"
871
- // "model": "gpt-4o-transcribe", // or "gpt-4o-mini-transcribe", "whisper-1"
872
- // "language": "ja" // ISO-639-1 code. Improves accuracy and latency.
873
- }
874
- }
875
- ```
876
-
877
- **Gemini Live**
878
-
879
- ```js
880
- // ~/.config/plain-agent/config.local.json
881
- {
882
- "voiceInput": {
883
- "provider": "gemini",
884
- "apiKey": "<GEMINI_API_KEY>"
885
- // "model": "gemini-3.1-flash-live-preview",
886
- // "language": "ja"
887
- }
888
- }
889
- ```
890
-
891
- ### Options
892
-
893
- - `toggleKey` — Rebind the toggle key. Accepts `"ctrl-<char>"` where `<char>`
894
- is a letter (a-z) or one of `[ \ ] ^ _`. Defaults to `"ctrl-o"`.
895
- - `recorder` — Override automatic recorder detection, e.g. `{ "command": "sox", "args": ["-q", "-d", "-b", "16", "-c", "1", "-r", "24000", "-e", "signed-integer", "-t", "raw", "-"] }`. It must write raw 16-bit little-endian mono PCM to stdout at 24 kHz (OpenAI) or 16 kHz (Gemini).
896
-
897
850
  ## Appendix: Creating Least-Privilege Users for Cloud Providers
898
851
 
899
852
  <details>
@@ -5,9 +5,15 @@
5
5
  "patterns": [
6
6
  {
7
7
  "toolName": "exec_command",
8
- "input": { "command": { "$regex": "^(find|grep)$" } },
8
+ "input": { "command": "find" },
9
9
  "action": "deny",
10
- "reason": "Use rg or fd instead"
10
+ "reason": "Use fd instead; fd respects .gitignore by default"
11
+ },
12
+ {
13
+ "toolName": "exec_command",
14
+ "input": { "command": "grep" },
15
+ "action": "deny",
16
+ "reason": "Use rg instead; rg respects .gitignore by default"
11
17
  },
12
18
  {
13
19
  "toolName": "exec_command",
@@ -146,6 +152,16 @@
146
152
  }
147
153
  ],
148
154
  "tests": [
155
+ {
156
+ "desc": "find should be denied",
157
+ "toolUse": { "toolName": "exec_command", "input": { "command": "find" } },
158
+ "expectedAction": "deny"
159
+ },
160
+ {
161
+ "desc": "grep should be denied",
162
+ "toolUse": { "toolName": "exec_command", "input": { "command": "grep" } },
163
+ "expectedAction": "deny"
164
+ },
149
165
  {
150
166
  "desc": "ls should be allowed",
151
167
  "toolUse": { "toolName": "exec_command", "input": { "command": "ls" } },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@iinm/plain-agent",
3
- "version": "1.11.9",
3
+ "version": "1.12.0",
4
4
  "description": "A lightweight terminal-based coding agent focused on safety and low token cost",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -2,8 +2,6 @@
2
2
  * @import { UserEventEmitter, AgentEventEmitter, AgentCommands } from "../agent"
3
3
  * @import { ClaudeCodePlugin } from "../claudeCodePlugin.mjs"
4
4
  * @import { Tool, SandboxModeProvider } from "../tool"
5
- * @import { VoiceInputConfig } from "../voice/input.mjs"
6
- * @import { VoiceSession } from "../voice/session.mjs"
7
5
  */
8
6
 
9
7
  import readline from "node:readline";
@@ -11,8 +9,6 @@ import { styleText } from "node:util";
11
9
  import { appendUsageRecord, buildUsageRecord } from "../usageStore.mjs";
12
10
  import { createSequentialExecutor } from "../utils/createSequentialExecutor.mjs";
13
11
  import { notify } from "../utils/notify.mjs";
14
- import { startVoiceSession } from "../voice/input.mjs";
15
- import { parseVoiceToggleKey } from "../voice/toggleKey.mjs";
16
12
  import { createCommandHandler } from "./commands.mjs";
17
13
  import { createCompleter, SLASH_COMMANDS } from "./completer.mjs";
18
14
  import {
@@ -21,7 +17,6 @@ import {
21
17
  printMessage,
22
18
  } from "./formatter.mjs";
23
19
  import { createInterruptTransform } from "./interruptTransform.mjs";
24
- import { createMuteTransform } from "./muteTransform.mjs";
25
20
  import { createPasteHandler } from "./pasteTransform.mjs";
26
21
  import { createStreamFormatter } from "./streamFormatter.mjs";
27
22
 
@@ -67,7 +62,6 @@ const HELP_MESSAGE = [
67
62
  * @property {boolean} sandbox
68
63
  * @property {() => Promise<void>} onStop
69
64
  * @property {ClaudeCodePlugin[]} [claudeCodePlugins]
70
- * @property {VoiceInputConfig} [voiceInput]
71
65
  * @property {Tool & SandboxModeProvider} [execCommandTool]
72
66
  */
73
67
 
@@ -112,7 +106,6 @@ export function startInteractiveSession({
112
106
  sandbox,
113
107
  onStop,
114
108
  claudeCodePlugins,
115
- voiceInput,
116
109
  execCommandTool,
117
110
  }) {
118
111
  /** @type {{ turn: boolean, multiLineBuffer: string[] | null, subagentName: string, toolSpinnerIndex: number, toolSpinnerLastTime: number }} */
@@ -127,19 +120,9 @@ export function startInteractiveSession({
127
120
  const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
128
121
  const SPINNER_INTERVAL_MS = 80;
129
122
 
130
- /**
131
- * Active voice input session, or null when not recording.
132
- * @type {{ session: VoiceSession, startCursor: number, transcriptLength: number } | null}
133
- */
134
- let voice = null;
135
-
136
123
  // Create the stream buffer instance for this session
137
124
  const streamBuffer = createStreamBuffer();
138
125
 
139
- // Parse the voice toggle key once at startup so misconfiguration fails
140
- // loudly instead of silently falling back.
141
- const voiceToggle = parseVoiceToggleKey(voiceInput?.toggleKey);
142
-
143
126
  const getCliPrompt = (subagentName = "", flashMessage = "") =>
144
127
  [
145
128
  "",
@@ -198,100 +181,7 @@ export function startInteractiveSession({
198
181
  cli.prompt();
199
182
  };
200
183
 
201
- const stopVoiceSession = async () => {
202
- if (!voice) return;
203
- const current = voice;
204
- voice = null;
205
- await current.session.stop();
206
- cli.setPrompt(currentCliPrompt);
207
- // @ts-expect-error - internal property
208
- cli._refreshLine?.();
209
- };
210
-
211
- const handleVoiceToggle = () => {
212
- // Ignore while the agent is working.
213
- if (!state.turn) return;
214
-
215
- if (voice) {
216
- stopVoiceSession();
217
- return;
218
- }
219
-
220
- if (!voiceInput) {
221
- cli.setPrompt(
222
- getCliPrompt(
223
- state.subagentName,
224
- styleText(
225
- "yellow",
226
- `Voice input not configured. Set \`voiceInput\` in your config to enable ${voiceToggle.label}.`,
227
- ),
228
- ),
229
- );
230
- cli.prompt(true);
231
- return;
232
- }
233
-
234
- const startCursor = cli.cursor;
235
- const session = startVoiceSession({
236
- config: voiceInput,
237
- callbacks: {
238
- onTranscript: (delta) => {
239
- if (!voice) return;
240
- const insertAt = voice.startCursor + voice.transcriptLength;
241
- // Insert delta at the recording's insertion point. User input is
242
- // swallowed while recording, so the buffer around `insertAt` is
243
- // stable.
244
- const before = cli.line.slice(0, insertAt);
245
- const after = cli.line.slice(insertAt);
246
- // `line` and `cursor` are declared readonly in the Node typings but
247
- // are writable at runtime — the existing code already patches
248
- // `_refreshLine` in the same way.
249
- const mutableCli = /** @type {{ line: string, cursor: number }} */ (
250
- /** @type {unknown} */ (cli)
251
- );
252
- mutableCli.line = before + delta + after;
253
- mutableCli.cursor = insertAt + delta.length;
254
- voice.transcriptLength += delta.length;
255
- // @ts-expect-error - internal property
256
- cli._refreshLine?.();
257
- },
258
- onError: (err) => {
259
- voice = null;
260
- cli.setPrompt(
261
- getCliPrompt(
262
- state.subagentName,
263
- styleText("red", `Voice input error: ${err.message}`),
264
- ),
265
- );
266
- cli.prompt(true);
267
- },
268
- onClose: () => {
269
- if (!voice) return;
270
- voice = null;
271
- cli.setPrompt(currentCliPrompt);
272
- // @ts-expect-error - internal property
273
- cli._refreshLine?.();
274
- },
275
- },
276
- });
277
- voice = { session, startCursor, transcriptLength: 0 };
278
- cli.setPrompt(
279
- getCliPrompt(
280
- state.subagentName,
281
- styleText(["red", "bold"], `● REC (${voiceToggle.label} to stop)`),
282
- ),
283
- );
284
- // @ts-expect-error - internal property
285
- cli._refreshLine?.();
286
- };
287
-
288
184
  const handleCtrlC = () => {
289
- // Stop voice recording first if active.
290
- if (voice) {
291
- stopVoiceSession();
292
- return;
293
- }
294
-
295
185
  // Agent turn: pause auto-approve; do not clear input.
296
186
  if (!state.turn) {
297
187
  agentCommands.pauseAutoApprove();
@@ -347,20 +237,14 @@ export function startInteractiveSession({
347
237
  };
348
238
 
349
239
  // Pre-readline pipeline:
350
- // stdin -> interrupt (Ctrl-C / Ctrl-D) -> mute (voice recording) -> paste (bracketed paste) -> readline
240
+ // stdin -> interrupt (Ctrl-C / Ctrl-D) -> paste (bracketed paste) -> readline
351
241
  const interrupt = createInterruptTransform({
352
242
  onCtrlC: handleCtrlC,
353
243
  onCtrlD: handleCtrlD,
354
- onVoiceToggle: handleVoiceToggle,
355
- voiceToggleByte: voiceToggle.byte,
356
244
  });
357
- // While a voice session is recording, swallow all stdin bytes other than
358
- // Ctrl-C / Ctrl-D / the voice toggle key so transcript insertion stays
359
- // consistent.
360
- const mute = createMuteTransform({ isMuted: () => voice !== null });
361
245
  const paste = createPasteHandler();
362
246
 
363
- process.stdin.pipe(interrupt).pipe(mute).pipe(paste.transform);
247
+ process.stdin.pipe(interrupt).pipe(paste.transform);
364
248
 
365
249
  // Enable bracketed paste mode
366
250
  if (process.stdout.isTTY) {
@@ -1,31 +1,21 @@
1
1
  import { Transform } from "node:stream";
2
2
 
3
3
  /**
4
- * Create a Transform that intercepts Ctrl-C (0x03), Ctrl-D (0x04), and an
5
- * optional "voice toggle" byte (default Ctrl-O, 0x0f). When one of those
6
- * bytes is seen anywhere in a chunk, the corresponding callback is invoked
7
- * and the entire chunk is dropped so that downstream consumers (e.g.
8
- * readline) never observe it. All other input flows through unchanged.
4
+ * Create a Transform that intercepts Ctrl-C (0x03) and Ctrl-D (0x04).
5
+ * When one of those bytes is seen anywhere in a chunk, the corresponding
6
+ * callback is invoked and the entire chunk is dropped so that downstream
7
+ * consumers (e.g. readline) never observe it. All other input flows
8
+ * through unchanged.
9
9
  *
10
10
  * Priority when multiple handled bytes appear in the same chunk:
11
- * Ctrl-C > Ctrl-D > voice toggle.
11
+ * Ctrl-C > Ctrl-D.
12
12
  *
13
13
  * @param {object} handlers
14
14
  * @param {() => void} handlers.onCtrlC - Called when Ctrl-C is detected
15
15
  * @param {() => void} handlers.onCtrlD - Called when Ctrl-D is detected
16
- * @param {() => void} [handlers.onVoiceToggle]
17
- * Called when the voice toggle byte is detected.
18
- * @param {number} [handlers.voiceToggleByte]
19
- * Byte value for the voice toggle key. Defaults to 0x0f (Ctrl-O).
20
16
  * @returns {Transform}
21
17
  */
22
- export function createInterruptTransform({
23
- onCtrlC,
24
- onCtrlD,
25
- onVoiceToggle,
26
- voiceToggleByte = 0x0f,
27
- }) {
28
- const voiceToggleChar = String.fromCharCode(voiceToggleByte);
18
+ export function createInterruptTransform({ onCtrlC, onCtrlD }) {
29
19
  return new Transform({
30
20
  transform(chunk, _encoding, callback) {
31
21
  const data = chunk.toString("utf8");
@@ -39,11 +29,6 @@ export function createInterruptTransform({
39
29
  callback();
40
30
  return;
41
31
  }
42
- if (onVoiceToggle && data.includes(voiceToggleChar)) {
43
- onVoiceToggle();
44
- callback();
45
- return;
46
- }
47
32
  this.push(chunk);
48
33
  callback();
49
34
  },
package/src/config.d.ts CHANGED
@@ -10,7 +10,6 @@ import {
10
10
  WebSearchToolGeminiOptions,
11
11
  WebSearchToolGeminiVertexAIOptions,
12
12
  } from "./tools/webSearch.mjs";
13
- import { VoiceInputConfig } from "./voice/input.mjs";
14
13
 
15
14
  /**
16
15
  * JSON-serializable webFetch configuration.
@@ -88,7 +87,6 @@ export type AppConfig = {
88
87
  };
89
88
  mcpServers?: Record<string, MCPServerConfig>;
90
89
  notifyCmd?: { command: string; args?: string[] };
91
- voiceInput?: VoiceInputConfig;
92
90
  claudeCodePlugins?: ClaudeCodePluginRepo[];
93
91
  };
94
92
 
package/src/config.mjs CHANGED
@@ -129,9 +129,6 @@ export async function loadAppConfig(options = {}) {
129
129
  ...(merged.claudeCodePlugins ?? []),
130
130
  ...(config.claudeCodePlugins ?? []),
131
131
  ],
132
- voiceInput: config.voiceInput
133
- ? { ...(merged.voiceInput ?? {}), ...config.voiceInput }
134
- : merged.voiceInput,
135
132
  };
136
133
  }
137
134
 
package/src/main.mjs CHANGED
@@ -447,7 +447,6 @@ export async function main(argv = process.argv) {
447
447
  execCommandTool,
448
448
  notifyCmd: appConfig.notifyCmd,
449
449
  claudeCodePlugins: resolvePluginPaths(appConfig.claudeCodePlugins ?? []),
450
- voiceInput: appConfig.voiceInput,
451
450
  });
452
451
  }
453
452
  }
@@ -1,26 +0,0 @@
1
- import { Transform } from "node:stream";
2
-
3
- /**
4
- * Create a Transform that swallows all chunks while `isMuted()` returns true,
5
- * and passes them through unchanged while it returns false.
6
- *
7
- * Intended to sit between `createInterruptTransform` and the paste handler so
8
- * that callers can fully silence regular stdin input during special modes
9
- * (e.g. while a voice input session is recording) without coupling that
10
- * concern to the interrupt-detection logic.
11
- *
12
- * @param {object} options
13
- * @param {() => boolean} options.isMuted
14
- * Called for each incoming chunk; when true the chunk is dropped.
15
- * @returns {Transform}
16
- */
17
- export function createMuteTransform({ isMuted }) {
18
- return new Transform({
19
- transform(chunk, _encoding, callback) {
20
- if (!isMuted()) {
21
- this.push(chunk);
22
- }
23
- callback();
24
- },
25
- });
26
- }
@@ -1,102 +0,0 @@
1
- import { isObjectLike, startWebSocketVoiceSession } from "./session.mjs";
2
-
3
- /**
4
- * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./session.mjs"
5
- */
6
-
7
- /**
8
- * @typedef {Object} VoiceInputGeminiConfig
9
- * @property {"gemini"} provider
10
- * @property {string} apiKey
11
- * @property {string} [model] - Defaults to "gemini-3.1-flash-live-preview".
12
- * @property {string} [language] - ISO-639-1 code (e.g. "ja", "en"). Passed to the model as a system instruction since Gemini Live has no native language hint for input transcription.
13
- * @property {string} [baseURL]
14
- * @property {VoiceRecorderConfig} [recorder]
15
- * @property {string} [toggleKey]
16
- */
17
-
18
- const GEMINI_DEFAULT_MODEL = "gemini-3.1-flash-live-preview";
19
- const GEMINI_DEFAULT_WS =
20
- "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";
21
- const GEMINI_SAMPLE_RATE = 16000;
22
- const GEMINI_LABEL = "Gemini Live";
23
-
24
- /**
25
- * Start a voice input session backed by the Gemini Live BidiGenerateContent
26
- * WebSocket. Spawns a recorder, streams PCM as base64 JSON messages, and
27
- * forwards transcript deltas via `onTranscript`.
28
- *
29
- * Gemini Live was designed for voice agents, not pure STT, so the setup
30
- * message forces `maxOutputTokens: 1` and disables thinking on 2.5 models
31
- * to minimise wasted audio output.
32
- *
33
- * @param {object} options
34
- * @param {VoiceInputGeminiConfig} options.config
35
- * @param {VoiceSessionCallbacks} options.callbacks
36
- * @returns {VoiceSession}
37
- */
38
- export function startGeminiVoiceSession({ config, callbacks }) {
39
- /** @type {VoiceProviderHooks<VoiceInputGeminiConfig>} */
40
- const hooks = {
41
- label: GEMINI_LABEL,
42
- sampleRate: GEMINI_SAMPLE_RATE,
43
- buildWsUrl(config) {
44
- const base = config.baseURL ?? GEMINI_DEFAULT_WS;
45
- return `${base}?key=${encodeURIComponent(config.apiKey)}`;
46
- },
47
- buildSetupMessage(config) {
48
- const model = config.model ?? GEMINI_DEFAULT_MODEL;
49
- /** @type {Record<string, unknown>} */
50
- const generationConfig = {
51
- // https://ai.google.dev/gemini-api/docs/live-api/capabilities#response-modalities
52
- // > The native audio models only support `AUDIO` response modality.
53
- responseModalities: ["AUDIO"],
54
- maxOutputTokens: 1,
55
- };
56
- if (model.includes("2.5")) {
57
- generationConfig.thinkingConfig = { thinkingBudget: 0 };
58
- }
59
- /** @type {Record<string, unknown>} */
60
- const setup = {
61
- model: `models/${model}`,
62
- generationConfig,
63
- inputAudioTranscription: {},
64
- };
65
- if (config.language) {
66
- setup.systemInstruction = {
67
- parts: [{ text: `The user is speaking in ${config.language}.` }],
68
- };
69
- }
70
- return { setup };
71
- },
72
- isReadyMessage(message) {
73
- return isObjectLike(message) && "setupComplete" in message;
74
- },
75
- extractTranscript(message) {
76
- if (!isObjectLike(message)) return undefined;
77
- const serverContent = message.serverContent;
78
- if (!isObjectLike(serverContent)) return undefined;
79
- const transcription = serverContent.inputTranscription;
80
- if (
81
- isObjectLike(transcription) &&
82
- typeof transcription.text === "string" &&
83
- transcription.text.length > 0
84
- ) {
85
- return transcription.text;
86
- }
87
- return undefined;
88
- },
89
- buildAudioPayload(chunk, sampleRate) {
90
- return {
91
- realtimeInput: {
92
- audio: {
93
- data: chunk.toString("base64"),
94
- mimeType: `audio/pcm;rate=${sampleRate}`,
95
- },
96
- },
97
- };
98
- },
99
- };
100
-
101
- return startWebSocketVoiceSession({ hooks, config, callbacks });
102
- }
@@ -1,29 +0,0 @@
1
- import { startGeminiVoiceSession } from "./gemini.mjs";
2
- import { startOpenAIVoiceSession } from "./openai.mjs";
3
- import { failVoiceSessionAsync } from "./session.mjs";
4
-
5
- /**
6
- * @typedef {import("./openai.mjs").VoiceInputOpenAIConfig | import("./gemini.mjs").VoiceInputGeminiConfig} VoiceInputConfig
7
- */
8
- /**
9
- * Start a voice input session. Dispatches to the provider-specific
10
- * implementation based on `config.provider`.
11
- *
12
- * @param {object} options
13
- * @param {VoiceInputConfig} options.config
14
- * @param {import("./session.mjs").VoiceSessionCallbacks} options.callbacks
15
- * @returns {import("./session.mjs").VoiceSession}
16
- */
17
- export function startVoiceSession({ config, callbacks }) {
18
- if (config.provider === "openai") {
19
- return startOpenAIVoiceSession({ config, callbacks });
20
- }
21
- if (config.provider === "gemini") {
22
- return startGeminiVoiceSession({ config, callbacks });
23
- }
24
- const provider = /** @type {{ provider: string }} */ (config).provider;
25
- return failVoiceSessionAsync(
26
- callbacks,
27
- new Error(`Unsupported voiceInput.provider: ${provider}`),
28
- );
29
- }
@@ -1,102 +0,0 @@
1
- import { isObjectLike, startWebSocketVoiceSession } from "./session.mjs";
2
-
3
- /**
4
- * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./session.mjs"
5
- */
6
-
7
- /**
8
- * @typedef {Object} VoiceInputOpenAIConfig
9
- * @property {"openai"} provider
10
- * @property {string} apiKey
11
- * @property {string} [model] - Transcription model. Defaults to "gpt-realtime-whisper".
12
- * @property {string} [language] - ISO-639-1 code (e.g. "ja", "en"). Improves accuracy and latency when set.
13
- * @property {string} [baseURL]
14
- * @property {VoiceRecorderConfig} [recorder]
15
- * @property {string} [toggleKey] - "ctrl-<char>". Defaults to "ctrl-o".
16
- */
17
-
18
- const OPENAI_DEFAULT_TRANSCRIPTION_MODEL = "gpt-realtime-whisper";
19
- const OPENAI_DEFAULT_WS = "wss://api.openai.com/v1/realtime";
20
- const OPENAI_SAMPLE_RATE = 24000;
21
- const OPENAI_LABEL = "OpenAI Realtime";
22
-
23
- /**
24
- * Start a voice input session backed by the OpenAI Realtime transcription
25
- * WebSocket. Spawns a recorder, streams PCM as base64 JSON messages, and
26
- * forwards transcript deltas via `onTranscript`.
27
- *
28
- * @param {object} options
29
- * @param {VoiceInputOpenAIConfig} options.config
30
- * @param {VoiceSessionCallbacks} options.callbacks
31
- * @returns {VoiceSession}
32
- */
33
- export function startOpenAIVoiceSession({ config, callbacks }) {
34
- /** @type {VoiceProviderHooks<VoiceInputOpenAIConfig>} */
35
- const hooks = {
36
- label: OPENAI_LABEL,
37
- sampleRate: OPENAI_SAMPLE_RATE,
38
- buildWsUrl(config) {
39
- const base = config.baseURL ?? OPENAI_DEFAULT_WS;
40
- return `${base}?intent=transcription`;
41
- },
42
- buildWsOptions(config) {
43
- return {
44
- headers: {
45
- Authorization: `Bearer ${config.apiKey}`,
46
- },
47
- };
48
- },
49
- buildSetupMessage(config) {
50
- const model = config.model ?? OPENAI_DEFAULT_TRANSCRIPTION_MODEL;
51
- /** @type {{ model: string, language?: string }} */
52
- const transcription = { model };
53
- if (config.language) transcription.language = config.language;
54
- return {
55
- type: "session.update",
56
- session: {
57
- type: "transcription",
58
- audio: {
59
- input: {
60
- format: { type: "audio/pcm", rate: OPENAI_SAMPLE_RATE },
61
- transcription,
62
- },
63
- },
64
- },
65
- };
66
- },
67
- isReadyMessage(message) {
68
- return (
69
- isObjectLike(message) &&
70
- (message.type === "session.created" ||
71
- message.type === "session.updated")
72
- );
73
- },
74
- extractError(message) {
75
- if (!isObjectLike(message) || message.type !== "error") return undefined;
76
- const error = message.error;
77
- if (!isObjectLike(error)) return undefined;
78
- return typeof error.message === "string"
79
- ? error.message
80
- : JSON.stringify(error);
81
- },
82
- extractTranscript(message) {
83
- if (
84
- isObjectLike(message) &&
85
- message.type === "conversation.item.input_audio_transcription.delta" &&
86
- typeof message.delta === "string" &&
87
- message.delta.length > 0
88
- ) {
89
- return message.delta;
90
- }
91
- return undefined;
92
- },
93
- buildAudioPayload(chunk, _sampleRate) {
94
- return {
95
- type: "input_audio_buffer.append",
96
- audio: chunk.toString("base64"),
97
- };
98
- },
99
- };
100
-
101
- return startWebSocketVoiceSession({ hooks, config, callbacks });
102
- }
@@ -1,543 +0,0 @@
1
- import { spawn, spawnSync } from "node:child_process";
2
-
3
- /**
4
- * @typedef {Object} VoiceRecorderConfig
5
- * @property {string} command
6
- * @property {string[]} args
7
- * Must write raw 16-bit little-endian mono PCM to stdout at the sample
8
- * rate required by the chosen provider (24 kHz for OpenAI, 16 kHz for
9
- * Gemini).
10
- */
11
-
12
- /**
13
- * @typedef {Object} VoiceSessionCallbacks
14
- * @property {(text: string) => void} onTranscript
15
- * @property {(error: Error) => void} onError
16
- * @property {() => void} [onClose]
17
- */
18
-
19
- /**
20
- * @typedef {Object} VoiceSession
21
- * @property {() => Promise<void>} stop
22
- */
23
-
24
- /**
25
- * @typedef {Object} RecorderHandle
26
- * @property {() => void} stop
27
- */
28
-
29
- export const VOICE_DEBUG = process.env.PLAIN_VOICE_DEBUG === "1";
30
-
31
- /**
32
- * @param {number} sampleRate
33
- * @returns {VoiceRecorderConfig[]}
34
- */
35
- export function getRecorderCandidates(sampleRate) {
36
- const rate = String(sampleRate);
37
- const isMac = process.platform === "darwin";
38
- /** @type {VoiceRecorderConfig[]} */
39
- const candidates = [];
40
-
41
- if (!isMac) {
42
- candidates.push({
43
- command: "arecord",
44
- args: ["-q", "-f", "S16_LE", "-c", "1", "-r", rate, "-t", "raw"],
45
- });
46
- }
47
-
48
- candidates.push({
49
- command: "sox",
50
- args: [
51
- "-q",
52
- "-d",
53
- "-b",
54
- "16",
55
- "-c",
56
- "1",
57
- "-r",
58
- rate,
59
- "-e",
60
- "signed-integer",
61
- "-t",
62
- "raw",
63
- "-",
64
- ],
65
- });
66
-
67
- const ffmpegInput = isMac
68
- ? ["-f", "avfoundation", "-i", ":0"]
69
- : ["-f", "alsa", "-i", "default"];
70
- candidates.push({
71
- command: "ffmpeg",
72
- args: [
73
- "-hide_banner",
74
- "-loglevel",
75
- "error",
76
- ...ffmpegInput,
77
- "-ac",
78
- "1",
79
- "-ar",
80
- rate,
81
- "-f",
82
- "s16le",
83
- "-",
84
- ],
85
- });
86
-
87
- return candidates;
88
- }
89
-
90
- /**
91
- * @param {VoiceRecorderConfig[]} candidates
92
- * @returns {VoiceRecorderConfig | null}
93
- */
94
- export function detectRecorder(candidates) {
95
- return candidates.find((c) => isCommandAvailable(c.command)) ?? null;
96
- }
97
-
98
- /**
99
- * @param {string} command
100
- */
101
- export function isCommandAvailable(command) {
102
- if (process.platform === "win32") {
103
- const result = spawnSync("where", [command], { stdio: "ignore" });
104
- return result.status === 0;
105
- }
106
- const result = spawnSync("sh", ["-c", `command -v ${command}`], {
107
- stdio: "ignore",
108
- });
109
- return result.status === 0;
110
- }
111
-
112
- /**
113
- * Spawn a recorder subprocess that emits raw PCM on stdout, and wire its
114
- * lifecycle events to the provided callbacks. This is purely transport
115
- * plumbing — it knows nothing about any specific STT provider.
116
- *
117
- * @param {object} options
118
- * @param {VoiceRecorderConfig} options.recorder
119
- * @param {(chunk: Buffer) => void} options.onAudio
120
- * @param {(error: Error) => void} options.onError
121
- * @param {() => void} options.onExit - Called after the recorder subprocess exits (for any reason).
122
- * @returns {RecorderHandle}
123
- */
124
- export function startRecorder({ recorder, onAudio, onError, onExit }) {
125
- const child = spawn(recorder.command, recorder.args, {
126
- stdio: ["ignore", "pipe", "pipe"],
127
- });
128
-
129
- /** @type {string[]} */
130
- const stderrChunks = [];
131
- child.stderr.on("data", (chunk) => {
132
- stderrChunks.push(chunk.toString("utf8"));
133
- });
134
-
135
- child.on("error", (err) => {
136
- const suffix =
137
- /** @type {NodeJS.ErrnoException} */ (err).code === "ENOENT"
138
- ? ` (command "${recorder.command}" not found)`
139
- : "";
140
- onError(new Error(`Recorder failed to start${suffix}: ${err.message}`));
141
- });
142
-
143
- child.on("exit", (code, signal) => {
144
- if (code !== 0 && signal === null) {
145
- const stderrText = stderrChunks.join("").trim();
146
- onError(
147
- new Error(
148
- `Recorder "${recorder.command}" exited with code ${code}${
149
- stderrText ? `: ${stderrText}` : ""
150
- }`,
151
- ),
152
- );
153
- }
154
- onExit();
155
- });
156
-
157
- child.stdout.on("data", onAudio);
158
-
159
- return {
160
- stop() {
161
- try {
162
- child.kill("SIGTERM");
163
- } catch {
164
- // ignore
165
- }
166
- },
167
- };
168
- }
169
-
170
- /**
171
- * Report an error asynchronously and return an already-terminated session.
172
- *
173
- * Calls `onError` followed by `onClose` in a microtask, ensuring the caller
174
- * receives a valid {@link VoiceSession} synchronously while still notifying
175
- * the consumer of the failure.
176
- *
177
- * @param {VoiceSessionCallbacks} callbacks
178
- * @param {Error} error
179
- * @returns {VoiceSession}
180
- */
181
- export function failVoiceSessionAsync(callbacks, error) {
182
- queueMicrotask(() => {
183
- callbacks.onError(error);
184
- callbacks.onClose?.();
185
- });
186
- return { stop: async () => {} };
187
- }
188
-
189
- /**
190
- * Provider-specific hook contract for {@link startWebSocketVoiceSession}.
191
- *
192
- * Each hook is called at a specific point in the session lifecycle:
193
- *
194
- * 1. **Construction** – `buildWsUrl` (and optionally `buildWsOptions`) are
195
- * invoked immediately to create the WebSocket.
196
- * 2. **Open** – `buildSetupMessage` is sent as the first JSON message once the
197
- * WebSocket opens.
198
- * 3. **Ready** – `isReadyMessage` is tested on every incoming message until it
199
- * returns `true`. At that point the session transitions to *ready* and any
200
- * buffered audio chunks are flushed.
201
- * 4. **Streaming** – `buildAudioPayload` is called for every recorder chunk
202
- * while the WebSocket is open and ready.
203
- * 5. **Error extraction** – `extractError` is checked on every message before
204
- * transcript extraction. If it returns a string, the session reports an
205
- * error and drops the message.
206
- * 6. **Transcription** – `extractTranscript` is called on every message after
207
- * the session is ready. Non-empty results are pushed through the CJK
208
- * space normalizer and then forwarded to `onTranscript`.
209
- *
210
- * @template TConfig
211
- * @typedef {Object} VoiceProviderHooks
212
- * @property {string} label - Human-readable provider name (used in logs and
213
- * error messages).
214
- * @property {number} sampleRate - PCM sample rate expected by the provider
215
- * (e.g. 16000 for Gemini, 24000 for OpenAI). Passed to the recorder and
216
- * `buildAudioPayload`.
217
- * @property {(config: TConfig) => string} buildWsUrl - Returns the full
218
- * WebSocket URL, including any query parameters.
219
- * @property {(config: TConfig) => { headers?: Record<string, string> }} [buildWsOptions]
220
- * - Returns optional per-provider WebSocket constructor options. Node's
221
- * global WebSocket (undici) accepts a non-standard `headers` option that
222
- * is not declared in the standard typings.
223
- * @property {(config: TConfig) => object} buildSetupMessage - Returns the
224
- * first JSON message sent immediately after the WebSocket opens.
225
- * @property {(message: unknown) => boolean} isReadyMessage - Returns `true`
226
- * when the given server message signals that the provider is ready to
227
- * receive audio.
228
- * @property {(message: unknown) => string | undefined} extractTranscript -
229
- * Extracts a transcript delta from a server message. Return `undefined`
230
- * when the message carries no transcript.
231
- * @property {(message: unknown) => string | undefined} [extractError] -
232
- * Extracts an error description from a server message. Return `undefined`
233
- * when the message carries no error.
234
- * @property {(chunk: Buffer, sampleRate: number) => object} buildAudioPayload -
235
- * Wraps a raw PCM chunk into the provider-specific JSON payload. The
236
- * `sampleRate` argument is the same value as `hooks.sampleRate`.
237
- */
238
-
239
/**
 * Shared WebSocket voice session implementation used by both Gemini and
 * OpenAI drivers.
 *
 * Responsibilities of this function:
 * - Detect and start a suitable system audio recorder.
 * - Establish the provider WebSocket connection.
 * - Manage the lifecycle (setup → ready → streaming → close).
 * - Buffer audio chunks while the connection is not yet ready.
 * - Apply CJK space normalization to transcript text.
 *
 * Responsibilities of the caller (the driver):
 * - Provide a {@link VoiceProviderHooks} object that knows the provider's
 *   wire protocol (URLs, headers, message schemas).
 * - Supply `config` and `callbacks` from the user's call site.
 *
 * Start-up failures (no recorder detected, recorder command missing, or the
 * WebSocket could not be constructed) are not thrown: they are handed to
 * `failVoiceSessionAsync`, and whatever it returns is returned as the session.
 *
 * @template TConfig
 * @param {object} options
 * @param {VoiceProviderHooks<TConfig>} options.hooks
 * @param {TConfig & { recorder?: VoiceRecorderConfig }} options.config
 * @param {VoiceSessionCallbacks} options.callbacks
 * @returns {VoiceSession}
 */
export function startWebSocketVoiceSession({ hooks, config, callbacks }) {
  const recorder =
    config.recorder ?? detectRecorder(getRecorderCandidates(hooks.sampleRate));
  if (!recorder) {
    return failVoiceSessionAsync(
      callbacks,
      new Error(
        "No voice recorder found. Install arecord, sox, or ffmpeg (or set `voiceInput.recorder`).",
      ),
    );
  }

  if (!isCommandAvailable(recorder.command)) {
    return failVoiceSessionAsync(
      callbacks,
      new Error(
        `Voice recorder command "${recorder.command}" not found on PATH.`,
      ),
    );
  }

  let stopped = false;
  let closeEmitted = false;
  let ready = false;
  /** @type {Buffer[]} */
  const pendingAudio = [];
  const normalizer = createCJKSpaceNormalizer();

  // `onClose` must fire at most once, whichever of `stop()` or the WebSocket
  // "close" event gets there first.
  function emitClose() {
    if (closeEmitted) return;
    closeEmitted = true;
    callbacks.onClose?.();
  }

  // The hooks and the WebSocket constructor can throw synchronously (for
  // example on a malformed URL). Route that through the same failure path as
  // the recorder checks above, before any recorder process has been started.
  /** @type {WebSocket} */
  let ws;
  try {
    const wsUrl = hooks.buildWsUrl(config);
    const wsOptions = hooks.buildWsOptions?.(config);

    // Node's global WebSocket (undici) accepts a non-standard `headers`
    // option. The built-in typings only declare the standards-compliant
    // constructor, so cast through `WebSocket`-as-constructor.
    const Ctor = /** @type {new (url: string, opts?: unknown) => WebSocket} */ (
      /** @type {unknown} */ (WebSocket)
    );
    ws = new Ctor(wsUrl, wsOptions);
  } catch (err) {
    return failVoiceSessionAsync(
      callbacks,
      new Error(
        `Failed to open ${hooks.label} WebSocket: ${formatError(err)}`,
        { cause: err },
      ),
    );
  }
  ws.binaryType = "arraybuffer";

  const rec = startRecorder({
    recorder,
    onAudio(chunk) {
      if (stopped) return;
      if (ready && ws.readyState === WebSocket.OPEN) {
        sendAudio(chunk);
      } else {
        // Not ready yet: keep the chunk until the provider's ready message.
        pendingAudio.push(chunk);
      }
    },
    onError(err) {
      if (!stopped) callbacks.onError(err);
      stop();
    },
    onExit() {
      stop();
    },
  });

  /**
   * Wraps one PCM chunk in the provider payload and sends it. Send failures
   * are dropped (logged only when `VOICE_DEBUG` is set).
   *
   * @param {Buffer} chunk
   */
  function sendAudio(chunk) {
    const payload = hooks.buildAudioPayload(chunk, hooks.sampleRate);
    try {
      ws.send(JSON.stringify(payload));
    } catch (err) {
      if (VOICE_DEBUG) {
        process.stderr.write(
          `[voiceInput] sendAudio dropped: ${formatError(err)}\n`,
        );
      }
    }
  }

  ws.addEventListener("open", () => {
    const setup = hooks.buildSetupMessage(config);
    try {
      ws.send(JSON.stringify(setup));
    } catch (err) {
      callbacks.onError(
        new Error(`Failed to send setup message: ${formatError(err)}`),
      );
      stop();
    }
  });

  ws.addEventListener("message", (event) => {
    if (stopped) return;
    let raw = "";
    let message;
    try {
      raw =
        typeof event.data === "string"
          ? event.data
          : Buffer.from(/** @type {ArrayBuffer} */ (event.data)).toString(
              "utf8",
            );
      message = JSON.parse(raw);
    } catch (err) {
      callbacks.onError(
        new Error(`Failed to parse server message: ${formatError(err)}`),
      );
      return;
    }
    if (!isObjectLike(message)) return;
    if (VOICE_DEBUG) {
      process.stderr.write(`[voiceInput] <- ${raw.slice(0, 800)}\n`);
    }

    // Errors are reported but do not end the session; the message is dropped.
    const errorText = hooks.extractError?.(message);
    if (errorText) {
      callbacks.onError(new Error(`${hooks.label} error: ${errorText}`));
      return;
    }

    if (!ready && hooks.isReadyMessage(message)) {
      ready = true;
      // Flush everything recorded while the provider was still setting up.
      for (const chunk of pendingAudio.splice(0)) {
        if (ws.readyState === WebSocket.OPEN) sendAudio(chunk);
      }
      return;
    }

    const transcript = hooks.extractTranscript(message);
    if (transcript && transcript.length > 0) {
      const normalized = normalizer.push(transcript);
      if (normalized.length > 0) {
        callbacks.onTranscript(normalized);
      }
    }
  });

  ws.addEventListener("error", (event) => {
    if (stopped) return;
    const message =
      /** @type {{ message?: string }} */ (event).message ?? "WebSocket error";
    callbacks.onError(new Error(`${hooks.label} WebSocket error: ${message}`));
    stop();
  });

  ws.addEventListener("close", (event) => {
    // 1000 and 1005 are not reported as errors; any other code on a close we
    // did not initiate is.
    if (!stopped && event.code !== 1000 && event.code !== 1005) {
      const reason = event.reason ? `: ${event.reason}` : "";
      callbacks.onError(
        new Error(
          `${hooks.label} WebSocket closed (code ${event.code}${reason})`,
        ),
      );
    }
    stopped = true;
    rec.stop();
    // Nothing can be sent any more, so release the buffered chunks.
    pendingAudio.length = 0;
    emitClose();
  });

  if (VOICE_DEBUG) {
    process.stderr.write(
      `[voiceInput] driver=${hooks.label} recorder=${recorder.command} ${recorder.args.join(" ")}\n`,
    );
  }

  /**
   * Stops the recorder and closes the WebSocket.
   *
   * **Note on asynchronicity:** This function is `async` only to satisfy the
   * {@link VoiceSession} interface. It is called without `await` from event
   * listeners (recorder exit, WebSocket error/close). Callers must not rely
   * on the returned promise because unhandled rejections would crash the
   * process. If the function is ever changed to perform real async work,
   * every call site must wrap it with `.catch(() => {})`.
   */
  async function stop() {
    if (stopped) return;
    stopped = true;
    rec.stop();
    pendingAudio.length = 0;
    if (
      ws.readyState === WebSocket.OPEN ||
      ws.readyState === WebSocket.CONNECTING
    ) {
      try {
        ws.close(1000, "client stop");
      } catch (err) {
        if (VOICE_DEBUG) {
          process.stderr.write(
            `[voiceInput] ws.close failed: ${formatError(err)}\n`,
          );
        }
      }
    }
    emitClose();
  }

  return { stop };
}
463
-
464
/**
 * Creates a streaming filter that removes whitespace sitting between two CJK
 * characters. Some providers return Japanese transcripts with
 * morpheme-separating spaces ("そう 、 声 で"); mixed strings such as
 * "Windows を使う" keep their inter-script spaces.
 *
 * State is kept across `push` calls so a space at the end of one chunk is
 * judged against the first non-space character of the next. `flush` returns
 * any whitespace still being held back and resets the state.
 *
 * @returns {{ push: (text: string) => string, flush: () => string }}
 */
export function createCJKSpaceNormalizer() {
  // Space, tab and ideographic space are the only characters held back.
  const SPACE_CHARS = new Set([" ", "\t", "\u3000"]);

  let lastChar = "";
  let heldSpaces = "";

  /**
   * @param {string} text
   * @returns {string}
   */
  function push(text) {
    const pieces = [];
    for (const ch of text) {
      if (SPACE_CHARS.has(ch)) {
        // Decide later, once the character after the run of spaces is known.
        heldSpaces += ch;
        continue;
      }
      if (heldSpaces !== "") {
        const betweenCJK = isCJKChar(lastChar) && isCJKChar(ch);
        if (!betweenCJK) {
          pieces.push(heldSpaces);
        }
        heldSpaces = "";
      }
      pieces.push(ch);
      lastChar = ch;
    }
    return pieces.join("");
  }

  /**
   * @returns {string}
   */
  function flush() {
    const trailing = heldSpaces;
    heldSpaces = "";
    lastChar = "";
    return trailing;
  }

  return { push, flush };
}
510
-
511
/**
 * Reports whether the first code point of `ch` belongs to one of the CJK
 * ranges used for space normalization. An empty string yields `false`.
 *
 * @param {string} ch
 * @returns {boolean}
 */
function isCJKChar(ch) {
  const code = ch.codePointAt(0);
  if (code === undefined) return false;
  return (
    // CJK Symbols and Punctuation, kana, Bopomofo, enclosed/compatibility
    // forms, through CJK Unified Ideographs Extension A.
    (code >= 0x3000 && code <= 0x4dbf) ||
    // CJK Unified Ideographs.
    (code >= 0x4e00 && code <= 0x9fff) ||
    // Hangul Syllables.
    (code >= 0xac00 && code <= 0xd7af) ||
    // CJK Compatibility Ideographs.
    (code >= 0xf900 && code <= 0xfaff) ||
    // Halfwidth and Fullwidth Forms.
    (code >= 0xff00 && code <= 0xffef) ||
    // Supplementary and Tertiary Ideographic Planes (Extension B onwards,
    // including Extensions G and H in plane 3).
    (code >= 0x20000 && code <= 0x3ffff)
  );
}
528
-
529
/**
 * Type guard: true for any non-null value whose `typeof` is "object"
 * (plain objects, arrays, class instances); false for primitives,
 * functions, `null` and `undefined`.
 *
 * @param {unknown} value
 * @returns {value is Record<string, unknown>}
 */
export function isObjectLike(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
536
-
537
/**
 * Produces a printable description of a caught value: the `message` of an
 * `Error` instance, otherwise the value converted with `String()`.
 *
 * @param {unknown} err
 * @returns {string}
 */
function formatError(err) {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
@@ -1,62 +0,0 @@
1
- /**
2
- * @typedef {Object} VoiceToggleKey
3
- * @property {number} byte
4
- * @property {string} label
5
- */
6
-
7
// Control bytes that already have another terminal/readline use and therefore
// cannot be used as a voice toggle.
// NOTE(review): 0x1b (Ctrl-[ = ESC) is not listed here even though ESC starts
// terminal escape sequences — confirm that allowing it is intentional.
const RESERVED_TERMINAL_BYTES = new Set([
  0x03, // Ctrl-C (SIGINT)
  0x04, // Ctrl-D (EOF / readline exit)
  0x09, // Ctrl-I (Tab)
  0x0a, // Ctrl-J (LF / Enter)
  0x0d, // Ctrl-M (CR / Enter)
  0x11, // Ctrl-Q (XON: resume terminal output)
  0x13, // Ctrl-S (XOFF: suspend terminal output)
]);
18
-
19
/**
 * Parse a "ctrl-<char>" binding into the raw byte the terminal sends in
 * raw mode. Only Ctrl-<char> is supported because it is the only family
 * the pre-readline pipeline can recognize without a full key decoder.
 *
 * The spec is trimmed and matched case-insensitively; `undefined` (or `null`)
 * falls back to "ctrl-o".
 *
 * @param {string | undefined} spec
 * @returns {VoiceToggleKey}
 * @throws {Error} When the spec is not a string, is not of the form
 *   "ctrl-<char>", names an unsupported character, or maps to a byte listed
 *   in `RESERVED_TERMINAL_BYTES`.
 */
export function parseVoiceToggleKey(spec) {
  // Config values are user-supplied; reject non-strings with the same
  // descriptive error instead of failing inside `.trim()`.
  if (spec != null && typeof spec !== "string") {
    throw new Error(
      `Invalid voiceInput.toggleKey "${String(spec)}". Expected "ctrl-<char>".`,
    );
  }

  const raw = (spec ?? "ctrl-o").trim().toLowerCase();

  const match = /^ctrl-(.)$/.exec(raw);
  if (!match) {
    throw new Error(
      `Invalid voiceInput.toggleKey "${spec}". Expected "ctrl-<char>".`,
    );
  }

  const ch = match[1];
  const code = ch.charCodeAt(0);

  // Subtracting a fixed offset from the character's ASCII code yields the
  // control byte (0x01–0x1f) the terminal sends for that Ctrl combination.
  let byte;
  if (code >= 0x61 && code <= 0x7a) {
    // a–z (0x61–0x7a): subtract 0x60 → 0x01 (Ctrl-A) – 0x1a (Ctrl-Z)
    byte = code - 0x60;
  } else if (code >= 0x5b && code <= 0x5f) {
    // [ \ ] ^ _ (0x5b–0x5f): subtract 0x40 → 0x1b (Ctrl-[) – 0x1f (Ctrl-_)
    byte = code - 0x40;
  } else {
    throw new Error(
      `Unsupported voiceInput.toggleKey "${spec}". Use ctrl-<letter> or ctrl-<[ \\ ] ^ _>.`,
    );
  }

  if (RESERVED_TERMINAL_BYTES.has(byte)) {
    throw new Error(
      `voiceInput.toggleKey "${spec}" conflicts with a reserved terminal/readline key.`,
    );
  }

  return { byte, label: `Ctrl-${ch.toUpperCase()}` };
}