@iinm/plain-agent 1.7.15 → 1.7.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,21 +4,28 @@
4
4
 
5
5
  # Plain Agent
6
6
 
7
- A lightweight CLI-based coding agent.
7
+ A lightweight CLI-based coding agent with zero framework dependencies.
8
8
 
9
- - **Safety controls** — Configure approval rules and sandboxing for safe execution
10
- - **Multi-provider** — Supports Anthropic, OpenAI, Gemini, Bedrock, Azure, Vertex AI, and more
11
- - **Sequential subagent delegation** — Delegate subtasks to specialized subagents with full visibility
12
- - **MCP support** — Connect to external MCP servers to extend available tools
13
- - **Claude Code compatible** — Reuse Claude Code plugins, agents, commands, and skills
9
+ ## Why Plain Agent?
14
10
 
15
- ## Safety Controls
11
+ - **Multi-provider** — Use Claude, GPT, Gemini, or any OpenAI-compatible model.
12
+ Switch providers without changing your workflow.
13
+ - **Fine-grained approval rules** — Auto-approve commands by name, arguments,
14
+ and file paths using regex patterns
15
+ ([`config.predefined.json`](https://github.com/iinm/plain-agent/blob/main/config/config.predefined.json)).
16
+ - **Path validation** — File paths must stay within the working directory
17
+ and git-ignored files (`.env`, etc.) are blocked.
18
+ - **Sandboxed execution** — Run the agent's shell commands inside a Docker
19
+ container with network access restricted to allowlisted destinations
20
+ (e.g., `registry.npmjs.org` only for `npm install`).
21
+ - **Extensible** — Define prompts and subagents in Markdown.
22
+ Connect MCP servers. Reuse Claude Code plugins.
16
23
 
17
- **Auto-Approval**: Tools with no side effects and no sensitive data access are automatically approved based on patterns defined in [`config.predefined.json#autoApproval`](https://github.com/iinm/plain-agent/blob/main/config/config.predefined.json).
24
+ ## Limitations
18
25
 
19
- **Path Validation**: All file paths in tool inputs are validated to remain within the working directory and under git control.
20
-
21
- ⚠️ `write_file` and `patch_file` require explicit path arguments. However, `exec_command` can run arbitrary code where file access cannot be validated. Use a sandbox for stronger isolation.
26
+ - **Sequential subagent execution** — Subagents run one at a time rather than
27
+ in parallel. The trade-off is full visibility: every step is streamed to
28
+ your terminal so you can follow exactly what each subagent is doing.
22
29
 
23
30
  ## Requirements
24
31
 
@@ -53,49 +60,28 @@ Create the configuration.
53
60
  {
54
61
  "name": "anthropic",
55
62
  "variant": "default",
56
- "apiKey": "FIXME"
63
+ "apiKey": "<ANTHROPIC_API_KEY>"
57
64
  // Or
58
65
  // "apiKey": { "$env": "ANTHROPIC_API_KEY" }
59
66
  },
60
67
  {
61
68
  "name": "gemini",
62
69
  "variant": "default",
63
- "apiKey": "FIXME"
70
+ "apiKey": "<GEMINI_API_KEY>"
64
71
  },
65
72
  {
66
73
  "name": "openai",
67
74
  "variant": "default",
68
- "apiKey": "FIXME"
69
- },
70
- {
71
- // Requires Azure CLI to get access token
72
- "name": "azure",
73
- "variant": "openai",
74
- "baseURL": "https://<resource>.openai.azure.com/openai",
75
- // Optional
76
- "azureConfigDir": "/home/xxx/.azure-for-agent"
75
+ "apiKey": "<OPENAI_API_KEY>"
77
76
  },
78
- {
79
- "name": "bedrock",
80
- "variant": "default",
81
- "baseURL": "https://bedrock-runtime.<region>.amazonaws.com",
82
- "awsProfile": "FIXME"
83
- },
84
- {
85
- // Requires gcloud CLI to get authentication token
86
- "name": "vertex-ai",
87
- "variant": "default",
88
- "baseURL": "https://aiplatform.googleapis.com/v1beta1/projects/<project>/locations/<location>",
89
- // Optional
90
- "account": "<service_account_email>"
91
- }
92
77
  ],
93
78
 
94
79
  // Optional
95
80
  "tools": {
81
+ // askWeb: Searches the web to answer questions requiring up-to-date information or external sources.
96
82
  "askWeb": {
97
83
  "provider": "gemini",
98
- "apiKey": "FIXME",
84
+ "apiKey": "<GEMINI_API_KEY>",
99
85
  "model": "gemini-3-flash-preview"
100
86
  // Optional
101
87
  // "baseURL": "<proxy_url>"
@@ -108,9 +94,11 @@ Create the configuration.
108
94
  // "account": "<service_account_email>"
109
95
  },
110
96
 
97
+ // askURL: Answers questions based on provided URL content.
98
+ // Directly injecting URL content into context is not supported to prevent prompt injection.
111
99
  "askURL": {
112
100
  "provider": "gemini",
113
- "apiKey": "FIXME"
101
+ "apiKey": "<GEMINI_API_KEY>",
114
102
  "model": "gemini-3-flash-preview"
115
103
  // Optional
116
104
  // "baseURL": "<proxy_url>"
@@ -129,7 +117,40 @@ Create the configuration.
129
117
  ```
130
118
 
131
119
  <details>
132
- <summary><b>Other provider examples</b></summary>
120
+ <summary><b>Azure / Bedrock / Vertex AI provider examples</b></summary>
121
+
122
+ ```js
123
+ {
124
+ "platforms": [
125
+ {
126
+ // Requires Azure CLI to get access token
127
+ "name": "azure",
128
+ "variant": "openai",
129
+ "baseURL": "https://<resource>.openai.azure.com/openai",
130
+ // Optional
131
+ "azureConfigDir": "/home/xxx/.azure-for-agent"
132
+ },
133
+ {
134
+ "name": "bedrock",
135
+ "variant": "default",
136
+ "baseURL": "https://bedrock-runtime.<region>.amazonaws.com",
137
+ "awsProfile": "<AWS_PROFILE>"
138
+ },
139
+ {
140
+ // Requires gcloud CLI to get authentication token
141
+ "name": "vertex-ai",
142
+ "variant": "default",
143
+ "baseURL": "https://aiplatform.googleapis.com/v1beta1/projects/<project>/locations/<location>",
144
+ // Optional
145
+ "account": "<service_account_email>"
146
+ }
147
+ ]
148
+ }
149
+ ```
150
+ </details>
151
+
152
+ <details>
153
+ <summary><b>OpenAI compatible provider examples</b></summary>
133
154
 
134
155
  ```js
135
156
  {
@@ -138,19 +159,19 @@ Create the configuration.
138
159
  "name": "openai-compatible",
139
160
  "variant": "ollama",
140
161
  "baseURL": "https://ollama.com",
141
- "apiKey": "FIXME"
162
+ "apiKey": "<API_KEY>"
142
163
  },
143
164
  {
144
165
  "name": "openai-compatible",
145
166
  "variant": "huggingface",
146
167
  "baseURL": "https://router.huggingface.co",
147
- "apiKey": "FIXME"
168
+ "apiKey": "<HUGGINGFACE_API_KEY>"
148
169
  },
149
170
  {
150
171
  "name": "openai-compatible",
151
172
  "variant": "fireworks",
152
173
  "baseURL": "https://api.fireworks.ai/inference",
153
- "apiKey": "FIXME"
174
+ "apiKey": "<FIREWORKS_API_KEY>"
154
175
  }
155
176
  ]
156
177
  }
@@ -222,7 +243,7 @@ Create the configuration.
222
243
  "name": "bedrock",
223
244
  "variant": "jp",
224
245
  "baseURL": "https://bedrock-runtime.ap-northeast-1.amazonaws.com",
225
- "awsProfile": "FIXME"
246
+ "awsProfile": "<AWS_PROFILE>"
226
247
  }
227
248
  ]
228
249
  }
@@ -442,7 +463,7 @@ The agent loads configuration files in the following order. Settings in later fi
442
463
  // ⚠️ Add this to config.local.json to avoid committing secrets to Git
443
464
  "slack": {
444
465
  "command": "npx",
445
- "args": ["-y", "mcp-remote", "https://mcp.slack.com/mcp", "--header", "Authorization:Bearer FIXME"],
466
+ "args": ["-y", "mcp-remote", "https://mcp.slack.com/mcp", "--header", "Authorization:Bearer <SLACK_TOKEN>"],
446
467
  },
447
468
  "notion": {
448
469
  "command": "npx",
@@ -459,12 +480,18 @@ The agent loads configuration files in the following order. Settings in later fi
459
480
  // ⚠️ Add this to config.local.json to avoid committing secrets to Git
460
481
  "google_developer-knowledge": {
461
482
  "command": "npx",
462
- "args": ["-y", "mcp-remote", "https://developerknowledge.googleapis.com/mcp", "--header", "X-Goog-Api-Key:FIXME"]
483
+ "args": ["-y", "mcp-remote", "https://developerknowledge.googleapis.com/mcp", "--header", "X-Goog-Api-Key:<GOOGLE_API_KEY>"]
463
484
  }
464
485
  },
465
486
 
466
487
  // Override default notification command
467
488
  // "notifyCmd": "/path/to/notification-command"
489
+
490
+ // (Optional) Voice input. See "Voice Input" below.
491
+ // "voiceInput": {
492
+ // "provider": "openai",
493
+ // "apiKey": "<OPENAI_API_KEY>"
494
+ // }
468
495
  }
469
496
  ```
470
497
  </details>
@@ -585,6 +612,53 @@ Example:
585
612
  plain install-claude-code-plugins
586
613
  ```
587
614
 
615
+ ## Voice Input
616
+
617
+ Press **Ctrl-O** to start recording, press it again to stop. Partial
618
+ transcripts are inserted into the prompt as you speak so you can edit
619
+ and send them like regular text.
620
+
621
+ ### Requirements
622
+
623
+ - A recording command on `PATH`: `arecord`, `sox`, or `ffmpeg`.
624
+ - An API key for the chosen provider.
625
+ - Your host must have microphone access. The sandbox does not need to.
626
+
627
+ ### Providers
628
+
629
+ **OpenAI Realtime** (default, recommended):
630
+
631
+ ```js
632
+ {
633
+ "voiceInput": {
634
+ "provider": "openai",
635
+ "apiKey": "<OPENAI_API_KEY>"
636
+ // "model": "gpt-4o-transcribe", // or "gpt-4o-mini-transcribe", "whisper-1"
637
+ // "language": "ja" // ISO-639-1 code. Improves accuracy and latency.
638
+ }
639
+ }
640
+ ```
641
+
642
+ **Gemini Live** (preview API; model names and pricing may change):
643
+
644
+ ```js
645
+ {
646
+ "voiceInput": {
647
+ "provider": "gemini",
648
+ "apiKey": "<GEMINI_API_KEY>"
649
+ // "model": "gemini-3.1-flash-live-preview",
650
+ // "language": "ja"
651
+ }
652
+ }
653
+ ```
654
+
655
+ ### Options
656
+
657
+ - `toggleKey` — Rebind the toggle. Accepts `"ctrl-<char>"` where `<char>`
658
+ is a letter (a-z) or one of `[ \ ] ^ _`. Defaults to `"ctrl-o"`.
659
+ - `recorder` — Override recorder auto-detection. Must write raw 16-bit
660
+ little-endian mono PCM to stdout at 24 kHz (OpenAI) or 16 kHz (Gemini).
661
+
588
662
  ## Development
589
663
 
590
664
  ```sh
@@ -623,9 +697,9 @@ npm publish --access public
623
697
 
624
698
  ```sh
625
699
  # IAM Identity Center
626
- identity_center_instance_arn="FIXME" # e.g., arn:aws:sso:::instance/ssoins-xxxxxxxxxxxxxxxx"
627
- identity_store_id=FIXME
628
- aws_account_id=FIXME
700
+ identity_center_instance_arn="<IDENTITY_CENTER_INSTANCE_ARN>" # e.g., arn:aws:sso:::instance/ssoins-xxxxxxxxxxxxxxxx
701
+ identity_store_id="<IDENTITY_STORE_ID>"
702
+ aws_account_id="<AWS_ACCOUNT_ID>"
629
703
 
630
704
  # Create a permission set
631
705
  permission_set_arn=$(aws sso-admin create-permission-set \
@@ -660,10 +734,10 @@ aws sso-admin put-inline-policy-to-permission-set \
660
734
  --inline-policy "$policy"
661
735
 
662
736
  # Create an SSO user
663
- sso_user_name=FIXME
664
- sso_user_email=FIXME
665
- sso_user_family_name=FIXME
666
- sso_user_given_name=FIXME
737
+ sso_user_name="<SSO_USER_NAME>"
738
+ sso_user_email="<SSO_USER_EMAIL>"
739
+ sso_user_family_name="<SSO_USER_FAMILY_NAME>"
740
+ sso_user_given_name="<SSO_USER_GIVEN_NAME>"
667
741
 
668
742
  user_id=$(aws identitystore create-user \
669
743
  --identity-store-id "$identity_store_id" \
@@ -704,8 +778,8 @@ aws bedrock-runtime invoke-model \
704
778
  <summary><b>Azure - Microsoft Foundry</b></summary>
705
779
 
706
780
  ```sh
707
- resource_group=FIXME
708
- account_name=FIXME # resource name
781
+ resource_group="<RESOURCE_GROUP>"
782
+ account_name="<ACCOUNT_NAME>" # resource name
709
783
 
710
784
  # Create a service principal
711
785
  service_principal=$(az ad sp create-for-rbac --name "CodingAgentServicePrincipal" --skip-assignment)
@@ -737,10 +811,10 @@ az login --service-principal -u "$app_id" -p "$app_secret" --tenant "$tenant_id"
737
811
  <summary><b>Google Cloud Vertex AI</b></summary>
738
812
 
739
813
  ```sh
740
- project_id=FIXME
741
- service_account_name=FIXME
814
+ project_id="<PROJECT_ID>"
815
+ service_account_name="<SERVICE_ACCOUNT_NAME>"
742
816
  service_account_email="${service_account_name}@${project_id}.iam.gserviceaccount.com"
743
- your_account_email=FIXME
817
+ your_account_email="<YOUR_ACCOUNT_EMAIL>"
744
818
 
745
819
  # Create a service account
746
820
  gcloud iam service-accounts create "$service_account_name" \
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@iinm/plain-agent",
3
- "version": "1.7.15",
3
+ "version": "1.7.17",
4
4
  "description": "A lightweight CLI-based coding agent",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -11,6 +11,49 @@
11
11
  import { styleText } from "node:util";
12
12
  import { createPatch } from "diff";
13
13
 
14
/** Length above which a single-line arg forces block-form rendering. */
const ARG_BLOCK_LENGTH_THRESHOLD = 60;

/**
 * Decide whether a single arg has to be rendered in block form.
 * Only strings qualify: those containing a newline or longer than
 * {@link ARG_BLOCK_LENGTH_THRESHOLD} characters.
 * @param {unknown} arg
 * @returns {boolean}
 */
function argNeedsBlockForm(arg) {
  return (
    typeof arg === "string" &&
    (arg.includes("\n") || arg.length > ARG_BLOCK_LENGTH_THRESHOLD)
  );
}

/**
 * Replace control characters with a visible `\uXXXX` escape.
 * Block-form lines are emitted without JSON quoting, so without this step an
 * arg containing ESC, CR, etc. would be written to the terminal verbatim and
 * could rewrite or hide parts of the displayed command. Tabs are kept as-is;
 * newlines never reach this function because callers split on them first.
 * @param {string} text
 * @returns {string}
 */
function escapeControlCharsForDisplay(text) {
  let escaped = "";
  for (const ch of text) {
    const code = ch.codePointAt(0) ?? 0;
    const isControl =
      (code < 0x20 && code !== 0x09) || (code >= 0x7f && code <= 0x9f);
    escaped += isControl
      ? `\\u${code.toString(16).padStart(4, "0")}`
      : ch;
  }
  return escaped;
}

/**
 * Format an args array for display.
 * Uses compact JSON for short single-line args; switches to a YAML-style
 * block form when any arg contains newlines or exceeds
 * {@link ARG_BLOCK_LENGTH_THRESHOLD} characters so that long scripts passed
 * to `bash -c`, `python -c`, `node -e`, etc. stay readable.
 *
 * In block form, control characters other than tab are shown as `\uXXXX`
 * escapes so the rendered text always reflects the real argument content.
 * @param {unknown} args - Expected to be an array; any other value (or an
 *   empty array) is rendered as compact JSON, with null/undefined shown as `[]`.
 * @returns {string} A single `args: ...` line, or an `args:` header followed
 *   by one list entry per arg.
 */
export function formatArgs(args) {
  if (!Array.isArray(args) || args.length === 0) {
    return `args: ${JSON.stringify(args ?? [])}`;
  }

  if (!args.some(argNeedsBlockForm)) {
    return `args: ${JSON.stringify(args)}`;
  }

  const lines = ["args:"];
  for (const arg of args) {
    if (argNeedsBlockForm(arg)) {
      lines.push(" - |");
      for (const line of arg.split("\n")) {
        lines.push(` ${escapeControlCharsForDisplay(line)}`);
      }
    } else {
      // Short args keep JSON quoting, which already escapes control characters.
      lines.push(` - ${JSON.stringify(arg)}`);
    }
  }
  return lines.join("\n");
}
56
+
14
57
  /**
15
58
  * Format tool use for display.
16
59
  * @param {MessageContentToolUse} toolUse
@@ -25,7 +68,7 @@ export function formatToolUse(toolUse) {
25
68
  return [
26
69
  `tool: ${toolName}`,
27
70
  `command: ${JSON.stringify(execCommandInput.command)}`,
28
- `args: ${JSON.stringify(execCommandInput.args)}`,
71
+ formatArgs(execCommandInput.args),
29
72
  ].join("\n");
30
73
  }
31
74
 
@@ -82,7 +125,7 @@ export function formatToolUse(toolUse) {
82
125
  return [
83
126
  `tool: ${toolName}`,
84
127
  `command: ${tmuxCommandInput.command}`,
85
- `args: ${JSON.stringify(tmuxCommandInput.args)}`,
128
+ formatArgs(tmuxCommandInput.args),
86
129
  ].join("\n");
87
130
  }
88
131
 
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * @import { UserEventEmitter, AgentEventEmitter, AgentCommands } from "./agent"
3
3
  * @import { ClaudeCodePlugin } from "./claudeCodePlugin.mjs"
4
+ * @import { VoiceInputConfig, VoiceSession } from "./voiceInput.mjs"
4
5
  */
5
6
 
6
7
  import readline from "node:readline";
@@ -13,8 +14,10 @@ import {
13
14
  printMessage,
14
15
  } from "./cliFormatter.mjs";
15
16
  import { createInterruptTransform } from "./cliInterruptTransform.mjs";
17
+ import { createMuteTransform } from "./cliMuteTransform.mjs";
16
18
  import { createPasteHandler } from "./cliPasteTransform.mjs";
17
19
  import { notify } from "./utils/notify.mjs";
20
+ import { parseVoiceToggleKey, startVoiceSession } from "./voiceInput.mjs";
18
21
 
19
22
  const HELP_MESSAGE = [
20
23
  "Commands:",
@@ -57,6 +60,7 @@ const HELP_MESSAGE = [
57
60
  * @property {boolean} sandbox
58
61
  * @property {() => Promise<void>} onStop
59
62
  * @property {ClaudeCodePlugin[]} [claudeCodePlugins]
63
+ * @property {VoiceInputConfig} [voiceInput]
60
64
  */
61
65
 
62
66
  /**
@@ -72,6 +76,7 @@ export function startInteractiveSession({
72
76
  sandbox,
73
77
  onStop,
74
78
  claudeCodePlugins,
79
+ voiceInput,
75
80
  }) {
76
81
  /** @type {{ turn: boolean, multiLineBuffer: string[] | null, subagentName: string }} */
77
82
  const state = {
@@ -80,6 +85,16 @@ export function startInteractiveSession({
80
85
  subagentName: "",
81
86
  };
82
87
 
88
+ /**
89
+ * Active voice input session, or null when not recording.
90
+ * @type {{ session: VoiceSession, startCursor: number, transcriptLength: number } | null}
91
+ */
92
+ let voice = null;
93
+
94
+ // Parse the voice toggle key once at startup so misconfiguration fails
95
+ // loudly instead of silently falling back.
96
+ const voiceToggle = parseVoiceToggleKey(voiceInput?.toggleKey);
97
+
83
98
  const getCliPrompt = (subagentName = "", flashMessage = "") =>
84
99
  [
85
100
  "",
@@ -136,7 +151,100 @@ export function startInteractiveSession({
136
151
  cli.prompt();
137
152
  };
138
153
 
154
/**
 * Stop the active voice session (if any) and restore the regular prompt.
 * Never rejects: callers invoke it without awaiting, so a failure from
 * `session.stop()` is reported in the prompt instead of being thrown.
 * @returns {Promise<void>}
 */
const stopVoiceSession = async () => {
  if (!voice) return;
  const current = voice;
  // Clear the shared state before awaiting so repeated calls while the stop
  // is in flight return early at the guard above.
  voice = null;
  try {
    await current.session.stop();
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    cli.setPrompt(
      getCliPrompt(
        state.subagentName,
        styleText("red", `Voice input error: ${reason}`),
      ),
    );
    cli.prompt(true);
    return;
  }
  cli.setPrompt(currentCliPrompt);
  // @ts-expect-error - internal property
  cli._refreshLine?.();
};
163
+
164
/**
 * Toggle voice recording: stop the active session if there is one, otherwise
 * start a new one and stream its transcript into the readline buffer.
 * Does nothing during the agent's turn. When `voiceInput` is not configured,
 * shows a hint in the prompt instead of starting a session.
 */
const handleVoiceToggle = () => {
  // Ignore while the agent is working.
  if (!state.turn) return;

  if (voice) {
    // Not awaited on purpose (this handler is synchronous), so attach a
    // rejection handler to avoid an unhandled rejection.
    stopVoiceSession().catch((err) => {
      const reason = err instanceof Error ? err.message : String(err);
      cli.setPrompt(
        getCliPrompt(
          state.subagentName,
          styleText("red", `Voice input error: ${reason}`),
        ),
      );
      cli.prompt(true);
    });
    return;
  }

  if (!voiceInput) {
    cli.setPrompt(
      getCliPrompt(
        state.subagentName,
        styleText(
          "yellow",
          `Voice input not configured. Set \`voiceInput\` in your config to enable ${voiceToggle.label}.`,
        ),
      ),
    );
    cli.prompt(true);
    return;
  }

  const startCursor = cli.cursor;

  /**
   * State record owned by the session started below. Callbacks compare it
   * against the shared `voice` variable by identity so that late events from
   * a session that was already stopped or replaced cannot touch the state of
   * a newer session.
   * @type {{ session: VoiceSession, startCursor: number, transcriptLength: number } | null}
   */
  let ownRecord = null;

  const session = startVoiceSession({
    config: voiceInput,
    callbacks: {
      onTranscript: (delta) => {
        const record = ownRecord;
        if (record === null || voice !== record) return;
        const insertAt = record.startCursor + record.transcriptLength;
        // Insert delta at the recording's insertion point. User input is
        // swallowed while recording, so the buffer around `insertAt` is
        // stable.
        const before = cli.line.slice(0, insertAt);
        const after = cli.line.slice(insertAt);
        // `line` and `cursor` are declared readonly in the Node typings but
        // are writable at runtime — the existing code already patches
        // `_refreshLine` in the same way.
        const mutableCli = /** @type {{ line: string, cursor: number }} */ (
          /** @type {unknown} */ (cli)
        );
        mutableCli.line = before + delta + after;
        mutableCli.cursor = insertAt + delta.length;
        record.transcriptLength += delta.length;
        // @ts-expect-error - internal property
        cli._refreshLine?.();
      },
      onError: (err) => {
        // Only drop the recording state if it still belongs to this session;
        // the error itself is always shown.
        if (voice === ownRecord) {
          voice = null;
        }
        cli.setPrompt(
          getCliPrompt(
            state.subagentName,
            styleText("red", `Voice input error: ${err.message}`),
          ),
        );
        cli.prompt(true);
      },
      onClose: () => {
        if (ownRecord === null || voice !== ownRecord) return;
        voice = null;
        cli.setPrompt(currentCliPrompt);
        // @ts-expect-error - internal property
        cli._refreshLine?.();
      },
    },
  });
  ownRecord = { session, startCursor, transcriptLength: 0 };
  voice = ownRecord;
  cli.setPrompt(
    getCliPrompt(
      state.subagentName,
      styleText(["red", "bold"], `● REC (${voiceToggle.label} to stop)`),
    ),
  );
  // @ts-expect-error - internal property
  cli._refreshLine?.();
};
240
+
139
241
  const handleCtrlC = () => {
242
+ // Stop voice recording first if active.
243
+ if (voice) {
244
+ stopVoiceSession();
245
+ return;
246
+ }
247
+
140
248
  // Agent turn: pause auto-approve; do not clear input.
141
249
  if (!state.turn) {
142
250
  agentCommands.pauseAutoApprove();
@@ -192,14 +300,20 @@ export function startInteractiveSession({
192
300
  };
193
301
 
194
302
  // Pre-readline pipeline:
195
- // stdin -> interrupt (Ctrl-C / Ctrl-D) -> paste (bracketed paste) -> readline
303
+ // stdin -> interrupt (Ctrl-C / Ctrl-D) -> mute (voice recording) -> paste (bracketed paste) -> readline
196
304
  const interrupt = createInterruptTransform({
197
305
  onCtrlC: handleCtrlC,
198
306
  onCtrlD: handleCtrlD,
307
+ onVoiceToggle: handleVoiceToggle,
308
+ voiceToggleByte: voiceToggle.byte,
199
309
  });
310
+ // While a voice session is recording, swallow all stdin bytes other than
311
+ // Ctrl-C / Ctrl-D / the voice toggle key so transcript insertion stays
312
+ // consistent.
313
+ const mute = createMuteTransform({ isMuted: () => voice !== null });
200
314
  const paste = createPasteHandler();
201
315
 
202
- process.stdin.pipe(interrupt).pipe(paste.transform);
316
+ process.stdin.pipe(interrupt).pipe(mute).pipe(paste.transform);
203
317
 
204
318
  // Enable bracketed paste mode
205
319
  if (process.stdout.isTTY) {
@@ -1,19 +1,31 @@
1
1
  import { Transform } from "node:stream";
2
2
 
3
3
  /**
4
- * Create a Transform that intercepts Ctrl-C (0x03) and Ctrl-D (0x04). When
5
- * either byte is seen anywhere in a chunk, the corresponding callback is
6
- * invoked and the entire chunk is dropped so that downstream consumers (e.g.
4
+ * Create a Transform that intercepts Ctrl-C (0x03), Ctrl-D (0x04), and an
5
+ * optional "voice toggle" byte (default Ctrl-O, 0x0f). When one of those
6
+ * bytes is seen anywhere in a chunk, the corresponding callback is invoked
7
+ * and the entire chunk is dropped so that downstream consumers (e.g.
7
8
  * readline) never observe it. All other input flows through unchanged.
8
9
  *
9
- * If both bytes appear in the same chunk, Ctrl-C is handled first.
10
+ * Priority when multiple handled bytes appear in the same chunk:
11
+ * Ctrl-C > Ctrl-D > voice toggle.
10
12
  *
11
13
  * @param {object} handlers
12
14
  * @param {() => void} handlers.onCtrlC - Called when Ctrl-C is detected
13
15
  * @param {() => void} handlers.onCtrlD - Called when Ctrl-D is detected
16
+ * @param {() => void} [handlers.onVoiceToggle]
17
+ * Called when the voice toggle byte is detected.
18
+ * @param {number} [handlers.voiceToggleByte]
19
+ * Byte value for the voice toggle key. Defaults to 0x0f (Ctrl-O).
14
20
  * @returns {Transform}
15
21
  */
16
- export function createInterruptTransform({ onCtrlC, onCtrlD }) {
22
+ export function createInterruptTransform({
23
+ onCtrlC,
24
+ onCtrlD,
25
+ onVoiceToggle,
26
+ voiceToggleByte = 0x0f,
27
+ }) {
28
+ const voiceToggleChar = String.fromCharCode(voiceToggleByte);
17
29
  return new Transform({
18
30
  transform(chunk, _encoding, callback) {
19
31
  const data = chunk.toString("utf8");
@@ -27,6 +39,11 @@ export function createInterruptTransform({ onCtrlC, onCtrlD }) {
27
39
  callback();
28
40
  return;
29
41
  }
42
+ if (onVoiceToggle && data.includes(voiceToggleChar)) {
43
+ onVoiceToggle();
44
+ callback();
45
+ return;
46
+ }
30
47
  this.push(chunk);
31
48
  callback();
32
49
  },
@@ -0,0 +1,26 @@
1
+ import { Transform } from "node:stream";
2
+
3
/**
 * Build a pass-through Transform with an on/off gate.
 *
 * For every incoming chunk the `isMuted` predicate is consulted: if it
 * returns true the chunk is discarded, otherwise it is forwarded downstream
 * untouched.
 *
 * Designed to be placed after `createInterruptTransform` and before the paste
 * handler, so regular stdin input can be silenced during special modes (for
 * example while a voice input session is recording) without mixing that
 * concern into the interrupt-detection logic.
 *
 * @param {object} options
 * @param {() => boolean} options.isMuted
 *   Evaluated once per chunk; a true result drops that chunk.
 * @returns {Transform}
 */
export function createMuteTransform({ isMuted }) {
  return new Transform({
    transform(chunk, _encoding, done) {
      if (isMuted()) {
        // Swallow the chunk but still signal completion so the stream keeps
        // flowing.
        done();
        return;
      }
      // Passing the chunk as the second argument forwards it downstream.
      done(null, chunk);
    },
  });
}
package/src/config.d.ts CHANGED
@@ -4,6 +4,7 @@ import { AskURLToolOptions } from "./tools/askURL.mjs";
4
4
  import { AskWebToolOptions } from "./tools/askWeb.mjs";
5
5
  import { ExecCommandSanboxConfig } from "./tools/execCommand";
6
6
  import { ClaudeCodePluginRepo } from "./claudeCodePlugin.mjs";
7
+ import { VoiceInputConfig } from "./voiceInput.mjs";
7
8
 
8
9
  export type AppConfig = {
9
10
  model?: string;
@@ -21,6 +22,7 @@ export type AppConfig = {
21
22
  };
22
23
  mcpServers?: Record<string, MCPServerConfig>;
23
24
  notifyCmd?: string;
25
+ voiceInput?: VoiceInputConfig;
24
26
  claudeCodePlugins?: ClaudeCodePluginRepo[];
25
27
  };
26
28
 
package/src/config.mjs CHANGED
@@ -98,6 +98,9 @@ export async function loadAppConfig(options = {}) {
98
98
  ...(merged.claudeCodePlugins ?? []),
99
99
  ...(config.claudeCodePlugins ?? []),
100
100
  ],
101
+ voiceInput: config.voiceInput
102
+ ? { ...(merged.voiceInput ?? {}), ...config.voiceInput }
103
+ : merged.voiceInput,
101
104
  };
102
105
  }
103
106