@iinm/plain-agent 1.7.15 → 1.7.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -57
- package/package.json +1 -1
- package/src/cliFormatter.mjs +45 -2
- package/src/cliInteractive.mjs +116 -2
- package/src/cliInterruptTransform.mjs +22 -5
- package/src/cliMuteTransform.mjs +26 -0
- package/src/config.d.ts +2 -0
- package/src/config.mjs +3 -0
- package/src/main.mjs +3 -1
- package/src/mcp.mjs +5 -2
- package/src/voiceInput.mjs +671 -0
package/README.md
CHANGED
|
@@ -4,21 +4,28 @@
|
|
|
4
4
|
|
|
5
5
|
# Plain Agent
|
|
6
6
|
|
|
7
|
-
A lightweight CLI-based coding agent.
|
|
7
|
+
A lightweight CLI-based coding agent with zero framework dependencies.
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
- **Multi-provider** — Supports Anthropic, OpenAI, Gemini, Bedrock, Azure, Vertex AI, and more
|
|
11
|
-
- **Sequential subagent delegation** — Delegate subtasks to specialized subagents with full visibility
|
|
12
|
-
- **MCP support** — Connect to external MCP servers to extend available tools
|
|
13
|
-
- **Claude Code compatible** — Reuse Claude Code plugins, agents, commands, and skills
|
|
9
|
+
## Why Plain Agent?
|
|
14
10
|
|
|
15
|
-
|
|
11
|
+
- **Multi-provider** — Use Claude, GPT, Gemini, or any OpenAI-compatible model.
|
|
12
|
+
Switch providers without changing your workflow.
|
|
13
|
+
- **Fine-grained approval rules** — Auto-approve commands by name, arguments,
|
|
14
|
+
and file paths using regex patterns
|
|
15
|
+
([`config.predefined.json`](https://github.com/iinm/plain-agent/blob/main/config/config.predefined.json)).
|
|
16
|
+
- **Path validation** — File paths must stay within the working directory
|
|
17
|
+
and git-ignored files (`.env`, etc.) are blocked.
|
|
18
|
+
- **Sandboxed execution** — Run the agent's shell commands inside a Docker
|
|
19
|
+
container with network access restricted to allowlisted destinations
|
|
20
|
+
(e.g., `registry.npmjs.org` only for `npm install`).
|
|
21
|
+
- **Extensible** — Define prompts and subagents in Markdown.
|
|
22
|
+
Connect MCP servers. Reuse Claude Code plugins.
|
|
16
23
|
|
|
17
|
-
|
|
24
|
+
## Limitations
|
|
18
25
|
|
|
19
|
-
**
|
|
20
|
-
|
|
21
|
-
|
|
26
|
+
- **Sequential subagent execution** — Subagents run one at a time rather than
|
|
27
|
+
in parallel. The trade-off is full visibility: every step is streamed to
|
|
28
|
+
your terminal so you can follow exactly what each subagent is doing.
|
|
22
29
|
|
|
23
30
|
## Requirements
|
|
24
31
|
|
|
@@ -53,49 +60,28 @@ Create the configuration.
|
|
|
53
60
|
{
|
|
54
61
|
"name": "anthropic",
|
|
55
62
|
"variant": "default",
|
|
56
|
-
"apiKey": "
|
|
63
|
+
"apiKey": "<ANTHROPIC_API_KEY>"
|
|
57
64
|
// Or
|
|
58
65
|
// "apiKey": { "$env": "ANTHROPIC_API_KEY" }
|
|
59
66
|
},
|
|
60
67
|
{
|
|
61
68
|
"name": "gemini",
|
|
62
69
|
"variant": "default",
|
|
63
|
-
"apiKey": "
|
|
70
|
+
"apiKey": "<GEMINI_API_KEY>"
|
|
64
71
|
},
|
|
65
72
|
{
|
|
66
73
|
"name": "openai",
|
|
67
74
|
"variant": "default",
|
|
68
|
-
"apiKey": "
|
|
69
|
-
},
|
|
70
|
-
{
|
|
71
|
-
// Requires Azure CLI to get access token
|
|
72
|
-
"name": "azure",
|
|
73
|
-
"variant": "openai",
|
|
74
|
-
"baseURL": "https://<resource>.openai.azure.com/openai",
|
|
75
|
-
// Optional
|
|
76
|
-
"azureConfigDir": "/home/xxx/.azure-for-agent"
|
|
75
|
+
"apiKey": "<OPENAI_API_KEY>"
|
|
77
76
|
},
|
|
78
|
-
{
|
|
79
|
-
"name": "bedrock",
|
|
80
|
-
"variant": "default",
|
|
81
|
-
"baseURL": "https://bedrock-runtime.<region>.amazonaws.com",
|
|
82
|
-
"awsProfile": "FIXME"
|
|
83
|
-
},
|
|
84
|
-
{
|
|
85
|
-
// Requires gcloud CLI to get authentication token
|
|
86
|
-
"name": "vertex-ai",
|
|
87
|
-
"variant": "default",
|
|
88
|
-
"baseURL": "https://aiplatform.googleapis.com/v1beta1/projects/<project>/locations/<location>",
|
|
89
|
-
// Optional
|
|
90
|
-
"account": "<service_account_email>"
|
|
91
|
-
}
|
|
92
77
|
],
|
|
93
78
|
|
|
94
79
|
// Optional
|
|
95
80
|
"tools": {
|
|
81
|
+
// askWeb: Searches the web to answer questions requiring up-to-date information or external sources.
|
|
96
82
|
"askWeb": {
|
|
97
83
|
"provider": "gemini",
|
|
98
|
-
"apiKey": "
|
|
84
|
+
"apiKey": "<GEMINI_API_KEY>",
|
|
99
85
|
"model": "gemini-3-flash-preview"
|
|
100
86
|
// Optional
|
|
101
87
|
// "baseURL": "<proxy_url>"
|
|
@@ -108,9 +94,11 @@ Create the configuration.
|
|
|
108
94
|
// "account": "<service_account_email>"
|
|
109
95
|
},
|
|
110
96
|
|
|
97
|
+
// askURL: Answers questions based on provided URL content.
|
|
98
|
+
// To prevent prompt injection, directly injecting URL content into the context is not supported.
|
|
111
99
|
"askURL": {
|
|
112
100
|
"provider": "gemini",
|
|
113
|
-
"apiKey": "
|
|
101
|
+
"apiKey": "<GEMINI_API_KEY>",
|
|
114
102
|
"model": "gemini-3-flash-preview"
|
|
115
103
|
// Optional
|
|
116
104
|
// "baseURL": "<proxy_url>"
|
|
@@ -129,7 +117,40 @@ Create the configuration.
|
|
|
129
117
|
```
|
|
130
118
|
|
|
131
119
|
<details>
|
|
132
|
-
<summary><b>
|
|
120
|
+
<summary><b>Azure / Bedrock / Vertex AI provider examples</b></summary>
|
|
121
|
+
|
|
122
|
+
```js
|
|
123
|
+
{
|
|
124
|
+
"platforms": [
|
|
125
|
+
{
|
|
126
|
+
// Requires Azure CLI to get access token
|
|
127
|
+
"name": "azure",
|
|
128
|
+
"variant": "openai",
|
|
129
|
+
"baseURL": "https://<resource>.openai.azure.com/openai",
|
|
130
|
+
// Optional
|
|
131
|
+
"azureConfigDir": "/home/xxx/.azure-for-agent"
|
|
132
|
+
},
|
|
133
|
+
{
|
|
134
|
+
"name": "bedrock",
|
|
135
|
+
"variant": "default",
|
|
136
|
+
"baseURL": "https://bedrock-runtime.<region>.amazonaws.com",
|
|
137
|
+
"awsProfile": "<AWS_PROFILE>"
|
|
138
|
+
},
|
|
139
|
+
{
|
|
140
|
+
// Requires gcloud CLI to get authentication token
|
|
141
|
+
"name": "vertex-ai",
|
|
142
|
+
"variant": "default",
|
|
143
|
+
"baseURL": "https://aiplatform.googleapis.com/v1beta1/projects/<project>/locations/<location>",
|
|
144
|
+
// Optional
|
|
145
|
+
"account": "<service_account_email>"
|
|
146
|
+
}
|
|
147
|
+
]
|
|
148
|
+
}
|
|
149
|
+
```
|
|
150
|
+
</details>
|
|
151
|
+
|
|
152
|
+
<details>
|
|
153
|
+
<summary><b>OpenAI compatible provider examples</b></summary>
|
|
133
154
|
|
|
134
155
|
```js
|
|
135
156
|
{
|
|
@@ -138,19 +159,19 @@ Create the configuration.
|
|
|
138
159
|
"name": "openai-compatible",
|
|
139
160
|
"variant": "ollama",
|
|
140
161
|
"baseURL": "https://ollama.com",
|
|
141
|
-
"apiKey": "
|
|
162
|
+
"apiKey": "<API_KEY>"
|
|
142
163
|
},
|
|
143
164
|
{
|
|
144
165
|
"name": "openai-compatible",
|
|
145
166
|
"variant": "huggingface",
|
|
146
167
|
"baseURL": "https://router.huggingface.co",
|
|
147
|
-
"apiKey": "
|
|
168
|
+
"apiKey": "<HUGGINGFACE_API_KEY>"
|
|
148
169
|
},
|
|
149
170
|
{
|
|
150
171
|
"name": "openai-compatible",
|
|
151
172
|
"variant": "fireworks",
|
|
152
173
|
"baseURL": "https://api.fireworks.ai/inference",
|
|
153
|
-
"apiKey": "
|
|
174
|
+
"apiKey": "<FIREWORKS_API_KEY>"
|
|
154
175
|
}
|
|
155
176
|
]
|
|
156
177
|
}
|
|
@@ -222,7 +243,7 @@ Create the configuration.
|
|
|
222
243
|
"name": "bedrock",
|
|
223
244
|
"variant": "jp",
|
|
224
245
|
"baseURL": "https://bedrock-runtime.ap-northeast-1.amazonaws.com",
|
|
225
|
-
"awsProfile": "
|
|
246
|
+
"awsProfile": "<AWS_PROFILE>"
|
|
226
247
|
}
|
|
227
248
|
]
|
|
228
249
|
}
|
|
@@ -442,7 +463,7 @@ The agent loads configuration files in the following order. Settings in later fi
|
|
|
442
463
|
// ⚠️ Add this to config.local.json to avoid committing secrets to Git
|
|
443
464
|
"slack": {
|
|
444
465
|
"command": "npx",
|
|
445
|
-
"args": ["-y", "mcp-remote", "https://mcp.slack.com/mcp", "--header", "Authorization:Bearer
|
|
466
|
+
"args": ["-y", "mcp-remote", "https://mcp.slack.com/mcp", "--header", "Authorization:Bearer <SLACK_TOKEN>"]
|
|
446
467
|
},
|
|
447
468
|
"notion": {
|
|
448
469
|
"command": "npx",
|
|
@@ -459,12 +480,18 @@ The agent loads configuration files in the following order. Settings in later fi
|
|
|
459
480
|
// ⚠️ Add this to config.local.json to avoid committing secrets to Git
|
|
460
481
|
"google_developer-knowledge": {
|
|
461
482
|
"command": "npx",
|
|
462
|
-
"args": ["-y", "mcp-remote", "https://developerknowledge.googleapis.com/mcp", "--header", "X-Goog-Api-Key
|
|
483
|
+
"args": ["-y", "mcp-remote", "https://developerknowledge.googleapis.com/mcp", "--header", "X-Goog-Api-Key:<GOOGLE_API_KEY>"]
|
|
463
484
|
}
|
|
464
485
|
},
|
|
465
486
|
|
|
466
487
|
// Override default notification command
|
|
467
488
|
// "notifyCmd": "/path/to/notification-command"
|
|
489
|
+
|
|
490
|
+
// (Optional) Voice input. See "Voice Input" below.
|
|
491
|
+
// "voiceInput": {
|
|
492
|
+
// "provider": "openai",
|
|
493
|
+
// "apiKey": "<OPENAI_API_KEY>"
|
|
494
|
+
// }
|
|
468
495
|
}
|
|
469
496
|
```
|
|
470
497
|
</details>
|
|
@@ -585,6 +612,53 @@ Example:
|
|
|
585
612
|
plain install-claude-code-plugins
|
|
586
613
|
```
|
|
587
614
|
|
|
615
|
+
## Voice Input
|
|
616
|
+
|
|
617
|
+
Press **Ctrl-O** to start recording, press it again to stop. Partial
|
|
618
|
+
transcripts are inserted into the prompt as you speak so you can edit
|
|
619
|
+
and send them like regular text.
|
|
620
|
+
|
|
621
|
+
### Requirements
|
|
622
|
+
|
|
623
|
+
- A recording command on `PATH`: `arecord`, `sox`, or `ffmpeg`.
|
|
624
|
+
- An API key for the chosen provider.
|
|
625
|
+
- Your host must have microphone access. The sandbox does not need to.
|
|
626
|
+
|
|
627
|
+
### Providers
|
|
628
|
+
|
|
629
|
+
**OpenAI Realtime** (default, recommended):
|
|
630
|
+
|
|
631
|
+
```js
|
|
632
|
+
{
|
|
633
|
+
"voiceInput": {
|
|
634
|
+
"provider": "openai",
|
|
635
|
+
"apiKey": "<OPENAI_API_KEY>"
|
|
636
|
+
// "model": "gpt-4o-transcribe", // or "gpt-4o-mini-transcribe", "whisper-1"
|
|
637
|
+
// "language": "ja" // ISO-639-1 code. Improves accuracy and latency.
|
|
638
|
+
}
|
|
639
|
+
}
|
|
640
|
+
```
|
|
641
|
+
|
|
642
|
+
**Gemini Live** (preview API; model names and pricing may change):
|
|
643
|
+
|
|
644
|
+
```js
|
|
645
|
+
{
|
|
646
|
+
"voiceInput": {
|
|
647
|
+
"provider": "gemini",
|
|
648
|
+
"apiKey": "<GEMINI_API_KEY>"
|
|
649
|
+
// "model": "gemini-3.1-flash-live-preview",
|
|
650
|
+
// "language": "ja"
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
```
|
|
654
|
+
|
|
655
|
+
### Options
|
|
656
|
+
|
|
657
|
+
- `toggleKey` — Rebind the toggle. Accepts `"ctrl-<char>"` where `<char>`
|
|
658
|
+
is a letter (a-z) or one of `[ \ ] ^ _`. Defaults to `"ctrl-o"`.
|
|
659
|
+
- `recorder` — Override recorder auto-detection. Must write raw 16-bit
|
|
660
|
+
little-endian mono PCM to stdout at 24 kHz (OpenAI) or 16 kHz (Gemini).
|
|
661
|
+
|
|
588
662
|
## Development
|
|
589
663
|
|
|
590
664
|
```sh
|
|
@@ -623,9 +697,9 @@ npm publish --access public
|
|
|
623
697
|
|
|
624
698
|
```sh
|
|
625
699
|
# IAM Identity Center
|
|
626
|
-
identity_center_instance_arn="
|
|
627
|
-
identity_store_id
|
|
628
|
-
aws_account_id
|
|
700
|
+
identity_center_instance_arn="<IDENTITY_CENTER_INSTANCE_ARN>" # e.g., arn:aws:sso:::instance/ssoins-xxxxxxxxxxxxxxxx
|
|
701
|
+
identity_store_id=<IDENTITY_STORE_ID>
|
|
702
|
+
aws_account_id=<AWS_ACCOUNT_ID>
|
|
629
703
|
|
|
630
704
|
# Create a permission set
|
|
631
705
|
permission_set_arn=$(aws sso-admin create-permission-set \
|
|
@@ -660,10 +734,10 @@ aws sso-admin put-inline-policy-to-permission-set \
|
|
|
660
734
|
--inline-policy "$policy"
|
|
661
735
|
|
|
662
736
|
# Create an SSO user
|
|
663
|
-
sso_user_name
|
|
664
|
-
sso_user_email
|
|
665
|
-
sso_user_family_name
|
|
666
|
-
sso_user_given_name
|
|
737
|
+
sso_user_name=<SSO_USER_NAME>
|
|
738
|
+
sso_user_email=<SSO_USER_EMAIL>
|
|
739
|
+
sso_user_family_name=<SSO_USER_FAMILY_NAME>
|
|
740
|
+
sso_user_given_name=<SSO_USER_GIVEN_NAME>
|
|
667
741
|
|
|
668
742
|
user_id=$(aws identitystore create-user \
|
|
669
743
|
--identity-store-id "$identity_store_id" \
|
|
@@ -704,8 +778,8 @@ aws bedrock-runtime invoke-model \
|
|
|
704
778
|
<summary><b>Azure - Microsoft Foundry</b></summary>
|
|
705
779
|
|
|
706
780
|
```sh
|
|
707
|
-
resource_group
|
|
708
|
-
account_name
|
|
781
|
+
resource_group=<RESOURCE_GROUP>
|
|
782
|
+
account_name=<ACCOUNT_NAME> # resource name
|
|
709
783
|
|
|
710
784
|
# Create a service principal
|
|
711
785
|
service_principal=$(az ad sp create-for-rbac --name "CodingAgentServicePrincipal" --skip-assignment)
|
|
@@ -737,10 +811,10 @@ az login --service-principal -u "$app_id" -p "$app_secret" --tenant "$tenant_id"
|
|
|
737
811
|
<summary><b>Google Cloud Vertex AI</b></summary>
|
|
738
812
|
|
|
739
813
|
```sh
|
|
740
|
-
project_id
|
|
741
|
-
service_account_name
|
|
814
|
+
project_id=<PROJECT_ID>
|
|
815
|
+
service_account_name=<SERVICE_ACCOUNT_NAME>
|
|
742
816
|
service_account_email="${service_account_name}@${project_id}.iam.gserviceaccount.com"
|
|
743
|
-
your_account_email
|
|
817
|
+
your_account_email=<YOUR_ACCOUNT_EMAIL>
|
|
744
818
|
|
|
745
819
|
# Create a service account
|
|
746
820
|
gcloud iam service-accounts create "$service_account_name" \
|
package/package.json
CHANGED
package/src/cliFormatter.mjs
CHANGED
|
@@ -11,6 +11,49 @@
|
|
|
11
11
|
import { styleText } from "node:util";
|
|
12
12
|
import { createPatch } from "diff";
|
|
13
13
|
|
|
14
|
+
/** Length above which a single-line arg forces block-form rendering. */
|
|
15
|
+
const ARG_BLOCK_LENGTH_THRESHOLD = 60;
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Format an args array for display.
|
|
19
|
+
* Uses compact JSON for short single-line args; switches to a YAML-style
|
|
20
|
+
* block form when any arg contains newlines or exceeds
|
|
21
|
+
* {@link ARG_BLOCK_LENGTH_THRESHOLD} characters so that long scripts passed
|
|
22
|
+
* to `bash -c`, `python -c`, `node -e`, etc. stay readable.
|
|
23
|
+
* @param {unknown} args
|
|
24
|
+
* @returns {string}
|
|
25
|
+
*/
|
|
26
|
+
export function formatArgs(args) {
|
|
27
|
+
if (!Array.isArray(args) || args.length === 0) {
|
|
28
|
+
return `args: ${JSON.stringify(args ?? [])}`;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
const needsBlock = args.some(
|
|
32
|
+
(a) =>
|
|
33
|
+
typeof a === "string" &&
|
|
34
|
+
(a.includes("\n") || a.length > ARG_BLOCK_LENGTH_THRESHOLD),
|
|
35
|
+
);
|
|
36
|
+
if (!needsBlock) {
|
|
37
|
+
return `args: ${JSON.stringify(args)}`;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
const lines = ["args:"];
|
|
41
|
+
for (const arg of args) {
|
|
42
|
+
if (
|
|
43
|
+
typeof arg === "string" &&
|
|
44
|
+
(arg.includes("\n") || arg.length > ARG_BLOCK_LENGTH_THRESHOLD)
|
|
45
|
+
) {
|
|
46
|
+
lines.push(" - |");
|
|
47
|
+
for (const line of arg.split("\n")) {
|
|
48
|
+
lines.push(` ${line}`);
|
|
49
|
+
}
|
|
50
|
+
} else {
|
|
51
|
+
lines.push(` - ${JSON.stringify(arg)}`);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
return lines.join("\n");
|
|
55
|
+
}
|
|
56
|
+
|
|
14
57
|
/**
|
|
15
58
|
* Format tool use for display.
|
|
16
59
|
* @param {MessageContentToolUse} toolUse
|
|
@@ -25,7 +68,7 @@ export function formatToolUse(toolUse) {
|
|
|
25
68
|
return [
|
|
26
69
|
`tool: ${toolName}`,
|
|
27
70
|
`command: ${JSON.stringify(execCommandInput.command)}`,
|
|
28
|
-
|
|
71
|
+
formatArgs(execCommandInput.args),
|
|
29
72
|
].join("\n");
|
|
30
73
|
}
|
|
31
74
|
|
|
@@ -82,7 +125,7 @@ export function formatToolUse(toolUse) {
|
|
|
82
125
|
return [
|
|
83
126
|
`tool: ${toolName}`,
|
|
84
127
|
`command: ${tmuxCommandInput.command}`,
|
|
85
|
-
|
|
128
|
+
formatArgs(tmuxCommandInput.args),
|
|
86
129
|
].join("\n");
|
|
87
130
|
}
|
|
88
131
|
|
package/src/cliInteractive.mjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @import { UserEventEmitter, AgentEventEmitter, AgentCommands } from "./agent"
|
|
3
3
|
* @import { ClaudeCodePlugin } from "./claudeCodePlugin.mjs"
|
|
4
|
+
* @import { VoiceInputConfig, VoiceSession } from "./voiceInput.mjs"
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
7
|
import readline from "node:readline";
|
|
@@ -13,8 +14,10 @@ import {
|
|
|
13
14
|
printMessage,
|
|
14
15
|
} from "./cliFormatter.mjs";
|
|
15
16
|
import { createInterruptTransform } from "./cliInterruptTransform.mjs";
|
|
17
|
+
import { createMuteTransform } from "./cliMuteTransform.mjs";
|
|
16
18
|
import { createPasteHandler } from "./cliPasteTransform.mjs";
|
|
17
19
|
import { notify } from "./utils/notify.mjs";
|
|
20
|
+
import { parseVoiceToggleKey, startVoiceSession } from "./voiceInput.mjs";
|
|
18
21
|
|
|
19
22
|
const HELP_MESSAGE = [
|
|
20
23
|
"Commands:",
|
|
@@ -57,6 +60,7 @@ const HELP_MESSAGE = [
|
|
|
57
60
|
* @property {boolean} sandbox
|
|
58
61
|
* @property {() => Promise<void>} onStop
|
|
59
62
|
* @property {ClaudeCodePlugin[]} [claudeCodePlugins]
|
|
63
|
+
* @property {VoiceInputConfig} [voiceInput]
|
|
60
64
|
*/
|
|
61
65
|
|
|
62
66
|
/**
|
|
@@ -72,6 +76,7 @@ export function startInteractiveSession({
|
|
|
72
76
|
sandbox,
|
|
73
77
|
onStop,
|
|
74
78
|
claudeCodePlugins,
|
|
79
|
+
voiceInput,
|
|
75
80
|
}) {
|
|
76
81
|
/** @type {{ turn: boolean, multiLineBuffer: string[] | null, subagentName: string }} */
|
|
77
82
|
const state = {
|
|
@@ -80,6 +85,16 @@ export function startInteractiveSession({
|
|
|
80
85
|
subagentName: "",
|
|
81
86
|
};
|
|
82
87
|
|
|
88
|
+
/**
|
|
89
|
+
* Active voice input session, or null when not recording.
|
|
90
|
+
* @type {{ session: VoiceSession, startCursor: number, transcriptLength: number } | null}
|
|
91
|
+
*/
|
|
92
|
+
let voice = null;
|
|
93
|
+
|
|
94
|
+
// Parse the voice toggle key once at startup so misconfiguration fails
|
|
95
|
+
// loudly instead of silently falling back.
|
|
96
|
+
const voiceToggle = parseVoiceToggleKey(voiceInput?.toggleKey);
|
|
97
|
+
|
|
83
98
|
const getCliPrompt = (subagentName = "", flashMessage = "") =>
|
|
84
99
|
[
|
|
85
100
|
"",
|
|
@@ -136,7 +151,100 @@ export function startInteractiveSession({
|
|
|
136
151
|
cli.prompt();
|
|
137
152
|
};
|
|
138
153
|
|
|
154
|
+
const stopVoiceSession = async () => {
|
|
155
|
+
if (!voice) return;
|
|
156
|
+
const current = voice;
|
|
157
|
+
voice = null;
|
|
158
|
+
await current.session.stop();
|
|
159
|
+
cli.setPrompt(currentCliPrompt);
|
|
160
|
+
// @ts-expect-error - internal property
|
|
161
|
+
cli._refreshLine?.();
|
|
162
|
+
};
|
|
163
|
+
|
|
164
|
+
const handleVoiceToggle = () => {
|
|
165
|
+
// Ignore while the agent is working.
|
|
166
|
+
if (!state.turn) return;
|
|
167
|
+
|
|
168
|
+
if (voice) {
|
|
169
|
+
stopVoiceSession();
|
|
170
|
+
return;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
if (!voiceInput) {
|
|
174
|
+
cli.setPrompt(
|
|
175
|
+
getCliPrompt(
|
|
176
|
+
state.subagentName,
|
|
177
|
+
styleText(
|
|
178
|
+
"yellow",
|
|
179
|
+
`Voice input not configured. Set \`voiceInput\` in your config to enable ${voiceToggle.label}.`,
|
|
180
|
+
),
|
|
181
|
+
),
|
|
182
|
+
);
|
|
183
|
+
cli.prompt(true);
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
const startCursor = cli.cursor;
|
|
188
|
+
const session = startVoiceSession({
|
|
189
|
+
config: voiceInput,
|
|
190
|
+
callbacks: {
|
|
191
|
+
onTranscript: (delta) => {
|
|
192
|
+
if (!voice) return;
|
|
193
|
+
const insertAt = voice.startCursor + voice.transcriptLength;
|
|
194
|
+
// Insert delta at the recording's insertion point. User input is
|
|
195
|
+
// swallowed while recording, so the buffer around `insertAt` is
|
|
196
|
+
// stable.
|
|
197
|
+
const before = cli.line.slice(0, insertAt);
|
|
198
|
+
const after = cli.line.slice(insertAt);
|
|
199
|
+
// `line` and `cursor` are declared readonly in the Node typings but
|
|
200
|
+
// are writable at runtime — the existing code already patches
|
|
201
|
+
// `_refreshLine` in the same way.
|
|
202
|
+
const mutableCli = /** @type {{ line: string, cursor: number }} */ (
|
|
203
|
+
/** @type {unknown} */ (cli)
|
|
204
|
+
);
|
|
205
|
+
mutableCli.line = before + delta + after;
|
|
206
|
+
mutableCli.cursor = insertAt + delta.length;
|
|
207
|
+
voice.transcriptLength += delta.length;
|
|
208
|
+
// @ts-expect-error - internal property
|
|
209
|
+
cli._refreshLine?.();
|
|
210
|
+
},
|
|
211
|
+
onError: (err) => {
|
|
212
|
+
voice = null;
|
|
213
|
+
cli.setPrompt(
|
|
214
|
+
getCliPrompt(
|
|
215
|
+
state.subagentName,
|
|
216
|
+
styleText("red", `Voice input error: ${err.message}`),
|
|
217
|
+
),
|
|
218
|
+
);
|
|
219
|
+
cli.prompt(true);
|
|
220
|
+
},
|
|
221
|
+
onClose: () => {
|
|
222
|
+
if (!voice) return;
|
|
223
|
+
voice = null;
|
|
224
|
+
cli.setPrompt(currentCliPrompt);
|
|
225
|
+
// @ts-expect-error - internal property
|
|
226
|
+
cli._refreshLine?.();
|
|
227
|
+
},
|
|
228
|
+
},
|
|
229
|
+
});
|
|
230
|
+
voice = { session, startCursor, transcriptLength: 0 };
|
|
231
|
+
cli.setPrompt(
|
|
232
|
+
getCliPrompt(
|
|
233
|
+
state.subagentName,
|
|
234
|
+
styleText(["red", "bold"], `● REC (${voiceToggle.label} to stop)`),
|
|
235
|
+
),
|
|
236
|
+
);
|
|
237
|
+
// @ts-expect-error - internal property
|
|
238
|
+
cli._refreshLine?.();
|
|
239
|
+
};
|
|
240
|
+
|
|
139
241
|
const handleCtrlC = () => {
|
|
242
|
+
// Stop voice recording first if active.
|
|
243
|
+
if (voice) {
|
|
244
|
+
stopVoiceSession();
|
|
245
|
+
return;
|
|
246
|
+
}
|
|
247
|
+
|
|
140
248
|
// Agent turn: pause auto-approve; do not clear input.
|
|
141
249
|
if (!state.turn) {
|
|
142
250
|
agentCommands.pauseAutoApprove();
|
|
@@ -192,14 +300,20 @@ export function startInteractiveSession({
|
|
|
192
300
|
};
|
|
193
301
|
|
|
194
302
|
// Pre-readline pipeline:
|
|
195
|
-
// stdin -> interrupt (Ctrl-C / Ctrl-D) -> paste (bracketed paste) -> readline
|
|
303
|
+
// stdin -> interrupt (Ctrl-C / Ctrl-D) -> mute (voice recording) -> paste (bracketed paste) -> readline
|
|
196
304
|
const interrupt = createInterruptTransform({
|
|
197
305
|
onCtrlC: handleCtrlC,
|
|
198
306
|
onCtrlD: handleCtrlD,
|
|
307
|
+
onVoiceToggle: handleVoiceToggle,
|
|
308
|
+
voiceToggleByte: voiceToggle.byte,
|
|
199
309
|
});
|
|
310
|
+
// While a voice session is recording, swallow all stdin bytes other than
|
|
311
|
+
// Ctrl-C / Ctrl-D / the voice toggle key so transcript insertion stays
|
|
312
|
+
// consistent.
|
|
313
|
+
const mute = createMuteTransform({ isMuted: () => voice !== null });
|
|
200
314
|
const paste = createPasteHandler();
|
|
201
315
|
|
|
202
|
-
process.stdin.pipe(interrupt).pipe(paste.transform);
|
|
316
|
+
process.stdin.pipe(interrupt).pipe(mute).pipe(paste.transform);
|
|
203
317
|
|
|
204
318
|
// Enable bracketed paste mode
|
|
205
319
|
if (process.stdout.isTTY) {
|
|
@@ -1,19 +1,31 @@
|
|
|
1
1
|
import { Transform } from "node:stream";
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* Create a Transform that intercepts Ctrl-C (0x03)
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Create a Transform that intercepts Ctrl-C (0x03), Ctrl-D (0x04), and an
|
|
5
|
+
* optional "voice toggle" byte (default Ctrl-O, 0x0f). When one of those
|
|
6
|
+
* bytes is seen anywhere in a chunk, the corresponding callback is invoked
|
|
7
|
+
* and the entire chunk is dropped so that downstream consumers (e.g.
|
|
7
8
|
* readline) never observe it. All other input flows through unchanged.
|
|
8
9
|
*
|
|
9
|
-
*
|
|
10
|
+
* Priority when multiple handled bytes appear in the same chunk:
|
|
11
|
+
* Ctrl-C > Ctrl-D > voice toggle.
|
|
10
12
|
*
|
|
11
13
|
* @param {object} handlers
|
|
12
14
|
* @param {() => void} handlers.onCtrlC - Called when Ctrl-C is detected
|
|
13
15
|
* @param {() => void} handlers.onCtrlD - Called when Ctrl-D is detected
|
|
16
|
+
* @param {() => void} [handlers.onVoiceToggle]
|
|
17
|
+
* Called when the voice toggle byte is detected.
|
|
18
|
+
* @param {number} [handlers.voiceToggleByte]
|
|
19
|
+
* Byte value for the voice toggle key. Defaults to 0x0f (Ctrl-O).
|
|
14
20
|
* @returns {Transform}
|
|
15
21
|
*/
|
|
16
|
-
export function createInterruptTransform({
|
|
22
|
+
export function createInterruptTransform({
|
|
23
|
+
onCtrlC,
|
|
24
|
+
onCtrlD,
|
|
25
|
+
onVoiceToggle,
|
|
26
|
+
voiceToggleByte = 0x0f,
|
|
27
|
+
}) {
|
|
28
|
+
const voiceToggleChar = String.fromCharCode(voiceToggleByte);
|
|
17
29
|
return new Transform({
|
|
18
30
|
transform(chunk, _encoding, callback) {
|
|
19
31
|
const data = chunk.toString("utf8");
|
|
@@ -27,6 +39,11 @@ export function createInterruptTransform({ onCtrlC, onCtrlD }) {
|
|
|
27
39
|
callback();
|
|
28
40
|
return;
|
|
29
41
|
}
|
|
42
|
+
if (onVoiceToggle && data.includes(voiceToggleChar)) {
|
|
43
|
+
onVoiceToggle();
|
|
44
|
+
callback();
|
|
45
|
+
return;
|
|
46
|
+
}
|
|
30
47
|
this.push(chunk);
|
|
31
48
|
callback();
|
|
32
49
|
},
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { Transform } from "node:stream";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Create a Transform that swallows all chunks while `isMuted()` returns true,
|
|
5
|
+
* and passes them through unchanged while it returns false.
|
|
6
|
+
*
|
|
7
|
+
* Intended to sit between `createInterruptTransform` and the paste handler so
|
|
8
|
+
* that callers can fully silence regular stdin input during special modes
|
|
9
|
+
* (e.g. while a voice input session is recording) without coupling that
|
|
10
|
+
* concern to the interrupt-detection logic.
|
|
11
|
+
*
|
|
12
|
+
* @param {object} options
|
|
13
|
+
* @param {() => boolean} options.isMuted
|
|
14
|
+
* Called for each incoming chunk; when true the chunk is dropped.
|
|
15
|
+
* @returns {Transform}
|
|
16
|
+
*/
|
|
17
|
+
export function createMuteTransform({ isMuted }) {
|
|
18
|
+
return new Transform({
|
|
19
|
+
transform(chunk, _encoding, callback) {
|
|
20
|
+
if (!isMuted()) {
|
|
21
|
+
this.push(chunk);
|
|
22
|
+
}
|
|
23
|
+
callback();
|
|
24
|
+
},
|
|
25
|
+
});
|
|
26
|
+
}
|
package/src/config.d.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { AskURLToolOptions } from "./tools/askURL.mjs";
|
|
|
4
4
|
import { AskWebToolOptions } from "./tools/askWeb.mjs";
|
|
5
5
|
import { ExecCommandSanboxConfig } from "./tools/execCommand";
|
|
6
6
|
import { ClaudeCodePluginRepo } from "./claudeCodePlugin.mjs";
|
|
7
|
+
import { VoiceInputConfig } from "./voiceInput.mjs";
|
|
7
8
|
|
|
8
9
|
export type AppConfig = {
|
|
9
10
|
model?: string;
|
|
@@ -21,6 +22,7 @@ export type AppConfig = {
|
|
|
21
22
|
};
|
|
22
23
|
mcpServers?: Record<string, MCPServerConfig>;
|
|
23
24
|
notifyCmd?: string;
|
|
25
|
+
voiceInput?: VoiceInputConfig;
|
|
24
26
|
claudeCodePlugins?: ClaudeCodePluginRepo[];
|
|
25
27
|
};
|
|
26
28
|
|
package/src/config.mjs
CHANGED
|
@@ -98,6 +98,9 @@ export async function loadAppConfig(options = {}) {
|
|
|
98
98
|
...(merged.claudeCodePlugins ?? []),
|
|
99
99
|
...(config.claudeCodePlugins ?? []),
|
|
100
100
|
],
|
|
101
|
+
voiceInput: config.voiceInput
|
|
102
|
+
? { ...(merged.voiceInput ?? {}), ...config.voiceInput }
|
|
103
|
+
: merged.voiceInput,
|
|
101
104
|
};
|
|
102
105
|
}
|
|
103
106
|
|