@iinm/plain-agent 1.7.18 → 1.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/package.json +1 -1
- package/src/cliArgs.mjs +31 -1
- package/src/cliBatch.mjs +22 -0
- package/src/cliCost.mjs +274 -0
- package/src/cliInteractive.mjs +28 -0
- package/src/env.mjs +9 -0
- package/src/main.mjs +15 -0
- package/src/usageStore.mjs +167 -0
- package/src/voiceInput.mjs +24 -634
- package/src/voiceInputGemini.mjs +257 -0
- package/src/voiceInputOpenAI.mjs +261 -0
- package/src/voiceInputSession.mjs +250 -0
- package/src/voiceToggleKey.mjs +62 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import { spawn, spawnSync } from "node:child_process";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* @typedef {Object} VoiceRecorderConfig
|
|
5
|
+
* @property {string} command
|
|
6
|
+
* @property {string[]} args
|
|
7
|
+
* Must write raw 16-bit little-endian mono PCM to stdout at the sample
|
|
8
|
+
* rate required by the chosen provider (24 kHz for OpenAI, 16 kHz for
|
|
9
|
+
* Gemini).
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* @typedef {Object} VoiceSessionCallbacks
|
|
14
|
+
* @property {(text: string) => void} onTranscript
|
|
15
|
+
* @property {(error: Error) => void} onError
|
|
16
|
+
* @property {() => void} [onClose]
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* @typedef {Object} VoiceSession
|
|
21
|
+
* @property {() => Promise<void>} stop
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* @typedef {Object} RecorderHandle
|
|
26
|
+
* @property {() => void} stop
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
/**
 * Exported debug flag: true only when the PLAIN_VOICE_DEBUG environment
 * variable is exactly the string "1". Evaluated once, at module load.
 */
export const VOICE_DEBUG = process.env.PLAIN_VOICE_DEBUG === "1";
|
|
30
|
+
|
|
31
|
+
/**
 * Build the ordered list of recorder command lines to try for capturing
 * microphone audio as raw signed 16-bit mono PCM on stdout.
 *
 * Order of preference: `arecord` (skipped on macOS), then `sox`, then
 * `ffmpeg` (avfoundation input on macOS, ALSA input elsewhere).
 *
 * @param {number} sampleRate - Sample rate in Hz passed to every recorder.
 * @returns {VoiceRecorderConfig[]}
 */
export function getRecorderCandidates(sampleRate) {
  const hz = String(sampleRate);
  const onMac = process.platform === "darwin";

  /** @type {VoiceRecorderConfig} */
  const sox = {
    command: "sox",
    args: [
      "-q",
      "-d",
      ...["-b", "16"],
      ...["-c", "1"],
      ...["-r", hz],
      ...["-e", "signed-integer"],
      ...["-t", "raw"],
      "-",
    ],
  };

  // ffmpeg needs a platform-specific capture device.
  const captureSource = onMac
    ? ["-f", "avfoundation", "-i", ":0"]
    : ["-f", "alsa", "-i", "default"];

  /** @type {VoiceRecorderConfig} */
  const ffmpeg = {
    command: "ffmpeg",
    args: ["-hide_banner", "-loglevel", "error"].concat(captureSource, [
      "-ac",
      "1",
      "-ar",
      hz,
      "-f",
      "s16le",
      "-",
    ]),
  };

  if (onMac) {
    return [sox, ffmpeg];
  }

  /** @type {VoiceRecorderConfig} */
  const arecord = {
    command: "arecord",
    args: ["-q", "-f", "S16_LE", "-c", "1", "-r", hz, "-t", "raw"],
  };
  return [arecord, sox, ffmpeg];
}
|
|
89
|
+
|
|
90
|
+
/**
 * Pick the first candidate whose command is available on this machine.
 *
 * @param {VoiceRecorderConfig[]} candidates - Recorders in order of preference.
 * @returns {VoiceRecorderConfig | null} The first usable recorder, or null
 *   when none of the candidate commands is available.
 */
export function detectRecorder(candidates) {
  for (const candidate of candidates) {
    if (isCommandAvailable(candidate.command)) {
      return candidate;
    }
  }
  return null;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Check whether an executable can be resolved on this machine.
 *
 * Uses `where` on Windows and the shell builtin `command -v` elsewhere.
 * The name is handed to the shell as a positional parameter rather than
 * spliced into the script text, so spaces or shell metacharacters in the
 * name are never interpreted as shell syntax.
 *
 * @param {string} command - Executable name (or path) to look up.
 * @returns {boolean} true when the lookup process exits with status 0;
 *   false otherwise, including for an empty or non-string name.
 */
export function isCommandAvailable(command) {
  // An empty operand would make `command -v` succeed vacuously on some shells.
  if (typeof command !== "string" || command === "") {
    return false;
  }

  if (process.platform === "win32") {
    const result = spawnSync("where", [command], { stdio: "ignore" });
    return result.status === 0;
  }

  // `sh -c script name arg...`: "sh" becomes $0 and the command name $1.
  const result = spawnSync("sh", ["-c", 'command -v "$1"', "sh", command], {
    stdio: "ignore",
  });
  return result.status === 0;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Spawn a recorder subprocess that emits raw PCM on stdout, and wire its
 * lifecycle events to the provided callbacks. This is purely transport
 * plumbing — it knows nothing about any specific STT provider.
 *
 * Error reporting:
 * - A spawn failure is reported through `onError` ("Recorder failed to
 *   start…"), with a "command not found" hint for ENOENT.
 * - A non-zero exit code that was not caused by a signal is reported through
 *   `onError`, with whatever stderr output has been collected so far.
 * - Termination by a signal (e.g. after `stop()`) is not treated as an error.
 *
 * NOTE(review): `onExit` is driven by the child's "exit" event. Node documents
 * that "exit" may or may not fire after an "error" event, so callers should
 * not rely on `onExit` following a spawn failure.
 *
 * @param {object} options
 * @param {VoiceRecorderConfig} options.recorder
 * @param {(chunk: Buffer) => void} options.onAudio - Receives each stdout chunk.
 * @param {(error: Error) => void} options.onError
 * @param {() => void} options.onExit - Called after the recorder subprocess exits (for any reason).
 * @returns {RecorderHandle} Handle whose `stop()` sends SIGTERM to the child.
 */
export function startRecorder({ recorder, onAudio, onError, onExit }) {
  const child = spawn(recorder.command, recorder.args, {
    stdio: ["ignore", "pipe", "pipe"],
  });

  // Let the stream decode UTF-8 itself: decoding each chunk independently
  // corrupts multi-byte characters that straddle a chunk boundary.
  child.stderr.setEncoding("utf8");
  let stderrOutput = "";
  child.stderr.on("data", (text) => {
    stderrOutput += text;
  });

  child.on("error", (err) => {
    const suffix =
      /** @type {NodeJS.ErrnoException} */ (err).code === "ENOENT"
        ? ` (command "${recorder.command}" not found)`
        : "";
    onError(new Error(`Recorder failed to start${suffix}: ${err.message}`));
  });

  child.on("exit", (code, signal) => {
    // A signal-induced exit (code === null) is an intentional stop, not a failure.
    if (code !== 0 && signal === null) {
      const stderrText = stderrOutput.trim();
      onError(
        new Error(
          `Recorder "${recorder.command}" exited with code ${code}${
            stderrText ? `: ${stderrText}` : ""
          }`,
        ),
      );
    }
    onExit();
  });

  child.stdout.on("data", onAudio);

  return {
    stop() {
      try {
        child.kill("SIGTERM");
      } catch {
        // Best effort: the process may already be gone.
      }
    },
  };
}
|
|
169
|
+
|
|
170
|
+
/**
 * Create a session that has already failed. The error is delivered on a
 * microtask (never synchronously): `onError` is invoked first, then
 * `onClose` if one was supplied.
 *
 * @param {VoiceSessionCallbacks} callbacks
 * @param {Error} error - The failure to report through `callbacks.onError`.
 * @returns {VoiceSession} A session whose `stop()` resolves without doing anything.
 */
export function failVoiceSessionAsync(callbacks, error) {
  const report = () => {
    callbacks.onError(error);
    const close = callbacks.onClose;
    if (close !== undefined && close !== null) {
      close.call(callbacks);
    }
  };
  queueMicrotask(report);

  const stop = async () => {};
  return { stop };
}
|
|
184
|
+
|
|
185
|
+
/**
 * Create a streaming normalizer that drops whitespace sitting between two
 * CJK characters of scripts written without word spacing. Some providers
 * return Japanese transcripts with morpheme-separating spaces
 * ("そう 、 声 で"); mixed strings like "Windows を使う" keep their
 * inter-script spaces.
 *
 * Hangul syllables are deliberately excluded from the dropping rule: Korean
 * separates words with spaces, so removing them would corrupt the text.
 *
 * The normalizer is stateful so a transcript can be fed in chunks:
 * - `push(text)` returns the normalized text for that chunk. Whitespace at
 *   the end of a chunk is withheld until the next non-space character
 *   decides whether it is kept or dropped.
 * - `flush()` returns any withheld whitespace and resets the state.
 *
 * Only space, tab and the ideographic space (U+3000) count as whitespace.
 *
 * @returns {{ push: (text: string) => string, flush: () => string }}
 */
export function createCJKSpaceNormalizer() {
  let prevChar = "";
  let pendingSpaces = "";
  const isSpace = (/** @type {string} */ c) =>
    c === " " || c === "\t" || c === "\u3000";
  // Characters whose neighbours should be joined without whitespace.
  const joinsWithoutSpace = (/** @type {string} */ c) =>
    isCJKChar(c) && !isHangulSyllable(c);

  return {
    push(text) {
      let out = "";
      // for...of iterates by code point, so astral CJK characters stay intact.
      for (const ch of text) {
        if (isSpace(ch)) {
          pendingSpaces += ch;
          continue;
        }
        if (pendingSpaces.length > 0) {
          if (!(joinsWithoutSpace(prevChar) && joinsWithoutSpace(ch))) {
            out += pendingSpaces;
          }
          pendingSpaces = "";
        }
        out += ch;
        prevChar = ch;
      }
      return out;
    },
    flush() {
      const out = pendingSpaces;
      pendingSpaces = "";
      prevChar = "";
      return out;
    },
  };
}

/**
 * Whether the first code point of `ch` falls in one of the CJK-related
 * Unicode ranges listed below. Returns false for an empty string.
 *
 * @param {string} ch
 * @returns {boolean}
 */
function isCJKChar(ch) {
  const code = ch.codePointAt(0);
  if (code === undefined) return false;
  return (
    (code >= 0x3000 && code <= 0x33ff) ||
    (code >= 0x3400 && code <= 0x4dbf) ||
    (code >= 0x4e00 && code <= 0x9fff) ||
    (code >= 0xac00 && code <= 0xd7af) ||
    (code >= 0xf900 && code <= 0xfaff) ||
    (code >= 0xff00 && code <= 0xffef) ||
    (code >= 0x20000 && code <= 0x2ffff)
  );
}

/**
 * Whether the first code point of `ch` is in the Hangul Syllables block
 * (U+AC00–U+D7AF). Returns false for an empty string.
 *
 * @param {string} ch
 * @returns {boolean}
 */
function isHangulSyllable(ch) {
  const code = ch.codePointAt(0);
  return code !== undefined && code >= 0xac00 && code <= 0xd7af;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Type guard for "any non-null value whose typeof is 'object'". Arrays and
 * other built-in objects pass; functions and primitives do not.
 *
 * @param {unknown} value
 * @returns {value is Record<string, unknown>}
 */
export function isObjectLike(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @typedef {Object} VoiceToggleKey
|
|
3
|
+
* @property {number} byte
|
|
4
|
+
* @property {string} label
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Bytes reserved for other terminal/readline uses — cannot be used as a voice toggle.
//   0x03 = Ctrl-C (SIGINT)
//   0x04 = Ctrl-D (EOF / readline exit)
//   0x09 = Ctrl-I (Tab)
//   0x0a = Ctrl-J (LF / Enter)
//   0x0d = Ctrl-M (CR / Enter)
//   0x11 = Ctrl-Q (XON: resume terminal output)
//   0x13 = Ctrl-S (XOFF: suspend terminal output)
//   0x1b = Ctrl-[ (ESC: first byte of every terminal escape sequence, such as
//          arrow and function keys, so a byte-level matcher would misfire)
const RESERVED_TERMINAL_BYTES = new Set([
  0x03, 0x04, 0x09, 0x0a, 0x0d, 0x11, 0x13, 0x1b,
]);

/**
 * Parse a "ctrl-<char>" binding into the raw byte the terminal sends in
 * raw mode. Only Ctrl-<char> is supported because it is the only family
 * the pre-readline pipeline can recognize without a full key decoder.
 *
 * The spec is trimmed and matched case-insensitively. Accepted characters
 * are the letters a–z and the punctuation `\ ] ^ _`; bindings that collide
 * with a reserved terminal/readline byte (including ESC, i.e. `ctrl-[`) are
 * rejected.
 *
 * @param {string | undefined} spec - Binding such as "ctrl-o"; defaults to
 *   "ctrl-o" when undefined or null.
 * @returns {VoiceToggleKey} The control byte and a display label ("Ctrl-O").
 * @throws {Error} When the spec is not of the form "ctrl-<char>", uses an
 *   unsupported character, or maps to a reserved byte.
 */
export function parseVoiceToggleKey(spec) {
  const raw = (spec ?? "ctrl-o").trim().toLowerCase();

  const match = /^ctrl-(.)$/.exec(raw);
  if (!match) {
    throw new Error(
      `Invalid voiceInput.toggleKey "${spec}". Expected "ctrl-<char>".`,
    );
  }

  const ch = match[1];
  const code = ch.charCodeAt(0);

  // Subtracting a fixed offset from the character's ASCII code yields the
  // control byte (0x01–0x1f) the terminal sends for that Ctrl combination.
  let byte;
  if (code >= 0x61 && code <= 0x7a) {
    // a–z (0x61–0x7a): subtract 0x60 → 0x01 (Ctrl-A) – 0x1a (Ctrl-Z)
    byte = code - 0x60;
  } else if (code >= 0x5b && code <= 0x5f) {
    // [ \ ] ^ _ (0x5b–0x5f): subtract 0x40 → 0x1b (Ctrl-[) – 0x1f (Ctrl-_)
    byte = code - 0x40;
  } else {
    throw new Error(
      `Unsupported voiceInput.toggleKey "${spec}". Use ctrl-<letter> or ctrl-<[ \\ ] ^ _>.`,
    );
  }

  if (RESERVED_TERMINAL_BYTES.has(byte)) {
    throw new Error(
      `voiceInput.toggleKey "${spec}" conflicts with a reserved terminal/readline key.`,
    );
  }

  return { byte, label: `Ctrl-${ch.toUpperCase()}` };
}
|