@iinm/plain-agent 1.8.4 → 1.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/bin/plain +1 -1
  2. package/package.json +8 -9
  3. package/sandbox/bin/plain-sandbox +13 -0
  4. package/src/agent.d.ts +52 -0
  5. package/src/agent.mjs +204 -0
  6. package/src/agentLoop.mjs +419 -0
  7. package/src/agentState.mjs +41 -0
  8. package/src/claudeCodePlugin.mjs +164 -0
  9. package/src/cliArgs.mjs +175 -0
  10. package/src/cliBatch.mjs +147 -0
  11. package/src/cliCommands.mjs +283 -0
  12. package/src/cliCompleter.mjs +227 -0
  13. package/src/cliCost.mjs +309 -0
  14. package/src/cliFormatter.mjs +518 -0
  15. package/src/cliInteractive.mjs +533 -0
  16. package/src/cliInterruptTransform.mjs +51 -0
  17. package/src/cliMuteTransform.mjs +26 -0
  18. package/src/cliPasteTransform.mjs +183 -0
  19. package/src/config.d.ts +36 -0
  20. package/src/config.mjs +197 -0
  21. package/src/context/loadAgentRoles.mjs +267 -0
  22. package/src/context/loadPrompts.mjs +303 -0
  23. package/src/context/loadUserMessageContext.mjs +147 -0
  24. package/src/costTracker.mjs +210 -0
  25. package/src/env.mjs +44 -0
  26. package/src/main.mjs +281 -0
  27. package/src/mcpClient.mjs +351 -0
  28. package/src/mcpIntegration.mjs +160 -0
  29. package/src/model.d.ts +109 -0
  30. package/src/modelCaller.mjs +32 -0
  31. package/src/modelDefinition.d.ts +92 -0
  32. package/src/prompt.mjs +138 -0
  33. package/src/providers/anthropic.d.ts +248 -0
  34. package/src/providers/anthropic.mjs +587 -0
  35. package/src/providers/bedrock.d.ts +249 -0
  36. package/src/providers/bedrock.mjs +700 -0
  37. package/src/providers/gemini.d.ts +208 -0
  38. package/src/providers/gemini.mjs +754 -0
  39. package/src/providers/openai.d.ts +281 -0
  40. package/src/providers/openai.mjs +544 -0
  41. package/src/providers/openaiCompatible.d.ts +147 -0
  42. package/src/providers/openaiCompatible.mjs +652 -0
  43. package/src/providers/platform/awsSigV4.mjs +184 -0
  44. package/src/providers/platform/azure.mjs +42 -0
  45. package/src/providers/platform/bedrock.mjs +78 -0
  46. package/src/providers/platform/googleCloud.mjs +34 -0
  47. package/src/subagent.mjs +265 -0
  48. package/src/tmpfile.mjs +27 -0
  49. package/src/tool.d.ts +74 -0
  50. package/src/toolExecutor.mjs +236 -0
  51. package/src/toolInputValidator.mjs +183 -0
  52. package/src/toolUseApprover.mjs +99 -0
  53. package/src/tools/askURL.mjs +209 -0
  54. package/src/tools/askWeb.mjs +208 -0
  55. package/src/tools/compactContext.d.ts +4 -0
  56. package/src/tools/compactContext.mjs +87 -0
  57. package/src/tools/execCommand.d.ts +22 -0
  58. package/src/tools/execCommand.mjs +200 -0
  59. package/src/tools/patchFile.d.ts +4 -0
  60. package/src/tools/patchFile.mjs +133 -0
  61. package/src/tools/switchToMainAgent.d.ts +3 -0
  62. package/src/tools/switchToMainAgent.mjs +43 -0
  63. package/src/tools/switchToSubagent.d.ts +4 -0
  64. package/src/tools/switchToSubagent.mjs +59 -0
  65. package/src/tools/tmuxCommand.d.ts +14 -0
  66. package/src/tools/tmuxCommand.mjs +194 -0
  67. package/src/tools/writeFile.d.ts +4 -0
  68. package/src/tools/writeFile.mjs +56 -0
  69. package/src/usageStore.mjs +167 -0
  70. package/src/utils/evalJSONConfig.mjs +72 -0
  71. package/src/utils/matchValue.d.ts +6 -0
  72. package/src/utils/matchValue.mjs +40 -0
  73. package/src/utils/noThrow.mjs +31 -0
  74. package/src/utils/notify.mjs +29 -0
  75. package/src/utils/parseFileRange.mjs +18 -0
  76. package/src/utils/parseFrontmatter.mjs +19 -0
  77. package/src/utils/readFileRange.mjs +33 -0
  78. package/src/utils/retryOnError.mjs +41 -0
  79. package/src/voiceInput.mjs +61 -0
  80. package/src/voiceInputGemini.mjs +105 -0
  81. package/src/voiceInputOpenAI.mjs +104 -0
  82. package/src/voiceInputSession.mjs +543 -0
  83. package/src/voiceToggleKey.mjs +62 -0
  84. package/dist/main.mjs +0 -473
  85. package/dist/main.mjs.map +0 -7
@@ -0,0 +1,56 @@
1
+ /**
2
+ * @import { Tool } from '../tool'
3
+ * @import { WriteFileInput } from './writeFile'
4
+ */
5
+
6
+ import fs from "node:fs/promises";
7
+ import path from "node:path";
8
+ import { noThrow } from "../utils/noThrow.mjs";
9
+
/**
 * Tool definition for `write_file`.
 *
 * Writes string content to a path, creating the destination directory
 * first when it does not exist.
 *
 * @type {Tool}
 */
export const writeFileTool = {
  def: {
    name: "write_file",
    description: "Write a file",
    inputSchema: {
      type: "object",
      properties: {
        filePath: { type: "string" },
        content: { type: "string" },
      },
      required: ["filePath", "content"],
    },
  },

  /**
   * Write `input.content` to `input.filePath` as UTF-8.
   * Failures are returned as an `Error` value (via `noThrow`) rather than
   * thrown.
   *
   * @param {WriteFileInput} input
   * @returns {Promise<string | Error>}
   */
  impl: async (input) => {
    const outcome = await noThrow(async () => {
      const { filePath, content } = input;
      const target = path.resolve(filePath);

      // The parent directory may not exist yet; create it (and any
      // missing ancestors) before writing.
      await fs.mkdir(path.dirname(target), { recursive: true });
      await fs.writeFile(target, content, "utf8");

      // Report the path exactly as the caller supplied it.
      return `Wrote to file: ${filePath}`;
    });
    return outcome;
  },

  /**
   * Reduce the tool input to the fields used for approval: only the
   * file path is kept, the content is dropped.
   *
   * @param {Record<string, unknown>} input
   * @returns {Record<string, unknown>}
   */
  maskApprovalInput: (input) => {
    const { filePath } = /** @type {WriteFileInput} */ (input);
    return { filePath };
  },
};
@@ -0,0 +1,167 @@
1
+ /**
2
+ * @import { CostSummary } from "./costTracker.mjs"
3
+ */
4
+
5
+ import fs from "node:fs/promises";
6
+ import path from "node:path";
7
+ import { USAGE_LOG_PATH } from "./env.mjs";
8
+
9
+ /**
10
+ * @typedef {Object} UsageRecord
11
+ * @property {string} timestamp - ISO 8601 timestamp
12
+ * @property {string} sessionId
13
+ * @property {"interactive" | "batch"} mode
14
+ * @property {string} modelName - e.g. "claude-sonnet-4-6+thinking-high"
15
+ * @property {string} workingDir
16
+ * @property {string} currency - e.g. "USD"
17
+ * @property {string} unit - e.g. "1M"
18
+ * @property {number | null} totalCost - null when no pricing configured
19
+ * @property {Record<string, number>} tokens - aggregated token counts by path
20
+ */
21
+
/**
 * Upper bound, in bytes, for a single serialized JSONL line in the usage log.
 *
 * The limit is kept below PIPE_BUF (4096 bytes on Linux), the size this
 * module relies on for atomic O_APPEND writes, so that there is headroom
 * even when model/session names contain multi-byte UTF-8 characters.
 */
const MAX_RECORD_BYTES = 3 * 1024;
29
+
/**
 * Append one usage record, serialized as a single JSON line, to the
 * persistent usage log.
 *
 * The record is written with one `fs.appendFile` call. The design relies on
 * POSIX `O_APPEND` semantics: each write lands at end-of-file and is treated
 * as atomic while the payload stays <= PIPE_BUF (4096 bytes on Linux), so
 * several plain-agent sessions finishing at once should not interleave
 * their lines. Oversized records are rejected instead of written.
 *
 * @param {UsageRecord} record
 * @param {{ path?: string }} [options] - `path` overrides the default log location.
 * @returns {Promise<void>}
 * @throws {Error} When the serialized record exceeds `MAX_RECORD_BYTES`.
 */
export async function appendUsageRecord(record, options = {}) {
  const target = options.path ?? USAGE_LOG_PATH;

  const serialized = `${JSON.stringify(record)}\n`;
  const size = Buffer.byteLength(serialized, "utf8");
  const fitsInOneWrite = size <= MAX_RECORD_BYTES;
  if (!fitsInOneWrite) {
    throw new Error(
      `Usage record exceeds ${MAX_RECORD_BYTES} bytes (${size}); refusing to write to keep appends atomic.`,
    );
  }

  // The log directory may not exist on first use.
  const parentDir = path.dirname(target);
  await fs.mkdir(parentDir, { recursive: true });
  await fs.appendFile(target, serialized, { encoding: "utf8" });
}
55
+
/**
 * Load every usage record from the log file.
 *
 * A missing log file yields empty results. Empty lines are ignored. Lines
 * that fail to parse as JSON, or that parse but do not look like a usage
 * record, are reported in `skipped` with their 1-based line number.
 *
 * @param {{ path?: string }} [options] - `path` overrides the default log location.
 * @returns {Promise<{ records: UsageRecord[], skipped: { line: number, reason: string }[] }>}
 */
export async function readUsageRecords(options = {}) {
  const logPath = options.path ?? USAGE_LOG_PATH;

  /** @type {string} */
  let text;
  try {
    text = await fs.readFile(logPath, "utf8");
  } catch (err) {
    const isMissingFile =
      err instanceof Error &&
      /** @type {NodeJS.ErrnoException} */ (err).code === "ENOENT";
    if (!isMissingFile) {
      throw err;
    }
    // No log yet: nothing recorded, nothing skipped.
    return { records: [], skipped: [] };
  }

  /** @type {UsageRecord[]} */
  const records = [];
  /** @type {{ line: number, reason: string }[]} */
  const skipped = [];

  for (const [index, raw] of text.split("\n").entries()) {
    if (raw.length === 0) {
      continue;
    }
    const lineNumber = index + 1;
    try {
      const candidate = JSON.parse(raw);
      if (isUsageRecord(candidate)) {
        records.push(candidate);
      } else {
        skipped.push({ line: lineNumber, reason: "invalid shape" });
      }
    } catch (err) {
      skipped.push({
        line: lineNumber,
        reason: err instanceof Error ? err.message : String(err),
      });
    }
  }

  return { records, skipped };
}
103
+
/**
 * Turn a finished session's cost summary into a usage record.
 *
 * Token counts are taken from each entry of `costSummary.breakdown`. When
 * the breakdown has no entries there is nothing to record and `null` is
 * returned. An undefined `totalCost` is stored as `null`.
 *
 * @param {Object} args
 * @param {string} args.sessionId
 * @param {"interactive" | "batch"} args.mode
 * @param {string} args.modelName
 * @param {string} args.workingDir
 * @param {CostSummary} args.costSummary
 * @param {Date} [args.now] - Timestamp source; defaults to the current time.
 * @returns {UsageRecord | null}
 */
export function buildUsageRecord({
  sessionId,
  mode,
  modelName,
  workingDir,
  costSummary,
  now,
}) {
  /** @type {Record<string, number>} */
  const tokens = {};
  Object.entries(costSummary.breakdown).forEach(([name, entry]) => {
    tokens[name] = entry.tokens;
  });

  const hasTokens = Object.keys(tokens).length > 0;
  if (!hasTokens) {
    return null;
  }

  const recordedAt = now ?? new Date();
  const { currency, unit, totalCost } = costSummary;

  return {
    timestamp: recordedAt.toISOString(),
    sessionId,
    mode,
    modelName,
    workingDir,
    currency,
    unit,
    totalCost: totalCost === undefined ? null : totalCost,
    tokens,
  };
}
147
+
/**
 * Runtime type guard for a parsed usage-log line.
 *
 * Checks every field of `UsageRecord`. `tokens` must be a plain (non-array)
 * object whose values are all numbers, matching its declared type
 * `Record<string, number>`; an array or an object holding non-numeric
 * values is rejected so callers can rely on the counts being numbers.
 *
 * @param {unknown} value
 * @returns {value is UsageRecord}
 */
function isUsageRecord(value) {
  if (typeof value !== "object" || value === null) return false;
  const r = /** @type {Record<string, unknown>} */ (value);

  const { tokens } = r;
  const hasNumericTokens =
    typeof tokens === "object" &&
    tokens !== null &&
    // `typeof [] === "object"`, so arrays need an explicit rejection.
    !Array.isArray(tokens) &&
    Object.values(tokens).every((count) => typeof count === "number");

  return (
    typeof r.timestamp === "string" &&
    typeof r.sessionId === "string" &&
    (r.mode === "interactive" || r.mode === "batch") &&
    typeof r.modelName === "string" &&
    typeof r.workingDir === "string" &&
    typeof r.currency === "string" &&
    typeof r.unit === "string" &&
    (r.totalCost === null || typeof r.totalCost === "number") &&
    hasNumericTokens
  );
}
@@ -0,0 +1,72 @@
/**
 * Recursively evaluate a JSON config value, expanding directive objects.
 *
 * A directive is an object with exactly one key:
 * - `{ "$regex": "<source>" }` becomes a `RegExp` (string source only).
 * - `{ "$env": "<NAME>" }` becomes the value of that environment variable.
 * - `{ "$has": <pattern> }` becomes a predicate that is true for an array
 *   containing at least one item matching the (evaluated) pattern.
 *
 * Arrays are mapped element-wise, other objects are copied key by key with
 * their values evaluated, and primitives (including `null`) are returned
 * unchanged.
 *
 * @param {unknown} configItem
 * @returns {unknown}
 * @throws {Error} When `$env` is not a string or names an undefined variable.
 */
export function evalJSONConfig(configItem) {
  if (Array.isArray(configItem)) {
    return configItem.map((item) => evalJSONConfig(item));
  }

  if (typeof configItem !== "object" || configItem === null) {
    return configItem;
  }

  const record = /** @type {Record<string, unknown>} */ (configItem);
  const keys = Object.keys(record);
  // Directives are only recognized when they are the object's sole key.
  const directive = keys.length === 1 ? keys[0] : undefined;

  if (directive === "$regex" && typeof record.$regex === "string") {
    return new RegExp(record.$regex);
  }

  if (directive === "$env") {
    const name = record.$env;
    if (typeof name !== "string") {
      throw new Error(
        `The value of '$env' must be a string, got: ${typeof name}`,
      );
    }
    const value = process.env[name];
    if (value === undefined) {
      throw new Error(`Environment variable '${name}' is not defined`);
    }
    return value;
  }

  if (directive === "$has") {
    const pattern = evalJSONConfig(record.$has);
    /** @param {unknown} value */
    return (value) => {
      if (!Array.isArray(value)) return false;
      return value.some((item) => {
        if (typeof pattern === "string") {
          return item === pattern;
        }
        if (pattern instanceof RegExp) {
          return typeof item === "string" && pattern.test(item);
        }
        if (typeof pattern === "function") {
          return pattern(item);
        }
        return false;
      });
    };
  }

  // Object.fromEntries defines own data properties, so a "__proto__" key
  // (which JSON.parse produces as an ordinary own property) is copied as a
  // key instead of replacing the copy's prototype the way `obj[k] = v` would.
  return Object.fromEntries(
    Object.entries(record).map(([key, value]) => [key, evalJSONConfig(value)]),
  );
}
@@ -0,0 +1,6 @@
/**
 * Pattern accepted by `matchValue`.
 *
 * A pattern is one of the following, and the array and object forms nest
 * further patterns.
 */
export type ValuePattern =
  // Literal string, compared for equality.
  | string
  // Regular expression tested against string values.
  | RegExp
  // Arbitrary predicate over the value.
  | ((value: unknown) => boolean)
  // Element-wise patterns for array values.
  | ValuePattern[]
  // Per-key patterns for object values.
  | { [key: string]: ValuePattern };
@@ -0,0 +1,40 @@
1
+ /**
2
+ * @import { ValuePattern } from "./matchValue"
3
+ */
4
+
/**
 * Test whether `value` matches `pattern`.
 *
 * - string pattern: `value` must be the identical string.
 * - RegExp pattern: `value` must be a string the expression matches.
 * - function pattern: the predicate's result is returned.
 * - array pattern: `value` must be an array and each pattern element must
 *   match the value element at the same index (extra value elements are
 *   not inspected).
 * - object pattern: `value` must be a non-null object and each pattern key
 *   must match the value's property of the same name.
 *
 * Any other pattern, including `null`, matches nothing. `null` can reach
 * this function from JSON config even though the type excludes it; it is
 * rejected explicitly because `typeof null === "object"` would otherwise
 * send it to `Object.entries(null)`, which throws.
 *
 * @param {unknown} value
 * @param {ValuePattern} pattern
 * @returns {boolean}
 */
export function matchValue(value, pattern) {
  if (typeof pattern === "string") {
    return typeof value === "string" && value === pattern;
  }

  if (pattern instanceof RegExp) {
    return typeof value === "string" && pattern.test(value);
  }

  if (typeof pattern === "function") {
    return pattern(value);
  }

  if (Array.isArray(pattern)) {
    return (
      Array.isArray(value) && pattern.every((p, i) => matchValue(value[i], p))
    );
  }

  if (typeof pattern === "object" && pattern !== null) {
    if (typeof value !== "object" || value === null) {
      return false;
    }
    const target = /** @type {Record<string, unknown>} */ (value);
    return Object.entries(pattern).every(([key, subPattern]) =>
      matchValue(target[key], subPattern),
    );
  }

  return false;
}
@@ -0,0 +1,31 @@
/**
 * Run an async task and return its failure as a value instead of throwing.
 *
 * A thrown/rejected `Error` is returned as-is. Any other thrown value is
 * wrapped in a new `Error` whose message includes a string form of it.
 * Building that string form is itself guarded: a thrown Symbol or
 * null-prototype object cannot be interpolated into a template literal, and
 * this function must not throw while handling it.
 *
 * @template T
 * @param {() => Promise<T>} task
 * @returns {Promise<T | Error>}
 */
export async function noThrow(task) {
  try {
    return await task();
  } catch (error) {
    if (error instanceof Error) {
      return error;
    }
    /** @type {string} */
    let description;
    try {
      // String() accepts Symbols, unlike template-literal interpolation.
      description = String(error);
    } catch {
      // e.g. an object without toString/valueOf (null prototype).
      description = Object.prototype.toString.call(error);
    }
    return new Error(`Non-Error thrown: ${description}`);
  }
}
16
+
/**
 * Run a synchronous task and return its failure as a value instead of
 * throwing.
 *
 * A thrown `Error` is returned as-is. Any other thrown value is wrapped in
 * a new `Error` whose message includes a string form of it. Building that
 * string form is itself guarded: a thrown Symbol or null-prototype object
 * cannot be interpolated into a template literal, and this function must
 * not throw while handling it.
 *
 * @template T
 * @param {() => T} task
 * @returns {T | Error}
 */
export function noThrowSync(task) {
  try {
    return task();
  } catch (error) {
    if (error instanceof Error) {
      return error;
    }
    /** @type {string} */
    let description;
    try {
      // String() accepts Symbols, unlike template-literal interpolation.
      description = String(error);
    } catch {
      // e.g. an object without toString/valueOf (null prototype).
      description = Object.prototype.toString.call(error);
    }
    return new Error(`Non-Error thrown: ${description}`);
  }
}
@@ -0,0 +1,29 @@
1
+ import { execFileSync } from "node:child_process";
2
+ import { noThrowSync } from "./noThrow.mjs";
3
+
/**
 * Notify the user.
 *
 * Without a configured command, a terminal bell character is written to
 * stdout. With one, the command is run synchronously (no shell, 10 second
 * timeout) with an environment limited to the variables listed below; any
 * failure is returned as an `Error` rather than thrown.
 *
 * @param {{ command: string; args?: string[] } | undefined} notifyCmd
 * @returns {void | Error}
 */
export function notify(notifyCmd) {
  if (!notifyCmd) {
    process.stdout.write("\x07");
    return;
  }

  const { PWD, PATH, HOME, DISPLAY, DBUS_SESSION_BUS_ADDRESS } = process.env;

  const runNotifyCommand = () => {
    execFileSync(notifyCmd.command, notifyCmd.args ?? [], {
      shell: false,
      // stdin closed, stdout shared with this process, stderr captured.
      stdio: ["ignore", "inherit", "pipe"],
      env: {
        PWD,
        PATH,
        HOME,
        // for Linux
        DISPLAY,
        DBUS_SESSION_BUS_ADDRESS,
      },
      timeout: 10 * 1000,
    });
  };

  return noThrowSync(runNotifyCommand);
}
@@ -0,0 +1,18 @@
/**
 * Parse a `path[:line]` or `path[:start-end]` reference.
 *
 * The path part may not contain `:`. Line numbers are parsed as base-10
 * integers and are `undefined` when absent. Input that does not fit the
 * format produces an `Error` value (returned, not thrown).
 *
 * @param {string} fileRange
 * @returns {{filePath: string, startLine?: number, endLine?: number} | Error}
 */
export function parseFileRange(fileRange) {
  const parts = fileRange.match(/^([^:]+)(?::(\d+)(?:-(\d+))?)?$/);
  if (parts === null) {
    return new Error(
      "Invalid format. Use: path/to/file[:line] or path/to/file[:start-end]",
    );
  }

  /** @param {string | undefined} digits */
  const toLineNumber = (digits) =>
    digits ? Number.parseInt(digits, 10) : undefined;

  return {
    filePath: parts[1],
    startLine: toLineNumber(parts[2]),
    endLine: toLineNumber(parts[3]),
  };
}
@@ -0,0 +1,19 @@
/**
 * Parse simple `key: value` frontmatter with a regular expression.
 *
 * Each line is handled on its own; multiline values are not supported and
 * lines that do not look like `key: value` are ignored. Keys start with a
 * word character and may contain word characters and hyphens. At most one
 * whitespace character after the colon is dropped, and trailing whitespace
 * is trimmed from the value. A later duplicate key overwrites an earlier one.
 *
 * @param {string} frontmatter - The YAML frontmatter content (without --- delimiters)
 * @returns {Record<string, string>} Parsed key-value pairs
 */
export function parseFrontmatter(frontmatter) {
  /** @type {Record<string, string>} */
  const pairs = {};

  frontmatter.split(/\r?\n/).forEach((line) => {
    const found = line.match(/^(\w[\w-]*):\s?(.*)$/);
    if (found === null) {
      return;
    }
    const [, key, rawValue] = found;
    pairs[key] = rawValue.trimEnd();
  });

  return pairs;
}
@@ -0,0 +1,33 @@
1
+ import fs from "node:fs/promises";
2
+
/**
 * Read a file as UTF-8 and return either all of it or a 1-based, inclusive
 * range of its lines.
 *
 * Without `startLine` the whole file is returned. With `startLine` only,
 * that single line is returned. Problems are returned as `Error` values,
 * not thrown: an unreadable file, or a range that is not
 * `1 <= startLine <= endLine <= line count`.
 *
 * The presence of `startLine`/`endLine` is checked explicitly rather than
 * by truthiness, so a supplied `0` is rejected as an invalid range instead
 * of being mistaken for "not provided".
 *
 * @param {{filePath: string, startLine?: number, endLine?: number}} fileRange
 * @returns {Promise<string | Error>}
 */
export async function readFileRange({ filePath, startLine, endLine }) {
  /** @type {string} */
  let fileContent;
  try {
    fileContent = await fs.readFile(filePath, { encoding: "utf-8" });
  } catch (error) {
    return new Error(
      `Error reading file: ${error instanceof Error ? error.message : String(error)}`,
    );
  }

  if (startLine === undefined || startLine === null) {
    return fileContent;
  }

  const lines = fileContent.split("\n");
  const start = startLine;
  const end = endLine ?? start;

  if (!(1 <= start && start <= end && end <= lines.length)) {
    return new Error(
      `Invalid line range. File ${filePath} has ${lines.length} lines.`,
    );
  }

  // Lines are 1-based and the range is inclusive.
  return lines.slice(start - 1, end).join("\n");
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * @typedef {Object} RetryOnErrorConfig
3
+ * @property {(err: unknown) => boolean} shouldRetry
4
+ * @property {(err: unknown, interval: number) => Promise<void>} [beforeRetry]
5
+ * @property {number} initialInterval
6
+ * @property {number} maxInterval
7
+ * @property {number} multiplier
8
+ * @property {number} maxAttempt
9
+ */
10
+
/**
 * Call `fn` until it succeeds, retrying failures with exponential backoff.
 *
 * After a failed attempt the error is rethrown when `config.maxAttempt`
 * attempts have been made or `config.shouldRetry(err)` is false. Otherwise
 * the function waits `initialInterval * multiplier ** (attempt - 1)`
 * milliseconds, capped at `maxInterval`, and tries again. The optional
 * `beforeRetry` hook is awaited with the error and the chosen interval
 * before the wait starts.
 *
 * @template T
 * @param {() => Promise<T>} fn
 * @param {RetryOnErrorConfig} config
 * @returns {Promise<T>}
 */
export async function retryOnError(fn, config) {
  for (let attempt = 1; ; attempt += 1) {
    try {
      return await fn();
    } catch (err) {
      const outOfAttempts = attempt >= config.maxAttempt;
      if (outOfAttempts || !config.shouldRetry(err)) {
        throw err;
      }

      const uncapped =
        config.initialInterval * config.multiplier ** (attempt - 1);
      const interval = Math.min(uncapped, config.maxInterval);

      if (config.beforeRetry) {
        await config.beforeRetry(err, interval);
      }

      await new Promise((resolve) => {
        setTimeout(resolve, interval);
      });
    }
  }
}
@@ -0,0 +1,61 @@
1
+ import { startGeminiVoiceSession } from "./voiceInputGemini.mjs";
2
+ import { startOpenAIVoiceSession } from "./voiceInputOpenAI.mjs";
3
+ import { failVoiceSessionAsync } from "./voiceInputSession.mjs";
4
+
5
+ export {
6
+ createCJKSpaceNormalizer,
7
+ detectRecorder,
8
+ getRecorderCandidates,
9
+ } from "./voiceInputSession.mjs";
10
+ export { parseVoiceToggleKey } from "./voiceToggleKey.mjs";
11
+
12
+ /**
13
+ * @typedef {import("./voiceInputSession.mjs").VoiceRecorderConfig} VoiceRecorderConfig
14
+ */
15
+
16
+ /**
17
+ * @typedef {import("./voiceInputSession.mjs").VoiceSessionCallbacks} VoiceSessionCallbacks
18
+ */
19
+
20
+ /**
21
+ * @typedef {import("./voiceInputSession.mjs").VoiceSession} VoiceSession
22
+ */
23
+
24
+ /**
25
+ * @typedef {import("./voiceToggleKey.mjs").VoiceToggleKey} VoiceToggleKey
26
+ */
27
+
28
+ /**
29
+ * @typedef {import("./voiceInputOpenAI.mjs").VoiceInputOpenAIConfig} VoiceInputOpenAIConfig
30
+ */
31
+
32
+ /**
33
+ * @typedef {import("./voiceInputGemini.mjs").VoiceInputGeminiConfig} VoiceInputGeminiConfig
34
+ */
35
+
36
+ /**
37
+ * @typedef {VoiceInputOpenAIConfig | VoiceInputGeminiConfig} VoiceInputConfig
38
+ */
39
+
/**
 * Start a voice input session with the provider named by `config.provider`.
 *
 * "openai" and "gemini" are forwarded to their provider-specific starters.
 * Any other provider value is reported through `failVoiceSessionAsync` with
 * an "Unsupported voiceInput.provider" error.
 *
 * @param {object} options
 * @param {VoiceInputConfig} options.config
 * @param {VoiceSessionCallbacks} options.callbacks
 * @returns {VoiceSession}
 */
export function startVoiceSession({ config, callbacks }) {
  switch (config.provider) {
    case "openai":
      return startOpenAIVoiceSession({ config, callbacks });
    case "gemini":
      return startGeminiVoiceSession({ config, callbacks });
    default: {
      // The declared union is exhausted here; widen the type so the
      // unexpected runtime value can still be reported.
      const { provider } = /** @type {{ provider: string }} */ (config);
      const unsupported = new Error(
        `Unsupported voiceInput.provider: ${provider}`,
      );
      return failVoiceSessionAsync(callbacks, unsupported);
    }
  }
}
@@ -0,0 +1,105 @@
1
+ import {
2
+ isObjectLike,
3
+ startWebSocketVoiceSession,
4
+ } from "./voiceInputSession.mjs";
5
+
6
+ /**
7
+ * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./voiceInputSession.mjs"
8
+ */
9
+
10
+ /**
11
+ * @typedef {Object} VoiceInputGeminiConfig
12
+ * @property {"gemini"} provider
13
+ * @property {string} apiKey
14
+ * @property {string} [model] - Defaults to "gemini-3.1-flash-live-preview".
15
+ * @property {string} [language] - ISO-639-1 code (e.g. "ja", "en"). Passed to the model as a system instruction since Gemini Live has no native language hint for input transcription.
16
+ * @property {string} [baseURL]
17
+ * @property {VoiceRecorderConfig} [recorder]
18
+ * @property {string} [toggleKey]
19
+ */
20
+
/** Model name used when the config does not specify `model`. */
const GEMINI_DEFAULT_MODEL = "gemini-3.1-flash-live-preview";

/** WebSocket endpoint used when the config does not specify `baseURL`. */
const GEMINI_DEFAULT_WS =
  "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";

/** Sample rate, in Hz, handed to the session hooks for the audio stream. */
const GEMINI_SAMPLE_RATE = 16_000;

/** Provider label handed to the session hooks. */
const GEMINI_LABEL = "Gemini Live";
26
+
/**
 * Start a voice input session backed by the Gemini Live BidiGenerateContent
 * WebSocket, by handing Gemini-specific hooks to
 * `startWebSocketVoiceSession`.
 *
 * The hooks cover: building the WebSocket URL with the API key, building
 * the `setup` message, recognising the `setupComplete` reply, extracting
 * input-transcription text from server messages, and wrapping each audio
 * chunk as a base64 PCM payload.
 *
 * Gemini Live was designed for voice agents, not pure STT, so the setup
 * message forces `maxOutputTokens: 1` and disables thinking on 2.5 models
 * to minimise wasted audio output.
 *
 * @param {object} options
 * @param {VoiceInputGeminiConfig} options.config
 * @param {VoiceSessionCallbacks} options.callbacks
 * @returns {VoiceSession}
 */
export function startGeminiVoiceSession({ config, callbacks }) {
  /** @type {VoiceProviderHooks<VoiceInputGeminiConfig>} */
  const hooks = {
    label: GEMINI_LABEL,
    sampleRate: GEMINI_SAMPLE_RATE,

    buildWsUrl(cfg) {
      const endpoint = cfg.baseURL ?? GEMINI_DEFAULT_WS;
      const key = encodeURIComponent(cfg.apiKey);
      return `${endpoint}?key=${key}`;
    },

    buildSetupMessage(cfg) {
      const model = cfg.model ?? GEMINI_DEFAULT_MODEL;
      const isTwoPointFive = model.includes("2.5");

      /** @type {Record<string, unknown>} */
      const generationConfig = {
        // https://ai.google.dev/gemini-api/docs/live-api/capabilities#response-modalities
        // > The native audio models only support `AUDIO` response modality.
        responseModalities: ["AUDIO"],
        maxOutputTokens: 1,
        ...(isTwoPointFive ? { thinkingConfig: { thinkingBudget: 0 } } : {}),
      };

      /** @type {Record<string, unknown>} */
      const setup = {
        model: `models/${model}`,
        generationConfig,
        inputAudioTranscription: {},
        // The language is conveyed as a system instruction.
        ...(cfg.language
          ? {
              systemInstruction: {
                parts: [{ text: `The user is speaking in ${cfg.language}.` }],
              },
            }
          : {}),
      };

      return { setup };
    },

    isReadyMessage(message) {
      if (!isObjectLike(message)) {
        return false;
      }
      return "setupComplete" in message;
    },

    extractTranscript(message) {
      if (!isObjectLike(message)) {
        return undefined;
      }
      const { serverContent } = message;
      if (!isObjectLike(serverContent)) {
        return undefined;
      }
      const { inputTranscription } = serverContent;
      if (!isObjectLike(inputTranscription)) {
        return undefined;
      }
      const { text } = inputTranscription;
      // Only non-empty strings count as a transcript.
      return typeof text === "string" && text.length > 0 ? text : undefined;
    },

    buildAudioPayload(chunk, sampleRate) {
      const audio = {
        data: chunk.toString("base64"),
        mimeType: `audio/pcm;rate=${sampleRate}`,
      };
      return { realtimeInput: { audio } };
    },
  };

  return startWebSocketVoiceSession({ hooks, config, callbacks });
}