@iinm/plain-agent 1.10.1 → 1.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,25 +1,28 @@
1
1
  /**
2
- * @import { UserEventEmitter, AgentEventEmitter, AgentCommands } from "./agent"
3
- * @import { ClaudeCodePlugin } from "./claudeCodePlugin.mjs"
4
- * @import { VoiceInputConfig, VoiceSession } from "./voiceInput.mjs"
2
+ * @import { UserEventEmitter, AgentEventEmitter, AgentCommands } from "../agent"
3
+ * @import { ClaudeCodePlugin } from "../claudeCodePlugin.mjs"
4
+ * @import { VoiceInputConfig } from "../voice/input.mjs"
5
+ * @import { VoiceSession } from "../voice/session.mjs"
5
6
  */
6
7
 
7
8
  import readline from "node:readline";
8
9
  import { styleText } from "node:util";
9
- import { createCommandHandler } from "./cliCommands.mjs";
10
- import { createCompleter, SLASH_COMMANDS } from "./cliCompleter.mjs";
10
+ import { appendUsageRecord, buildUsageRecord } from "../usageStore.mjs";
11
+ import { createSequentialExecutor } from "../utils/createSequentialExecutor.mjs";
12
+ import { notify } from "../utils/notify.mjs";
13
+ import { startVoiceSession } from "../voice/input.mjs";
14
+ import { parseVoiceToggleKey } from "../voice/toggleKey.mjs";
15
+ import { createCommandHandler } from "./commands.mjs";
16
+ import { createCompleter, SLASH_COMMANDS } from "./completer.mjs";
11
17
  import {
12
18
  formatCostSummary,
13
19
  formatProviderTokenUsage,
14
20
  printMessage,
15
- } from "./cliFormatter.mjs";
16
- import { createInterruptTransform } from "./cliInterruptTransform.mjs";
17
- import { createMuteTransform } from "./cliMuteTransform.mjs";
18
- import { createPasteHandler } from "./cliPasteTransform.mjs";
19
- import { appendUsageRecord, buildUsageRecord } from "./usageStore.mjs";
20
- import { createSequentialExecutor } from "./utils/createSequentialExecutor.mjs";
21
- import { notify } from "./utils/notify.mjs";
22
- import { parseVoiceToggleKey, startVoiceSession } from "./voiceInput.mjs";
21
+ } from "./formatter.mjs";
22
+ import { createInterruptTransform } from "./interruptTransform.mjs";
23
+ import { createMuteTransform } from "./muteTransform.mjs";
24
+ import { createPasteHandler } from "./pasteTransform.mjs";
25
+ import { createTableDetector } from "./tableDetector.mjs";
23
26
 
24
27
  const HELP_MESSAGE = [
25
28
  "Commands:",
@@ -70,7 +73,7 @@ const HELP_MESSAGE = [
70
73
  * Persist the session's cost summary to the usage log.
71
74
  * Failures are logged but never thrown so exit is not blocked.
72
75
  *
73
- * @param {import("./costTracker.mjs").CostSummary} summary
76
+ * @param {import("../costTracker.mjs").CostSummary} summary
74
77
  * @param {{ sessionId: string, modelName: string, startTime: Date }} meta
75
78
  */
76
79
  async function persistUsage(summary, { sessionId, modelName, startTime }) {
@@ -122,6 +125,9 @@ export function startInteractiveSession({
122
125
  */
123
126
  let voice = null;
124
127
 
128
+ // Create the table buffer instance for this session
129
+ const tableBuffer = createTableBuffer();
130
+
125
131
  // Parse the voice toggle key once at startup so misconfiguration fails
126
132
  // loudly instead of silently falling back.
127
133
  const voiceToggle = parseVoiceToggleKey(voiceInput?.toggleKey);
@@ -281,7 +287,7 @@ export function startInteractiveSession({
281
287
  // Agent turn: pause auto-approve; do not clear input.
282
288
  if (!state.turn) {
283
289
  agentCommands.pauseAutoApprove();
284
- console.log(
290
+ console.error(
285
291
  styleText(
286
292
  "yellow",
287
293
  "\n\n⚠️ Ctrl-C: Auto-approve paused. Finishing current tool...\nPress Ctrl-D twice to exit.\n",
@@ -328,7 +334,7 @@ export function startInteractiveSession({
328
334
  );
329
335
  cli.prompt();
330
336
  } else {
331
- console.log(styleText("yellow", "\n\n⚠️ Press Ctrl-D again to exit.\n"));
337
+ console.error(styleText("yellow", "\n\n⚠️ Press Ctrl-D again to exit.\n"));
332
338
  }
333
339
  };
334
340
 
@@ -418,7 +424,7 @@ export function startInteractiveSession({
418
424
 
419
425
  cli.on("line", async (lineInput) => {
420
426
  if (!state.turn) {
421
- console.warn(
427
+ console.error(
422
428
  styleText(
423
429
  "yellow",
424
430
  `\nAgent is working. Ignore input: ${lineInput.trim()}`,
@@ -465,6 +471,8 @@ export function startInteractiveSession({
465
471
  if (partialContent.content) {
466
472
  if (partialContent.type === "tool_use") {
467
473
  process.stdout.write(styleText("gray", partialContent.content));
474
+ } else if (partialContent.type === "text") {
475
+ tableBuffer.feed(partialContent.content);
468
476
  } else {
469
477
  process.stdout.write(partialContent.content);
470
478
  }
@@ -511,7 +519,7 @@ export function startInteractiveSession({
511
519
  });
512
520
 
513
521
  agentEventEmitter.on("error", (error) => {
514
- console.log(
522
+ console.error(
515
523
  styleText(
516
524
  "red",
517
525
  `\nError: message=${error.message}, stack=${error.stack}`,
@@ -520,6 +528,9 @@ export function startInteractiveSession({
520
528
  });
521
529
 
522
530
  agentEventEmitter.on("turnEnd", async () => {
531
+ // Flush any remaining table buffer content
532
+ tableBuffer.forceFlush();
533
+
523
534
  const err = notify(notifyCmd);
524
535
  if (err) {
525
536
  console.error(
@@ -543,3 +554,26 @@ export function startInteractiveSession({
543
554
  process.on("exit", cleanup);
544
555
  process.on("SIGTERM", cleanup);
545
556
  }
557
+
558
/**
 * Builds the I/O wrapper around a table detector for streamed text output.
 *
 * The detector (createTableDetector) decides what to print; this wrapper only
 * performs the writes: each output string goes to stdout, and each warning is
 * printed in yellow via console.error.
 *
 * @returns {{ feed: (chunk: string) => void, forceFlush: () => void }}
 */
function createTableBuffer() {
  const detector = createTableDetector();

  /**
   * Write one detector result: all output strings first, then all warnings.
   * @param {{ output: string[], warnings: string[] }} result
   */
  const emit = ({ output, warnings }) => {
    for (const text of output) process.stdout.write(text);
    for (const warning of warnings) console.error(styleText("yellow", warning));
  };

  return {
    feed: (/** @type {string} */ chunk) => emit(detector.feed(chunk)),
    forceFlush: () => emit(detector.forceFlush()),
  };
}
@@ -0,0 +1,228 @@
1
+ import { formatMarkdownTable } from "./formatter.mjs";
2
+
3
/**
 * @typedef {{ output: string[], warnings: string[] }} DetectorResult
 */

/**
 * Creates a detector that finds markdown tables in streamed text and replaces
 * them with formatted output. Pure logic: it never writes anywhere, it only
 * returns the strings to print (`output`) and messages to report (`warnings`).
 *
 * Detection is line based. Text is buffered only while it might still turn out
 * to be a table row or a code fence; everything else is passed through as soon
 * as it arrives so ordinary prose keeps streaming. The classification of a line
 * does not depend on how the stream was split into chunks.
 *
 * @param {(lines: string[], maxWidth?: number) => string} [formatTable=formatMarkdownTable] - Table formatting function (injectable for testing)
 * @param {number} [maxWidth] - Maximum display width passed to formatTable (defaults to process.stdout.columns - 4, or 80 when columns is unavailable)
 * @returns {{ feed: (chunk: string) => DetectorResult, forceFlush: () => DetectorResult }}
 */
export function createTableDetector(
  formatTable = formatMarkdownTable,
  maxWidth = process.stdout.columns ? process.stdout.columns - 4 : 80,
) {
  /** Tables longer than this are emitted unformatted to bound buffering. */
  const MAX_TABLE_LINES = 200;
  const FENCE = "```";

  /** @type {string} - Received text of the current line that has not been emitted yet */
  let pendingLine = "";
  /** @type {string[]} - Lines (with trailing newline) of the table being collected */
  const tableLines = [];
  /** @type {boolean} - Inside a fenced code block */
  let inCodeBlock = false;
  /**
   * @type {boolean} - The start of the current line has already been emitted,
   * so whatever follows up to the next newline is plain text and must not be
   * re-examined as if it were the beginning of a line.
   */
  let lineEmitted = false;

  /**
   * Feed a text chunk to the detector.
   * @param {string} chunk
   * @returns {DetectorResult}
   */
  function feed(chunk) {
    /** @type {string[]} */
    const output = [];
    /** @type {string[]} */
    const warnings = [];
    if (chunk.length === 0) return { output, warnings };

    pendingLine += chunk;

    // Handle every complete line (newline included) now available.
    let newlineIdx = pendingLine.indexOf("\n");
    while (newlineIdx !== -1) {
      const line = pendingLine.slice(0, newlineIdx + 1);
      pendingLine = pendingLine.slice(newlineIdx + 1);

      if (lineEmitted) {
        // Tail of a line whose head was already streamed: pass it through.
        output.push(line);
        lineEmitted = false;
      } else {
        const result = processLine(line);
        output.push(...result.output);
        warnings.push(...result.warnings);
      }
      newlineIdx = pendingLine.indexOf("\n");
    }

    // Stream the incomplete tail right away unless it may still become a
    // table row or a code fence once the rest of the line arrives.
    if (pendingLine.length > 0 && canStreamPartial(pendingLine)) {
      output.push(pendingLine);
      pendingLine = "";
      lineEmitted = true;
    }

    return { output, warnings };
  }

  /**
   * Force flush any pending content (call on turn end). Also resets the
   * per-line and code-block state so an unterminated fence in one turn does
   * not disable table detection in the next one.
   * @returns {DetectorResult}
   */
  function forceFlush() {
    /** @type {string[]} */
    const output = [];
    /** @type {string[]} */
    const warnings = [];

    const remainder = pendingLine;
    pendingLine = "";

    // Only an unterminated line that looks like a row joins the buffered
    // table; other trailing text is printed verbatim after the table.
    const joinsTable = tableLines.length > 0 && isTableContinuation(remainder);
    if (joinsTable) tableLines.push(`${remainder}\n`);

    const flushResult = flushTable();
    output.push(...flushResult.output);
    warnings.push(...flushResult.warnings);

    if (!joinsTable && remainder.length > 0) output.push(remainder);

    lineEmitted = false;
    inCodeBlock = false;

    return { output, warnings };
  }

  /**
   * Decide whether an incomplete line can be emitted before its newline arrives.
   * @param {string} partial - Non-empty text without a newline
   * @returns {boolean}
   */
  function canStreamPartial(partial) {
    // The head of this line is already out; the rest is plain text.
    if (lineEmitted) return true;
    // While a table is buffered, any line may still turn out to contain a pipe.
    if (tableLines.length > 0) return false;

    const trimmed = partial.trimStart();
    // Blank so far, a prefix of a fence, or a fence whose line is unfinished.
    if (FENCE.startsWith(trimmed) || trimmed.startsWith(FENCE)) return false;
    // Possible first row of a table (pipes are not special inside code blocks).
    if (!inCodeBlock && trimmed.startsWith("|")) return false;
    return true;
  }

  /**
   * Process a complete line that starts at a line boundary.
   * @param {string} line - Line including trailing newline
   * @returns {DetectorResult}
   */
  function processLine(line) {
    /** @type {string[]} */
    const output = [];
    /** @type {string[]} */
    const warnings = [];
    /** @param {DetectorResult} result */
    const collect = (result) => {
      output.push(...result.output);
      warnings.push(...result.warnings);
    };

    // A fence toggles code-block mode and terminates any ongoing table.
    if (line.trimStart().startsWith(FENCE)) {
      inCodeBlock = !inCodeBlock;
      collect(flushTable());
      output.push(line);
      return { output, warnings };
    }

    if (inCodeBlock) {
      output.push(line);
      return { output, warnings };
    }

    // A row either opens a table (leading pipe) or continues the current one
    // (any pipe, to accept rows written without a leading pipe).
    if (
      isTableStart(line) ||
      (tableLines.length > 0 && isTableContinuation(line))
    ) {
      tableLines.push(line);
      if (tableLines.length > MAX_TABLE_LINES) collect(flushTableAsIs());
      return { output, warnings };
    }

    // Table ended: format and flush the buffer, then output the current line.
    collect(flushTable());
    output.push(line);
    return { output, warnings };
  }

  /**
   * Flush table buffer with formatting. If the formatter throws, the raw lines
   * are emitted instead and a warning is reported.
   * @returns {DetectorResult}
   */
  function flushTable() {
    if (tableLines.length === 0) return { output: [], warnings: [] };

    /** @type {string[]} */
    const output = [];
    /** @type {string[]} */
    const warnings = [];

    // Separate trailing blank lines so spacing after the table is preserved.
    /** @type {string[]} */
    const trailingEmpty = [];
    while (tableLines.length > 0 && tableLines.at(-1)?.trim() === "") {
      const line = tableLines.pop();
      if (line !== undefined) trailingEmpty.unshift(line);
    }

    if (tableLines.length > 0) {
      // The formatter receives lines without their trailing newline.
      const rawLines = tableLines.map((l) =>
        l.endsWith("\n") ? l.slice(0, -1) : l,
      );
      try {
        const formatted = formatTable(rawLines, maxWidth);
        output.push(`${formatted}\n`);
      } catch (err) {
        // Fallback: output raw lines if formatting fails.
        const message = err instanceof Error ? err.message : String(err);
        warnings.push(`Warning: Table formatting failed: ${message}`);
        output.push(...tableLines);
      }
    }

    tableLines.length = 0;
    output.push(...trailingEmpty);

    return { output, warnings };
  }

  /**
   * Flush table buffer without formatting (for oversized tables).
   * @returns {DetectorResult}
   */
  function flushTableAsIs() {
    if (tableLines.length === 0) return { output: [], warnings: [] };
    const output = [...tableLines];
    tableLines.length = 0;
    return { output, warnings: [] };
  }

  /**
   * Check if a line starts a table: first non-whitespace character is a pipe.
   * @param {string} line
   * @returns {boolean}
   */
  function isTableStart(line) {
    return line.trimStart().startsWith("|");
  }

  /**
   * Check if a line continues a table.
   * This is a heuristic: any line containing a pipe character is considered
   * a potential table row. This may produce false positives for non-table
   * content with pipes (e.g., "Choose A | B | C").
   * @param {string} line
   * @returns {boolean}
   */
  function isTableContinuation(line) {
    return line.includes("|");
  }

  return { feed, forceFlush };
}
package/src/config.d.ts CHANGED
@@ -10,7 +10,7 @@ import {
10
10
  WebSearchToolGeminiOptions,
11
11
  WebSearchToolGeminiVertexAIOptions,
12
12
  } from "./tools/webSearch.mjs";
13
- import { VoiceInputConfig } from "./voiceInput.mjs";
13
+ import { VoiceInputConfig } from "./voice/input.mjs";
14
14
 
15
15
  /**
16
16
  * JSON-serializable webFetch configuration.
package/src/config.mjs CHANGED
@@ -128,7 +128,7 @@ export async function loadConfigFile(filePath, skipTrustCheck = false) {
128
128
 
129
129
  if (!isTrusted) {
130
130
  if (!process.stdout.isTTY) {
131
- console.warn(
131
+ console.error(
132
132
  styleText(
133
133
  "yellow",
134
134
  `WARNING: Config file found at '${filePath}' but cannot ask for approval without a TTY. Skipping.`,
@@ -54,7 +54,7 @@ export async function loadAgentRoles(claudeCodePlugins) {
54
54
  agentDirs.map(async ({ dir, idPrefix, only }) => {
55
55
  const files = await getMarkdownFiles(dir).catch((err) => {
56
56
  if (err.code !== "ENOENT") {
57
- console.warn(`Failed to list agent roles in ${dir}:`, err);
57
+ console.error(`Failed to list agent roles in ${dir}:`, err);
58
58
  }
59
59
  return /** @type {string[]} */ ([]);
60
60
  });
@@ -72,7 +72,7 @@ export async function loadAgentRoles(claudeCodePlugins) {
72
72
  files.map(async ({ dir, file, idPrefix }) => {
73
73
  const fullPath = path.join(dir, file);
74
74
  const content = await fs.readFile(fullPath, "utf-8").catch((err) => {
75
- console.warn(`Failed to read agent role file ${fullPath}:`, err);
75
+ console.error(`Failed to read agent role file ${fullPath}:`, err);
76
76
  return null;
77
77
  });
78
78
 
@@ -69,7 +69,7 @@ export async function loadPrompts(claudeCodePlugins) {
69
69
  promptDirs.map(async ({ dir, idPrefix, only }) => {
70
70
  const files = await getMarkdownFiles(dir).catch((err) => {
71
71
  if (err.code !== "ENOENT") {
72
- console.warn(`Failed to list prompts in ${dir}:`, err);
72
+ console.error(`Failed to list prompts in ${dir}:`, err);
73
73
  }
74
74
  return /** @type {string[]} */ ([]);
75
75
  });
@@ -95,7 +95,7 @@ export async function loadPrompts(claudeCodePlugins) {
95
95
  files.map(async ({ dir, file, idPrefix }) => {
96
96
  const fullPath = path.join(dir, file);
97
97
  const content = await fs.readFile(fullPath, "utf-8").catch((err) => {
98
- console.warn(`Failed to read prompt file ${fullPath}:`, err);
98
+ console.error(`Failed to read prompt file ${fullPath}:`, err);
99
99
  return null;
100
100
  });
101
101
 
@@ -143,5 +143,5 @@ function inferMimeType(filePath) {
143
143
  * @returns {void}
144
144
  */
145
145
  function warn(message) {
146
- console.warn(styleText("yellow", message));
146
+ console.error(styleText("yellow", message));
147
147
  }
package/src/main.mjs CHANGED
@@ -10,15 +10,15 @@ import {
10
10
  installClaudeCodePlugins,
11
11
  resolvePluginPaths,
12
12
  } from "./claudeCodePlugin.mjs";
13
- import { parseCliArgs, printHelp } from "./cliArgs.mjs";
14
- import { startBatchSession } from "./cliBatch.mjs";
15
- import { runCostCommand } from "./cliCost.mjs";
16
- import { startInteractiveSession } from "./cliInteractive.mjs";
13
+ import { parseCliArgs, printHelp } from "./cli/args.mjs";
14
+ import { startBatchSession } from "./cli/batch.mjs";
15
+ import { runCostCommand } from "./cli/cost.mjs";
16
+ import { startInteractiveSession } from "./cli/interactive.mjs";
17
17
  import { loadAppConfig } from "./config.mjs";
18
18
  import { loadAgentRoles } from "./context/loadAgentRoles.mjs";
19
19
  import { loadPrompts } from "./context/loadPrompts.mjs";
20
20
  import { AGENT_PROJECT_METADATA_DIR, USER_NAME } from "./env.mjs";
21
- import { setupMCPServer } from "./mcpIntegration.mjs";
21
+ import { setupMCPServer } from "./mcp/integration.mjs";
22
22
  import { createModelCaller } from "./modelCaller.mjs";
23
23
  import { createPrompt } from "./prompt.mjs";
24
24
  import { listSessions, loadSession } from "./sessionStore.mjs";
@@ -1,16 +1,16 @@
1
1
  /**
2
- * @import { StructuredToolResultContent, Tool, ToolImplementation } from "./tool";
3
- * @import { MCPServerConfig } from "./config";
2
+ * @import { StructuredToolResultContent, Tool, ToolImplementation } from "../tool";
3
+ * @import { MCPServerConfig } from "../config";
4
4
  */
5
5
 
6
6
  import { mkdir } from "node:fs/promises";
7
7
  import path from "node:path";
8
- import { AGENT_PROJECT_METADATA_DIR } from "./env.mjs";
9
- import { createMCPClient } from "./mcpClient.mjs";
10
- import { writeTmpFile } from "./tmpfile.mjs";
11
- import { noThrow } from "./utils/noThrow.mjs";
8
+ import { AGENT_PROJECT_METADATA_DIR } from "../env.mjs";
9
+ import { writeTmpFile } from "../tmpfile.mjs";
10
+ import { noThrow } from "../utils/noThrow.mjs";
11
+ import { createMCPClient } from "./client.mjs";
12
12
 
13
- /** @typedef {import("./mcpClient.mjs").MCPClient} MCPClient */
13
+ /** @typedef {import("./client.mjs").MCPClient} MCPClient */
14
14
 
15
15
  const OUTPUT_MAX_LENGTH = 1024 * 8;
16
16
 
@@ -27,18 +27,16 @@ export function createPatchFileTool(
27
27
  },
28
28
  patch: {
29
29
  description: `
30
- Format:
31
- @@@ ${nonce} {start}:{startHash}-{end}:{endHash}
30
+ Format — a single patch string may contain multiple blocks:
31
+ >>> ${nonce} {start}:{startHash}-{end}:{endHash}
32
32
  new content
33
- @@@ ${nonce}
34
-
35
- @@@ ${nonce} {N}:{afterHash}+
33
+ <<< ${nonce}
34
+ >>> ${nonce} {N}:{afterHash}+
36
35
  inserted content
37
- @@@ ${nonce}
38
-
39
- @@@ ${nonce} 0+
36
+ <<< ${nonce}
37
+ >>> ${nonce} 0+
40
38
  prepended content
41
- @@@ ${nonce}
39
+ <<< ${nonce}
42
40
 
43
41
  - The nonce "${nonce}" is constant; always use the exact value shown above.
44
42
  - Line numbers are 1-indexed and refer to the original file; "{start}-{end}" is inclusive.
@@ -63,7 +61,7 @@ prepended content
63
61
  const blocks = parseBlocks(patch, nonce);
64
62
  if (blocks.length === 0) {
65
63
  throw new Error(
66
- `No patch blocks found. Each block must start with "@@@ ${nonce} ..." and end with "@@@ ${nonce}".`,
64
+ `No patch blocks found. Each block must start with ">>> ${nonce} ..." and end with "<<< ${nonce}".`,
67
65
  );
68
66
  }
69
67
 
@@ -93,8 +91,8 @@ prepended content
93
91
  * @returns {PatchBlock[]}
94
92
  */
95
93
  export function parseBlocks(patch, nonce) {
96
- const openPrefix = `@@@ ${nonce} `;
97
- const closeMarker = `@@@ ${nonce}`;
94
+ const openPrefix = `>>> ${nonce} `;
95
+ const closeMarker = `<<< ${nonce}`;
98
96
  const lines = patch.split("\n");
99
97
 
100
98
  /** @type {PatchBlock[]} */
@@ -124,6 +122,14 @@ export function parseBlocks(patch, nonce) {
124
122
  );
125
123
  }
126
124
  const body = lines.slice(i + 1, closeIdx);
125
+ const nestedOpen = body.findIndex((l) => l.startsWith(openPrefix));
126
+ if (nestedOpen !== -1) {
127
+ throw new Error(
128
+ `Unclosed block "${openPrefix}${headerArgs}": found another open marker "${body[nestedOpen]}" ` +
129
+ `at line ${i + 1 + nestedOpen + 1} of patch before the close marker. ` +
130
+ `Did you forget "${closeMarker}" to close the previous block?`,
131
+ );
132
+ }
127
133
  if (header.op === "insert" && body.length === 0) {
128
134
  throw new Error(
129
135
  `Insert block "${openPrefix}${headerArgs}" has empty body. Use a replace block to delete content.`,
@@ -1,10 +1,7 @@
1
- import {
2
- isObjectLike,
3
- startWebSocketVoiceSession,
4
- } from "./voiceInputSession.mjs";
1
+ import { isObjectLike, startWebSocketVoiceSession } from "./session.mjs";
5
2
 
6
3
  /**
7
- * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./voiceInputSession.mjs"
4
+ * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./session.mjs"
8
5
  */
9
6
 
10
7
  /**
@@ -0,0 +1,29 @@
1
+ import { startGeminiVoiceSession } from "./gemini.mjs";
2
+ import { startOpenAIVoiceSession } from "./openai.mjs";
3
+ import { failVoiceSessionAsync } from "./session.mjs";
4
+
5
+ /**
6
+ * @typedef {import("./openai.mjs").VoiceInputOpenAIConfig | import("./gemini.mjs").VoiceInputGeminiConfig} VoiceInputConfig
7
+ */
8
/**
 * Entry point for voice input: picks the provider implementation named by
 * `config.provider` ("openai" or "gemini") and starts a session with it.
 * Any other provider value is reported through failVoiceSessionAsync with an
 * "Unsupported voiceInput.provider" error, and that call's result is returned.
 *
 * @param {object} options
 * @param {VoiceInputConfig} options.config
 * @param {import("./session.mjs").VoiceSessionCallbacks} options.callbacks
 * @returns {import("./session.mjs").VoiceSession}
 */
export function startVoiceSession({ config, callbacks }) {
  switch (config.provider) {
    case "openai":
      return startOpenAIVoiceSession({ config, callbacks });
    case "gemini":
      return startGeminiVoiceSession({ config, callbacks });
    default: {
      // The declared union is exhausted here; widen the type to read the raw value.
      const { provider } = /** @type {{ provider: string }} */ (config);
      return failVoiceSessionAsync(
        callbacks,
        new Error(`Unsupported voiceInput.provider: ${provider}`),
      );
    }
  }
}
@@ -1,24 +1,21 @@
1
- import {
2
- isObjectLike,
3
- startWebSocketVoiceSession,
4
- } from "./voiceInputSession.mjs";
1
+ import { isObjectLike, startWebSocketVoiceSession } from "./session.mjs";
5
2
 
6
3
  /**
7
- * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./voiceInputSession.mjs"
4
+ * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./session.mjs"
8
5
  */
9
6
 
10
7
  /**
11
8
  * @typedef {Object} VoiceInputOpenAIConfig
12
9
  * @property {"openai"} provider
13
10
  * @property {string} apiKey
14
- * @property {string} [model] - Defaults to "gpt-4o-transcribe".
11
+ * @property {string} [model] - Transcription model. Defaults to "gpt-realtime-whisper".
15
12
  * @property {string} [language] - ISO-639-1 code (e.g. "ja", "en"). Improves accuracy and latency when set.
16
13
  * @property {string} [baseURL]
17
14
  * @property {VoiceRecorderConfig} [recorder]
18
15
  * @property {string} [toggleKey] - "ctrl-<char>". Defaults to "ctrl-o".
19
16
  */
20
17
 
21
- const OPENAI_DEFAULT_MODEL = "gpt-4o-transcribe";
18
+ const OPENAI_DEFAULT_TRANSCRIPTION_MODEL = "gpt-realtime-whisper";
22
19
  const OPENAI_DEFAULT_WS = "wss://api.openai.com/v1/realtime";
23
20
  const OPENAI_SAMPLE_RATE = 24000;
24
21
  const OPENAI_LABEL = "OpenAI Realtime";
@@ -46,31 +43,32 @@ export function startOpenAIVoiceSession({ config, callbacks }) {
46
43
  return {
47
44
  headers: {
48
45
  Authorization: `Bearer ${config.apiKey}`,
49
- "OpenAI-Beta": "realtime=v1",
50
46
  },
51
47
  };
52
48
  },
53
49
  buildSetupMessage(config) {
54
- const model = config.model ?? OPENAI_DEFAULT_MODEL;
50
+ const model = config.model ?? OPENAI_DEFAULT_TRANSCRIPTION_MODEL;
55
51
  /** @type {{ model: string, language?: string }} */
56
52
  const transcription = { model };
57
53
  if (config.language) transcription.language = config.language;
58
- // The `?intent=transcription` endpoint uses the flat transcription-session
59
- // schema, not the nested `session.audio.input.*` realtime schema.
60
54
  return {
61
- type: "transcription_session.update",
55
+ type: "session.update",
62
56
  session: {
63
- input_audio_format: "pcm16",
64
- input_audio_transcription: transcription,
65
- turn_detection: { type: "server_vad" },
57
+ type: "transcription",
58
+ audio: {
59
+ input: {
60
+ format: { type: "audio/pcm", rate: OPENAI_SAMPLE_RATE },
61
+ transcription,
62
+ },
63
+ },
66
64
  },
67
65
  };
68
66
  },
69
67
  isReadyMessage(message) {
70
68
  return (
71
69
  isObjectLike(message) &&
72
- (message.type === "transcription_session.created" ||
73
- message.type === "transcription_session.updated")
70
+ (message.type === "session.created" ||
71
+ message.type === "session.updated")
74
72
  );
75
73
  },
76
74
  extractError(message) {