npm - @iinm/plain-agent - Versions diffs - 1.10.1 → 1.10.3 - Mend

@iinm/plain-agent 1.10.1 → 1.10.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (30)

package/README.md +2 -2
package/package.json +1 -1
package/src/claudeCodePlugin.mjs +2 -2
package/src/{cliBatch.mjs → cli/batch.mjs} +3 -3
package/src/{cliCommands.mjs → cli/commands.mjs} +17 -15
package/src/{cliCompleter.mjs → cli/completer.mjs} +4 -4
package/src/{cliCost.mjs → cli/cost.mjs} +3 -3
package/src/cli/formatter.mjs +997 -0
package/src/{cliInteractive.mjs → cli/interactive.mjs} +52 -18
package/src/cli/tableDetector.mjs +228 -0
package/src/config.d.ts +1 -1
package/src/config.mjs +1 -1
package/src/context/loadAgentRoles.mjs +2 -2
package/src/context/loadPrompts.mjs +2 -2
package/src/context/loadUserMessageContext.mjs +1 -1
package/src/main.mjs +5 -5
package/src/{mcpIntegration.mjs → mcp/integration.mjs} +7 -7
package/src/tools/patchFile.mjs +18 -12
package/src/{voiceInputGemini.mjs → voice/gemini.mjs} +2 -5
package/src/voice/input.mjs +29 -0
package/src/{voiceInputOpenAI.mjs → voice/openai.mjs} +15 -17
package/src/cliFormatter.mjs +0 -573
package/src/voiceInput.mjs +0 -61
/package/src/{cliArgs.mjs → cli/args.mjs} +0 -0
/package/src/{cliInterruptTransform.mjs → cli/interruptTransform.mjs} +0 -0
/package/src/{cliMuteTransform.mjs → cli/muteTransform.mjs} +0 -0
/package/src/{cliPasteTransform.mjs → cli/pasteTransform.mjs} +0 -0
/package/src/{mcpClient.mjs → mcp/client.mjs} +0 -0
/package/src/{voiceInputSession.mjs → voice/session.mjs} +0 -0
/package/src/{voiceToggleKey.mjs → voice/toggleKey.mjs} +0 -0

package/src/{cliInteractive.mjs → cli/interactive.mjs} RENAMED Viewed

@@ -1,25 +1,28 @@
 /**
- * @import { UserEventEmitter, AgentEventEmitter, AgentCommands } from "./agent"
- * @import { ClaudeCodePlugin } from "./claudeCodePlugin.mjs"
- * @import { VoiceInputConfig, VoiceSession } from "./voiceInput.mjs"
+ * @import { UserEventEmitter, AgentEventEmitter, AgentCommands } from "../agent"
+ * @import { ClaudeCodePlugin } from "../claudeCodePlugin.mjs"
+ * @import { VoiceInputConfig } from "../voice/input.mjs"
+ * @import { VoiceSession } from "../voice/session.mjs"
  */
 import readline from "node:readline";
 import { styleText } from "node:util";
-import { createCommandHandler } from "./cliCommands.mjs";
-import { createCompleter, SLASH_COMMANDS } from "./cliCompleter.mjs";
+import { appendUsageRecord, buildUsageRecord } from "../usageStore.mjs";
+import { createSequentialExecutor } from "../utils/createSequentialExecutor.mjs";
+import { notify } from "../utils/notify.mjs";
+import { startVoiceSession } from "../voice/input.mjs";
+import { parseVoiceToggleKey } from "../voice/toggleKey.mjs";
+import { createCommandHandler } from "./commands.mjs";
+import { createCompleter, SLASH_COMMANDS } from "./completer.mjs";
 import {
   formatCostSummary,
   formatProviderTokenUsage,
   printMessage,
-} from "./cliFormatter.mjs";
-import { createInterruptTransform } from "./cliInterruptTransform.mjs";
-import { createMuteTransform } from "./cliMuteTransform.mjs";
-import { createPasteHandler } from "./cliPasteTransform.mjs";
-import { appendUsageRecord, buildUsageRecord } from "./usageStore.mjs";
-import { createSequentialExecutor } from "./utils/createSequentialExecutor.mjs";
-import { notify } from "./utils/notify.mjs";
-import { parseVoiceToggleKey, startVoiceSession } from "./voiceInput.mjs";
+} from "./formatter.mjs";
+import { createInterruptTransform } from "./interruptTransform.mjs";
+import { createMuteTransform } from "./muteTransform.mjs";
+import { createPasteHandler } from "./pasteTransform.mjs";
+import { createTableDetector } from "./tableDetector.mjs";
 const HELP_MESSAGE = [
   "Commands:",
@@ -70,7 +73,7 @@ const HELP_MESSAGE = [
  * Persist the session's cost summary to the usage log.
  * Failures are logged but never thrown so exit is not blocked.
  *
- * @param {import("./costTracker.mjs").CostSummary} summary
+ * @param {import("../costTracker.mjs").CostSummary} summary
  * @param {{ sessionId: string, modelName: string, startTime: Date }} meta
  */
 async function persistUsage(summary, { sessionId, modelName, startTime }) {
@@ -122,6 +125,9 @@ export function startInteractiveSession({
    */
   let voice = null;
+  // Create the table buffer instance for this session
+  const tableBuffer = createTableBuffer();
   // Parse the voice toggle key once at startup so misconfiguration fails
   // loudly instead of silently falling back.
   const voiceToggle = parseVoiceToggleKey(voiceInput?.toggleKey);
@@ -281,7 +287,7 @@ export function startInteractiveSession({
     // Agent turn: pause auto-approve; do not clear input.
     if (!state.turn) {
       agentCommands.pauseAutoApprove();
-      console.log(
+      console.error(
         styleText(
           "yellow",
           "\n\n⚠️ Ctrl-C: Auto-approve paused. Finishing current tool...\nPress Ctrl-D twice to exit.\n",
@@ -328,7 +334,7 @@ export function startInteractiveSession({
       );
       cli.prompt();
     } else {
-      console.log(styleText("yellow", "\n\n⚠️ Press Ctrl-D again to exit.\n"));
+      console.error(styleText("yellow", "\n\n⚠️ Press Ctrl-D again to exit.\n"));
     }
   };
@@ -418,7 +424,7 @@ export function startInteractiveSession({
   cli.on("line", async (lineInput) => {
     if (!state.turn) {
-      console.warn(
+      console.error(
         styleText(
           "yellow",
           `\nAgent is working. Ignore input: ${lineInput.trim()}`,
@@ -465,6 +471,8 @@ export function startInteractiveSession({
     if (partialContent.content) {
       if (partialContent.type === "tool_use") {
         process.stdout.write(styleText("gray", partialContent.content));
+      } else if (partialContent.type === "text") {
+        tableBuffer.feed(partialContent.content);
       } else {
         process.stdout.write(partialContent.content);
       }
@@ -511,7 +519,7 @@ export function startInteractiveSession({
   });
   agentEventEmitter.on("error", (error) => {
-    console.log(
+    console.error(
       styleText(
         "red",
         `\nError: message=${error.message}, stack=${error.stack}`,
@@ -520,6 +528,9 @@ export function startInteractiveSession({
   });
   agentEventEmitter.on("turnEnd", async () => {
+    // Flush any remaining table buffer content
+    tableBuffer.forceFlush();
     const err = notify(notifyCmd);
     if (err) {
       console.error(
@@ -543,3 +554,26 @@ export function startInteractiveSession({
   process.on("exit", cleanup);
   process.on("SIGTERM", cleanup);
 }
+/**
+ * Creates a table buffer for detecting and formatting markdown tables
+ * in streaming text output.
+ * Thin shell: delegates pure logic to createTableDetector and handles I/O.
+ */
+function createTableBuffer() {
+  const detector = createTableDetector();
+  function feed(/** @type {string} */ chunk) {
+    const { output, warnings } = detector.feed(chunk);
+    for (const s of output) process.stdout.write(s);
+    for (const w of warnings) console.error(styleText("yellow", w));
+  }
+  function forceFlush() {
+    const { output, warnings } = detector.forceFlush();
+    for (const s of output) process.stdout.write(s);
+    for (const w of warnings) console.error(styleText("yellow", w));
+  }
+  return { feed, forceFlush };
+}

package/src/cli/tableDetector.mjs ADDED Viewed

@@ -0,0 +1,228 @@
+import { formatMarkdownTable } from "./formatter.mjs";
+/**
+ * @typedef {{ output: string[], warnings: string[] }} DetectorResult
+ */
+/**
+ * Creates a table detector for detecting and formatting markdown tables
+ * in streaming text output. This is a pure logic module with no I/O side effects.
+ *
+ * @param {(lines: string[], maxWidth?: number) => string} [formatTable=formatMarkdownTable] - Table formatting function (injectable for testing)
+ * @param {number} [maxWidth] - Maximum terminal display width (defaults to process.stdout.columns - 4 or 80)
+ * @returns {{ feed: (chunk: string) => DetectorResult, forceFlush: () => DetectorResult }}
+ */
+export function createTableDetector(
+  formatTable = formatMarkdownTable,
+  maxWidth = process.stdout.columns ? process.stdout.columns - 4 : 80,
+) {
+  /** @type {string} - Accumulated incomplete line */
+  let pendingLine = "";
+  /** @type {string[]} - Lines of the current table being detected */
+  const tableLines = [];
+  /** @type {boolean} - Inside a code block (```) */
+  let inCodeBlock = false;
+  const MAX_TABLE_LINES = 200;
+  /**
+   * Feed a text chunk to the detector.
+   * @param {string} chunk
+   * @returns {DetectorResult}
+   */
+  function feed(chunk) {
+    if (chunk.length === 0) return { output: [], warnings: [] };
+    pendingLine += chunk;
+    /** @type {string[]} */
+    const output = [];
+    /** @type {string[]} */
+    const warnings = [];
+    // Process complete lines (those containing newlines)
+    while (pendingLine.includes("\n")) {
+      const idx = pendingLine.indexOf("\n");
+      const line = pendingLine.slice(0, idx); // Exclude the newline
+      pendingLine = pendingLine.slice(idx + 1);
+      const result = processLine(`${line}\n`); // Add newline back for output
+      output.push(...result.output);
+      warnings.push(...result.warnings);
+    }
+    // If not buffering a table and pendingLine has no pipe, output immediately
+    // This ensures non-table text is streamed without delay
+    if (tableLines.length === 0 && !pendingLine.includes("|")) {
+      output.push(pendingLine);
+      pendingLine = "";
+    }
+    return { output, warnings };
+  }
+  /**
+   * Force flush any pending content (call on turn end).
+   * @returns {DetectorResult}
+   */
+  function forceFlush() {
+    /** @type {string[]} */
+    const output = [];
+    /** @type {string[]} */
+    const warnings = [];
+    // Process any remaining pending line
+    if (pendingLine.length > 0) {
+      // If we have a table buffer, add pending line to it or output directly
+      if (tableLines.length > 0) {
+        tableLines.push(`${pendingLine}\n`);
+      } else {
+        output.push(pendingLine);
+      }
+      pendingLine = "";
+    }
+    const flushResult = flushTable();
+    output.push(...flushResult.output);
+    warnings.push(...flushResult.warnings);
+    return { output, warnings };
+  }
+  /**
+   * Process a complete line.
+   * @param {string} line - Line including trailing newline
+   * @returns {DetectorResult}
+   */
+  function processLine(line) {
+    /** @type {string[]} */
+    const output = [];
+    /** @type {string[]} */
+    const warnings = [];
+    // Code block detection
+    if (line.trimStart().startsWith("```")) {
+      inCodeBlock = !inCodeBlock;
+      const flushResult = flushTable(); // Code block terminates any ongoing table
+      output.push(...flushResult.output);
+      warnings.push(...flushResult.warnings);
+      output.push(line);
+      return { output, warnings };
+    }
+    if (inCodeBlock) {
+      output.push(line);
+      return { output, warnings };
+    }
+    // Table start: line begins with pipe
+    if (isTableStart(line)) {
+      tableLines.push(line);
+      // Buffer limit check
+      if (tableLines.length > MAX_TABLE_LINES) {
+        const flushResult = flushTableAsIs();
+        output.push(...flushResult.output);
+        warnings.push(...flushResult.warnings);
+      }
+      return { output, warnings };
+    }
+    // Table continuation: line contains pipe (for rows without leading pipe)
+    if (tableLines.length > 0 && isTableContinuation(line)) {
+      tableLines.push(line);
+      if (tableLines.length > MAX_TABLE_LINES) {
+        const flushResult = flushTableAsIs();
+        output.push(...flushResult.output);
+        warnings.push(...flushResult.warnings);
+      }
+      return { output, warnings };
+    }
+    // Table ended: format and flush buffer, then output current line
+    const flushResult = flushTable();
+    output.push(...flushResult.output);
+    warnings.push(...flushResult.warnings);
+    output.push(line);
+    return { output, warnings };
+  }
+  /**
+   * Flush table buffer with formatting.
+   * @returns {DetectorResult}
+   */
+  function flushTable() {
+    if (tableLines.length === 0) return { output: [], warnings: [] };
+    /** @type {string[]} */
+    const output = [];
+    /** @type {string[]} */
+    const warnings = [];
+    // Separate trailing empty lines (preserve spacing after table)
+    /** @type {string[]} */
+    const trailingEmpty = [];
+    while (tableLines.length > 0 && tableLines.at(-1)?.trim() === "") {
+      const line = tableLines.pop();
+      if (line !== undefined) trailingEmpty.unshift(line);
+    }
+    if (tableLines.length > 0) {
+      // Remove trailing newlines for formatting, then add them back
+      const rawLines = tableLines.map((l) =>
+        l.endsWith("\n") ? l.slice(0, -1) : l,
+      );
+      try {
+        const formatted = formatTable(rawLines, maxWidth);
+        output.push(`${formatted}\n`);
+      } catch (err) {
+        // Fallback: output raw lines if formatting fails
+        const message = err instanceof Error ? err.message : String(err);
+        warnings.push(`Warning: Table formatting failed: ${message}`);
+        for (const line of tableLines) {
+          output.push(line);
+        }
+      }
+    }
+    tableLines.length = 0;
+    // Output trailing empty lines
+    for (const empty of trailingEmpty) {
+      output.push(empty);
+    }
+    return { output, warnings };
+  }
+  /**
+   * Flush table buffer without formatting (for oversized tables).
+   * @returns {DetectorResult}
+   */
+  function flushTableAsIs() {
+    if (tableLines.length === 0) return { output: [], warnings: [] };
+    const output = [...tableLines];
+    tableLines.length = 0;
+    return { output, warnings: [] };
+  }
+  /**
+   * Check if a line starts a table.
+   * @param {string} line
+   * @returns {boolean}
+   */
+  function isTableStart(line) {
+    const trimmed = line.trimStart();
+    return trimmed.startsWith("|");
+  }
+  /**
+   * Check if a line continues a table.
+   * This is a heuristic: any line containing a pipe character is considered
+   * a potential table row. This may produce false positives for non-table
+   * content with pipes (e.g., "Choose A | B | C").
+   * @param {string} line
+   * @returns {boolean}
+   */
+  function isTableContinuation(line) {
+    return line.includes("|");
+  }
+  return { feed, forceFlush };
+}

package/src/config.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ import {
   WebSearchToolGeminiOptions,
   WebSearchToolGeminiVertexAIOptions,
 } from "./tools/webSearch.mjs";
-import { VoiceInputConfig } from "./voiceInput.mjs";
+import { VoiceInputConfig } from "./voice/input.mjs";
 /**
  * JSON-serializable webFetch configuration.

package/src/config.mjs CHANGED Viewed

@@ -128,7 +128,7 @@ export async function loadConfigFile(filePath, skipTrustCheck = false) {
   if (!isTrusted) {
     if (!process.stdout.isTTY) {
-      console.warn(
+      console.error(
         styleText(
           "yellow",
           `WARNING: Config file found at '${filePath}' but cannot ask for approval without a TTY. Skipping.`,

package/src/context/loadAgentRoles.mjs CHANGED Viewed

@@ -54,7 +54,7 @@ export async function loadAgentRoles(claudeCodePlugins) {
       agentDirs.map(async ({ dir, idPrefix, only }) => {
         const files = await getMarkdownFiles(dir).catch((err) => {
           if (err.code !== "ENOENT") {
-            console.warn(`Failed to list agent roles in ${dir}:`, err);
+            console.error(`Failed to list agent roles in ${dir}:`, err);
           }
           return /** @type {string[]} */ ([]);
         });
@@ -72,7 +72,7 @@ export async function loadAgentRoles(claudeCodePlugins) {
         files.map(async ({ dir, file, idPrefix }) => {
           const fullPath = path.join(dir, file);
           const content = await fs.readFile(fullPath, "utf-8").catch((err) => {
-            console.warn(`Failed to read agent role file ${fullPath}:`, err);
+            console.error(`Failed to read agent role file ${fullPath}:`, err);
             return null;
           });

package/src/context/loadPrompts.mjs CHANGED Viewed

@@ -69,7 +69,7 @@ export async function loadPrompts(claudeCodePlugins) {
       promptDirs.map(async ({ dir, idPrefix, only }) => {
         const files = await getMarkdownFiles(dir).catch((err) => {
           if (err.code !== "ENOENT") {
-            console.warn(`Failed to list prompts in ${dir}:`, err);
+            console.error(`Failed to list prompts in ${dir}:`, err);
           }
           return /** @type {string[]} */ ([]);
         });
@@ -95,7 +95,7 @@ export async function loadPrompts(claudeCodePlugins) {
         files.map(async ({ dir, file, idPrefix }) => {
           const fullPath = path.join(dir, file);
           const content = await fs.readFile(fullPath, "utf-8").catch((err) => {
-            console.warn(`Failed to read prompt file ${fullPath}:`, err);
+            console.error(`Failed to read prompt file ${fullPath}:`, err);
             return null;
           });

package/src/context/loadUserMessageContext.mjs CHANGED Viewed

@@ -143,5 +143,5 @@ function inferMimeType(filePath) {
  * @returns {void}
  */
 function warn(message) {
-  console.warn(styleText("yellow", message));
+  console.error(styleText("yellow", message));
 }

package/src/main.mjs CHANGED Viewed

@@ -10,15 +10,15 @@ import {
   installClaudeCodePlugins,
   resolvePluginPaths,
 } from "./claudeCodePlugin.mjs";
-import { parseCliArgs, printHelp } from "./cliArgs.mjs";
-import { startBatchSession } from "./cliBatch.mjs";
-import { runCostCommand } from "./cliCost.mjs";
-import { startInteractiveSession } from "./cliInteractive.mjs";
+import { parseCliArgs, printHelp } from "./cli/args.mjs";
+import { startBatchSession } from "./cli/batch.mjs";
+import { runCostCommand } from "./cli/cost.mjs";
+import { startInteractiveSession } from "./cli/interactive.mjs";
 import { loadAppConfig } from "./config.mjs";
 import { loadAgentRoles } from "./context/loadAgentRoles.mjs";
 import { loadPrompts } from "./context/loadPrompts.mjs";
 import { AGENT_PROJECT_METADATA_DIR, USER_NAME } from "./env.mjs";
-import { setupMCPServer } from "./mcpIntegration.mjs";
+import { setupMCPServer } from "./mcp/integration.mjs";
 import { createModelCaller } from "./modelCaller.mjs";
 import { createPrompt } from "./prompt.mjs";
 import { listSessions, loadSession } from "./sessionStore.mjs";

package/src/{mcpIntegration.mjs → mcp/integration.mjs} RENAMED Viewed

@@ -1,16 +1,16 @@
 /**
- * @import { StructuredToolResultContent, Tool, ToolImplementation } from "./tool";
- * @import { MCPServerConfig } from "./config";
+ * @import { StructuredToolResultContent, Tool, ToolImplementation } from "../tool";
+ * @import { MCPServerConfig } from "../config";
  */
 import { mkdir } from "node:fs/promises";
 import path from "node:path";
-import { AGENT_PROJECT_METADATA_DIR } from "./env.mjs";
-import { createMCPClient } from "./mcpClient.mjs";
-import { writeTmpFile } from "./tmpfile.mjs";
-import { noThrow } from "./utils/noThrow.mjs";
+import { AGENT_PROJECT_METADATA_DIR } from "../env.mjs";
+import { writeTmpFile } from "../tmpfile.mjs";
+import { noThrow } from "../utils/noThrow.mjs";
+import { createMCPClient } from "./client.mjs";
-/** @typedef {import("./mcpClient.mjs").MCPClient} MCPClient */
+/** @typedef {import("./client.mjs").MCPClient} MCPClient */
 const OUTPUT_MAX_LENGTH = 1024 * 8;

package/src/tools/patchFile.mjs CHANGED Viewed

@@ -27,18 +27,16 @@ export function createPatchFileTool(
           },
           patch: {
             description: `
-Format:
-@@@ ${nonce} {start}:{startHash}-{end}:{endHash}
+Format — a single patch string may contain multiple blocks:
+>>> ${nonce} {start}:{startHash}-{end}:{endHash}
 new content
-@@@ ${nonce}
-@@@ ${nonce} {N}:{afterHash}+
+<<< ${nonce}
+>>> ${nonce} {N}:{afterHash}+
 inserted content
-@@@ ${nonce}
-@@@ ${nonce} 0+
+<<< ${nonce}
+>>> ${nonce} 0+
 prepended content
-@@@ ${nonce}
+<<< ${nonce}
 - The nonce "${nonce}" is constant; always use the exact value shown above.
 - Line numbers are 1-indexed and refer to the original file; "{start}-{end}" is inclusive.
@@ -63,7 +61,7 @@ prepended content
         const blocks = parseBlocks(patch, nonce);
         if (blocks.length === 0) {
           throw new Error(
-            `No patch blocks found. Each block must start with "@@@ ${nonce} ..." and end with "@@@ ${nonce}".`,
+            `No patch blocks found. Each block must start with ">>> ${nonce} ..." and end with "<<< ${nonce}".`,
           );
         }
@@ -93,8 +91,8 @@ prepended content
  * @returns {PatchBlock[]}
  */
 export function parseBlocks(patch, nonce) {
-  const openPrefix = `@@@ ${nonce} `;
-  const closeMarker = `@@@ ${nonce}`;
+  const openPrefix = `>>> ${nonce} `;
+  const closeMarker = `<<< ${nonce}`;
   const lines = patch.split("\n");
   /** @type {PatchBlock[]} */
@@ -124,6 +122,14 @@ export function parseBlocks(patch, nonce) {
       );
     }
     const body = lines.slice(i + 1, closeIdx);
+    const nestedOpen = body.findIndex((l) => l.startsWith(openPrefix));
+    if (nestedOpen !== -1) {
+      throw new Error(
+        `Unclosed block "${openPrefix}${headerArgs}": found another open marker "${body[nestedOpen]}" ` +
+          `at line ${i + 1 + nestedOpen + 1} of patch before the close marker. ` +
+          `Did you forget "${closeMarker}" to close the previous block?`,
+      );
+    }
     if (header.op === "insert" && body.length === 0) {
       throw new Error(
         `Insert block "${openPrefix}${headerArgs}" has empty body. Use a replace block to delete content.`,

package/src/{voiceInputGemini.mjs → voice/gemini.mjs} RENAMED Viewed

@@ -1,10 +1,7 @@
-import {
-  isObjectLike,
-  startWebSocketVoiceSession,
-} from "./voiceInputSession.mjs";
+import { isObjectLike, startWebSocketVoiceSession } from "./session.mjs";
 /**
- * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./voiceInputSession.mjs"
+ * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./session.mjs"
  */
 /**

package/src/voice/input.mjs ADDED Viewed

@@ -0,0 +1,29 @@
+import { startGeminiVoiceSession } from "./gemini.mjs";
+import { startOpenAIVoiceSession } from "./openai.mjs";
+import { failVoiceSessionAsync } from "./session.mjs";
+/**
+ * @typedef {import("./openai.mjs").VoiceInputOpenAIConfig | import("./gemini.mjs").VoiceInputGeminiConfig} VoiceInputConfig
+ */
+/**
+ * Start a voice input session. Dispatches to the provider-specific
+ * implementation based on `config.provider`.
+ *
+ * @param {object} options
+ * @param {VoiceInputConfig} options.config
+ * @param {import("./session.mjs").VoiceSessionCallbacks} options.callbacks
+ * @returns {import("./session.mjs").VoiceSession}
+ */
+export function startVoiceSession({ config, callbacks }) {
+  if (config.provider === "openai") {
+    return startOpenAIVoiceSession({ config, callbacks });
+  }
+  if (config.provider === "gemini") {
+    return startGeminiVoiceSession({ config, callbacks });
+  }
+  const provider = /** @type {{ provider: string }} */ (config).provider;
+  return failVoiceSessionAsync(
+    callbacks,
+    new Error(`Unsupported voiceInput.provider: ${provider}`),
+  );
+}

package/src/{voiceInputOpenAI.mjs → voice/openai.mjs} RENAMED Viewed

@@ -1,24 +1,21 @@
-import {
-  isObjectLike,
-  startWebSocketVoiceSession,
-} from "./voiceInputSession.mjs";
+import { isObjectLike, startWebSocketVoiceSession } from "./session.mjs";
 /**
- * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./voiceInputSession.mjs"
+ * @import { VoiceProviderHooks, VoiceRecorderConfig, VoiceSession, VoiceSessionCallbacks } from "./session.mjs"
  */
 /**
  * @typedef {Object} VoiceInputOpenAIConfig
  * @property {"openai"} provider
  * @property {string} apiKey
- * @property {string} [model] - Defaults to "gpt-4o-transcribe".
+ * @property {string} [model] - Transcription model. Defaults to "gpt-realtime-whisper".
  * @property {string} [language] - ISO-639-1 code (e.g. "ja", "en"). Improves accuracy and latency when set.
  * @property {string} [baseURL]
  * @property {VoiceRecorderConfig} [recorder]
  * @property {string} [toggleKey] - "ctrl-<char>". Defaults to "ctrl-o".
  */
-const OPENAI_DEFAULT_MODEL = "gpt-4o-transcribe";
+const OPENAI_DEFAULT_TRANSCRIPTION_MODEL = "gpt-realtime-whisper";
 const OPENAI_DEFAULT_WS = "wss://api.openai.com/v1/realtime";
 const OPENAI_SAMPLE_RATE = 24000;
 const OPENAI_LABEL = "OpenAI Realtime";
@@ -46,31 +43,32 @@ export function startOpenAIVoiceSession({ config, callbacks }) {
       return {
         headers: {
           Authorization: `Bearer ${config.apiKey}`,
-          "OpenAI-Beta": "realtime=v1",
         },
       };
     },
     buildSetupMessage(config) {
-      const model = config.model ?? OPENAI_DEFAULT_MODEL;
+      const model = config.model ?? OPENAI_DEFAULT_TRANSCRIPTION_MODEL;
       /** @type {{ model: string, language?: string }} */
       const transcription = { model };
       if (config.language) transcription.language = config.language;
-      // The `?intent=transcription` endpoint uses the flat transcription-session
-      // schema, not the nested `session.audio.input.*` realtime schema.
       return {
-        type: "transcription_session.update",
+        type: "session.update",
         session: {
-          input_audio_format: "pcm16",
-          input_audio_transcription: transcription,
-          turn_detection: { type: "server_vad" },
+          type: "transcription",
+          audio: {
+            input: {
+              format: { type: "audio/pcm", rate: OPENAI_SAMPLE_RATE },
+              transcription,
+            },
+          },
         },
       };
     },
     isReadyMessage(message) {
       return (
         isObjectLike(message) &&
-        (message.type === "transcription_session.created" ||
-          message.type === "transcription_session.updated")
+        (message.type === "session.created" ||
+          message.type === "session.updated")
       );
     },
     extractError(message) {