npm - bosun - Versions diffs - 0.37.0 → 0.37.1 - Mend

bosun 0.37.0 → 0.37.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/agent-tool-config.mjs +327 -0
package/bosun.schema.json +1 -1
package/library-manager.mjs +18 -0
package/package.json +3 -1
package/setup-web-server.mjs +2 -2
package/setup.mjs +3 -3
package/ui/demo.html +66 -0
package/ui/modules/settings-schema.js +1 -1
package/ui/modules/voice-client-sdk.js +9 -5
package/ui/modules/voice-client.js +3 -3
package/ui/tabs/library.js +676 -11
package/ui/tabs/workflows.js +69 -0
package/ui-server.mjs +254 -18
package/voice-action-dispatcher.mjs +81 -0
package/voice-agents-sdk.mjs +2 -2
package/voice-relay.mjs +14 -5
package/voice-tools.mjs +475 -9

package/agent-tool-config.mjs ADDED Viewed

@@ -0,0 +1,327 @@
+/**
+ * agent-tool-config.mjs — Per-Agent Tool Configuration Store
+ *
+ * Manages which tools and MCP servers are enabled for each agent profile.
+ * Persisted as `.bosun/agent-tools.json` alongside the library manifest.
+ *
+ * Schema:
+ *   {
+ *     "agents": {
+ *       "<agentId>": {
+ *         "enabledTools": ["tool1", "tool2"] | null,   // null = use the default tool set (defaults.builtinTools)
+ *         "enabledMcpServers": ["github", "context7"],  // enabled MCP server IDs
+ *         "disabledBuiltinTools": ["tool3"],             // explicitly disabled builtins
+ *         "updatedAt": "2026-01-01T00:00:00.000Z"
+ *       }
+ *     },
+ *     "defaults": {
+ *       "builtinTools": [...],          // default tool list for all agents
+ *       "updatedAt": "..."
+ *     }
+ *   }
+ *
+ * EXPORTS:
+ *   DEFAULT_BUILTIN_TOOLS         — list of default built-in tools for voice/agents
+ *   loadToolConfig(rootDir)       — load the full config
+ *   saveToolConfig(rootDir, cfg)  — save the full config
+ *   getAgentToolConfig(rootDir, agentId) — get config for one agent
+ *   setAgentToolConfig(rootDir, agentId, config) — update config for one agent
+ *   getEffectiveTools(rootDir, agentId)  — compute final enabled tools list
+ *   listAvailableTools(rootDir)   — list all available tools (builtin + MCP)
+ */
+import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import { resolve } from "node:path";
+import { homedir } from "node:os";
+// ── Constants ─────────────────────────────────────────────────────────────────
+const TAG = "[agent-tool-config]";
+const CONFIG_FILE = "agent-tools.json";
+/**
+ * Resolve the Bosun home directory.
+ *
+ * The BOSUN_HOME and BOSUN_DIR environment variables are consulted in that
+ * order; the first non-empty one wins. When neither is set the result is the
+ * `.bosun` folder inside the current user's home directory.
+ *
+ * @returns {string} directory path
+ */
+function getBosunHome() {
+  const { BOSUN_HOME, BOSUN_DIR } = process.env;
+  if (BOSUN_HOME) {
+    return BOSUN_HOME;
+  }
+  if (BOSUN_DIR) {
+    return BOSUN_DIR;
+  }
+  return resolve(homedir(), ".bosun");
+}
+/**
+ * Catalog of the built-in tools that agent/voice tool configuration refers to.
+ *
+ * Every entry exposes `id`, `name`, `description`, `category` (always
+ * "Built-In"), `icon` and a boolean `default`. The `default` flag decides
+ * whether the tool id is part of the default tool list that loadToolConfig()
+ * builds when no stored defaults exist.
+ *
+ * Only the outer array is frozen; the entries themselves are plain objects.
+ */
+export const DEFAULT_BUILTIN_TOOLS = Object.freeze(
+  [
+    // [id, name, description, icon, enabled by default]
+    ["search-files", "Search Files", "Search for files in the workspace by name or pattern", ":search:", true],
+    ["read-file", "Read File", "Read contents of a file in the workspace", ":file:", true],
+    ["edit-file", "Edit File", "Create or edit files in the workspace", ":edit:", true],
+    ["run-command", "Run Terminal Command", "Execute shell commands in a terminal", ":terminal:", true],
+    ["web-search", "Web Search", "Search the web for information", ":globe:", true],
+    ["code-search", "Semantic Code Search", "Search codebase semantically for relevant code", ":cpu:", true],
+    ["git-operations", "Git Operations", "Run git commands (commit, push, branch, etc.)", ":git:", true],
+    ["create-task", "Create Task", "Create new tasks and issues", ":check:", true],
+    ["delegate-task", "Delegate to Agent", "Delegate work to another agent executor", ":bot:", true],
+    ["fetch-url", "Fetch URL", "Fetch content from a URL and convert for LLM usage", ":link:", true],
+    ["list-directory", "List Directory", "List contents of a directory in the workspace", ":folder:", true],
+    ["grep-search", "Text Search (Grep)", "Search for exact text or regex patterns in files", ":search:", true],
+    ["task-management", "Task Management", "Track and manage todo items and task status", ":clipboard:", true],
+    ["notifications", "Send Notifications", "Send notifications via Telegram, webhook, etc.", ":bell:", false],
+    ["vision-analysis", "Vision Analysis", "Analyze images and screenshots", ":eye:", true],
+  ].map(([id, name, description, icon, isDefault]) => ({
+    id,
+    name,
+    description,
+    category: "Built-In",
+    icon,
+    default: isDefault,
+  })),
+);
+// ── Config File I/O ───────────────────────────────────────────────────────────
+/**
+ * Build the absolute path of the agent tool-config file.
+ *
+ * @param {string} [rootDir] base directory; when falsy, getBosunHome() is used
+ * @returns {string} `<base>/.bosun/agent-tools.json`
+ *
+ * NOTE(review): when rootDir is omitted the base is already the Bosun home
+ * (by default `<home>/.bosun`), so the result contains `.bosun` twice —
+ * confirm that nesting is intended.
+ */
+function getConfigPath(rootDir) {
+  const baseDir = rootDir || getBosunHome();
+  return resolve(baseDir, ".bosun", CONFIG_FILE);
+}
+/**
+ * Load the agent tool configuration.
+ *
+ * Returns the stored `agents` and `defaults` sections. Whenever a section is
+ * missing, the file does not exist, or the file cannot be read/parsed, the
+ * corresponding part falls back to an empty agent map and a defaults object
+ * listing the ids of every DEFAULT_BUILTIN_TOOLS entry flagged `default`.
+ *
+ * A read/parse failure is reported with console.warn instead of being
+ * swallowed: the caller still gets a usable config, but an operator can see
+ * that the on-disk file was ignored before a later save replaces it.
+ *
+ * @param {string} [rootDir]
+ * @returns {{ agents: Object, defaults: Object }}
+ */
+export function loadToolConfig(rootDir) {
+  const configPath = getConfigPath(rootDir);
+  // Single source for the fallback shape; a new object is built on each call.
+  const buildDefaults = () => ({
+    builtinTools: DEFAULT_BUILTIN_TOOLS.filter((t) => t.default).map((t) => t.id),
+    updatedAt: new Date().toISOString(),
+  });
+  if (!existsSync(configPath)) {
+    return { agents: {}, defaults: buildDefaults() };
+  }
+  try {
+    const parsed = JSON.parse(readFileSync(configPath, "utf8"));
+    return {
+      agents: parsed.agents || {},
+      defaults: parsed.defaults || buildDefaults(),
+    };
+  } catch (err) {
+    // Covers unreadable files, invalid JSON and a top-level `null`.
+    console.warn(
+      `${TAG} could not load ${configPath}; falling back to defaults: ${err?.message ?? err}`,
+    );
+    return { agents: {}, defaults: buildDefaults() };
+  }
+}
+/**
+ * Persist the complete tool configuration.
+ *
+ * The parent directory of the config file is created (recursively) when
+ * needed, then the config is written as 2-space-indented JSON followed by a
+ * newline.
+ *
+ * @param {string} rootDir
+ * @param {{ agents: Object, defaults: Object }} config
+ */
+export function saveToolConfig(rootDir, config) {
+  const target = getConfigPath(rootDir);
+  // Ensure the containing folder exists before writing.
+  mkdirSync(resolve(target, ".."), { recursive: true });
+  const serialized = `${JSON.stringify(config, null, 2)}\n`;
+  writeFileSync(target, serialized, "utf8");
+}
+/**
+ * Read the stored tool settings of a single agent.
+ *
+ * Missing agents and missing fields are normalised: `enabledTools` becomes
+ * null, the two list fields become empty arrays.
+ *
+ * @param {string} rootDir
+ * @param {string} agentId
+ * @returns {{ enabledTools: string[]|null, enabledMcpServers: string[], disabledBuiltinTools: string[] }}
+ */
+export function getAgentToolConfig(rootDir, agentId) {
+  const { agents } = loadToolConfig(rootDir);
+  const entry = agents[agentId];
+  if (entry) {
+    const { enabledTools, enabledMcpServers, disabledBuiltinTools } = entry;
+    return {
+      enabledTools: enabledTools ?? null,
+      enabledMcpServers: enabledMcpServers || [],
+      disabledBuiltinTools: disabledBuiltinTools || [],
+    };
+  }
+  // No stored entry for this agent: hand back the neutral shape.
+  return {
+    enabledTools: null,
+    enabledMcpServers: [],
+    disabledBuiltinTools: [],
+  };
+}
+/**
+ * Merge an update into one agent's stored tool settings and save the config.
+ *
+ * For each of `enabledTools`, `enabledMcpServers` and `disabledBuiltinTools`
+ * a value present in `update` replaces the stored one; a field left
+ * `undefined` keeps the stored value (or its neutral default). `updatedAt` is
+ * always refreshed. Other keys already stored for the agent are preserved.
+ *
+ * @param {string} rootDir
+ * @param {string} agentId
+ * @param {{ enabledTools?: string[]|null, enabledMcpServers?: string[], disabledBuiltinTools?: string[] }} update
+ * @returns {{ ok: boolean }}
+ */
+export function setAgentToolConfig(rootDir, agentId, update) {
+  const config = loadToolConfig(rootDir);
+  const previous = config.agents[agentId] || {};
+  // Only `undefined` means "keep": an explicit null (e.g. enabledTools) must
+  // still override the stored value, so `??` cannot be used on the update side.
+  const choose = (incoming, kept) => (incoming === undefined ? kept : incoming);
+  const next = { ...previous };
+  next.enabledTools = choose(update.enabledTools, previous.enabledTools ?? null);
+  next.enabledMcpServers = choose(update.enabledMcpServers, previous.enabledMcpServers || []);
+  next.disabledBuiltinTools = choose(update.disabledBuiltinTools, previous.disabledBuiltinTools || []);
+  next.updatedAt = new Date().toISOString();
+  config.agents[agentId] = next;
+  saveToolConfig(rootDir, config);
+  return { ok: true };
+}
+/**
+ * Compute the effective enabled tools for an agent.
+ *
+ * Every DEFAULT_BUILTIN_TOOLS entry is returned with an added `enabled` flag.
+ * A tool is enabled when it is NOT listed in the agent's
+ * `disabledBuiltinTools` and
+ *   - the agent has an `enabledTools` array that contains its id, or
+ *   - the agent has no `enabledTools` array and the id is part of
+ *     `defaults.builtinTools` (or, when that list is absent, of the tools
+ *     flagged `default`).
+ *
+ * Stored values that are not arrays are treated as absent, so a malformed
+ * config file cannot cause substring matching or a TypeError here.
+ *
+ * @param {string} rootDir
+ * @param {string} agentId
+ * @returns {{ builtinTools: Array<{ id: string, name: string, enabled: boolean }>, mcpServers: string[] }}
+ */
+export function getEffectiveTools(rootDir, agentId) {
+  const config = loadToolConfig(rootDir);
+  const agentConfig = config.agents[agentId] || {};
+  const asArray = (value) => (Array.isArray(value) ? value : null);
+  const disabledIds = new Set(asArray(agentConfig.disabledBuiltinTools) ?? []);
+  const defaultIds = new Set(
+    asArray(config.defaults?.builtinTools) ??
+      DEFAULT_BUILTIN_TOOLS.filter((t) => t.default).map((t) => t.id),
+  );
+  // Build the allow-list once instead of scanning enabledTools for every tool.
+  const explicitIds = asArray(agentConfig.enabledTools);
+  const allowedIds = explicitIds ? new Set(explicitIds) : defaultIds;
+  const builtinTools = DEFAULT_BUILTIN_TOOLS.map((tool) => ({
+    ...tool,
+    enabled: !disabledIds.has(tool.id) && allowedIds.has(tool.id),
+  }));
+  return {
+    builtinTools,
+    mcpServers: asArray(agentConfig.enabledMcpServers) ?? [],
+  };
+}
+/**
+ * List every tool that can be offered: the built-in catalog plus the MCP
+ * servers reported by `listInstalledMcpServers` from `./mcp-registry.mjs`.
+ *
+ * Loading or querying the registry is best effort: any failure leaves the MCP
+ * server list empty.
+ *
+ * @param {string} rootDir
+ * @returns {{ builtinTools: Array<Object>, mcpServers: Array<Object> }}
+ */
+export async function listAvailableTools(rootDir) {
+  let installed = [];
+  try {
+    const { listInstalledMcpServers } = await import("./mcp-registry.mjs");
+    installed = await listInstalledMcpServers(rootDir);
+  } catch {
+    // MCP registry not available
+  }
+  // Reduce each server record to the summary fields returned to callers.
+  const summarize = (server) => {
+    const summary = {
+      id: server.id,
+      name: server.name,
+      description: server.description || "",
+      tags: server.tags || [],
+      transport: server.meta?.transport || "stdio",
+    };
+    return summary;
+  };
+  const builtinTools = Array.from(DEFAULT_BUILTIN_TOOLS);
+  const mcpServers = installed.map((server) => summarize(server));
+  return { builtinTools, mcpServers };
+}

package/bosun.schema.json CHANGED Viewed

@@ -281,7 +281,7 @@
         "turnDetection": {
           "type": "string",
           "enum": ["server_vad", "semantic_vad", "none"],
-          "default": "server_vad",
+          "default": "semantic_vad",
           "description": "Turn detection mode for voice activity detection"
         },
         "instructions": {

package/library-manager.mjs CHANGED Viewed

@@ -102,6 +102,8 @@ function nowISO() {
  * @property {string[]} [skills]         - skill library refs to inject
  * @property {Object} [hookProfile]      - hook profile overrides
  * @property {Object} [env]              - extra env vars for the agent
+ * @property {string[]} [enabledTools]   - list of tool IDs enabled for this agent (null = all)
+ * @property {string[]} [enabledMcpServers] - list of MCP server IDs enabled for this agent
  */
 /**
@@ -688,6 +690,22 @@ export const BUILTIN_AGENT_PROFILES = [
     env: {},
     tags: ["test", "testing", "e2e", "unit", "coverage"],
   },
+  {
+    id: "voice-agent",
+    name: "Voice Agent",
+    description: "Default voice assistant agent. Handles real-time voice sessions, tool calls, and delegate orchestration. Customize tools and MCP servers for voice interactions.",
+    titlePatterns: ["\\bvoice\\b", "\\bcall\\b", "\\bmeeting\\b", "\\bassistant\\b"],
+    scopes: ["voice", "assistant"],
+    sdk: null,
+    model: null,
+    promptOverride: null,
+    skills: [],
+    hookProfile: null,
+    env: {},
+    tags: ["voice", "assistant", "realtime", "default"],
+    enabledTools: null,
+    enabledMcpServers: [],
+  },
 ];
 /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bosun",
-  "version": "0.37.0",
+  "version": "0.37.1",
   "description": "AI-powered orchestrator supervisor — manages AI agent executors with failover, auto-restarts on failure, analyzes crashes with Codex SDK, creates PRs via Vibe-Kanban API, and sends Telegram notifications. Supports N executors with weighted distribution, multi-repo projects, and auto-setup.",
   "type": "module",
   "license": "Apache 2.0",
@@ -62,6 +62,7 @@
     "./agent-hooks": "./agent-hooks.mjs",
     "./hook-profiles": "./hook-profiles.mjs",
     "./agent-hook-bridge": "./agent-hook-bridge.mjs",
+    "./agent-tool-config": "./agent-tool-config.mjs",
     "./startup-service": "./startup-service.mjs",
     "./telegram-sentinel": "./telegram-sentinel.mjs",
     "./whatsapp-channel": "./whatsapp-channel.mjs",
@@ -240,6 +241,7 @@
     "agent-hooks.mjs",
     "hook-profiles.mjs",
     "agent-hook-bridge.mjs",
+    "agent-tool-config.mjs",
     "agent-supervisor.mjs",
     "agent-work-analyzer.mjs",
     "startup-service.mjs",

package/setup-web-server.mjs CHANGED Viewed

@@ -439,7 +439,7 @@ function buildStableSetupDefaults({
     voiceModel: "gpt-audio-1.5",
     voiceVisionModel: "gpt-4.1-nano",
     voiceId: "alloy",
-    voiceTurnDetection: "server_vad",
+    voiceTurnDetection: "semantic_vad",
     voiceFallbackMode: "browser",
     voiceDelegateExecutor: "codex-sdk",
     openaiRealtimeApiKey: "",
@@ -892,7 +892,7 @@ function applyNonBlockingSetupEnvDefaults(envMap, env = {}, sourceEnv = process.
       sourceEnv.VOICE_TURN_DETECTION,
     ),
     ["server_vad", "semantic_vad", "none"],
-    "server_vad",
+    "semantic_vad",
   );
   envMap.VOICE_FALLBACK_MODE = normalizeEnumValue(
     pickNonEmptyValue(

package/setup.mjs CHANGED Viewed

@@ -1955,7 +1955,7 @@ function normalizeSetupConfiguration({
   env.VOICE_TURN_DETECTION = normalizeEnum(
     env.VOICE_TURN_DETECTION,
     ["server_vad", "semantic_vad", "none"],
-    "server_vad",
+    "semantic_vad",
   );
   env.VOICE_FALLBACK_MODE = normalizeEnum(
     env.VOICE_FALLBACK_MODE,
@@ -3305,7 +3305,7 @@ async function main() {
       );
       env.VOICE_TURN_DETECTION = await prompt.ask(
         "Turn detection (server_vad|semantic_vad|none)",
-        process.env.VOICE_TURN_DETECTION || "server_vad",
+        process.env.VOICE_TURN_DETECTION || "semantic_vad",
       );
       env.VOICE_FALLBACK_MODE = await prompt.ask(
         "Fallback mode (browser|disabled)",
@@ -5659,7 +5659,7 @@ async function runNonInteractive({
   env.AZURE_OPENAI_REALTIME_DEPLOYMENT =
     process.env.AZURE_OPENAI_REALTIME_DEPLOYMENT || "gpt-realtime-1.5";
   env.VOICE_ID = process.env.VOICE_ID || "alloy";
-  env.VOICE_TURN_DETECTION = process.env.VOICE_TURN_DETECTION || "server_vad";
+  env.VOICE_TURN_DETECTION = process.env.VOICE_TURN_DETECTION || "semantic_vad";
   env.VOICE_FALLBACK_MODE = process.env.VOICE_FALLBACK_MODE || "browser";
   env.VOICE_DELEGATE_EXECUTOR =
     process.env.VOICE_DELEGATE_EXECUTOR ||

package/ui/demo.html CHANGED Viewed

@@ -2945,6 +2945,72 @@
         return { ok: true, data: best };
       }
+      // ── MCP Servers ──
+      if (route === '/api/mcp/catalog') {
+        return { ok: true, data: [
+          { id: 'github', name: 'GitHub', description: 'GitHub MCP server', transport: 'stdio', tags: ['code', 'git'], installed: false },
+          { id: 'playwright', name: 'Playwright', description: 'Browser automation', transport: 'stdio', tags: ['testing'], installed: false },
+          { id: 'context7', name: 'Context7', description: 'Documentation lookup', transport: 'stdio', tags: ['docs'], installed: true },
+        ]};
+      }
+      if (route === '/api/mcp/installed') {
+        return { ok: true, data: [
+          { id: 'context7', name: 'Context7', description: 'Documentation lookup', transport: 'stdio', tags: ['docs'] },
+        ]};
+      }
+      if (route === '/api/mcp/install') {
+        return { ok: true, installed: { id: body?.catalogId || 'custom', name: body?.name || 'Custom MCP' } };
+      }
+      if (route === '/api/mcp/uninstall') {
+        return { ok: true };
+      }
+      if (route === '/api/mcp/configure') {
+        return { ok: true };
+      }
+      // ── Agent Tool Config ──
+      if (route === '/api/agent-tools/available') {
+        return { ok: true, data: {
+          builtinTools: [
+            { id: 'search-files', name: 'Search Files', description: 'Search workspace files', category: 'Built-In', default: true },
+            { id: 'read-file', name: 'Read File', description: 'Read file contents', category: 'Built-In', default: true },
+            { id: 'edit-file', name: 'Edit File', description: 'Edit workspace files', category: 'Built-In', default: true },
+            { id: 'run-command', name: 'Run Command', description: 'Execute shell commands', category: 'Built-In', default: true },
+            { id: 'web-search', name: 'Web Search', description: 'Search the web', category: 'Built-In', default: true },
+          ],
+          mcpServers: [
+            { id: 'context7', name: 'Context7', description: 'Documentation lookup', tags: ['docs'], transport: 'stdio' },
+          ],
+        }};
+      }
+      if (route === '/api/agent-tools/config') {
+        if (method === 'POST') {
+          return { ok: true };
+        }
+        const agentId = params.get('agentId');
+        return { ok: true, data: {
+          builtinTools: [
+            { id: 'search-files', name: 'Search Files', enabled: true },
+            { id: 'read-file', name: 'Read File', enabled: true },
+            { id: 'edit-file', name: 'Edit File', enabled: true },
+            { id: 'run-command', name: 'Run Command', enabled: true },
+            { id: 'web-search', name: 'Web Search', enabled: true },
+          ],
+          mcpServers: [],
+        }};
+      }
+      if (route === '/api/agent-tools/defaults') {
+        return { ok: true, data: {
+          builtinTools: [
+            { id: 'search-files', name: 'Search Files', description: 'Search workspace files', category: 'Built-In', default: true },
+            { id: 'read-file', name: 'Read File', description: 'Read file contents', category: 'Built-In', default: true },
+            { id: 'edit-file', name: 'Edit File', description: 'Edit workspace files', category: 'Built-In', default: true },
+            { id: 'run-command', name: 'Run Command', description: 'Execute shell commands', category: 'Built-In', default: true },
+            { id: 'web-search', name: 'Web Search', description: 'Search the web', category: 'Built-In', default: true },
+          ],
+        }};
+      }
       // ── Agents ──
       if (route === '/api/agents')
         return { data: STATE.agents };

package/ui/modules/settings-schema.js CHANGED Viewed

@@ -132,7 +132,7 @@ export const SETTINGS_SCHEMA = [
   { key: "AZURE_OPENAI_REALTIME_API_KEY",  label: "Azure Realtime Key (legacy)",    category: "voice", type: "secret", sensitive: true, description: "Legacy fallback: Azure OpenAI API key. Use the Voice Endpoints card above for full multi-endpoint config. Falls back to AZURE_OPENAI_API_KEY if not set." },
   { key: "AZURE_OPENAI_REALTIME_DEPLOYMENT", label: "Azure Deployment (legacy)",   category: "voice", type: "select", defaultVal: "gpt-audio-1.5", options: ["gpt-audio-1.5", "gpt-realtime-1.5", "gpt-4o-realtime-preview", "custom"], description: "Legacy fallback: Azure deployment name. Use the Voice Endpoints card above. GA models (gpt-realtime-1.5) auto-use /openai/v1/ paths." },
   { key: "VOICE_ID",                       label: "Voice",                      category: "voice", type: "select", defaultVal: "alloy", options: ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], description: "Voice personality for text-to-speech output." },
-  { key: "VOICE_TURN_DETECTION",           label: "Turn Detection",             category: "voice", type: "select", defaultVal: "server_vad", options: ["server_vad", "semantic_vad", "none"], description: "How the model detects when you stop speaking. 'semantic_vad' is more intelligent but higher latency." },
+  { key: "VOICE_TURN_DETECTION",           label: "Turn Detection",             category: "voice", type: "select", defaultVal: "semantic_vad", options: ["server_vad", "semantic_vad", "none"], description: "How the model detects when you stop speaking. 'semantic_vad' is more intelligent but higher latency." },
   { key: "VOICE_DELEGATE_EXECUTOR",        label: "Delegate Executor",          category: "voice", type: "select", defaultVal: "codex-sdk", options: ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"], description: "Which agent executor voice tool calls delegate to for complex tasks." },
   { key: "VOICE_FALLBACK_MODE",            label: "Fallback Mode",              category: "voice", type: "select", defaultVal: "browser", options: ["browser", "disabled"], description: "When Tier 1 (Realtime API) is unavailable, use browser speech APIs as fallback." },

package/ui/modules/voice-client-sdk.js CHANGED Viewed

@@ -246,7 +246,7 @@ function _flushPendingTranscriptBuffers() {
   }
   const finalUser = String(_pendingUserTranscriptText || "").trim();
-  if (finalUser) {
+  if (finalUser && ENABLE_USER_TRANSCRIPT) {
     _persistTranscriptIfNew("user", finalUser, "sdk.history_updated.user.flush");
   }
@@ -314,10 +314,13 @@ function _scheduleUserTranscriptFinalize(text) {
     if (ENABLE_USER_TRANSCRIPT) {
       sdkVoiceTranscript.value = finalText;
       emit("transcript", { text: finalText, final: true });
+      _persistTranscriptIfNew("user", finalText, "sdk.history_updated.user.final");
     } else {
       sdkVoiceTranscript.value = "";
+      // Skip persisting user transcript — ASR often hallucinates wrong
+      // languages from short fragments; the model still receives the raw
+      // audio correctly so nothing is lost.
     }
-    _persistTranscriptIfNew("user", finalText, "sdk.history_updated.user.final");
   }, 350);
 }
@@ -455,14 +458,14 @@ async function startAgentsSdkSession(config, options = {}) {
   // Determine model and voice
   const model = String(tokenData.model || resolvedConfig.model || "gpt-realtime-1.5").trim();
   const voiceId = String(tokenData.voiceId || resolvedConfig.voiceId || "alloy").trim();
-  const turnDetection = String(resolvedConfig.turnDetection || "server_vad").trim();
+  const turnDetection = String(resolvedConfig.turnDetection || "semantic_vad").trim();
   const turnDetectionConfig = {
     type: turnDetection,
     ...(turnDetection === "server_vad"
       ? {
-          threshold: 0.35,
+          threshold: 0.7,
           prefix_padding_ms: 400,
-          silence_duration_ms: 700,
+          silence_duration_ms: 1300,
           create_response: true,
           interrupt_response: true,
           createResponse: true,
@@ -494,6 +497,7 @@ async function startAgentsSdkSession(config, options = {}) {
         output: {
           format: "pcm16",
           voice: voiceId,
+          transcription: { model: "gpt-4o-transcribe" },
         },
       },
     },

package/ui/modules/voice-client.js CHANGED Viewed

@@ -434,14 +434,14 @@ function sendSessionUpdate(tokenData = {}) {
     sessionConfig?.turn_detection?.type ||
     sessionConfig?.audio?.input?.turnDetection?.type ||
     sessionConfig?.audio?.input?.turn_detection?.type ||
-    "server_vad";
+    "semantic_vad";
   const turnDetectionConfig = {
     type: turnDetection,
     ...(turnDetection === "server_vad"
       ? {
-          threshold: 0.35,
+          threshold: 0.7,
           prefix_padding_ms: 400,
-          silence_duration_ms: 700,
+          silence_duration_ms: 1200,
           create_response: true,
           interrupt_response: true,
         }