bosun 0.37.0 → 0.37.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ /**
2
+ * agent-tool-config.mjs — Per-Agent Tool Configuration Store
3
+ *
4
+ * Manages which tools and MCP servers are enabled for each agent profile.
5
+ * Persisted as `.bosun/agent-tools.json` alongside the library manifest.
6
+ *
7
+ * Schema:
8
+ * {
9
+ * "agents": {
10
+ * "<agentId>": {
11
+ * "enabledTools": ["tool1", "tool2"] | null, // null = fall back to defaults.builtinTools
12
+ * "enabledMcpServers": ["github", "context7"], // enabled MCP server IDs
13
+ * "disabledBuiltinTools": ["tool3"], // explicitly disabled builtins
14
+ * "updatedAt": "2026-01-01T00:00:00.000Z"
15
+ * }
16
+ * },
17
+ * "defaults": {
18
+ * "builtinTools": [...], // default tool list for all agents
19
+ * "updatedAt": "..."
20
+ * }
21
+ * }
22
+ *
23
+ * EXPORTS:
24
+ * DEFAULT_BUILTIN_TOOLS — list of default built-in tools for voice/agents
25
+ * loadToolConfig(rootDir) — load the full config
26
+ * saveToolConfig(rootDir, cfg) — save the full config
27
+ * getAgentToolConfig(rootDir, agentId) — get config for one agent
28
+ * setAgentToolConfig(rootDir, agentId, config) — update config for one agent
29
+ * getEffectiveTools(rootDir, agentId) — compute final enabled tools list
30
+ * listAvailableTools(rootDir) — list all available tools (builtin + MCP)
31
+ */
32
+
33
+ import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
34
+ import { resolve } from "node:path";
35
+ import { homedir } from "node:os";
36
+
37
+ // ── Constants ─────────────────────────────────────────────────────────────────
38
+
39
const TAG = "[agent-tool-config]";
const CONFIG_FILE = "agent-tools.json";

/**
 * Resolve the Bosun home directory.
 *
 * Precedence: the BOSUN_HOME env var, then BOSUN_DIR, then `.bosun` under
 * the current user's home directory. Empty env values are skipped.
 *
 * @returns {string}
 */
function getBosunHome() {
  const { BOSUN_HOME: envHome, BOSUN_DIR: envDir } = process.env;
  if (envHome) {
    return envHome;
  }
  if (envDir) {
    return envDir;
  }
  return resolve(homedir(), ".bosun");
}
49
+
50
/**
 * Build one built-in tool descriptor.
 *
 * Every built-in tool shares the "Built-In" category, so only the varying
 * fields are passed in. Key order is id, name, description, category, icon,
 * default.
 *
 * @param {string} id - stable tool identifier
 * @param {string} name - display name
 * @param {string} description - one-line description
 * @param {string} icon - icon token such as ":search:"
 * @param {boolean} [enabledByDefault=true] - value of the `default` flag
 * @returns {{ id: string, name: string, description: string, category: string, icon: string, default: boolean }}
 */
function describeBuiltinTool(id, name, description, icon, enabledByDefault = true) {
  return {
    id,
    name,
    description,
    category: "Built-In",
    icon,
    default: enabledByDefault,
  };
}

/**
 * Default built-in tools available to all voice agents and executors.
 * Maps to common capabilities that voice/agent sessions can invoke.
 *
 * Only the array itself is frozen; the descriptor objects inside it are
 * ordinary mutable objects.
 */
export const DEFAULT_BUILTIN_TOOLS = Object.freeze([
  describeBuiltinTool("search-files", "Search Files", "Search for files in the workspace by name or pattern", ":search:"),
  describeBuiltinTool("read-file", "Read File", "Read contents of a file in the workspace", ":file:"),
  describeBuiltinTool("edit-file", "Edit File", "Create or edit files in the workspace", ":edit:"),
  describeBuiltinTool("run-command", "Run Terminal Command", "Execute shell commands in a terminal", ":terminal:"),
  describeBuiltinTool("web-search", "Web Search", "Search the web for information", ":globe:"),
  describeBuiltinTool("code-search", "Semantic Code Search", "Search codebase semantically for relevant code", ":cpu:"),
  describeBuiltinTool("git-operations", "Git Operations", "Run git commands (commit, push, branch, etc.)", ":git:"),
  describeBuiltinTool("create-task", "Create Task", "Create new tasks and issues", ":check:"),
  describeBuiltinTool("delegate-task", "Delegate to Agent", "Delegate work to another agent executor", ":bot:"),
  describeBuiltinTool("fetch-url", "Fetch URL", "Fetch content from a URL and convert for LLM usage", ":link:"),
  describeBuiltinTool("list-directory", "List Directory", "List contents of a directory in the workspace", ":folder:"),
  describeBuiltinTool("grep-search", "Text Search (Grep)", "Search for exact text or regex patterns in files", ":search:"),
  describeBuiltinTool("task-management", "Task Management", "Track and manage todo items and task status", ":clipboard:"),
  // The only tool that is off unless explicitly enabled.
  describeBuiltinTool("notifications", "Send Notifications", "Send notifications via Telegram, webhook, etc.", ":bell:", false),
  describeBuiltinTool("vision-analysis", "Vision Analysis", "Analyze images and screenshots", ":eye:"),
]);
176
+
177
+ // ── Config File I/O ───────────────────────────────────────────────────────────
178
+
179
/**
 * Build the absolute path of the agent tool config file.
 *
 * @param {string} [rootDir] - base directory; any falsy value falls back to getBosunHome()
 * @returns {string} `<base>/.bosun/agent-tools.json`
 */
function getConfigPath(rootDir) {
  const baseDir = rootDir ? rootDir : getBosunHome();
  // NOTE(review): the default Bosun home already ends in ".bosun", so with no
  // rootDir the file resolves to "<home>/.bosun/.bosun/agent-tools.json".
  // Confirm this matches where the library manifest is stored before changing it.
  return resolve(baseDir, ".bosun", CONFIG_FILE);
}
182
+
183
/**
 * Tell whether a value is a non-null, non-array object.
 * @param {unknown} value
 * @returns {boolean}
 */
function isToolConfigRecord(value) {
  return value !== null && typeof value === "object" && !Array.isArray(value);
}

/**
 * Build the configuration used when nothing usable is stored on disk:
 * no per-agent overrides, and every built-in tool flagged `default: true`.
 * @returns {{ agents: Object, defaults: { builtinTools: string[], updatedAt: string } }}
 */
function buildFallbackToolConfig() {
  return {
    agents: {},
    defaults: {
      builtinTools: DEFAULT_BUILTIN_TOOLS.filter((t) => t.default).map((t) => t.id),
      updatedAt: new Date().toISOString(),
    },
  };
}

/**
 * Load the agent tool configuration.
 *
 * Never throws for a missing, unreadable, or malformed file: the fallback
 * configuration is returned instead. Read/parse failures are reported with
 * console.warn so a corrupt file does not go unnoticed. `agents` and
 * `defaults` are each replaced by their fallback when the stored value is
 * not a plain object.
 *
 * @param {string} [rootDir]
 * @returns {{ agents: Object, defaults: Object }}
 */
export function loadToolConfig(rootDir) {
  const configPath = getConfigPath(rootDir);
  if (!existsSync(configPath)) {
    return buildFallbackToolConfig();
  }

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(configPath, "utf8"));
  } catch (err) {
    // Best-effort load: keep callers working, but surface the problem.
    console.warn(`${TAG} could not load ${configPath}, using defaults: ${err?.message || err}`);
    return buildFallbackToolConfig();
  }

  if (!isToolConfigRecord(parsed)) {
    console.warn(`${TAG} ignoring ${configPath}: top-level JSON value is not an object`);
    return buildFallbackToolConfig();
  }

  const fallback = buildFallbackToolConfig();
  return {
    agents: isToolConfigRecord(parsed.agents) ? parsed.agents : fallback.agents,
    defaults: isToolConfigRecord(parsed.defaults) ? parsed.defaults : fallback.defaults,
  };
}
219
+
220
/**
 * Persist the full tool configuration as pretty-printed JSON
 * (2-space indent, trailing newline), creating the parent directory
 * if it does not exist yet.
 *
 * @param {string} rootDir
 * @param {{ agents: Object, defaults: Object }} config
 */
export function saveToolConfig(rootDir, config) {
  const targetFile = getConfigPath(rootDir);
  const parentDir = resolve(targetFile, "..");
  mkdirSync(parentDir, { recursive: true });
  const serialized = `${JSON.stringify(config, null, 2)}\n`;
  writeFileSync(targetFile, serialized, "utf8");
}
231
+
232
/**
 * Read the stored tool configuration of one agent.
 *
 * Unknown agents, and fields missing from a stored entry, resolve to the
 * neutral values: `enabledTools: null`, `enabledMcpServers: []`,
 * `disabledBuiltinTools: []`.
 *
 * @param {string} rootDir
 * @param {string} agentId
 * @returns {{ enabledTools: string[]|null, enabledMcpServers: string[], disabledBuiltinTools: string[] }}
 */
export function getAgentToolConfig(rootDir, agentId) {
  const { agents } = loadToolConfig(rootDir);
  const {
    enabledTools,
    enabledMcpServers,
    disabledBuiltinTools,
  } = agents[agentId] || {};

  return {
    enabledTools: enabledTools ?? null,
    enabledMcpServers: enabledMcpServers || [],
    disabledBuiltinTools: disabledBuiltinTools || [],
  };
}
254
+
255
/**
 * Update tool configuration for a specific agent.
 *
 * Fields absent from `update` keep their stored value. `enabledTools: null`
 * is stored as null; null for the two other lists is stored as an empty
 * array. Nothing is written when the request is invalid: a missing agentId,
 * or a list field that is neither an array, null, nor undefined, returns
 * `{ ok: false, error }` instead. Errors thrown while saving propagate to
 * the caller.
 *
 * @param {string} rootDir
 * @param {string} agentId
 * @param {{ enabledTools?: string[]|null, enabledMcpServers?: string[], disabledBuiltinTools?: string[] }} [update]
 * @returns {{ ok: boolean, error?: string }}
 */
export function setAgentToolConfig(rootDir, agentId, update) {
  // A missing id would otherwise be persisted under the key "undefined"/"null".
  if (agentId === undefined || agentId === null || agentId === "") {
    return { ok: false, error: "agentId is required" };
  }

  // Treat an omitted update as "touch only" rather than throwing.
  const patch = update ?? {};
  for (const field of ["enabledTools", "enabledMcpServers", "disabledBuiltinTools"]) {
    const value = patch[field];
    // Readers call array methods on these fields; refuse anything else.
    if (value !== undefined && value !== null && !Array.isArray(value)) {
      return { ok: false, error: `${field} must be an array or null` };
    }
  }

  const config = loadToolConfig(rootDir);
  const stored = config.agents[agentId];
  // Ignore a stored entry that is not an object (e.g. a hand-edited file).
  const existing = stored !== null && typeof stored === "object" && !Array.isArray(stored) ? stored : {};

  config.agents[agentId] = {
    ...existing,
    enabledTools:
      patch.enabledTools !== undefined ? patch.enabledTools : (existing.enabledTools ?? null),
    enabledMcpServers:
      patch.enabledMcpServers !== undefined
        ? (patch.enabledMcpServers ?? [])
        : (existing.enabledMcpServers || []),
    disabledBuiltinTools:
      patch.disabledBuiltinTools !== undefined
        ? (patch.disabledBuiltinTools ?? [])
        : (existing.disabledBuiltinTools || []),
    updatedAt: new Date().toISOString(),
  };
  saveToolConfig(rootDir, config);
  return { ok: true };
}
275
+
276
/**
 * Compute the effective enabled tools for an agent.
 * Merges builtin defaults with agent-specific overrides and MCP servers.
 *
 * A built-in tool is enabled when it is not listed in the agent's
 * `disabledBuiltinTools` and it appears in the agent's `enabledTools`, or,
 * when `enabledTools` is unset, in `defaults.builtinTools`. If that default
 * list is missing too, the tools flagged `default: true` are used.
 *
 * Stored fields that are not arrays (for example after a manual edit of the
 * JSON file) are treated as unset instead of crashing or being matched as
 * substrings.
 *
 * @param {string} rootDir
 * @param {string} agentId
 * @returns {{ builtinTools: Array<{ id: string, name: string, enabled: boolean }>, mcpServers: string[] }}
 */
export function getEffectiveTools(rootDir, agentId) {
  const config = loadToolConfig(rootDir);
  const agentConfig = config.agents[agentId] || {};
  const listOrNull = (value) => (Array.isArray(value) ? value : null);

  const disabledIds = new Set(listOrNull(agentConfig.disabledBuiltinTools) ?? []);
  // Explicit per-agent allow-list wins; otherwise fall back to the defaults.
  const allowedIds = new Set(
    listOrNull(agentConfig.enabledTools) ??
      listOrNull(config.defaults?.builtinTools) ??
      DEFAULT_BUILTIN_TOOLS.filter((t) => t.default).map((t) => t.id),
  );

  const builtinTools = DEFAULT_BUILTIN_TOOLS.map((tool) => ({
    ...tool,
    enabled: !disabledIds.has(tool.id) && allowedIds.has(tool.id),
  }));

  return {
    builtinTools,
    mcpServers: listOrNull(agentConfig.enabledMcpServers) ?? [],
  };
}
302
+
303
/**
 * List all available tools (builtin + installed MCP servers).
 *
 * The MCP registry is loaded lazily from "./mcp-registry.mjs". If it cannot
 * be imported, if listing fails, or if it yields something other than an
 * array, the MCP server list is empty (deliberate best-effort). Built-in
 * descriptors are returned as shallow copies, so callers cannot mutate the
 * shared module-level defaults through the result.
 *
 * @param {string} rootDir
 * @returns {Promise<{ builtinTools: Array<Object>, mcpServers: Array<Object> }>}
 */
export async function listAvailableTools(rootDir) {
  let installed = [];
  try {
    const { listInstalledMcpServers } = await import("./mcp-registry.mjs");
    const listed = await listInstalledMcpServers(rootDir);
    // Guard the .map() below against a null/undefined/non-array result.
    if (Array.isArray(listed)) {
      installed = listed;
    }
  } catch {
    // MCP registry not available
  }

  return {
    builtinTools: DEFAULT_BUILTIN_TOOLS.map((tool) => ({ ...tool })),
    mcpServers: installed
      .filter((s) => s !== null && typeof s === "object")
      .map((s) => ({
        id: s.id,
        name: s.name,
        description: s.description || "",
        tags: s.tags || [],
        transport: s.meta?.transport || "stdio",
      })),
  };
}
package/bosun.schema.json CHANGED
@@ -281,7 +281,7 @@
281
281
  "turnDetection": {
282
282
  "type": "string",
283
283
  "enum": ["server_vad", "semantic_vad", "none"],
284
- "default": "server_vad",
284
+ "default": "semantic_vad",
285
285
  "description": "Turn detection mode for voice activity detection"
286
286
  },
287
287
  "instructions": {
@@ -102,6 +102,8 @@ function nowISO() {
102
102
  * @property {string[]} [skills] - skill library refs to inject
103
103
  * @property {Object} [hookProfile] - hook profile overrides
104
104
  * @property {Object} [env] - extra env vars for the agent
105
+ * @property {string[]|null} [enabledTools] - list of tool IDs enabled for this agent (null = all)
106
+ * @property {string[]} [enabledMcpServers] - list of MCP server IDs enabled for this agent
105
107
  */
106
108
 
107
109
  /**
@@ -688,6 +690,22 @@ export const BUILTIN_AGENT_PROFILES = [
688
690
  env: {},
689
691
  tags: ["test", "testing", "e2e", "unit", "coverage"],
690
692
  },
693
+ {
694
+ id: "voice-agent",
695
+ name: "Voice Agent",
696
+ description: "Default voice assistant agent. Handles real-time voice sessions, tool calls, and delegate orchestration. Customize tools and MCP servers for voice interactions.",
697
+ titlePatterns: ["\\bvoice\\b", "\\bcall\\b", "\\bmeeting\\b", "\\bassistant\\b"],
698
+ scopes: ["voice", "assistant"],
699
+ sdk: null,
700
+ model: null,
701
+ promptOverride: null,
702
+ skills: [],
703
+ hookProfile: null,
704
+ env: {},
705
+ tags: ["voice", "assistant", "realtime", "default"],
706
+ enabledTools: null,
707
+ enabledMcpServers: [],
708
+ },
691
709
  ];
692
710
 
693
711
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bosun",
3
- "version": "0.37.0",
3
+ "version": "0.37.1",
4
4
  "description": "AI-powered orchestrator supervisor — manages AI agent executors with failover, auto-restarts on failure, analyzes crashes with Codex SDK, creates PRs via Vibe-Kanban API, and sends Telegram notifications. Supports N executors with weighted distribution, multi-repo projects, and auto-setup.",
5
5
  "type": "module",
6
6
  "license": "Apache 2.0",
@@ -62,6 +62,7 @@
62
62
  "./agent-hooks": "./agent-hooks.mjs",
63
63
  "./hook-profiles": "./hook-profiles.mjs",
64
64
  "./agent-hook-bridge": "./agent-hook-bridge.mjs",
65
+ "./agent-tool-config": "./agent-tool-config.mjs",
65
66
  "./startup-service": "./startup-service.mjs",
66
67
  "./telegram-sentinel": "./telegram-sentinel.mjs",
67
68
  "./whatsapp-channel": "./whatsapp-channel.mjs",
@@ -240,6 +241,7 @@
240
241
  "agent-hooks.mjs",
241
242
  "hook-profiles.mjs",
242
243
  "agent-hook-bridge.mjs",
244
+ "agent-tool-config.mjs",
243
245
  "agent-supervisor.mjs",
244
246
  "agent-work-analyzer.mjs",
245
247
  "startup-service.mjs",
@@ -439,7 +439,7 @@ function buildStableSetupDefaults({
439
439
  voiceModel: "gpt-audio-1.5",
440
440
  voiceVisionModel: "gpt-4.1-nano",
441
441
  voiceId: "alloy",
442
- voiceTurnDetection: "server_vad",
442
+ voiceTurnDetection: "semantic_vad",
443
443
  voiceFallbackMode: "browser",
444
444
  voiceDelegateExecutor: "codex-sdk",
445
445
  openaiRealtimeApiKey: "",
@@ -892,7 +892,7 @@ function applyNonBlockingSetupEnvDefaults(envMap, env = {}, sourceEnv = process.
892
892
  sourceEnv.VOICE_TURN_DETECTION,
893
893
  ),
894
894
  ["server_vad", "semantic_vad", "none"],
895
- "server_vad",
895
+ "semantic_vad",
896
896
  );
897
897
  envMap.VOICE_FALLBACK_MODE = normalizeEnumValue(
898
898
  pickNonEmptyValue(
package/setup.mjs CHANGED
@@ -1955,7 +1955,7 @@ function normalizeSetupConfiguration({
1955
1955
  env.VOICE_TURN_DETECTION = normalizeEnum(
1956
1956
  env.VOICE_TURN_DETECTION,
1957
1957
  ["server_vad", "semantic_vad", "none"],
1958
- "server_vad",
1958
+ "semantic_vad",
1959
1959
  );
1960
1960
  env.VOICE_FALLBACK_MODE = normalizeEnum(
1961
1961
  env.VOICE_FALLBACK_MODE,
@@ -3305,7 +3305,7 @@ async function main() {
3305
3305
  );
3306
3306
  env.VOICE_TURN_DETECTION = await prompt.ask(
3307
3307
  "Turn detection (server_vad|semantic_vad|none)",
3308
- process.env.VOICE_TURN_DETECTION || "server_vad",
3308
+ process.env.VOICE_TURN_DETECTION || "semantic_vad",
3309
3309
  );
3310
3310
  env.VOICE_FALLBACK_MODE = await prompt.ask(
3311
3311
  "Fallback mode (browser|disabled)",
@@ -5659,7 +5659,7 @@ async function runNonInteractive({
5659
5659
  env.AZURE_OPENAI_REALTIME_DEPLOYMENT =
5660
5660
  process.env.AZURE_OPENAI_REALTIME_DEPLOYMENT || "gpt-realtime-1.5";
5661
5661
  env.VOICE_ID = process.env.VOICE_ID || "alloy";
5662
- env.VOICE_TURN_DETECTION = process.env.VOICE_TURN_DETECTION || "server_vad";
5662
+ env.VOICE_TURN_DETECTION = process.env.VOICE_TURN_DETECTION || "semantic_vad";
5663
5663
  env.VOICE_FALLBACK_MODE = process.env.VOICE_FALLBACK_MODE || "browser";
5664
5664
  env.VOICE_DELEGATE_EXECUTOR =
5665
5665
  process.env.VOICE_DELEGATE_EXECUTOR ||
package/ui/demo.html CHANGED
@@ -2945,6 +2945,72 @@
2945
2945
  return { ok: true, data: best };
2946
2946
  }
2947
2947
 
2948
+ // ── MCP Servers ──
2949
+ if (route === '/api/mcp/catalog') {
2950
+ return { ok: true, data: [
2951
+ { id: 'github', name: 'GitHub', description: 'GitHub MCP server', transport: 'stdio', tags: ['code', 'git'], installed: false },
2952
+ { id: 'playwright', name: 'Playwright', description: 'Browser automation', transport: 'stdio', tags: ['testing'], installed: false },
2953
+ { id: 'context7', name: 'Context7', description: 'Documentation lookup', transport: 'stdio', tags: ['docs'], installed: true },
2954
+ ]};
2955
+ }
2956
+ if (route === '/api/mcp/installed') {
2957
+ return { ok: true, data: [
2958
+ { id: 'context7', name: 'Context7', description: 'Documentation lookup', transport: 'stdio', tags: ['docs'] },
2959
+ ]};
2960
+ }
2961
+ if (route === '/api/mcp/install') {
2962
+ return { ok: true, installed: { id: body?.catalogId || 'custom', name: body?.name || 'Custom MCP' } };
2963
+ }
2964
+ if (route === '/api/mcp/uninstall') {
2965
+ return { ok: true };
2966
+ }
2967
+ if (route === '/api/mcp/configure') {
2968
+ return { ok: true };
2969
+ }
2970
+
2971
+ // ── Agent Tool Config ──
2972
+ if (route === '/api/agent-tools/available') {
2973
+ return { ok: true, data: {
2974
+ builtinTools: [
2975
+ { id: 'search-files', name: 'Search Files', description: 'Search workspace files', category: 'Built-In', default: true },
2976
+ { id: 'read-file', name: 'Read File', description: 'Read file contents', category: 'Built-In', default: true },
2977
+ { id: 'edit-file', name: 'Edit File', description: 'Edit workspace files', category: 'Built-In', default: true },
2978
+ { id: 'run-command', name: 'Run Command', description: 'Execute shell commands', category: 'Built-In', default: true },
2979
+ { id: 'web-search', name: 'Web Search', description: 'Search the web', category: 'Built-In', default: true },
2980
+ ],
2981
+ mcpServers: [
2982
+ { id: 'context7', name: 'Context7', description: 'Documentation lookup', tags: ['docs'], transport: 'stdio' },
2983
+ ],
2984
+ }};
2985
+ }
2986
+ if (route === '/api/agent-tools/config') {
2987
+ if (method === 'POST') {
2988
+ return { ok: true };
2989
+ }
2990
+ const agentId = params.get('agentId');
2991
+ return { ok: true, data: {
2992
+ builtinTools: [
2993
+ { id: 'search-files', name: 'Search Files', enabled: true },
2994
+ { id: 'read-file', name: 'Read File', enabled: true },
2995
+ { id: 'edit-file', name: 'Edit File', enabled: true },
2996
+ { id: 'run-command', name: 'Run Command', enabled: true },
2997
+ { id: 'web-search', name: 'Web Search', enabled: true },
2998
+ ],
2999
+ mcpServers: [],
3000
+ }};
3001
+ }
3002
+ if (route === '/api/agent-tools/defaults') {
3003
+ return { ok: true, data: {
3004
+ builtinTools: [
3005
+ { id: 'search-files', name: 'Search Files', description: 'Search workspace files', category: 'Built-In', default: true },
3006
+ { id: 'read-file', name: 'Read File', description: 'Read file contents', category: 'Built-In', default: true },
3007
+ { id: 'edit-file', name: 'Edit File', description: 'Edit workspace files', category: 'Built-In', default: true },
3008
+ { id: 'run-command', name: 'Run Command', description: 'Execute shell commands', category: 'Built-In', default: true },
3009
+ { id: 'web-search', name: 'Web Search', description: 'Search the web', category: 'Built-In', default: true },
3010
+ ],
3011
+ }};
3012
+ }
3013
+
2948
3014
  // ── Agents ──
2949
3015
  if (route === '/api/agents')
2950
3016
  return { data: STATE.agents };
@@ -132,7 +132,7 @@ export const SETTINGS_SCHEMA = [
132
132
  { key: "AZURE_OPENAI_REALTIME_API_KEY", label: "Azure Realtime Key (legacy)", category: "voice", type: "secret", sensitive: true, description: "Legacy fallback: Azure OpenAI API key. Use the Voice Endpoints card above for full multi-endpoint config. Falls back to AZURE_OPENAI_API_KEY if not set." },
133
133
  { key: "AZURE_OPENAI_REALTIME_DEPLOYMENT", label: "Azure Deployment (legacy)", category: "voice", type: "select", defaultVal: "gpt-audio-1.5", options: ["gpt-audio-1.5", "gpt-realtime-1.5", "gpt-4o-realtime-preview", "custom"], description: "Legacy fallback: Azure deployment name. Use the Voice Endpoints card above. GA models (gpt-realtime-1.5) auto-use /openai/v1/ paths." },
134
134
  { key: "VOICE_ID", label: "Voice", category: "voice", type: "select", defaultVal: "alloy", options: ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], description: "Voice personality for text-to-speech output." },
135
- { key: "VOICE_TURN_DETECTION", label: "Turn Detection", category: "voice", type: "select", defaultVal: "server_vad", options: ["server_vad", "semantic_vad", "none"], description: "How the model detects when you stop speaking. 'semantic_vad' is more intelligent but higher latency." },
135
+ { key: "VOICE_TURN_DETECTION", label: "Turn Detection", category: "voice", type: "select", defaultVal: "semantic_vad", options: ["server_vad", "semantic_vad", "none"], description: "How the model detects when you stop speaking. 'semantic_vad' is more intelligent but higher latency." },
136
136
  { key: "VOICE_DELEGATE_EXECUTOR", label: "Delegate Executor", category: "voice", type: "select", defaultVal: "codex-sdk", options: ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"], description: "Which agent executor voice tool calls delegate to for complex tasks." },
137
137
  { key: "VOICE_FALLBACK_MODE", label: "Fallback Mode", category: "voice", type: "select", defaultVal: "browser", options: ["browser", "disabled"], description: "When Tier 1 (Realtime API) is unavailable, use browser speech APIs as fallback." },
138
138
 
@@ -246,7 +246,7 @@ function _flushPendingTranscriptBuffers() {
246
246
  }
247
247
 
248
248
  const finalUser = String(_pendingUserTranscriptText || "").trim();
249
- if (finalUser) {
249
+ if (finalUser && ENABLE_USER_TRANSCRIPT) {
250
250
  _persistTranscriptIfNew("user", finalUser, "sdk.history_updated.user.flush");
251
251
  }
252
252
 
@@ -314,10 +314,13 @@ function _scheduleUserTranscriptFinalize(text) {
314
314
  if (ENABLE_USER_TRANSCRIPT) {
315
315
  sdkVoiceTranscript.value = finalText;
316
316
  emit("transcript", { text: finalText, final: true });
317
+ _persistTranscriptIfNew("user", finalText, "sdk.history_updated.user.final");
317
318
  } else {
318
319
  sdkVoiceTranscript.value = "";
320
+ // Skip persisting user transcript — ASR often hallucinates wrong
321
+ // languages from short fragments; the model still receives the raw
322
+ // audio correctly so nothing is lost.
319
323
  }
320
- _persistTranscriptIfNew("user", finalText, "sdk.history_updated.user.final");
321
324
  }, 350);
322
325
  }
323
326
 
@@ -455,14 +458,14 @@ async function startAgentsSdkSession(config, options = {}) {
455
458
  // Determine model and voice
456
459
  const model = String(tokenData.model || resolvedConfig.model || "gpt-realtime-1.5").trim();
457
460
  const voiceId = String(tokenData.voiceId || resolvedConfig.voiceId || "alloy").trim();
458
- const turnDetection = String(resolvedConfig.turnDetection || "server_vad").trim();
461
+ const turnDetection = String(resolvedConfig.turnDetection || "semantic_vad").trim();
459
462
  const turnDetectionConfig = {
460
463
  type: turnDetection,
461
464
  ...(turnDetection === "server_vad"
462
465
  ? {
463
- threshold: 0.35,
466
+ threshold: 0.7,
464
467
  prefix_padding_ms: 400,
465
- silence_duration_ms: 700,
468
+ silence_duration_ms: 1300,
466
469
  create_response: true,
467
470
  interrupt_response: true,
468
471
  createResponse: true,
@@ -494,6 +497,7 @@ async function startAgentsSdkSession(config, options = {}) {
494
497
  output: {
495
498
  format: "pcm16",
496
499
  voice: voiceId,
500
+ transcription: { model: "gpt-4o-transcribe" },
497
501
  },
498
502
  },
499
503
  },
@@ -434,14 +434,14 @@ function sendSessionUpdate(tokenData = {}) {
434
434
  sessionConfig?.turn_detection?.type ||
435
435
  sessionConfig?.audio?.input?.turnDetection?.type ||
436
436
  sessionConfig?.audio?.input?.turn_detection?.type ||
437
- "server_vad";
437
+ "semantic_vad";
438
438
  const turnDetectionConfig = {
439
439
  type: turnDetection,
440
440
  ...(turnDetection === "server_vad"
441
441
  ? {
442
- threshold: 0.35,
442
+ threshold: 0.7,
443
443
  prefix_padding_ms: 400,
444
- silence_duration_ms: 700,
444
+ silence_duration_ms: 1200,
445
445
  create_response: true,
446
446
  interrupt_response: true,
447
447
  }