bosun 0.36.2 → 0.36.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/agent-prompts.mjs +95 -0
  2. package/analyze-agent-work-helpers.mjs +308 -0
  3. package/analyze-agent-work.mjs +926 -0
  4. package/autofix.mjs +2 -0
  5. package/bosun.schema.json +101 -3
  6. package/codex-shell.mjs +85 -10
  7. package/desktop/main.mjs +871 -48
  8. package/desktop/preload.mjs +54 -1
  9. package/desktop-shortcut.mjs +90 -11
  10. package/git-editor-fix.mjs +273 -0
  11. package/mcp-registry.mjs +579 -0
  12. package/meeting-workflow-service.mjs +631 -0
  13. package/monitor.mjs +18 -103
  14. package/package.json +21 -2
  15. package/primary-agent.mjs +32 -12
  16. package/session-tracker.mjs +68 -0
  17. package/setup-web-server.mjs +20 -10
  18. package/setup.mjs +376 -83
  19. package/startup-service.mjs +51 -6
  20. package/stream-resilience.mjs +17 -7
  21. package/ui/app.js +164 -4
  22. package/ui/components/agent-selector.js +145 -1
  23. package/ui/components/chat-view.js +161 -15
  24. package/ui/components/session-list.js +2 -2
  25. package/ui/components/shared.js +188 -15
  26. package/ui/modules/icons.js +13 -0
  27. package/ui/modules/utils.js +44 -0
  28. package/ui/modules/voice-client-sdk.js +733 -0
  29. package/ui/modules/voice-overlay.js +128 -15
  30. package/ui/modules/voice.js +15 -6
  31. package/ui/setup.html +281 -81
  32. package/ui/styles/components.css +99 -3
  33. package/ui/styles/sessions.css +122 -14
  34. package/ui/styles.css +14 -0
  35. package/ui/tabs/agents.js +1 -1
  36. package/ui/tabs/chat.js +123 -14
  37. package/ui/tabs/control.js +16 -22
  38. package/ui/tabs/dashboard.js +85 -8
  39. package/ui/tabs/library.js +113 -17
  40. package/ui/tabs/settings.js +116 -2
  41. package/ui/tabs/tasks.js +388 -39
  42. package/ui/tabs/telemetry.js +0 -1
  43. package/ui/tabs/workflows.js +4 -0
  44. package/ui-server.mjs +400 -22
  45. package/update-check.mjs +41 -13
  46. package/voice-action-dispatcher.mjs +844 -0
  47. package/voice-agents-sdk.mjs +664 -0
  48. package/voice-auth-manager.mjs +164 -0
  49. package/voice-relay.mjs +1194 -0
  50. package/voice-tools.mjs +914 -0
  51. package/workflow-templates/agents.mjs +6 -2
  52. package/workflow-templates/github.mjs +154 -12
  53. package/workflow-templates.mjs +3 -0
  54. package/github-reconciler.mjs +0 -506
  55. package/merge-strategy.mjs +0 -1210
  56. package/pr-cleanup-daemon.mjs +0 -992
  57. package/workspace-reaper.mjs +0 -405
@@ -0,0 +1,1194 @@
+ /**
+  * voice-relay.mjs — Multi-provider voice relay for real-time voice sessions.
+  *
+  * Supports:
+  * - OpenAI Realtime API (WebRTC) — direct API key
+  * - Azure OpenAI Realtime API (WebRTC) — API key + endpoint
+  * - Claude/Gemini provider mode (Tier 2 speech fallback + provider vision)
+  * - Tier 2 fallback (browser STT → executor → browser TTS)
+  * - Direct JavaScript action dispatch (voice model returns JSON, Bosun executes)
+  *
+  * @module voice-relay
+  */
+
+ import { loadConfig } from "./config.mjs";
+ import { execPrimaryPrompt, getPrimaryAgentName } from "./primary-agent.mjs";
+ import { resolveVoiceOAuthToken } from "./voice-auth-manager.mjs";
+
+ // ── Module-scope state ──────────────────────────────────────────────────────
+ let _voiceConfig = null; // cached resolved config
+ let _configLoadedAt = 0; // timestamp of last config load
+
+ const CONFIG_TTL_MS = 30_000; // re-read config every 30s
+
+ const OPENAI_REALTIME_URL = "https://api.openai.com/v1/realtime";
+ const OPENAI_REALTIME_MODEL = "gpt-realtime-1.5"; // Released 2026-02-23; replaces gpt-4o-realtime-preview
+ const OPENAI_RESPONSES_URL = "https://api.openai.com/v1/responses";
+ const OPENAI_DEFAULT_VISION_MODEL = "gpt-4.1-mini";
+
+ const AZURE_API_VERSION = "2025-04-01-preview";
+ const ANTHROPIC_MESSAGES_URL = "https://api.anthropic.com/v1/messages";
+ const ANTHROPIC_API_VERSION = "2023-06-01";
+ const CLAUDE_DEFAULT_MODEL = "claude-3-7-sonnet-latest";
+ const CLAUDE_DEFAULT_VISION_MODEL = "claude-3-7-sonnet-latest";
+ const GEMINI_GENERATE_CONTENT_URL = "https://generativelanguage.googleapis.com/v1beta/models";
+ const GEMINI_DEFAULT_MODEL = "gemini-2.5-pro";
+ const GEMINI_DEFAULT_VISION_MODEL = "gemini-2.5-flash";
+
+ const VALID_EXECUTORS = new Set([
+   "codex-sdk",
+   "copilot-sdk",
+   "claude-sdk",
+   "gemini-sdk",
+   "opencode-sdk",
+ ]);
+
+ const VALID_AGENT_MODES = new Set([
+   "ask",
+   "agent",
+   "plan",
+   "code",
+   "architect",
+ ]);
+
+ const VALID_VOICE_PROVIDERS = new Set([
+   "openai",
+   "azure",
+   "claude",
+   "gemini",
+   "fallback",
+ ]);
+
+ const DEFAULT_VOICE_FAILOVER = Object.freeze({
+   enabled: true,
+   maxAttempts: 2,
+ });
+
+ function parseFailoverInt(rawValue, fallback) {
+   const parsed = Number.parseInt(String(rawValue ?? ""), 10);
+   if (!Number.isFinite(parsed) || parsed <= 0) return fallback;
+   return parsed;
+ }
+
+ function normalizeVoiceProviderEntry(entry) {
+   if (typeof entry === "string") {
+     const provider = String(entry || "").trim().toLowerCase();
+     if (!VALID_VOICE_PROVIDERS.has(provider)) return null;
+     return {
+       provider,
+       model: null,
+       visionModel: null,
+       voiceId: null,
+       azureDeployment: null,
+     };
+   }
+
+   if (!entry || typeof entry !== "object") return null;
+   const provider = String(entry.provider || "").trim().toLowerCase();
+   if (!VALID_VOICE_PROVIDERS.has(provider)) return null;
+
+   const model = String(entry.model || "").trim() || null;
+   const visionModel = String(entry.visionModel || "").trim() || null;
+   const voiceId = String(entry.voiceId || "").trim() || null;
+   const azureDeployment = String(entry.azureDeployment || "").trim() || null;
+
+   return {
+     provider,
+     model,
+     visionModel,
+     voiceId,
+     azureDeployment,
+   };
+ }
+
+ function normalizeVoiceProviderChain(rawProviders, primaryProvider) {
+   const dedup = new Set();
+   const chain = [];
+   const pushEntry = (entry) => {
+     const normalized = normalizeVoiceProviderEntry(entry);
+     if (!normalized) return;
+     if (dedup.has(normalized.provider)) return;
+     dedup.add(normalized.provider);
+     chain.push(normalized);
+   };
+
+   if (Array.isArray(rawProviders)) {
+     rawProviders.forEach(pushEntry);
+   } else if (typeof rawProviders === "string" && rawProviders.trim()) {
+     rawProviders
+       .split(",")
+       .map((item) => item.trim())
+       .filter(Boolean)
+       .forEach((provider) => pushEntry({ provider }));
+   }
+
+   if (primaryProvider && VALID_VOICE_PROVIDERS.has(primaryProvider)) {
+     if (!dedup.has(primaryProvider)) {
+       chain.unshift({
+         provider: primaryProvider,
+         model: null,
+         visionModel: null,
+         voiceId: null,
+         azureDeployment: null,
+       });
+     }
+   }
+
+   return chain;
+ }
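
The chain normalizers above are module-internal, so the sketch below only illustrates the shapes they accept; the values are invented, not shipped defaults:

```js
// Both shapes normalize to the same ordered, de-duplicated chain.
const fromEnv = "azure, openai, fallback"; // e.g. VOICE_PROVIDERS
const fromConfig = [
  { provider: "azure", azureDeployment: "my-realtime-deployment" }, // hypothetical name
  "openai",
  "fallback",
];
// normalizeVoiceProviderChain(fromEnv, "azure") → [
//   { provider: "azure", model: null, visionModel: null, voiceId: null, azureDeployment: null },
//   { provider: "openai", ... },
//   { provider: "fallback", ... },
// ]
// Unknown provider names are dropped; the primary provider is prepended if missing.
```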
+
+ function getProviderChainWithCredentialFallbacks(chain, credentialState = {}) {
+   const dedup = new Set();
+   const providers = [];
+   const pushProvider = (provider) => {
+     if (!provider || dedup.has(provider)) return;
+     if (!VALID_VOICE_PROVIDERS.has(provider)) return;
+     dedup.add(provider);
+     providers.push(provider);
+   };
+
+   chain.forEach((entry) => pushProvider(entry.provider));
+
+   if (credentialState.azureAvailable) pushProvider("azure");
+   if (credentialState.openaiAvailable) pushProvider("openai");
+   if (credentialState.claudeAvailable) pushProvider("claude");
+   if (credentialState.geminiAvailable) pushProvider("gemini");
+   pushProvider("fallback");
+
+   return providers;
+ }
+
+ function shouldFailoverRealtimeError(err) {
+   const message = String(err?.message || "");
+   const statusMatch = message.match(/\((\d{3})\)/);
+   const status = statusMatch ? Number.parseInt(statusMatch[1], 10) : null;
+   if (status && (status === 401 || status === 403 || status === 408 || status === 409 || status === 429 || status >= 500)) {
+     return true;
+   }
+   if (/ECONNRESET|ETIMEDOUT|network|fetch failed|connection|connect/i.test(message)) {
+     return true;
+   }
+   return false;
+ }
+
+ function redactSecretLikeText(value) {
+   let sanitized = String(value || "");
+   sanitized = sanitized.replace(/\b(sk|rk|pk)-[A-Za-z0-9_-]{10,}\b/g, "$1-***REDACTED***");
+   sanitized = sanitized.replace(/\bBearer\s+[A-Za-z0-9._~+/=-]{8,}\b/gi, "Bearer ***REDACTED***");
+   sanitized = sanitized.replace(
+     /("?(?:api[_-]?key|access[_-]?token|client[_-]?secret|authorization)"?\s*[:=]\s*"?)([^",\s}{\]]+)/gi,
+     "$1***REDACTED***",
+   );
+   return sanitized;
+ }
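
A quick, runnable illustration of the first redaction pass above; the key in the sample message is made up:

```js
// Same pattern as the first replace in redactSecretLikeText.
const keyPattern = /\b(sk|rk|pk)-[A-Za-z0-9_-]{10,}\b/g;
const raw = "OpenAI Realtime session failed (401): invalid key sk-abc123def456ghi789jkl";
console.log(raw.replace(keyPattern, "$1-***REDACTED***"));
// → "OpenAI Realtime session failed (401): invalid key sk-***REDACTED***"
```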
+
+ async function buildProviderErrorDetails(response, fallback = "unknown") {
+   const raw = await response.text().catch(() => fallback);
+   return redactSecretLikeText(raw || fallback);
+ }
+
+ function sanitizeVoiceCallContext(context = {}) {
+   const rawSessionId = String(context?.sessionId || "").trim();
+   const rawExecutor = String(context?.executor || "").trim().toLowerCase();
+   const rawMode = String(context?.mode || "").trim().toLowerCase();
+   const rawModel = String(context?.model || "").trim();
+
+   return {
+     sessionId: rawSessionId || null,
+     executor: VALID_EXECUTORS.has(rawExecutor) ? rawExecutor : null,
+     mode: VALID_AGENT_MODES.has(rawMode) ? rawMode : null,
+     model: rawModel || null,
+   };
+ }
+
+ function buildSessionScopedInstructions(baseInstructions, callContext = {}) {
+   const context = sanitizeVoiceCallContext(callContext);
+   if (!context.sessionId && !context.executor && !context.mode && !context.model) {
+     return baseInstructions;
+   }
+
+   const suffix = [
+     "",
+     "## Bosun Voice Call Context",
+     `Active chat session id: ${context.sessionId || "none"}.`,
+     context.executor
+       ? `Preferred executor for delegated work: ${context.executor}.`
+       : "Preferred executor for delegated work: use configured default.",
+     context.mode
+       ? `Preferred delegation mode: ${context.mode}.`
+       : "Preferred delegation mode: use configured default.",
+     context.model
+       ? `Preferred model override: ${context.model}.`
+       : "Preferred model override: none.",
+     "",
+     "## Required Behavior",
+     "- For every user turn in this call, invoke delegate_to_agent exactly once before any final spoken answer.",
+     "- For coding, repo, task, debugging, automation, or workspace requests, call delegate_to_agent before finalizing your response.",
+     "- Preserve user intent when delegating. Do not paraphrase away technical detail.",
+     "- Keep responses concise after receiving delegate_to_agent output.",
+   ].join("\n");
+
+   return `${baseInstructions}${suffix}`;
+ }
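
Both helpers above are module-internal; as a sketch of how a call context flows through them (the session id and model name are hypothetical):

```js
// Only whitelisted executors/modes survive sanitization.
const callContext = {
  sessionId: "sess-42",   // kept verbatim
  executor: "codex-sdk",  // listed in VALID_EXECUTORS → kept
  mode: "debug",          // not in VALID_AGENT_MODES → null
  model: "gpt-5-codex",   // free-form string → kept
};
// buildSessionScopedInstructions(base, callContext) then appends the
// "## Bosun Voice Call Context" and "## Required Behavior" blocks to `base`.
```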
+
+ function resolveToolChoice(toolDefinitions, callContext = {}) {
+   const context = sanitizeVoiceCallContext(callContext);
+   const hasDelegateTool = Array.isArray(toolDefinitions)
+     && toolDefinitions.some((tool) => tool?.name === "delegate_to_agent");
+   if (context.sessionId && hasDelegateTool) {
+     return {
+       type: "function",
+       name: "delegate_to_agent",
+     };
+   }
+   return "auto";
+ }
+
+ function extractModelResponseText(payload) {
+   if (!payload || typeof payload !== "object") return "";
+   if (typeof payload.output_text === "string" && payload.output_text.trim()) {
+     return payload.output_text.trim();
+   }
+
+   const output = Array.isArray(payload.output) ? payload.output : [];
+   for (const item of output) {
+     const content = Array.isArray(item?.content) ? item.content : [];
+     for (const part of content) {
+       if (typeof part?.text === "string" && part.text.trim()) {
+         return part.text.trim();
+       }
+     }
+   }
+
+   const choices = Array.isArray(payload.choices) ? payload.choices : [];
+   for (const choice of choices) {
+     const text = String(choice?.message?.content || "").trim();
+     if (text) return text;
+   }
+
+   return "";
+ }
+
+ function parseImageDataUrl(dataUrl) {
+   const raw = String(dataUrl || "").trim();
+   const match = raw.match(
+     /^data:(image\/(?:jpeg|jpg|png|webp));base64,([A-Za-z0-9+/=]+)$/i,
+   );
+   if (!match) {
+     throw new Error("Invalid frame format (expected data:image/*;base64,...)");
+   }
+   return {
+     mimeType: String(match[1] || "").toLowerCase(),
+     base64Data: String(match[2] || ""),
+     dataUrl: raw,
+   };
+ }
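
The accepted frame format, shown with a tiny valid payload (a 1×1 transparent PNG):

```js
// Base64-encoded 1×1 transparent PNG, just to show the accepted shape.
const frameDataUrl =
  "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==";
// parseImageDataUrl(frameDataUrl) →
//   { mimeType: "image/png", base64Data: "iVBORw0K…", dataUrl: frameDataUrl }
// Anything other than a base64 jpeg/png/webp data URL throws.
```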
+
+ function extractClaudeResponseText(payload) {
+   if (!payload || typeof payload !== "object") return "";
+   const content = Array.isArray(payload.content) ? payload.content : [];
+   const text = content
+     .filter((part) => part?.type === "text")
+     .map((part) => String(part?.text || "").trim())
+     .filter(Boolean)
+     .join("\n")
+     .trim();
+   if (text) return text;
+   return "";
+ }
+
+ function extractGeminiResponseText(payload) {
+   if (!payload || typeof payload !== "object") return "";
+   const candidates = Array.isArray(payload.candidates) ? payload.candidates : [];
+   for (const candidate of candidates) {
+     const parts = Array.isArray(candidate?.content?.parts)
+       ? candidate.content.parts
+       : [];
+     const text = parts
+       .map((part) => String(part?.text || "").trim())
+       .filter(Boolean)
+       .join("\n")
+       .trim();
+     if (text) return text;
+   }
+   return "";
+ }
+
+ async function analyzeVisionWithOpenAI(dataUrl, model, prompt, contextText, cfg) {
+   const response = await fetch(OPENAI_RESPONSES_URL, {
+     method: "POST",
+     headers: {
+       Authorization: `Bearer ${cfg.openaiKey}`,
+       "Content-Type": "application/json",
+     },
+     body: JSON.stringify({
+       model,
+       temperature: 0.2,
+       max_output_tokens: 220,
+       input: [
+         {
+           role: "user",
+           content: [
+             {
+               type: "input_text",
+               text: `${prompt}\n\n${contextText}`,
+             },
+             {
+               type: "input_image",
+               image_url: dataUrl,
+               detail: "high",
+             },
+           ],
+         },
+       ],
+     }),
+   });
+   if (!response.ok) {
+     const errText = await buildProviderErrorDetails(response, "unknown");
+     throw new Error(`Vision request failed (${response.status}): ${errText}`);
+   }
+   const payload = await response.json();
+   const summary = extractModelResponseText(payload);
+   if (!summary) {
+     throw new Error("Vision model returned an empty summary");
+   }
+   return {
+     summary,
+     provider: "openai",
+     model,
+   };
+ }
+
+ async function analyzeVisionWithAzure(dataUrl, model, prompt, contextText, cfg) {
+   const endpoint = cfg.azureEndpoint.replace(/\/+$/, "");
+   const url = `${endpoint}/openai/responses?api-version=${AZURE_API_VERSION}`;
+   const response = await fetch(url, {
+     method: "POST",
+     headers: {
+       "api-key": cfg.azureKey,
+       "Content-Type": "application/json",
+     },
+     body: JSON.stringify({
+       model,
+       temperature: 0.2,
+       max_output_tokens: 220,
+       input: [
+         {
+           role: "user",
+           content: [
+             {
+               type: "input_text",
+               text: `${prompt}\n\n${contextText}`,
+             },
+             {
+               type: "input_image",
+               image_url: dataUrl,
+               detail: "high",
+             },
+           ],
+         },
+       ],
+     }),
+   });
+   if (!response.ok) {
+     const errText = await buildProviderErrorDetails(response, "unknown");
+     throw new Error(`Azure vision request failed (${response.status}): ${errText}`);
+   }
+   const payload = await response.json();
+   const summary = extractModelResponseText(payload);
+   if (!summary) {
+     throw new Error("Azure vision model returned an empty summary");
+   }
+   return {
+     summary,
+     provider: "azure",
+     model,
+   };
+ }
+
+ async function analyzeVisionWithClaude(frame, model, prompt, contextText, cfg) {
+   const response = await fetch(ANTHROPIC_MESSAGES_URL, {
+     method: "POST",
+     headers: {
+       "x-api-key": cfg.claudeKey,
+       "anthropic-version": ANTHROPIC_API_VERSION,
+       "Content-Type": "application/json",
+     },
+     body: JSON.stringify({
+       model,
+       temperature: 0.2,
+       max_tokens: 260,
+       messages: [
+         {
+           role: "user",
+           content: [
+             { type: "text", text: `${prompt}\n\n${contextText}` },
+             {
+               type: "image",
+               source: {
+                 type: "base64",
+                 media_type: frame.mimeType,
+                 data: frame.base64Data,
+               },
+             },
+           ],
+         },
+       ],
+     }),
+   });
+   if (!response.ok) {
+     const errText = await buildProviderErrorDetails(response, "unknown");
+     throw new Error(`Claude vision request failed (${response.status}): ${errText}`);
+   }
+   const payload = await response.json();
+   const summary = extractClaudeResponseText(payload);
+   if (!summary) {
+     throw new Error("Claude vision model returned an empty summary");
+   }
+   return {
+     summary,
+     provider: "claude",
+     model,
+   };
+ }
+
+ async function analyzeVisionWithGemini(frame, model, prompt, contextText, cfg) {
+   const apiKey = String(cfg.geminiKey || "").trim();
+   const endpoint =
+     `${GEMINI_GENERATE_CONTENT_URL}/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(apiKey)}`;
+   const response = await fetch(endpoint, {
+     method: "POST",
+     headers: {
+       "Content-Type": "application/json",
+     },
+     body: JSON.stringify({
+       contents: [
+         {
+           role: "user",
+           parts: [
+             { text: `${prompt}\n\n${contextText}` },
+             {
+               inlineData: {
+                 mimeType: frame.mimeType,
+                 data: frame.base64Data,
+               },
+             },
+           ],
+         },
+       ],
+       generationConfig: {
+         temperature: 0.2,
+         maxOutputTokens: 220,
+       },
+     }),
+   });
+   if (!response.ok) {
+     const errText = await buildProviderErrorDetails(response, "unknown");
+     throw new Error(`Gemini vision request failed (${response.status}): ${errText}`);
+   }
+   const payload = await response.json();
+   const summary = extractGeminiResponseText(payload);
+   if (!summary) {
+     throw new Error("Gemini vision model returned an empty summary");
+   }
+   return {
+     summary,
+     provider: "gemini",
+     model,
+   };
+ }
+
+ // ── Voice provider detection ────────────────────────────────────────────────
+
+ /**
+  * Resolve voice configuration from bosun config + env.
+  * Returns a frozen object: { provider, providerChain, providerChainWithFallbacks,
+  *   realtimeCandidates, failover, model, per-provider API keys and OAuth tokens,
+  *   voiceId, turnDetection, visionModel, instructions, fallbackMode,
+  *   delegateExecutor, enabled, diagnostics }
+  */
+ export function getVoiceConfig(forceReload = false) {
+   if (!forceReload && _voiceConfig && (Date.now() - _configLoadedAt < CONFIG_TTL_MS)) {
+     return _voiceConfig;
+   }
+
+   const cfg = loadConfig();
+   const voice = cfg.voice || {};
+
+   // Provider priority: config > env > key autodetect.
+   // "auto" resolves to azure/openai/claude/gemini/fallback based on available credentials.
+   const rawProvider = String(
+     voice.provider || process.env.VOICE_PROVIDER || "auto",
+   )
+     .trim()
+     .toLowerCase();
+
+   // API keys
+   const openaiOAuthToken =
+     String(voice.openaiAccessToken || "").trim()
+     || resolveVoiceOAuthToken("openai", forceReload)?.token
+     || "";
+   const openaiKey = voice.openaiApiKey
+     || process.env.OPENAI_REALTIME_API_KEY
+     || process.env.OPENAI_API_KEY
+     || "";
+
+   const azureOAuthToken =
+     String(voice.azureAccessToken || "").trim()
+     || resolveVoiceOAuthToken("azure", forceReload)?.token
+     || "";
+   const azureKey = voice.azureApiKey
+     || process.env.AZURE_OPENAI_REALTIME_API_KEY
+     || process.env.AZURE_OPENAI_API_KEY
+     || "";
+
+   const azureEndpoint = voice.azureEndpoint
+     || process.env.AZURE_OPENAI_REALTIME_ENDPOINT
+     || process.env.AZURE_OPENAI_ENDPOINT
+     || "";
+
+   const azureDeployment = voice.azureDeployment
+     || process.env.AZURE_OPENAI_REALTIME_DEPLOYMENT
+     || "gpt-realtime-1.5";
+
+   const claudeOAuthToken =
+     String(voice.claudeAccessToken || "").trim()
+     || resolveVoiceOAuthToken("claude", forceReload)?.token
+     || "";
+   const claudeKey = voice.claudeApiKey
+     || process.env.ANTHROPIC_API_KEY
+     || "";
+
+   const geminiOAuthToken =
+     String(voice.geminiAccessToken || "").trim()
+     || resolveVoiceOAuthToken("gemini", forceReload)?.token
+     || "";
+   const geminiKey = voice.geminiApiKey
+     || process.env.GEMINI_API_KEY
+     || process.env.GOOGLE_API_KEY
+     || "";
+
+   const openaiAvailable = Boolean(openaiOAuthToken || openaiKey);
+   const azureAvailable = Boolean((azureOAuthToken || azureKey) && azureEndpoint);
+   const claudeAvailable = Boolean(claudeKey || claudeOAuthToken);
+   const geminiAvailable = Boolean(geminiKey || geminiOAuthToken);
+
+   const autoProvider =
+     azureAvailable
+       ? "azure"
+       : (openaiAvailable
+         ? "openai"
+         : (claudeAvailable
+           ? "claude"
+           : (geminiAvailable ? "gemini" : "fallback")));
+
+   const provider = rawProvider === "auto" ? autoProvider : rawProvider;
+
+   const providerChain = normalizeVoiceProviderChain(
+     voice.providers || process.env.VOICE_PROVIDERS || [],
+     provider,
+   );
+   const providerChainWithFallbacks = getProviderChainWithCredentialFallbacks(providerChain, {
+     openaiAvailable,
+     azureAvailable,
+     claudeAvailable,
+     geminiAvailable,
+   });
+
+   const realtimeCandidates = providerChain
+     .filter((entry) => entry.provider === "openai" || entry.provider === "azure")
+     .map((entry) => ({ ...entry }));
+   if (!realtimeCandidates.length && (provider === "openai" || provider === "azure")) {
+     realtimeCandidates.push({
+       provider,
+       model: null,
+       visionModel: null,
+       voiceId: null,
+       azureDeployment: null,
+     });
+   }
+
+   const failoverEnabledRaw =
+     voice?.failover?.enabled ?? process.env.VOICE_FAILOVER_ENABLED;
+   const failoverEnabled =
+     failoverEnabledRaw == null
+       ? DEFAULT_VOICE_FAILOVER.enabled
+       : !["0", "false", "no", "off"].includes(
+         String(failoverEnabledRaw).trim().toLowerCase(),
+       );
+   const failoverMaxAttempts = parseFailoverInt(
+     voice?.failover?.maxAttempts ?? process.env.VOICE_FAILOVER_MAX_ATTEMPTS,
+     DEFAULT_VOICE_FAILOVER.maxAttempts,
+   );
+
+   const diagnostics = [];
+   if (
+     process.env.OPENAI_REALTIME_API_KEY
+     && process.env.OPENAI_API_KEY
+     && process.env.OPENAI_REALTIME_API_KEY !== process.env.OPENAI_API_KEY
+   ) {
+     diagnostics.push(
+       "Both OPENAI_REALTIME_API_KEY and OPENAI_API_KEY are set; realtime key takes precedence.",
+     );
+   }
+   if (/^sk-test-/i.test(String(openaiKey || ""))) {
+     diagnostics.push(
+       "OpenAI realtime key appears to be a test/placeholder value (sk-test-*).",
+     );
+   }
+   const defaultModel =
+     provider === "claude"
+       ? CLAUDE_DEFAULT_MODEL
+       : provider === "gemini"
+         ? GEMINI_DEFAULT_MODEL
+         : OPENAI_REALTIME_MODEL;
+   const model = voice.model || process.env.VOICE_MODEL || defaultModel;
+   const voiceId = voice.voiceId || process.env.VOICE_ID || "alloy";
+   const turnDetection =
+     voice.turnDetection || process.env.VOICE_TURN_DETECTION || "server_vad";
+   const defaultVisionModel =
+     provider === "claude"
+       ? CLAUDE_DEFAULT_VISION_MODEL
+       : provider === "gemini"
+         ? GEMINI_DEFAULT_VISION_MODEL
+         : OPENAI_DEFAULT_VISION_MODEL;
+   const visionModel =
+     voice.visionModel || process.env.VOICE_VISION_MODEL || defaultVisionModel;
+   const fallbackMode =
+     voice.fallbackMode || process.env.VOICE_FALLBACK_MODE || "browser";
+   const delegateExecutor =
+     voice.delegateExecutor ||
+     process.env.VOICE_DELEGATE_EXECUTOR ||
+     cfg.primaryAgent ||
+     "codex-sdk";
+   const enabled =
+     voice.enabled != null
+       ? voice.enabled !== false
+       : !["0", "false", "no", "off"].includes(
+         String(process.env.VOICE_ENABLED || "")
+           .trim()
+           .toLowerCase(),
+       );
+
+   const instructions = voice.instructions || `You are Bosun, a helpful voice assistant for the VirtEngine development platform.
+ You help developers manage tasks, steer coding agents, monitor builds, and navigate the workspace.
+ Be concise and conversational. When users ask about code or tasks, use the available tools.
+ For complex operations like writing code or creating PRs, delegate to the appropriate agent.`;
+
+   _voiceConfig = Object.freeze({
+     provider,
+     providerChain,
+     providerChainWithFallbacks,
+     realtimeCandidates,
+     failover: {
+       enabled: failoverEnabled,
+       maxAttempts: failoverMaxAttempts,
+     },
+     model,
+     openaiKey,
+     openaiOAuthToken,
+     azureKey,
+     azureOAuthToken,
+     azureEndpoint,
+     azureDeployment,
+     claudeKey,
+     claudeOAuthToken,
+     geminiKey,
+     geminiOAuthToken,
+     voiceId,
+     turnDetection,
+     visionModel,
+     instructions,
+     fallbackMode,
+     delegateExecutor,
+     enabled,
+     diagnostics,
+   });
+   _configLoadedAt = Date.now();
+   return _voiceConfig;
+ }
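
Since getVoiceConfig is exported, a server-side caller can read the resolved chain directly; a minimal sketch (actual values depend on local credentials):

```js
import { getVoiceConfig } from "./voice-relay.mjs";

// Resolved once, then cached for CONFIG_TTL_MS (30s); pass true to force a re-read.
const voice = getVoiceConfig();
console.log(voice.provider);                   // e.g. "azure" when endpoint + key are set
console.log(voice.providerChainWithFallbacks); // e.g. ["azure", "openai", "fallback"]
for (const note of voice.diagnostics) console.warn(`[voice] ${note}`);
```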
+
+ /**
+  * Check if any voice tier is available.
+  */
+ export function isVoiceAvailable() {
+   const cfg = getVoiceConfig();
+   if (!cfg.enabled) return { available: false, tier: null, reason: "Voice disabled in config" };
+
+   const realtimeProvider = cfg.realtimeCandidates.find((candidate) => {
+     if (candidate.provider === "openai") {
+       return Boolean(cfg.openaiOAuthToken || cfg.openaiKey);
+     }
+     if (candidate.provider === "azure") {
+       return Boolean((cfg.azureOAuthToken || cfg.azureKey) && cfg.azureEndpoint);
+     }
+     return false;
+   });
+   if (realtimeProvider) {
+     return { available: true, tier: 1, provider: realtimeProvider.provider };
+   }
+
+   if (cfg.provider === "claude" && (cfg.claudeKey || cfg.claudeOAuthToken)) {
+     return { available: true, tier: 2, provider: "claude" };
+   }
+   if (cfg.provider === "gemini" && (cfg.geminiKey || cfg.geminiOAuthToken)) {
+     return { available: true, tier: 2, provider: "gemini" };
+   }
+   if (cfg.fallbackMode === "disabled") {
+     return {
+       available: false,
+       tier: null,
+       reason: `Voice provider "${cfg.provider}" is not configured and fallback is disabled`,
+     };
+   }
+   // Tier 2 fallback available when enabled
+   return { available: true, tier: 2, provider: "fallback" };
+ }
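
Typical consumption of the tier probe, as a sketch:

```js
import { isVoiceAvailable } from "./voice-relay.mjs";

const status = isVoiceAvailable();
if (!status.available) {
  console.warn("Voice unavailable:", status.reason);
} else if (status.tier === 1) {
  // Realtime WebRTC (OpenAI or Azure): mint an ephemeral token next.
} else {
  // Tier 2: Claude/Gemini provider mode or the browser STT/TTS fallback.
}
```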
+
+ /**
+  * Create an ephemeral token for OpenAI Realtime API (WebRTC).
+  * Returns { token, expiresAt, model, voiceId, provider }
+  */
+ export async function createEphemeralToken(toolDefinitions = [], callContext = {}) {
+   const cfg = getVoiceConfig();
+   const candidates = cfg.realtimeCandidates.filter((entry) => {
+     if (entry.provider === "openai") return Boolean(cfg.openaiOAuthToken || cfg.openaiKey);
+     if (entry.provider === "azure") return Boolean((cfg.azureOAuthToken || cfg.azureKey) && cfg.azureEndpoint);
+     return false;
+   });
+
+   if (!candidates.length) {
+     throw new Error(
+       `Realtime WebRTC token is unavailable for provider "${cfg.provider}". ` +
+       "Use VOICE_PROVIDER=openai|azure and configure OAuth/API credentials for Tier 1 realtime voice.",
+     );
+   }
+
+   const maxAttempts = cfg.failover.enabled
+     ? Math.min(Math.max(cfg.failover.maxAttempts, 1), candidates.length)
+     : 1;
+
+   let lastError = null;
+   for (let index = 0; index < maxAttempts; index++) {
+     const candidate = candidates[index];
+     try {
+       if (candidate.provider === "azure") {
+         return await createAzureEphemeralToken(cfg, toolDefinitions, callContext, candidate);
+       }
+       return await createOpenAIEphemeralToken(cfg, toolDefinitions, callContext, candidate);
+     } catch (err) {
+       lastError = err;
+       const canRetry = cfg.failover.enabled && index + 1 < maxAttempts && shouldFailoverRealtimeError(err);
+       if (!canRetry) break;
+     }
+   }
+
+   throw lastError || new Error("Failed to create realtime token");
+ }
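
A sketch of minting a Tier 1 session token; the session id is invented, and the executor/mode values come from the whitelists at the top of the file:

```js
import { createEphemeralToken, getVoiceToolDefinitions } from "./voice-relay.mjs";

const tools = await getVoiceToolDefinitions({ delegateOnly: true });
// With failover enabled, a 401/403/408/409/429/5xx or network error on the
// first realtime candidate falls through to the next one (up to maxAttempts).
const session = await createEphemeralToken(tools, {
  sessionId: "sess-42", // hypothetical
  executor: "codex-sdk",
  mode: "agent",
});
// session.token goes to the browser; session.provider says openai vs. azure.
```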
+
+ async function createOpenAIEphemeralToken(cfg, toolDefinitions = [], callContext = {}, candidate = {}) {
+   const credential = String(cfg.openaiOAuthToken || cfg.openaiKey || "").trim();
+   if (!credential) {
+     throw new Error("OpenAI voice credential not configured (OAuth token or API key required)");
+   }
+
+   const context = sanitizeVoiceCallContext(callContext);
+   const instructions = buildSessionScopedInstructions(cfg.instructions, context);
+   const model = String(candidate?.model || cfg.model || OPENAI_REALTIME_MODEL).trim() || OPENAI_REALTIME_MODEL;
+   const voiceId = String(candidate?.voiceId || cfg.voiceId || "alloy").trim() || "alloy";
+
+   const sessionConfig = {
+     model,
+     voice: voiceId,
+     instructions,
+     tool_choice: resolveToolChoice(toolDefinitions, context),
+     turn_detection: {
+       type: cfg.turnDetection,
+       ...(cfg.turnDetection === "server_vad" ? {
+         threshold: 0.5,
+         prefix_padding_ms: 300,
+         silence_duration_ms: 500,
+       } : {}),
+       ...(cfg.turnDetection === "semantic_vad" ? {
+         eagerness: "medium",
+       } : {}),
+     },
+     input_audio_transcription: { model: "gpt-4o-mini-transcribe" },
+     tools: toolDefinitions,
+   };
+
+   const response = await fetch(`${OPENAI_REALTIME_URL}/sessions`, {
+     method: "POST",
+     headers: {
+       Authorization: `Bearer ${credential}`,
+       "Content-Type": "application/json",
+     },
+     body: JSON.stringify(sessionConfig),
+   });
+
+   if (!response.ok) {
+     const errorText = await buildProviderErrorDetails(response, "unknown");
+     throw new Error(`OpenAI Realtime session failed (${response.status}): ${errorText}`);
+   }
+
+   const data = await response.json();
+   return {
+     token: data.client_secret?.value || data.token,
+     expiresAt: data.client_secret?.expires_at || (Date.now() / 1000 + 60),
+     model,
+     voiceId,
+     provider: "openai",
+     sessionConfig,
+     callContext: context,
+   };
+ }
+
+ /**
+  * Create an ephemeral token for Azure OpenAI Realtime API.
+  */
+ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext = {}, candidate = {}) {
+   if ((!cfg.azureKey && !cfg.azureOAuthToken) || !cfg.azureEndpoint) {
+     throw new Error("Azure OpenAI Realtime not configured (need endpoint plus an API key or OAuth token)");
+   }
+
+   const context = sanitizeVoiceCallContext(callContext);
+   const instructions = buildSessionScopedInstructions(cfg.instructions, context);
+   const endpoint = cfg.azureEndpoint.replace(/\/+$/, "");
+   const deployment =
+     String(candidate?.azureDeployment || cfg.azureDeployment || "").trim()
+     || "gpt-realtime-1.5";
+   const voiceId = String(candidate?.voiceId || cfg.voiceId || "alloy").trim() || "alloy";
+   const url = `${endpoint}/openai/realtime/sessions?api-version=${AZURE_API_VERSION}&deployment=${deployment}`;
+
+   const headers = {
+     "Content-Type": "application/json",
+   };
+   if (cfg.azureOAuthToken) {
+     headers.Authorization = `Bearer ${cfg.azureOAuthToken}`;
+   } else {
+     headers["api-key"] = cfg.azureKey;
+   }
+
+   const sessionConfig = {
+     model: deployment,
+     voice: voiceId,
+     instructions,
+     tool_choice: resolveToolChoice(toolDefinitions, context),
+     turn_detection: {
+       type: cfg.turnDetection,
+       ...(cfg.turnDetection === "server_vad" ? {
+         threshold: 0.5,
+         prefix_padding_ms: 300,
+         silence_duration_ms: 500,
+       } : {}),
+     },
+     input_audio_transcription: { model: "whisper-1" },
+     tools: toolDefinitions,
+   };
+
+   const response = await fetch(url, {
+     method: "POST",
+     headers,
+     body: JSON.stringify(sessionConfig),
+   });
+
+   if (!response.ok) {
+     const errorText = await buildProviderErrorDetails(response, "unknown");
+     throw new Error(`Azure Realtime session failed (${response.status}): ${errorText}`);
+   }
+
+   const data = await response.json();
+   return {
+     token: data.client_secret?.value || data.token,
+     expiresAt: data.client_secret?.expires_at || (Date.now() / 1000 + 60),
+     model: deployment,
+     voiceId,
+     provider: "azure",
+     sessionConfig,
+     azureEndpoint: endpoint,
+     azureDeployment: deployment,
+     callContext: context,
+   };
+ }
+
+ /**
+  * Analyze a camera/screen frame and return a concise summary.
+  * @param {string} frameDataUrl - data URL (image/jpeg|png|webp)
+  * @param {object} options - { source, context, prompt }
+  * @returns {Promise<{ summary: string, provider: string, model: string }>}
+  */
+ export async function analyzeVisionFrame(frameDataUrl, options = {}) {
+   const frame = parseImageDataUrl(frameDataUrl);
+   const dataUrl = frame.dataUrl;
+
+   const cfg = getVoiceConfig();
+   const source = String(options?.source || "screen").trim().toLowerCase() || "screen";
+   const callContext = sanitizeVoiceCallContext(options?.context || {});
+   const model =
+     String(
+       options?.model
+       || options?.visionModel
+       || cfg.visionModel
+       || process.env.VOICE_VISION_MODEL
+       || OPENAI_DEFAULT_VISION_MODEL,
+     ).trim();
+   const prompt = String(options?.prompt || "").trim()
+     || "Summarize what is visible in this live frame for a coding assistant. Focus on code, terminal output, errors, UI labels, and actionable context.";
+
+   const contextText = [
+     `Frame source: ${source}.`,
+     `Bound chat session: ${callContext.sessionId || "none"}.`,
+     callContext.executor ? `Preferred executor: ${callContext.executor}.` : "",
+     callContext.mode ? `Preferred mode: ${callContext.mode}.` : "",
+     callContext.model ? `Preferred model override: ${callContext.model}.` : "",
+     "Respond in 1-3 concise sentences. Include likely next action if obvious.",
+   ]
+     .filter(Boolean)
+     .join("\n");
+
+   const preferredProviders = [];
+   const pushProvider = (value) => {
+     const provider = String(value || "").trim().toLowerCase();
+     if (!provider || preferredProviders.includes(provider)) return;
+     preferredProviders.push(provider);
+   };
+   pushProvider(cfg.provider);
+   if (cfg.openaiKey) pushProvider("openai");
+   if (cfg.azureKey && cfg.azureEndpoint) pushProvider("azure");
+   if (cfg.claudeKey) pushProvider("claude");
+   if (cfg.geminiKey) pushProvider("gemini");
+
+   let lastError = null;
+   for (const provider of preferredProviders) {
+     try {
+       if (provider === "openai" && cfg.openaiKey) {
+         return await analyzeVisionWithOpenAI(
+           dataUrl,
+           model,
+           prompt,
+           contextText,
+           cfg,
+         );
+       }
+       if (provider === "azure" && cfg.azureKey && cfg.azureEndpoint) {
+         return await analyzeVisionWithAzure(
+           dataUrl,
+           model,
+           prompt,
+           contextText,
+           cfg,
+         );
+       }
+       if (provider === "claude" && cfg.claudeKey) {
+         return await analyzeVisionWithClaude(
+           frame,
+           model,
+           prompt,
+           contextText,
+           cfg,
+         );
+       }
+       if (provider === "gemini" && cfg.geminiKey) {
+         return await analyzeVisionWithGemini(
+           frame,
+           model,
+           prompt,
+           contextText,
+           cfg,
+         );
+       }
+     } catch (err) {
+       lastError = err;
+     }
+   }
+
+   if (lastError) {
+     throw new Error(`Vision request failed: ${lastError.message}`);
+   }
+
+   throw new Error(
+     "Vision unavailable: configure OpenAI, Azure, Anthropic, or Gemini voice credentials",
+   );
+ }
1012
+
1013
+ /**
1014
+ * Execute a voice tool call server-side.
1015
+ * Returns { result: string, error?: string }
1016
+ */
1017
+ export async function executeVoiceTool(toolName, toolArgs, context = {}) {
1018
+ try {
1019
+ // Import voice-tools lazily to avoid circular deps
1020
+ const { executeToolCall } = await import("./voice-tools.mjs");
1021
+ return await executeToolCall(toolName, toolArgs, context);
1022
+ } catch (err) {
1023
+ console.error(`[voice-relay] tool execution error (${toolName}):`, err.message);
1024
+ return { result: null, error: err.message };
1025
+ }
1026
+ }
1027
+
1028
+ /**
1029
+ * Get the full tool definitions array for voice sessions.
1030
+ */
1031
+ export async function getVoiceToolDefinitions(options = {}) {
1032
+ try {
1033
+ const { getToolDefinitions } = await import("./voice-tools.mjs");
1034
+ const allTools = getToolDefinitions();
1035
+ const delegateOnly = options?.delegateOnly === true;
1036
+ if (!delegateOnly) return allTools;
1037
+ return allTools.filter((tool) => tool?.name === "delegate_to_agent");
1038
+ } catch (err) {
1039
+ console.error("[voice-relay] failed to load voice tool definitions:", err.message);
1040
+ return [];
1041
+ }
1042
+ }
1043
+
1044
+ /**
1045
+ * Get the WebRTC connection URL for the client.
1046
+ */
1047
+ export function getRealtimeConnectionInfo() {
1048
+ const cfg = getVoiceConfig();
1049
+ const candidate = cfg.realtimeCandidates.find((entry) => {
1050
+ if (entry.provider === "openai") return Boolean(cfg.openaiOAuthToken || cfg.openaiKey);
1051
+ if (entry.provider === "azure") return Boolean((cfg.azureOAuthToken || cfg.azureKey) && cfg.azureEndpoint);
1052
+ return false;
1053
+ });
1054
+ if (!candidate) {
1055
+ return {
1056
+ provider: cfg.provider,
1057
+ url: null,
1058
+ model: cfg.model,
1059
+ tier: 2,
1060
+ };
1061
+ }
1062
+
1063
+ if (candidate.provider === "azure") {
1064
+ const endpoint = cfg.azureEndpoint.replace(/\/+$/, "");
1065
+ const deployment =
1066
+ String(candidate?.azureDeployment || cfg.azureDeployment || "").trim()
1067
+ || "gpt-realtime-1.5";
1068
+ return {
1069
+ provider: "azure",
1070
+ url: `${endpoint}/openai/realtime?api-version=${AZURE_API_VERSION}&deployment=${deployment}`,
1071
+ model: deployment,
1072
+ };
1073
+ }
1074
+ const model = String(candidate?.model || cfg.model || OPENAI_REALTIME_MODEL).trim() || OPENAI_REALTIME_MODEL;
1075
+ return {
1076
+ provider: "openai",
1077
+ url: `${OPENAI_REALTIME_URL}?model=${model}`,
1078
+ model,
1079
+ };
1080
+ }
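
How the two Tier 1 pieces fit together from a caller's perspective, as a sketch (the client-side WebRTC handshake itself lives elsewhere in the package):

```js
import { getRealtimeConnectionInfo, createEphemeralToken } from "./voice-relay.mjs";

const info = getRealtimeConnectionInfo();
if (info.url) {
  const { token } = await createEphemeralToken();
  // The client pairs `info.url` with the short-lived `token` to open the
  // realtime connection; `info.model` tells it which model/deployment it got.
} else {
  // info.tier === 2: no realtime credentials, so take the fallback voice path.
}
```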
1081
+
1082
+ // ── Voice action dispatch (direct JavaScript, no MCP bridge) ────────────────
1083
+
1084
+ /**
1085
+ * Dispatch a voice action intent through the action dispatcher.
1086
+ * The voice model returns JSON action objects; Bosun processes them
1087
+ * directly via JavaScript and returns structured results.
1088
+ *
1089
+ * @param {Object} intent — { action, params, id? }
1090
+ * @param {Object} context — { sessionId, executor, mode, model }
1091
+ * @returns {Promise<Object>} Structured result
1092
+ */
1093
+ export async function dispatchVoiceActionIntent(intent, context = {}) {
1094
+ try {
1095
+ const { dispatchVoiceAction } = await import("./voice-action-dispatcher.mjs");
1096
+ return await dispatchVoiceAction(intent, context);
1097
+ } catch (err) {
1098
+ console.error("[voice-relay] action dispatch error:", err.message);
1099
+ return {
1100
+ ok: false,
1101
+ action: intent?.action || "",
1102
+ data: null,
1103
+ error: err.message,
1104
+ durationMs: 0,
1105
+ };
1106
+ }
1107
+ }
1108
+
1109
+ /**
1110
+ * Dispatch multiple voice action intents.
1111
+ * @param {Array} intents
1112
+ * @param {Object} context
1113
+ * @returns {Promise<Array>}
1114
+ */
1115
+ export async function dispatchVoiceActionIntents(intents, context = {}) {
1116
+ try {
1117
+ const { dispatchVoiceActions } = await import("./voice-action-dispatcher.mjs");
1118
+ return await dispatchVoiceActions(intents, context);
1119
+ } catch (err) {
1120
+ console.error("[voice-relay] batch action dispatch error:", err.message);
1121
+ return [];
1122
+ }
1123
+ }
1124
+
1125
+ /**
1126
+ * Get the action manifest for voice prompt injection.
1127
+ * @returns {string}
1128
+ */
1129
+ export async function getVoiceActionManifest() {
1130
+ try {
1131
+ const { getVoiceActionPromptSection } = await import("./voice-action-dispatcher.mjs");
1132
+ return getVoiceActionPromptSection();
1133
+ } catch (err) {
1134
+ console.error("[voice-relay] action manifest error:", err.message);
1135
+ return "";
1136
+ }
1137
+ }
1138
+
1139
+ /**
1140
+ * List all available voice actions.
1141
+ * @returns {Promise<string[]>}
1142
+ */
1143
+ export async function listVoiceActions() {
1144
+ try {
1145
+ const { listAvailableActions } = await import("./voice-action-dispatcher.mjs");
1146
+ return listAvailableActions();
1147
+ } catch {
1148
+ return [];
1149
+ }
1150
+ }
1151
+
1152
+ /**
1153
+ * Build the full voice agent prompt by resolving the voice prompt template
1154
+ * and injecting the action manifest.
1155
+ *
1156
+ * @param {Object} options — { compact?, customInstructions? }
1157
+ * @returns {Promise<string>}
1158
+ */
1159
+ export async function buildVoiceAgentPrompt(options = {}) {
1160
+ const cfg = getVoiceConfig();
1161
+ let baseInstructions = cfg.instructions || "";
1162
+
1163
+ // Try to load the customizable voice prompt from the prompt library
1164
+ try {
1165
+ const { resolveAgentPrompts, renderPromptTemplate, getDefaultPromptTemplate } = await import("./agent-prompts.mjs");
1166
+ const promptKey = options.compact ? "voiceAgentCompact" : "voiceAgent";
1167
+
1168
+ // Try workspace prompt first, fall back to default
1169
+ let template = "";
1170
+ try {
1171
+ const resolved = resolveAgentPrompts(null, process.cwd(), {});
1172
+ template = resolved.prompts?.[promptKey] || "";
1173
+ } catch {
1174
+ template = getDefaultPromptTemplate(promptKey) || "";
1175
+ }
1176
+
1177
+ if (template) {
1178
+ const manifest = await getVoiceActionManifest();
1179
+ baseInstructions = renderPromptTemplate(template, {
1180
+ VOICE_ACTION_MANIFEST: manifest,
1181
+ });
1182
+ }
1183
+ } catch {
1184
+ // Fall back to config instructions
1185
+ }
1186
+
1187
+ // Allow custom instructions override
1188
+ if (options.customInstructions) {
1189
+ baseInstructions = String(options.customInstructions);
1190
+ }
1191
+
1192
+ return baseInstructions;
1193
+ }
1194
+
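
Finally, a sketch of the direct action-dispatch path; the action name and params are illustrative only (listVoiceActions() returns the real manifest at runtime):

```js
import {
  buildVoiceAgentPrompt,
  dispatchVoiceActionIntent,
} from "./voice-relay.mjs";

const prompt = await buildVoiceAgentPrompt({ compact: true }); // manifest-injected instructions
const result = await dispatchVoiceActionIntent(
  { action: "create_task", params: { title: "Fix flaky CI job" } }, // hypothetical action
  { sessionId: "sess-42", executor: "codex-sdk" },
);
if (!result.ok) console.error(result.error);
```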