bosun 0.36.2 → 0.36.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agent-prompts.mjs +95 -0
- package/analyze-agent-work-helpers.mjs +308 -0
- package/analyze-agent-work.mjs +926 -0
- package/autofix.mjs +2 -0
- package/bosun.schema.json +101 -3
- package/codex-shell.mjs +85 -10
- package/desktop/main.mjs +871 -48
- package/desktop/preload.mjs +54 -1
- package/desktop-shortcut.mjs +90 -11
- package/git-editor-fix.mjs +273 -0
- package/mcp-registry.mjs +579 -0
- package/meeting-workflow-service.mjs +631 -0
- package/monitor.mjs +18 -103
- package/package.json +21 -2
- package/primary-agent.mjs +32 -12
- package/session-tracker.mjs +68 -0
- package/setup-web-server.mjs +20 -10
- package/setup.mjs +376 -83
- package/startup-service.mjs +51 -6
- package/stream-resilience.mjs +17 -7
- package/ui/app.js +164 -4
- package/ui/components/agent-selector.js +145 -1
- package/ui/components/chat-view.js +161 -15
- package/ui/components/session-list.js +2 -2
- package/ui/components/shared.js +188 -15
- package/ui/modules/icons.js +13 -0
- package/ui/modules/utils.js +44 -0
- package/ui/modules/voice-client-sdk.js +733 -0
- package/ui/modules/voice-overlay.js +128 -15
- package/ui/modules/voice.js +15 -6
- package/ui/setup.html +281 -81
- package/ui/styles/components.css +99 -3
- package/ui/styles/sessions.css +122 -14
- package/ui/styles.css +14 -0
- package/ui/tabs/agents.js +1 -1
- package/ui/tabs/chat.js +123 -14
- package/ui/tabs/control.js +16 -22
- package/ui/tabs/dashboard.js +85 -8
- package/ui/tabs/library.js +113 -17
- package/ui/tabs/settings.js +116 -2
- package/ui/tabs/tasks.js +388 -39
- package/ui/tabs/telemetry.js +0 -1
- package/ui/tabs/workflows.js +4 -0
- package/ui-server.mjs +400 -22
- package/update-check.mjs +41 -13
- package/voice-action-dispatcher.mjs +844 -0
- package/voice-agents-sdk.mjs +664 -0
- package/voice-auth-manager.mjs +164 -0
- package/voice-relay.mjs +1194 -0
- package/voice-tools.mjs +914 -0
- package/workflow-templates/agents.mjs +6 -2
- package/workflow-templates/github.mjs +154 -12
- package/workflow-templates.mjs +3 -0
- package/github-reconciler.mjs +0 -506
- package/merge-strategy.mjs +0 -1210
- package/pr-cleanup-daemon.mjs +0 -992
- package/workspace-reaper.mjs +0 -405
|
@@ -0,0 +1,664 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* voice-agents-sdk.mjs — Multi-provider voice agent SDK integration.
|
|
3
|
+
*
|
|
4
|
+
* Provider hierarchy (each uses its native SDK when available):
|
|
5
|
+
* 1. OpenAI → @openai/agents RealtimeAgent (WebRTC / WebSocket)
|
|
6
|
+
* 2. Azure → @openai/agents RealtimeAgent (same SDK, Azure endpoint)
|
|
7
|
+
* 3. Gemini → @google/genai Live API (WebSocket streaming)
|
|
8
|
+
* 4. Claude → No native voice SDK → falls back to Bosun Tier 2
|
|
9
|
+
* 5. Bosun legacy → Custom WebRTC / Web Speech API fallback
|
|
10
|
+
*
|
|
11
|
+
* Each provider exposes a unified interface:
|
|
12
|
+
* - createSdkVoiceSession(provider, voiceConfig, options) → session handle, or { useLegacy: true, reason }
|
|
13
|
+
* - checkSdkAvailability(provider) → { available, reason?, info, fallbackTo? }
|
|
14
|
+
* - convertToolsForAgentsSdk(tools) → tool definitions in OpenAI function-calling format
|
|
15
|
+
*
|
|
16
|
+
* @module voice-agents-sdk
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { loadConfig } from "./config.mjs";
|
|
20
|
+
import { resolveVoiceOAuthToken } from "./voice-auth-manager.mjs";
|
|
21
|
+
|
|
22
|
+
// ── Module-scope lazy imports ───────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
let _openaiAgentsModule = null;
let _googleGenaiModule = null;

// Specifiers whose load failure has already been reported. Availability is
// probed repeatedly, so without this a missing optional SDK would log the
// same warning on every probe.
const _reportedMissingSdks = new Set();

/**
 * Run the dynamic import of an optional SDK and turn a failure into `null`.
 *
 * The failure is logged only the first time it happens for a given specifier.
 * Nothing is cached here on failure, so callers that invoke this again will
 * retry the import.
 *
 * @param {string} specifier — package specifier, used for logging and de-duplication
 * @param {() => Promise<object>} importer — thunk performing the literal `import()`
 * @returns {Promise<object | null>} the module namespace, or null when it cannot be loaded
 */
async function loadOptionalSdk(specifier, importer) {
  try {
    return await importer();
  } catch (err) {
    if (!_reportedMissingSdks.has(specifier)) {
      _reportedMissingSdks.add(specifier);
      console.warn(`[voice-agents-sdk] ${specifier} not available:`, err?.message ?? err);
    }
    return null;
  }
}

/**
 * Lazily load `@openai/agents/realtime`, caching the module once it loads.
 * @returns {Promise<object | null>} the module namespace, or null when unavailable
 */
async function getOpenAIAgents() {
  if (!_openaiAgentsModule) {
    // The import() keeps a literal specifier so static tooling can still see it.
    _openaiAgentsModule = await loadOptionalSdk(
      "@openai/agents/realtime",
      () => import("@openai/agents/realtime"),
    );
  }
  return _openaiAgentsModule;
}

/**
 * Lazily load `@google/genai`, caching the module once it loads.
 * @returns {Promise<object | null>} the module namespace, or null when unavailable
 */
async function getGoogleGenAI() {
  if (!_googleGenaiModule) {
    _googleGenaiModule = await loadOptionalSdk(
      "@google/genai",
      () => import("@google/genai"),
    );
  }
  return _googleGenaiModule;
}
|
|
50
|
+
|
|
51
|
+
// ── Constants ───────────────────────────────────────────────────────────────
|
|
52
|
+
|
|
53
|
+
// Default model identifiers used when the voice config does not name one.
const OPENAI_REALTIME_MODEL = "gpt-realtime-1.5";
const GEMINI_LIVE_MODEL = "gemini-2.5-flash-native-audio-preview-12-2025";

/**
 * Static descriptor for each known voice provider.
 *
 * Every descriptor is frozen individually as well as the table itself:
 * descriptors are returned to callers (as `info`), and Object.freeze is
 * shallow, so freezing only the table would leave the shared entries mutable.
 */
const SDK_PROVIDERS = Object.freeze({
  openai: Object.freeze({
    name: "openai",
    sdkPackage: "@openai/agents",
    hasNativeSdk: true,
    transport: "webrtc",
    tier: 1,
  }),
  azure: Object.freeze({
    name: "azure",
    sdkPackage: "@openai/agents",
    hasNativeSdk: true,
    transport: "webrtc",
    tier: 1,
  }),
  gemini: Object.freeze({
    name: "gemini",
    sdkPackage: "@google/genai",
    hasNativeSdk: true,
    transport: "websocket",
    tier: 1,
  }),
  claude: Object.freeze({
    name: "claude",
    sdkPackage: null,
    hasNativeSdk: false,
    transport: "fallback",
    tier: 2,
  }),
  fallback: Object.freeze({
    name: "fallback",
    sdkPackage: null,
    hasNativeSdk: false,
    transport: "fallback",
    tier: 2,
  }),
});
|
|
93
|
+
|
|
94
|
+
// ── Capability detection ────────────────────────────────────────────────────
|
|
95
|
+
|
|
96
|
+
/**
 * Check whether a provider's native voice SDK exists and can be loaded.
 *
 * The provider name is trimmed and lower-cased before lookup. A name with no
 * entry of its own in SDK_PROVIDERS is treated as the "fallback" provider.
 *
 * @param {string} provider
 * @returns {Promise<{ available: boolean, reason?: string, info: object, fallbackTo?: string }>}
 *   `reason` and `fallbackTo` are present only when `available` is false.
 */
export async function checkSdkAvailability(provider) {
  const normalized = String(provider || "").trim().toLowerCase();

  // Own-property lookup only: a plain `SDK_PROVIDERS[normalized]` would also
  // resolve inherited keys such as "constructor" or "__proto__" and hand back
  // something that is not a provider descriptor.
  const info = Object.prototype.hasOwnProperty.call(SDK_PROVIDERS, normalized)
    ? SDK_PROVIDERS[normalized]
    : SDK_PROVIDERS.fallback;

  const unavailable = (reason) => ({
    available: false,
    reason,
    info,
    fallbackTo: "bosun-legacy",
  });

  if (!info.hasNativeSdk) {
    return unavailable(`No native voice SDK for ${normalized}`);
  }

  if (normalized === "openai" || normalized === "azure") {
    const mod = await getOpenAIAgents();
    // Both classes are required to build a realtime session.
    if (!mod || !mod.RealtimeAgent || !mod.RealtimeSession) {
      return unavailable("@openai/agents/realtime not loadable");
    }
    return { available: true, info };
  }

  if (normalized === "gemini") {
    const mod = await getGoogleGenAI();
    if (!mod) {
      return unavailable("@google/genai not loadable");
    }
    return { available: true, info };
  }

  // Defensive default: only reached if a descriptor claims a native SDK but
  // has no loader branch above.
  return {
    available: false,
    reason: `Unknown provider: ${normalized}`,
    info: SDK_PROVIDERS.fallback,
    fallbackTo: "bosun-legacy",
  };
}
|
|
147
|
+
|
|
148
|
+
/**
 * Probe SDK availability for every provider listed in SDK_PROVIDERS.
 *
 * Providers are probed one after another, in table order.
 *
 * @returns {Promise<Record<string, object>>} availability result keyed by provider name
 */
export async function getAllSdkCapabilities() {
  const probed = [];
  for (const name of Object.keys(SDK_PROVIDERS)) {
    const availability = await checkSdkAvailability(name);
    probed.push([name, availability]);
  }
  return Object.fromEntries(probed);
}
|
|
159
|
+
|
|
160
|
+
// ── OpenAI/Azure Agents SDK integration ─────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
/**
 * Normalize Bosun voice tool definitions into plain OpenAI function-tool
 * objects.
 *
 * Each entry is reduced to `{ type: "function", name, description, parameters }`.
 * A missing description becomes an empty string and missing parameters become
 * an empty object schema. No execute function or Zod schema is attached; the
 * result is raw configuration data.
 *
 * @param {Array} bosunTools — Array of { type, name, description, parameters }
 * @returns {Array} normalized tool definitions; empty when the input is not an array
 */
export function convertToolsForAgentsSdk(bosunTools) {
  if (!Array.isArray(bosunTools)) {
    return [];
  }
  return bosunTools.map(({ name, description, parameters }) => {
    const schema = parameters || { type: "object", properties: {} };
    return {
      type: "function",
      name,
      description: description || "",
      parameters: schema,
    };
  });
}
|
|
182
|
+
|
|
183
|
+
/**
 * Create an @openai/agents RealtimeAgent for voice sessions.
 *
 * Unless `options.enrichInstructions` is `false`, the instructions are built
 * by `buildVoiceAgentPrompt()` from ./voice-relay.mjs. If that fails, or does
 * not yield a non-blank string, the base instructions are used instead.
 *
 * @param {object} [options]
 * @param {string} [options.instructions] — custom instructions; blank strings are ignored
 * @param {boolean} [options.enrichInstructions] — pass `false` to skip prompt enrichment
 * @param {boolean} [options.compact] — forwarded to buildVoiceAgentPrompt
 * @param {string} [options.name] — agent name (default "Bosun Voice Agent")
 * @param {Array} [options.tools] — passed to RealtimeAgent unchanged
 * @param {Array} [options.handoffs] — passed to RealtimeAgent unchanged
 * @returns {Promise<{ agent: object, provider: string, sdkVersion: string } | null>}
 *   null when the realtime SDK (or its RealtimeAgent export) is unavailable
 */
export async function createOpenAIRealtimeAgent(options = {}) {
  const mod = await getOpenAIAgents();
  if (!mod || !mod.RealtimeAgent) {
    return null;
  }

  const { RealtimeAgent } = mod;

  const defaultInstructions = `You are Bosun, a helpful voice assistant for the VirtEngine development platform.
You help developers manage tasks, steer coding agents, monitor builds, and navigate the workspace.
Be concise and conversational. When users ask about code or tasks, use the available tools.
For complex operations like writing code or creating PRs, delegate to the appropriate agent.`;

  // A whitespace-only string counts as "no custom instructions"; otherwise it
  // would end up as the agent's (blank) prompt.
  const custom = options.instructions;
  const hasCustom = typeof custom === "string" ? custom.trim() !== "" : Boolean(custom);
  const baseInstructions = hasCustom ? custom : defaultInstructions;

  let enrichedInstructions = baseInstructions;
  if (options.enrichInstructions !== false) {
    try {
      const { buildVoiceAgentPrompt } = await import("./voice-relay.mjs");
      const prompt = await buildVoiceAgentPrompt({
        compact: options.compact || false,
        customInstructions: hasCustom ? custom : undefined,
      });
      // Only accept a usable prompt; anything else keeps the base instructions.
      if (typeof prompt === "string" && prompt.trim() !== "") {
        enrichedInstructions = prompt;
      }
    } catch (err) {
      // Enrichment is best effort: report and continue with the base instructions.
      console.warn(
        "[voice-agents-sdk] voice prompt enrichment failed, using base instructions:",
        err?.message ?? err,
      );
    }
  }

  const agent = new RealtimeAgent({
    name: options.name || "Bosun Voice Agent",
    instructions: enrichedInstructions,
    tools: options.tools || [],
    handoffs: options.handoffs || [],
  });

  return {
    agent,
    provider: "openai",
    sdkVersion: mod.VERSION || "unknown",
  };
}
|
|
231
|
+
|
|
232
|
+
/**
 * Create a RealtimeSession for an agent, configured from the voice settings.
 *
 * For `model`, `voiceId` and `turnDetection`, the value from `options` wins
 * over `config`; a missing or blank value falls back to the default
 * (OPENAI_REALTIME_MODEL, "alloy", "server_vad").
 *
 * @param {object} agent — RealtimeAgent instance
 * @param {string} provider — "openai" or "azure"; echoed back in the returned handle
 * @param {object} config — Voice config from getVoiceConfig()
 * @param {object} options — Session options ({ model, voiceId, turnDetection, transport })
 * @returns {Promise<{ session: object, provider: string, model: string, voiceId: string, transport: string }>}
 * @throws {Error} when @openai/agents/realtime (or its RealtimeSession export) is unavailable
 */
export async function createRealtimeSession(agent, provider, config = {}, options = {}) {
  const mod = await getOpenAIAgents();
  if (!mod || !mod.RealtimeSession) {
    throw new Error("@openai/agents/realtime not available");
  }

  const { RealtimeSession } = mod;

  // First truthy candidate, trimmed; blank results fall back to the default.
  const pickText = (fallback, ...candidates) =>
    String(candidates.find(Boolean) || fallback).trim() || fallback;

  const model = pickText(OPENAI_REALTIME_MODEL, options.model, config.model);
  const voiceId = pickText("alloy", options.voiceId, config.voiceId);
  // Falls back like the two settings above, so a whitespace-only value never
  // becomes an empty turn-detection type.
  const turnDetection = pickText("server_vad", options.turnDetection, config.turnDetection);

  // Tuning values apply only to the detection mode they belong to.
  let detectionTuning = {};
  if (turnDetection === "server_vad") {
    detectionTuning = { threshold: 0.5, prefix_padding_ms: 300, silence_duration_ms: 500 };
  } else if (turnDetection === "semantic_vad") {
    detectionTuning = { eagerness: "medium" };
  }

  const sessionConfig = {
    model,
    config: {
      outputModalities: ["text", "audio"],
      audio: {
        input: {
          format: "pcm16",
          transcription: {
            model: "gpt-4o-mini-transcribe",
          },
          turnDetection: {
            type: turnDetection,
            ...detectionTuning,
          },
        },
        output: {
          format: "pcm16",
          voice: voiceId,
        },
      },
    },
  };

  const session = new RealtimeSession(agent, sessionConfig);

  return {
    session,
    provider,
    model,
    voiceId,
    // Reported to the caller only; it is not part of the session config above.
    transport: options.transport || "webrtc",
  };
}
|
|
298
|
+
|
|
299
|
+
/**
 * Connect a RealtimeSession to its provider.
 *
 * For "azure" the session is pointed at the deployment's realtime URL on the
 * configured endpoint; any other provider value is treated as OpenAI and
 * connects with the API key only. OAuth tokens take precedence over keys.
 *
 * @param {object} sessionHandle — from createRealtimeSession() ({ session, provider })
 * @param {object} config — Voice config with credentials
 * @returns {Promise<void>}
 * @throws {Error} when the credential, or the Azure endpoint, is not configured
 */
export async function connectRealtimeSession(sessionHandle, config = {}) {
  const { session, provider } = sessionHandle;

  const connectOpts = {};

  if (provider === "azure") {
    const credential = String(
      config.azureOAuthToken || config.azureKey || "",
    ).trim();
    if (!credential) {
      throw new Error("Azure voice credential not configured");
    }

    const endpoint = String(config.azureEndpoint || "").trim().replace(/\/+$/, "");
    // Without an endpoint the URL below would be a bare path.
    if (!endpoint) {
      throw new Error("Azure voice endpoint not configured");
    }

    const defaultDeployment = "gpt-realtime-1.5";
    const deployment = String(config.azureDeployment || defaultDeployment).trim() || defaultDeployment;

    connectOpts.apiKey = credential;
    // The deployment name is user configuration; encode it so characters such
    // as "&" or spaces cannot alter the query string.
    connectOpts.url = `${endpoint}/openai/realtime?api-version=2025-04-01-preview&deployment=${encodeURIComponent(deployment)}`;
  } else {
    // OpenAI
    const credential = String(
      config.openaiOAuthToken || config.openaiKey || "",
    ).trim();
    if (!credential) {
      throw new Error("OpenAI voice credential not configured");
    }
    connectOpts.apiKey = credential;
  }

  await session.connect(connectOpts);
}
|
|
336
|
+
|
|
337
|
+
// ── Gemini Live API integration ─────────────────────────────────────────────
|
|
338
|
+
|
|
339
|
+
/**
 * Prepare a Gemini Live session using @google/genai.
 *
 * This builds the client and the live configuration but does not open a
 * connection; call `connect()` on the returned handle for that.
 *
 * Unless `options.enrichInstructions` is `false`, the system instruction is
 * built by `buildVoiceAgentPrompt()` from ./voice-relay.mjs. If that fails, or
 * does not yield a non-blank string, the base instructions are used instead.
 *
 * @param {object} [options]
 * @param {string} [options.geminiKey] — API key (`options.apiKey` is accepted as an alternative)
 * @param {string} [options.model] — defaults to GEMINI_LIVE_MODEL
 * @param {string} [options.instructions] — custom instructions; blank strings are ignored
 * @param {boolean} [options.enrichInstructions] — pass `false` to skip prompt enrichment
 * @param {boolean} [options.compact] — forwarded to buildVoiceAgentPrompt
 * @param {Array} [options.tools] — { name, description, parameters } definitions; non-arrays are ignored
 * @returns {Promise<object | null>} `{ client, model, liveConfig, provider, transport, apiKey, connect }`,
 *   or null when @google/genai cannot be loaded
 * @throws {Error} when no API key is configured or no client constructor can be resolved
 */
export async function createGeminiLiveSession(options = {}) {
  const genai = await getGoogleGenAI();
  if (!genai) return null;

  const apiKey = String(
    options.geminiKey || options.apiKey || "",
  ).trim();
  if (!apiKey) {
    throw new Error("Gemini API key not configured for Live API");
  }

  const model = String(
    options.model || GEMINI_LIVE_MODEL,
  ).trim() || GEMINI_LIVE_MODEL;

  const defaultInstructions = `You are Bosun, a helpful voice assistant for the VirtEngine development platform.
You help developers manage tasks, steer coding agents, monitor builds, and navigate the workspace.
Be concise and conversational.`;

  // A whitespace-only string counts as "no custom instructions"; otherwise it
  // would end up as the (blank) system instruction.
  const custom = options.instructions;
  const hasCustom = typeof custom === "string" ? custom.trim() !== "" : Boolean(custom);
  const baseInstructions = hasCustom ? custom : defaultInstructions;

  let enrichedInstructions = baseInstructions;
  if (options.enrichInstructions !== false) {
    try {
      const { buildVoiceAgentPrompt } = await import("./voice-relay.mjs");
      const prompt = await buildVoiceAgentPrompt({
        compact: options.compact || false,
        customInstructions: hasCustom ? custom : undefined,
      });
      // Only accept a usable prompt; anything else keeps the base instructions.
      if (typeof prompt === "string" && prompt.trim() !== "") {
        enrichedInstructions = prompt;
      }
    } catch (err) {
      // Enrichment is best effort: report and continue with the base instructions.
      console.warn(
        "[voice-agents-sdk] voice prompt enrichment failed, using base instructions:",
        err?.message ?? err,
      );
    }
  }

  // The constructor is looked up under several export names.
  const GenAIClient = genai.GoogleGenAI || genai.GoogleGenerativeAI || genai.default;
  if (!GenAIClient) {
    throw new Error("Could not resolve GoogleGenAI client constructor from @google/genai");
  }

  const client = new GenAIClient({ apiKey });

  // Build tool declarations for Gemini format
  const sourceTools = Array.isArray(options.tools) ? options.tools : [];
  const toolDeclarations = sourceTools.map((tool) => ({
    name: tool.name,
    description: tool.description || "",
    parameters: tool.parameters || { type: "object", properties: {} },
  }));

  const liveConfig = {
    responseModalities: ["AUDIO"],
    systemInstruction: enrichedInstructions,
    // The tools key is omitted entirely when there is nothing to declare.
    ...(toolDeclarations.length > 0
      ? { tools: [{ functionDeclarations: toolDeclarations }] }
      : {}),
  };

  return {
    client,
    model,
    liveConfig,
    provider: "gemini",
    transport: "websocket",
    apiKey,

    /**
     * Open the Gemini Live streaming connection.
     *
     * `client.aio.live.connect` is tried first; if it is missing or yields
     * nothing, `client.live` (then `client.aio.live`) is used.
     *
     * @param {object} [callbacks] — optional event callbacks, forwarded as
     *   `callbacks` in the connect parameters when provided.
     *   NOTE(review): the JS Live API is documented to take its message
     *   handlers this way — confirm against the installed SDK version.
     * @returns {Promise<object>} the live session returned by the SDK
     * @throws {Error} when the client exposes no live API
     */
    async connect(callbacks) {
      const params = {
        model,
        config: liveConfig,
        ...(callbacks ? { callbacks } : {}),
      };
      const liveSession = await client.aio?.live?.connect?.(params);
      if (liveSession) {
        return liveSession;
      }
      const liveClient = client.live || client.aio?.live;
      if (!liveClient) {
        throw new Error("Gemini Live API not available in this version of @google/genai");
      }
      return liveClient.connect(params);
    },
  };
}
|
|
433
|
+
|
|
434
|
+
// ── Unified session factory ─────────────────────────────────────────────────
|
|
435
|
+
|
|
436
|
+
/**
 * Create a voice session using the native SDK for the given provider.
 *
 * - "openai" / "azure": a RealtimeAgent plus RealtimeSession from @openai/agents.
 * - "gemini": a Gemini Live session from @google/genai.
 * - anything else (including "claude" and "fallback"): no native SDK.
 *
 * This function does not reject for SDK problems: when the SDK is unavailable
 * or session setup throws, it resolves to `{ useLegacy: true, reason, provider }`
 * so the caller can use the legacy voice path.
 *
 * @param {string} provider — "openai", "azure", "gemini", "claude", "fallback"
 * @param {object} voiceConfig — from getVoiceConfig()
 * @param {object} options — { tools, instructions, agentName }
 * @returns {Promise<object>} Session handle (`useLegacy: false`, with `connect()` and
 *   `disconnect()`), or `{ useLegacy: true, reason, provider }`
 */
export async function createSdkVoiceSession(provider, voiceConfig = {}, options = {}) {
  const normalized = String(provider || "").trim().toLowerCase();

  // Every "use the legacy path" result has the same shape.
  const legacy = (reason) => ({ useLegacy: true, reason, provider: normalized });

  // ── OpenAI / Azure: use @openai/agents ──
  if (normalized === "openai" || normalized === "azure") {
    try {
      const availability = await checkSdkAvailability(normalized);
      if (!availability.available) {
        return legacy(availability.reason);
      }

      const agentResult = await createOpenAIRealtimeAgent({
        instructions: voiceConfig.instructions || options.instructions,
        tools: options.tools || [],
        name: options.agentName || "Bosun Voice Agent",
      });
      if (!agentResult) {
        return legacy("Failed to create RealtimeAgent");
      }

      const sessionHandle = await createRealtimeSession(
        agentResult.agent,
        normalized,
        voiceConfig,
        {
          model: voiceConfig.model,
          voiceId: voiceConfig.voiceId,
          turnDetection: voiceConfig.turnDetection,
        },
      );

      return {
        useLegacy: false,
        sdk: "openai-agents",
        provider: normalized,
        agent: agentResult.agent,
        session: sessionHandle.session,
        sessionHandle,
        model: sessionHandle.model,
        voiceId: sessionHandle.voiceId,

        async connect() {
          await connectRealtimeSession(sessionHandle, voiceConfig);
        },

        async disconnect() {
          try {
            sessionHandle.session.close?.();
          } catch {
            // best effort
          }
        },
      };
    } catch (err) {
      console.error(`[voice-agents-sdk] ${normalized} SDK session failed:`, err.message);
      return legacy(`SDK error: ${err.message}`);
    }
  }

  // ── Gemini: use @google/genai Live API ──
  if (normalized === "gemini") {
    try {
      const availability = await checkSdkAvailability("gemini");
      if (!availability.available) {
        return legacy(availability.reason);
      }

      const geminiSession = await createGeminiLiveSession({
        geminiKey: voiceConfig.geminiKey || voiceConfig.geminiOAuthToken,
        model: voiceConfig.model,
        instructions: voiceConfig.instructions || options.instructions,
        tools: options.tools || [],
      });
      if (!geminiSession) {
        return legacy("Failed to create Gemini Live session");
      }

      // The live connection is tracked in this closure as well as on the
      // handle, so connect()/disconnect() keep working when they are
      // destructured or passed around as bare callbacks (where `this` is
      // undefined).
      let activeLiveHandle;

      const handle = {
        useLegacy: false,
        sdk: "google-genai-live",
        provider: "gemini",
        model: geminiSession.model,
        transport: "websocket",
        geminiSession,

        // Any arguments are passed through to geminiSession.connect().
        async connect(...connectArgs) {
          activeLiveHandle = await geminiSession.connect(...connectArgs);
          (this ?? handle).liveHandle = activeLiveHandle;
          return activeLiveHandle;
        },

        async disconnect() {
          try {
            (this?.liveHandle ?? activeLiveHandle)?.close?.();
          } catch {
            // best effort
          }
        },
      };
      return handle;
    } catch (err) {
      console.error("[voice-agents-sdk] Gemini SDK session failed:", err.message);
      return legacy(`SDK error: ${err.message}`);
    }
  }

  // ── Claude / Fallback: no native SDK ──
  return legacy(
    normalized === "claude"
      ? "Anthropic does not provide a real-time voice SDK"
      : "Using Bosun browser STT/TTS fallback",
  );
}
|
|
585
|
+
|
|
586
|
+
// ── Provider resolution with SDK preference ────────────────────────────────
|
|
587
|
+
|
|
588
|
+
/**
 * Walk the provider chain and return the first native-SDK session that can
 * be created.
 *
 * The chain is `voiceConfig.providerChainWithFallbacks`, or a single-entry
 * chain holding `voiceConfig.provider` (default "openai") when that is not
 * set. Providers are tried in order; each one that reports `useLegacy` is
 * recorded with its reason and the next one is tried.
 *
 * @param {object} voiceConfig — from getVoiceConfig()
 * @param {object} options — { tools, instructions, callContext }
 * @returns {Promise<object>} the first SDK session handle, or a legacy fallback
 *   signal `{ useLegacy: true, reason, errors, provider }`
 */
export async function resolveBestVoiceSession(voiceConfig = {}, options = {}) {
  const chain = voiceConfig.providerChainWithFallbacks || [voiceConfig.provider || "openai"];
  const failures = [];

  for (const candidate of chain) {
    const attempt = await createSdkVoiceSession(candidate, voiceConfig, options);
    if (attempt.useLegacy) {
      failures.push({ provider: candidate, reason: attempt.reason });
      continue;
    }
    return attempt;
  }

  // Nothing in the chain produced an SDK session.
  const fallbackProvider = voiceConfig.provider || "fallback";
  return {
    useLegacy: true,
    reason: "All provider SDKs unavailable, using Bosun legacy voice",
    errors: failures,
    provider: fallbackProvider,
  };
}
|
|
617
|
+
|
|
618
|
+
// ── Session config for client-side SDK usage ───────────────────────────────
|
|
619
|
+
|
|
620
|
+
/**
 * Generate client-side SDK configuration data for the voice overlay.
 * The client uses this to decide whether to initialize the Agents SDK
 * or fall back to legacy WebRTC.
 *
 * When no model is configured, the default depends on the provider:
 * GEMINI_LIVE_MODEL for "gemini", OPENAI_REALTIME_MODEL for everything else.
 *
 * @param {object} voiceConfig — from getVoiceConfig()
 * @returns {Promise<object>} Client configuration: `{ useSdk, provider, sdkPackage,
 *   transport, tier, model, voiceId, turnDetection, fallbackReason }`
 */
export async function getClientSdkConfig(voiceConfig = {}) {
  const provider = voiceConfig.provider || "fallback";
  const availability = await checkSdkAvailability(provider);

  // Report the Gemini default for Gemini rather than an OpenAI model name.
  // The comparison is case- and whitespace-insensitive.
  const isGemini = String(provider).trim().toLowerCase() === "gemini";
  const defaultModel = isGemini ? GEMINI_LIVE_MODEL : OPENAI_REALTIME_MODEL;

  return {
    useSdk: availability.available,
    provider,
    sdkPackage: availability.info?.sdkPackage || null,
    transport: availability.info?.transport || "fallback",
    tier: availability.info?.tier || 2,
    model: voiceConfig.model || defaultModel,
    voiceId: voiceConfig.voiceId || "alloy",
    turnDetection: voiceConfig.turnDetection || "server_vad",
    fallbackReason: availability.available ? null : availability.reason,
  };
}
|
|
644
|
+
|
|
645
|
+
// ── Health check for active SDK sessions ───────────────────────────────────
|
|
646
|
+
|
|
647
|
+
/**
 * Collect diagnostic information about every SDK integration.
 *
 * @returns {Promise<object>} `{ providers, sdkVersions, timestamp }`: per-provider
 *   availability, a version label for each SDK module, and the ISO-8601 time
 *   at which the report was built
 */
export async function getSdkDiagnostics() {
  const providers = await getAllSdkCapabilities();
  const openaiModule = await getOpenAIAgents().catch(() => null);
  const genaiModule = await getGoogleGenAI().catch(() => null);

  // A module's own VERSION when it has one; otherwise whether it loaded at all.
  const versionLabel = (loadedModule) => {
    if (!loadedModule) {
      return "not loaded";
    }
    return loadedModule.VERSION || "loaded (version unknown)";
  };

  const sdkVersions = {
    openaiAgents: versionLabel(openaiModule),
    googleGenai: versionLabel(genaiModule),
  };

  return {
    providers,
    sdkVersions,
    timestamp: new Date().toISOString(),
  };
}
|