bosun 0.37.0 → 0.37.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -1
- package/agent-tool-config.mjs +338 -0
- package/bosun-skills.mjs +59 -4
- package/bosun.schema.json +1 -1
- package/desktop/launch.mjs +18 -0
- package/desktop/main.mjs +52 -13
- package/fleet-coordinator.mjs +34 -1
- package/kanban-adapter.mjs +30 -3
- package/library-manager.mjs +66 -0
- package/maintenance.mjs +30 -5
- package/monitor.mjs +56 -0
- package/package.json +4 -1
- package/setup-web-server.mjs +73 -12
- package/setup.mjs +3 -3
- package/ui/app.js +40 -3
- package/ui/components/session-list.js +25 -7
- package/ui/components/workspace-switcher.js +48 -1
- package/ui/demo.html +176 -0
- package/ui/modules/mic-track-registry.js +83 -0
- package/ui/modules/settings-schema.js +4 -1
- package/ui/modules/state.js +25 -0
- package/ui/modules/streaming.js +1 -1
- package/ui/modules/voice-barge-in.js +27 -0
- package/ui/modules/voice-client-sdk.js +268 -42
- package/ui/modules/voice-client.js +665 -61
- package/ui/modules/voice-overlay.js +829 -47
- package/ui/setup.html +151 -9
- package/ui/styles.css +258 -0
- package/ui/tabs/chat.js +11 -0
- package/ui/tabs/library.js +890 -15
- package/ui/tabs/settings.js +51 -11
- package/ui/tabs/telemetry.js +327 -105
- package/ui/tabs/workflows.js +86 -0
- package/ui-server.mjs +1201 -107
- package/voice-action-dispatcher.mjs +81 -0
- package/voice-agents-sdk.mjs +2 -2
- package/voice-relay.mjs +131 -14
- package/voice-tools.mjs +475 -9
- package/workflow-engine.mjs +54 -0
- package/workflow-nodes.mjs +177 -28
- package/workflow-templates/github.mjs +205 -94
- package/workflow-templates/task-batch.mjs +247 -0
- package/workflow-templates.mjs +15 -0
|
@@ -746,6 +746,83 @@ registerAction("workflow.run_get", async (params) => {
|
|
|
746
746
|
return run;
|
|
747
747
|
});
|
|
748
748
|
|
|
749
|
+
registerAction("workflow.save", async (params) => {
|
|
750
|
+
const wfEngineMod = await getWorkflowEngineModule();
|
|
751
|
+
const engine = typeof wfEngineMod.getWorkflowEngine === "function"
|
|
752
|
+
? wfEngineMod.getWorkflowEngine()
|
|
753
|
+
: null;
|
|
754
|
+
if (!engine?.save) throw new Error("Workflow engine is unavailable");
|
|
755
|
+
const def = params?.definition;
|
|
756
|
+
if (!def || typeof def !== "object" || Array.isArray(def)) {
|
|
757
|
+
throw new Error("definition object is required");
|
|
758
|
+
}
|
|
759
|
+
const workflowId = String(params.workflowId || def.id || "").trim();
|
|
760
|
+
const payload = { ...def };
|
|
761
|
+
if (workflowId) payload.id = workflowId;
|
|
762
|
+
if (!Array.isArray(payload.nodes)) payload.nodes = [];
|
|
763
|
+
if (!Array.isArray(payload.edges)) payload.edges = [];
|
|
764
|
+
if (!payload.name) payload.name = payload.id || "Voice Workflow";
|
|
765
|
+
const saved = engine.save(payload);
|
|
766
|
+
return {
|
|
767
|
+
id: saved.id,
|
|
768
|
+
name: saved.name || saved.id,
|
|
769
|
+
enabled: saved.enabled !== false,
|
|
770
|
+
nodeCount: Array.isArray(saved.nodes) ? saved.nodes.length : 0,
|
|
771
|
+
edgeCount: Array.isArray(saved.edges) ? saved.edges.length : 0,
|
|
772
|
+
};
|
|
773
|
+
});
|
|
774
|
+
|
|
775
|
+
registerAction("workflow.delete", async (params) => {
|
|
776
|
+
const wfEngineMod = await getWorkflowEngineModule();
|
|
777
|
+
const engine = typeof wfEngineMod.getWorkflowEngine === "function"
|
|
778
|
+
? wfEngineMod.getWorkflowEngine()
|
|
779
|
+
: null;
|
|
780
|
+
if (!engine?.delete) throw new Error("Workflow engine is unavailable");
|
|
781
|
+
const workflowId = String(params.workflowId || params.id || "").trim();
|
|
782
|
+
if (!workflowId) throw new Error("workflowId is required");
|
|
783
|
+
const deleted = await engine.delete(workflowId);
|
|
784
|
+
return { ok: Boolean(deleted), workflowId };
|
|
785
|
+
});
|
|
786
|
+
|
|
787
|
+
registerAction("workflow.execute", async (params) => {
|
|
788
|
+
const wfEngineMod = await getWorkflowEngineModule();
|
|
789
|
+
const engine = typeof wfEngineMod.getWorkflowEngine === "function"
|
|
790
|
+
? wfEngineMod.getWorkflowEngine()
|
|
791
|
+
: null;
|
|
792
|
+
if (!engine?.execute) throw new Error("Workflow engine is unavailable");
|
|
793
|
+
const workflowId = String(params.workflowId || params.id || "").trim();
|
|
794
|
+
if (!workflowId) throw new Error("workflowId is required");
|
|
795
|
+
const input = params?.input && typeof params.input === "object" ? params.input : {};
|
|
796
|
+
const force = params?.force === true;
|
|
797
|
+
const ctx = await engine.execute(workflowId, input, { force });
|
|
798
|
+
return {
|
|
799
|
+
runId: ctx?.id || null,
|
|
800
|
+
workflowId,
|
|
801
|
+
status: Array.isArray(ctx?.errors) && ctx.errors.length > 0 ? "failed" : "completed",
|
|
802
|
+
errorCount: Array.isArray(ctx?.errors) ? ctx.errors.length : 0,
|
|
803
|
+
};
|
|
804
|
+
});
|
|
805
|
+
|
|
806
|
+
registerAction("workflow.retry", async (params) => {
|
|
807
|
+
const wfEngineMod = await getWorkflowEngineModule();
|
|
808
|
+
const engine = typeof wfEngineMod.getWorkflowEngine === "function"
|
|
809
|
+
? wfEngineMod.getWorkflowEngine()
|
|
810
|
+
: null;
|
|
811
|
+
if (!engine?.retryRun) throw new Error("Workflow retry is unavailable");
|
|
812
|
+
const runId = String(params.runId || params.id || "").trim();
|
|
813
|
+
if (!runId) throw new Error("runId is required");
|
|
814
|
+
const mode = String(params.mode || "from_failed").trim().toLowerCase() === "from_scratch"
|
|
815
|
+
? "from_scratch"
|
|
816
|
+
: "from_failed";
|
|
817
|
+
const currentRun = engine?.getRunDetail ? engine.getRunDetail(runId) : null;
|
|
818
|
+
if (!currentRun) throw new Error(`Workflow run "${runId}" not found`);
|
|
819
|
+
const currentStatus = String(currentRun?.status || "").trim().toLowerCase();
|
|
820
|
+
if (mode === "from_failed" && currentStatus !== "failed") {
|
|
821
|
+
throw new Error(`retry mode "from_failed" requires a failed run (current=${currentRun?.status || "unknown"})`);
|
|
822
|
+
}
|
|
823
|
+
return engine.retryRun(runId, { mode });
|
|
824
|
+
});
|
|
825
|
+
|
|
749
826
|
// ── Skill/prompt actions ────────────────────────────────────────────────────
|
|
750
827
|
|
|
751
828
|
registerAction("skill.list", async () => {
|
|
@@ -860,6 +937,10 @@ export function getActionManifest() {
|
|
|
860
937
|
{ action: "workflow.saved_list", description: "List installed workflow definitions. params: {}" },
|
|
861
938
|
{ action: "workflow.runs", description: "List workflow run history. params: { workflowId?, status?, limit? }" },
|
|
862
939
|
{ action: "workflow.run_get", description: "Get a workflow run detail. params: { runId }" },
|
|
940
|
+
{ action: "workflow.save", description: "Create/update a workflow definition. params: { workflowId?, definition }" },
|
|
941
|
+
{ action: "workflow.delete", description: "Delete a workflow definition. params: { workflowId }" },
|
|
942
|
+
{ action: "workflow.execute", description: "Execute a workflow now. params: { workflowId, input?, force? }" },
|
|
943
|
+
{ action: "workflow.retry", description: "Retry a workflow run. params: { runId, mode?: from_failed|from_scratch }" },
|
|
863
944
|
{ action: "skill.list", description: "List available skills. params: {}" },
|
|
864
945
|
{ action: "prompt.list", description: "List agent prompt definitions. params: {}" },
|
|
865
946
|
{ action: "prompt.get", description: "Get a prompt template. params: { key }" },
|
package/voice-agents-sdk.mjs
CHANGED
|
@@ -275,7 +275,7 @@ export async function createRealtimeSession(agent, provider, config = {}, option
|
|
|
275
275
|
).trim() || "alloy";
|
|
276
276
|
|
|
277
277
|
const turnDetection = String(
|
|
278
|
-
options.turnDetection || config.turnDetection || "
|
|
278
|
+
options.turnDetection || config.turnDetection || "semantic_vad",
|
|
279
279
|
).trim();
|
|
280
280
|
|
|
281
281
|
const sessionConfig = {
|
|
@@ -668,7 +668,7 @@ export async function getClientSdkConfig(voiceConfig = {}) {
|
|
|
668
668
|
tier: availability.info?.tier || 2,
|
|
669
669
|
model: configuredModel,
|
|
670
670
|
voiceId: voiceConfig.voiceId || "alloy",
|
|
671
|
-
turnDetection: voiceConfig.turnDetection || "
|
|
671
|
+
turnDetection: voiceConfig.turnDetection || "semantic_vad",
|
|
672
672
|
fallbackReason,
|
|
673
673
|
};
|
|
674
674
|
}
|
package/voice-relay.mjs
CHANGED
|
@@ -26,7 +26,7 @@ const OPENAI_REALTIME_MODEL = "gpt-realtime-1.5";
|
|
|
26
26
|
const OPENAI_AUDIO_RESPONSES_MODEL = "gpt-audio-1.5";
|
|
27
27
|
const OPENAI_RESPONSES_URL = "https://api.openai.com/v1/responses";
|
|
28
28
|
const OPENAI_DEFAULT_VISION_MODEL = "gpt-4.1-nano";
|
|
29
|
-
const
|
|
29
|
+
const DEFAULT_TRANSCRIBE_MODEL = "gpt-4o-transcribe";
|
|
30
30
|
|
|
31
31
|
const AZURE_API_VERSION = "2025-04-01-preview";
|
|
32
32
|
|
|
@@ -73,6 +73,8 @@ function buildOpenAIRealtimeWebRtcUrl(model, overrideBase = "") {
|
|
|
73
73
|
|
|
74
74
|
// GA models (gpt-realtime, gpt-realtime-1.5, gpt-realtime-mini, etc.) use /openai/v1/ paths.
|
|
75
75
|
// Preview models (for example gpt-4o-realtime-preview-*) use legacy /openai/realtimeapi/ paths.
|
|
76
|
+
// NOTE: Azure AI Foundry "Global Standard" deployments may only support preview paths
|
|
77
|
+
// even for GA model names. We try GA first. If it 404s the caller falls back to preview.
|
|
76
78
|
function isAzureGaProtocol(deployment) {
|
|
77
79
|
const d = String(deployment || "").toLowerCase().trim();
|
|
78
80
|
return d.startsWith("gpt-realtime") && !d.startsWith("gpt-4o-realtime");
|
|
@@ -93,6 +95,13 @@ function normalizeAzureRealtimeDeployment(rawDeployment) {
|
|
|
93
95
|
return deployment;
|
|
94
96
|
}
|
|
95
97
|
|
|
98
|
+
function parseOptionalBoolean(rawValue) {
|
|
99
|
+
if (rawValue == null) return null;
|
|
100
|
+
const normalized = String(rawValue).trim().toLowerCase();
|
|
101
|
+
if (!normalized) return null;
|
|
102
|
+
return !["0", "false", "no", "off"].includes(normalized);
|
|
103
|
+
}
|
|
104
|
+
|
|
96
105
|
function isOpenAIAudioResponsesModel(rawModel) {
|
|
97
106
|
const model = String(rawModel || "").trim().toLowerCase();
|
|
98
107
|
return /^gpt-audio/.test(model);
|
|
@@ -305,18 +314,42 @@ function sanitizeVoiceCallContext(context = {}) {
|
|
|
305
314
|
const rawExecutor = String(context?.executor || "").trim().toLowerCase();
|
|
306
315
|
const rawMode = String(context?.mode || "").trim().toLowerCase();
|
|
307
316
|
const rawModel = String(context?.model || "").trim();
|
|
317
|
+
const rawVoiceAgentId = String(context?.voiceAgentId || "").trim();
|
|
318
|
+
const rawVoiceAgentName = String(context?.voiceAgentName || "").trim();
|
|
319
|
+
const rawVoiceAgentInstructions = String(context?.voiceAgentInstructions || "").trim();
|
|
320
|
+
const rawVoiceToolCapabilityPrompt = String(context?.voiceToolCapabilityPrompt || "").trim();
|
|
321
|
+
const rawVoiceAgentSkills = Array.isArray(context?.voiceAgentSkills)
|
|
322
|
+
? context.voiceAgentSkills.map((s) => String(s || "").trim()).filter(Boolean)
|
|
323
|
+
: [];
|
|
324
|
+
const rawEnabledMcpServers = Array.isArray(context?.enabledMcpServers)
|
|
325
|
+
? context.enabledMcpServers.map((s) => String(s || "").trim()).filter(Boolean)
|
|
326
|
+
: [];
|
|
308
327
|
|
|
309
328
|
return {
|
|
310
329
|
sessionId: rawSessionId || null,
|
|
311
330
|
executor: VALID_EXECUTORS.has(rawExecutor) ? rawExecutor : null,
|
|
312
331
|
mode: VALID_AGENT_MODES.has(rawMode) ? rawMode : null,
|
|
313
332
|
model: rawModel || null,
|
|
333
|
+
voiceAgentId: rawVoiceAgentId || null,
|
|
334
|
+
voiceAgentName: rawVoiceAgentName || null,
|
|
335
|
+
voiceAgentInstructions: rawVoiceAgentInstructions || null,
|
|
336
|
+
voiceToolCapabilityPrompt: rawVoiceToolCapabilityPrompt || null,
|
|
337
|
+
voiceAgentSkills: rawVoiceAgentSkills,
|
|
338
|
+
enabledMcpServers: rawEnabledMcpServers,
|
|
314
339
|
};
|
|
315
340
|
}
|
|
316
341
|
|
|
317
342
|
async function buildSessionScopedInstructions(baseInstructions, callContext = {}) {
|
|
318
343
|
const context = sanitizeVoiceCallContext(callContext);
|
|
319
|
-
if (
|
|
344
|
+
if (
|
|
345
|
+
!context.sessionId
|
|
346
|
+
&& !context.executor
|
|
347
|
+
&& !context.mode
|
|
348
|
+
&& !context.model
|
|
349
|
+
&& !context.voiceAgentId
|
|
350
|
+
&& !context.voiceAgentInstructions
|
|
351
|
+
&& !context.voiceToolCapabilityPrompt
|
|
352
|
+
) {
|
|
320
353
|
return baseInstructions;
|
|
321
354
|
}
|
|
322
355
|
|
|
@@ -381,6 +414,22 @@ async function buildSessionScopedInstructions(baseInstructions, callContext = {}
|
|
|
381
414
|
"",
|
|
382
415
|
"## Bosun Voice Call Context",
|
|
383
416
|
`Active chat session id: ${context.sessionId || "none"}.`,
|
|
417
|
+
context.voiceAgentId
|
|
418
|
+
? `Active voice agent id: ${context.voiceAgentId}.`
|
|
419
|
+
: "Active voice agent id: default.",
|
|
420
|
+
context.voiceAgentName
|
|
421
|
+
? `Active voice agent name: ${context.voiceAgentName}.`
|
|
422
|
+
: "",
|
|
423
|
+
context.voiceAgentInstructions
|
|
424
|
+
? `Voice agent instruction emphasis: ${context.voiceAgentInstructions}`
|
|
425
|
+
: "",
|
|
426
|
+
context.voiceToolCapabilityPrompt || "",
|
|
427
|
+
context.enabledMcpServers?.length
|
|
428
|
+
? `Enabled MCP servers for this session: ${context.enabledMcpServers.join(", ")}.`
|
|
429
|
+
: "",
|
|
430
|
+
context.voiceAgentSkills?.length
|
|
431
|
+
? `Voice agent skills: ${context.voiceAgentSkills.join(", ")}.`
|
|
432
|
+
: "",
|
|
384
433
|
context.executor
|
|
385
434
|
? `Preferred executor for delegated work: ${context.executor}.`
|
|
386
435
|
: "Preferred executor for delegated work: use configured default.",
|
|
@@ -783,6 +832,12 @@ export function getVoiceConfig(forceReload = false) {
|
|
|
783
832
|
azureDeployment: String(ep.deployment || ep.azureDeployment || "").trim() || null,
|
|
784
833
|
voiceId: String(ep.voiceId || "").trim() || null,
|
|
785
834
|
visionModel: String(ep.visionModel || "").trim() || null,
|
|
835
|
+
transcriptionModel: String(ep.transcriptionModel || "").trim() || null,
|
|
836
|
+
// Azure defaults to transcription OFF unless explicitly enabled because
|
|
837
|
+
// item-level ASR failures can spam and destabilize long-running calls.
|
|
838
|
+
transcriptionEnabled: String(ep.provider || "").toLowerCase() === "azure"
|
|
839
|
+
? (ep.transcriptionEnabled === true)
|
|
840
|
+
: (ep.transcriptionEnabled !== false),
|
|
786
841
|
role: String(ep.role || "primary").trim() || "primary",
|
|
787
842
|
weight: typeof ep.weight === "number" ? ep.weight : 100,
|
|
788
843
|
name: String(ep.name || "").trim() || null,
|
|
@@ -852,7 +907,7 @@ export function getVoiceConfig(forceReload = false) {
|
|
|
852
907
|
const model = String(voice.model || process.env.VOICE_MODEL || defaultModel).trim() || defaultModel;
|
|
853
908
|
const voiceId = voice.voiceId || process.env.VOICE_ID || "alloy";
|
|
854
909
|
const turnDetection =
|
|
855
|
-
voice.turnDetection || process.env.VOICE_TURN_DETECTION || "
|
|
910
|
+
voice.turnDetection || process.env.VOICE_TURN_DETECTION || "semantic_vad";
|
|
856
911
|
const defaultVisionModel =
|
|
857
912
|
provider === "claude"
|
|
858
913
|
? CLAUDE_DEFAULT_VISION_MODEL
|
|
@@ -861,6 +916,19 @@ export function getVoiceConfig(forceReload = false) {
|
|
|
861
916
|
: OPENAI_DEFAULT_VISION_MODEL;
|
|
862
917
|
const visionModel =
|
|
863
918
|
voice.visionModel || process.env.VOICE_VISION_MODEL || defaultVisionModel;
|
|
919
|
+
const transcriptionModel =
|
|
920
|
+
voice.transcriptionModel || process.env.VOICE_TRANSCRIPTION_MODEL || DEFAULT_TRANSCRIBE_MODEL;
|
|
921
|
+
const transcriptionEnabledRaw =
|
|
922
|
+
voice.transcriptionEnabled ?? process.env.VOICE_TRANSCRIPTION_ENABLED;
|
|
923
|
+
const transcriptionEnabled =
|
|
924
|
+
transcriptionEnabledRaw == null
|
|
925
|
+
? true
|
|
926
|
+
: !["0", "false", "no", "off"].includes(
|
|
927
|
+
String(transcriptionEnabledRaw).trim().toLowerCase(),
|
|
928
|
+
);
|
|
929
|
+
const azureTranscriptionEnabled = parseOptionalBoolean(
|
|
930
|
+
voice.azureTranscriptionEnabled ?? process.env.VOICE_AZURE_TRANSCRIPTION_ENABLED,
|
|
931
|
+
);
|
|
864
932
|
const fallbackMode =
|
|
865
933
|
voice.fallbackMode || process.env.VOICE_FALLBACK_MODE || "browser";
|
|
866
934
|
const delegateExecutor =
|
|
@@ -906,6 +974,9 @@ For complex operations like writing code or creating PRs, delegate to the approp
|
|
|
906
974
|
turnDetection,
|
|
907
975
|
visionModel,
|
|
908
976
|
instructions,
|
|
977
|
+
transcriptionModel,
|
|
978
|
+
transcriptionEnabled,
|
|
979
|
+
azureTranscriptionEnabled,
|
|
909
980
|
fallbackMode,
|
|
910
981
|
delegateExecutor,
|
|
911
982
|
enabled,
|
|
@@ -1120,6 +1191,13 @@ async function createOpenAIEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1120
1191
|
const instructions = await buildSessionScopedInstructions(cfg.instructions, context);
|
|
1121
1192
|
const model = normalizeOpenAIRealtimeModel(candidate?.model || cfg.model || OPENAI_REALTIME_MODEL);
|
|
1122
1193
|
const voiceId = String(candidate?.voiceId || cfg.voiceId || "alloy").trim() || "alloy";
|
|
1194
|
+
// Per-endpoint transcription overrides
|
|
1195
|
+
const transcriptionModel = String(candidate?.transcriptionModel || "").trim() || cfg.transcriptionModel;
|
|
1196
|
+
const transcriptionEnabled = candidate?.transcriptionEnabled !== undefined
|
|
1197
|
+
? candidate.transcriptionEnabled !== false
|
|
1198
|
+
: cfg.azureTranscriptionEnabled != null
|
|
1199
|
+
? cfg.azureTranscriptionEnabled !== false
|
|
1200
|
+
: false;
|
|
1123
1201
|
|
|
1124
1202
|
const sessionConfig = {
|
|
1125
1203
|
model,
|
|
@@ -1132,9 +1210,9 @@ async function createOpenAIEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1132
1210
|
turn_detection: {
|
|
1133
1211
|
type: cfg.turnDetection,
|
|
1134
1212
|
...(cfg.turnDetection === "server_vad" ? {
|
|
1135
|
-
threshold: 0.
|
|
1213
|
+
threshold: 0.7,
|
|
1136
1214
|
prefix_padding_ms: 400,
|
|
1137
|
-
silence_duration_ms:
|
|
1215
|
+
silence_duration_ms: 1200,
|
|
1138
1216
|
create_response: true,
|
|
1139
1217
|
interrupt_response: true,
|
|
1140
1218
|
} : {}),
|
|
@@ -1144,7 +1222,7 @@ async function createOpenAIEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1144
1222
|
interrupt_response: true,
|
|
1145
1223
|
} : {}),
|
|
1146
1224
|
},
|
|
1147
|
-
input_audio_transcription: { model:
|
|
1225
|
+
...(transcriptionEnabled ? { input_audio_transcription: { model: transcriptionModel } } : {}),
|
|
1148
1226
|
tools: toolDefinitions,
|
|
1149
1227
|
};
|
|
1150
1228
|
|
|
@@ -1197,11 +1275,17 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1197
1275
|
candidate?.azureDeployment || candidate?.model || cfg.azureDeployment || OPENAI_REALTIME_MODEL,
|
|
1198
1276
|
);
|
|
1199
1277
|
const voiceId = String(candidate?.voiceId || cfg.voiceId || "alloy").trim() || "alloy";
|
|
1278
|
+
// Per-endpoint transcription overrides
|
|
1279
|
+
const transcriptionModel = String(candidate?.transcriptionModel || "").trim() || cfg.transcriptionModel;
|
|
1280
|
+
const transcriptionEnabled = candidate?.transcriptionEnabled !== undefined ? candidate.transcriptionEnabled !== false : cfg.transcriptionEnabled;
|
|
1200
1281
|
// GA protocol (gpt-realtime-1.5, gpt-realtime, etc.) uses /openai/v1/realtime/sessions?api-version=...
|
|
1201
1282
|
// Preview protocol uses /openai/realtimeapi/sessions?api-version=...
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1283
|
+
// Azure AI Foundry "Global Standard" resources may not support GA paths even for GA model names,
|
|
1284
|
+
// so we build both and try GA first with automatic fallback to preview.
|
|
1285
|
+
const gaUrl = `${resolvedEndpoint}/openai/v1/realtime/sessions?api-version=${AZURE_API_VERSION}`;
|
|
1286
|
+
const previewUrl = `${resolvedEndpoint}/openai/realtimeapi/sessions?api-version=${AZURE_API_VERSION}&deployment=${encodeURIComponent(deployment)}`;
|
|
1287
|
+
const useGa = isAzureGaProtocol(deployment);
|
|
1288
|
+
const url = useGa ? gaUrl : previewUrl;
|
|
1205
1289
|
|
|
1206
1290
|
const headers = {
|
|
1207
1291
|
"Content-Type": "application/json",
|
|
@@ -1226,9 +1310,9 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1226
1310
|
turn_detection: {
|
|
1227
1311
|
type: cfg.turnDetection,
|
|
1228
1312
|
...(cfg.turnDetection === "server_vad" ? {
|
|
1229
|
-
threshold: 0.
|
|
1313
|
+
threshold: 0.7,
|
|
1230
1314
|
prefix_padding_ms: 400,
|
|
1231
|
-
silence_duration_ms:
|
|
1315
|
+
silence_duration_ms: 1200,
|
|
1232
1316
|
create_response: true,
|
|
1233
1317
|
interrupt_response: true,
|
|
1234
1318
|
} : {}),
|
|
@@ -1238,16 +1322,28 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1238
1322
|
interrupt_response: true,
|
|
1239
1323
|
} : {}),
|
|
1240
1324
|
},
|
|
1241
|
-
input_audio_transcription: { model:
|
|
1325
|
+
...(transcriptionEnabled ? { input_audio_transcription: { model: transcriptionModel } } : {}),
|
|
1242
1326
|
tools: toolDefinitions,
|
|
1243
1327
|
};
|
|
1244
1328
|
|
|
1245
|
-
|
|
1329
|
+
let response = await fetch(url, {
|
|
1246
1330
|
method: "POST",
|
|
1247
1331
|
headers,
|
|
1248
1332
|
body: JSON.stringify(sessionConfig),
|
|
1249
1333
|
});
|
|
1250
1334
|
|
|
1335
|
+
// Azure AI Foundry "Global Standard" deployments may 404 on the GA path.
|
|
1336
|
+
// Automatically fall back to the preview path before giving up.
|
|
1337
|
+
if (!response.ok && response.status === 404 && useGa) {
|
|
1338
|
+
const previewConfig = { ...sessionConfig };
|
|
1339
|
+
delete previewConfig.type; // preview path does not accept type: "realtime"
|
|
1340
|
+
response = await fetch(previewUrl, {
|
|
1341
|
+
method: "POST",
|
|
1342
|
+
headers,
|
|
1343
|
+
body: JSON.stringify(previewConfig),
|
|
1344
|
+
});
|
|
1345
|
+
}
|
|
1346
|
+
|
|
1251
1347
|
if (!response.ok) {
|
|
1252
1348
|
const errorText = await buildProviderErrorDetails(response, "unknown");
|
|
1253
1349
|
throw new Error(`Azure Realtime session failed (${response.status}): ${errorText}`);
|
|
@@ -1255,9 +1351,22 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1255
1351
|
|
|
1256
1352
|
const data = await response.json();
|
|
1257
1353
|
// WebRTC URL diverges from /sessions URL: GA uses /openai/v1/realtime, preview uses /openai/realtime.
|
|
1258
|
-
|
|
1354
|
+
// If the GA session was created via fallback to preview, use preview WebRTC URL too.
|
|
1355
|
+
const gaSessionSucceeded = useGa && response.url?.includes("/v1/realtime");
|
|
1356
|
+
const webrtcUrl = (useGa && gaSessionSucceeded)
|
|
1259
1357
|
? `${resolvedEndpoint}/openai/v1/realtime?api-version=${AZURE_API_VERSION}`
|
|
1260
1358
|
: `${resolvedEndpoint}/openai/realtime?api-version=${AZURE_API_VERSION}&deployment=${encodeURIComponent(deployment)}`;
|
|
1359
|
+
|
|
1360
|
+
// WebSocket fallback URL — Azure Realtime API always supports WebSocket even
|
|
1361
|
+
// when WebRTC SDP is unavailable (404). The api-key query parameter provides
|
|
1362
|
+
// authentication since browsers cannot set custom headers on WebSocket.
|
|
1363
|
+
const wsAuthParam = resolvedOAuthToken
|
|
1364
|
+
? `access_token=${encodeURIComponent(resolvedOAuthToken)}`
|
|
1365
|
+
: `api-key=${encodeURIComponent(resolvedApiKey)}`;
|
|
1366
|
+
const wsUrl = (useGa && gaSessionSucceeded)
|
|
1367
|
+
? `wss://${new URL(resolvedEndpoint).host}/openai/v1/realtime?api-version=${AZURE_API_VERSION}&${wsAuthParam}`
|
|
1368
|
+
: `wss://${new URL(resolvedEndpoint).host}/openai/realtime?api-version=${AZURE_API_VERSION}&deployment=${encodeURIComponent(deployment)}&${wsAuthParam}`;
|
|
1369
|
+
|
|
1261
1370
|
return {
|
|
1262
1371
|
token: data.client_secret?.value || data.token,
|
|
1263
1372
|
expiresAt: data.client_secret?.expires_at || (Date.now() / 1000 + 60),
|
|
@@ -1265,6 +1374,7 @@ async function createAzureEphemeralToken(cfg, toolDefinitions = [], callContext
|
|
|
1265
1374
|
voiceId,
|
|
1266
1375
|
provider: "azure",
|
|
1267
1376
|
url: webrtcUrl,
|
|
1377
|
+
wsUrl,
|
|
1268
1378
|
sessionConfig,
|
|
1269
1379
|
azureEndpoint: resolvedEndpoint,
|
|
1270
1380
|
azureDeployment: deployment,
|
|
@@ -1420,9 +1530,16 @@ const VOICE_SESSION_ALLOWED_TOOLS = new Set([
|
|
|
1420
1530
|
"list_directory",
|
|
1421
1531
|
"get_recent_logs",
|
|
1422
1532
|
"list_workflows",
|
|
1533
|
+
"create_workflow",
|
|
1534
|
+
"update_workflow_definition",
|
|
1535
|
+
"delete_workflow",
|
|
1536
|
+
"create_workflow_from_template",
|
|
1537
|
+
"generate_workflow_with_agent",
|
|
1423
1538
|
"get_workflow_definition",
|
|
1539
|
+
"execute_workflow",
|
|
1424
1540
|
"list_workflow_runs",
|
|
1425
1541
|
"get_workflow_run",
|
|
1542
|
+
"analyze_workflow",
|
|
1426
1543
|
"retry_workflow_run",
|
|
1427
1544
|
"list_skills",
|
|
1428
1545
|
"list_prompts",
|