bosun 0.37.1 → 0.37.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -1
- package/agent-tool-config.mjs +14 -3
- package/bosun-skills.mjs +59 -4
- package/desktop/launch.mjs +18 -0
- package/desktop/main.mjs +52 -13
- package/fleet-coordinator.mjs +34 -1
- package/kanban-adapter.mjs +30 -3
- package/library-manager.mjs +48 -0
- package/maintenance.mjs +30 -5
- package/monitor.mjs +56 -0
- package/package.json +2 -1
- package/setup-web-server.mjs +71 -10
- package/ui/app.js +40 -3
- package/ui/components/session-list.js +25 -7
- package/ui/components/workspace-switcher.js +48 -1
- package/ui/demo.html +110 -0
- package/ui/modules/mic-track-registry.js +83 -0
- package/ui/modules/settings-schema.js +3 -0
- package/ui/modules/state.js +25 -0
- package/ui/modules/streaming.js +1 -1
- package/ui/modules/voice-barge-in.js +27 -0
- package/ui/modules/voice-client-sdk.js +260 -38
- package/ui/modules/voice-client.js +662 -58
- package/ui/modules/voice-overlay.js +829 -47
- package/ui/setup.html +151 -9
- package/ui/styles.css +258 -0
- package/ui/tabs/chat.js +11 -0
- package/ui/tabs/library.js +219 -9
- package/ui/tabs/settings.js +51 -11
- package/ui/tabs/telemetry.js +327 -105
- package/ui/tabs/workflows.js +22 -5
- package/ui-server.mjs +961 -103
- package/voice-relay.mjs +119 -11
- package/workflow-engine.mjs +54 -0
- package/workflow-nodes.mjs +177 -28
- package/workflow-templates/github.mjs +205 -94
- package/workflow-templates/task-batch.mjs +247 -0
- package/workflow-templates.mjs +15 -0
package/ui/modules/state.js
CHANGED
|
@@ -52,6 +52,7 @@ const CACHE_TTL = {
|
|
|
52
52
|
presence: 30000, config: 60000, projects: 60000, git: 20000,
|
|
53
53
|
infra: 30000,
|
|
54
54
|
telemetry: 15000,
|
|
55
|
+
analytics: 30000,
|
|
55
56
|
};
|
|
56
57
|
|
|
57
58
|
function _cacheKey(url) { return url; }
|
|
@@ -126,6 +127,9 @@ export const telemetryErrors = signal([]);
|
|
|
126
127
|
export const telemetryExecutors = signal({});
|
|
127
128
|
export const telemetryAlerts = signal([]);
|
|
128
129
|
|
|
130
|
+
// ── Usage Analytics
|
|
131
|
+
export const usageAnalytics = signal(null);
|
|
132
|
+
|
|
129
133
|
// ── Config (routing, regions, etc.)
|
|
130
134
|
export const configData = signal(null);
|
|
131
135
|
|
|
@@ -687,6 +691,26 @@ export async function loadTelemetryAlerts() {
|
|
|
687
691
|
_markFresh("telemetry");
|
|
688
692
|
}
|
|
689
693
|
|
|
694
|
+
/**
 * Fetch usage analytics for a trailing window and publish the result.
 *
 * The response's `data` field (or `null` when it is missing or the request
 * rejected) is written to the `usageAnalytics` signal, stored under the
 * request URL via `_cacheSet`, and the "analytics" cache group is marked fresh.
 * Pass `days=0` for all-time data.
 *
 * @param {number} [days=30] Size of the reporting window in days.
 * @returns {Promise<void>} Settles once the signal has been updated; never rejects.
 */
export async function loadUsageAnalytics(days = 30) {
  const endpoint = `/api/analytics/usage?days=${days}`;
  // Intentionally no _cacheFresh short-circuit: the caller chooses the day
  // window explicitly, and toggling the period must always refetch.
  try {
    const response = await apiFetch(endpoint, { _silent: true }).catch(() => ({ ok: false }));
    const payload = response?.data ?? null;
    usageAnalytics.value = payload;
    _cacheSet(endpoint, usageAnalytics.value);
    _markFresh("analytics");
  } catch {
    /* best effort */
  }
}
|
|
713
|
+
|
|
690
714
|
/* ═══════════════════════════════════════════════════════════════
|
|
691
715
|
* TAB REFRESH — map tab names to their required loaders
|
|
692
716
|
* ═══════════════════════════════════════════════════════════════ */
|
|
@@ -712,6 +736,7 @@ const TAB_LOADERS = {
|
|
|
712
736
|
loadTelemetryErrors(),
|
|
713
737
|
loadTelemetryExecutors(),
|
|
714
738
|
loadTelemetryAlerts(),
|
|
739
|
+
loadUsageAnalytics(30),
|
|
715
740
|
]),
|
|
716
741
|
settings: () => Promise.all([loadStatus(), loadConfig()]),
|
|
717
742
|
};
|
package/ui/modules/streaming.js
CHANGED
|
@@ -629,7 +629,7 @@ export function startAgentStatusTracking() {
|
|
|
629
629
|
const content = String(message.content || "").toLowerCase();
|
|
630
630
|
const lifecycle = String(message?.meta?.lifecycle || "").toLowerCase();
|
|
631
631
|
const adapter = payload.session?.type || "";
|
|
632
|
-
const sessionId = payload.
|
|
632
|
+
const sessionId = payload.session?.id || payload.sessionId || payload.taskId || "";
|
|
633
633
|
const sessionStatus = payload.session?.status || "active";
|
|
634
634
|
|
|
635
635
|
if (sessionStatus !== "active") {
|
|
package/ui/modules/voice-barge-in.js
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* voice-barge-in.js
|
|
3
|
+
*
|
|
4
|
+
* Shared policy helpers for automatic barge-in (interrupt assistant playback
|
|
5
|
+
* when the user starts speaking).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Decide whether an automatic barge-in may fire right now.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.muted=false] When truthy, barge-in is never allowed.
 * @param {boolean} [opts.audioActive=false] Barge-in is only allowed while truthy.
 * @param {number} [opts.now=Date.now()] Current timestamp in milliseconds.
 * @param {number} [opts.lastTriggeredAt=0] Timestamp of the previous barge-in.
 * @param {number} [opts.minIntervalMs=700] Minimum gap between two barge-ins.
 * @returns {boolean} `true` when not muted, audio is active, and at least
 *   `minIntervalMs` has elapsed since `lastTriggeredAt`.
 */
export function shouldAutoBargeIn({
  muted = false,
  audioActive = false,
  now = Date.now(),
  lastTriggeredAt = 0,
  minIntervalMs = 700,
} = {}) {
  if (muted || !audioActive) {
    return false;
  }
  const cooldownMs = Number(minIntervalMs || 0);
  const sinceLastTrigger = Number(now) - Number(lastTriggeredAt || 0);
  return sinceLastTrigger >= cooldownMs;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Decide whether a microphone level reading should trigger a barge-in.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.speaking=false] Must be truthy for a barge-in to fire.
 * @param {number} [opts.level=0] Current microphone level.
 * @param {number} [opts.threshold=0.08] Minimum level that counts as a trigger.
 * @returns {boolean} `true` only when `speaking` is truthy and `level` is at
 *   or above `threshold`.
 */
export function shouldAutoBargeInFromMicLevel({
  speaking = false,
  level = 0,
  threshold = 0.08,
} = {}) {
  if (!speaking) {
    return false;
  }
  return Number(level) >= Number(threshold);
}
|
|
package/ui/modules/voice-client-sdk.js
CHANGED
|
@@ -14,6 +14,13 @@
|
|
|
14
14
|
*/
|
|
15
15
|
|
|
16
16
|
import { signal, computed } from "@preact/signals";
|
|
17
|
+
import {
|
|
18
|
+
ensureMicTrackingPatched,
|
|
19
|
+
registerMicStream,
|
|
20
|
+
stopTrackedMicStreams,
|
|
21
|
+
} from "./mic-track-registry.js";
|
|
22
|
+
import { shouldAutoBargeIn } from "./voice-barge-in.js";
|
|
23
|
+
import { isVoiceMicMuted } from "./voice-client.js";
|
|
17
24
|
|
|
18
25
|
// ── State Signals (same shape as voice-client.js) ───────────────────────────
|
|
19
26
|
|
|
@@ -49,6 +56,7 @@ let _callContext = {
|
|
|
49
56
|
executor: null,
|
|
50
57
|
mode: null,
|
|
51
58
|
model: null,
|
|
59
|
+
voiceAgentId: null,
|
|
52
60
|
};
|
|
53
61
|
let _sdkConfig = null;
|
|
54
62
|
let _usingLegacyFallback = false;
|
|
@@ -64,6 +72,13 @@ let _pendingAssistantTranscriptText = "";
|
|
|
64
72
|
let _awaitingToolCompletionAck = false;
|
|
65
73
|
let _toolCompletionAckTimer = null;
|
|
66
74
|
let _assistantBaselineBeforeToolAck = "";
|
|
75
|
+
const _sdkCapturedMicStreams = new Set();
|
|
76
|
+
let _lastAutoBargeInAt = 0;
|
|
77
|
+
const AUTO_BARGE_IN_COOLDOWN_MS = 700;
|
|
78
|
+
// Set to true by stopSdkVoiceSession() so that any in-flight getUserMedia
|
|
79
|
+
// call in startAgentsSdkSession / startGeminiMicCapture releases the track
|
|
80
|
+
// immediately instead of leaving the browser mic indicator active.
|
|
81
|
+
let _sdkExplicitStop = false;
|
|
67
82
|
|
|
68
83
|
// ── Event System ────────────────────────────────────────────────────────────
|
|
69
84
|
|
|
@@ -86,12 +101,31 @@ function emit(event, data) {
|
|
|
86
101
|
}
|
|
87
102
|
}
|
|
88
103
|
|
|
104
|
+
/**
 * Interrupt the current SDK response if the barge-in policy allows it.
 *
 * The policy (`shouldAutoBargeIn`) is consulted with the current mute flag,
 * whether a session object exists, and the module-level cooldown. When it
 * allows the interrupt, the cooldown timestamp is recorded, the response is
 * interrupted, the voice state becomes "listening", and an "auto-barge-in"
 * event is emitted.
 *
 * @param {string} [reason="speech-started"] Label forwarded in the emitted event.
 * @returns {boolean} `true` when an interrupt was issued, otherwise `false`.
 */
function maybeAutoInterruptSdkResponse(reason = "speech-started") {
  const timestamp = Date.now();
  const policyInput = {
    muted: isVoiceMicMuted.value,
    audioActive: Boolean(_session),
    now: timestamp,
    lastTriggeredAt: _lastAutoBargeInAt,
    minIntervalMs: AUTO_BARGE_IN_COOLDOWN_MS,
  };
  if (shouldAutoBargeIn(policyInput)) {
    // Record the trigger time first so the cooldown applies to the next call.
    _lastAutoBargeInAt = timestamp;
    interruptSdkResponse();
    sdkVoiceState.value = "listening";
    emit("auto-barge-in", { reason });
    return true;
  }
  return false;
}
|
|
121
|
+
|
|
89
122
|
function _normalizeCallContext(options = {}) {
|
|
90
123
|
return {
|
|
91
124
|
sessionId: String(options?.sessionId || "").trim() || null,
|
|
92
125
|
executor: String(options?.executor || "").trim() || null,
|
|
93
126
|
mode: String(options?.mode || "").trim() || null,
|
|
94
127
|
model: String(options?.model || "").trim() || null,
|
|
128
|
+
voiceAgentId: String(options?.voiceAgentId || "").trim() || null,
|
|
95
129
|
};
|
|
96
130
|
}
|
|
97
131
|
|
|
@@ -122,6 +156,14 @@ function isNonFatalSdkSessionError(err) {
|
|
|
122
156
|
if (/setRemoteDescription/i.test(message) && /SessionDescription/i.test(message)) {
|
|
123
157
|
return true;
|
|
124
158
|
}
|
|
159
|
+
// Runtime item-level transcription failures should not hard-fail the live call.
|
|
160
|
+
if (
|
|
161
|
+
lower.includes("input transcription failed")
|
|
162
|
+
|| lower.includes("transcription failed for item")
|
|
163
|
+
|| lower.includes("input_audio_transcription")
|
|
164
|
+
) {
|
|
165
|
+
return true;
|
|
166
|
+
}
|
|
125
167
|
return false;
|
|
126
168
|
}
|
|
127
169
|
|
|
@@ -367,6 +409,7 @@ async function startAgentsSdkSession(config, options = {}) {
|
|
|
367
409
|
executor: _callContext.executor || undefined,
|
|
368
410
|
mode: _callContext.mode || undefined,
|
|
369
411
|
model: _callContext.model || undefined,
|
|
412
|
+
voiceAgentId: _callContext.voiceAgentId || undefined,
|
|
370
413
|
delegateOnly: false,
|
|
371
414
|
sdkMode: true,
|
|
372
415
|
}),
|
|
@@ -399,6 +442,7 @@ async function startAgentsSdkSession(config, options = {}) {
|
|
|
399
442
|
executor: _callContext.executor || undefined,
|
|
400
443
|
mode: _callContext.mode || undefined,
|
|
401
444
|
model: _callContext.model || undefined,
|
|
445
|
+
voiceAgentId: _callContext.voiceAgentId || undefined,
|
|
402
446
|
}),
|
|
403
447
|
});
|
|
404
448
|
} catch (fetchErr) {
|
|
@@ -459,6 +503,12 @@ async function startAgentsSdkSession(config, options = {}) {
|
|
|
459
503
|
const model = String(tokenData.model || resolvedConfig.model || "gpt-realtime-1.5").trim();
|
|
460
504
|
const voiceId = String(tokenData.voiceId || resolvedConfig.voiceId || "alloy").trim();
|
|
461
505
|
const turnDetection = String(resolvedConfig.turnDetection || "semantic_vad").trim();
|
|
506
|
+
// Use server-provided transcription model from sessionConfig, fall back to default
|
|
507
|
+
const serverSessionConfig = tokenData?.sessionConfig || {};
|
|
508
|
+
const transcriptionModel =
|
|
509
|
+
serverSessionConfig?.input_audio_transcription?.model || "gpt-4o-transcribe";
|
|
510
|
+
const transcriptionEnabled =
|
|
511
|
+
serverSessionConfig?.input_audio_transcription !== undefined;
|
|
462
512
|
const turnDetectionConfig = {
|
|
463
513
|
type: turnDetection,
|
|
464
514
|
...(turnDetection === "server_vad"
|
|
@@ -491,13 +541,13 @@ async function startAgentsSdkSession(config, options = {}) {
|
|
|
491
541
|
audio: {
|
|
492
542
|
input: {
|
|
493
543
|
format: "pcm16",
|
|
494
|
-
transcription: { model:
|
|
544
|
+
...(transcriptionEnabled ? { transcription: { model: transcriptionModel } } : {}),
|
|
495
545
|
turnDetection: turnDetectionConfig,
|
|
496
546
|
},
|
|
497
547
|
output: {
|
|
498
548
|
format: "pcm16",
|
|
499
549
|
voice: voiceId,
|
|
500
|
-
transcription: { model:
|
|
550
|
+
...(transcriptionEnabled ? { transcription: { model: transcriptionModel } } : {}),
|
|
501
551
|
},
|
|
502
552
|
},
|
|
503
553
|
},
|
|
@@ -540,6 +590,11 @@ async function startAgentsSdkSession(config, options = {}) {
|
|
|
540
590
|
emit("interrupt", {});
|
|
541
591
|
});
|
|
542
592
|
|
|
593
|
+
session.on("speech_started", () => {
|
|
594
|
+
maybeAutoInterruptSdkResponse("speech-started");
|
|
595
|
+
emit("speech-started", {});
|
|
596
|
+
});
|
|
597
|
+
|
|
543
598
|
session.on("tool_call_start", (event) => {
|
|
544
599
|
const callId = event?.callId || event?.call_id || `tc-${Date.now()}`;
|
|
545
600
|
const name = event?.name || event?.toolName || "unknown";
|
|
@@ -630,7 +685,35 @@ async function startAgentsSdkSession(config, options = {}) {
|
|
|
630
685
|
// ignore URL logging issues
|
|
631
686
|
}
|
|
632
687
|
|
|
633
|
-
|
|
688
|
+
// Attempt WebRTC connection first. For Azure, if it fails (404 — WebRTC not
|
|
689
|
+
// supported), retry with the WebSocket URL so the SDK uses WS transport.
|
|
690
|
+
// Wrap getUserMedia during connect so we can always stop SDK-owned mic tracks
|
|
691
|
+
// on teardown, even if the SDK keeps hidden stream references.
|
|
692
|
+
await _withGetUserMediaCapture(async () => {
|
|
693
|
+
try {
|
|
694
|
+
await session.connect(connectOpts);
|
|
695
|
+
} catch (connectErr) {
|
|
696
|
+
const errMsg = String(connectErr?.message || "");
|
|
697
|
+
const isWebRtc404 = /404|not found|SDP/i.test(errMsg);
|
|
698
|
+
const hasWsUrl = Boolean(String(tokenData?.wsUrl || "").trim());
|
|
699
|
+
if (isWebRtc404 && hasWsUrl && tokenData.provider === "azure") {
|
|
700
|
+
console.warn("[voice-client-sdk] WebRTC connect failed (404) — retrying via Azure WebSocket");
|
|
701
|
+
await session.connect({ ...connectOpts, url: tokenData.wsUrl });
|
|
702
|
+
} else {
|
|
703
|
+
throw connectErr;
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
});
|
|
707
|
+
|
|
708
|
+
// Guard: stopSdkVoiceSession() may have been called while session.connect()
|
|
709
|
+
// was awaiting. Release any mic streams captured during connect so that the
|
|
710
|
+
// browser indicator goes away, then abort this session setup.
|
|
711
|
+
if (_sdkExplicitStop) {
|
|
712
|
+
_stopCapturedSdkMicStreams();
|
|
713
|
+
stopTrackedMicStreams();
|
|
714
|
+
try { session.close?.(); } catch { /* ignore */ }
|
|
715
|
+
throw new Error("SDK session was stopped during connection");
|
|
716
|
+
}
|
|
634
717
|
|
|
635
718
|
if (_agentsRealtimeModuleSource) {
|
|
636
719
|
console.info(`[voice-client-sdk] using OpenAI Realtime SDK from ${_agentsRealtimeModuleSource}`);
|
|
@@ -689,6 +772,7 @@ async function startGeminiLiveSession(config, options = {}) {
|
|
|
689
772
|
executor: _callContext.executor,
|
|
690
773
|
mode: _callContext.mode,
|
|
691
774
|
model: resolvedConfig.model,
|
|
775
|
+
voiceAgentId: _callContext.voiceAgentId || undefined,
|
|
692
776
|
}));
|
|
693
777
|
|
|
694
778
|
_session = ws;
|
|
@@ -757,6 +841,17 @@ async function startGeminiMicCapture(ws) {
|
|
|
757
841
|
channelCount: 1,
|
|
758
842
|
},
|
|
759
843
|
});
|
|
844
|
+
registerMicStream(_geminiMicStream);
|
|
845
|
+
|
|
846
|
+
// Guard: stopSdkVoiceSession() may have raced with this getUserMedia await.
|
|
847
|
+
// Release the mic immediately instead of leaving the indicator active.
|
|
848
|
+
if (_sdkExplicitStop) {
|
|
849
|
+
for (const track of _geminiMicStream.getTracks()) {
|
|
850
|
+
try { track.stop(); } catch { /* ignore */ }
|
|
851
|
+
}
|
|
852
|
+
_geminiMicStream = null;
|
|
853
|
+
throw new Error("SDK session was stopped during microphone acquisition");
|
|
854
|
+
}
|
|
760
855
|
|
|
761
856
|
// Use MediaRecorder to stream chunks to server
|
|
762
857
|
const recorder = new MediaRecorder(_geminiMicStream, {
|
|
@@ -776,45 +871,118 @@ async function startGeminiMicCapture(ws) {
|
|
|
776
871
|
sdkVoiceState.value = "listening";
|
|
777
872
|
}
|
|
778
873
|
|
|
779
|
-
function
|
|
780
|
-
if (!source) return;
|
|
781
|
-
const
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
874
|
+
/**
 * Breadth-first search an arbitrary object graph for audio tracks and invoke
 * `cb` once per distinct track.
 *
 * A node contributes tracks through `getTracks()` (audio-kind entries only)
 * and through `getSenders()` (each sender's `track`, audio-kind only). The
 * node's own enumerable property values are then searched in turn.
 *
 * The walk is bounded: at most 220 nodes are expanded and nodes deeper than
 * 4 levels below `source` are not expanded. Cycles are handled by tracking
 * visited objects. Errors thrown while enumerating a node (including errors
 * thrown by `cb`) are swallowed and end enumeration of that node's tracks.
 *
 * @param {object|Function} source Root of the object graph to search.
 * @param {(track: object) => void} cb Called once for each distinct audio track.
 * @returns {void}
 */
function forEachAudioTrackInSource(source, cb) {
  if (!source || typeof cb !== "function") return;

  const MAX_VISITED_NODES = 220;
  const MAX_DEPTH = 4;
  const isTraversable = (value) =>
    Boolean(value) && (typeof value === "object" || typeof value === "function");

  const seenObjects = new Set();
  const seenTracks = new Set();
  const reportTrack = (track) => {
    if (!track || String(track?.kind || "").toLowerCase() !== "audio") return;
    if (seenTracks.has(track)) return;
    seenTracks.add(track);
    cb(track);
  };

  const queue = [{ node: source, depth: 0 }];
  let visited = 0;

  // An index cursor replaces Array#shift so dequeuing stays O(1).
  for (let head = 0; head < queue.length; head += 1) {
    const { node, depth } = queue[head];
    if (!isTraversable(node) || seenObjects.has(node)) continue;
    seenObjects.add(node);
    visited += 1;
    // Once the budget is spent no later node can be expanded either, so stop
    // instead of draining the rest of the queue.
    if (visited > MAX_VISITED_NODES) break;

    if (typeof node?.getTracks === "function") {
      try {
        for (const track of node.getTracks()) reportTrack(track);
      } catch {
        // ignore stream enumeration failures
      }
    }

    if (typeof node?.getSenders === "function") {
      try {
        for (const sender of node.getSenders()) reportTrack(sender?.track);
      } catch {
        // ignore pc sender failures
      }
    }

    // Children of a node at the depth limit would be discarded on dequeue, so
    // they are never enqueued.
    if (depth >= MAX_DEPTH) continue;
    // Typed arrays / DataViews hold only primitive elements; expanding them
    // would allocate one entry per element for nothing.
    if (ArrayBuffer.isView(node)) continue;

    let values = null;
    try {
      values = Object.values(node);
    } catch {
      values = null;
    }
    if (!values) continue;
    for (const next of values) {
      if (isTraversable(next)) queue.push({ node: next, depth: depth + 1 });
    }
  }
}
|
|
801
929
|
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
930
|
+
/**
 * Stop every audio track reachable from `source`.
 *
 * Tracks are located with `forEachAudioTrackInSource`; a failure to stop one
 * track is ignored so the remaining tracks are still attempted.
 *
 * @param {object} source Object graph to search for audio tracks.
 * @returns {void}
 */
function stopMicLikeTracks(source) {
  const stopQuietly = (track) => {
    try {
      track.stop();
    } catch {
      /* ignore */
    }
  };
  forEachAudioTrackInSource(source, stopQuietly);
}
|
|
935
|
+
|
|
936
|
+
/**
 * Remember a stream so its audio tracks can be stopped on teardown.
 *
 * The stream is added to `_sdkCapturedMicStreams` only when it exposes
 * `getTracks()` and `getAudioTracks()` reports at least one track.
 *
 * @param {object} stream Candidate stream object.
 * @returns {void}
 */
function _captureSdkMicStream(stream) {
  if (typeof stream?.getTracks !== "function") return;
  const audioTracks = stream.getAudioTracks?.() || [];
  if (audioTracks.length > 0) {
    _sdkCapturedMicStreams.add(stream);
  }
}
|
|
942
|
+
|
|
943
|
+
/**
 * Stop the audio tracks of every stream in `_sdkCapturedMicStreams`, then
 * empty the set.
 *
 * Non-audio tracks are left running. Failures on a single track or a single
 * stream are ignored so the remaining ones are still processed.
 *
 * @returns {void}
 */
function _stopCapturedSdkMicStreams() {
  _sdkCapturedMicStreams.forEach((capturedStream) => {
    try {
      for (const candidate of capturedStream.getTracks()) {
        const isAudio = String(candidate?.kind || "").toLowerCase() === "audio";
        if (isAudio) {
          try {
            candidate.stop();
          } catch {
            /* ignore */
          }
        }
      }
    } catch {
      // best effort
    }
  });
  _sdkCapturedMicStreams.clear();
}
|
|
956
|
+
|
|
957
|
+
/**
 * Run `fn` while `navigator.mediaDevices.getUserMedia` is temporarily wrapped
 * so that every stream it resolves is passed to `_captureSdkMicStream`.
 *
 * When `mediaDevices` or `getUserMedia` is unavailable, `fn` runs unwrapped.
 * The wrapper is removed once `fn` settles, whether it resolved or rejected:
 *  - if `getUserMedia` was originally inherited (no own property), the
 *    temporary own property is deleted so the inherited method is visible
 *    again instead of being shadowed by a stale copy;
 *  - if something else replaced `getUserMedia` while `fn` was running, that
 *    replacement is left in place rather than being overwritten.
 *
 * @template T
 * @param {() => Promise<T> | T} fn Work to run while capture is active.
 * @returns {Promise<T>} Whatever `fn` resolves to; rejections propagate.
 */
async function _withGetUserMediaCapture(fn) {
  const mediaDevices = globalThis?.navigator?.mediaDevices;
  const original = mediaDevices?.getUserMedia;
  if (!mediaDevices || typeof original !== "function") {
    return await fn();
  }

  const hadOwnProperty = Object.prototype.hasOwnProperty.call(mediaDevices, "getUserMedia");
  const capturingGetUserMedia = async (...args) => {
    const stream = await original.apply(mediaDevices, args);
    _captureSdkMicStream(stream);
    return stream;
  };

  mediaDevices.getUserMedia = capturingGetUserMedia;
  try {
    return await fn();
  } finally {
    // Only undo our own patch; a different function here means someone else
    // patched getUserMedia in the meantime and owns the slot now.
    if (mediaDevices.getUserMedia === capturingGetUserMedia) {
      if (hadOwnProperty) {
        mediaDevices.getUserMedia = original;
      } else {
        delete mediaDevices.getUserMedia;
      }
    }
  }
}
|
|
974
|
+
|
|
975
|
+
/**
 * Set the `enabled` flag on every audio track reachable from `source`.
 *
 * Tracks are located with `forEachAudioTrackInSource`. A track that throws
 * when assigned is skipped.
 *
 * @param {object} source Object graph to search for audio tracks.
 * @param {boolean} enabled Desired state; coerced to a boolean.
 * @returns {boolean} `true` when at least one track was updated.
 */
function setMicLikeTracksEnabled(source, enabled) {
  const desiredState = Boolean(enabled);
  let updatedCount = 0;
  forEachAudioTrackInSource(source, (track) => {
    try {
      track.enabled = desiredState;
      updatedCount += 1;
    } catch {
      // ignore per-track failures
    }
  });
  return updatedCount > 0;
}
|
|
819
987
|
|
|
820
988
|
function handleGeminiServerEvent(msg) {
|
|
@@ -849,6 +1017,7 @@ function handleGeminiServerEvent(msg) {
|
|
|
849
1017
|
break;
|
|
850
1018
|
|
|
851
1019
|
case "speech_started":
|
|
1020
|
+
maybeAutoInterruptSdkResponse("speech-started");
|
|
852
1021
|
sdkVoiceState.value = "listening";
|
|
853
1022
|
emit("speech-started", {});
|
|
854
1023
|
break;
|
|
@@ -888,6 +1057,7 @@ async function handleGeminiToolCall(msg) {
|
|
|
888
1057
|
executor: _callContext.executor || undefined,
|
|
889
1058
|
mode: _callContext.mode || undefined,
|
|
890
1059
|
model: _callContext.model || undefined,
|
|
1060
|
+
voiceAgentId: _callContext.voiceAgentId || undefined,
|
|
891
1061
|
}),
|
|
892
1062
|
});
|
|
893
1063
|
const result = await res.json();
|
|
@@ -955,11 +1125,14 @@ function playGeminiAudio(data) {
|
|
|
955
1125
|
* @returns {Promise<{ sdk: boolean, provider: string }>}
|
|
956
1126
|
*/
|
|
957
1127
|
export async function startSdkVoiceSession(options = {}) {
|
|
1128
|
+
ensureMicTrackingPatched();
|
|
1129
|
+
_sdkExplicitStop = false; // reset before each new session attempt
|
|
958
1130
|
if (_session) {
|
|
959
1131
|
console.warn("[voice-client-sdk] Session already active");
|
|
960
1132
|
return { sdk: sdkVoiceSdkActive.value, provider: sdkVoiceProvider.value };
|
|
961
1133
|
}
|
|
962
1134
|
|
|
1135
|
+
isVoiceMicMuted.value = false;
|
|
963
1136
|
_callContext = _normalizeCallContext(options);
|
|
964
1137
|
sdkVoiceBoundSessionId.value = _callContext.sessionId;
|
|
965
1138
|
sdkVoiceState.value = "connecting";
|
|
@@ -968,6 +1141,7 @@ export async function startSdkVoiceSession(options = {}) {
|
|
|
968
1141
|
sdkVoiceResponse.value = "";
|
|
969
1142
|
sdkVoiceToolCalls.value = [];
|
|
970
1143
|
_usingLegacyFallback = false;
|
|
1144
|
+
_lastAutoBargeInAt = 0;
|
|
971
1145
|
_resetTranscriptPersistenceState();
|
|
972
1146
|
|
|
973
1147
|
try {
|
|
@@ -1018,6 +1192,7 @@ export async function startSdkVoiceSession(options = {}) {
|
|
|
1018
1192
|
sdkVoiceSdkActive.value = false;
|
|
1019
1193
|
sdkVoiceState.value = "idle";
|
|
1020
1194
|
sdkVoiceError.value = null; // Don't show error — we'll fallback
|
|
1195
|
+
_stopCapturedSdkMicStreams();
|
|
1021
1196
|
emit("sdk-unavailable", {
|
|
1022
1197
|
reason: reason || "SDK unavailable",
|
|
1023
1198
|
provider: _sdkConfig?.provider || "unknown",
|
|
@@ -1035,6 +1210,9 @@ export async function startSdkVoiceSession(options = {}) {
|
|
|
1035
1210
|
* Stop the current SDK voice session.
|
|
1036
1211
|
*/
|
|
1037
1212
|
export function stopSdkVoiceSession() {
|
|
1213
|
+
// Set before any cleanup so in-flight getUserMedia / session.connect awaiters
|
|
1214
|
+
// detect the cancellation and release acquired mic tracks immediately.
|
|
1215
|
+
_sdkExplicitStop = true;
|
|
1038
1216
|
emit("session-ending", { sessionId: sdkVoiceSessionId.value });
|
|
1039
1217
|
_flushPendingTranscriptBuffers();
|
|
1040
1218
|
if (_geminiRecorder) {
|
|
@@ -1055,6 +1233,7 @@ export function stopSdkVoiceSession() {
|
|
|
1055
1233
|
}
|
|
1056
1234
|
_session = null;
|
|
1057
1235
|
}
|
|
1236
|
+
_stopCapturedSdkMicStreams();
|
|
1058
1237
|
|
|
1059
1238
|
// Stop Gemini mic stream if active
|
|
1060
1239
|
if (_geminiMicStream) {
|
|
@@ -1063,6 +1242,9 @@ export function stopSdkVoiceSession() {
|
|
|
1063
1242
|
}
|
|
1064
1243
|
_geminiMicStream = null;
|
|
1065
1244
|
}
|
|
1245
|
+
// Force-stop any tracked audio input streams to avoid stale browser mic
|
|
1246
|
+
// capture indicators after call close (covers async/race teardown paths).
|
|
1247
|
+
stopTrackedMicStreams();
|
|
1066
1248
|
|
|
1067
1249
|
clearInterval(_durationTimer);
|
|
1068
1250
|
_durationTimer = null;
|
|
@@ -1076,7 +1258,14 @@ export function stopSdkVoiceSession() {
|
|
|
1076
1258
|
sdkVoiceDuration.value = 0;
|
|
1077
1259
|
sdkVoiceProvider.value = null;
|
|
1078
1260
|
sdkVoiceSdkActive.value = false;
|
|
1079
|
-
|
|
1261
|
+
isVoiceMicMuted.value = false;
|
|
1262
|
+
_callContext = {
|
|
1263
|
+
sessionId: null,
|
|
1264
|
+
executor: null,
|
|
1265
|
+
mode: null,
|
|
1266
|
+
model: null,
|
|
1267
|
+
voiceAgentId: null,
|
|
1268
|
+
};
|
|
1080
1269
|
_usingLegacyFallback = false;
|
|
1081
1270
|
_resetTranscriptPersistenceState();
|
|
1082
1271
|
|
|
@@ -1099,6 +1288,39 @@ export function interruptSdkResponse() {
|
|
|
1099
1288
|
}
|
|
1100
1289
|
}
|
|
1101
1290
|
|
|
1291
|
+
/**
 * Toggle microphone mute state for SDK-driven voice sessions.
 *
 * The new state is the inverse of `isVoiceMicMuted`. It is applied in three
 * places, each best-effort:
 *  1. the session's own controls, when present: `unmute()` is used for
 *     unmuting if it exists, otherwise `mute(flag)` is called with the target
 *     muted state passed explicitly;
 *  2. every audio track reachable from the session object;
 *  3. every audio track of the Gemini microphone stream, if one is active.
 *
 * @returns {boolean} The new muted state (`true` when the mic is now muted).
 */
export function toggleSdkMicMute() {
  const willBeMuted = !isVoiceMicMuted.value;
  const enabled = !willBeMuted;

  if (_session) {
    // Try SDK-native controls first when available.
    try {
      if (enabled && typeof _session.unmute === "function") {
        _session.unmute();
      } else if (typeof _session.mute === "function") {
        // Pass the target state explicitly: sessions exposing only
        // mute(muted: boolean) would otherwise receive `undefined` when
        // muting and never be told to unmute.
        _session.mute(willBeMuted);
      }
    } catch {
      // fall through to track-level toggles
    }
    setMicLikeTracksEnabled(_session, enabled);
  }

  if (_geminiMicStream) {
    for (const track of _geminiMicStream.getTracks()) {
      if (String(track?.kind || "").toLowerCase() !== "audio") continue;
      try { track.enabled = enabled; } catch { /* ignore */ }
    }
  }

  isVoiceMicMuted.value = willBeMuted;
  return isVoiceMicMuted.value;
}
|
|
1323
|
+
|
|
1102
1324
|
/**
|
|
1103
1325
|
* Send a text message to the voice agent.
|
|
1104
1326
|
* @param {string} text
|