bosun 0.37.0 → 0.37.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/.env.example +4 -1
  2. package/agent-tool-config.mjs +338 -0
  3. package/bosun-skills.mjs +59 -4
  4. package/bosun.schema.json +1 -1
  5. package/desktop/launch.mjs +18 -0
  6. package/desktop/main.mjs +52 -13
  7. package/fleet-coordinator.mjs +34 -1
  8. package/kanban-adapter.mjs +30 -3
  9. package/library-manager.mjs +66 -0
  10. package/maintenance.mjs +30 -5
  11. package/monitor.mjs +56 -0
  12. package/package.json +4 -1
  13. package/setup-web-server.mjs +73 -12
  14. package/setup.mjs +3 -3
  15. package/ui/app.js +40 -3
  16. package/ui/components/session-list.js +25 -7
  17. package/ui/components/workspace-switcher.js +48 -1
  18. package/ui/demo.html +176 -0
  19. package/ui/modules/mic-track-registry.js +83 -0
  20. package/ui/modules/settings-schema.js +4 -1
  21. package/ui/modules/state.js +25 -0
  22. package/ui/modules/streaming.js +1 -1
  23. package/ui/modules/voice-barge-in.js +27 -0
  24. package/ui/modules/voice-client-sdk.js +268 -42
  25. package/ui/modules/voice-client.js +665 -61
  26. package/ui/modules/voice-overlay.js +829 -47
  27. package/ui/setup.html +151 -9
  28. package/ui/styles.css +258 -0
  29. package/ui/tabs/chat.js +11 -0
  30. package/ui/tabs/library.js +890 -15
  31. package/ui/tabs/settings.js +51 -11
  32. package/ui/tabs/telemetry.js +327 -105
  33. package/ui/tabs/workflows.js +86 -0
  34. package/ui-server.mjs +1201 -107
  35. package/voice-action-dispatcher.mjs +81 -0
  36. package/voice-agents-sdk.mjs +2 -2
  37. package/voice-relay.mjs +131 -14
  38. package/voice-tools.mjs +475 -9
  39. package/workflow-engine.mjs +54 -0
  40. package/workflow-nodes.mjs +177 -28
  41. package/workflow-templates/github.mjs +205 -94
  42. package/workflow-templates/task-batch.mjs +247 -0
  43. package/workflow-templates.mjs +15 -0
@@ -629,7 +629,7 @@ export function startAgentStatusTracking() {
629
629
  const content = String(message.content || "").toLowerCase();
630
630
  const lifecycle = String(message?.meta?.lifecycle || "").toLowerCase();
631
631
  const adapter = payload.session?.type || "";
632
- const sessionId = payload.sessionId || payload.taskId || payload.session?.id || "";
632
+ const sessionId = payload.session?.id || payload.sessionId || payload.taskId || "";
633
633
  const sessionStatus = payload.session?.status || "active";
634
634
 
635
635
  if (sessionStatus !== "active") {
@@ -0,0 +1,27 @@
1
+ /**
2
+ * voice-barge-in.js
3
+ *
4
+ * Shared policy helpers for automatic barge-in (interrupt assistant playback
5
+ * when the user starts speaking).
6
+ */
7
+
8
/**
 * Decide whether an automatic barge-in (interrupting assistant playback
 * because the user started speaking) may fire right now.
 *
 * @param {object} [options]
 * @param {boolean} [options.muted=false] - When truthy, barge-in never fires.
 * @param {boolean} [options.audioActive=false] - Barge-in only fires when truthy.
 * @param {number} [options.now=Date.now()] - Current timestamp in milliseconds.
 * @param {number} [options.lastTriggeredAt=0] - Timestamp of the previous barge-in.
 * @param {number} [options.minIntervalMs=700] - Minimum gap between two barge-ins.
 * @returns {boolean} True when the cooldown has elapsed and barge-in is allowed.
 */
export function shouldAutoBargeIn({
  muted = false,
  audioActive = false,
  now = Date.now(),
  lastTriggeredAt = 0,
  minIntervalMs = 700,
} = {}) {
  if (muted || !audioActive) return false;

  // A value that coerces to NaN would make every comparison below false and
  // silently disable barge-in forever, so fall back to a safe value instead.
  // Infinity is kept on purpose: it is a valid way to say "never".
  const numberOr = (value, fallback) => {
    const parsed = Number(value);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  const currentTime = numberOr(now, Date.now());
  const previousTrigger = numberOr(lastTriggeredAt || 0, 0);
  const cooldownMs = numberOr(minIntervalMs || 0, 0);
  return currentTime - previousTrigger >= cooldownMs;
}
20
+
21
/**
 * Mic-level variant of the barge-in policy: allow barge-in only while the
 * user is flagged as speaking and the measured level reaches the threshold.
 *
 * @param {object} [options]
 * @param {boolean} [options.speaking=false] - Whether speech is currently detected.
 * @param {number} [options.level=0] - Measured microphone level.
 * @param {number} [options.threshold=0.08] - Minimum level that counts as speech.
 * @returns {boolean} True when speaking and `level >= threshold`.
 */
export function shouldAutoBargeInFromMicLevel(options = {}) {
  const { speaking = false, level = 0, threshold = 0.08 } = options;
  if (!speaking) return false;
  const measured = Number(level);
  const required = Number(threshold);
  return measured >= required;
}
@@ -14,6 +14,13 @@
14
14
  */
15
15
 
16
16
  import { signal, computed } from "@preact/signals";
17
+ import {
18
+ ensureMicTrackingPatched,
19
+ registerMicStream,
20
+ stopTrackedMicStreams,
21
+ } from "./mic-track-registry.js";
22
+ import { shouldAutoBargeIn } from "./voice-barge-in.js";
23
+ import { isVoiceMicMuted } from "./voice-client.js";
17
24
 
18
25
  // ── State Signals (same shape as voice-client.js) ───────────────────────────
19
26
 
@@ -49,6 +56,7 @@ let _callContext = {
49
56
  executor: null,
50
57
  mode: null,
51
58
  model: null,
59
+ voiceAgentId: null,
52
60
  };
53
61
  let _sdkConfig = null;
54
62
  let _usingLegacyFallback = false;
@@ -64,6 +72,13 @@ let _pendingAssistantTranscriptText = "";
64
72
  let _awaitingToolCompletionAck = false;
65
73
  let _toolCompletionAckTimer = null;
66
74
  let _assistantBaselineBeforeToolAck = "";
75
+ const _sdkCapturedMicStreams = new Set();
76
+ let _lastAutoBargeInAt = 0;
77
+ const AUTO_BARGE_IN_COOLDOWN_MS = 700;
78
+ // Set to true by stopSdkVoiceSession() so that any in-flight getUserMedia
79
+ // call in startAgentsSdkSession / startGeminiMicCapture releases the track
80
+ // immediately instead of leaving the browser mic indicator active.
81
+ let _sdkExplicitStop = false;
67
82
 
68
83
  // ── Event System ────────────────────────────────────────────────────────────
69
84
 
@@ -86,12 +101,31 @@ function emit(event, data) {
86
101
  }
87
102
  }
88
103
 
104
/**
 * Interrupt the assistant's current response when the barge-in policy allows it.
 *
 * The policy is evaluated with the shared mute signal, whether an SDK session
 * object exists, and the module-level cooldown. On success the cooldown
 * timestamp is updated, the response is interrupted, the voice state becomes
 * "listening" and an "auto-barge-in" event is emitted.
 *
 * @param {string} [reason="speech-started"] - Forwarded in the emitted event payload.
 * @returns {boolean} True when an interrupt was issued, false when suppressed.
 */
function maybeAutoInterruptSdkResponse(reason = "speech-started") {
  const triggeredAt = Date.now();
  const allowed = shouldAutoBargeIn({
    muted: isVoiceMicMuted.value,
    audioActive: Boolean(_session),
    now: triggeredAt,
    lastTriggeredAt: _lastAutoBargeInAt,
    minIntervalMs: AUTO_BARGE_IN_COOLDOWN_MS,
  });
  if (!allowed) {
    return false;
  }

  // Record the trigger time first so the cooldown applies to the next call.
  _lastAutoBargeInAt = triggeredAt;
  interruptSdkResponse();
  sdkVoiceState.value = "listening";
  emit("auto-barge-in", { reason });
  return true;
}
121
+
89
122
  function _normalizeCallContext(options = {}) {
90
123
  return {
91
124
  sessionId: String(options?.sessionId || "").trim() || null,
92
125
  executor: String(options?.executor || "").trim() || null,
93
126
  mode: String(options?.mode || "").trim() || null,
94
127
  model: String(options?.model || "").trim() || null,
128
+ voiceAgentId: String(options?.voiceAgentId || "").trim() || null,
95
129
  };
96
130
  }
97
131
 
@@ -122,6 +156,14 @@ function isNonFatalSdkSessionError(err) {
122
156
  if (/setRemoteDescription/i.test(message) && /SessionDescription/i.test(message)) {
123
157
  return true;
124
158
  }
159
+ // Runtime item-level transcription failures should not hard-fail the live call.
160
+ if (
161
+ lower.includes("input transcription failed")
162
+ || lower.includes("transcription failed for item")
163
+ || lower.includes("input_audio_transcription")
164
+ ) {
165
+ return true;
166
+ }
125
167
  return false;
126
168
  }
127
169
 
@@ -246,7 +288,7 @@ function _flushPendingTranscriptBuffers() {
246
288
  }
247
289
 
248
290
  const finalUser = String(_pendingUserTranscriptText || "").trim();
249
- if (finalUser) {
291
+ if (finalUser && ENABLE_USER_TRANSCRIPT) {
250
292
  _persistTranscriptIfNew("user", finalUser, "sdk.history_updated.user.flush");
251
293
  }
252
294
 
@@ -314,10 +356,13 @@ function _scheduleUserTranscriptFinalize(text) {
314
356
  if (ENABLE_USER_TRANSCRIPT) {
315
357
  sdkVoiceTranscript.value = finalText;
316
358
  emit("transcript", { text: finalText, final: true });
359
+ _persistTranscriptIfNew("user", finalText, "sdk.history_updated.user.final");
317
360
  } else {
318
361
  sdkVoiceTranscript.value = "";
362
+ // Skip persisting user transcript — ASR often hallucinates wrong
363
+ // languages from short fragments; the model still receives the raw
364
+ // audio correctly so nothing is lost.
319
365
  }
320
- _persistTranscriptIfNew("user", finalText, "sdk.history_updated.user.final");
321
366
  }, 350);
322
367
  }
323
368
 
@@ -364,6 +409,7 @@ async function startAgentsSdkSession(config, options = {}) {
364
409
  executor: _callContext.executor || undefined,
365
410
  mode: _callContext.mode || undefined,
366
411
  model: _callContext.model || undefined,
412
+ voiceAgentId: _callContext.voiceAgentId || undefined,
367
413
  delegateOnly: false,
368
414
  sdkMode: true,
369
415
  }),
@@ -396,6 +442,7 @@ async function startAgentsSdkSession(config, options = {}) {
396
442
  executor: _callContext.executor || undefined,
397
443
  mode: _callContext.mode || undefined,
398
444
  model: _callContext.model || undefined,
445
+ voiceAgentId: _callContext.voiceAgentId || undefined,
399
446
  }),
400
447
  });
401
448
  } catch (fetchErr) {
@@ -455,14 +502,20 @@ async function startAgentsSdkSession(config, options = {}) {
455
502
  // Determine model and voice
456
503
  const model = String(tokenData.model || resolvedConfig.model || "gpt-realtime-1.5").trim();
457
504
  const voiceId = String(tokenData.voiceId || resolvedConfig.voiceId || "alloy").trim();
458
- const turnDetection = String(resolvedConfig.turnDetection || "server_vad").trim();
505
+ const turnDetection = String(resolvedConfig.turnDetection || "semantic_vad").trim();
506
+ // Use server-provided transcription model from sessionConfig, fall back to default
507
+ const serverSessionConfig = tokenData?.sessionConfig || {};
508
+ const transcriptionModel =
509
+ serverSessionConfig?.input_audio_transcription?.model || "gpt-4o-transcribe";
510
+ const transcriptionEnabled =
511
+ serverSessionConfig?.input_audio_transcription !== undefined;
459
512
  const turnDetectionConfig = {
460
513
  type: turnDetection,
461
514
  ...(turnDetection === "server_vad"
462
515
  ? {
463
- threshold: 0.35,
516
+ threshold: 0.7,
464
517
  prefix_padding_ms: 400,
465
- silence_duration_ms: 700,
518
+ silence_duration_ms: 1300,
466
519
  create_response: true,
467
520
  interrupt_response: true,
468
521
  createResponse: true,
@@ -488,12 +541,13 @@ async function startAgentsSdkSession(config, options = {}) {
488
541
  audio: {
489
542
  input: {
490
543
  format: "pcm16",
491
- transcription: { model: "gpt-4o-transcribe" },
544
+ ...(transcriptionEnabled ? { transcription: { model: transcriptionModel } } : {}),
492
545
  turnDetection: turnDetectionConfig,
493
546
  },
494
547
  output: {
495
548
  format: "pcm16",
496
549
  voice: voiceId,
550
+ ...(transcriptionEnabled ? { transcription: { model: transcriptionModel } } : {}),
497
551
  },
498
552
  },
499
553
  },
@@ -536,6 +590,11 @@ async function startAgentsSdkSession(config, options = {}) {
536
590
  emit("interrupt", {});
537
591
  });
538
592
 
593
+ session.on("speech_started", () => {
594
+ maybeAutoInterruptSdkResponse("speech-started");
595
+ emit("speech-started", {});
596
+ });
597
+
539
598
  session.on("tool_call_start", (event) => {
540
599
  const callId = event?.callId || event?.call_id || `tc-${Date.now()}`;
541
600
  const name = event?.name || event?.toolName || "unknown";
@@ -626,7 +685,35 @@ async function startAgentsSdkSession(config, options = {}) {
626
685
  // ignore URL logging issues
627
686
  }
628
687
 
629
- await session.connect(connectOpts);
688
+ // Attempt WebRTC connection first. For Azure, if it fails (404 — WebRTC not
689
+ // supported), retry with the WebSocket URL so the SDK uses WS transport.
690
+ // Wrap getUserMedia during connect so we can always stop SDK-owned mic tracks
691
+ // on teardown, even if the SDK keeps hidden stream references.
692
+ await _withGetUserMediaCapture(async () => {
693
+ try {
694
+ await session.connect(connectOpts);
695
+ } catch (connectErr) {
696
+ const errMsg = String(connectErr?.message || "");
697
+ const isWebRtc404 = /404|not found|SDP/i.test(errMsg);
698
+ const hasWsUrl = Boolean(String(tokenData?.wsUrl || "").trim());
699
+ if (isWebRtc404 && hasWsUrl && tokenData.provider === "azure") {
700
+ console.warn("[voice-client-sdk] WebRTC connect failed (404) — retrying via Azure WebSocket");
701
+ await session.connect({ ...connectOpts, url: tokenData.wsUrl });
702
+ } else {
703
+ throw connectErr;
704
+ }
705
+ }
706
+ });
707
+
708
+ // Guard: stopSdkVoiceSession() may have been called while session.connect()
709
+ // was awaiting. Release any mic streams captured during connect so that the
710
+ // browser indicator goes away, then abort this session setup.
711
+ if (_sdkExplicitStop) {
712
+ _stopCapturedSdkMicStreams();
713
+ stopTrackedMicStreams();
714
+ try { session.close?.(); } catch { /* ignore */ }
715
+ throw new Error("SDK session was stopped during connection");
716
+ }
630
717
 
631
718
  if (_agentsRealtimeModuleSource) {
632
719
  console.info(`[voice-client-sdk] using OpenAI Realtime SDK from ${_agentsRealtimeModuleSource}`);
@@ -685,6 +772,7 @@ async function startGeminiLiveSession(config, options = {}) {
685
772
  executor: _callContext.executor,
686
773
  mode: _callContext.mode,
687
774
  model: resolvedConfig.model,
775
+ voiceAgentId: _callContext.voiceAgentId || undefined,
688
776
  }));
689
777
 
690
778
  _session = ws;
@@ -753,6 +841,17 @@ async function startGeminiMicCapture(ws) {
753
841
  channelCount: 1,
754
842
  },
755
843
  });
844
+ registerMicStream(_geminiMicStream);
845
+
846
+ // Guard: stopSdkVoiceSession() may have raced with this getUserMedia await.
847
+ // Release the mic immediately instead of leaving the indicator active.
848
+ if (_sdkExplicitStop) {
849
+ for (const track of _geminiMicStream.getTracks()) {
850
+ try { track.stop(); } catch { /* ignore */ }
851
+ }
852
+ _geminiMicStream = null;
853
+ throw new Error("SDK session was stopped during microphone acquisition");
854
+ }
756
855
 
757
856
  // Use MediaRecorder to stream chunks to server
758
857
  const recorder = new MediaRecorder(_geminiMicStream, {
@@ -772,45 +871,118 @@ async function startGeminiMicCapture(ws) {
772
871
  sdkVoiceState.value = "listening";
773
872
  }
774
873
 
775
- function stopMicLikeTracks(source) {
776
- if (!source) return;
777
- const streams = [
778
- source,
779
- source?.stream,
780
- source?.localStream,
781
- source?.mediaStream,
782
- source?._mediaStream,
783
- source?.audioInputStream,
784
- source?.transport?.stream,
785
- source?.transport?.localStream,
786
- source?.transport?.mediaStream,
787
- source?.transport?._mediaStream,
788
- ].filter(Boolean);
789
-
790
- for (const stream of streams) {
791
- if (typeof stream?.getTracks !== "function") continue;
792
- for (const track of stream.getTracks()) {
793
- if (String(track?.kind || "").toLowerCase() !== "audio") continue;
794
- try { track.stop(); } catch { /* ignore */ }
874
/**
 * Walk an arbitrary object graph breadth-first and invoke `cb` once for every
 * distinct audio track reachable from it.
 *
 * Tracks are discovered through any node exposing `getTracks()` (stream-like)
 * or `getSenders()` (peer-connection-like). The walk is bounded to 220 visited
 * nodes and a depth of 4 below `source`, and is safe on cyclic graphs.
 *
 * Failures are isolated per track: an exception thrown by `cb` (or while
 * reading a track) is swallowed so the remaining tracks are still delivered.
 *
 * @param {object|Function} source - Root object to search.
 * @param {(track: object) => void} cb - Called once per unique audio track.
 * @returns {void}
 */
function forEachAudioTrackInSource(source, cb) {
  const MAX_VISITED_NODES = 220;
  const MAX_DEPTH = 4;
  const isTraversable = (value) =>
    Boolean(value) && (typeof value === "object" || typeof value === "function");

  if (!isTraversable(source) || typeof cb !== "function") return;

  const seenObjects = new Set();
  const seenTracks = new Set();

  // Deliver one track to the callback; never throws, so one bad track or a
  // throwing callback cannot hide the tracks that follow it.
  const offerTrack = (track) => {
    try {
      if (!track || String(track?.kind || "").toLowerCase() !== "audio") return;
      if (seenTracks.has(track)) return;
      seenTracks.add(track);
      cb(track);
    } catch {
      // best effort: continue with the next track
    }
  };

  // Index-based queue: avoids the O(n) cost of Array.prototype.shift().
  const queue = [{ node: source, depth: 0 }];
  let visited = 0;

  for (let cursor = 0; cursor < queue.length; cursor += 1) {
    const { node, depth } = queue[cursor];
    if (seenObjects.has(node)) continue;
    seenObjects.add(node);
    visited += 1;
    // Budget exhausted: nothing further would be inspected, so stop outright.
    if (visited > MAX_VISITED_NODES) break;

    if (typeof node?.getTracks === "function") {
      try {
        for (const track of node.getTracks()) offerTrack(track);
      } catch {
        // ignore stream enumeration failures
      }
    }

    if (typeof node?.getSenders === "function") {
      try {
        for (const sender of node.getSenders()) offerTrack(sender?.track);
      } catch {
        // ignore pc sender failures
      }
    }

    // Children would sit beyond the depth limit; do not enqueue them at all.
    if (depth >= MAX_DEPTH) continue;

    let values = null;
    try {
      values = Object.values(node);
    } catch {
      values = null;
    }
    if (!values) continue;

    for (const next of values) {
      if (!isTraversable(next) || seenObjects.has(next)) continue;
      queue.push({ node: next, depth: depth + 1 });
    }
  }
}
797
929
 
798
- const pcs = [
799
- source?.pc,
800
- source?._pc,
801
- source?.peerConnection,
802
- source?.transport?.pc,
803
- source?.transport?._pc,
804
- source?.transport?.peerConnection,
805
- ].filter(Boolean);
806
- for (const pc of pcs) {
807
- if (typeof pc?.getSenders !== "function") continue;
808
- for (const sender of pc.getSenders()) {
809
- const track = sender?.track;
810
- if (!track || String(track.kind || "").toLowerCase() !== "audio") continue;
811
- try { track.stop(); } catch { /* ignore */ }
930
/**
 * Stop every audio track reachable from `source`.
 *
 * Track discovery is delegated to forEachAudioTrackInSource(); errors raised
 * by an individual track's stop() are ignored so the rest are still stopped.
 *
 * @param {object} source - Object graph to search for audio tracks.
 * @returns {void}
 */
function stopMicLikeTracks(source) {
  const haltTrack = (audioTrack) => {
    try {
      audioTrack.stop();
    } catch {
      /* ignore */
    }
  };
  forEachAudioTrackInSource(source, haltTrack);
}
935
+
936
/**
 * Remember a stream in the module-level captured-stream set so it can be
 * stopped later, but only if it is stream-like (has getTracks) and reports at
 * least one audio track via getAudioTracks().
 *
 * @param {object} stream - Candidate stream, typically a getUserMedia result.
 * @returns {void}
 */
function _captureSdkMicStream(stream) {
  const looksLikeStream = Boolean(stream) && typeof stream.getTracks === "function";
  if (!looksLikeStream) return;

  const audioTracks = stream.getAudioTracks?.() || [];
  if (audioTracks.length > 0) {
    _sdkCapturedMicStreams.add(stream);
  }
}
942
+
943
/**
 * Stop the audio tracks of every stream recorded in the captured-stream set,
 * then empty the set. All failures are ignored (best effort teardown).
 *
 * @returns {void}
 */
function _stopCapturedSdkMicStreams() {
  const isAudioTrack = (candidate) =>
    String(candidate?.kind || "").toLowerCase() === "audio";

  _sdkCapturedMicStreams.forEach((capturedStream) => {
    try {
      for (const candidate of capturedStream.getTracks()) {
        if (!isAudioTrack(candidate)) continue;
        try {
          candidate.stop();
        } catch {
          /* ignore */
        }
      }
    } catch {
      // best effort
    }
  });

  _sdkCapturedMicStreams.clear();
}
956
+
957
// Number of in-flight _withGetUserMediaCapture() calls sharing one wrapper.
let _getUserMediaCaptureDepth = 0;
// Undo callback for the currently installed wrapper (null when not patched).
let _restoreGetUserMedia = null;

/**
 * Run `fn` while navigator.mediaDevices.getUserMedia is wrapped so that every
 * stream it returns is passed to _captureSdkMicStream().
 *
 * Overlapping calls share a single ref-counted wrapper, and the original
 * method is reinstated only when the last call finishes. A per-call
 * save/restore would let a later call reinstate an earlier call's wrapper and
 * leave it installed permanently.
 *
 * On restore:
 *  - if some other code replaced getUserMedia in the meantime, that patch is
 *    left untouched (our wrapper then simply stops capturing);
 *  - if getUserMedia was inherited from the prototype, the temporary own
 *    property is deleted rather than overwritten, so no shadow is left behind.
 *
 * When mediaDevices/getUserMedia is unavailable, `fn` runs unwrapped.
 *
 * @template T
 * @param {() => (T | Promise<T>)} fn - Work to run while capture is active.
 * @returns {Promise<T>} Whatever `fn` resolves to; rejections propagate.
 */
async function _withGetUserMediaCapture(fn) {
  const mediaDevices = globalThis?.navigator?.mediaDevices;
  const original = mediaDevices?.getUserMedia;
  if (!mediaDevices || typeof original !== "function") {
    return await fn();
  }

  if (_getUserMediaCaptureDepth === 0) {
    const hadOwnProperty = Object.prototype.hasOwnProperty.call(mediaDevices, "getUserMedia");
    const wrapper = async (...args) => {
      const stream = await original.apply(mediaDevices, args);
      // Only capture while a capture scope is open; if the wrapper could not
      // be removed (foreign patch on top) it degrades to a pass-through.
      if (_getUserMediaCaptureDepth > 0) _captureSdkMicStream(stream);
      return stream;
    };
    mediaDevices.getUserMedia = wrapper;
    _restoreGetUserMedia = () => {
      // Someone else patched on top of us: do not clobber their patch.
      if (mediaDevices.getUserMedia !== wrapper) return;
      if (hadOwnProperty) {
        mediaDevices.getUserMedia = original;
      } else {
        delete mediaDevices.getUserMedia;
      }
    };
  }

  _getUserMediaCaptureDepth += 1;
  try {
    return await fn();
  } finally {
    _getUserMediaCaptureDepth -= 1;
    if (_getUserMediaCaptureDepth === 0) {
      const restore = _restoreGetUserMedia;
      _restoreGetUserMedia = null;
      restore?.();
    }
  }
}
974
+
975
/**
 * Set the `enabled` flag on every audio track reachable from `source`.
 *
 * NOTE(review): discovery walks the whole object graph, so any audio track it
 * reaches is toggled — confirm that only microphone (outgoing) tracks are
 * reachable from the objects passed in.
 *
 * @param {object} source - Object graph to search for audio tracks.
 * @param {boolean} enabled - Desired state; coerced with Boolean().
 * @returns {boolean} True if at least one track was updated.
 */
function setMicLikeTracksEnabled(source, enabled) {
  let touchedAny = false;
  const applyToTrack = (audioTrack) => {
    try {
      audioTrack.enabled = Boolean(enabled);
      touchedAny = true;
    } catch {
      // ignore per-track failures
    }
  };
  forEachAudioTrackInSource(source, applyToTrack);
  return touchedAny;
}
815
987
 
816
988
  function handleGeminiServerEvent(msg) {
@@ -845,6 +1017,7 @@ function handleGeminiServerEvent(msg) {
845
1017
  break;
846
1018
 
847
1019
  case "speech_started":
1020
+ maybeAutoInterruptSdkResponse("speech-started");
848
1021
  sdkVoiceState.value = "listening";
849
1022
  emit("speech-started", {});
850
1023
  break;
@@ -884,6 +1057,7 @@ async function handleGeminiToolCall(msg) {
884
1057
  executor: _callContext.executor || undefined,
885
1058
  mode: _callContext.mode || undefined,
886
1059
  model: _callContext.model || undefined,
1060
+ voiceAgentId: _callContext.voiceAgentId || undefined,
887
1061
  }),
888
1062
  });
889
1063
  const result = await res.json();
@@ -951,11 +1125,14 @@ function playGeminiAudio(data) {
951
1125
  * @returns {Promise<{ sdk: boolean, provider: string }>}
952
1126
  */
953
1127
  export async function startSdkVoiceSession(options = {}) {
1128
+ ensureMicTrackingPatched();
1129
+ _sdkExplicitStop = false; // reset before each new session attempt
954
1130
  if (_session) {
955
1131
  console.warn("[voice-client-sdk] Session already active");
956
1132
  return { sdk: sdkVoiceSdkActive.value, provider: sdkVoiceProvider.value };
957
1133
  }
958
1134
 
1135
+ isVoiceMicMuted.value = false;
959
1136
  _callContext = _normalizeCallContext(options);
960
1137
  sdkVoiceBoundSessionId.value = _callContext.sessionId;
961
1138
  sdkVoiceState.value = "connecting";
@@ -964,6 +1141,7 @@ export async function startSdkVoiceSession(options = {}) {
964
1141
  sdkVoiceResponse.value = "";
965
1142
  sdkVoiceToolCalls.value = [];
966
1143
  _usingLegacyFallback = false;
1144
+ _lastAutoBargeInAt = 0;
967
1145
  _resetTranscriptPersistenceState();
968
1146
 
969
1147
  try {
@@ -1014,6 +1192,7 @@ export async function startSdkVoiceSession(options = {}) {
1014
1192
  sdkVoiceSdkActive.value = false;
1015
1193
  sdkVoiceState.value = "idle";
1016
1194
  sdkVoiceError.value = null; // Don't show error — we'll fallback
1195
+ _stopCapturedSdkMicStreams();
1017
1196
  emit("sdk-unavailable", {
1018
1197
  reason: reason || "SDK unavailable",
1019
1198
  provider: _sdkConfig?.provider || "unknown",
@@ -1031,6 +1210,9 @@ export async function startSdkVoiceSession(options = {}) {
1031
1210
  * Stop the current SDK voice session.
1032
1211
  */
1033
1212
  export function stopSdkVoiceSession() {
1213
+ // Set before any cleanup so in-flight getUserMedia / session.connect awaiters
1214
+ // detect the cancellation and release acquired mic tracks immediately.
1215
+ _sdkExplicitStop = true;
1034
1216
  emit("session-ending", { sessionId: sdkVoiceSessionId.value });
1035
1217
  _flushPendingTranscriptBuffers();
1036
1218
  if (_geminiRecorder) {
@@ -1051,6 +1233,7 @@ export function stopSdkVoiceSession() {
1051
1233
  }
1052
1234
  _session = null;
1053
1235
  }
1236
+ _stopCapturedSdkMicStreams();
1054
1237
 
1055
1238
  // Stop Gemini mic stream if active
1056
1239
  if (_geminiMicStream) {
@@ -1059,6 +1242,9 @@ export function stopSdkVoiceSession() {
1059
1242
  }
1060
1243
  _geminiMicStream = null;
1061
1244
  }
1245
+ // Force-stop any tracked audio input streams to avoid stale browser mic
1246
+ // capture indicators after call close (covers async/race teardown paths).
1247
+ stopTrackedMicStreams();
1062
1248
 
1063
1249
  clearInterval(_durationTimer);
1064
1250
  _durationTimer = null;
@@ -1072,7 +1258,14 @@ export function stopSdkVoiceSession() {
1072
1258
  sdkVoiceDuration.value = 0;
1073
1259
  sdkVoiceProvider.value = null;
1074
1260
  sdkVoiceSdkActive.value = false;
1075
- _callContext = { sessionId: null, executor: null, mode: null, model: null };
1261
+ isVoiceMicMuted.value = false;
1262
+ _callContext = {
1263
+ sessionId: null,
1264
+ executor: null,
1265
+ mode: null,
1266
+ model: null,
1267
+ voiceAgentId: null,
1268
+ };
1076
1269
  _usingLegacyFallback = false;
1077
1270
  _resetTranscriptPersistenceState();
1078
1271
 
@@ -1095,6 +1288,39 @@ export function interruptSdkResponse() {
1095
1288
  }
1096
1289
  }
1097
1290
 
1291
/**
 * Flip the microphone mute state for SDK-driven voice sessions.
 *
 * When a session exists, its native `mute()` / `unmute()` method is tried
 * first (if present), and the audio tracks reachable from the session are
 * then enabled/disabled as well. The Gemini mic stream's audio tracks, when
 * one is active, are toggled the same way. The shared mute signal is updated
 * last, regardless of whether any track was actually found.
 *
 * @returns {boolean} The new muted state (true = muted).
 */
export function toggleSdkMicMute() {
  const nextMuted = !isVoiceMicMuted.value;
  const tracksEnabled = !nextMuted;

  if (_session) {
    // Try SDK-native controls first when available.
    const nativeMethod = tracksEnabled ? "unmute" : "mute";
    try {
      if (typeof _session[nativeMethod] === "function") {
        _session[nativeMethod]();
      }
    } catch {
      // fall through to track-level toggles
    }
    setMicLikeTracksEnabled(_session, tracksEnabled);
  }

  if (_geminiMicStream) {
    for (const micTrack of _geminiMicStream.getTracks()) {
      const isAudio = String(micTrack?.kind || "").toLowerCase() === "audio";
      if (!isAudio) continue;
      try {
        micTrack.enabled = tracksEnabled;
      } catch {
        /* ignore */
      }
    }
  }

  isVoiceMicMuted.value = nextMuted;
  return isVoiceMicMuted.value;
}
1323
+
1098
1324
  /**
1099
1325
  * Send a text message to the voice agent.
1100
1326
  * @param {string} text