bosun 0.37.1 → 0.37.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -442,6 +442,9 @@ function buildStableSetupDefaults({
442
442
  voiceTurnDetection: "semantic_vad",
443
443
  voiceFallbackMode: "browser",
444
444
  voiceDelegateExecutor: "codex-sdk",
445
+ voiceTranscriptionEnabled: true,
446
+ voiceTranscriptionModel: "gpt-4o-transcribe",
447
+ voiceAzureTranscriptionEnabled: false,
445
448
  openaiRealtimeApiKey: "",
446
449
  azureOpenaiRealtimeEndpoint: "",
447
450
  azureOpenaiRealtimeApiKey: "",
@@ -914,6 +917,32 @@ function applyNonBlockingSetupEnvDefaults(envMap, env = {}, sourceEnv = process.
914
917
  ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"],
915
918
  "codex-sdk",
916
919
  );
920
+ envMap.VOICE_TRANSCRIPTION_ENABLED = toBooleanEnvString(
921
+ pickNonEmptyValue(
922
+ env.voiceTranscriptionEnabled,
923
+ env.VOICE_TRANSCRIPTION_ENABLED,
924
+ envMap.VOICE_TRANSCRIPTION_ENABLED,
925
+ sourceEnv.VOICE_TRANSCRIPTION_ENABLED,
926
+ ),
927
+ true,
928
+ );
929
+ envMap.VOICE_TRANSCRIPTION_MODEL = String(
930
+ pickNonEmptyValue(
931
+ env.voiceTranscriptionModel,
932
+ env.VOICE_TRANSCRIPTION_MODEL,
933
+ envMap.VOICE_TRANSCRIPTION_MODEL,
934
+ sourceEnv.VOICE_TRANSCRIPTION_MODEL,
935
+ ) || "gpt-4o-transcribe",
936
+ ).trim() || "gpt-4o-transcribe";
937
+ envMap.VOICE_AZURE_TRANSCRIPTION_ENABLED = toBooleanEnvString(
938
+ pickNonEmptyValue(
939
+ env.voiceAzureTranscriptionEnabled,
940
+ env.VOICE_AZURE_TRANSCRIPTION_ENABLED,
941
+ envMap.VOICE_AZURE_TRANSCRIPTION_ENABLED,
942
+ sourceEnv.VOICE_AZURE_TRANSCRIPTION_ENABLED,
943
+ ),
944
+ false,
945
+ );
917
946
 
918
947
  const openaiRealtimeApiKey = pickNonEmptyValue(
919
948
  env.openaiRealtimeApiKey,
@@ -1481,14 +1510,12 @@ async function handleVoiceEndpointTest(body) {
1481
1510
  // Strip path suffix so users can paste full URLs without double-path 404s.
1482
1511
  let base = String(azureEndpoint).replace(/\/+$/, "");
1483
1512
  try { const u = new URL(base); base = `${u.protocol}//${u.host}`; } catch { /* keep as-is */ }
1484
- // Single-deployment GET only requires Cognitive Services User role.
1485
- // Use the GA api-version (2024-10-21) for broad compatibility across
1486
- // classic Azure OpenAI and Azure AI Foundry resources.
1487
- const dep = String(deployment || "").trim();
1488
- testUrl = dep
1489
- ? `${base}/openai/deployments/${encodeURIComponent(dep)}?api-version=2024-10-21`
1490
- : `${base}/openai/models?api-version=2024-10-21`;
1513
+ // Prefer /openai/models as a credential check — it works on both classic
1514
+ // Azure OpenAI resources AND Azure AI Foundry "Global Standard" deployments.
1515
+ // If a deployment name is provided, we verify it separately after confirming
1516
+ // the endpoint + key are valid.
1491
1517
  headers["api-key"] = apiKey;
1518
+ testUrl = `${base}/openai/models?api-version=2024-10-21`;
1492
1519
  } else if (normalizedProvider === "claude") {
1493
1520
  testUrl = "https://api.anthropic.com/v1/models";
1494
1521
  headers["anthropic-version"] = "2023-06-01";
@@ -1525,6 +1552,36 @@ async function handleVoiceEndpointTest(body) {
1525
1552
  clearTimeout(timer);
1526
1553
  const latencyMs = Date.now() - start;
1527
1554
  if (resp.ok || resp.status === 200) {
1555
+ // For Azure with a deployment name, do a secondary check to verify the
1556
+ // deployment exists. We try chat/completions with max_tokens=1 — realtime
1557
+ // deployments return 400 (wrong endpoint type) which still confirms existence.
1558
+ if (normalizedProvider === "azure" && deployment) {
1559
+ const dep = String(deployment).trim();
1560
+ try {
1561
+ let base = String(azureEndpoint).replace(/\/+$/, "");
1562
+ try { const u = new URL(base); base = `${u.protocol}//${u.host}`; } catch { /* keep */ }
1563
+ const depUrl = `${base}/openai/deployments/${encodeURIComponent(dep)}/chat/completions?api-version=2024-10-21`;
1564
+ const depCtrl = new AbortController();
1565
+ const depTimer = setTimeout(() => depCtrl.abort(), 8_000);
1566
+ const depResp = await fetch(depUrl, {
1567
+ method: "POST",
1568
+ headers: { ...headers, "Content-Type": "application/json" },
1569
+ body: JSON.stringify({ messages: [{ role: "user", content: "test" }], max_tokens: 1 }),
1570
+ signal: depCtrl.signal,
1571
+ });
1572
+ clearTimeout(depTimer);
1573
+ // 200 = chat model works, 400 = realtime model (expected), both confirm deployment exists
1574
+ if (depResp.ok || depResp.status === 400) {
1575
+ return { ok: true, latencyMs, deployment: dep };
1576
+ }
1577
+ if (depResp.status === 404) {
1578
+ return { ok: false, error: `Credentials valid but deployment "${dep}" not found — check the deployment name in Azure AI Foundry`, latencyMs };
1579
+ }
1580
+ return { ok: true, latencyMs, warning: `Credentials valid. Could not verify deployment "${dep}" (HTTP ${depResp.status})` };
1581
+ } catch {
1582
+ return { ok: true, latencyMs, warning: `Credentials valid. Could not verify deployment "${dep}" (timeout)` };
1583
+ }
1584
+ }
1528
1585
  return { ok: true, latencyMs };
1529
1586
  }
1530
1587
  const text = await resp.text().catch(() => "");
@@ -1535,9 +1592,13 @@ async function handleVoiceEndpointTest(body) {
1535
1592
  } catch {
1536
1593
  // Keep generic HTTP status message.
1537
1594
  }
1538
- // Friendly message when the deployment name itself is not found (key is fine)
1539
- if (resp.status === 404 && deployment) {
1540
- error = `Deployment "${deployment}" not found check deployment name in Azure AI Foundry`;
1595
+ // Azure-specific: provide helpful messages for common errors
1596
+ if (normalizedProvider === "azure") {
1597
+ if (resp.status === 401 || resp.status === 403) {
1598
+ error = `Authentication failed (HTTP ${resp.status}) — check API key and endpoint URL`;
1599
+ } else if (resp.status === 404) {
1600
+ error = `Endpoint not found (HTTP 404) — check the Azure endpoint URL. Use https://<resource>.openai.azure.com`;
1601
+ }
1541
1602
  }
1542
1603
  return { ok: false, error, latencyMs };
1543
1604
  } catch (err) {
package/ui/app.js CHANGED
@@ -70,6 +70,7 @@ const VOICE_LAUNCH_QUERY_KEYS = [
70
70
  "executor",
71
71
  "mode",
72
72
  "model",
73
+ "voiceAgentId",
73
74
  "vision",
74
75
  "source",
75
76
  "chat_id",
@@ -127,6 +128,7 @@ function parseVoiceLaunchFromUrl() {
127
128
  executor: String(params.get("executor") || "").trim() || null,
128
129
  mode: String(params.get("mode") || "").trim() || null,
129
130
  model: String(params.get("model") || "").trim() || null,
131
+ voiceAgentId: String(params.get("voiceAgentId") || "").trim() || null,
130
132
  },
131
133
  };
132
134
  }
@@ -166,11 +168,13 @@ function buildBrowserFollowUrl(detail = {}) {
166
168
  const executor = String(detail?.executor || "").trim();
167
169
  const mode = String(detail?.mode || "").trim();
168
170
  const model = String(detail?.model || "").trim();
171
+ const voiceAgentId = String(detail?.voiceAgentId || "").trim();
169
172
  const vision = String(detail?.initialVisionSource || "").trim();
170
173
  if (sessionId) target.searchParams.set("sessionId", sessionId);
171
174
  if (executor) target.searchParams.set("executor", executor);
172
175
  if (mode) target.searchParams.set("mode", mode);
173
176
  if (model) target.searchParams.set("model", model);
177
+ if (voiceAgentId) target.searchParams.set("voiceAgentId", voiceAgentId);
174
178
  if (vision) target.searchParams.set("vision", vision);
175
179
  return target.toString();
176
180
  }
@@ -1464,12 +1468,14 @@ function App() {
1464
1468
  const [voiceExecutor, setVoiceExecutor] = useState(null);
1465
1469
  const [voiceAgentMode, setVoiceAgentMode] = useState(null);
1466
1470
  const [voiceModel, setVoiceModel] = useState(null);
1471
+ const [voiceAgentId, setVoiceAgentId] = useState(null);
1467
1472
  const [voiceCallType, setVoiceCallType] = useState("voice");
1468
1473
  const [voiceInitialVisionSource, setVoiceInitialVisionSource] = useState(
1469
1474
  null,
1470
1475
  );
1471
1476
  const followWindowMode = isFollowWindowFromUrl();
1472
1477
  const followOverlayOpenedRef = useRef(false);
1478
+ const externalizeInFlightRef = useRef(false);
1473
1479
  const [floatingCallState, setFloatingCallState] = useState(() =>
1474
1480
  readFloatingCallState(),
1475
1481
  );
@@ -1805,6 +1811,9 @@ function App() {
1805
1811
  const currentModel =
1806
1812
  String(event?.detail?.model || selectedModel.value || "").trim() ||
1807
1813
  null;
1814
+ const currentVoiceAgentId =
1815
+ String(event?.detail?.voiceAgentId || voiceAgentId || "").trim() ||
1816
+ null;
1808
1817
  const explicitSessionId =
1809
1818
  String(event?.detail?.sessionId || "").trim() || null;
1810
1819
  let currentSessionId =
@@ -1836,6 +1845,7 @@ function App() {
1836
1845
  setVoiceExecutor(currentExecutor);
1837
1846
  setVoiceAgentMode(currentMode);
1838
1847
  setVoiceModel(currentModel);
1848
+ setVoiceAgentId(currentVoiceAgentId);
1839
1849
  setVoiceCallType(requestedCallType);
1840
1850
  setVoiceInitialVisionSource(requestedVisionSource);
1841
1851
 
@@ -1859,6 +1869,7 @@ function App() {
1859
1869
  executor: currentExecutor || undefined,
1860
1870
  mode: currentMode || undefined,
1861
1871
  model: currentModel || undefined,
1872
+ voiceAgentId: currentVoiceAgentId || undefined,
1862
1873
  });
1863
1874
  if (followResult?.ok) {
1864
1875
  const nextFloatingState = {
@@ -1868,6 +1879,7 @@ function App() {
1868
1879
  executor: currentExecutor,
1869
1880
  mode: currentMode,
1870
1881
  model: currentModel,
1882
+ voiceAgentId: currentVoiceAgentId,
1871
1883
  initialVisionSource: requestedVisionSource,
1872
1884
  };
1873
1885
  setFloatingCallState(nextFloatingState);
@@ -1890,7 +1902,7 @@ function App() {
1890
1902
  globalThis.addEventListener?.("ve:open-voice-mode", handleOpenVoiceMode);
1891
1903
  return () =>
1892
1904
  globalThis.removeEventListener?.("ve:open-voice-mode", handleOpenVoiceMode);
1893
- }, [followWindowMode]);
1905
+ }, [followWindowMode, voiceAgentId]);
1894
1906
 
1895
1907
  useEffect(() => {
1896
1908
  const onStorage = (event) => {
@@ -1912,6 +1924,7 @@ function App() {
1912
1924
  executor: voiceExecutor,
1913
1925
  mode: voiceAgentMode,
1914
1926
  model: voiceModel,
1927
+ voiceAgentId,
1915
1928
  initialVisionSource: voiceInitialVisionSource,
1916
1929
  };
1917
1930
  setFloatingCallState(nextFloatingState);
@@ -1924,6 +1937,7 @@ function App() {
1924
1937
  voiceExecutor,
1925
1938
  voiceAgentMode,
1926
1939
  voiceModel,
1940
+ voiceAgentId,
1927
1941
  voiceInitialVisionSource,
1928
1942
  ]);
1929
1943
 
@@ -1937,6 +1951,7 @@ function App() {
1937
1951
  executor: voiceExecutor,
1938
1952
  mode: voiceAgentMode,
1939
1953
  model: voiceModel,
1954
+ voiceAgentId,
1940
1955
  initialVisionSource: voiceInitialVisionSource,
1941
1956
  };
1942
1957
  setFloatingCallState(nextFloatingState);
@@ -1951,6 +1966,7 @@ function App() {
1951
1966
  voiceExecutor,
1952
1967
  voiceAgentMode,
1953
1968
  voiceModel,
1969
+ voiceAgentId,
1954
1970
  voiceInitialVisionSource,
1955
1971
  ]);
1956
1972
 
@@ -2002,7 +2018,10 @@ function App() {
2002
2018
  navigateTo("chat", { replace: true, skipGuard: true });
2003
2019
  }
2004
2020
  }
2005
- await new Promise((resolve) => setTimeout(resolve, 60));
2021
+ // Wait for UI components to mount before dispatching the voice launch
2022
+ // event. 60 ms was too aggressive for cold-start Electron windows where
2023
+ // JS bundles are still being parsed; 200 ms is reliably sufficient.
2024
+ await new Promise((resolve) => setTimeout(resolve, 200));
2006
2025
  if (cancelled) return;
2007
2026
  globalThis.dispatchEvent?.(
2008
2027
  new CustomEvent("ve:open-voice-mode", { detail: launch.detail }),
@@ -2341,6 +2360,7 @@ function App() {
2341
2360
  executor: floatingCallState?.executor,
2342
2361
  mode: floatingCallState?.mode,
2343
2362
  model: floatingCallState?.model,
2363
+ voiceAgentId: floatingCallState?.voiceAgentId,
2344
2364
  });
2345
2365
  if (!popupResult.ok) {
2346
2366
  showToast(
@@ -2366,6 +2386,10 @@ function App() {
2366
2386
  onDismiss=${(detail = {}) => {
2367
2387
  const reason = String(detail?.reason || "").trim().toLowerCase();
2368
2388
  if (!followWindowMode && reason === "externalize") {
2389
+ if (externalizeInFlightRef.current) {
2390
+ return;
2391
+ }
2392
+ externalizeInFlightRef.current = true;
2369
2393
  const followDetail = {
2370
2394
  call: voiceCallType,
2371
2395
  sessionId: voiceSessionId,
@@ -2373,6 +2397,7 @@ function App() {
2373
2397
  executor: voiceExecutor,
2374
2398
  mode: voiceAgentMode,
2375
2399
  model: voiceModel,
2400
+ voiceAgentId,
2376
2401
  };
2377
2402
  const desktopFollowApi = globalThis?.veDesktop?.follow;
2378
2403
  if (typeof desktopFollowApi?.open === "function") {
@@ -2390,13 +2415,17 @@ function App() {
2390
2415
  executor: followDetail.executor,
2391
2416
  mode: followDetail.mode,
2392
2417
  model: followDetail.model,
2418
+ voiceAgentId: followDetail.voiceAgentId,
2393
2419
  initialVisionSource: followDetail.initialVisionSource,
2394
2420
  };
2395
2421
  setFloatingCallState(nextFloatingState);
2396
2422
  writeFloatingCallState(nextFloatingState);
2397
2423
  setVoiceOverlayOpen(false);
2398
2424
  })
2399
- .catch(() => showToast("Could not open floating call window.", "error"));
2425
+ .catch(() => showToast("Could not open floating call window.", "error"))
2426
+ .finally(() => {
2427
+ externalizeInFlightRef.current = false;
2428
+ });
2400
2429
  return;
2401
2430
  }
2402
2431
  const popupResult = openBrowserFollowWindow(followDetail);
@@ -2405,6 +2434,7 @@ function App() {
2405
2434
  popupResult.reason || "Could not open floating browser call window.",
2406
2435
  "error",
2407
2436
  );
2437
+ externalizeInFlightRef.current = false;
2408
2438
  return;
2409
2439
  }
2410
2440
  const nextFloatingState = {
@@ -2414,13 +2444,16 @@ function App() {
2414
2444
  executor: followDetail.executor,
2415
2445
  mode: followDetail.mode,
2416
2446
  model: followDetail.model,
2447
+ voiceAgentId: followDetail.voiceAgentId,
2417
2448
  initialVisionSource: followDetail.initialVisionSource,
2418
2449
  };
2419
2450
  setFloatingCallState(nextFloatingState);
2420
2451
  writeFloatingCallState(nextFloatingState);
2421
2452
  setVoiceOverlayOpen(false);
2453
+ externalizeInFlightRef.current = false;
2422
2454
  return;
2423
2455
  }
2456
+ externalizeInFlightRef.current = false;
2424
2457
  if (followWindowMode && globalThis?.veDesktop?.follow?.hide) {
2425
2458
  globalThis.veDesktop.follow.hide().catch(() => {});
2426
2459
  return;
@@ -2432,6 +2465,10 @@ function App() {
2432
2465
  executor=${voiceExecutor}
2433
2466
  mode=${voiceAgentMode}
2434
2467
  model=${voiceModel}
2468
+ voiceAgentId=${voiceAgentId}
2469
+ onVoiceAgentChange=${(nextAgentId) => {
2470
+ setVoiceAgentId(String(nextAgentId || "").trim() || null);
2471
+ }}
2435
2472
  callType=${voiceCallType}
2436
2473
  initialVisionSource=${voiceInitialVisionSource}
2437
2474
  compact=${followWindowMode}
@@ -36,9 +36,19 @@ function sessionPath(id, action = "") {
36
36
 
37
37
  /* ─── Data loaders ─── */
38
38
  export async function loadSessions(filter = {}) {
39
- _lastLoadFilter = filter;
39
+ const normalizedFilter = {
40
+ ...(filter && typeof filter === "object" ? filter : {}),
41
+ };
42
+ if (!Object.prototype.hasOwnProperty.call(normalizedFilter, "workspace")) {
43
+ normalizedFilter.workspace = "active";
44
+ }
45
+ _lastLoadFilter = normalizedFilter;
40
46
  try {
41
- const params = new URLSearchParams(filter);
47
+ const params = new URLSearchParams();
48
+ for (const [key, value] of Object.entries(normalizedFilter)) {
49
+ if (value == null || value === "") continue;
50
+ params.set(key, String(value));
51
+ }
42
52
  const res = await apiFetch(`/api/sessions?${params}`, { _silent: true });
43
53
  if (res?.sessions) sessionsData.value = res.sessions;
44
54
  sessionsError.value = null;
@@ -317,11 +327,19 @@ export function initSessionWsListener() {
317
327
  if (_wsListenerReady) return;
318
328
  _wsListenerReady = true;
319
329
  onWsMessage((msg) => {
320
- if (msg?.type !== "session-message") return;
321
- const payload = msg.payload || {};
322
- const sessionId = payload.sessionId || payload.taskId;
323
- if (!sessionId) return;
324
- appendSessionMessage(sessionId, payload.message, payload.session);
330
+ if (msg?.type === "session-message") {
331
+ const payload = msg.payload || {};
332
+ const sessionId = payload.sessionId || payload.taskId;
333
+ if (!sessionId) return;
334
+ appendSessionMessage(sessionId, payload.message, payload.session);
335
+ return;
336
+ }
337
+ if (msg?.type === "invalidate") {
338
+ const channels = Array.isArray(msg.channels) ? msg.channels : [];
339
+ if (channels.includes("*") || channels.includes("sessions")) {
340
+ loadSessions(_lastLoadFilter).catch(() => {});
341
+ }
342
+ }
325
343
  });
326
344
  }
327
345
 
@@ -8,7 +8,7 @@ import { h } from "preact";
8
8
  import { useState, useEffect, useCallback } from "preact/hooks";
9
9
  import { signal } from "@preact/signals";
10
10
  import htm from "htm";
11
- import { apiFetch } from "../modules/api.js";
11
+ import { apiFetch, onWsMessage } from "../modules/api.js";
12
12
  import { haptic } from "../modules/telegram.js";
13
13
  import { Modal } from "./shared.js";
14
14
  import { iconText, resolveIcon } from "../modules/icon-utils.js";
@@ -53,6 +53,24 @@ export async function switchWorkspace(wsId) {
53
53
  }
54
54
  activeWorkspaceId.value = String(res.activeId || wsId);
55
55
  await loadWorkspaces();
56
+ try {
57
+ globalThis.dispatchEvent?.(
58
+ new CustomEvent("ve:workspace-switched", {
59
+ detail: { workspaceId: activeWorkspaceId.value || String(wsId || "") },
60
+ }),
61
+ );
62
+ } catch {
63
+ // no-op
64
+ }
65
+ try {
66
+ const { refreshTab } = await import("../modules/state.js");
67
+ await Promise.allSettled([
68
+ refreshTab("tasks", { background: true, manual: false }),
69
+ refreshTab("dashboard", { background: true, manual: false }),
70
+ ]);
71
+ } catch {
72
+ // best effort
73
+ }
56
74
  return true;
57
75
  } catch (err) {
58
76
  console.warn("[workspace-switcher] Failed to switch workspace:", err);
@@ -461,6 +479,35 @@ export function WorkspaceSwitcher() {
461
479
  loadWorkspaces();
462
480
  }, []);
463
481
 
482
+ // Keep selector state in sync when workspace is switched externally
483
+ // (for example via Electron menu or another client).
484
+ useEffect(() => {
485
+ const unsubscribe = onWsMessage((msg) => {
486
+ if (msg?.type !== "invalidate") return;
487
+ const channels = Array.isArray(msg.channels) ? msg.channels : [];
488
+ if (channels.includes("*") || channels.includes("workspaces")) {
489
+ loadWorkspaces().catch(() => {});
490
+ }
491
+ });
492
+ return unsubscribe;
493
+ }, []);
494
+
495
+ // Desktop fallback when WS is unavailable: refresh workspace state whenever
496
+ // the window regains focus/visibility.
497
+ useEffect(() => {
498
+ const sync = () => loadWorkspaces().catch(() => {});
499
+ const onFocus = () => sync();
500
+ const onVisibility = () => {
501
+ if (document.visibilityState === "visible") sync();
502
+ };
503
+ globalThis.addEventListener?.("focus", onFocus);
504
+ document.addEventListener?.("visibilitychange", onVisibility);
505
+ return () => {
506
+ globalThis.removeEventListener?.("focus", onFocus);
507
+ document.removeEventListener?.("visibilitychange", onVisibility);
508
+ };
509
+ }, []);
510
+
464
511
  const wsList = workspaces.value;
465
512
  const currentId = activeWorkspaceId.value;
466
513
 
package/ui/demo.html CHANGED
@@ -2723,6 +2723,55 @@
2723
2723
  return { data: STATE.executors.map((e) => ({ ...e, status: e.enabled ? 'active' : 'disabled' })) };
2724
2724
  if (route === '/api/telemetry/alerts')
2725
2725
  return { data: [] };
2726
+ if (route === '/api/analytics/usage') {
2727
+ const daysParam = Number(params.get('days') || '30');
2728
+ // Build mock usage analytics from demo STATE
2729
+ const now = Date.now();
2730
+ const dayMs = 86400000;
2731
+ const numDays = Math.min(daysParam || 30, 30);
2732
+ // Generate synthetic daily trend data
2733
+ const dates = [];
2734
+ const agentDaily = { codex: [], copilot: [], claude: [] };
2735
+ const skillDaily = { 'background-task-execution': [], 'pr-workflow': [], 'error-recovery': [] };
2736
+ const mcpDaily = { execute_bash: [], read_file: [], write_file: [], list_directory: [] };
2737
+ for (let i = numDays - 1; i >= 0; i--) {
2738
+ const d = new Date(now - i * dayMs);
2739
+ dates.push(d.toISOString().slice(0, 10));
2740
+ const active = (i < (numDays * 0.6)) ? 1 : 0;
2741
+ agentDaily.codex.push(active ? Math.floor(Math.random() * 8 + 2) : 0);
2742
+ agentDaily.copilot.push(active ? Math.floor(Math.random() * 5 + 1) : 0);
2743
+ agentDaily.claude.push(active ? Math.floor(Math.random() * 4) : 0);
2744
+ skillDaily['background-task-execution'].push(active ? Math.floor(Math.random() * 6 + 1) : 0);
2745
+ skillDaily['pr-workflow'].push(active ? Math.floor(Math.random() * 4) : 0);
2746
+ skillDaily['error-recovery'].push(active ? Math.floor(Math.random() * 3) : 0);
2747
+ mcpDaily.execute_bash.push(active ? Math.floor(Math.random() * 12 + 2) : 0);
2748
+ mcpDaily.read_file.push(active ? Math.floor(Math.random() * 8 + 1) : 0);
2749
+ mcpDaily.write_file.push(active ? Math.floor(Math.random() * 6) : 0);
2750
+ mcpDaily.list_directory.push(active ? Math.floor(Math.random() * 5) : 0);
2751
+ }
2752
+ const sum = (arr) => arr.reduce((a, b) => a + b, 0);
2753
+ const agentRuns = sum(agentDaily.codex) + sum(agentDaily.copilot) + sum(agentDaily.claude);
2754
+ const skillInvocations = Object.values(skillDaily).reduce((t, a) => t + sum(a), 0);
2755
+ const mcpToolCalls = Object.values(mcpDaily).reduce((t, a) => t + sum(a), 0);
2756
+ return { ok: true, data: {
2757
+ agentRuns, skillInvocations, mcpToolCalls,
2758
+ avgPerDay: Math.round((agentRuns + skillInvocations + mcpToolCalls) / (numDays || 1)),
2759
+ lastActiveAt: new Date(now - 7200000).toISOString(),
2760
+ sinceAt: new Date(now - numDays * dayMs).toISOString(),
2761
+ topAgents: [
2762
+ { name: 'codex', count: sum(agentDaily.codex) },
2763
+ { name: 'copilot', count: sum(agentDaily.copilot) },
2764
+ { name: 'claude', count: sum(agentDaily.claude) },
2765
+ ].filter(a => a.count > 0).sort((a, b) => b.count - a.count),
2766
+ topSkills: Object.entries(skillDaily)
2767
+ .map(([name, v]) => ({ name, count: sum(v) }))
2768
+ .filter(s => s.count > 0).sort((a, b) => b.count - a.count),
2769
+ topMcpTools: Object.entries(mcpDaily)
2770
+ .map(([name, v]) => ({ name, count: sum(v) }))
2771
+ .filter(t => t.count > 0).sort((a, b) => b.count - a.count),
2772
+ trend: { dates, agents: agentDaily, skills: skillDaily, mcpTools: mcpDaily },
2773
+ }};
2774
+ }
2726
2775
  if (route === '/api/executor/pause') {
2727
2776
  STATE.paused = true; addLog('info', 'executor', 'Executor paused');
2728
2777
  return { ok: true, paused: true };
@@ -3543,6 +3592,67 @@
3543
3592
  return { data: buildProjectSnapshot() };
3544
3593
 
3545
3594
  // ── Voice ──
3595
+ if (route === '/api/voice/agents' && method === 'GET') {
3596
+ const fromLibrary = (STATE.libraryEntries || [])
3597
+ .filter((entry) => {
3598
+ if (!entry || entry.type !== 'agent') return false;
3599
+ const id = String(entry.id || '').toLowerCase();
3600
+ const tags = Array.isArray(entry.tags) ? entry.tags.map((t) => String(t || '').toLowerCase()) : [];
3601
+ return id.includes('voice-agent') || tags.includes('voice') || tags.includes('audio-agent');
3602
+ })
3603
+ .map((entry) => ({
3604
+ id: entry.id,
3605
+ name: entry.name || entry.id,
3606
+ description: entry.description || '',
3607
+ tags: Array.isArray(entry.tags) ? entry.tags : [],
3608
+ model: null,
3609
+ voicePersona: String(entry.id || '').includes('female')
3610
+ ? 'female'
3611
+ : (String(entry.id || '').includes('male') ? 'male' : 'neutral'),
3612
+ voiceInstructions: '',
3613
+ skills: [],
3614
+ promptOverride: null,
3615
+ }));
3616
+
3617
+ const defaults = [
3618
+ {
3619
+ id: 'voice-agent-female',
3620
+ name: 'Voice Agent (Female)',
3621
+ description: 'Conversational voice specialist with concise guidance and call-friendly pacing.',
3622
+ tags: ['voice', 'audio-agent', 'female', 'realtime'],
3623
+ model: null,
3624
+ voicePersona: 'female',
3625
+ voiceInstructions: 'You are Nova, a concise and practical female voice agent.',
3626
+ skills: ['concise-voice-guidance', 'conversation-memory'],
3627
+ promptOverride: null,
3628
+ },
3629
+ {
3630
+ id: 'voice-agent-male',
3631
+ name: 'Voice Agent (Male)',
3632
+ description: 'Operational voice specialist focused on diagnostics and execution.',
3633
+ tags: ['voice', 'audio-agent', 'male', 'realtime'],
3634
+ model: null,
3635
+ voicePersona: 'male',
3636
+ voiceInstructions: 'You are Atlas, a direct and execution-oriented male voice agent.',
3637
+ skills: ['ops-diagnostics', 'task-execution'],
3638
+ promptOverride: null,
3639
+ },
3640
+ ];
3641
+
3642
+ const seen = new Set();
3643
+ const agents = [...fromLibrary, ...defaults].filter((agent) => {
3644
+ const id = String(agent?.id || '').trim();
3645
+ if (!id || seen.has(id)) return false;
3646
+ seen.add(id);
3647
+ return true;
3648
+ });
3649
+ return {
3650
+ ok: true,
3651
+ agents,
3652
+ defaultAgentId: agents[0]?.id || 'voice-agent-female',
3653
+ };
3654
+ }
3655
+
3546
3656
  if (route === '/api/voice/audio/respond' && method === 'POST') {
3547
3657
  const inputText = String(body?.inputText || body?.text || '').trim();
3548
3658
  if (!inputText) {
@@ -0,0 +1,83 @@
1
/**
 * mic-track-registry.js
 *
 * Tracks microphone input streams obtained via getUserMedia and provides a
 * hard-stop primitive used by voice teardown to prevent lingering "mic in use"
 * indicators after a call is closed.
 */

/** Streams that have been registered and may still own live audio tracks. */
const trackedStreams = new Set();
/** True while this module's getUserMedia wrapper is installed. */
let patched = false;
/** Undo callback for the installed getUserMedia wrapper; null when unpatched. */
let restoreGetUserMedia = null;

/**
 * Duck-type check: anything exposing a `getTracks()` method is treated as a
 * stream.
 *
 * @param {unknown} stream - Candidate value.
 * @returns {boolean} True when `stream` has a callable `getTracks`.
 */
function isMediaStreamLike(stream) {
  return Boolean(stream && typeof stream.getTracks === "function");
}

/**
 * Returns the audio tracks of a stream-like object.
 *
 * Uses `getAudioTracks()` when the object provides it and otherwise falls back
 * to `getTracks()`, so objects that pass `isMediaStreamLike` but only expose
 * `getTracks()` still have their audio tracks found. The result is always
 * filtered by `kind === "audio"` (case-insensitive).
 *
 * @param {unknown} stream - Candidate stream.
 * @returns {Array<object>} Audio tracks; empty when `stream` is not
 *   stream-like or when reading its tracks throws.
 */
function getAudioTracks(stream) {
  if (!isMediaStreamLike(stream)) return [];
  try {
    const candidates =
      typeof stream.getAudioTracks === "function"
        ? stream.getAudioTracks()
        : stream.getTracks();
    return (candidates || [])
      .filter((track) => String(track?.kind || "").toLowerCase() === "audio");
  } catch {
    return [];
  }
}

/**
 * Drops every tracked stream that has no audio tracks or whose audio tracks
 * have all ended. A track without a `readyState` is treated as live.
 */
function pruneInactiveStreams() {
  // Deleting the current entry while iterating a Set is well-defined in JS.
  for (const stream of trackedStreams) {
    const tracks = getAudioTracks(stream);
    if (!tracks.length) {
      trackedStreams.delete(stream);
      continue;
    }
    const hasLive = tracks.some((track) => String(track?.readyState || "live").toLowerCase() !== "ended");
    if (!hasLive) trackedStreams.delete(stream);
  }
}

/**
 * Adds a stream to the registry so `stopTrackedMicStreams()` can stop it later.
 * Non-stream values are ignored. Each audio track gets a one-shot "ended"
 * listener that prunes finished streams from the registry.
 *
 * @param {unknown} stream - Stream returned by getUserMedia (or a look-alike).
 * @returns {void}
 */
export function registerMicStream(stream) {
  if (!isMediaStreamLike(stream)) return;
  trackedStreams.add(stream);
  const tracks = getAudioTracks(stream);
  for (const track of tracks) {
    try {
      track.addEventListener?.("ended", () => {
        pruneInactiveStreams();
      }, { once: true });
    } catch {
      // Best effort: a track that rejects listeners is still stopped and
      // pruned by stopTrackedMicStreams().
    }
  }
}

/**
 * Wraps `navigator.mediaDevices.getUserMedia` so every stream it resolves with
 * is registered automatically. Idempotent: repeated calls install the wrapper
 * once. Does nothing when `navigator.mediaDevices.getUserMedia` is unavailable.
 *
 * @returns {void}
 */
export function ensureMicTrackingPatched() {
  if (patched) return;
  const mediaDevices = globalThis?.navigator?.mediaDevices;
  if (!mediaDevices || typeof mediaDevices.getUserMedia !== "function") return;

  const previous = mediaDevices.getUserMedia;
  // Remember whether the method lived on the instance or was inherited, so the
  // undo step can put the object back exactly as it was found.
  const hadOwn = Object.prototype.hasOwnProperty.call(mediaDevices, "getUserMedia");
  const original = previous.bind(mediaDevices);
  const wrapper = async (...args) => {
    const stream = await original(...args);
    registerMicStream(stream);
    return stream;
  };
  mediaDevices.getUserMedia = wrapper;

  restoreGetUserMedia = () => {
    // If someone else patched on top of us, leave their wrapper in place.
    if (mediaDevices.getUserMedia !== wrapper) return;
    if (hadOwn) {
      mediaDevices.getUserMedia = previous;
    } else {
      delete mediaDevices.getUserMedia;
    }
  };
  patched = true;
}

/**
 * Stops every audio track of every tracked stream, then prunes streams that
 * no longer have a live audio track. Errors thrown by `track.stop()` are
 * ignored so that one failing track cannot block the rest of the teardown.
 *
 * @returns {void}
 */
export function stopTrackedMicStreams() {
  for (const stream of trackedStreams) {
    const tracks = getAudioTracks(stream);
    for (const track of tracks) {
      try {
        track.stop();
      } catch {
        // Best effort: keep stopping the remaining tracks.
      }
    }
  }
  pruneInactiveStreams();
}

/**
 * Test helper: forgets all tracked streams and uninstalls the getUserMedia
 * wrapper, so a later `ensureMicTrackingPatched()` wraps the real method
 * instead of stacking a second wrapper on top of the first.
 *
 * @returns {void}
 */
export function _resetMicTrackRegistryForTests() {
  trackedStreams.clear();
  if (restoreGetUserMedia) {
    try {
      restoreGetUserMedia();
    } catch {
      // Best effort: a frozen/non-configurable host object cannot be restored.
    }
    restoreGetUserMedia = null;
  }
  patched = false;
}
@@ -133,6 +133,9 @@ export const SETTINGS_SCHEMA = [
133
133
  { key: "AZURE_OPENAI_REALTIME_DEPLOYMENT", label: "Azure Deployment (legacy)", category: "voice", type: "select", defaultVal: "gpt-audio-1.5", options: ["gpt-audio-1.5", "gpt-realtime-1.5", "gpt-4o-realtime-preview", "custom"], description: "Legacy fallback: Azure deployment name. Use the Voice Endpoints card above. GA models (gpt-realtime-1.5) auto-use /openai/v1/ paths." },
134
134
  { key: "VOICE_ID", label: "Voice", category: "voice", type: "select", defaultVal: "alloy", options: ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"], description: "Voice personality for text-to-speech output." },
135
135
  { key: "VOICE_TURN_DETECTION", label: "Turn Detection", category: "voice", type: "select", defaultVal: "semantic_vad", options: ["server_vad", "semantic_vad", "none"], description: "How the model detects when you stop speaking. 'semantic_vad' is more intelligent but higher latency." },
136
+ { key: "VOICE_TRANSCRIPTION_ENABLED", label: "Input Transcription Enabled", category: "voice", type: "boolean", defaultVal: true, description: "Enable per-turn input audio transcription for OpenAI-compatible realtime sessions." },
137
+ { key: "VOICE_TRANSCRIPTION_MODEL", label: "Input Transcription Model", category: "voice", type: "string", defaultVal: "gpt-4o-transcribe", description: "Model used for input audio transcription when transcription is enabled." },
138
+ { key: "VOICE_AZURE_TRANSCRIPTION_ENABLED", label: "Azure Input Transcription", category: "voice", type: "boolean", defaultVal: false, description: "Enable input transcription specifically for Azure realtime sessions. Disabled by default to avoid Azure per-item transcription failures." },
136
139
  { key: "VOICE_DELEGATE_EXECUTOR", label: "Delegate Executor", category: "voice", type: "select", defaultVal: "codex-sdk", options: ["codex-sdk", "copilot-sdk", "claude-sdk", "gemini-sdk", "opencode-sdk"], description: "Which agent executor voice tool calls delegate to for complex tasks." },
137
140
  { key: "VOICE_FALLBACK_MODE", label: "Fallback Mode", category: "voice", type: "select", defaultVal: "browser", options: ["browser", "disabled"], description: "When Tier 1 (Realtime API) is unavailable, use browser speech APIs as fallback." },
138
141