bosun 0.36.0 → 0.36.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/.env.example +98 -16
  2. package/README.md +27 -0
  3. package/agent-event-bus.mjs +5 -5
  4. package/agent-pool.mjs +129 -12
  5. package/agent-prompts.mjs +7 -1
  6. package/agent-sdk.mjs +13 -2
  7. package/agent-supervisor.mjs +2 -2
  8. package/agent-work-report.mjs +1 -1
  9. package/anomaly-detector.mjs +6 -6
  10. package/autofix.mjs +15 -15
  11. package/bosun-skills.mjs +4 -4
  12. package/bosun.schema.json +160 -4
  13. package/claude-shell.mjs +11 -11
  14. package/cli.mjs +21 -21
  15. package/codex-config.mjs +19 -19
  16. package/codex-shell.mjs +180 -29
  17. package/config-doctor.mjs +27 -2
  18. package/config.mjs +60 -7
  19. package/copilot-shell.mjs +4 -4
  20. package/error-detector.mjs +1 -1
  21. package/fleet-coordinator.mjs +2 -2
  22. package/gemini-shell.mjs +692 -0
  23. package/github-oauth-portal.mjs +1 -1
  24. package/github-reconciler.mjs +2 -2
  25. package/kanban-adapter.mjs +741 -168
  26. package/merge-strategy.mjs +25 -25
  27. package/monitor.mjs +123 -105
  28. package/opencode-shell.mjs +22 -22
  29. package/package.json +7 -1
  30. package/postinstall.mjs +22 -22
  31. package/pr-cleanup-daemon.mjs +6 -6
  32. package/prepublish-check.mjs +4 -4
  33. package/presence.mjs +2 -2
  34. package/primary-agent.mjs +85 -7
  35. package/publish.mjs +1 -1
  36. package/review-agent.mjs +1 -1
  37. package/session-tracker.mjs +11 -0
  38. package/setup-web-server.mjs +429 -21
  39. package/setup.mjs +367 -12
  40. package/shared-knowledge.mjs +1 -1
  41. package/startup-service.mjs +9 -9
  42. package/stream-resilience.mjs +58 -4
  43. package/sync-engine.mjs +2 -2
  44. package/task-assessment.mjs +9 -9
  45. package/task-cli.mjs +1 -1
  46. package/task-complexity.mjs +71 -2
  47. package/task-context.mjs +1 -2
  48. package/task-executor.mjs +104 -41
  49. package/telegram-bot.mjs +825 -494
  50. package/telegram-sentinel.mjs +28 -28
  51. package/ui/app.js +256 -23
  52. package/ui/app.monolith.js +1 -1
  53. package/ui/components/agent-selector.js +4 -3
  54. package/ui/components/chat-view.js +101 -28
  55. package/ui/components/diff-viewer.js +3 -3
  56. package/ui/components/kanban-board.js +3 -3
  57. package/ui/components/session-list.js +255 -35
  58. package/ui/components/workspace-switcher.js +3 -3
  59. package/ui/demo.html +209 -194
  60. package/ui/index.html +3 -3
  61. package/ui/modules/icon-utils.js +206 -142
  62. package/ui/modules/icons.js +2 -27
  63. package/ui/modules/settings-schema.js +29 -5
  64. package/ui/modules/streaming.js +30 -2
  65. package/ui/modules/vision-stream.js +275 -0
  66. package/ui/modules/voice-client.js +102 -9
  67. package/ui/modules/voice-fallback.js +62 -6
  68. package/ui/modules/voice-overlay.js +594 -59
  69. package/ui/modules/voice.js +31 -38
  70. package/ui/setup.html +284 -34
  71. package/ui/styles/components.css +47 -0
  72. package/ui/styles/sessions.css +75 -0
  73. package/ui/tabs/agents.js +73 -43
  74. package/ui/tabs/chat.js +37 -40
  75. package/ui/tabs/control.js +2 -2
  76. package/ui/tabs/dashboard.js +1 -1
  77. package/ui/tabs/infra.js +10 -10
  78. package/ui/tabs/library.js +8 -8
  79. package/ui/tabs/logs.js +10 -10
  80. package/ui/tabs/settings.js +20 -20
  81. package/ui/tabs/tasks.js +76 -47
  82. package/ui-server.mjs +1761 -124
  83. package/update-check.mjs +13 -13
  84. package/ve-kanban.mjs +1 -1
  85. package/whatsapp-channel.mjs +5 -5
  86. package/workflow-engine.mjs +20 -1
  87. package/workflow-nodes.mjs +904 -4
  88. package/workflow-templates/agents.mjs +321 -7
  89. package/workflow-templates/ci-cd.mjs +6 -6
  90. package/workflow-templates/github.mjs +156 -84
  91. package/workflow-templates/planning.mjs +8 -8
  92. package/workflow-templates/reliability.mjs +8 -8
  93. package/workflow-templates/security.mjs +3 -3
  94. package/workflow-templates.mjs +15 -9
  95. package/workspace-manager.mjs +85 -1
  96. package/workspace-monitor.mjs +2 -2
  97. package/workspace-registry.mjs +2 -2
  98. package/worktree-manager.mjs +1 -1
@@ -609,8 +609,10 @@ export function startAgentStatusTracking() {
609
609
  if (!payload) return;
610
610
 
611
611
  const message = payload.message || payload;
612
- const role = message.role;
613
- const type = message.type;
612
+ const role = String(message.role || "").toLowerCase();
613
+ const type = String(message.type || "").toLowerCase();
614
+ const content = String(message.content || "").toLowerCase();
615
+ const lifecycle = String(message?.meta?.lifecycle || "").toLowerCase();
614
616
  const adapter = payload.session?.type || "";
615
617
  const sessionId = payload.sessionId || payload.taskId || payload.session?.id || "";
616
618
  const sessionStatus = payload.session?.status || "active";
@@ -622,12 +624,38 @@ export function startAgentStatusTracking() {
622
624
  return;
623
625
  }
624
626
 
627
+ const isCompletionEvent =
628
+ type === "turn.completed" ||
629
+ type === "session.completed" ||
630
+ lifecycle === "turn_completed" ||
631
+ lifecycle === "session_completed" ||
632
+ content.includes("turn completed") ||
633
+ content.includes("session completed") ||
634
+ content.includes("evt[turn.completed]") ||
635
+ content.includes("evt[session.completed]") ||
636
+ content.includes("session.idle");
637
+
638
+ if (isCompletionEvent) {
639
+ _setAgentState("idle", "", "");
640
+ return;
641
+ }
642
+
625
643
  if (role === "assistant" || type === "agent_message") {
626
644
  _setAgentState("streaming", adapter, sessionId);
627
645
  } else if (type === "tool_call") {
628
646
  _setAgentState("executing", adapter, sessionId);
629
647
  } else if (type === "tool_result") {
630
648
  _setAgentState("streaming", adapter, sessionId);
649
+ } else if (type === "system") {
650
+ if (
651
+ content.includes("running:") ||
652
+ content.includes("command done:") ||
653
+ content.includes("command_execution")
654
+ ) {
655
+ _setAgentState("executing", adapter, sessionId);
656
+ } else {
657
+ _setAgentState("thinking", adapter, sessionId);
658
+ }
631
659
  } else if (type === "error" || type === "stream_error") {
632
660
  _setAgentState("idle", "", "");
633
661
  }
@@ -0,0 +1,275 @@
1
+ /**
2
+ * vision-stream.js — Live camera/screen frame streaming for voice calls.
3
+ *
4
+ * Captures compressed JPEG frames at a fixed interval and sends them to
5
+ * /api/vision/frame with the active chat session context.
6
+ */
7
+
8
+ import { signal } from "@preact/signals";
9
+
10
// ── Public reactive state ───────────────────────────────────────────────────

/** Share lifecycle: "off" | "starting" | "streaming" | "error". */
export const visionShareState = signal("off");

/** Source currently being shared: "screen" | "camera" | null. */
export const visionShareSource = signal(null);

/** Message of the most recent failure, or null when there is none. */
export const visionShareError = signal(null);

/** Last non-empty summary returned by the frame endpoint ("" when none). */
export const visionLastSummary = signal("");

/** Date.now() value recorded when that summary arrived (0 when none). */
export const visionLastAnalyzedAt = signal(0);

// ── Private capture state ───────────────────────────────────────────────────

// MediaStream obtained from getDisplayMedia / getUserMedia.
let _stream = null;
// Detached <video> element the stream is attached to.
let _video = null;
// Detached <canvas> used to encode frames as JPEG.
let _canvas = null;
// Handle of the setInterval that drives periodic captures.
let _captureTimer = null;
// True while a frame upload is awaiting its response.
let _sendInFlight = false;
// Call context and capture settings sent along with every frame.
let _context = {
  sessionId: null,
  executor: null,
  mode: null,
  model: null,
  source: null,
  intervalMs: 1000,
  maxWidth: 1280,
  jpegQuality: 0.65,
};
31
+
32
/**
 * Coerce a requested capture source to a supported value.
 *
 * @param {unknown} source - Requested source; compared case-insensitively after trimming.
 * @returns {"camera" | "screen"} "camera" only for an explicit camera request, otherwise "screen".
 */
function normalizeSource(source) {
  const requested = String(source || "").trim().toLowerCase();
  return requested === "camera" ? "camera" : "screen";
}
37
+
38
/**
 * Parse a numeric option and clamp it to [min, max].
 *
 * Only numbers and non-blank strings are treated as supplied values.
 * Anything else (undefined, null, booleans, objects) yields the fallback:
 * `Number(null)`, `Number("")` and `Number(false)` all evaluate to 0, which
 * would otherwise be clamped to `min` instead of selecting the default.
 *
 * @param {unknown} value - Raw option value.
 * @param {number} fallback - Returned when value is missing or not a finite number.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number} The clamped number, or fallback.
 */
function normalizeNumber(value, fallback, min, max) {
  const isNumber = typeof value === "number";
  const isNonBlankString = typeof value === "string" && value.trim() !== "";
  if (!isNumber && !isNonBlankString) return fallback;

  const n = Number(value);
  if (!Number.isFinite(n)) return fallback;
  return Math.min(max, Math.max(min, n));
}
43
+
44
/**
 * Replace the capture context with a fresh object holding the idle defaults:
 * no session, executor, mode, model or source, and the default capture settings.
 */
function resetContext() {
  const idleDefaults = {
    sessionId: null,
    executor: null,
    mode: null,
    model: null,
    source: null,
    intervalMs: 1000,
    maxWidth: 1280,
    jpegQuality: 0.65,
  };
  _context = idleDefaults;
}
56
+
57
/**
 * Stop every track of a media stream.
 *
 * A failure while stopping one track is ignored so the remaining tracks are
 * still stopped.
 *
 * @param {MediaStream | null | undefined} stream - Stream to stop; falsy values are ignored.
 */
function stopTracks(stream) {
  if (stream) {
    stream.getTracks().forEach((mediaTrack) => {
      try {
        mediaTrack.stop();
      } catch {
        // ignore: stopping is best effort
      }
    });
  }
}
67
+
68
/**
 * Wait until a video element has frame data, or until a timeout elapses.
 *
 * Resolves immediately when there is no element or when it already reports
 * readyState >= 2 with non-zero dimensions. Otherwise resolves on the first
 * "loadedmetadata" or "loadeddata" event, or after `timeoutMs`, whichever
 * comes first. Never rejects.
 *
 * @param {HTMLVideoElement | null | undefined} video - Element to wait on.
 * @param {number} [timeoutMs=1500] - Maximum time to wait before giving up.
 * @returns {Promise<void>}
 */
async function waitForVideoReady(video, timeoutMs = 1500) {
  if (!video) return;
  if (video.readyState >= 2 && video.videoWidth > 0 && video.videoHeight > 0) return;

  await new Promise((resolve) => {
    let timer = null;

    const finish = () => {
      // Cancel the fallback timer so it does not linger after an event
      // already resolved the wait.
      clearTimeout(timer);
      try {
        video.removeEventListener("loadedmetadata", finish);
        video.removeEventListener("loadeddata", finish);
      } catch {
        // no-op
      }
      resolve();
    };

    video.addEventListener("loadedmetadata", finish, { once: true });
    video.addEventListener("loadeddata", finish, { once: true });
    timer = setTimeout(finish, timeoutMs);
  });
}
92
+
93
/**
 * Capture one frame from the video element, encode it as JPEG and POST it to
 * /api/vision/frame together with the active call context.
 *
 * Does nothing when a previous upload is still in flight, when the capture
 * elements or the session id are missing, or when the video has no
 * dimensions yet. Upload failures are reported through visionShareState /
 * visionShareError instead of being thrown; errors raised while drawing or
 * encoding the frame propagate to the caller.
 *
 * A response that arrives after the share was stopped or replaced is
 * discarded, so it cannot overwrite the state of the current share.
 *
 * @returns {Promise<void>}
 */
async function captureAndSendFrame() {
  if (_sendInFlight) return;
  if (!_video || !_canvas) return;
  if (!_context.sessionId) return;
  const vw = Number(_video.videoWidth) || 0;
  const vh = Number(_video.videoHeight) || 0;
  if (vw <= 0 || vh <= 0) return;

  // Downscale to at most maxWidth while keeping the aspect ratio.
  const targetWidth = Math.min(vw, Number(_context.maxWidth) || 1280);
  const targetHeight = Math.max(1, Math.round((vh * targetWidth) / vw));
  _canvas.width = targetWidth;
  _canvas.height = targetHeight;

  const ctx = _canvas.getContext("2d", { alpha: false, desynchronized: true });
  if (!ctx) return;
  ctx.drawImage(_video, 0, 0, targetWidth, targetHeight);
  const frameDataUrl = _canvas.toDataURL("image/jpeg", _context.jpegQuality);

  // The video element is created once per share, so it identifies the share
  // this upload belongs to across the awaits below.
  const shareVideo = _video;
  const isCurrentShare = () => _video === shareVideo;

  _sendInFlight = true;
  try {
    const res = await fetch("/api/vision/frame", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        sessionId: _context.sessionId,
        executor: _context.executor || undefined,
        mode: _context.mode || undefined,
        model: _context.model || undefined,
        source: _context.source || "screen",
        frameDataUrl,
        width: targetWidth,
        height: targetHeight,
      }),
    });
    // A non-JSON body is tolerated; only the status decides success.
    const data = await res.json().catch(() => ({}));
    if (!isCurrentShare()) return;
    if (!res.ok) {
      throw new Error(data?.error || `Vision upload failed (${res.status})`);
    }
    if (data?.analyzed && typeof data?.summary === "string" && data.summary.trim()) {
      visionLastSummary.value = data.summary.trim();
      visionLastAnalyzedAt.value = Date.now();
    }
  } catch (err) {
    // Do not flag an error on a share that has already been stopped/replaced.
    if (!isCurrentShare()) return;
    visionShareState.value = "error";
    visionShareError.value = err?.message || "Vision stream failed";
  } finally {
    // Release the flag for our own share (or when nothing is active); a newer
    // share owns the flag and may have its own upload in flight.
    if (isCurrentShare() || !_video) {
      _sendInFlight = false;
    }
  }
}
142
+
143
/**
 * Detach and drop the capture elements.
 *
 * Pauses the video and clears its srcObject (errors ignored), then forgets
 * both the video and the canvas.
 */
function cleanupDomNodes() {
  const activeVideo = _video;
  if (activeVideo) {
    try {
      activeVideo.pause();
      activeVideo.srcObject = null;
    } catch {
      // ignore: element teardown is best effort
    }
    _video = null;
  }
  _canvas = null;
}
155
+
156
/**
 * Start sharing the screen or camera for a chat session.
 *
 * Any running share is stopped first. The media stream is then acquired and
 * attached to a detached video element, and frames are captured every
 * `intervalMs` plus once immediately.
 *
 * @param {object} [options]
 * @param {string} options.sessionId - Chat session the frames belong to (required).
 * @param {string} [options.source] - "camera" or "screen"; anything else means "screen".
 * @param {number} [options.intervalMs=1000] - Capture interval, clamped to 300–10000 ms.
 * @param {number} [options.maxWidth=1280] - Maximum frame width, clamped to 320–1920 px.
 * @param {number} [options.jpegQuality=0.65] - JPEG quality, clamped to 0.35–0.92.
 * @param {string} [options.executor] - Forwarded with each frame when non-empty.
 * @param {string} [options.mode] - Forwarded with each frame when non-empty.
 * @param {string} [options.model] - Forwarded with each frame when non-empty.
 * @returns {Promise<void>}
 * @throws {Error} When sessionId is missing, or when the media stream cannot be
 *   started; in the latter case the state signals are set to "error" first.
 */
export async function startVisionShare(options = {}) {
  const sessionId = String(options?.sessionId || "").trim();
  if (!sessionId) {
    throw new Error("sessionId required to start vision share");
  }

  const source = normalizeSource(options?.source);
  const intervalMs = normalizeNumber(options?.intervalMs, 1000, 300, 10_000);
  const maxWidth = normalizeNumber(options?.maxWidth, 1280, 320, 1920);
  const jpegQuality = normalizeNumber(options?.jpegQuality, 0.65, 0.35, 0.92);

  await stopVisionShare();
  visionShareState.value = "starting";
  visionShareError.value = null;
  visionShareSource.value = source;

  try {
    if (!navigator?.mediaDevices) {
      throw new Error("Media devices API unavailable");
    }

    if (source === "screen") {
      if (typeof navigator.mediaDevices.getDisplayMedia !== "function") {
        throw new Error("Screen sharing is not supported in this browser");
      }
      _stream = await navigator.mediaDevices.getDisplayMedia({
        video: {
          frameRate: { ideal: 6, max: 12 },
        },
        audio: false,
      });
    } else {
      _stream = await navigator.mediaDevices.getUserMedia({
        video: {
          width: { ideal: 1280 },
          height: { ideal: 720 },
          frameRate: { ideal: 8, max: 12 },
        },
        audio: false,
      });
    }

    const videoTrack = _stream.getVideoTracks()[0];
    if (!videoTrack) {
      throw new Error("No video track available");
    }
    // Tear the share down when the track ends on its own.
    videoTrack.addEventListener("ended", () => {
      stopVisionShare().catch(() => {});
    });

    _video = document.createElement("video");
    _video.autoplay = true;
    _video.muted = true;
    _video.playsInline = true;
    _video.srcObject = _stream;
    _canvas = document.createElement("canvas");

    await waitForVideoReady(_video);
    try {
      await _video.play();
    } catch {
      // Some browsers auto-play once frames are requested.
    }

    _context = {
      sessionId,
      executor: String(options?.executor || "").trim() || null,
      mode: String(options?.mode || "").trim() || null,
      model: String(options?.model || "").trim() || null,
      source,
      intervalMs,
      maxWidth,
      jpegQuality,
    };

    visionShareState.value = "streaming";
    _captureTimer = setInterval(() => {
      captureAndSendFrame().catch((err) => {
        visionShareState.value = "error";
        visionShareError.value = err?.message || "Vision capture failed";
      });
    }, intervalMs);
    // Send the first frame right away instead of waiting a full interval.
    await captureAndSendFrame();
  } catch (err) {
    // The interval may already be running if the first capture threw; stop it
    // so a failed start does not leave a timer ticking against torn-down state.
    if (_captureTimer) {
      clearInterval(_captureTimer);
      _captureTimer = null;
    }
    stopTracks(_stream);
    _stream = null;
    cleanupDomNodes();
    resetContext();
    visionShareState.value = "error";
    visionShareError.value = err?.message || "Could not start vision share";
    throw err;
  }
}
249
+
250
/**
 * Stop the current share, if any, and return every signal to its idle value.
 *
 * Cancels the capture interval, stops the stream's tracks, drops the capture
 * elements and context, and clears the in-flight flag. Safe to call when
 * nothing is being shared.
 *
 * @returns {Promise<void>}
 */
export async function stopVisionShare() {
  const activeTimer = _captureTimer;
  if (activeTimer) {
    clearInterval(activeTimer);
    _captureTimer = null;
  }

  const activeStream = _stream;
  stopTracks(activeStream);
  _stream = null;

  cleanupDomNodes();
  resetContext();
  _sendInFlight = false;

  // Reset the public signals in a fixed order.
  const idleValues = [
    [visionShareState, "off"],
    [visionShareSource, null],
    [visionShareError, null],
    [visionLastSummary, ""],
    [visionLastAnalyzedAt, 0],
  ];
  for (const [sig, idle] of idleValues) {
    sig.value = idle;
  }
}
266
+
267
/**
 * Toggle sharing for a given source.
 *
 * If that source is currently streaming, the share is stopped. In every other
 * case a share for that source is started; starting stops any existing share
 * first.
 *
 * @param {string} source - "camera" or "screen"; anything else means "screen".
 * @param {object} [options] - Forwarded to startVisionShare (source is overridden).
 * @returns {Promise<boolean>} false when the share was stopped, true when one was started.
 */
export async function toggleVisionShare(source, options = {}) {
  const nextSource = normalizeSource(source);
  const isStreaming = visionShareState.value === "streaming";
  const alreadySharingIt = isStreaming && visionShareSource.value === nextSource;

  if (!alreadySharingIt) {
    await startVisionShare({ ...options, source: nextSource });
    return true;
  }

  await stopVisionShare();
  return false;
}
@@ -17,6 +17,7 @@ export const voiceResponse = signal(""); // current assistant response text
17
17
  export const voiceError = signal(null);
18
18
  export const voiceToolCalls = signal([]); // active tool calls
19
19
  export const voiceSessionId = signal(null);
20
+ export const voiceBoundSessionId = signal(null);
20
21
  export const voiceDuration = signal(0); // seconds connected
21
22
 
22
23
  export const isVoiceActive = computed(() =>
@@ -33,11 +34,48 @@ let _reconnectTimer = null; // 28-min reconnect timer
33
34
  let _durationTimer = null; // Duration counter
34
35
  let _sessionStartTime = 0;
35
36
  let _eventHandlers = new Map();
37
+ let _callContext = {
38
+ sessionId: null,
39
+ executor: null,
40
+ mode: null,
41
+ model: null,
42
+ };
36
43
 
37
44
  const RECONNECT_AT_MS = 28 * 60 * 1000; // 28 minutes
38
45
  const MAX_RECONNECT_ATTEMPTS = 3;
39
46
  let _reconnectAttempts = 0;
40
47
 
48
/**
 * Build a call context from caller options.
 *
 * Each field is stringified and trimmed; missing or blank values become null.
 *
 * @param {{ sessionId?: unknown, executor?: unknown, mode?: unknown, model?: unknown } | null} [options]
 * @returns {{ sessionId: string | null, executor: string | null, mode: string | null, model: string | null }}
 */
function _normalizeCallContext(options = {}) {
  const clean = (raw) => String(raw || "").trim() || null;
  return {
    sessionId: clean(options?.sessionId),
    executor: clean(options?.executor),
    mode: clean(options?.mode),
    model: clean(options?.model),
  };
}
55
+
56
/**
 * Persist one transcript entry by POSTing it to /api/voice/transcript.
 *
 * The session id comes from the call context, falling back to
 * voiceSessionId. Nothing is sent when there is no session id or the text is
 * blank. Persistence is best effort: failures are logged as warnings and
 * never thrown.
 *
 * @param {string} role - Speaker role stored with the entry (e.g. "user", "assistant").
 * @param {unknown} content - Transcript text; stringified and trimmed.
 * @param {string} [eventType=""] - Name of the event that produced the text.
 * @returns {Promise<void>}
 */
async function _recordVoiceTranscript(role, content, eventType = "") {
  const sessionId = String(_callContext?.sessionId || voiceSessionId.value || "").trim();
  const text = String(content || "").trim();
  if (!sessionId || !text) return;
  try {
    const res = await fetch("/api/voice/transcript", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        sessionId,
        role,
        content: text,
        eventType,
        executor: _callContext?.executor || undefined,
        mode: _callContext?.mode || undefined,
        model: _callContext?.model || undefined,
      }),
    });
    // fetch only rejects on network failure; surface HTTP errors as well so
    // a rejected transcript does not go unnoticed.
    if (!res.ok) {
      console.warn(`[voice-client] transcript persistence failed: HTTP ${res.status}`);
    }
  } catch (err) {
    console.warn("[voice-client] transcript persistence failed:", err?.message || err);
  }
}
78
+
41
79
  // ── Event System ────────────────────────────────────────────────────────────
42
80
 
43
81
  export function onVoiceEvent(event, handler) {
@@ -67,12 +105,14 @@ function emit(event, data) {
67
105
  * 4. Set up data channel for events
68
106
  * 5. Create offer, set remote answer
69
107
  */
70
- export async function startVoiceSession() {
108
+ export async function startVoiceSession(options = {}) {
71
109
  if (_pc) {
72
110
  console.warn("[voice-client] Session already active");
73
111
  return;
74
112
  }
75
113
 
114
+ _callContext = _normalizeCallContext(options);
115
+ voiceBoundSessionId.value = _callContext.sessionId;
76
116
  voiceState.value = "connecting";
77
117
  voiceError.value = null;
78
118
  voiceTranscript.value = "";
@@ -82,7 +122,17 @@ export async function startVoiceSession() {
82
122
 
83
123
  try {
84
124
  // 1. Fetch ephemeral token
85
- const tokenRes = await fetch("/api/voice/token", { method: "POST" });
125
+ const tokenRes = await fetch("/api/voice/token", {
126
+ method: "POST",
127
+ headers: { "Content-Type": "application/json" },
128
+ body: JSON.stringify({
129
+ sessionId: _callContext.sessionId || undefined,
130
+ executor: _callContext.executor || undefined,
131
+ mode: _callContext.mode || undefined,
132
+ model: _callContext.model || undefined,
133
+ delegateOnly: Boolean(_callContext.sessionId),
134
+ }),
135
+ });
86
136
  if (!tokenRes.ok) {
87
137
  const err = await tokenRes.json().catch(() => ({ error: "Token fetch failed" }));
88
138
  throw new Error(err.error || `Token fetch failed (${tokenRes.status})`);
@@ -90,6 +140,19 @@ export async function startVoiceSession() {
90
140
  const tokenData = await tokenRes.json();
91
141
 
92
142
  // 2. Get microphone
143
+ const mediaDevices = navigator?.mediaDevices;
144
+ if (!mediaDevices?.getUserMedia) {
145
+ const host = String(globalThis.location?.hostname || "").toLowerCase();
146
+ const localhostLike =
147
+ host === "localhost" || host === "127.0.0.1" || host === "::1";
148
+ if (!globalThis.isSecureContext && !localhostLike) {
149
+ throw new Error(
150
+ "Microphone access requires HTTPS (or localhost). Open the UI via the Cloudflare HTTPS URL or localhost.",
151
+ );
152
+ }
153
+ throw new Error("Microphone API unavailable in this browser/runtime.");
154
+ }
155
+
93
156
  _mediaStream = await navigator.mediaDevices.getUserMedia({
94
157
  audio: {
95
158
  echoCancellation: true,
@@ -119,10 +182,14 @@ export async function startVoiceSession() {
119
182
  _dc.onopen = () => {
120
183
  voiceState.value = "connected";
121
184
  _sessionStartTime = Date.now();
122
- voiceSessionId.value = `voice-${Date.now()}`;
185
+ voiceSessionId.value = _callContext.sessionId || `voice-${Date.now()}`;
123
186
  startDurationTimer();
124
187
  startReconnectTimer();
125
- emit("connected", { provider: tokenData.provider });
188
+ emit("connected", {
189
+ provider: tokenData.provider,
190
+ sessionId: voiceSessionId.value,
191
+ callContext: { ..._callContext },
192
+ });
126
193
  };
127
194
  _dc.onclose = () => {
128
195
  if (voiceState.value !== "reconnecting") {
@@ -162,7 +229,10 @@ export async function startVoiceSession() {
162
229
  const answerSdp = await sdpResponse.text();
163
230
  await _pc.setRemoteDescription({ type: "answer", sdp: answerSdp });
164
231
 
165
- emit("session-started", { sessionId: voiceSessionId.value });
232
+ emit("session-started", {
233
+ sessionId: voiceSessionId.value,
234
+ callContext: { ..._callContext },
235
+ });
166
236
  } catch (err) {
167
237
  console.error("[voice-client] Failed to start voice session:", err);
168
238
  voiceState.value = "error";
@@ -183,7 +253,9 @@ export function stopVoiceSession() {
183
253
  voiceResponse.value = "";
184
254
  voiceToolCalls.value = [];
185
255
  voiceSessionId.value = null;
256
+ voiceBoundSessionId.value = null;
186
257
  voiceDuration.value = 0;
258
+ _callContext = { sessionId: null, executor: null, mode: null, model: null };
187
259
  emit("session-ended", {});
188
260
  }
189
261
 
@@ -211,6 +283,11 @@ function handleServerEvent(event) {
211
283
  case "conversation.item.input_audio_transcription.completed":
212
284
  voiceTranscript.value = event.transcript || "";
213
285
  emit("transcript", { text: event.transcript, final: true });
286
+ _recordVoiceTranscript(
287
+ "user",
288
+ event.transcript || "",
289
+ "conversation.item.input_audio_transcription.completed",
290
+ );
214
291
  break;
215
292
 
216
293
  case "response.audio_transcript.delta":
@@ -220,6 +297,11 @@ function handleServerEvent(event) {
220
297
 
221
298
  case "response.audio_transcript.done":
222
299
  emit("response-complete", { text: voiceResponse.value });
300
+ _recordVoiceTranscript(
301
+ "assistant",
302
+ voiceResponse.value,
303
+ "response.audio_transcript.done",
304
+ );
223
305
  voiceResponse.value = "";
224
306
  break;
225
307
 
@@ -238,7 +320,9 @@ function handleServerEvent(event) {
238
320
  break;
239
321
 
240
322
  case "response.function_call_arguments.done":
241
- handleToolCall(event);
323
+ handleToolCall(event).catch((err) => {
324
+ console.error("[voice-client] Tool call handling failed:", err);
325
+ });
242
326
  break;
243
327
 
244
328
  case "response.done":
@@ -284,7 +368,14 @@ async function handleToolCall(event) {
284
368
  const res = await fetch("/api/voice/tool", {
285
369
  method: "POST",
286
370
  headers: { "Content-Type": "application/json" },
287
- body: JSON.stringify({ toolName: name, args, sessionId: voiceSessionId.value }),
371
+ body: JSON.stringify({
372
+ toolName: name,
373
+ args,
374
+ sessionId: voiceSessionId.value,
375
+ executor: _callContext.executor || undefined,
376
+ mode: _callContext.mode || undefined,
377
+ model: _callContext.model || undefined,
378
+ }),
288
379
  });
289
380
  const result = await res.json();
290
381
 
@@ -364,7 +455,9 @@ export function sendTextMessage(text) {
364
455
  function startReconnectTimer() {
365
456
  clearTimeout(_reconnectTimer);
366
457
  _reconnectTimer = setTimeout(() => {
367
- reconnect();
458
+ reconnect().catch((err) => {
459
+ console.error("[voice-client] Reconnect timer error:", err);
460
+ });
368
461
  }, RECONNECT_AT_MS);
369
462
  }
370
463
 
@@ -385,7 +478,7 @@ async function reconnect() {
385
478
  _mediaStream = stream;
386
479
 
387
480
  try {
388
- await startVoiceSession();
481
+ await startVoiceSession(_callContext);
389
482
  } catch (err) {
390
483
  console.error("[voice-client] Reconnect failed:", err);
391
484
  if (_reconnectAttempts < MAX_RECONNECT_ATTEMPTS) {
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * voice-fallback.js — Tier 2 voice using browser Web Speech API + bosun chat.
3
3
  *
4
- * Flow: SpeechRecognition → text → POST /api/sessions/{id}/messageresponse → SpeechSynthesis
4
+ * Flow: SpeechRecognition → text → POST /api/voice/tool(delegate_to_agent) → SpeechSynthesis
5
5
  *
6
6
  * @module voice-fallback
7
7
  */
@@ -17,6 +17,11 @@ let _recognition = null;
17
17
  let _synthesis = null;
18
18
  let _sessionId = null;
19
19
  let _isSpeaking = false;
20
+ let _callContext = {
21
+ executor: null,
22
+ mode: null,
23
+ model: null,
24
+ };
20
25
 
21
26
  const SpeechRecognition = typeof globalThis !== "undefined"
22
27
  ? (globalThis.SpeechRecognition || globalThis.webkitSpeechRecognition)
@@ -24,18 +29,48 @@ const SpeechRecognition = typeof globalThis !== "undefined"
24
29
 
25
30
  export const fallbackSupported = Boolean(SpeechRecognition) && typeof globalThis.speechSynthesis !== "undefined";
26
31
 
32
/**
 * Persist one fallback-voice transcript entry via POST /api/voice/transcript.
 *
 * Nothing is sent when no session is bound or the text is blank. Failures
 * are intentionally ignored: persistence is best effort.
 *
 * @param {string} role - Speaker role stored with the entry.
 * @param {unknown} content - Transcript text; stringified and trimmed.
 * @param {string} [eventType=""] - Name of the event that produced the text.
 * @returns {Promise<void>}
 */
async function recordFallbackTranscript(role, content, eventType = "") {
  const boundSessionId = String(_sessionId || "").trim();
  const trimmedText = String(content || "").trim();
  if (!(boundSessionId && trimmedText)) return;

  const payload = {
    sessionId: boundSessionId,
    role,
    content: trimmedText,
    eventType,
    executor: _callContext.executor || undefined,
    mode: _callContext.mode || undefined,
    model: _callContext.model || undefined,
    provider: "fallback",
  };
  const request = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  };

  try {
    await fetch("/api/voice/transcript", request);
  } catch {
    // best effort persistence
  }
}
55
+
27
56
  /**
28
57
  * Start a fallback voice session.
29
58
  * @param {string} sessionId — existing chat session ID to use
59
+ * @param {{ executor?: string, mode?: string, model?: string }} [options]
30
60
  */
31
- export function startFallbackSession(sessionId) {
61
+ export function startFallbackSession(sessionId, options = {}) {
32
62
  if (!fallbackSupported) {
33
63
  fallbackError.value = "Speech APIs not supported in this browser";
34
64
  fallbackState.value = "error";
35
65
  return;
36
66
  }
37
67
 
38
- _sessionId = sessionId;
68
+ _sessionId = sessionId || null;
69
+ _callContext = {
70
+ executor: String(options?.executor || "").trim() || null,
71
+ mode: String(options?.mode || "").trim() || null,
72
+ model: String(options?.model || "").trim() || null,
73
+ };
39
74
  _synthesis = globalThis.speechSynthesis;
40
75
  fallbackState.value = "idle";
41
76
  fallbackError.value = null;
@@ -52,6 +87,7 @@ export function stopFallbackSession() {
52
87
  fallbackTranscript.value = "";
53
88
  fallbackResponse.value = "";
54
89
  _sessionId = null;
90
+ _callContext = { executor: null, mode: null, model: null };
55
91
  }
56
92
 
57
93
  function startListening() {
@@ -119,12 +155,26 @@ function stopListening() {
119
155
  async function processUserInput(text) {
120
156
  fallbackState.value = "processing";
121
157
  stopListening();
158
+ await recordFallbackTranscript("user", text, "fallback.user_input");
122
159
 
123
160
  try {
124
- const res = await fetch(`/api/sessions/${_sessionId}/message`, {
161
+ // Use the same server-side voice tool pipeline as Tier 1 for consistency.
162
+ const res = await fetch("/api/voice/tool", {
125
163
  method: "POST",
126
164
  headers: { "Content-Type": "application/json" },
127
- body: JSON.stringify({ message: text }),
165
+ body: JSON.stringify({
166
+ toolName: "delegate_to_agent",
167
+ args: {
168
+ message: text,
169
+ mode: _callContext.mode || "ask",
170
+ executor: _callContext.executor || undefined,
171
+ model: _callContext.model || undefined,
172
+ },
173
+ sessionId: _sessionId || undefined,
174
+ executor: _callContext.executor || undefined,
175
+ mode: _callContext.mode || undefined,
176
+ model: _callContext.model || undefined,
177
+ }),
128
178
  });
129
179
 
130
180
  if (!res.ok) {
@@ -132,9 +182,15 @@ async function processUserInput(text) {
132
182
  }
133
183
 
134
184
  const data = await res.json();
135
- const responseText = data.response || data.text || data.message || data.content || JSON.stringify(data);
185
+ const responseText =
186
+ data?.result ||
187
+ data?.text ||
188
+ data?.message ||
189
+ data?.content ||
190
+ (data?.error ? `Error: ${data.error}` : JSON.stringify(data));
136
191
 
137
192
  fallbackResponse.value = responseText;
193
+ await recordFallbackTranscript("assistant", responseText, "fallback.assistant_output");
138
194
  await speak(responseText);
139
195
  } catch (err) {
140
196
  fallbackError.value = `Processing error: ${err.message}`;