@runtypelabs/persona 1.47.0 → 1.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/widget.css CHANGED
@@ -1060,6 +1060,30 @@
1060
1060
  animation: tvw-voice-recording-pulse 1.5s ease-in-out infinite;
1061
1061
  }
1062
1062
 
1063
/* Processing state: the button's icon rotates continuously, like a spinner. */
@keyframes tvw-voice-processing-spin {
  0% {
    transform: rotate(0deg);
  }
  100% {
    transform: rotate(360deg);
  }
}

.tvw-voice-processing svg {
  animation: tvw-voice-processing-spin 1.2s linear infinite;
}

/* Speaking state: a soft opacity/scale pulse on a 2s cycle
   (slower than the 1.5s recording pulse). */
@keyframes tvw-voice-speaking-pulse {
  0%, 100% {
    opacity: 1;
    transform: scale(1);
  }
  50% {
    opacity: 0.85;
    transform: scale(1.03);
  }
}

/* The same pulse runs on both the button and the icon inside it. */
.tvw-voice-speaking,
.tvw-voice-speaking svg {
  animation: tvw-voice-speaking-pulse 2s ease-in-out infinite;
}
1086
+
1063
1087
  /* Markdown content overflow handling */
1064
1088
  .vanilla-message-bubble pre {
1065
1089
  overflow-x: auto;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@runtypelabs/persona",
3
- "version": "1.47.0",
3
+ "version": "1.48.0",
4
4
  "description": "Themeable, pluggable streaming agent widget for websites, in plain JS with support for voice input and reasoning / tool output.",
5
5
  "type": "module",
6
6
  "main": "dist/index.cjs",
package/src/session.ts CHANGED
@@ -19,14 +19,12 @@ import {
19
19
  import { IMAGE_ONLY_MESSAGE_FALLBACK_TEXT } from "./utils/content";
20
20
  import type {
21
21
  VoiceProvider,
22
- VoiceResult,
23
22
  VoiceStatus,
24
23
  VoiceConfig,
25
24
  TextToSpeechConfig
26
25
  } from "./types";
27
26
  import {
28
27
  createVoiceProvider,
29
- createBestAvailableVoiceProvider,
30
28
  isVoiceSupported
31
29
  } from "./voice";
32
30
 
@@ -136,6 +134,38 @@ export class AgentWidgetSession {
136
134
  return this.voiceStatus;
137
135
  }
138
136
 
137
/**
 * Report how the active voice provider handles interruptions.
 *
 * @returns The mode reported by the provider's `getInterruptionMode()`, or
 *   `"none"` when there is no provider or it does not implement that method.
 */
public getVoiceInterruptionMode(): "none" | "cancel" | "barge-in" {
  const provider = this.voiceProvider;
  return provider?.getInterruptionMode ? provider.getInterruptionMode() : "none";
}
146
+
147
/**
 * Ask the voice provider to stop playback / cancel its in-flight request
 * without starting a new recording.
 *
 * Does nothing when there is no provider or it has no `stopPlayback()`.
 */
public stopVoicePlayback(): void {
  this.voiceProvider?.stopPlayback?.();
}
156
+
157
/**
 * Whether the provider reports its barge-in mic stream as alive
 * (hot mic between turns).
 *
 * @returns The provider's `isBargeInActive()` result, or `false` when the
 *   provider is missing or does not implement the method.
 */
public isBargeInActive(): boolean {
  const provider = this.voiceProvider;
  if (!provider?.isBargeInActive) {
    return false;
  }
  return provider.isBargeInActive() ?? false;
}
161
+
162
/**
 * "Hang up" the always-on mic by delegating to the provider's
 * `deactivateBargeIn()`.
 *
 * Resolves once the provider call settles; resolves immediately when the
 * provider is missing or does not implement the method.
 */
public async deactivateBargeIn(): Promise<void> {
  const provider = this.voiceProvider;
  if (!provider?.deactivateBargeIn) {
    return;
  }
  await provider.deactivateBargeIn();
}
168
+
139
169
  // Pending placeholder IDs for Runtype two-phase voice flow
140
170
  private pendingVoiceUserMessageId: string | null = null;
141
171
  private pendingVoiceAssistantMessageId: string | null = null;
@@ -227,9 +257,12 @@ export class AgentWidgetSession {
227
257
  this.injectAssistantMessage({ content: result.text.trim() });
228
258
  }
229
259
 
230
- // If Runtype provider returned audio (server-side TTS), mark the
231
- // assistant message as already spoken so browser TTS doesn't double-speak
232
- if (result.audio?.base64) {
260
+ // Mark assistant message as already spoken so browser TTS doesn't
261
+ // double-speak. This covers both paths:
262
+ // - Batch: audio.base64 is present in the voice_response
263
+ // - Streaming: audio arrives as binary PCM chunks (no base64 here)
264
+ // In either case, the Runtype provider handles TTS — browser TTS must skip.
265
+ {
233
266
  const spokenId = this.pendingVoiceAssistantMessageId
234
267
  ?? [...this.messages].reverse().find(m => m.role === 'assistant')?.id;
235
268
  if (spokenId) this.ttsSpokenMessageIds.add(spokenId);
@@ -1060,6 +1060,30 @@
1060
1060
  animation: tvw-voice-recording-pulse 1.5s ease-in-out infinite;
1061
1061
  }
1062
1062
 
1063
/* Processing state: the button's icon rotates continuously, like a spinner. */
@keyframes tvw-voice-processing-spin {
  0% {
    transform: rotate(0deg);
  }
  100% {
    transform: rotate(360deg);
  }
}

.tvw-voice-processing svg {
  animation: tvw-voice-processing-spin 1.2s linear infinite;
}

/* Speaking state: a soft opacity/scale pulse on a 2s cycle
   (slower than the 1.5s recording pulse). */
@keyframes tvw-voice-speaking-pulse {
  0%, 100% {
    opacity: 1;
    transform: scale(1);
  }
  50% {
    opacity: 0.85;
    transform: scale(1.03);
  }
}

/* The same pulse runs on both the button and the icon inside it. */
.tvw-voice-speaking,
.tvw-voice-speaking svg {
  animation: tvw-voice-speaking-pulse 2s ease-in-out infinite;
}
1086
+
1063
1087
  /* Markdown content overflow handling */
1064
1088
  .vanilla-message-bubble pre {
1065
1089
  overflow-x: auto;
package/src/types.ts CHANGED
@@ -696,6 +696,26 @@ export type AgentWidgetVoiceRecognitionConfig = {
696
696
  recordingBackgroundColor?: string;
697
697
  recordingBorderColor?: string;
698
698
  showRecordingIndicator?: boolean;
699
+
700
+ // Processing state (after recording stops, waiting for agent response)
701
+ /** Icon name shown while processing voice input. Default: "loader" */
702
+ processingIconName?: string;
703
+ /** Icon color during processing. Inherits idle iconColor if not set */
704
+ processingIconColor?: string;
705
+ /** Button background color during processing. Inherits idle backgroundColor if not set */
706
+ processingBackgroundColor?: string;
707
+ /** Button border color during processing. Inherits idle borderColor if not set */
708
+ processingBorderColor?: string;
709
+
710
+ // Speaking state (agent TTS audio is playing)
711
+ /** Icon name shown while agent is speaking. Default: "volume-2" ("square" in cancel mode, "mic" in barge-in mode) */
712
+ speakingIconName?: string;
713
+ /** Icon color while speaking. Inherits idle iconColor if not set */
714
+ speakingIconColor?: string;
715
+ /** Button background color while speaking. Inherits idle backgroundColor if not set */
716
+ speakingBackgroundColor?: string;
717
+ /** Button border color while speaking. Inherits idle borderColor if not set */
718
+ speakingBorderColor?: string;
699
719
  autoResume?: boolean | "assistant";
700
720
 
701
721
  // Voice provider configuration
@@ -794,11 +814,12 @@ export type VoiceResult = {
794
814
  /**
795
815
  * Voice provider status states
796
816
  */
797
- export type VoiceStatus =
817
+ export type VoiceStatus =
798
818
  | 'disconnected'
799
819
  | 'connected'
800
820
  | 'listening'
801
821
  | 'processing'
822
+ | 'speaking'
802
823
  | 'error'
803
824
  | 'idle';
804
825
 
@@ -843,6 +864,18 @@ export interface VoiceProvider {
843
864
 
844
865
  /** Register a callback fired when recording stops and audio is about to be sent */
845
866
  onProcessingStart?(callback: () => void): void;
867
+
868
+ /** Returns the current interruption mode (only meaningful for Runtype provider) */
869
+ getInterruptionMode?(): "none" | "cancel" | "barge-in";
870
+
871
+ /** Returns true if the barge-in mic stream is alive (hot mic between turns) */
872
+ isBargeInActive?(): boolean;
873
+
874
+ /** Tear down the barge-in mic pipeline — "hang up" the always-on mic */
875
+ deactivateBargeIn?(): Promise<void>;
876
+
877
+ /** Stop playback / cancel in-flight request without starting recording */
878
+ stopPlayback?(): void;
846
879
  }
847
880
 
848
881
  /**
package/src/ui.ts CHANGED
@@ -2053,12 +2053,34 @@ export const createAgentExperience = (
2053
2053
  }
2054
2054
  },
2055
2055
  onVoiceStatusChanged(status: VoiceStatus) {
2056
- // When Runtype provider auto-stops (e.g. silence detection), update mic button
2057
- if (config.voiceRecognition?.provider?.type === 'runtype' && status !== 'listening') {
2058
- voiceState.active = false;
2059
- removeRuntypeMicRecordingStyles();
2060
- emitVoiceState("system");
2061
- persistVoiceMetadata();
2056
+ if (config.voiceRecognition?.provider?.type !== 'runtype') return;
2057
+
2058
+ switch (status) {
2059
+ case 'listening':
2060
+ // Recording styles are applied by toggleVoice() / startVoiceRecognition() flows
2061
+ break;
2062
+ case 'processing':
2063
+ removeRuntypeMicStateStyles();
2064
+ applyRuntypeMicProcessingStyles();
2065
+ break;
2066
+ case 'speaking':
2067
+ removeRuntypeMicStateStyles();
2068
+ applyRuntypeMicSpeakingStyles();
2069
+ break;
2070
+ default:
2071
+ // idle, connected, disconnected, error
2072
+ if (status === 'idle' && session.isBargeInActive()) {
2073
+ // Barge-in mic is still hot between turns — show it as active
2074
+ removeRuntypeMicStateStyles();
2075
+ applyRuntypeMicRecordingStyles();
2076
+ micButton?.setAttribute("aria-label", "End voice session");
2077
+ } else {
2078
+ voiceState.active = false;
2079
+ removeRuntypeMicStateStyles();
2080
+ emitVoiceState("system");
2081
+ persistVoiceMetadata();
2082
+ }
2083
+ break;
2062
2084
  }
2063
2085
  }
2064
2086
  });
@@ -2176,6 +2198,8 @@ export const createAgentExperience = (
2176
2198
  backgroundColor: string;
2177
2199
  color: string;
2178
2200
  borderColor: string;
2201
+ iconName: string;
2202
+ iconSize: number;
2179
2203
  } | null = null;
2180
2204
 
2181
2205
  const getSpeechRecognitionClass = (): any => {
@@ -2273,15 +2297,17 @@ export const createAgentExperience = (
2273
2297
  emitVoiceState(source);
2274
2298
  persistVoiceMetadata();
2275
2299
  if (micButton) {
2276
- // Store original styles
2300
+ // Store original styles (including icon info for restoration)
2301
+ const voiceConfig = config.voiceRecognition ?? {};
2277
2302
  originalMicStyles = {
2278
2303
  backgroundColor: micButton.style.backgroundColor,
2279
2304
  color: micButton.style.color,
2280
- borderColor: micButton.style.borderColor
2305
+ borderColor: micButton.style.borderColor,
2306
+ iconName: voiceConfig.iconName ?? "mic",
2307
+ iconSize: parseFloat(voiceConfig.iconSize ?? config.sendButton?.size ?? "40") || 24,
2281
2308
  };
2282
-
2309
+
2283
2310
  // Apply recording state styles from config
2284
- const voiceConfig = config.voiceRecognition ?? {};
2285
2311
  const recordingBackgroundColor = voiceConfig.recordingBackgroundColor ?? "#ef4444";
2286
2312
  const recordingIconColor = voiceConfig.recordingIconColor;
2287
2313
  const recordingBorderColor = voiceConfig.recordingBorderColor;
@@ -2334,24 +2360,7 @@ export const createAgentExperience = (
2334
2360
  persistVoiceMetadata();
2335
2361
 
2336
2362
  if (micButton) {
2337
- micButton.classList.remove("tvw-voice-recording");
2338
-
2339
- // Restore original styles
2340
- if (originalMicStyles) {
2341
- micButton.style.backgroundColor = originalMicStyles.backgroundColor;
2342
- micButton.style.color = originalMicStyles.color;
2343
- micButton.style.borderColor = originalMicStyles.borderColor;
2344
-
2345
- // Restore SVG stroke color if present
2346
- const svg = micButton.querySelector("svg");
2347
- if (svg) {
2348
- svg.setAttribute("stroke", originalMicStyles.color || "currentColor");
2349
- }
2350
-
2351
- originalMicStyles = null;
2352
- }
2353
-
2354
- micButton.setAttribute("aria-label", "Start voice recognition");
2363
+ removeRuntypeMicStateStyles();
2355
2364
  }
2356
2365
  };
2357
2366
 
@@ -2450,18 +2459,46 @@ export const createAgentExperience = (
2450
2459
  return { micButton, micButtonWrapper };
2451
2460
  };
2452
2461
 
2453
- // Helpers to apply/remove Runtype mic recording styles (mirrors start/stopVoiceRecognition)
2454
- const applyRuntypeMicRecordingStyles = () => {
2455
- if (!micButton) return;
2462
+ // --- Helpers to store/restore original mic button state ---
2463
+
2464
+ const storeOriginalMicStyles = () => {
2465
+ if (!micButton || originalMicStyles) return; // Already stored
2466
+ const voiceConfig = config.voiceRecognition ?? {};
2456
2467
  originalMicStyles = {
2457
2468
  backgroundColor: micButton.style.backgroundColor,
2458
2469
  color: micButton.style.color,
2459
- borderColor: micButton.style.borderColor
2470
+ borderColor: micButton.style.borderColor,
2471
+ iconName: voiceConfig.iconName ?? "mic",
2472
+ iconSize: parseFloat(voiceConfig.iconSize ?? config.sendButton?.size ?? "40") || 24,
2460
2473
  };
2474
+ };
2475
+
2476
/**
 * Swap the mic button's SVG icon.
 *
 * The replacement icon is rendered first; the current `<svg>` is only removed
 * once a replacement actually exists. If `renderLucideIcon` yields nothing
 * (e.g. an icon name it cannot render), the button keeps its current icon
 * instead of ending up empty.
 *
 * @param iconName Icon name passed to `renderLucideIcon`.
 * @param color Stroke color passed to `renderLucideIcon`.
 */
const swapMicIcon = (iconName: string, color: string) => {
  if (!micButton) return;

  // Prefer the size captured with the original styles; otherwise derive it
  // from config the same way the stored value is derived.
  const size =
    originalMicStyles?.iconSize ??
    (parseFloat(config.voiceRecognition?.iconSize ?? config.sendButton?.size ?? "40") || 24);

  const newSvg = renderLucideIcon(iconName, size, color, 1.5);
  if (!newSvg) return; // nothing to swap in — leave the existing icon untouched

  const existingSvg = micButton.querySelector("svg");
  if (existingSvg) existingSvg.remove();
  micButton.appendChild(newSvg);
};
2485
+
2486
/**
 * Strip every voice-state CSS class (recording, processing, speaking) from
 * the mic button. No-op when the button does not exist.
 */
const removeAllVoiceStateClasses = () => {
  const voiceStateClasses = ["tvw-voice-recording", "tvw-voice-processing", "tvw-voice-speaking"];
  micButton?.classList.remove(...voiceStateClasses);
};
2491
+
2492
+ // --- Per-state style application ---
2493
+
2494
+ const applyRuntypeMicRecordingStyles = () => {
2495
+ if (!micButton) return;
2496
+ storeOriginalMicStyles();
2461
2497
  const voiceConfig = config.voiceRecognition ?? {};
2462
2498
  const recordingBackgroundColor = voiceConfig.recordingBackgroundColor ?? "#ef4444";
2463
2499
  const recordingIconColor = voiceConfig.recordingIconColor;
2464
2500
  const recordingBorderColor = voiceConfig.recordingBorderColor;
2501
+ removeAllVoiceStateClasses();
2465
2502
  micButton.classList.add("tvw-voice-recording");
2466
2503
  micButton.style.backgroundColor = recordingBackgroundColor;
2467
2504
  if (recordingIconColor) {
@@ -2472,17 +2509,86 @@ export const createAgentExperience = (
2472
2509
  if (recordingBorderColor) micButton.style.borderColor = recordingBorderColor;
2473
2510
  micButton.setAttribute("aria-label", "Stop voice recognition");
2474
2511
  };
2475
- const removeRuntypeMicRecordingStyles = () => {
2512
+
2513
+ const applyRuntypeMicProcessingStyles = () => {
2476
2514
  if (!micButton) return;
2477
- micButton.classList.remove("tvw-voice-recording");
2515
+ storeOriginalMicStyles();
2516
+ const voiceConfig = config.voiceRecognition ?? {};
2517
+ const interruptionMode = session.getVoiceInterruptionMode();
2518
+ const iconName = voiceConfig.processingIconName ?? "loader";
2519
+ const iconColor = voiceConfig.processingIconColor ?? originalMicStyles?.color ?? "";
2520
+ const bgColor = voiceConfig.processingBackgroundColor ?? originalMicStyles?.backgroundColor ?? "";
2521
+ const borderColor = voiceConfig.processingBorderColor ?? originalMicStyles?.borderColor ?? "";
2522
+
2523
+ removeAllVoiceStateClasses();
2524
+ micButton.classList.add("tvw-voice-processing");
2525
+ micButton.style.backgroundColor = bgColor;
2526
+ micButton.style.borderColor = borderColor;
2527
+ const resolvedColor = iconColor || "currentColor";
2528
+ micButton.style.color = resolvedColor;
2529
+ swapMicIcon(iconName, resolvedColor);
2530
+ micButton.setAttribute("aria-label", "Processing voice input");
2531
+ // In "none" mode the button is not actionable during processing
2532
+ if (interruptionMode === "none") {
2533
+ micButton.style.cursor = "default";
2534
+ }
2535
+ };
2536
+
2537
/**
 * Put the mic button into its "speaking" appearance.
 *
 * Icon, colors and aria-label come from the `speaking*` config values when
 * set; otherwise the defaults depend on the provider's interruption mode:
 *  - "cancel":   "square" icon, idle colors
 *  - "barge-in": "mic" icon, recording colors (the mic is hot)
 *  - otherwise:  "volume-2" icon, idle colors
 */
const applyRuntypeMicSpeakingStyles = () => {
  if (!micButton) return;
  storeOriginalMicStyles();

  const voiceConfig = config.voiceRecognition ?? {};
  const mode = session.getVoiceInterruptionMode();
  const hotMic = mode === "barge-in";

  // Mode-specific default icon and accessible label.
  let fallbackIcon: string;
  let label: string;
  switch (mode) {
    case "cancel":
      fallbackIcon = "square";
      label = "Stop playback and re-record";
      break;
    case "barge-in":
      fallbackIcon = "mic";
      label = "Speak to interrupt";
      break;
    default:
      fallbackIcon = "volume-2";
      label = "Agent is speaking";
  }

  // Colors used when no speaking* override is configured: recording colors
  // while the mic is hot, the stored idle colors otherwise.
  const idle = originalMicStyles;
  const fallbackIconColor = hotMic
    ? (voiceConfig.recordingIconColor ?? idle?.color ?? "")
    : (idle?.color ?? "");
  const fallbackBackground = hotMic
    ? (voiceConfig.recordingBackgroundColor ?? "#ef4444")
    : (idle?.backgroundColor ?? "");
  const fallbackBorder = hotMic
    ? (voiceConfig.recordingBorderColor ?? "")
    : (idle?.borderColor ?? "");

  const strokeColor = (voiceConfig.speakingIconColor ?? fallbackIconColor) || "currentColor";

  removeAllVoiceStateClasses();
  micButton.classList.add("tvw-voice-speaking");
  micButton.style.backgroundColor = voiceConfig.speakingBackgroundColor ?? fallbackBackground;
  micButton.style.borderColor = voiceConfig.speakingBorderColor ?? fallbackBorder;
  micButton.style.color = strokeColor;
  swapMicIcon(voiceConfig.speakingIconName ?? fallbackIcon, strokeColor);
  micButton.setAttribute("aria-label", label);

  if (mode === "none") {
    // Not actionable while the agent speaks.
    micButton.style.cursor = "default";
  } else if (hotMic) {
    // Also carry the recording class to show the mic is hot.
    micButton.classList.add("tvw-voice-recording");
  }
};
2579
+
2580
+ /** Restore mic button to idle state (icon, colors, aria-label, cursor) */
2581
+ const removeRuntypeMicStateStyles = () => {
2582
+ if (!micButton) return;
2583
+ removeAllVoiceStateClasses();
2478
2584
  if (originalMicStyles) {
2479
2585
  micButton.style.backgroundColor = originalMicStyles.backgroundColor ?? "";
2480
2586
  micButton.style.color = originalMicStyles.color ?? "";
2481
2587
  micButton.style.borderColor = originalMicStyles.borderColor ?? "";
2482
- const svg = micButton.querySelector("svg");
2483
- if (svg) svg.setAttribute("stroke", originalMicStyles.color || "currentColor");
2588
+ swapMicIcon(originalMicStyles.iconName, originalMicStyles.color || "currentColor");
2484
2589
  originalMicStyles = null;
2485
2590
  }
2591
+ micButton.style.cursor = "";
2486
2592
  micButton.setAttribute("aria-label", "Start voice recognition");
2487
2593
  };
2488
2594
 
@@ -2490,6 +2596,36 @@ export const createAgentExperience = (
2490
2596
  const handleMicButtonClick = () => {
2491
2597
  // Runtype provider: use session.toggleVoice() (WebSocket-based STT)
2492
2598
  if (config.voiceRecognition?.provider?.type === 'runtype') {
2599
+ const voiceStatus = session.getVoiceStatus();
2600
+ const interruptionMode = session.getVoiceInterruptionMode();
2601
+
2602
+ // In "none" mode, ignore clicks while processing or speaking
2603
+ if (interruptionMode === "none" &&
2604
+ (voiceStatus === "processing" || voiceStatus === "speaking")) {
2605
+ return;
2606
+ }
2607
+
2608
+ // In "cancel" mode during processing/speaking: stop playback only
2609
+ if (interruptionMode === "cancel" &&
2610
+ (voiceStatus === "processing" || voiceStatus === "speaking")) {
2611
+ session.stopVoicePlayback();
2612
+ return;
2613
+ }
2614
+
2615
+ // In barge-in mode, clicking mic = "hang up" (any state: speaking, idle, etc.)
2616
+ // Stops playback if active, tears down the always-on mic.
2617
+ if (session.isBargeInActive()) {
2618
+ session.stopVoicePlayback();
2619
+ session.deactivateBargeIn().then(() => {
2620
+ voiceState.active = false;
2621
+ voiceState.manuallyDeactivated = true;
2622
+ persistVoiceMetadata();
2623
+ emitVoiceState("user");
2624
+ removeRuntypeMicStateStyles();
2625
+ });
2626
+ return;
2627
+ }
2628
+
2493
2629
  session.toggleVoice().then(() => {
2494
2630
  voiceState.active = session.isVoiceActive();
2495
2631
  voiceState.manuallyDeactivated = !session.isVoiceActive();
@@ -2498,7 +2634,7 @@ export const createAgentExperience = (
2498
2634
  if (session.isVoiceActive()) {
2499
2635
  applyRuntypeMicRecordingStyles();
2500
2636
  } else {
2501
- removeRuntypeMicRecordingStyles();
2637
+ removeRuntypeMicStateStyles();
2502
2638
  }
2503
2639
  });
2504
2640
  return;
@@ -2530,7 +2666,7 @@ export const createAgentExperience = (
2530
2666
  destroyCallbacks.push(() => {
2531
2667
  if (config.voiceRecognition?.provider?.type === 'runtype') {
2532
2668
  if (session.isVoiceActive()) session.toggleVoice();
2533
- removeRuntypeMicRecordingStyles();
2669
+ removeRuntypeMicStateStyles();
2534
2670
  } else {
2535
2671
  stopVoiceRecognition("system");
2536
2672
  }
@@ -4102,7 +4238,7 @@ export const createAgentExperience = (
4102
4238
  voiceState.manuallyDeactivated = true;
4103
4239
  persistVoiceMetadata();
4104
4240
  emitVoiceState("user");
4105
- removeRuntypeMicRecordingStyles();
4241
+ removeRuntypeMicStateStyles();
4106
4242
  });
4107
4243
  return true;
4108
4244
  }
@@ -0,0 +1,187 @@
1
/**
 * AudioPlaybackManager
 *
 * Manages streaming playback of PCM audio chunks via the Web Audio API.
 * Receives raw PCM data (16-bit signed little-endian, mono, 24 kHz by
 * default), converts it to Float32 AudioBuffers, and schedules gap-free
 * sequential playback using AudioBufferSourceNode.
 *
 * Lifecycle of one stream: `enqueue()` any number of chunks, then
 * `markStreamEnd()`. Callbacks registered through `onFinished()` fire once
 * the stream has been marked ended and every queued chunk has finished
 * playing (immediately, if nothing is queued). `flush()` aborts the stream
 * and drops the registered callbacks without firing them.
 */
export class AudioPlaybackManager {
  private ctx: AudioContext | null = null;
  /** AudioContext time at which the next chunk should start. */
  private nextStartTime = 0;
  /** Sources that have been scheduled and have not ended or been flushed. */
  private activeSources: AudioBufferSourceNode[] = [];
  private finishedCallbacks: (() => void)[] = [];
  private playing = false;
  private streamEnded = false;
  /** Number of scheduled chunks that have not finished playing yet. */
  private pendingCount = 0;

  // PCM format constants
  private readonly sampleRate: number;

  // Remainder byte from a previous chunk when the chunk had an odd byte count.
  // Network chunks don't respect 2-byte sample boundaries, so we carry over
  // the orphaned byte and prepend it to the next chunk.
  private remainder: Uint8Array | null = null;

  /**
   * @param sampleRate Sample rate of the incoming PCM data in Hz.
   */
  constructor(sampleRate = 24000) {
    this.sampleRate = sampleRate;
  }

  /**
   * Ensure an AudioContext exists and ask it to resume if it is suspended.
   *
   * @throws Error when there is no `window` or no AudioContext constructor.
   */
  private ensureContext(): AudioContext {
    if (!this.ctx) {
      type AudioHost = Window &
        typeof globalThis & { webkitAudioContext?: typeof AudioContext };
      const host = typeof window !== "undefined" ? (window as AudioHost) : undefined;
      if (!host) throw new Error("AudioPlaybackManager requires a browser environment");
      const AudioCtx = host.AudioContext || host.webkitAudioContext;
      if (!AudioCtx) {
        throw new Error("AudioPlaybackManager requires Web Audio API support (AudioContext)");
      }
      this.ctx = new AudioCtx({ sampleRate: this.sampleRate });
    }
    const ctx = this.ctx;
    // Resume if suspended (autoplay policy). Fire-and-forget: a rejected
    // resume must not surface as an unhandled rejection; the next enqueue
    // simply tries again.
    if (ctx.state === "suspended") {
      void ctx.resume().catch(() => undefined);
    }
    return ctx;
  }

  /**
   * Enqueue a PCM chunk for playback.
   * @param pcmData Raw PCM bytes (16-bit signed LE mono)
   */
  enqueue(pcmData: Uint8Array): void {
    if (pcmData.length === 0) return;

    // Prepend any remainder byte from the previous chunk
    let data = pcmData;
    if (this.remainder) {
      const merged = new Uint8Array(this.remainder.length + pcmData.length);
      merged.set(this.remainder);
      merged.set(pcmData, this.remainder.length);
      data = merged;
      this.remainder = null;
    }

    // If odd byte count, save (a copy of) the trailing byte for the next chunk
    if (data.length % 2 !== 0) {
      this.remainder = data.slice(data.length - 1);
      data = data.subarray(0, data.length - 1);
    }

    if (data.length === 0) return;

    const ctx = this.ensureContext();
    const samples = this.pcmToFloat32(data);

    const buffer = ctx.createBuffer(1, samples.length, this.sampleRate);
    buffer.getChannelData(0).set(samples);

    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);

    // Schedule gap-free playback: start right after the previous chunk, or
    // now if the queue has already run dry.
    const now = ctx.currentTime;
    if (this.nextStartTime < now) {
      this.nextStartTime = now;
    }
    source.start(this.nextStartTime);
    this.nextStartTime += buffer.duration;

    this.activeSources.push(source);
    this.pendingCount++;
    this.playing = true;

    source.onended = () => {
      const idx = this.activeSources.indexOf(source);
      // A source that is no longer tracked was discarded by flush(); its
      // late "ended" event must not touch the counters of a newer stream.
      if (idx === -1) return;
      this.activeSources.splice(idx, 1);
      this.pendingCount--;
      this.checkFinished();
    };
  }

  /**
   * Signal that no more chunks will arrive for the current stream.
   * The onFinished callbacks fire after all queued audio has played, or
   * immediately when nothing is queued.
   */
  markStreamEnd(): void {
    this.streamEnded = true;
    // An orphaned trailing byte cannot be completed any more; dropping it
    // keeps the next stream sample-aligned.
    this.remainder = null;
    this.checkFinished();
  }

  /**
   * Immediately stop all playback and discard queued audio.
   * Registered onFinished callbacks are dropped without being called.
   */
  flush(): void {
    const sources = this.activeSources;
    this.activeSources = [];
    for (const source of sources) {
      // stop() still dispatches "ended"; detach the handler so the reset
      // counters below are not decremented afterwards.
      source.onended = null;
      try {
        source.stop();
        source.disconnect();
      } catch {
        // Ignore errors from already-stopped sources
      }
    }
    this.pendingCount = 0;
    this.nextStartTime = 0;
    this.playing = false;
    this.streamEnded = false;
    this.finishedCallbacks = [];
    this.remainder = null;
  }

  /**
   * Whether audio has been queued and the stream has not yet finished or
   * been flushed.
   */
  isPlaying(): boolean {
    return this.playing;
  }

  /**
   * Register a one-shot callback for when all queued audio finishes playing.
   */
  onFinished(callback: () => void): void {
    this.finishedCallbacks.push(callback);
  }

  /**
   * Stop playback and release the AudioContext.
   */
  async destroy(): Promise<void> {
    this.flush();
    const ctx = this.ctx;
    if (!ctx) return;
    // Detach first so a concurrent enqueue()/destroy() never reuses a
    // context that is in the middle of closing.
    this.ctx = null;
    if (ctx.state !== "closed") {
      await ctx.close();
    }
  }

  /**
   * Fire and clear the finished callbacks once the stream has been marked
   * ended and no scheduled chunk is still pending.
   */
  private checkFinished(): void {
    if (!this.streamEnded || this.pendingCount > 0) return;
    this.playing = false;
    this.streamEnded = false;
    const callbacks = this.finishedCallbacks;
    this.finishedCallbacks = [];
    for (const cb of callbacks) cb();
  }

  /**
   * Convert 16-bit signed LE PCM to Float32 samples in [-1, 1).
   */
  private pcmToFloat32(pcmData: Uint8Array): Float32Array {
    // 2 bytes per sample (16-bit)
    const numSamples = Math.floor(pcmData.length / 2);
    const float32 = new Float32Array(numSamples);
    const view = new DataView(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength);

    for (let i = 0; i < numSamples; i++) {
      const int16 = view.getInt16(i * 2, true); // little-endian
      float32[i] = int16 / 32768;
    }

    return float32;
  }
}