@runtypelabs/persona 1.47.0 → 1.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +29 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +38 -1
- package/dist/index.d.ts +38 -1
- package/dist/index.global.js +47 -47
- package/dist/index.global.js.map +1 -1
- package/dist/index.js +29 -29
- package/dist/index.js.map +1 -1
- package/dist/widget.css +24 -0
- package/package.json +1 -1
- package/src/session.ts +38 -5
- package/src/styles/widget.css +24 -0
- package/src/types.ts +34 -1
- package/src/ui.ts +175 -39
- package/src/voice/audio-playback-manager.ts +187 -0
- package/src/voice/runtype-voice-provider.ts +305 -69
- package/src/voice/voice-activity-detector.ts +90 -0
- package/src/voice/voice.test.ts +6 -5
package/dist/widget.css
CHANGED
|
@@ -1060,6 +1060,30 @@
|
|
|
1060
1060
|
animation: tvw-voice-recording-pulse 1.5s ease-in-out infinite;
|
|
1061
1061
|
}
|
|
1062
1062
|
|
|
1063
|
+
/* Voice processing animation (spinner) */
|
|
1064
|
+
@keyframes tvw-voice-processing-spin {
|
|
1065
|
+
from { transform: rotate(0deg); }
|
|
1066
|
+
to { transform: rotate(360deg); }
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
.tvw-voice-processing svg {
|
|
1070
|
+
animation: tvw-voice-processing-spin 1.2s linear infinite;
|
|
1071
|
+
}
|
|
1072
|
+
|
|
1073
|
+
/* Voice speaking animation (gentle pulse — slower/subtler than recording) */
|
|
1074
|
+
@keyframes tvw-voice-speaking-pulse {
|
|
1075
|
+
0%, 100% { opacity: 1; transform: scale(1); }
|
|
1076
|
+
50% { opacity: 0.85; transform: scale(1.03); }
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
.tvw-voice-speaking {
|
|
1080
|
+
animation: tvw-voice-speaking-pulse 2s ease-in-out infinite;
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
.tvw-voice-speaking svg {
|
|
1084
|
+
animation: tvw-voice-speaking-pulse 2s ease-in-out infinite;
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1063
1087
|
/* Markdown content overflow handling */
|
|
1064
1088
|
.vanilla-message-bubble pre {
|
|
1065
1089
|
overflow-x: auto;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@runtypelabs/persona",
|
|
3
|
-
"version": "1.47.0",
|
|
3
|
+
"version": "1.48.0",
|
|
4
4
|
"description": "Themeable, pluggable streaming agent widget for websites, in plain JS with support for voice input and reasoning / tool output.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.cjs",
|
package/src/session.ts
CHANGED
|
@@ -19,14 +19,12 @@ import {
|
|
|
19
19
|
import { IMAGE_ONLY_MESSAGE_FALLBACK_TEXT } from "./utils/content";
|
|
20
20
|
import type {
|
|
21
21
|
VoiceProvider,
|
|
22
|
-
VoiceResult,
|
|
23
22
|
VoiceStatus,
|
|
24
23
|
VoiceConfig,
|
|
25
24
|
TextToSpeechConfig
|
|
26
25
|
} from "./types";
|
|
27
26
|
import {
|
|
28
27
|
createVoiceProvider,
|
|
29
|
-
createBestAvailableVoiceProvider,
|
|
30
28
|
isVoiceSupported
|
|
31
29
|
} from "./voice";
|
|
32
30
|
|
|
@@ -136,6 +134,38 @@ export class AgentWidgetSession {
|
|
|
136
134
|
return this.voiceStatus;
|
|
137
135
|
}
|
|
138
136
|
|
|
137
|
+
/**
 * Report the interruption mode exposed by the current voice provider.
 *
 * @returns The value of the provider's optional `getInterruptionMode()` hook,
 *          or "none" when there is no provider or it lacks that hook.
 */
public getVoiceInterruptionMode(): "none" | "cancel" | "barge-in" {
  const provider = this.voiceProvider;
  return provider?.getInterruptionMode ? provider.getInterruptionMode() : "none";
}
|
|
146
|
+
|
|
147
|
+
/**
 * Ask the voice provider to stop playback / cancel the in-flight request
 * without starting a new recording.
 *
 * Delegates to the provider's optional `stopPlayback()` hook and does nothing
 * when no provider is set or the hook is not implemented.
 */
public stopVoicePlayback(): void {
  const provider = this.voiceProvider;
  if (!provider?.stopPlayback) return;
  provider.stopPlayback();
}
|
|
156
|
+
|
|
157
|
+
/**
 * Whether the provider reports its barge-in mic stream as alive (hot mic
 * between turns). Resolves to false when there is no provider or it does not
 * implement `isBargeInActive()`.
 */
public isBargeInActive(): boolean {
  const active = this.voiceProvider?.isBargeInActive?.();
  return active ?? false;
}
|
|
161
|
+
|
|
162
|
+
/**
 * "Hang up" the always-on mic: awaits the provider's optional
 * `deactivateBargeIn()` hook, which tears down the barge-in mic pipeline.
 * Resolves immediately when no provider is set or the hook is missing.
 */
public async deactivateBargeIn(): Promise<void> {
  const provider = this.voiceProvider;
  if (!provider?.deactivateBargeIn) return;
  await provider.deactivateBargeIn();
}
|
|
168
|
+
|
|
139
169
|
// Pending placeholder IDs for Runtype two-phase voice flow
|
|
140
170
|
private pendingVoiceUserMessageId: string | null = null;
|
|
141
171
|
private pendingVoiceAssistantMessageId: string | null = null;
|
|
@@ -227,9 +257,12 @@ export class AgentWidgetSession {
|
|
|
227
257
|
this.injectAssistantMessage({ content: result.text.trim() });
|
|
228
258
|
}
|
|
229
259
|
|
|
230
|
-
//
|
|
231
|
-
//
|
|
232
|
-
|
|
260
|
+
// Mark assistant message as already spoken so browser TTS doesn't
|
|
261
|
+
// double-speak. This covers both paths:
|
|
262
|
+
// - Batch: audio.base64 is present in the voice_response
|
|
263
|
+
// - Streaming: audio arrives as binary PCM chunks (no base64 here)
|
|
264
|
+
// In either case, the Runtype provider handles TTS — browser TTS must skip.
|
|
265
|
+
{
|
|
233
266
|
const spokenId = this.pendingVoiceAssistantMessageId
|
|
234
267
|
?? [...this.messages].reverse().find(m => m.role === 'assistant')?.id;
|
|
235
268
|
if (spokenId) this.ttsSpokenMessageIds.add(spokenId);
|
package/src/styles/widget.css
CHANGED
|
@@ -1060,6 +1060,30 @@
|
|
|
1060
1060
|
animation: tvw-voice-recording-pulse 1.5s ease-in-out infinite;
|
|
1061
1061
|
}
|
|
1062
1062
|
|
|
1063
|
+
/* Voice processing animation (spinner) */
|
|
1064
|
+
@keyframes tvw-voice-processing-spin {
|
|
1065
|
+
from { transform: rotate(0deg); }
|
|
1066
|
+
to { transform: rotate(360deg); }
|
|
1067
|
+
}
|
|
1068
|
+
|
|
1069
|
+
.tvw-voice-processing svg {
|
|
1070
|
+
animation: tvw-voice-processing-spin 1.2s linear infinite;
|
|
1071
|
+
}
|
|
1072
|
+
|
|
1073
|
+
/* Voice speaking animation (gentle pulse — slower/subtler than recording) */
|
|
1074
|
+
@keyframes tvw-voice-speaking-pulse {
|
|
1075
|
+
0%, 100% { opacity: 1; transform: scale(1); }
|
|
1076
|
+
50% { opacity: 0.85; transform: scale(1.03); }
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
.tvw-voice-speaking {
|
|
1080
|
+
animation: tvw-voice-speaking-pulse 2s ease-in-out infinite;
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
.tvw-voice-speaking svg {
|
|
1084
|
+
animation: tvw-voice-speaking-pulse 2s ease-in-out infinite;
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1063
1087
|
/* Markdown content overflow handling */
|
|
1064
1088
|
.vanilla-message-bubble pre {
|
|
1065
1089
|
overflow-x: auto;
|
package/src/types.ts
CHANGED
|
@@ -696,6 +696,26 @@ export type AgentWidgetVoiceRecognitionConfig = {
|
|
|
696
696
|
recordingBackgroundColor?: string;
|
|
697
697
|
recordingBorderColor?: string;
|
|
698
698
|
showRecordingIndicator?: boolean;
|
|
699
|
+
|
|
700
|
+
// Processing state (after recording stops, waiting for agent response)
|
|
701
|
+
/** Icon name shown while processing voice input. Default: "loader" */
|
|
702
|
+
processingIconName?: string;
|
|
703
|
+
/** Icon color during processing. Inherits idle iconColor if not set */
|
|
704
|
+
processingIconColor?: string;
|
|
705
|
+
/** Button background color during processing. Inherits idle backgroundColor if not set */
|
|
706
|
+
processingBackgroundColor?: string;
|
|
707
|
+
/** Button border color during processing. Inherits idle borderColor if not set */
|
|
708
|
+
processingBorderColor?: string;
|
|
709
|
+
|
|
710
|
+
// Speaking state (agent TTS audio is playing)
|
|
711
|
+
/** Icon name shown while agent is speaking. Default: "volume-2" (or "square" in cancel mode) */
|
|
712
|
+
speakingIconName?: string;
|
|
713
|
+
/** Icon color while speaking. Inherits idle iconColor if not set */
|
|
714
|
+
speakingIconColor?: string;
|
|
715
|
+
/** Button background color while speaking. Inherits idle backgroundColor if not set */
|
|
716
|
+
speakingBackgroundColor?: string;
|
|
717
|
+
/** Button border color while speaking. Inherits idle borderColor if not set */
|
|
718
|
+
speakingBorderColor?: string;
|
|
699
719
|
autoResume?: boolean | "assistant";
|
|
700
720
|
|
|
701
721
|
// Voice provider configuration
|
|
@@ -794,11 +814,12 @@ export type VoiceResult = {
|
|
|
794
814
|
/**
|
|
795
815
|
* Voice provider status states
|
|
796
816
|
*/
|
|
797
|
-
export type VoiceStatus =
|
|
817
|
+
export type VoiceStatus =
|
|
798
818
|
| 'disconnected'
|
|
799
819
|
| 'connected'
|
|
800
820
|
| 'listening'
|
|
801
821
|
| 'processing'
|
|
822
|
+
| 'speaking'
|
|
802
823
|
| 'error'
|
|
803
824
|
| 'idle';
|
|
804
825
|
|
|
@@ -843,6 +864,18 @@ export interface VoiceProvider {
|
|
|
843
864
|
|
|
844
865
|
/** Register a callback fired when recording stops and audio is about to be sent */
|
|
845
866
|
onProcessingStart?(callback: () => void): void;
|
|
867
|
+
|
|
868
|
+
/** Returns the current interruption mode (only meaningful for Runtype provider) */
|
|
869
|
+
getInterruptionMode?(): "none" | "cancel" | "barge-in";
|
|
870
|
+
|
|
871
|
+
/** Returns true if the barge-in mic stream is alive (hot mic between turns) */
|
|
872
|
+
isBargeInActive?(): boolean;
|
|
873
|
+
|
|
874
|
+
/** Tear down the barge-in mic pipeline — "hang up" the always-on mic */
|
|
875
|
+
deactivateBargeIn?(): Promise<void>;
|
|
876
|
+
|
|
877
|
+
/** Stop playback / cancel in-flight request without starting recording */
|
|
878
|
+
stopPlayback?(): void;
|
|
846
879
|
}
|
|
847
880
|
|
|
848
881
|
/**
|
package/src/ui.ts
CHANGED
|
@@ -2053,12 +2053,34 @@ export const createAgentExperience = (
|
|
|
2053
2053
|
}
|
|
2054
2054
|
},
|
|
2055
2055
|
onVoiceStatusChanged(status: VoiceStatus) {
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2056
|
+
if (config.voiceRecognition?.provider?.type !== 'runtype') return;
|
|
2057
|
+
|
|
2058
|
+
switch (status) {
|
|
2059
|
+
case 'listening':
|
|
2060
|
+
// Recording styles are applied by toggleVoice() / startVoiceRecognition() flows
|
|
2061
|
+
break;
|
|
2062
|
+
case 'processing':
|
|
2063
|
+
removeRuntypeMicStateStyles();
|
|
2064
|
+
applyRuntypeMicProcessingStyles();
|
|
2065
|
+
break;
|
|
2066
|
+
case 'speaking':
|
|
2067
|
+
removeRuntypeMicStateStyles();
|
|
2068
|
+
applyRuntypeMicSpeakingStyles();
|
|
2069
|
+
break;
|
|
2070
|
+
default:
|
|
2071
|
+
// idle, connected, disconnected, error
|
|
2072
|
+
if (status === 'idle' && session.isBargeInActive()) {
|
|
2073
|
+
// Barge-in mic is still hot between turns — show it as active
|
|
2074
|
+
removeRuntypeMicStateStyles();
|
|
2075
|
+
applyRuntypeMicRecordingStyles();
|
|
2076
|
+
micButton?.setAttribute("aria-label", "End voice session");
|
|
2077
|
+
} else {
|
|
2078
|
+
voiceState.active = false;
|
|
2079
|
+
removeRuntypeMicStateStyles();
|
|
2080
|
+
emitVoiceState("system");
|
|
2081
|
+
persistVoiceMetadata();
|
|
2082
|
+
}
|
|
2083
|
+
break;
|
|
2062
2084
|
}
|
|
2063
2085
|
}
|
|
2064
2086
|
});
|
|
@@ -2176,6 +2198,8 @@ export const createAgentExperience = (
|
|
|
2176
2198
|
backgroundColor: string;
|
|
2177
2199
|
color: string;
|
|
2178
2200
|
borderColor: string;
|
|
2201
|
+
iconName: string;
|
|
2202
|
+
iconSize: number;
|
|
2179
2203
|
} | null = null;
|
|
2180
2204
|
|
|
2181
2205
|
const getSpeechRecognitionClass = (): any => {
|
|
@@ -2273,15 +2297,17 @@ export const createAgentExperience = (
|
|
|
2273
2297
|
emitVoiceState(source);
|
|
2274
2298
|
persistVoiceMetadata();
|
|
2275
2299
|
if (micButton) {
|
|
2276
|
-
// Store original styles
|
|
2300
|
+
// Store original styles (including icon info for restoration)
|
|
2301
|
+
const voiceConfig = config.voiceRecognition ?? {};
|
|
2277
2302
|
originalMicStyles = {
|
|
2278
2303
|
backgroundColor: micButton.style.backgroundColor,
|
|
2279
2304
|
color: micButton.style.color,
|
|
2280
|
-
borderColor: micButton.style.borderColor
|
|
2305
|
+
borderColor: micButton.style.borderColor,
|
|
2306
|
+
iconName: voiceConfig.iconName ?? "mic",
|
|
2307
|
+
iconSize: parseFloat(voiceConfig.iconSize ?? config.sendButton?.size ?? "40") || 24,
|
|
2281
2308
|
};
|
|
2282
|
-
|
|
2309
|
+
|
|
2283
2310
|
// Apply recording state styles from config
|
|
2284
|
-
const voiceConfig = config.voiceRecognition ?? {};
|
|
2285
2311
|
const recordingBackgroundColor = voiceConfig.recordingBackgroundColor ?? "#ef4444";
|
|
2286
2312
|
const recordingIconColor = voiceConfig.recordingIconColor;
|
|
2287
2313
|
const recordingBorderColor = voiceConfig.recordingBorderColor;
|
|
@@ -2334,24 +2360,7 @@ export const createAgentExperience = (
|
|
|
2334
2360
|
persistVoiceMetadata();
|
|
2335
2361
|
|
|
2336
2362
|
if (micButton) {
|
|
2337
|
-
|
|
2338
|
-
|
|
2339
|
-
// Restore original styles
|
|
2340
|
-
if (originalMicStyles) {
|
|
2341
|
-
micButton.style.backgroundColor = originalMicStyles.backgroundColor;
|
|
2342
|
-
micButton.style.color = originalMicStyles.color;
|
|
2343
|
-
micButton.style.borderColor = originalMicStyles.borderColor;
|
|
2344
|
-
|
|
2345
|
-
// Restore SVG stroke color if present
|
|
2346
|
-
const svg = micButton.querySelector("svg");
|
|
2347
|
-
if (svg) {
|
|
2348
|
-
svg.setAttribute("stroke", originalMicStyles.color || "currentColor");
|
|
2349
|
-
}
|
|
2350
|
-
|
|
2351
|
-
originalMicStyles = null;
|
|
2352
|
-
}
|
|
2353
|
-
|
|
2354
|
-
micButton.setAttribute("aria-label", "Start voice recognition");
|
|
2363
|
+
removeRuntypeMicStateStyles();
|
|
2355
2364
|
}
|
|
2356
2365
|
};
|
|
2357
2366
|
|
|
@@ -2450,18 +2459,46 @@ export const createAgentExperience = (
|
|
|
2450
2459
|
return { micButton, micButtonWrapper };
|
|
2451
2460
|
};
|
|
2452
2461
|
|
|
2453
|
-
// Helpers to
|
|
2454
|
-
|
|
2455
|
-
|
|
2462
|
+
// --- Helpers to store/restore original mic button state ---
|
|
2463
|
+
|
|
2464
|
+
const storeOriginalMicStyles = () => {
|
|
2465
|
+
if (!micButton || originalMicStyles) return; // Already stored
|
|
2466
|
+
const voiceConfig = config.voiceRecognition ?? {};
|
|
2456
2467
|
originalMicStyles = {
|
|
2457
2468
|
backgroundColor: micButton.style.backgroundColor,
|
|
2458
2469
|
color: micButton.style.color,
|
|
2459
|
-
borderColor: micButton.style.borderColor
|
|
2470
|
+
borderColor: micButton.style.borderColor,
|
|
2471
|
+
iconName: voiceConfig.iconName ?? "mic",
|
|
2472
|
+
iconSize: parseFloat(voiceConfig.iconSize ?? config.sendButton?.size ?? "40") || 24,
|
|
2460
2473
|
};
|
|
2474
|
+
};
|
|
2475
|
+
|
|
2476
|
+
/**
 * Swap the mic button's SVG icon.
 *
 * The replacement is rendered BEFORE the current icon is removed, so an icon
 * name that `renderLucideIcon` cannot resolve (falsy result) leaves the
 * existing icon in place instead of producing an empty button.
 *
 * @param iconName Icon name passed to `renderLucideIcon`.
 * @param color    Stroke color passed to `renderLucideIcon`.
 */
const swapMicIcon = (iconName: string, color: string) => {
  if (!micButton) return;
  // Prefer the size captured together with the idle styles; otherwise derive
  // it from config with the same fallback chain used when storing them.
  const size = originalMicStyles?.iconSize
    ?? (parseFloat(config.voiceRecognition?.iconSize ?? config.sendButton?.size ?? "40") || 24);
  const newSvg = renderLucideIcon(iconName, size, color, 1.5);
  if (!newSvg) return; // unknown icon: keep whatever is currently shown
  micButton.querySelector("svg")?.remove();
  micButton.appendChild(newSvg);
};
|
|
2485
|
+
|
|
2486
|
+
/**
 * Strip every voice-state CSS class (recording, processing, speaking) from the
 * mic button. No-op when the button does not exist.
 */
const removeAllVoiceStateClasses = () => {
  micButton?.classList.remove(
    "tvw-voice-recording",
    "tvw-voice-processing",
    "tvw-voice-speaking",
  );
};
|
|
2491
|
+
|
|
2492
|
+
// --- Per-state style application ---
|
|
2493
|
+
|
|
2494
|
+
const applyRuntypeMicRecordingStyles = () => {
|
|
2495
|
+
if (!micButton) return;
|
|
2496
|
+
storeOriginalMicStyles();
|
|
2461
2497
|
const voiceConfig = config.voiceRecognition ?? {};
|
|
2462
2498
|
const recordingBackgroundColor = voiceConfig.recordingBackgroundColor ?? "#ef4444";
|
|
2463
2499
|
const recordingIconColor = voiceConfig.recordingIconColor;
|
|
2464
2500
|
const recordingBorderColor = voiceConfig.recordingBorderColor;
|
|
2501
|
+
removeAllVoiceStateClasses();
|
|
2465
2502
|
micButton.classList.add("tvw-voice-recording");
|
|
2466
2503
|
micButton.style.backgroundColor = recordingBackgroundColor;
|
|
2467
2504
|
if (recordingIconColor) {
|
|
@@ -2472,17 +2509,86 @@ export const createAgentExperience = (
|
|
|
2472
2509
|
if (recordingBorderColor) micButton.style.borderColor = recordingBorderColor;
|
|
2473
2510
|
micButton.setAttribute("aria-label", "Stop voice recognition");
|
|
2474
2511
|
};
|
|
2475
|
-
|
|
2512
|
+
|
|
2513
|
+
/**
 * Put the mic button into the "processing" visual state (recording stopped,
 * waiting for the agent response).
 *
 * Captures the idle styles first (so they can be restored later), then applies
 * the `processing*` overrides from `config.voiceRecognition`, falling back to
 * the captured idle values. Swaps the icon (default "loader"), adds the
 * `tvw-voice-processing` class and updates the aria-label.
 */
const applyRuntypeMicProcessingStyles = () => {
  if (!micButton) return;
  storeOriginalMicStyles();
  const voiceConfig = config.voiceRecognition ?? {};
  const interruptionMode = session.getVoiceInterruptionMode();
  const iconName = voiceConfig.processingIconName ?? "loader";
  const iconColor = voiceConfig.processingIconColor ?? originalMicStyles?.color ?? "";
  const bgColor = voiceConfig.processingBackgroundColor ?? originalMicStyles?.backgroundColor ?? "";
  const borderColor = voiceConfig.processingBorderColor ?? originalMicStyles?.borderColor ?? "";

  removeAllVoiceStateClasses();
  micButton.classList.add("tvw-voice-processing");
  micButton.style.backgroundColor = bgColor;
  micButton.style.borderColor = borderColor;
  // Leave the inline color empty when nothing is configured so the button keeps
  // its idle (stylesheet/inherited) color. Writing "currentColor" to `color`
  // itself behaves like `inherit` and would override a stylesheet color.
  micButton.style.color = iconColor;
  // The icon stroke can safely fall back to "currentColor": it then follows
  // the button's computed color.
  swapMicIcon(iconName, iconColor || "currentColor");
  micButton.setAttribute("aria-label", "Processing voice input");
  // In "none" mode the button is not actionable during processing
  if (interruptionMode === "none") {
    micButton.style.cursor = "default";
  }
};
|
|
2536
|
+
|
|
2537
|
+
/**
 * Put the mic button into the "speaking" visual state (agent audio playing).
 *
 * Captures the idle styles first, then resolves icon and colors from the
 * `speaking*` config overrides. Defaults depend on the interruption mode:
 *  - "cancel":   "square" icon, idle colors
 *  - "barge-in": "mic" icon with the recording colors (mic is hot), plus the
 *                `tvw-voice-recording` class
 *  - otherwise:  "volume-2" icon, idle colors, non-actionable cursor in "none"
 */
const applyRuntypeMicSpeakingStyles = () => {
  if (!micButton) return;
  storeOriginalMicStyles();
  const voiceConfig = config.voiceRecognition ?? {};
  const interruptionMode = session.getVoiceInterruptionMode();
  // Default icon depends on interruption mode:
  // "square" for cancel, "mic" for barge-in (hot mic), "volume-2" otherwise
  const defaultSpeakingIcon = interruptionMode === "cancel" ? "square"
    : interruptionMode === "barge-in" ? "mic"
    : "volume-2";
  const iconName = voiceConfig.speakingIconName ?? defaultSpeakingIcon;
  const iconColor = voiceConfig.speakingIconColor
    ?? (interruptionMode === "barge-in"
      ? (voiceConfig.recordingIconColor ?? originalMicStyles?.color ?? "")
      : (originalMicStyles?.color ?? ""));
  const bgColor = voiceConfig.speakingBackgroundColor
    ?? (interruptionMode === "barge-in"
      ? (voiceConfig.recordingBackgroundColor ?? "#ef4444")
      : (originalMicStyles?.backgroundColor ?? ""));
  const borderColor = voiceConfig.speakingBorderColor
    ?? (interruptionMode === "barge-in"
      ? (voiceConfig.recordingBorderColor ?? "")
      : (originalMicStyles?.borderColor ?? ""));

  removeAllVoiceStateClasses();
  micButton.classList.add("tvw-voice-speaking");
  micButton.style.backgroundColor = bgColor;
  micButton.style.borderColor = borderColor;
  // Leave the inline color empty when nothing is configured so the button keeps
  // its idle (stylesheet/inherited) color. Writing "currentColor" to `color`
  // itself behaves like `inherit` and would override a stylesheet color.
  micButton.style.color = iconColor;
  // The icon stroke can safely fall back to "currentColor": it then follows
  // the button's computed color.
  swapMicIcon(iconName, iconColor || "currentColor");

  // aria-label varies by interruption mode
  const ariaLabel = interruptionMode === "cancel"
    ? "Stop playback and re-record"
    : interruptionMode === "barge-in"
      ? "Speak to interrupt"
      : "Agent is speaking";
  micButton.setAttribute("aria-label", ariaLabel);
  // In "none" mode the button is not actionable during speaking
  if (interruptionMode === "none") {
    micButton.style.cursor = "default";
  }
  // In "barge-in" mode, add recording class to show mic is hot
  if (interruptionMode === "barge-in") {
    micButton.classList.add("tvw-voice-recording");
  }
};
|
|
2579
|
+
|
|
2580
|
+
/** Restore mic button to idle state (icon, colors, aria-label, cursor) */
|
|
2581
|
+
const removeRuntypeMicStateStyles = () => {
|
|
2582
|
+
if (!micButton) return;
|
|
2583
|
+
removeAllVoiceStateClasses();
|
|
2478
2584
|
if (originalMicStyles) {
|
|
2479
2585
|
micButton.style.backgroundColor = originalMicStyles.backgroundColor ?? "";
|
|
2480
2586
|
micButton.style.color = originalMicStyles.color ?? "";
|
|
2481
2587
|
micButton.style.borderColor = originalMicStyles.borderColor ?? "";
|
|
2482
|
-
|
|
2483
|
-
if (svg) svg.setAttribute("stroke", originalMicStyles.color || "currentColor");
|
|
2588
|
+
swapMicIcon(originalMicStyles.iconName, originalMicStyles.color || "currentColor");
|
|
2484
2589
|
originalMicStyles = null;
|
|
2485
2590
|
}
|
|
2591
|
+
micButton.style.cursor = "";
|
|
2486
2592
|
micButton.setAttribute("aria-label", "Start voice recognition");
|
|
2487
2593
|
};
|
|
2488
2594
|
|
|
@@ -2490,6 +2596,36 @@ export const createAgentExperience = (
|
|
|
2490
2596
|
const handleMicButtonClick = () => {
|
|
2491
2597
|
// Runtype provider: use session.toggleVoice() (WebSocket-based STT)
|
|
2492
2598
|
if (config.voiceRecognition?.provider?.type === 'runtype') {
|
|
2599
|
+
const voiceStatus = session.getVoiceStatus();
|
|
2600
|
+
const interruptionMode = session.getVoiceInterruptionMode();
|
|
2601
|
+
|
|
2602
|
+
// In "none" mode, ignore clicks while processing or speaking
|
|
2603
|
+
if (interruptionMode === "none" &&
|
|
2604
|
+
(voiceStatus === "processing" || voiceStatus === "speaking")) {
|
|
2605
|
+
return;
|
|
2606
|
+
}
|
|
2607
|
+
|
|
2608
|
+
// In "cancel" mode during processing/speaking: stop playback only
|
|
2609
|
+
if (interruptionMode === "cancel" &&
|
|
2610
|
+
(voiceStatus === "processing" || voiceStatus === "speaking")) {
|
|
2611
|
+
session.stopVoicePlayback();
|
|
2612
|
+
return;
|
|
2613
|
+
}
|
|
2614
|
+
|
|
2615
|
+
// In barge-in mode, clicking mic = "hang up" (any state: speaking, idle, etc.)
|
|
2616
|
+
// Stops playback if active, tears down the always-on mic.
|
|
2617
|
+
if (session.isBargeInActive()) {
|
|
2618
|
+
session.stopVoicePlayback();
|
|
2619
|
+
session.deactivateBargeIn().then(() => {
|
|
2620
|
+
voiceState.active = false;
|
|
2621
|
+
voiceState.manuallyDeactivated = true;
|
|
2622
|
+
persistVoiceMetadata();
|
|
2623
|
+
emitVoiceState("user");
|
|
2624
|
+
removeRuntypeMicStateStyles();
|
|
2625
|
+
});
|
|
2626
|
+
return;
|
|
2627
|
+
}
|
|
2628
|
+
|
|
2493
2629
|
session.toggleVoice().then(() => {
|
|
2494
2630
|
voiceState.active = session.isVoiceActive();
|
|
2495
2631
|
voiceState.manuallyDeactivated = !session.isVoiceActive();
|
|
@@ -2498,7 +2634,7 @@ export const createAgentExperience = (
|
|
|
2498
2634
|
if (session.isVoiceActive()) {
|
|
2499
2635
|
applyRuntypeMicRecordingStyles();
|
|
2500
2636
|
} else {
|
|
2501
|
-
|
|
2637
|
+
removeRuntypeMicStateStyles();
|
|
2502
2638
|
}
|
|
2503
2639
|
});
|
|
2504
2640
|
return;
|
|
@@ -2530,7 +2666,7 @@ export const createAgentExperience = (
|
|
|
2530
2666
|
destroyCallbacks.push(() => {
|
|
2531
2667
|
if (config.voiceRecognition?.provider?.type === 'runtype') {
|
|
2532
2668
|
if (session.isVoiceActive()) session.toggleVoice();
|
|
2533
|
-
|
|
2669
|
+
removeRuntypeMicStateStyles();
|
|
2534
2670
|
} else {
|
|
2535
2671
|
stopVoiceRecognition("system");
|
|
2536
2672
|
}
|
|
@@ -4102,7 +4238,7 @@ export const createAgentExperience = (
|
|
|
4102
4238
|
voiceState.manuallyDeactivated = true;
|
|
4103
4239
|
persistVoiceMetadata();
|
|
4104
4240
|
emitVoiceState("user");
|
|
4105
|
-
|
|
4241
|
+
removeRuntypeMicStateStyles();
|
|
4106
4242
|
});
|
|
4107
4243
|
return true;
|
|
4108
4244
|
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
 * AudioPlaybackManager
 *
 * Manages streaming playback of PCM audio chunks via the Web Audio API.
 * Receives raw PCM data (16-bit signed little-endian, mono; 24 kHz by
 * default), converts it to Float32 AudioBuffers, and schedules gap-free
 * sequential playback using AudioBufferSourceNode.
 *
 * Relies only on AudioContext / AudioBufferSourceNode (no MediaSource
 * dependency).
 */
export class AudioPlaybackManager {
  /** Lazily created on the first enqueue(); null before that and after destroy(). */
  private ctx: AudioContext | null = null;
  /** Context time at which the next chunk is scheduled to start. */
  private nextStartTime = 0;
  /** Sources that were started and have not yet reported `ended`. */
  private activeSources: AudioBufferSourceNode[] = [];
  /** One-shot callbacks fired when the current stream has fully played. */
  private finishedCallbacks: (() => void)[] = [];
  /** True from the first scheduled chunk until the stream finishes or is flushed. */
  private playing = false;
  /** Set by markStreamEnd(): no further chunks are expected for this stream. */
  private streamEnded = false;
  /** Number of scheduled chunks whose `ended` event is still outstanding. */
  private pendingCount = 0;

  // PCM format constants
  private readonly sampleRate: number;

  // Remainder byte from a previous chunk when the chunk had an odd byte count.
  // Network chunks don't respect 2-byte sample boundaries, so we carry over
  // the orphaned byte and prepend it to the next chunk.
  private remainder: Uint8Array | null = null;

  /**
   * @param sampleRate Sample rate of the incoming PCM stream in Hz.
   */
  constructor(sampleRate = 24000) {
    this.sampleRate = sampleRate;
  }

  /**
   * Ensure AudioContext is created and running.
   * Must be called after a user gesture on iOS Safari.
   *
   * @throws Error when there is no `window` or no AudioContext implementation.
   */
  private ensureContext(): AudioContext {
    let ctx = this.ctx;
    if (!ctx) {
      const w = typeof window !== "undefined" ? (window as any) : undefined;
      if (!w) throw new Error("AudioPlaybackManager requires a browser environment");
      const AudioCtx = w.AudioContext || w.webkitAudioContext;
      if (!AudioCtx) {
        throw new Error("AudioPlaybackManager requires Web Audio API support (AudioContext)");
      }
      ctx = new AudioCtx({ sampleRate: this.sampleRate }) as AudioContext;
      this.ctx = ctx;
    }
    // Resume if suspended (autoplay policy). resume() is asynchronous; a
    // rejection (e.g. no user gesture yet) must not surface as an unhandled
    // promise rejection. Chunks already scheduled simply stay silent.
    if (ctx.state === "suspended") {
      void Promise.resolve(ctx.resume()).catch(() => {
        /* best effort */
      });
    }
    return ctx;
  }

  /**
   * Enqueue a PCM chunk for playback.
   * @param pcmData Raw PCM bytes (16-bit signed LE mono)
   */
  enqueue(pcmData: Uint8Array): void {
    if (pcmData.length === 0) return;

    // Prepend any remainder byte from the previous chunk
    let data = pcmData;
    if (this.remainder) {
      const merged = new Uint8Array(this.remainder.length + pcmData.length);
      merged.set(this.remainder);
      merged.set(pcmData, this.remainder.length);
      data = merged;
      this.remainder = null;
    }

    // If odd byte count, save the trailing byte for next chunk
    if (data.length % 2 !== 0) {
      this.remainder = data.slice(data.length - 1); // copy, not a view
      data = data.subarray(0, data.length - 1);
    }

    if (data.length === 0) return;

    const ctx = this.ensureContext();
    const float32 = this.pcmToFloat32(data);

    const buffer = ctx.createBuffer(1, float32.length, this.sampleRate);
    buffer.getChannelData(0).set(float32);

    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);

    // Schedule gap-free playback: start right after the previous chunk, or
    // now if the queue has already run dry.
    const now = ctx.currentTime;
    if (this.nextStartTime < now) {
      this.nextStartTime = now;
    }
    source.start(this.nextStartTime);
    this.nextStartTime += buffer.duration;

    this.activeSources.push(source);
    this.pendingCount++;
    this.playing = true;

    source.onended = () => {
      const idx = this.activeSources.indexOf(source);
      if (idx !== -1) this.activeSources.splice(idx, 1);
      this.pendingCount--;
      this.checkFinished();
    };
  }

  /**
   * Signal that no more chunks will arrive.
   * The onFinished callback fires after all queued audio has played.
   *
   * NOTE(review): if nothing was enqueued before this call, no callback fires
   * and the end marker stays set until the next stream finishes or flush() is
   * called — confirm callers do not rely on a callback in that case.
   */
  markStreamEnd(): void {
    // A dangling odd byte can never be completed once the stream is over;
    // dropping it keeps the next stream aligned to 2-byte sample boundaries.
    this.remainder = null;
    this.streamEnded = true;
    this.checkFinished();
  }

  /**
   * Immediately stop all playback and discard queued audio.
   * Registered onFinished callbacks are discarded without being called.
   */
  flush(): void {
    for (const source of this.activeSources) {
      // stop() still dispatches `ended`; detach the handler so those late
      // events cannot decrement the counter that is reset below and make the
      // next stream report completion too early.
      source.onended = null;
      try {
        source.stop();
      } catch {
        // Ignore errors from already-stopped sources
      }
      try {
        source.disconnect();
      } catch {
        // Ignore errors from already-disconnected sources
      }
    }
    this.activeSources = [];
    this.pendingCount = 0;
    this.nextStartTime = 0;
    this.playing = false;
    this.streamEnded = false;
    this.finishedCallbacks = [];
    this.remainder = null;
  }

  /**
   * Whether audio is currently playing or queued.
   */
  isPlaying(): boolean {
    return this.playing;
  }

  /**
   * Register a callback for when all queued audio finishes playing.
   * Callbacks are one-shot: they are cleared after firing and by flush().
   */
  onFinished(callback: () => void): void {
    this.finishedCallbacks.push(callback);
  }

  /**
   * Clean up AudioContext resources.
   */
  async destroy(): Promise<void> {
    this.flush();
    const ctx = this.ctx;
    if (!ctx) return;
    // Drop the reference first so a rejected close() cannot leave a dead
    // context cached for later enqueue() calls.
    this.ctx = null;
    if (ctx.state !== "closed") {
      await ctx.close();
    }
  }

  /**
   * Fire the finished callbacks once the stream end was marked and every
   * scheduled chunk has reported `ended`.
   */
  private checkFinished(): void {
    if (this.streamEnded && this.pendingCount <= 0 && this.playing) {
      this.playing = false;
      this.streamEnded = false;
      // Snapshot and clear before invoking so callbacks may re-register.
      const cbs = this.finishedCallbacks.slice();
      this.finishedCallbacks = [];
      for (const cb of cbs) cb();
    }
  }

  /**
   * Convert 16-bit signed LE PCM to Float32 samples in [-1, 1).
   */
  private pcmToFloat32(pcmData: Uint8Array): Float32Array {
    // 2 bytes per sample (16-bit)
    const numSamples = Math.floor(pcmData.length / 2);
    const float32 = new Float32Array(numSamples);
    // Respect byteOffset/byteLength: pcmData may be a view into a larger buffer.
    const view = new DataView(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength);

    for (let i = 0; i < numSamples; i++) {
      const int16 = view.getInt16(i * 2, true); // little-endian
      float32[i] = int16 / 32768;
    }

    return float32;
  }
}
|