@runtypelabs/persona 1.46.1 → 1.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,187 @@
1
/**
 * AudioPlaybackManager
 *
 * Manages streaming playback of PCM audio chunks via the Web Audio API.
 * Receives raw PCM data (16-bit signed little-endian, mono, at the sample
 * rate passed to the constructor — 24 kHz by default), converts it to
 * Float32 AudioBuffers, and schedules gap-free sequential playback using
 * one AudioBufferSourceNode per chunk.
 *
 * Only AudioContext / webkitAudioContext is required (no MediaSource
 * dependency).
 */
export class AudioPlaybackManager {
  /** Lazily created on the first enqueue; null before that and after destroy(). */
  private ctx: AudioContext | null = null;
  /** Context time at which the next chunk should start playing. */
  private nextStartTime = 0;
  /** Sources that were scheduled and have not yet ended or been flushed. */
  private activeSources: AudioBufferSourceNode[] = [];
  /** One-shot callbacks, invoked (and cleared) when playback finishes. */
  private finishedCallbacks: (() => void)[] = [];
  /** True from the first enqueue until playback finishes or is flushed. */
  private playing = false;
  /** Set by markStreamEnd(): no further chunks are expected. */
  private streamEnded = false;
  /** Number of scheduled sources whose `ended` event has not been handled yet. */
  private pendingCount = 0;

  // PCM format constants
  private readonly sampleRate: number;

  // Remainder byte from a previous chunk when the chunk had an odd byte count.
  // Network chunks don't respect 2-byte sample boundaries, so we carry over
  // the orphaned byte and prepend it to the next chunk.
  private remainder: Uint8Array | null = null;

  /**
   * @param sampleRate Sample rate of the incoming PCM data in Hz. It is used
   *                   both for the AudioContext and for every AudioBuffer.
   */
  constructor(sampleRate = 24000) {
    this.sampleRate = sampleRate;
  }

  /**
   * Ensure AudioContext is created and running.
   * Must be called after a user gesture on iOS Safari.
   *
   * @returns The (possibly newly created) AudioContext.
   * @throws Error when there is no `window`, or when neither `AudioContext`
   *         nor `webkitAudioContext` is available on it.
   */
  private ensureContext(): AudioContext {
    let ctx = this.ctx;
    if (!ctx) {
      const w = typeof window !== "undefined" ? (window as any) : undefined;
      if (!w) throw new Error("AudioPlaybackManager requires a browser environment");
      const AudioCtx = w.AudioContext || w.webkitAudioContext;
      if (!AudioCtx) {
        // Fail with a descriptive error instead of "AudioCtx is not a constructor".
        throw new Error("Web Audio API (AudioContext) is not available in this browser");
      }
      ctx = new AudioCtx({ sampleRate: this.sampleRate }) as AudioContext;
      this.ctx = ctx;
    }
    // Resume if suspended (autoplay policy)
    if (ctx.state === "suspended") {
      // Best effort: a rejection must not surface as an unhandled promise
      // rejection. The next enqueue() retries while the context is suspended.
      Promise.resolve(ctx.resume()).catch(() => {
        /* intentionally ignored */
      });
    }
    return ctx;
  }

  /**
   * Enqueue a PCM chunk for playback.
   *
   * Chunks may be split at arbitrary byte positions: a trailing odd byte is
   * held back and prepended to the next chunk.
   *
   * @param pcmData Raw PCM bytes (16-bit signed LE mono)
   * @throws Error when no AudioContext can be created (see ensureContext).
   */
  enqueue(pcmData: Uint8Array): void {
    if (pcmData.length === 0) return;

    // Prepend any remainder byte from the previous chunk
    let data = pcmData;
    if (this.remainder) {
      const merged = new Uint8Array(this.remainder.length + pcmData.length);
      merged.set(this.remainder);
      merged.set(pcmData, this.remainder.length);
      data = merged;
      this.remainder = null;
    }

    // If odd byte count, save the trailing byte for next chunk.
    // slice() copies, so the carried byte does not alias the caller's buffer.
    if (data.length % 2 !== 0) {
      this.remainder = data.slice(data.length - 1);
      data = data.subarray(0, data.length - 1);
    }

    if (data.length === 0) return;

    const ctx = this.ensureContext();
    const float32 = this.pcmToFloat32(data);

    const buffer = ctx.createBuffer(1, float32.length, this.sampleRate);
    buffer.getChannelData(0).set(float32);

    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);

    source.onended = () => {
      const idx = this.activeSources.indexOf(source);
      // A source that is no longer tracked was discarded by flush(), which
      // already reset the counters. Counting it again would push
      // pendingCount negative and could report "finished" while newer
      // audio is still playing.
      if (idx === -1) return;
      this.activeSources.splice(idx, 1);
      this.pendingCount--;
      this.checkFinished();
    };

    // Schedule gap-free playback: start exactly where the previous chunk
    // ends, or right now if the queue has already run dry.
    const now = ctx.currentTime;
    if (this.nextStartTime < now) {
      this.nextStartTime = now;
    }
    source.start(this.nextStartTime);
    this.nextStartTime += buffer.duration;

    this.activeSources.push(source);
    this.pendingCount++;
    this.playing = true;
  }

  /**
   * Signal that no more chunks will arrive.
   * The onFinished callback fires after all queued audio has played.
   */
  markStreamEnd(): void {
    this.streamEnded = true;
    this.checkFinished();
  }

  /**
   * Immediately stop all playback and discard queued audio.
   *
   * Also drops any registered onFinished callbacks (without calling them)
   * and any carried-over remainder byte.
   */
  flush(): void {
    // Untrack the sources first so that any `ended` event they still
    // deliver is recognised as stale by the handler installed in enqueue().
    const sources = this.activeSources;
    this.activeSources = [];
    for (const source of sources) {
      source.onended = null;
      try {
        source.stop();
        source.disconnect();
      } catch {
        // Ignore errors from already-stopped sources
      }
    }
    this.pendingCount = 0;
    this.nextStartTime = 0;
    this.playing = false;
    this.streamEnded = false;
    this.finishedCallbacks = [];
    this.remainder = null;
  }

  /**
   * Whether audio is currently playing or queued.
   *
   * Stays true after the first enqueue until playback finishes (which
   * requires markStreamEnd()) or flush() is called.
   */
  isPlaying(): boolean {
    return this.playing;
  }

  /**
   * Register a callback for when all queued audio finishes playing.
   * Callbacks are one-shot: they are cleared once invoked, and also by flush().
   */
  onFinished(callback: () => void): void {
    this.finishedCallbacks.push(callback);
  }

  /**
   * Clean up AudioContext resources.
   *
   * Safe to call more than once: the context reference is dropped before
   * closing, so a repeated call does not try to close the same context again.
   */
  async destroy(): Promise<void> {
    this.flush();
    const ctx = this.ctx;
    if (!ctx) return;
    this.ctx = null;
    if (ctx.state !== "closed") {
      await ctx.close();
    }
  }

  /**
   * Fire the finished callbacks once the stream has been marked as ended
   * and every scheduled source has reported `ended`.
   */
  private checkFinished(): void {
    if (this.streamEnded && this.pendingCount <= 0 && this.playing) {
      this.playing = false;
      this.streamEnded = false;
      // Snapshot and clear first so callbacks may safely register new ones.
      const cbs = this.finishedCallbacks.slice();
      this.finishedCallbacks = [];
      for (const cb of cbs) cb();
    }
  }

  /**
   * Convert 16-bit signed LE PCM to Float32 samples in [-1, 1).
   *
   * @param pcmData PCM bytes; a trailing odd byte, if any, is ignored.
   * @returns One float per 16-bit sample.
   */
  private pcmToFloat32(pcmData: Uint8Array): Float32Array {
    // 2 bytes per sample (16-bit)
    const numSamples = Math.floor(pcmData.length / 2);
    const float32 = new Float32Array(numSamples);
    // Respect byteOffset/byteLength: pcmData may be a view into a larger buffer.
    const view = new DataView(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength);

    for (let i = 0; i < numSamples; i++) {
      const int16 = view.getInt16(i * 2, true); // little-endian
      float32[i] = int16 / 32768;
    }

    return float32;
  }
}
@@ -0,0 +1,119 @@
1
+ // Browser Voice Provider
2
+ // Fallback implementation using Web Speech API
3
+
4
+ import type { VoiceProvider, VoiceResult, VoiceStatus, VoiceConfig } from '../types';
5
+
6
/**
 * Voice provider backed by the browser's Web Speech API
 * (`SpeechRecognition` / `webkitSpeechRecognition`).
 *
 * Results, errors and status changes are delivered to the callbacks
 * registered through onResult / onError / onStatusChange.
 */
export class BrowserVoiceProvider implements VoiceProvider {
  type: 'browser' = 'browser';
  /** The recognizer of the current session, or null when no session is active. */
  private recognition: any = null;
  private resultCallbacks: ((result: VoiceResult) => void)[] = [];
  private errorCallbacks: ((error: Error) => void)[] = [];
  private statusCallbacks: ((status: VoiceStatus) => void)[] = [];
  /** True between the recognizer's `start` and `end` events (or stopListening). */
  private isListening = false;
  /** `window` when running in a browser, otherwise undefined. */
  private w: any = typeof window !== 'undefined' ? window : undefined;

  /**
   * @param config Optional settings; `language` (default 'en-US') and
   *               `continuous` (default false) are read when listening starts.
   */
  constructor(private config: VoiceConfig['browser'] = {}) {}

  /** Reports 'connected' immediately; there is nothing to connect to. */
  async connect() {
    // Browser provider doesn't need connection
    this.statusCallbacks.forEach(cb => cb('connected'));
  }

  /**
   * Create a recognizer and start listening.
   *
   * Interim results are enabled; every result event is forwarded to the
   * result callbacks. In non-continuous mode listening stops after the
   * first final result.
   *
   * @throws Error when a session is already active, when there is no
   *         `window`, or when the browser has no speech recognition support.
   *         The same error is also passed to the error callbacks and an
   *         'error' status is emitted before it is rethrown.
   */
  async startListening() {
    let recognition: any = null;
    try {
      // `isListening` only becomes true in the asynchronous `onstart`
      // handler, so also check for a recognizer that was created but has
      // not reported `start` yet.
      if (this.isListening || this.recognition) {
        throw new Error('Already listening');
      }

      if (!this.w) {
        throw new Error('Window object not available');
      }

      const SpeechRecognition = this.w.SpeechRecognition || this.w.webkitSpeechRecognition;

      if (!SpeechRecognition) {
        throw new Error('Browser speech recognition not supported');
      }

      recognition = new SpeechRecognition();
      recognition.lang = this.config?.language || 'en-US';
      recognition.continuous = this.config?.continuous || false;
      recognition.interimResults = true;

      // The handlers close over `recognition` so each one can tell whether
      // it still belongs to the current session.
      const session = recognition;

      session.onresult = (event: any) => {
        const results: any[] = Array.from(event.results);
        if (results.length === 0) return;

        // Join the top alternative of every result in the event.
        const transcript = results
          .map((result: any) => result[0].transcript)
          .join('');

        const isFinal = results[results.length - 1].isFinal;

        this.resultCallbacks.forEach(cb => cb({
          text: transcript,
          confidence: isFinal ? 0.8 : 0.5,
          provider: 'browser'
        }));

        // Only stop the session this result belongs to; a late result from
        // an already-stopped recognizer must not stop a newer one.
        if (isFinal && !this.config?.continuous && this.recognition === session) {
          void this.stopListening();
        }
      };

      session.onerror = (event: any) => {
        this.errorCallbacks.forEach(cb => cb(new Error(event.error)));
        this.statusCallbacks.forEach(cb => cb('error'));
      };

      session.onstart = () => {
        // Ignore a `start` event for a session that was already stopped.
        if (this.recognition !== session) return;
        this.isListening = true;
        this.statusCallbacks.forEach(cb => cb('listening'));
      };

      session.onend = () => {
        // stopListening() has already reported 'idle' for a session it
        // ended, and a newer session may be running by now; a stale `end`
        // event must neither duplicate the status nor reset the new state.
        if (this.recognition !== session) return;
        this.recognition = null;
        this.isListening = false;
        this.statusCallbacks.forEach(cb => cb('idle'));
      };

      this.recognition = session;
      session.start();

    } catch (error) {
      // Do not leave a recognizer that failed to start registered as active.
      if (recognition && this.recognition === recognition) {
        this.recognition = null;
      }
      const reported = error instanceof Error ? error : new Error(String(error));
      this.errorCallbacks.forEach(cb => cb(reported));
      this.statusCallbacks.forEach(cb => cb('error'));
      throw error;
    }
  }

  /**
   * Stop the current session, if any, and report 'idle'.
   * The 'idle' status is emitted even when nothing was listening.
   */
  async stopListening() {
    const recognition = this.recognition;
    // Clear the state before calling stop() so the recognizer's own `end`
    // event is treated as stale and 'idle' is reported exactly once.
    this.recognition = null;
    this.isListening = false;

    if (recognition) {
      recognition.stop();
    }

    this.statusCallbacks.forEach(cb => cb('idle'));
  }

  /** Register a callback for interim and final recognition results. */
  onResult(callback: (result: VoiceResult) => void): void {
    this.resultCallbacks.push(callback);
  }

  /** Register a callback for recognition and start-up errors. */
  onError(callback: (error: Error) => void): void {
    this.errorCallbacks.push(callback);
  }

  /** Register a callback for status changes. */
  onStatusChange(callback: (status: VoiceStatus) => void): void {
    this.statusCallbacks.push(callback);
  }

  /** Stop listening, then report 'disconnected'. */
  async disconnect(): Promise<void> {
    await this.stopListening();
    this.statusCallbacks.forEach(cb => cb('disconnected'));
  }

  /**
   * Check if browser supports speech recognition.
   *
   * @returns false when there is no `window` (e.g. during server-side
   *          rendering) or when neither recognition constructor exists.
   */
  static isSupported(): boolean {
    if (typeof window === 'undefined') return false;
    return 'SpeechRecognition' in window || 'webkitSpeechRecognition' in window;
  }
}
@@ -0,0 +1,16 @@
1
+ // Voice Module Exports
2
+ // Central export point for all voice-related components
3
+
4
+ export {
5
+ RuntypeVoiceProvider
6
+ } from './runtype-voice-provider';
7
+
8
+ export {
9
+ BrowserVoiceProvider
10
+ } from './browser-voice-provider';
11
+
12
+ export {
13
+ createVoiceProvider,
14
+ createBestAvailableVoiceProvider,
15
+ isVoiceSupported
16
+ } from './voice-factory';