converse-framework 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- converse_framework/__init__.py +108 -0
- converse_framework/audio_utils.py +412 -0
- converse_framework/cuda_utils.py +176 -0
- converse_framework/events.py +94 -0
- converse_framework/examples/__init__.py +20 -0
- converse_framework/examples/subprocess_provider.py +439 -0
- converse_framework/examples/text_chat.py +308 -0
- converse_framework/examples/voice_chat.py +223 -0
- converse_framework/examples/websocket_voice_chat.py +174 -0
- converse_framework/js/browser-voice-client.js +248 -0
- converse_framework/js/mic-frame-sender.js +445 -0
- converse_framework/js/speaker-echo-guard.js +308 -0
- converse_framework/js/tts-audio-player.js +237 -0
- converse_framework/pipeline.py +620 -0
- converse_framework/protocols.py +382 -0
- converse_framework/provider_events.py +159 -0
- converse_framework/providers/__init__.py +28 -0
- converse_framework/providers/faster_whisper.py +290 -0
- converse_framework/providers/kokoro_onnx.py +391 -0
- converse_framework/providers/llamacpp.py +264 -0
- converse_framework/providers/mock.py +171 -0
- converse_framework/providers/pocket_tts.py +409 -0
- converse_framework/providers/silero.py +161 -0
- converse_framework/providers/unavailable.py +137 -0
- converse_framework/providers/whisper_cpp.py +322 -0
- converse_framework/registry.py +397 -0
- converse_framework/session.py +315 -0
- converse_framework/transport.py +54 -0
- converse_framework/utterance_collector.py +336 -0
- converse_framework-0.2.0.dist-info/METADATA +992 -0
- converse_framework-0.2.0.dist-info/RECORD +33 -0
- converse_framework-0.2.0.dist-info/WHEEL +4 -0
- converse_framework-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* browser-voice-client.js — optional composed browser voice client for
|
|
3
|
+
* converse_framework.
|
|
4
|
+
*
|
|
5
|
+
* Combines ``MicFrameSender`` (mic capture) and ``TtsAudioPlayer`` (playback)
|
|
6
|
+
* into a single helper, with optional ``SpeakerEchoGuard`` integration.
|
|
7
|
+
*
|
|
8
|
+
* This module is optional — apps can use ``MicFrameSender`` and
|
|
9
|
+
* ``TtsAudioPlayer`` independently. It does **not** modify or depend on
|
|
10
|
+
* either module's internal API beyond the documented public interface.
|
|
11
|
+
*
|
|
12
|
+
* ```html
|
|
13
|
+
* <script src="tts-audio-player.js"></script>
|
|
14
|
+
* <script src="mic-frame-sender.js"></script>
|
|
15
|
+
* <script src="speaker-echo-guard.js"></script>
|
|
16
|
+
* <script src="browser-voice-client.js"></script>
|
|
17
|
+
* <script>
|
|
18
|
+
* const client = new BrowserVoiceClient({
|
|
19
|
+
* webSocket: new WebSocket("ws://localhost:8000/ws"),
|
|
20
|
+
* });
|
|
21
|
+
* client.start();
|
|
22
|
+
*
|
|
23
|
+
* // Later:
|
|
24
|
+
* client.stop();
|
|
25
|
+
* client.close();
|
|
26
|
+
* </script>
|
|
27
|
+
* ```
|
|
28
|
+
*
|
|
29
|
+
* @module
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
((root, factory) => {
|
|
33
|
+
if (typeof define === "function" && define.amd) {
|
|
34
|
+
define([], factory);
|
|
35
|
+
} else if (typeof module === "object" && module.exports) {
|
|
36
|
+
module.exports = factory();
|
|
37
|
+
} else {
|
|
38
|
+
root.BrowserVoiceClient = factory();
|
|
39
|
+
}
|
|
40
|
+
})(this, () => {
|
|
41
|
+
// -----------------------------------------------------------------------
|
|
42
|
+
// BrowserVoiceClient
|
|
43
|
+
// -----------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
/**
 * Return the global object of the current environment.
 *
 * The sub-components are looked up as globals, so this is the fallback
 * used when a bare identifier lookup does not find them.
 *
 * @returns {Object} ``globalThis``/``self``/``window`` when available,
 *   otherwise an empty object so property lookups stay safe.
 */
function getGlobalScope() {
  if (typeof globalThis !== "undefined") return globalThis;
  if (typeof self !== "undefined") return self;
  if (typeof window !== "undefined") return window;
  return {};
}

/**
 * Create a composed browser voice client.
 *
 * Sub-components are built from the globals ``MicFrameSender``,
 * ``TtsAudioPlayer`` and ``SpeakerEchoGuard``. A component whose global is
 * not defined is skipped and its getter returns ``null``.
 *
 * @param {Object} options
 * @param {WebSocket} options.webSocket - Required: target WebSocket.
 * @param {Object} [options.micOptions] - Options passed to ``MicFrameSender`` constructor.
 * @param {Object} [options.playerOptions] - Options passed to ``TtsAudioPlayer`` constructor.
 * @param {Object} [options.guardOptions] - Options passed to ``SpeakerEchoGuard`` constructor
 *   (omit to disable echo guard).
 * @param {boolean} [options.autoStart=false] - If true, call ``start()`` in constructor.
 * @param {function(Object):void} [options.onEvent] - Receive all framework events (from
 *   the WebSocket) before they are dispatched.
 * @throws {Error} If ``options`` or ``options.webSocket`` is missing.
 */
function BrowserVoiceClient(options) {
  if (!options || !options.webSocket) {
    throw new Error("BrowserVoiceClient requires a webSocket");
  }

  this._ws = options.webSocket;
  this._micOptions = options.micOptions || {};
  this._playerOptions = options.playerOptions || {};
  this._guardOptions = options.guardOptions || null;
  this._onEvent = options.onEvent || null;

  // Explicit nulls so the getters honour their documented ``Object|null``
  // type even when a sub-component is unavailable.
  this._mic = null;
  this._player = null;
  this._guard = null;

  // WebSocket message handler, kept so removeEventListener can detach it.
  this._boundOnMessage = null;

  // Build sub-components. The guard goes last because it attaches to the mic.
  this._buildMic();
  this._buildPlayer();
  this._buildGuard();

  if (options.autoStart) {
    // The returned promise is not awaited here (a constructor cannot be
    // async), so a failed start surfaces as an unhandled rejection.
    this.start();
  }
}

BrowserVoiceClient.prototype = {
  constructor: BrowserVoiceClient,

  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------

  /**
   * Start mic capture and connect WebSocket event handler.
   *
   * The WebSocket handler is attached first and stays attached even if the
   * mic fails to start; call ``stop()`` to detach it.
   *
   * @returns {Promise<void>} Rejects with whatever ``mic.start()`` rejects with.
   */
  start: async function () {
    this._startWebSocketHandler();
    if (this._mic) {
      await this._mic.start();
    }
  },

  /**
   * Stop mic capture and disconnect WebSocket handler.
   */
  stop: function () {
    if (this._mic) {
      this._mic.stop();
    }
    this._stopWebSocketHandler();
  },

  /**
   * Release all resources: stops capture, clears the player and releases
   * the echo guard (the guard reference is dropped afterwards).
   */
  close: function () {
    this.stop();
    if (this._player) {
      this._player.clear();
    }
    if (this._guard) {
      this._guard.release();
      this._guard = null;
    }
  },

  /**
   * Current ``MicFrameSender`` instance.
   * @type {Object|null}
   */
  get mic() {
    return this._mic;
  },

  /**
   * Current ``TtsAudioPlayer`` instance.
   * @type {Object|null}
   */
  get player() {
    return this._player;
  },

  /**
   * Current ``SpeakerEchoGuard`` instance (may be null).
   * @type {Object|null}
   */
  get guard() {
    return this._guard;
  },

  // -----------------------------------------------------------------------
  // Internal: sub-component construction
  // -----------------------------------------------------------------------

  /**
   * Build the mic sender, defaulting its ``webSocket`` option to ours.
   * Does nothing when no ``MicFrameSender`` global exists.
   */
  _buildMic: function () {
    // ``typeof`` keeps the bare identifier lookup safe when it is undeclared.
    const Sender =
      typeof MicFrameSender !== "undefined"
        ? MicFrameSender
        : getGlobalScope().MicFrameSender;
    if (typeof Sender !== "function") return;

    const micOpts = Object.assign({}, this._micOptions);
    if (!micOpts.webSocket) {
      micOpts.webSocket = this._ws;
    }
    // Guard will attach via attachMicSender after construction
    this._mic = new Sender(micOpts);
  },

  /**
   * Build the TTS player, defaulting its ``webSocket`` option to ours.
   * Does nothing when no ``TtsAudioPlayer`` global exists.
   */
  _buildPlayer: function () {
    const Player =
      typeof TtsAudioPlayer !== "undefined"
        ? TtsAudioPlayer
        : getGlobalScope().TtsAudioPlayer;
    if (typeof Player !== "function") return;

    const playerOpts = Object.assign({}, this._playerOptions);
    if (!playerOpts.webSocket) {
      playerOpts.webSocket = this._ws;
    }
    this._player = new Player(playerOpts);
  },

  /**
   * Build the echo guard when ``guardOptions`` were supplied and a
   * ``SpeakerEchoGuard`` global exists, then attach it to the mic.
   */
  _buildGuard: function () {
    if (!this._guardOptions) return;

    const Guard =
      typeof SpeakerEchoGuard !== "undefined"
        ? SpeakerEchoGuard
        : getGlobalScope().SpeakerEchoGuard;
    if (typeof Guard !== "function") return;

    this._guard = new Guard(this._guardOptions);

    if (this._mic) {
      this._guard.attachMicSender(this._mic);
    }
  },

  // -----------------------------------------------------------------------
  // Internal: WebSocket event dispatch
  // -----------------------------------------------------------------------

  /**
   * Attach the ``message`` listener (idempotent). Each JSON object message
   * goes to ``onEvent``, then ``player.onEvent``, then ``guard.onTtsEvent``.
   */
  _startWebSocketHandler: function () {
    if (this._boundOnMessage) return;

    this._boundOnMessage = (evt) => {
      let msg;
      try {
        msg = JSON.parse(evt.data);
      } catch (_) {
        return; // Not a JSON event
      }

      // Valid JSON that is not an object (null, numbers, strings) is not
      // an event; forwarding it would hand consumers a non-object.
      if (msg === null || typeof msg !== "object") return;

      // Global event observer
      if (this._onEvent) {
        this._onEvent(msg);
      }

      // Dispatch to player
      if (this._player && typeof this._player.onEvent === "function") {
        this._player.onEvent(msg);
      }

      // Dispatch to guard
      if (this._guard && typeof this._guard.onTtsEvent === "function") {
        this._guard.onTtsEvent(msg);
      }
    };

    this._ws.addEventListener("message", this._boundOnMessage);
  },

  /**
   * Detach the ``message`` listener if one is attached.
   */
  _stopWebSocketHandler: function () {
    if (this._boundOnMessage) {
      this._ws.removeEventListener("message", this._boundOnMessage);
      this._boundOnMessage = null;
    }
  },
};
|
|
246
|
+
|
|
247
|
+
return BrowserVoiceClient;
|
|
248
|
+
});
|
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* mic-frame-sender.js — browser microphone capture and frame sender for
|
|
3
|
+
* converse_framework ``audio.frame`` events.
|
|
4
|
+
*
|
|
5
|
+
* Captures microphone input via the Web Audio API, downsamples to the
|
|
6
|
+
* target sample rate, slices into fixed-size PCM s16le frames, and
|
|
7
|
+
* sends them as base64-encoded payloads over a WebSocket.
|
|
8
|
+
*
|
|
9
|
+
* ```html
|
|
10
|
+
* <script src="mic-frame-sender.js"></script>
|
|
11
|
+
* <script>
|
|
12
|
+
* const ws = new WebSocket("ws://localhost:8000/ws");
|
|
13
|
+
* const mic = new MicFrameSender({ webSocket: ws });
|
|
14
|
+
* document.getElementById("start-btn").onclick = () => mic.start();
|
|
15
|
+
* document.getElementById("stop-btn").onclick = () => mic.stop();
|
|
16
|
+
* </script>
|
|
17
|
+
* ```
|
|
18
|
+
*
|
|
19
|
+
* Mobile microphone access requires HTTPS, localhost, or a secure tunnel
|
|
20
|
+
* (Cloudflare Tunnel, ngrok, etc.). Plain ``http://<lan-ip>`` will be
|
|
21
|
+
* rejected by ``getUserMedia`` on most mobile browsers.
|
|
22
|
+
*
|
|
23
|
+
* @module
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
((root, factory) => {
|
|
27
|
+
if (typeof define === "function" && define.amd) {
|
|
28
|
+
define([], factory);
|
|
29
|
+
} else if (typeof module === "object" && module.exports) {
|
|
30
|
+
module.exports = factory();
|
|
31
|
+
} else {
|
|
32
|
+
root.MicFrameSender = factory();
|
|
33
|
+
}
|
|
34
|
+
})(this, () => {
|
|
35
|
+
// -----------------------------------------------------------------------
|
|
36
|
+
// Constants
|
|
37
|
+
// -----------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
// Encoding tag sent as `encoding` in every frame payload; the frames are produced by float32ToPcmS16le.
const ENCODING = "pcm_s16le";
|
|
40
|
+
|
|
41
|
+
// -----------------------------------------------------------------------
|
|
42
|
+
// Utilities
|
|
43
|
+
// -----------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
/**
 * Resample a Float32Array from ``deviceRate`` to ``targetRate`` using
 * linear interpolation between the two nearest source samples.
 *
 * The input is treated as a single sequence of samples. When both rates
 * are equal the input array itself is returned (no copy). Output values
 * are clamped to [-1, 1].
 *
 * NOTE: the output length is ``Math.round(samples.length / ratio)`` for
 * each call, so a stream resampled chunk by chunk is rounded per chunk.
 *
 * @param {Float32Array} samples - source samples.
 * @param {number} deviceRate - sample rate of ``samples``.
 * @param {number} targetRate - desired sample rate.
 * @returns {Float32Array} resampled samples.
 */
function downsampleFloat32(samples, deviceRate, targetRate) {
  if (deviceRate === targetRate) {
    return samples;
  }

  const step = deviceRate / targetRate;
  const lastIndex = samples.length - 1;
  const resampled = new Float32Array(Math.round(samples.length / step));

  for (let n = 0; n < resampled.length; n += 1) {
    const position = n * step;
    const left = Math.floor(position);
    // The final output sample may sit on the last input sample; reuse it
    // as its own right-hand neighbour instead of reading past the end.
    const right = left + 1 > lastIndex ? lastIndex : left + 1;
    const weight = position - left;
    const mixed = samples[left] * (1 - weight) + samples[right] * weight;

    // Clamp to [-1, 1] to avoid overflow on conversion
    resampled[n] = mixed > 1 ? 1 : mixed < -1 ? -1 : mixed;
  }

  return resampled;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Encode float samples as 16-bit signed little-endian PCM.
 *
 * Each sample is clamped to [-1, 1]. Negative values are scaled by 0x8000
 * and non-negative values by 0x7fff, then rounded.
 *
 * @param {Float32Array} samples - samples, nominally in [-1, 1].
 * @returns {ArrayBuffer} buffer of ``samples.length * 2`` bytes.
 */
function float32ToPcmS16le(samples) {
  const count = samples.length;
  const pcm = new DataView(new ArrayBuffer(count * 2));

  let index = 0;
  while (index < count) {
    const clamped = Math.max(-1, Math.min(1, samples[index]));
    // The int16 range is asymmetric (-32768..32767), hence two scales.
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    pcm.setInt16(index * 2, Math.round(clamped * scale), true); // little-endian
    index += 1;
  }

  return pcm.buffer;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Convert binary data to a base64-encoded string.
 *
 * Accepts an ArrayBuffer or any ArrayBuffer view (typed array, DataView).
 * For a view, exactly the bytes it covers are encoded, honouring
 * ``byteOffset``/``byteLength``, rather than its element values.
 *
 * @param {ArrayBuffer|ArrayBufferView} buf - bytes to encode.
 * @returns {string} base64 text ("" for empty input).
 */
function arrayBufferToBase64(buf) {
  const bytes = ArrayBuffer.isView(buf)
    ? new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength)
    : new Uint8Array(buf);

  // Build the binary string in chunks. One fromCharCode call per chunk
  // avoids per-byte concatenation while staying below engine argument limits.
  const CHUNK_SIZE = 0x8000;
  const parts = [];
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    parts.push(
      String.fromCharCode.apply(null, bytes.subarray(offset, offset + CHUNK_SIZE)),
    );
  }
  return btoa(parts.join(""));
}
|
|
96
|
+
|
|
97
|
+
/**
 * Convert a duration in milliseconds to a sample count at ``rate``,
 * rounded to the nearest whole sample.
 *
 * @param {number} ms - duration in milliseconds.
 * @param {number} rate - sample rate in samples per second.
 * @returns {number} number of samples.
 */
function msToSamples(ms, rate) {
  const seconds = ms / 1000;
  return Math.round(seconds * rate);
}
|
|
103
|
+
|
|
104
|
+
// -----------------------------------------------------------------------
|
|
105
|
+
// AudioWorklet processor (inline via blob URL)
|
|
106
|
+
// -----------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
// Blob URL of the inline worklet module; created on first use and then reused.
let _processorUrl = null;

/**
 * Return a blob URL for the AudioWorklet module that registers
 * ``mic-capture-processor``.
 *
 * The processor posts the first channel of its first input to its port
 * on every ``process()`` call that has data. The URL is created once and
 * cached for later calls.
 *
 * @returns {string} object URL of the worklet module source.
 */
function getProcessorUrl() {
  if (!_processorUrl) {
    const moduleLines = [
      "class CaptureProcessor extends AudioWorkletProcessor {",
      " process(inputs, outputs, params) {",
      " const input = inputs[0];",
      " if (input && input[0] && input[0].length > 0) {",
      " this.port.postMessage(input[0]);",
      " }",
      " return true;",
      " }",
      "}",
      "registerProcessor('mic-capture-processor', CaptureProcessor);",
    ];
    _processorUrl = URL.createObjectURL(
      new Blob([moduleLines.join("\n")], { type: "application/javascript" }),
    );
  }
  return _processorUrl;
}
|
|
128
|
+
|
|
129
|
+
// -----------------------------------------------------------------------
|
|
130
|
+
// MicFrameSender
|
|
131
|
+
// -----------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
// Sentinel thrown inside a start attempt once stop() has superseded it.
const START_SUPERSEDED = new Error("MicFrameSender start superseded by stop()");

// createScriptProcessor() only accepts power-of-two sizes in this range.
const MIN_SCRIPT_BUFFER = 256;
const MAX_SCRIPT_BUFFER = 16384;

/**
 * Stop every track of a media stream.
 * @param {MediaStream} stream
 */
function stopStreamTracks(stream) {
  stream.getTracks().forEach((track) => {
    track.stop();
  });
}

/**
 * Create a microphone frame sender.
 *
 * NOTE: capture always reads the first input channel only. ``channels`` is
 * passed to ``getUserMedia`` as an ideal constraint and copied into each
 * payload's ``channels`` field.
 *
 * @param {Object} options
 * @param {WebSocket} options.webSocket - target WebSocket.
 * @param {number} [options.sampleRate=16000] - target sample rate.
 * @param {number} [options.channels=1] - output channel count.
 * @param {number} [options.frameMs=30] - frame duration in ms.
 * @param {string} [options.mode="chat"] - conversation mode tag.
 * @param {string} [options.messageType="audio.frame"] - WebSocket message type.
 * @param {function(number):void} [options.onLevel] - level callback (0-1).
 * @param {function(Error):void} [options.onError] - error callback.
 * @param {AudioContext} [options.audioContext] - shared AudioContext (created if omitted).
 * @param {function(Object):boolean} [options.shouldSendFrame] - optional gate; called with payload, return false to drop.
 */
function MicFrameSender(options) {
  const opts = options || {};
  this._ws = opts.webSocket || null;
  this._targetRate = opts.sampleRate || 16000;
  this._channels = opts.channels || 1;
  this._frameMs = opts.frameMs || 30;
  this._mode = opts.mode || "chat";
  this._messageType = opts.messageType || "audio.frame";
  this._onLevel = opts.onLevel || null;
  this._onError = opts.onError || null;
  this._audioContext = opts.audioContext || null;
  this._shouldSendFrame = opts.shouldSendFrame || null;

  // Owned AudioContext (if none provided)
  this._ownedCtx = null;

  // Active stream / nodes / worklet
  this._stream = null;
  this._source = null;
  this._workletNode = null;
  this._scriptProcessor = null;

  // Frame sequencing
  this._sequence = 0;
  this._accumulator = new Float32Array(0);
  // At least one sample per frame: a zero or negative frame size would
  // make the frame-slicing loop in _processAudio spin forever.
  this._frameSamples = Math.max(
    1,
    msToSamples(this._frameMs, this._targetRate),
  );
  this._running = false;
  this._paused = false;

  // Start bookkeeping. stop() bumps the epoch to invalidate an in-flight
  // start attempt; _pendingStart is that attempt ({ epoch, promise }).
  this._startEpoch = 0;
  this._pendingStart = null;

  // Worklet availability
  this._workletSupported =
    typeof AudioWorkletNode !== "undefined" &&
    typeof AudioContext !== "undefined";
}

MicFrameSender.prototype = {
  constructor: MicFrameSender,

  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------

  /**
   * Start capturing and sending mic frames.
   *
   * No-op while already running. Calls made while a start is in flight
   * share that attempt instead of opening a second stream. If ``stop()``
   * is called before the attempt finishes, everything it acquired is
   * released and the promise resolves without capture running.
   *
   * @returns {Promise<void>} Rejects (after calling ``onError``) when the
   *   microphone or audio graph cannot be set up.
   */
  start: async function () {
    if (this._running) return;

    const pending = this._pendingStart;
    if (pending) {
      if (pending.epoch === this._startEpoch) {
        return pending.promise; // join the attempt already in flight
      }
      // A cancelled attempt is still unwinding; let it finish releasing
      // its resources before starting a fresh one.
      try {
        await pending.promise;
      } catch (_) {
        // The caller that owns that attempt already received this error.
      }
      if (this._pendingStart === pending) this._pendingStart = null;
      return this.start();
    }

    const attempt = { epoch: this._startEpoch, promise: null };
    attempt.promise = this._startCapture(attempt.epoch);
    this._pendingStart = attempt;
    try {
      await attempt.promise;
    } finally {
      if (this._pendingStart === attempt) this._pendingStart = null;
    }
  },

  /**
   * Stop capturing and release resources. Also cancels a start attempt
   * that has not finished yet and discards buffered partial-frame audio.
   */
  stop: function () {
    this._startEpoch += 1;
    this._running = false;
    this._paused = false;
    this._sequence = 0;
    // Drop leftover samples so they cannot leak into the next session.
    this._accumulator = new Float32Array(0);

    this._teardownNodes();
    this._teardownStream();
    this._teardownContext();
  },

  /**
   * Update the conversation mode tag sent with each frame.
   * @param {string} mode
   */
  setMode: function (mode) {
    this._mode = String(mode);
  },

  /**
   * Replace the target WebSocket. Can be called while running.
   * @param {WebSocket|null} ws
   */
  setWebSocket: function (ws) {
    this._ws = ws;
  },

  /**
   * Release all resources (alias for stop).
   */
  close: function () {
    this.stop();
  },

  // -----------------------------------------------------------------------
  // Internal: start attempt
  // -----------------------------------------------------------------------

  /**
   * Run one start attempt: acquire the microphone, build the audio graph
   * and mark the sender as running.
   *
   * @param {number} epoch - value of ``_startEpoch`` when the attempt began;
   *   a different current value means ``stop()`` was called meanwhile.
   * @returns {Promise<void>}
   */
  _startCapture: async function (epoch) {
    const ensureCurrent = () => {
      if (epoch !== this._startEpoch) throw START_SUPERSEDED;
    };

    let stream = null;
    this._paused = false;
    this._accumulator = new Float32Array(0);

    try {
      stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: { ideal: this._targetRate },
          channelCount: { ideal: this._channels },
          echoCancellation: true,
          noiseSuppression: true,
        },
      });
      ensureCurrent();
      this._stream = stream;

      const ctx =
        this._audioContext ||
        new (window.AudioContext || window.webkitAudioContext)();
      if (!this._audioContext) {
        this._ownedCtx = ctx;
      }

      // Determine device sample rate from AudioContext
      const deviceRate = ctx.sampleRate;

      // Ensure context is running (needed after autoplay policy)
      if (ctx.state === "suspended") {
        await ctx.resume();
        ensureCurrent();
      }

      const source = ctx.createMediaStreamSource(stream);
      this._source = source;

      // Try AudioWorklet first, fall back to ScriptProcessorNode
      try {
        await this._setupWorkletNode(ctx, source, deviceRate);
      } catch (_) {
        ensureCurrent();
        // Remove a half-wired worklet node so audio is not processed twice.
        this._teardownWorklet();
        this._setupScriptProcessor(ctx, source, deviceRate);
      }
      ensureCurrent();

      this._running = true;
    } catch (err) {
      const superseded = epoch !== this._startEpoch;

      // Release everything this attempt acquired so the microphone does
      // not stay live after a failed or cancelled start.
      this._teardownNodes();
      this._teardownStream();
      this._teardownContext();
      if (stream) stopStreamTracks(stream); // may not be stored on `this` yet

      if (superseded) return; // stop() won the race; not an error

      this._safeError(err);
      throw err;
    }
  },

  // -----------------------------------------------------------------------
  // Internal: AudioWorklet branch
  // -----------------------------------------------------------------------

  /**
   * Load the capture worklet module, insert a worklet node after
   * ``source`` and feed every chunk it posts into ``_processAudio``.
   */
  _setupWorkletNode: async function (ctx, source, deviceRate) {
    const url = getProcessorUrl();
    await ctx.audioWorklet.addModule(url);
    const workletNode = new AudioWorkletNode(ctx, "mic-capture-processor");
    this._workletNode = workletNode;

    source.connect(workletNode);

    workletNode.port.onmessage = (evt) => {
      if (!this._running || this._paused) return;
      this._processAudio(evt.data, deviceRate);
    };

    workletNode.connect(ctx.destination);
  },

  // -----------------------------------------------------------------------
  // Internal: ScriptProcessorNode fallback
  // -----------------------------------------------------------------------

  /**
   * Insert a ScriptProcessorNode after ``source`` and feed its first
   * input channel into ``_processAudio``.
   */
  _setupScriptProcessor: function (ctx, source, deviceRate) {
    // Buffer size = one frame at device rate, rounded up to a power of two
    // and clamped to the range createScriptProcessor() accepts. Framing is
    // done by the accumulator, so the buffer need not match the frame.
    const wanted = this._nextPow2(msToSamples(this._frameMs, deviceRate));
    const bufSize = Math.min(
      MAX_SCRIPT_BUFFER,
      Math.max(MIN_SCRIPT_BUFFER, wanted),
    );
    const processor = ctx.createScriptProcessor(bufSize, 1, 1);
    this._scriptProcessor = processor;

    source.connect(processor);
    processor.connect(ctx.destination);

    processor.onaudioprocess = (evt) => {
      if (!this._running || this._paused) return;
      const input = evt.inputBuffer.getChannelData(0);
      this._processAudio(input, deviceRate);
    };
  },

  // -----------------------------------------------------------------------
  // Internal: audio processing pipeline
  // -----------------------------------------------------------------------

  /**
   * Resample one captured chunk to the target rate, append it to the
   * accumulator and send every complete frame now available.
   *
   * @param {Float32Array} floatSamples - captured samples at ``deviceRate``.
   * @param {number} deviceRate - sample rate of ``floatSamples``.
   */
  _processAudio: function (floatSamples, deviceRate) {
    // Downsample to target rate
    const downsampled = downsampleFloat32(
      floatSamples,
      deviceRate,
      this._targetRate,
    );

    // Append to accumulator
    const acc = this._accumulator;
    const combined = new Float32Array(acc.length + downsampled.length);
    combined.set(acc);
    combined.set(downsampled, acc.length);
    this._accumulator = combined;

    // Slice and send complete frames. The accumulator is updated before
    // each send so a throwing callback cannot cause a frame to be resent.
    const frameSize = this._frameSamples;
    while (this._accumulator.length >= frameSize) {
      const frame = this._accumulator.slice(0, frameSize);
      this._accumulator = this._accumulator.slice(frameSize);
      this._sendFrame(frame);
    }
  },

  /**
   * Report the level of one frame, encode it and send it over the
   * WebSocket unless the gate rejects it or the socket is not open.
   * The sequence number advances for every frame, sent or not.
   *
   * @param {Float32Array} frame - exactly one frame of samples.
   */
  _sendFrame: function (frame) {
    // Report level: RMS scaled by 3 and capped at 1.
    if (this._onLevel) {
      let sum = 0;
      for (let i = 0; i < frame.length; i++) {
        sum += frame[i] * frame[i];
      }
      const rms = Math.sqrt(sum / frame.length);
      this._onLevel(Math.min(1, rms * 3));
    }

    const pcmBuf = float32ToPcmS16le(frame);
    const base64 = arrayBufferToBase64(pcmBuf);

    const payload = {
      type: this._messageType,
      payload: {
        data: base64,
        encoding: ENCODING,
        sample_rate: this._targetRate,
        channels: this._channels,
        frame_ms: this._frameMs,
        sequence: this._sequence++,
        mode: this._mode,
      },
    };

    // Optional gate (speaker echo guard)
    if (this._shouldSendFrame && !this._shouldSendFrame(payload)) {
      return;
    }

    // WebSocket.OPEN is 1; fall back to the literal when no global
    // WebSocket constructor exists (e.g. a socket-like object is used).
    const OPEN = typeof WebSocket !== "undefined" ? WebSocket.OPEN : 1;
    if (this._ws && this._ws.readyState === OPEN) {
      this._ws.send(JSON.stringify(payload));
    }
  },

  // -----------------------------------------------------------------------
  // Internal: teardown helpers
  // -----------------------------------------------------------------------

  /**
   * Detach and drop the worklet node, if any.
   */
  _teardownWorklet: function () {
    const node = this._workletNode;
    if (!node) return;
    this._workletNode = null;
    // Drop the handler so messages still queued from this node cannot
    // reach a later capture session.
    if (node.port) node.port.onmessage = null;
    node.disconnect();
  },

  /**
   * Disconnect and drop the worklet node, script processor and source.
   */
  _teardownNodes: function () {
    this._teardownWorklet();
    if (this._scriptProcessor) {
      this._scriptProcessor.onaudioprocess = null;
      this._scriptProcessor.disconnect();
      this._scriptProcessor = null;
    }
    if (this._source) {
      this._source.disconnect();
      this._source = null;
    }
  },

  /**
   * Stop all tracks of the captured stream and drop it.
   */
  _teardownStream: function () {
    if (this._stream) {
      stopStreamTracks(this._stream);
      this._stream = null;
    }
  },

  /**
   * Close the AudioContext this sender created. A context supplied via
   * ``options.audioContext`` is never closed here.
   */
  _teardownContext: function () {
    const ctx = this._ownedCtx;
    if (!ctx) return;
    this._ownedCtx = null;
    if (ctx.state === "closed") return;

    const closing = ctx.close();
    if (closing && typeof closing.catch === "function") {
      // Best-effort: a failed close during teardown has no recovery path
      // and must not become an unhandled rejection.
      closing.catch(() => {});
    }
  },

  // -----------------------------------------------------------------------
  // Internal: utilities
  // -----------------------------------------------------------------------

  /**
   * Forward an error to ``onError`` (if set), wrapping non-Error values.
   */
  _safeError: function (err) {
    if (this._onError) {
      this._onError(err instanceof Error ? err : new Error(String(err)));
    }
  },

  /**
   * Smallest power of two that is >= ``n`` (1 for ``n`` <= 1).
   */
  _nextPow2: (n) => {
    let v = 1;
    while (v < n) v <<= 1;
    return v;
  },
};
|
|
434
|
+
|
|
435
|
+
// -----------------------------------------------------------------------
|
|
436
|
+
// Exports (pure helper functions for testing)
|
|
437
|
+
// -----------------------------------------------------------------------
|
|
438
|
+
|
|
439
|
+
// Attach the pure helper functions to the constructor as static members
// so tests can reach them through the exported MicFrameSender.
Object.assign(MicFrameSender, {
  downsampleFloat32,
  float32ToPcmS16le,
  arrayBufferToBase64,
  msToSamples,
});
|
|
443
|
+
|
|
444
|
+
return MicFrameSender;
|
|
445
|
+
});
|