@m4trix/core 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -0
- package/dist/api/index.cjs +83 -0
- package/dist/api/index.cjs.map +1 -0
- package/dist/api/index.d.cts +74 -0
- package/dist/api/index.d.ts +74 -0
- package/dist/api/index.js +81 -0
- package/dist/api/index.js.map +1 -0
- package/dist/helper/index.cjs +253 -0
- package/dist/helper/index.cjs.map +1 -0
- package/dist/helper/index.d.cts +92 -0
- package/dist/helper/index.d.ts +92 -0
- package/dist/helper/index.js +251 -0
- package/dist/helper/index.js.map +1 -0
- package/dist/index.cjs +2670 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +8 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +2656 -0
- package/dist/index.js.map +1 -0
- package/dist/react/index.cjs +1324 -0
- package/dist/react/index.cjs.map +1 -0
- package/dist/react/index.d.cts +213 -0
- package/dist/react/index.d.ts +213 -0
- package/dist/react/index.js +1316 -0
- package/dist/react/index.js.map +1 -0
- package/dist/stream/index.cjs +716 -0
- package/dist/stream/index.cjs.map +1 -0
- package/dist/stream/index.d.cts +304 -0
- package/dist/stream/index.d.ts +304 -0
- package/dist/stream/index.js +712 -0
- package/dist/stream/index.js.map +1 -0
- package/dist/ui/index.cjs +316 -0
- package/dist/ui/index.cjs.map +1 -0
- package/dist/ui/index.d.cts +30 -0
- package/dist/ui/index.d.ts +30 -0
- package/dist/ui/index.js +314 -0
- package/dist/ui/index.js.map +1 -0
- package/package.json +123 -0
|
@@ -0,0 +1,1316 @@
|
|
|
1
|
+
import { useRef, useState, useCallback, useEffect } from 'react';
|
|
2
|
+
import { io } from 'socket.io-client';
|
|
3
|
+
|
|
4
|
+
// src/react/hooks/use-conversation/useConversation.ts
|
|
5
|
+
|
|
6
|
+
// src/utility/Logger.ts
|
|
7
|
+
/**
 * Small console wrapper that prefixes messages with an optional namespace.
 *
 * Output is controlled by a single class-wide switch (`globalEnabled`) shared
 * by every instance; while the switch is off, all logging methods are no-ops.
 */
var _Logger = class _Logger {
  /**
   * @param {string} [namespace=""] Label printed as `[namespace]` before each message.
   */
  constructor(namespace = "") {
    this.namespace = namespace;
  }

  /** Turns console output on for every logger instance. */
  static enableGlobalLogging() {
    _Logger.globalEnabled = true;
  }

  /** Turns console output off for every logger instance. */
  static disableGlobalLogging() {
    _Logger.globalEnabled = false;
  }

  /**
   * @returns {string} `[namespace]`, or an empty string when no namespace is set.
   */
  formatPrefix() {
    if (!this.namespace) {
      return "";
    }
    return `[${this.namespace}]`;
  }

  /**
   * Forwards `args` to `console[level]` when global logging is enabled.
   *
   * @param {string} level Name of the console method to call ("log", "warn", ...).
   * @param {...*} args Values to print.
   */
  logIfEnabled(level, ...args) {
    if (_Logger.globalEnabled) {
      const label = this.formatPrefix();
      // Only pass the label when there is one, so un-namespaced output has no leading "".
      const payload = label ? [label, ...args] : args;
      console[level](...payload);
    }
  }

  log(...args) {
    this.logIfEnabled("log", ...args);
  }

  debug(...args) {
    this.logIfEnabled("debug", ...args);
  }

  info(...args) {
    this.logIfEnabled("info", ...args);
  }

  warn(...args) {
    this.logIfEnabled("warn", ...args);
  }

  error(...args) {
    this.logIfEnabled("error", ...args);
  }
};
// Logging starts disabled; enableGlobalLogging() flips this switch.
_Logger.globalEnabled = false;
var Logger = _Logger;
|
|
48
|
+
|
|
49
|
+
// src/react/adapter/VoiceEndpointAdapter.ts
|
|
50
|
+
/**
 * Base class for adapters that deliver recorded voice data to an endpoint.
 * It only stores the adapter configuration and a logger; subclasses supply
 * the actual transport.
 */
var VoiceEndpointAdapter = class {
  /**
   * @param {object} config Adapter configuration, kept as-is on `this.config`.
   */
  constructor(config) {
    // NOTE(review): the other loggers in this file use the "@m4trix/core > …"
    // namespace; this one looks like a leftover name — confirm before renaming.
    const logger = new Logger("SuTr > EndpointAdapter");
    this.logger = logger;
    this.config = config;
  }
};
|
|
56
|
+
/**
 * Default endpoint adapter: uploads a recording with a multipart POST via `fetch`.
 */
var BaseVoiceEndpointAdapter = class extends VoiceEndpointAdapter {
  constructor(config) {
    super(config);
  }

  /**
   * Uploads a voice recording and returns the raw fetch `Response`.
   *
   * The recording is sent as the `audio` form field; `metadata`, when truthy,
   * is JSON-encoded into the `metadata` form field. The request URL is
   * `config.baseUrl` (or an empty string) followed by `config.endpoint`.
   *
   * @param {object} params
   * @param {Blob} params.blob Recorded audio to upload.
   * @param {*} [params.metadata] Extra data serialized with `JSON.stringify`.
   * @returns {Promise<Response>} The response, whose body has not been read.
   * @throws {Error} When the response status is not OK or the response has no body.
   */
  async sendVoiceFile({
    blob,
    metadata
  }) {
    const payload = new FormData();
    payload.append("audio", blob);
    if (metadata) {
      payload.append("metadata", JSON.stringify(metadata));
    }
    this.logger.debug("Sending voice file to", this.config.endpoint, payload);
    const targetUrl = `${this.config.baseUrl || ""}${this.config.endpoint}`;
    // NOTE(review): a Content-Type entry in config.headers would presumably
    // replace the multipart boundary fetch derives from FormData — confirm callers.
    const requestInit = {
      method: "POST",
      headers: this.config.headers,
      body: payload
    };
    const response = await fetch(targetUrl, requestInit);
    if (!response.ok) {
      throw new Error(`API error: ${response.status} ${await response.text()}`);
    }
    if (!response.body) {
      throw new Error("No response body");
    }
    return response;
  }
};
|
|
90
|
+
|
|
91
|
+
// src/react/utility/audio/InputAudioController.ts
|
|
92
|
+
/**
 * Base class for audio input (recording) controllers.
 * Provides the namespaced logger that subclasses use.
 */
var InputAudioController = class {
  constructor() {
    const namespace = "@m4trix/core > InputAudioController";
    this.logger = new Logger(namespace);
  }
};
|
|
97
|
+
|
|
98
|
+
// src/react/utility/audio/WebAudioInputAudioController.ts
|
|
99
|
+
/** Timeslice in milliseconds passed to `MediaRecorder.start()`. */
var DEFAULT_SLICING_INTERVAL = 3e3;

/**
 * Records microphone audio with `getUserMedia` + `MediaRecorder` and keeps an
 * `AudioContext`/analyser pair alive for the duration of a recording.
 */
var WebAudioInputAudioController = class extends InputAudioController {
  /**
   * @param {object} [audioConfig={}] Audio options; only `sampleRate` is read here.
   */
  constructor(audioConfig = {}) {
    super();
    this.audioConfig = audioConfig;
    // ─── Recording state ─────────────────────────────────────────────────────
    this.audioContextState = {
      context: null,
      source: null,
      analyser: null
    };
    this.mediaRecorder = null;
    this.recordedChunks = [];
    this.recordingStream = null;
  }

  /** @returns {AudioContext|null} The live audio context, or null when none exists. */
  get audioContext() {
    return this.audioContextState.context;
  }

  /**
   * Creates a fresh audio context (default sample rate 16 kHz) with an analyser.
   * No source node is created here, so `source` starts out null.
   *
   * @returns {Promise<{context: AudioContext, source: null, analyser: AnalyserNode}>}
   */
  async createAudioContext() {
    const context = new AudioContext({
      sampleRate: this.audioConfig.sampleRate || 16e3,
      latencyHint: "interactive"
    });
    const analyser = context.createAnalyser();
    analyser.fftSize = 2048;
    return { context, source: null, analyser };
  }

  /**
   * Disconnects the source node (if any) and closes the audio context (if any).
   * Safe to call repeatedly and from overlapping callers.
   */
  async cleanupAudioContext() {
    this.logger.debug("Cleaning up audio context");
    const { source, context } = this.audioContextState;
    // Detach the state before awaiting: cleanup() and the recorder's onstop
    // handler can both get here, and closing the same context twice rejects.
    this.audioContextState = { context: null, source: null, analyser: null };
    if (source)
      source.disconnect();
    if (context && context.state !== "closed")
      await context.close();
  }

  /** Stops every track of the active recording stream and forgets the stream. */
  releaseRecordingStream() {
    if (this.recordingStream) {
      this.recordingStream.getTracks().forEach((t) => t.stop());
      this.recordingStream = null;
    }
  }

  /**
   * Requests microphone access and starts recording in fixed time slices.
   * Failures are logged and reported through `onError`; this method does not throw.
   *
   * @param {object} [params]
   * @param {(chunk: Blob) => void} [params.onRecordedChunk] Called for every non-empty chunk.
   * @param {(error: Error) => void} [params.onError] Called when recording cannot start.
   */
  async startRecording({
    onRecordedChunk,
    onError
  } = {}) {
    let acquiredStream = null;
    try {
      this.logger.debug("Starting recording");
      this.recordedChunks = [];
      acquiredStream = await navigator.mediaDevices.getUserMedia({ audio: true });
      this.recordingStream = acquiredStream;
      if (!this.audioContextState.context) {
        this.audioContextState = await this.createAudioContext();
      }
      this.mediaRecorder = new MediaRecorder(acquiredStream, {
        mimeType: "audio/webm;codecs=opus"
      });
      this.mediaRecorder.ondataavailable = (e) => {
        if (e.data.size > 0) {
          this.recordedChunks.push(e.data);
          onRecordedChunk?.(e.data);
          this.logger.debug("Recorded chunk", e.data.size);
        }
      };
      this.mediaRecorder.start(DEFAULT_SLICING_INTERVAL);
      this.logger.debug("MediaRecorder started");
    } catch (err) {
      // If the failure happened after the microphone was granted (e.g. the
      // recorder rejects the MIME type), stop the tracks so the mic is released.
      if (acquiredStream) {
        acquiredStream.getTracks().forEach((t) => t.stop());
        if (this.recordingStream === acquiredStream) {
          this.recordingStream = null;
        }
      }
      const error = err instanceof Error ? err : new Error("Failed to start recording");
      this.logger.error(error);
      onError?.(error);
    }
  }

  /**
   * Stops the active recording and releases the stream and audio context.
   * Returns immediately when nothing is being recorded.
   *
   * @param {object} [params]
   * @param {(blob: Blob) => void} [params.onRecordingCompleted] Receives all recorded
   *   chunks as one `audio/webm` blob; not called when no chunk was recorded.
   * @returns {Promise<void>} Settles once cleanup finished; rejects when the
   *   completion callback or the cleanup throws.
   */
  async stopRecording({
    onRecordingCompleted
  } = {}) {
    this.logger.debug("Stopping recording");
    if (!this.mediaRecorder || this.mediaRecorder.state === "inactive")
      return;
    const recorder = this.mediaRecorder;
    await new Promise((resolve, reject) => {
      recorder.onstop = async () => {
        let failure = null;
        try {
          if (this.recordedChunks.length) {
            const blob = new Blob(this.recordedChunks, { type: "audio/webm" });
            onRecordingCompleted?.(blob);
            this.logger.debug("Recording completed", blob.size);
          }
        } catch (err) {
          failure = err;
        }
        // Always release resources, even when the callback above threw.
        try {
          this.releaseRecordingStream();
          await this.cleanupAudioContext();
        } catch (err) {
          failure = failure || err;
        }
        // Settle in every case so the awaiting caller can never hang.
        if (failure) {
          reject(failure);
        } else {
          resolve();
        }
      };
      recorder.stop();
    });
  }

  /**
   * Cleans up all audio recording resources.
   */
  cleanup() {
    // cleanupAudioContext() is async; handle its rejection so it is not left floating.
    this.cleanupAudioContext().catch((err) => {
      this.logger.error("Error cleaning up audio context", err);
    });
    if (this.mediaRecorder && this.mediaRecorder.state !== "inactive") {
      this.mediaRecorder.stop();
    }
    this.releaseRecordingStream();
  }
};
|
|
200
|
+
|
|
201
|
+
// src/react/utility/audio/OutputAudioController.ts
|
|
202
|
+
/**
 * Base class for audio output (playback) controllers.
 * Stores a logger created with the namespace chosen by the subclass.
 */
var OutputAudioController = class {
  /**
   * @param {string} loggerName Namespace handed to the logger.
   */
  constructor(loggerName) {
    const logger = new Logger(loggerName);
    this.logger = logger;
  }
};
|
|
207
|
+
|
|
208
|
+
// src/react/utility/audio/AudioElementOutputAudioController.ts
|
|
209
|
+
/**
 * Plays audio through an `HTMLAudioElement`, either from a complete source
 * (Blob / URL), from a streamed fetch response, or from chunks pushed by the
 * caller. Only one element is active at a time; starting new playback stops
 * the previous one.
 */
var AudioElementOutputAudioController = class extends OutputAudioController {
  constructor() {
    super("@m4trix/core > WebApiOutputAudioController");
    // ─── Playback state ──────────────────────────────────────────────────────
    this.currentHtmlAudio = null;
    this.currentAudioUrl = null;
  }

  /**
   * Synchronously stops the active element, detaches it from the DOM if it was
   * attached, and revokes the object URL this controller created for it.
   */
  releaseCurrentAudio() {
    const audio = this.currentHtmlAudio;
    if (audio) {
      try {
        audio.pause();
        audio.src = "";
      } catch (err) {
        this.logger.error("Error stopping playback", err);
      }
      // Chunk streams append their element to document.body; don't leave it behind.
      if (audio.parentNode) {
        audio.parentNode.removeChild(audio);
      }
      this.currentHtmlAudio = null;
    }
    if (this.currentAudioUrl) {
      URL.revokeObjectURL(this.currentAudioUrl);
      this.currentAudioUrl = null;
    }
  }

  // ─── One-shot playback ────────────────────────────────────────────────────
  /**
   * Play either a Blob or a URL string.
   * Uses <audio> under the hood for maximum browser compatibility.
   *
   * @param {object} params
   * @param {Blob|string} params.source Audio data, or a URL to load it from.
   * @param {() => void} [params.onComplete] Called when playback reaches the end,
   *   for Blob and URL sources alike.
   */
  async playAudio({
    source,
    onComplete
  }) {
    // Stop the previous playback and revoke its object URL regardless of what
    // kind of source comes next.
    this.releaseCurrentAudio();
    const audio = new Audio();
    this.currentHtmlAudio = audio;
    const ownsObjectUrl = source instanceof Blob;
    let url;
    if (ownsObjectUrl) {
      url = URL.createObjectURL(source);
      this.currentAudioUrl = url;
    } else {
      url = source;
    }
    audio.onended = () => {
      // Only revoke a URL we created and that was not already revoked by a stop.
      if (ownsObjectUrl && this.currentAudioUrl === url) {
        URL.revokeObjectURL(url);
        this.currentAudioUrl = null;
      }
      onComplete?.();
    };
    audio.src = url;
    try {
      await audio.play();
    } catch (err) {
      this.logger.error("Playback failed, user gesture may be required", err);
    }
  }

  // ─── Streaming playback ──────────────────────────────────────────────────
  /**
   * Stream audio from a Response via MediaSource Extensions.
   * @param params.response The fetch Response whose body is an audio stream
   * @param params.mimeCodec MIME type+codec string, e.g. 'audio/mpeg'
   * @param params.onComplete Optional callback once the stream ends
   * @throws {Error} When the response is not OK / has no body, or the codec is unsupported.
   */
  async playAudioStream({
    response,
    mimeCodec = "audio/mpeg",
    onComplete
  }) {
    if (!response.ok || !response.body) {
      throw new Error(`Invalid response (${response.status})`);
    }
    if (typeof MediaSource === "undefined" || !MediaSource.isTypeSupported(mimeCodec)) {
      throw new Error(`Unsupported MIME type or codec: ${mimeCodec}`);
    }
    await this.stopPlayback();
    const mediaSource = new MediaSource();
    const url = URL.createObjectURL(mediaSource);
    this.currentAudioUrl = url;
    const audio = new Audio(url);
    this.currentHtmlAudio = audio;
    audio.autoplay = true;
    audio.onended = () => {
      if (this.currentAudioUrl === url) {
        URL.revokeObjectURL(url);
        this.currentAudioUrl = null;
      }
      onComplete?.();
    };
    mediaSource.addEventListener(
      "sourceopen",
      () => {
        const sourceBuffer = mediaSource.addSourceBuffer(mimeCodec);
        const reader = response.body.getReader();
        // Marks the end of the stream once the buffer is idle.
        const finish = () => {
          if (mediaSource.readyState !== "open") {
            return;
          }
          if (sourceBuffer.updating) {
            sourceBuffer.addEventListener("updateend", finish, { once: true });
            return;
          }
          try {
            mediaSource.endOfStream();
          } catch (err) {
            this.logger.error("Error ending MediaSource stream", err);
          }
        };
        const pump = async () => {
          try {
            const { done, value } = await reader.read();
            if (done) {
              finish();
              return;
            }
            if (value) {
              sourceBuffer.appendBuffer(value);
            }
            if (sourceBuffer.updating) {
              sourceBuffer.addEventListener("updateend", pump, { once: true });
            } else {
              pump();
            }
          } catch (err) {
            // Read/append failures used to surface as unhandled rejections.
            // Log them, stop reading, and end the stream so the audio that is
            // already buffered can finish playing.
            this.logger.error("Error while streaming audio", err);
            reader.cancel().catch(() => {
            });
            finish();
          }
        };
        pump();
      },
      { once: true }
    );
    try {
      await audio.play();
    } catch (err) {
      this.logger.error(
        "Streaming playback failed, user gesture may be required",
        err
      );
    }
  }

  // ─── Chunk-based streaming playback ─────────────────────────────────────
  /**
   * Initialize a streaming audio context for chunk-based playback.
   * This creates the necessary MediaSource and SourceBuffer for subsequent chunk additions.
   * Returns functions to add chunks and end the stream, encapsulated in a closure.
   *
   * `endChunkStream()` marks the end of the data. Audio that is already
   * buffered keeps playing; the element is removed and `onComplete` is called
   * when playback reaches the end. Use `stopPlayback()` to stop immediately.
   *
   * @param mimeCodec MIME type+codec string, e.g. 'audio/mpeg'
   * @param onComplete Optional callback once the stream ends
   * @returns Object containing functions to add chunks and end the stream
   * @throws {Error} When MediaSource is unavailable, no codec is supported, or
   *   the MediaSource does not open within 5 seconds.
   */
  async initializeChunkStream({
    onComplete,
    mimeCodec = "audio/mpeg"
  }) {
    this.logger.debug(`Initializing chunk stream with codec: ${mimeCodec}`);
    if (typeof MediaSource === "undefined") {
      throw new Error("MediaSource API is not supported in this browser");
    }
    if (!MediaSource.isTypeSupported(mimeCodec)) {
      this.logger.warn(
        `Codec ${mimeCodec} not supported, falling back to standard audio/mpeg`
      );
      mimeCodec = "audio/mpeg";
      if (!MediaSource.isTypeSupported(mimeCodec)) {
        throw new Error(
          "Neither the specified codec nor the fallback codec are supported"
        );
      }
    }
    await this.stopPlayback();
    const mediaSource = new MediaSource();
    let sourceBuffer = null;
    const url = URL.createObjectURL(mediaSource);
    this.currentAudioUrl = url;
    const audio = new Audio(url);
    this.currentHtmlAudio = audio;
    audio.autoplay = false;
    audio.controls = true;
    audio.style.display = "none";
    document.body.appendChild(audio);
    let playbackStarted = false;
    let hasReceivedFirstChunk = false;
    let receivedChunksCount = 0;
    const pendingChunks = [];
    let isProcessingQueue = false;
    const logger = this.logger;

    // Removes the element, revokes the URL and closes the stream for new chunks.
    const teardown = () => {
      audio.onended = null;
      if (audio.parentNode) {
        audio.parentNode.removeChild(audio);
      }
      if (this.currentHtmlAudio === audio) {
        this.currentHtmlAudio = null;
      }
      if (this.currentAudioUrl === url) {
        this.currentAudioUrl = null;
        URL.revokeObjectURL(url);
      }
      sourceBuffer = null;
    };

    // Resolves once `buffer` is not updating; resolves immediately when idle.
    const waitForIdle = (buffer) => {
      if (!buffer.updating) {
        return Promise.resolve();
      }
      return new Promise((resolve) => {
        buffer.addEventListener("updateend", () => resolve(), {
          once: true
        });
      });
    };

    this.logger.debug("Waiting for MediaSource to open...");
    try {
      await new Promise((resolve, reject) => {
        const timeout = setTimeout(() => {
          reject(new Error("MediaSource failed to open (timeout)"));
        }, 5e3);
        mediaSource.addEventListener(
          "sourceopen",
          () => {
            clearTimeout(timeout);
            this.logger.debug("MediaSource open event received");
            try {
              sourceBuffer = mediaSource.addSourceBuffer(mimeCodec);
              if (mediaSource.duration === Infinity || isNaN(mediaSource.duration)) {
                mediaSource.duration = 1e3;
              }
              this.logger.debug("SourceBuffer created successfully");
              resolve();
            } catch (err) {
              reject(new Error(`Failed to create SourceBuffer: ${err}`));
            }
          },
          { once: true }
        );
      });
    } catch (err) {
      // Don't leave the hidden element or the object URL behind on failure.
      teardown();
      throw err;
    }

    const tryStartPlayback = async () => {
      if (playbackStarted)
        return;
      playbackStarted = true;
      logger.debug("Attempting to start audio playback...");
      if (receivedChunksCount < 3 && audio.buffered.length > 0 && audio.buffered.end(0) < 0.5) {
        logger.debug("Not enough data buffered yet, delaying playback");
        // Clear the flag again, otherwise no later chunk could ever start playback.
        playbackStarted = false;
        return;
      }
      try {
        if (audio.readyState === 0) {
          logger.debug(
            "Audio element not ready yet, waiting for canplay event"
          );
          await new Promise((resolve) => {
            audio.addEventListener("canplay", () => resolve(), { once: true });
          });
        }
        await audio.play();
        logger.debug("Successfully started audio playback");
      } catch (err) {
        logger.error("Failed to start playback", err);
        // Autoplay may be blocked; retry on the next user click.
        document.addEventListener(
          "click",
          async () => {
            try {
              await audio.play();
              logger.debug("Started playback after user interaction");
            } catch (innerErr) {
              logger.error(
                "Still failed to play after user interaction",
                innerErr
              );
            }
          },
          { once: true }
        );
      }
    };

    const processQueue = async () => {
      if (!sourceBuffer || pendingChunks.length === 0 || isProcessingQueue) {
        return;
      }
      isProcessingQueue = true;
      try {
        while (pendingChunks.length > 0) {
          const buffer = sourceBuffer;
          if (!buffer) {
            // The stream was torn down while the queue was draining.
            break;
          }
          await waitForIdle(buffer);
          const nextChunk = pendingChunks.shift();
          if (!nextChunk)
            continue;
          try {
            buffer.appendBuffer(nextChunk);
            logger.debug(
              `Processed queued chunk of size ${nextChunk.byteLength}`
            );
            if (!playbackStarted && hasReceivedFirstChunk) {
              // Do not block the queue on playback start: starting may need
              // more data than this chunk, and waiting here would stall appends.
              tryStartPlayback().catch((err) => {
                logger.error("Failed to start playback", err);
              });
            }
            // Wait only while an update is actually in flight. Registering an
            // updateend listener after the event already fired would never resolve.
            await waitForIdle(buffer);
          } catch (err) {
            logger.error("Error appending queued chunk to source buffer", err);
          }
        }
      } finally {
        isProcessingQueue = false;
      }
    };

    const addChunkToStream = async (chunk) => {
      if (!sourceBuffer) {
        throw new Error(
          "Streaming context was closed or not properly initialized."
        );
      }
      let arrayBufferChunk;
      if (chunk instanceof Blob) {
        logger.debug("Converting Blob to ArrayBuffer");
        arrayBufferChunk = await chunk.arrayBuffer();
      } else {
        arrayBufferChunk = chunk;
      }
      if (!arrayBufferChunk || arrayBufferChunk.byteLength === 0) {
        logger.warn("Received empty chunk, skipping");
        return;
      }
      if (!hasReceivedFirstChunk) {
        hasReceivedFirstChunk = true;
        logger.debug(
          `First chunk received, size: ${arrayBufferChunk.byteLength} bytes`
        );
      }
      receivedChunksCount++;
      pendingChunks.push(arrayBufferChunk);
      logger.debug(
        `Added chunk #${receivedChunksCount} to queue (size: ${arrayBufferChunk.byteLength} bytes)`
      );
      await processQueue();
      if (!playbackStarted && hasReceivedFirstChunk && receivedChunksCount >= 3) {
        await tryStartPlayback();
      }
    };

    const endChunkStream = () => {
      if (mediaSource && mediaSource.readyState === "open") {
        try {
          if (pendingChunks.length > 0 || sourceBuffer && sourceBuffer.updating) {
            logger.debug("Waiting for pending chunks before ending stream");
            setTimeout(() => endChunkStream(), 200);
            return;
          }
          if (hasReceivedFirstChunk) {
            mediaSource.endOfStream();
            logger.debug("MediaSource stream ended successfully");
          } else {
            logger.warn("Stream ended without receiving any chunks");
          }
        } catch (err) {
          logger.error("Error ending MediaSource stream", err);
        }
      }
      // No further chunks are accepted from here on.
      sourceBuffer = null;
      // Removing the element now would cut off audio that is buffered but not
      // yet played, and clearing onended would suppress onComplete. Let the
      // `ended` handler below finish the job while playback is still running.
      const stillPlaying = this.currentHtmlAudio === audio && mediaSource.readyState === "ended" && !audio.ended;
      if (stillPlaying) {
        return;
      }
      teardown();
    };

    audio.onended = () => {
      logger.debug("Audio playback completed");
      teardown();
      onComplete?.();
    };
    return {
      addChunkToStream,
      endChunkStream
    };
  }

  /**
   * Stop any ongoing HTMLAudioElement playback.
   */
  async stopPlayback() {
    this.releaseCurrentAudio();
  }

  /**
   * Cleans up all audio playback resources.
   */
  cleanup() {
    this.stopPlayback().catch((err) => {
      this.logger.error("Error stopping playback", err);
    });
  }
};
|
|
558
|
+
|
|
559
|
+
// src/react/hooks/use-conversation/useConversation.ts
|
|
560
|
+
// NOTE(review): this turns console logging on for every Logger as soon as the
// module is loaded; it looks like a debugging leftover — confirm it is intended.
Logger.enableGlobalLogging();

/**
 * React hook that drives a push-to-talk voice conversation: record from the
 * microphone, upload the recording, and play the audio response.
 *
 * `voiceAgentState` moves through "READY" → "RECORDING" → "PROCESSING" →
 * "RESPONDING" and back to "READY".
 *
 * @param {string} endpoint Path the recording is posted to (appended to `endpointConfig.baseUrl`).
 * @param {object} [options]
 * @param {() => void} [options.onStartRecording] Called after recording was requested.
 * @param {() => void} [options.onStopRecording] Called after recording was stopped.
 * @param {(recording: Blob, play: () => Promise<void>, stop: () => Promise<void>) => void} [options.onReceive]
 *   Called once a response arrived, with the recorded blob and callbacks to
 *   (re)play the response and to stop playback.
 * @param {boolean} [options.autoPlay=true] Play the response as soon as it arrives.
 * @param {"STREAM"|"DOWNLOAD"} [options.downstreamMode="STREAM"] Stream the response
 *   body into playback, or download it completely first.
 * @param {(state: string, error: Error) => void} [options.onError] Error callback.
 * @param {object} [options.audioConfig={}] Passed to the input audio controller.
 * @param {object} [options.requestData={}] Sent as metadata with each recording.
 * @param {object} [options.endpointConfig={}] `baseUrl`, `headers`, or a custom `endpointAdapter`.
 * @returns {{startRecording: Function, stopRecording: Function, voiceAgentState: string, error: (Error|null), audioContext: (AudioContext|null)}}
 */
function useConversation(endpoint, {
  onStartRecording,
  onStopRecording,
  onReceive,
  autoPlay = true,
  downstreamMode = "STREAM",
  onError,
  audioConfig = {},
  requestData = {},
  endpointConfig = {}
} = {}) {
  // Create the logger once instead of constructing a throwaway instance per render.
  const loggerRef = useRef(null);
  if (!loggerRef.current) {
    loggerRef.current = new Logger("@m4trix/core > useConversation");
  }
  const logger = loggerRef.current;
  const inputAudioControllerRef = useRef(void 0);
  const outputAudioControllerRef = useRef(
    void 0
  );
  const endpointAdapterRef = useRef(
    void 0
  );
  const [voiceAgentState, setVoiceAgentState] = useState("READY");
  const [error, setError] = useState(null);
  const handleError = useCallback(
    (state, err) => {
      // Report non-Error throwables too instead of silently dropping them.
      const normalized = err instanceof Error ? err : new Error(String(err));
      setError(normalized);
      logger.error(`Error during ${state}:`, normalized);
      onError?.(state, normalized);
    },
    [onError]
  );
  const startRecording = useCallback(() => {
    const input = inputAudioControllerRef.current;
    if (!input) {
      return;
    }
    try {
      logger.debug("Starting recording");
      setVoiceAgentState("RECORDING");
      input.startRecording({
        onError: (err) => {
          handleError("RECORDING", err);
          // Recording never started (e.g. microphone denied): leave "RECORDING",
          // otherwise the hook would be stuck there.
          setVoiceAgentState("READY");
        }
      });
      onStartRecording?.();
    } catch (err) {
      handleError("RECORDING", err);
    }
  }, [onStartRecording, handleError]);
  const stopRecording = useCallback(async () => {
    const input = inputAudioControllerRef.current;
    if (!input) {
      return;
    }
    try {
      logger.debug("Stopping recording");
      let recordingDelivered = false;
      await input.stopRecording({
        onRecordingCompleted: async (allData) => {
          recordingDelivered = true;
          setVoiceAgentState("PROCESSING");
          try {
            const response = await endpointAdapterRef.current?.sendVoiceFile({
              blob: allData,
              metadata: requestData
            });
            if (!response) {
              throw new Error("No response received from endpoint");
            }
            // A response body can be consumed only once. Every playback works
            // on its own clone so autoplay and the replay callback handed to
            // onReceive do not compete for the same body.
            const freshResponse = () => typeof response.clone === "function" ? response.clone() : response;
            const markReady = () => {
              setVoiceAgentState("READY");
            };
            const playResponse = async () => {
              const output = outputAudioControllerRef.current;
              if (!output) {
                return;
              }
              if (downstreamMode === "STREAM") {
                return output.playAudioStream({
                  response: freshResponse(),
                  onComplete: markReady
                });
              }
              const responseBlob = await freshResponse().blob();
              return output.playAudio({
                source: responseBlob,
                onComplete: markReady
              });
            };
            setVoiceAgentState("RESPONDING");
            if (autoPlay) {
              if (downstreamMode === "STREAM" || downstreamMode === "DOWNLOAD") {
                await playResponse();
              }
            } else {
              setVoiceAgentState("READY");
            }
            onReceive?.(
              allData,
              playResponse,
              async () => {
                if (outputAudioControllerRef.current) {
                  return outputAudioControllerRef.current.stopPlayback();
                }
              }
            );
          } catch (err) {
            handleError("PROCESSING", err);
            setVoiceAgentState("READY");
          }
        }
      });
      if (!recordingDelivered) {
        // Nothing was recorded, so no processing will follow. Only leave the
        // "RECORDING" state; any other state belongs to an earlier exchange.
        setVoiceAgentState((state) => state === "RECORDING" ? "READY" : state);
      }
      onStopRecording?.();
    } catch (err) {
      handleError("RECORDING", err);
    }
  }, [
    onStopRecording,
    requestData,
    autoPlay,
    downstreamMode,
    handleError,
    onReceive
  ]);
  useEffect(() => {
    // Adapter and controllers are created once; later option changes are ignored.
    if (endpointAdapterRef.current) {
      return;
    }
    try {
      const endpointAdapter = endpointConfig.endpointAdapter ? endpointConfig.endpointAdapter : new BaseVoiceEndpointAdapter({
        baseUrl: endpointConfig.baseUrl,
        endpoint,
        headers: endpointConfig.headers
      });
      endpointAdapterRef.current = endpointAdapter;
      if (!inputAudioControllerRef.current) {
        inputAudioControllerRef.current = new WebAudioInputAudioController(
          audioConfig
        );
      }
      if (!outputAudioControllerRef.current) {
        outputAudioControllerRef.current = new AudioElementOutputAudioController();
      }
    } catch (err) {
      handleError("READY", err);
    }
  }, [endpoint, endpointConfig, audioConfig, handleError]);
  useEffect(() => {
    // Release microphone and playback resources on unmount.
    return () => {
      inputAudioControllerRef.current?.cleanup();
      outputAudioControllerRef.current?.cleanup();
    };
  }, []);
  return {
    startRecording,
    stopRecording,
    voiceAgentState,
    error,
    audioContext: inputAudioControllerRef.current?.audioContext || null
  };
}
|
|
736
|
+
|
|
737
|
+
// src/react/adapter/socket/VoiceSocketAdapter.ts
|
|
738
|
+
/**
 * Base class for socket-based voice adapters.
 * Keeps the connection flag, the configuration and an internal event emitter,
 * and exposes the emitter through `on` / `off` / `once` / `emit`.
 */
var VoiceSocketAdapter = class {
  /**
   * @param {object} config Adapter configuration, stored on `this.config`.
   */
  constructor(config) {
    this._isConnected = false;
    this.logger = new Logger("@m4trix/core > VoiceSocketAdapter");
    this.emitter = new Emitter();
    this.config = config;
  }

  /** Subscribes `listener` to `event` on the internal emitter. */
  on(event, listener) {
    const { emitter } = this;
    emitter.on(event, listener);
  }

  /** Unsubscribes `listener` from `event` on the internal emitter. */
  off(event, listener) {
    const { emitter } = this;
    emitter.off(event, listener);
  }

  /** Subscribes `listener` to the next `event` only. */
  once(event, listener) {
    const { emitter } = this;
    emitter.once(event, listener);
  }

  /** Publishes `event` with `data` on the internal emitter. */
  emit(event, data) {
    const { emitter } = this;
    emitter.emit(event, data);
  }

  /** @returns {boolean} The current value of the connection flag. */
  isConnected() {
    return this._isConnected;
  }
};
|
|
761
|
+
var Emitter = class {
|
|
762
|
+
constructor() {
|
|
763
|
+
this.target = new EventTarget();
|
|
764
|
+
}
|
|
765
|
+
on(type, listener) {
|
|
766
|
+
this.target.addEventListener(type, listener);
|
|
767
|
+
}
|
|
768
|
+
off(type, listener) {
|
|
769
|
+
this.target.removeEventListener(type, listener);
|
|
770
|
+
}
|
|
771
|
+
once(type, listener) {
|
|
772
|
+
const wrapper = (event) => {
|
|
773
|
+
this.off(type, wrapper);
|
|
774
|
+
listener(event.detail);
|
|
775
|
+
};
|
|
776
|
+
this.on(type, wrapper);
|
|
777
|
+
}
|
|
778
|
+
emit(type, detail) {
|
|
779
|
+
this.target.dispatchEvent(new CustomEvent(type, { detail }));
|
|
780
|
+
}
|
|
781
|
+
};
|
|
782
|
+
/**
 * Voice adapter backed by a socket.io client socket.
 *
 * Socket events are re-published on the adapter:
 * "connect", "disconnect", "error", "control-message", "chunk-received",
 * "file-received", "received-end-of-response-stream", plus "chunk_sent" and
 * "file-sent" after sending.
 */
var VoiceSocketIOAdapter = class extends VoiceSocketAdapter {
  constructor(config) {
    super(config);
    this.socket = null;
    // Promise shared by concurrent connect() calls, and its settle functions.
    this.pendingConnect = null;
    this.connectWaiter = null;
  }

  /**
   * Opens the socket (creating it on first use) and resolves once connected.
   * Resolves immediately when already connected; concurrent calls share one
   * pending promise.
   *
   * @returns {Promise<void>} Rejects with the socket's `connect_error`.
   */
  async connect() {
    if (this.socket && this.isConnected()) {
      return;
    }
    if (this.pendingConnect) {
      return this.pendingConnect;
    }
    const pending = new Promise((resolve, reject) => {
      this.connectWaiter = { resolve, reject };
    });
    this.pendingConnect = pending;
    try {
      if (!this.socket) {
        this.socket = io(this.config.baseUrl, {
          extraHeaders: this.config.headers,
          autoConnect: true
        });
        // Handlers are bound once per socket; binding them on every connect()
        // call would deliver each event multiple times.
        this.bindSocketHandlers(this.socket);
      } else if (!this.socket.connected) {
        this.socket.connect();
      }
    } catch (err) {
      this.settleConnect("reject", err);
    }
    return pending;
  }

  /**
   * Settles the promise of the connect() call in progress, if there is one.
   *
   * @param {"resolve"|"reject"} outcome Which settle function to call.
   * @param {*} [value] Rejection reason.
   */
  settleConnect(outcome, value) {
    const waiter = this.connectWaiter;
    this.connectWaiter = null;
    this.pendingConnect = null;
    if (waiter) {
      waiter[outcome](value);
    }
  }

  /**
   * Registers all socket event handlers on `socket`.
   *
   * @param {object} socket The socket.io client socket.
   */
  bindSocketHandlers(socket) {
    socket.on("connect", () => {
      this._isConnected = true;
      this.logger.debug("Connected to socket");
      this.emit("connect");
      this.settleConnect("resolve");
    });
    socket.on("disconnect", (reason) => {
      this._isConnected = false;
      this.emit("disconnect");
      this.logger.debug("Disconnected from socket");
      // socket.io reconnects on its own except after a server-side disconnect,
      // which needs an explicit connect(). Skip sockets this adapter discarded.
      if (this.config.autoReconnect && reason === "io server disconnect" && this.socket === socket) {
        socket.connect();
      }
    });
    socket.on("connect_error", (error) => {
      this.logger.error("Error connecting to socket", error);
      this.emit("error", error);
      this.settleConnect("reject", error);
    });
    socket.on("voice:chunk_received", (chunk) => {
      this.logger.debug("Received voice chunk", chunk.byteLength);
      this.onVoiceChunkReceived(chunk);
    });
    socket.on("voice:received_end_of_response_stream", () => {
      this.logger.debug("Received end of response stream");
      this.onReceivedEndOfResponseStream();
    });
    socket.on("voice:file_received", (blob) => {
      this.logger.debug("Received voice file");
      this.onVoiceFileReceived(blob);
    });
    socket.on("control-message", (message) => {
      this.logger.debug("Received control message", message);
      this.emit("control-message", message);
    });
  }

  /** Closes and discards the socket; the next connect() creates a new one. */
  disconnect() {
    this.socket?.disconnect();
    this.socket = null;
    this._isConnected = false;
    this.pendingConnect = null;
    this.connectWaiter = null;
  }

  /** @returns {object|null} The underlying socket, or null when not created. */
  exposeSocket() {
    return this.socket;
  }

  /**
   * Sends one audio chunk as an ArrayBuffer with the "voice:send_chunk" event.
   *
   * @param {Blob|ArrayBuffer} chunk Audio data; Blobs are converted first.
   * @param {*} [metadata] Forwarded unchanged to the socket.
   * @throws {Error} "Socket not connected" when there is no connected socket.
   */
  async sendVoiceChunk(chunk, metadata) {
    this.logger.debug(
      "Sending voice chunk %i",
      chunk instanceof Blob ? chunk.size : chunk.byteLength
    );
    // isConnected is a method; testing the bare reference is always truthy.
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    let chunkToSend;
    if (chunk instanceof Blob) {
      chunkToSend = await chunk.arrayBuffer();
    } else {
      chunkToSend = chunk;
    }
    this.logger.debug("[Socket] Sending voice chunk", chunkToSend.byteLength);
    this.socket.emit("voice:send_chunk", chunkToSend, metadata);
    this.emit("chunk_sent", chunk);
  }

  /**
   * Sends a complete recording with the "voice:send_file" event.
   *
   * @param {Blob} blob Recorded audio.
   * @param {*} [metadata] Forwarded unchanged to the socket.
   * @throws {Error} "Socket not connected" when there is no connected socket.
   */
  sendVoiceFile(blob, metadata) {
    this.logger.debug("Sending voice file", blob, metadata);
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    this.socket.emit("voice:send_file", blob, metadata);
    this.emit("file-sent", blob);
  }

  /**
   * Emits "voice:commit" on the socket.
   *
   * @throws {Error} "Socket not connected" when there is no connected socket.
   */
  commitVoiceMessage() {
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    this.socket.emit("voice:commit");
  }

  onVoiceChunkReceived(chunk) {
    this.emit("chunk-received", chunk);
  }

  onVoiceFileReceived(blob) {
    this.emit("file-received", blob);
  }

  onReceivedEndOfResponseStream() {
    this.emit("received-end-of-response-stream");
  }
};
|
|
878
|
+
|
|
879
|
+
// src/react/utility/audio/WebAudioOutputAudioController.ts
|
|
880
|
+
// Tuning values for the chunked playback path of WebAudioOutputAudioController.
// Sample rate handed to AudioContext.createBuffer for streamed audio.
var STREAM_SAMPLE_RATE = 24000;
// Channel count handed to AudioContext.createBuffer.
var CHANNELS = 1;
// Length, in seconds, of each slice that gets scheduled for playback.
var SLICE_DURATION_S = 1 / 4;
// Number of frames in one slice.
var FRAMES_PER_SLICE = Math.floor(SLICE_DURATION_S * STREAM_SAMPLE_RATE);
// Slice size in bytes: the stream is read as 16-bit integers, two bytes per frame.
var BYTES_PER_SLICE = 2 * FRAMES_PER_SLICE;
// Minimum lead time, in seconds, between "now" and a newly scheduled buffer.
var SCHED_TOLERANCE = 0.05;
|
|
886
|
+
/**
 * Output controller that plays audio through the Web Audio API.
 *
 * Supports one-shot playback of an encoded source (`playAudio`) and gapless
 * playback of a stream of 16-bit integer samples (`initializeChunkStream`).
 * Every started source node is tracked in `activeSources` so that
 * `stopPlayback` can halt everything at once.
 */
var WebAudioOutputAudioController = class extends OutputAudioController {
  constructor() {
    super("@m4trix/core > WebAudioOutputAudioController");
    this.audioCtx = new AudioContext();
    this.gain = this.audioCtx.createGain();
    this.nextPlayTime = 0;
    this.activeSources = /* @__PURE__ */ new Set();
    this.userGestureHookAttached = false;
    // Click handler installed by ensureContextRunning, kept so cleanup() can remove it.
    this.userGestureHandler = null;
    this.gain.connect(this.audioCtx.destination);
    this.resetScheduler();
  }

  // ---------------------------------------------------------------------
  // One-shot playback
  // ---------------------------------------------------------------------

  /**
   * Stops whatever is playing, then decodes `source` and plays it immediately.
   *
   * @param {object} options
   * @param options.source A URL string (fetched) or an object exposing
   *   `arrayBuffer()`, such as a Blob.
   * @param {Function} [options.onComplete] Invoked from the source node's
   *   `onended` handler.
   */
  async playAudio({
    source,
    onComplete
  }) {
    await this.stopPlayback();
    const buf = await this.sourceToArrayBuffer(source);
    const decoded = await this.decode(buf);
    await this.ensureContextRunning();
    const src = this.createSource(decoded, this.audioCtx.currentTime);
    // Replaces the default handler installed by createSource so that
    // completion is also reported to the caller.
    src.onended = () => {
      this.activeSources.delete(src);
      onComplete?.();
    };
  }

  /** Intentionally empty. */
  async playAudioStream() {
  }

  // ---------------------------------------------------------------------
  // PCM streaming
  // ---------------------------------------------------------------------

  /**
   * Starts a new chunked playback session.
   *
   * Incoming bytes are buffered and scheduled in slices of BYTES_PER_SLICE.
   * Whatever is still buffered when the stream ends is scheduled as a final,
   * shorter slice so the tail of the audio is not lost.
   *
   * @param {object} options
   * @param {Function} [options.onComplete] Invoked once the stream has been
   *   ended and the last scheduled source has finished (or immediately if
   *   nothing is playing at that point).
   * @returns {Promise<{addChunkToStream: Function, endChunkStream: Function}>}
   */
  async initializeChunkStream({
    onComplete
  }) {
    await this.stopPlayback();
    await this.ensureContextRunning();
    this.resetScheduler();
    let streamEnded = false;
    let pending = new Uint8Array(0);
    const addChunkToStream = async (pkt) => {
      if (streamEnded) {
        this.logger.warn("Attempt to add chunk after stream ended \u2013 ignoring.");
        return;
      }
      const bytes = new Uint8Array(
        pkt instanceof Blob ? await pkt.arrayBuffer() : pkt
      );
      // The stream may have been ended while the Blob was being read.
      if (streamEnded) {
        this.logger.warn("Attempt to add chunk after stream ended \u2013 ignoring.");
        return;
      }
      if (bytes.length === 0)
        return;
      const merged = new Uint8Array(pending.length + bytes.length);
      merged.set(pending);
      merged.set(bytes, pending.length);
      pending = merged;
      // BYTES_PER_SLICE is even, so every full slice is sample-aligned even
      // when the buffered total is momentarily odd.
      while (pending.length >= BYTES_PER_SLICE) {
        this.scheduleBuffer(
          this.pcm16ToAudioBuffer(pending.subarray(0, BYTES_PER_SLICE))
        );
        pending = pending.slice(BYTES_PER_SLICE);
      }
    };
    const endChunkStream = () => {
      if (streamEnded)
        return;
      streamEnded = true;
      // Flush the remaining whole samples; a trailing odd byte is dropped.
      const tailLength = pending.length - pending.length % 2;
      if (tailLength > 0 && this.audioCtx.state !== "closed") {
        this.scheduleBuffer(
          this.pcm16ToAudioBuffer(pending.subarray(0, tailLength))
        );
      }
      pending = new Uint8Array(0);
      if (!onComplete)
        return;
      if (this.activeSources.size === 0) {
        onComplete();
        return;
      }
      // Set iteration follows insertion order, so the final entry is the
      // most recently scheduled source.
      const last = Array.from(this.activeSources).pop();
      if (last) {
        const prev = last.onended;
        last.onended = (e) => {
          if (prev)
            prev.call(last, e);
          onComplete();
        };
      }
    };
    return { addChunkToStream, endChunkStream };
  }

  /**
   * Converts raw bytes, read as 16-bit integers in platform byte order, into
   * a single-channel AudioBuffer with samples scaled by 1 / 32768.
   *
   * @param {Uint8Array} bytes Non-empty, even-length byte view.
   * @returns {AudioBuffer}
   */
  pcm16ToAudioBuffer(bytes) {
    // slice() copies into a fresh buffer at offset 0, as Int16Array requires
    // a 2-byte-aligned offset.
    const int16 = new Int16Array(bytes.slice().buffer);
    const buf = this.audioCtx.createBuffer(
      CHANNELS,
      int16.length,
      STREAM_SAMPLE_RATE
    );
    const data = buf.getChannelData(0);
    for (let i = 0; i < int16.length; i++)
      data[i] = int16[i] / 32768;
    return buf;
  }

  // ---------------------------------------------------------------------
  // Buffer scheduling helpers
  // ---------------------------------------------------------------------

  /**
   * Queues `buf` directly after the previously scheduled buffer, or a short
   * lead time from now if the schedule has fallen behind.
   */
  scheduleBuffer(buf) {
    if (this.nextPlayTime < this.audioCtx.currentTime + SCHED_TOLERANCE) {
      this.nextPlayTime = this.audioCtx.currentTime + SCHED_TOLERANCE;
    }
    this.createSource(buf, this.nextPlayTime);
    this.nextPlayTime += buf.duration;
  }

  /**
   * Creates a buffer source routed through the gain node, starts it at
   * `when`, and tracks it in `activeSources` until it ends.
   */
  createSource(buf, when) {
    const src = this.audioCtx.createBufferSource();
    src.buffer = buf;
    src.connect(this.gain);
    src.start(when);
    this.activeSources.add(src);
    src.onended = () => {
      this.activeSources.delete(src);
    };
    return src;
  }

  /** Resets the schedule cursor to the context's current time. */
  resetScheduler() {
    this.nextPlayTime = this.audioCtx.currentTime;
  }

  // --- External resource helpers ----------------------------------------

  /**
   * Resolves `src` to an ArrayBuffer: strings are fetched as URLs, anything
   * else is expected to expose `arrayBuffer()`.
   *
   * @throws {Error} With the HTTP status as message when the fetch response
   *   is not ok.
   */
  sourceToArrayBuffer(src) {
    return typeof src === "string" ? fetch(src).then((r) => {
      if (!r.ok)
        throw new Error(`${r.status}`);
      return r.arrayBuffer();
    }) : src.arrayBuffer();
  }

  /** Promise wrapper around the callback form of `decodeAudioData`. */
  decode(buf) {
    return new Promise(
      (res, rej) => this.audioCtx.decodeAudioData(buf, res, rej)
    );
  }

  // --- Lifecycle methods -------------------------------------------------

  /** Stops and disconnects every tracked source and resets the schedule. */
  async stopPlayback() {
    for (const src of this.activeSources) {
      // Best effort per source: one failing node must not keep the
      // remaining ones playing.
      try {
        src.stop();
      } catch {
      }
      try {
        src.disconnect();
      } catch {
      }
    }
    this.activeSources.clear();
    this.resetScheduler();
  }

  /** Stops playback, removes the pending click listener and closes the context. */
  cleanup() {
    // stopPlayback contains no awaits, so its work completes synchronously.
    void this.stopPlayback();
    if (this.userGestureHandler) {
      document.removeEventListener("click", this.userGestureHandler);
      this.userGestureHandler = null;
      this.userGestureHookAttached = false;
    }
    if (this.audioCtx.state !== "closed") {
      // close() is asynchronous; a rejection here is not actionable.
      Promise.resolve(this.audioCtx.close()).catch(() => {
      });
    }
  }

  // --- Autoplay-policy helper ---------------------------------------------

  /**
   * Tries to resume a suspended context. If it stays suspended, installs a
   * one-time document click listener that retries on each click until the
   * context is running.
   */
  async ensureContextRunning() {
    if (this.audioCtx.state !== "suspended")
      return;
    try {
      await this.audioCtx.resume();
    } catch {
      // A failed resume is handled by the click fallback below.
    }
    if (this.audioCtx.state === "running")
      return;
    if (this.userGestureHookAttached)
      return;
    this.userGestureHookAttached = true;
    const resume = async () => {
      try {
        await this.audioCtx.resume();
      } catch {
        // Retried on the next click.
      }
      if (this.audioCtx.state === "running") {
        document.removeEventListener("click", resume);
        // Allow the hook to be installed again if the context is
        // suspended later on.
        this.userGestureHandler = null;
        this.userGestureHookAttached = false;
      }
    };
    this.userGestureHandler = resume;
    document.addEventListener("click", resume);
  }
};
|
|
1062
|
+
|
|
1063
|
+
// src/react/hooks/use-conversation/useSocketConversation.ts
|
|
1064
|
+
Logger.enableGlobalLogging();
|
|
1065
|
+
/**
 * React hook that drives a voice conversation over a socket adapter.
 *
 * It owns an input audio controller, an output audio controller and a socket
 * adapter (the one supplied in `socketConfig.socketAdapter`, otherwise a new
 * `VoiceSocketIOAdapter`). The `voiceAgentState` it exposes moves through
 * "READY", "RECORDING", "PROCESSING" and "DOWNSTREAMING".
 *
 * @param {object} options
 * @param options.scope Passed to the default socket adapter.
 * @param {Function} [options.onStartRecording] Called after recording was started.
 * @param {Function} [options.onStopRecording] Called after recording was stopped.
 * @param {Function} [options.onReceive] Called with the recorded data and two
 *   callbacks that stop output playback.
 * @param {string} [options.upstreamMode="STREAM_WHILE_TALK"] In this mode each
 *   recorded chunk is sent as it is produced and the message is committed on
 *   stop; in any other mode the whole recording is sent as one file on stop.
 * @param {Function} [options.onError] Called with `(state, error)`.
 * @param {object} [options.audioConfig] Passed to the input audio controller.
 * @param {object} [options.socketConfig] `socketAdapter`, `baseUrl`, `headers`.
 * @returns {{startRecording: Function, stopRecording: Function,
 *   voiceAgentState: string, error: (Error|null), audioContext: *, socket: *}}
 */
function useSocketConversation({
  scope,
  onStartRecording,
  onStopRecording,
  onReceive,
  upstreamMode = "STREAM_WHILE_TALK",
  onError,
  audioConfig = {},
  socketConfig = {}
}) {
  // Create the logger once instead of constructing a throw-away instance on
  // every render.
  const loggerRef = useRef(null);
  if (loggerRef.current === null) {
    loggerRef.current = new Logger("SuTr > useSocketConversation");
  }
  const logger = loggerRef.current;
  const inputAudioControllerRef = useRef(void 0);
  const outputAudioControllerRef = useRef(
    void 0
  );
  const socketAdapterRef = useRef(void 0);
  // Unsubscribe function of the currently active downstream subscription.
  const downstreamCleanupRef = useRef(void 0);
  const [socket, setSocket] = useState(null);
  const [voiceAgentState, setVoiceAgentState] = useState("READY");
  const [error, setError] = useState(null);
  // Mirror of the latest state for listeners that are registered only once.
  const voiceAgentStateRef = useRef(voiceAgentState);
  voiceAgentStateRef.current = voiceAgentState;
  const shouldStreamWhileTalk = upstreamMode === "STREAM_WHILE_TALK";
  const handleError = useCallback(
    (state, err) => {
      setError(err);
      logger.error(`Error during ${state}:`, err);
      onError?.(state, err);
    },
    [onError]
  );
  // Detaches the listeners of the previous turn, if any.
  const releaseDownstreamSubscription = useCallback(() => {
    const cleanup = downstreamCleanupRef.current;
    downstreamCleanupRef.current = void 0;
    cleanup?.();
  }, []);
  // Opens a chunk stream on the output controller and feeds it from the
  // adapter's "chunk-received" events. Resolves to an unsubscribe function.
  const subscribeToSocketEventsForChunkDownstreaming = useCallback(
    async (socketAdapter) => {
      logger.debug("Setting up audio stream for receiving chunks");
      try {
        const { addChunkToStream, endChunkStream } = await outputAudioControllerRef.current.initializeChunkStream({
          mimeCodec: "audio/mpeg",
          onComplete: () => {
            logger.debug("Audio stream playback completed");
            setVoiceAgentState("READY");
          }
        });
        let chunkCount = 0;
        const chunkReceivedEmitter = async (chunk) => {
          // Only ArrayBuffer payloads are handled here.
          if (!(chunk instanceof ArrayBuffer)) {
            return;
          }
          chunkCount++;
          logger.debug(
            `Received voice chunk #${chunkCount} from socket, size: ${chunk.byteLength} bytes`
          );
          if (chunk.byteLength === 0) {
            logger.warn("Received empty chunk, skipping");
            return;
          }
          try {
            await addChunkToStream(chunk);
            logger.debug(
              `Successfully added chunk #${chunkCount} to audio stream`
            );
          } catch (err) {
            logger.error(
              `Failed to add chunk #${chunkCount} to audio stream`,
              err
            );
            if (err instanceof Error) {
              handleError("DOWNSTREAMING", err);
            }
          }
        };
        socketAdapter.on("chunk-received", chunkReceivedEmitter);
        const endOfStreamEmitter = () => {
          logger.debug(
            `Received end of stream signal after ${chunkCount} chunks, ending chunk stream`
          );
          endChunkStream();
          setVoiceAgentState("READY");
        };
        socketAdapter.on("received-end-of-response-stream", endOfStreamEmitter);
        return () => {
          logger.debug("Cleaning up socket event listeners");
          socketAdapter.off("chunk-received", chunkReceivedEmitter);
          socketAdapter.off(
            "received-end-of-response-stream",
            endOfStreamEmitter
          );
          endChunkStream();
        };
      } catch (err) {
        if (err instanceof Error) {
          handleError("DOWNSTREAMING", err);
        }
        return () => {
        };
      }
    },
    [handleError]
  );
  // Connects the adapter and wires its lifecycle events into hook state.
  const hookupSocketAdapter = useCallback(
    async (socketAdapter) => {
      logger.debug("Connecting to socket...");
      try {
        await socketAdapter.connect();
        socketAdapter.on("connect", () => {
          logger.debug("Socket adapter connected");
          setVoiceAgentState("READY");
        });
        socketAdapter.on("disconnect", () => {
          logger.debug("Socket adapter disconnected");
        });
        socketAdapter.on("error", (err) => {
          // Read the state through the ref: this listener is registered once
          // and would otherwise always report the state at hook-up time.
          const currentState = voiceAgentStateRef.current;
          if (err instanceof Error) {
            handleError(currentState, err);
          } else {
            handleError(currentState, new Error("Unknown error"));
          }
        });
        setSocket(socketAdapter.exposeSocket());
      } catch (err) {
        if (err instanceof Error) {
          handleError("READY", err);
        }
      }
    },
    [handleError]
  );
  const startRecording = useCallback(() => {
    if (!inputAudioControllerRef.current) {
      return;
    }
    try {
      logger.debug("Starting recording");
      setVoiceAgentState("RECORDING");
      inputAudioControllerRef.current.startRecording({
        onRecordedChunk: async (chunk) => {
          if (!shouldStreamWhileTalk) {
            return;
          }
          try {
            await socketAdapterRef.current?.sendVoiceChunk(chunk);
          } catch (err) {
            if (err instanceof Error) {
              handleError("RECORDING", err);
            }
          }
        }
      });
      onStartRecording?.();
    } catch (err) {
      if (err instanceof Error) {
        handleError("RECORDING", err);
      }
    }
  }, [onStartRecording, shouldStreamWhileTalk, handleError]);
  const stopRecording = useCallback(async () => {
    if (!inputAudioControllerRef.current) {
      return;
    }
    const stopOutputPlayback = async () => {
      if (outputAudioControllerRef.current) {
        return outputAudioControllerRef.current.stopPlayback();
      }
    };
    try {
      logger.debug("Stopping recording");
      await inputAudioControllerRef.current.stopRecording({
        onRecordingCompleted: async (allData) => {
          setVoiceAgentState("PROCESSING");
          try {
            // Drop the previous turn's listeners so they do not pile up, and
            // attach the new ones BEFORE the request goes out so that no
            // early response chunk can be missed.
            releaseDownstreamSubscription();
            downstreamCleanupRef.current = await subscribeToSocketEventsForChunkDownstreaming(
              socketAdapterRef.current
            );
            if (shouldStreamWhileTalk) {
              logger.debug("Committing voice message");
              await socketAdapterRef.current?.commitVoiceMessage();
            } else {
              await socketAdapterRef.current?.sendVoiceFile(allData);
            }
            setVoiceAgentState("DOWNSTREAMING");
            onReceive?.(allData, stopOutputPlayback, stopOutputPlayback);
          } catch (err) {
            if (err instanceof Error) {
              handleError("PROCESSING", err);
            }
          }
        }
      });
      onStopRecording?.();
    } catch (err) {
      if (err instanceof Error) {
        handleError("RECORDING", err);
      }
    }
  }, [
    onStopRecording,
    handleError,
    subscribeToSocketEventsForChunkDownstreaming,
    releaseDownstreamSubscription,
    onReceive,
    shouldStreamWhileTalk
  ]);
  // Lazily creates the adapter and both audio controllers. Runs again when
  // its inputs change but is a no-op while an adapter is already present.
  useEffect(() => {
    if (socketAdapterRef.current) {
      return;
    }
    try {
      const socketAdapter = socketConfig.socketAdapter ? socketConfig.socketAdapter : new VoiceSocketIOAdapter({
        scope,
        baseUrl: socketConfig.baseUrl || "",
        headers: socketConfig.headers
      });
      socketAdapterRef.current = socketAdapter;
      if (!socketAdapter.isConnected()) {
        hookupSocketAdapter(socketAdapter);
      }
      if (!inputAudioControllerRef.current) {
        inputAudioControllerRef.current = new WebAudioInputAudioController(
          audioConfig
        );
      }
      if (!outputAudioControllerRef.current) {
        outputAudioControllerRef.current = new WebAudioOutputAudioController();
      }
    } catch (err) {
      if (err instanceof Error) {
        handleError("READY", err);
      }
    }
  }, [scope, socketConfig, hookupSocketAdapter, audioConfig, handleError]);
  // Tear everything down on unmount. The refs are cleared so that a re-run
  // of the setup effect builds fresh controllers instead of reusing ones
  // that have already been cleaned up.
  useEffect(() => {
    return () => {
      releaseDownstreamSubscription();
      inputAudioControllerRef.current?.cleanup();
      inputAudioControllerRef.current = void 0;
      outputAudioControllerRef.current?.cleanup();
      outputAudioControllerRef.current = void 0;
      if (socketAdapterRef.current) {
        socketAdapterRef.current.disconnect();
        socketAdapterRef.current = void 0;
      }
    };
  }, []);
  return {
    startRecording,
    stopRecording,
    voiceAgentState,
    error,
    audioContext: inputAudioControllerRef.current?.audioContext || null,
    socket
  };
}
|
|
1313
|
+
|
|
1314
|
+
export { BaseVoiceEndpointAdapter, Emitter, InputAudioController, VoiceEndpointAdapter, VoiceSocketAdapter, useConversation, useSocketConversation };
|
|
1315
|
+
//# sourceMappingURL=out.js.map
|
|
1316
|
+
//# sourceMappingURL=index.js.map
|