@m4trix/core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1316 @@
1
+ import { useRef, useState, useCallback, useEffect } from 'react';
2
+ import { io } from 'socket.io-client';
3
+
4
+ // src/react/hooks/use-conversation/useConversation.ts
5
+
6
+ // src/utility/Logger.ts
7
+ var _Logger = class _Logger {
8
+ constructor(namespace = "") {
9
+ this.namespace = namespace;
10
+ }
11
+ static enableGlobalLogging() {
12
+ _Logger.globalEnabled = true;
13
+ }
14
+ static disableGlobalLogging() {
15
+ _Logger.globalEnabled = false;
16
+ }
17
+ formatPrefix() {
18
+ return this.namespace ? `[${this.namespace}]` : "";
19
+ }
20
+ logIfEnabled(level, ...args) {
21
+ if (!_Logger.globalEnabled)
22
+ return;
23
+ const prefix = this.formatPrefix();
24
+ if (prefix) {
25
+ console[level](prefix, ...args);
26
+ } else {
27
+ console[level](...args);
28
+ }
29
+ }
30
+ log(...args) {
31
+ this.logIfEnabled("log", ...args);
32
+ }
33
+ debug(...args) {
34
+ this.logIfEnabled("debug", ...args);
35
+ }
36
+ info(...args) {
37
+ this.logIfEnabled("info", ...args);
38
+ }
39
+ warn(...args) {
40
+ this.logIfEnabled("warn", ...args);
41
+ }
42
+ error(...args) {
43
+ this.logIfEnabled("error", ...args);
44
+ }
45
+ };
46
+ _Logger.globalEnabled = false;
47
+ var Logger = _Logger;
48
+
49
+ // src/react/adapter/VoiceEndpointAdapter.ts
50
/**
 * Base class for HTTP voice endpoint adapters.
 * Stores the adapter configuration and a namespaced logger for subclasses.
 */
var VoiceEndpointAdapter = class {
  /** @param config Adapter configuration, kept as-is on `this.config`. */
  constructor(config) {
    const logger = new Logger("SuTr > EndpointAdapter");
    Object.assign(this, { logger, config });
  }
};
56
/**
 * Default endpoint adapter: uploads a recording as multipart form data with
 * `fetch` and hands the raw `Response` back to the caller.
 */
var BaseVoiceEndpointAdapter = class extends VoiceEndpointAdapter {
  constructor(config) {
    super(config);
  }
  /**
   * POST a voice recording to `config.baseUrl + config.endpoint`.
   *
   * @param blob     Recorded audio, sent as the `audio` form field.
   * @param metadata Optional value, JSON-serialised into the `metadata` form field.
   * @returns The successful `Response`; its body has not been consumed.
   * @throws Error when the response status is not OK (message includes status
   *         and response text) or when the response has no body.
   */
  async sendVoiceFile({
    blob,
    metadata
  }) {
    const payload = new FormData();
    payload.append("audio", blob);
    if (metadata) {
      payload.append("metadata", JSON.stringify(metadata));
    }
    this.logger.debug("Sending voice file to", this.config.endpoint, payload);
    const targetUrl = `${this.config.baseUrl || ""}${this.config.endpoint}`;
    const requestInit = {
      method: "POST",
      headers: this.config.headers,
      body: payload
    };
    const response = await fetch(targetUrl, requestInit);
    if (!response.ok) {
      const status = response.status;
      const detail = await response.text();
      throw new Error(`API error: ${status} ${detail}`);
    }
    if (!response.body) {
      throw new Error("No response body");
    }
    return response;
  }
};
90
+
91
+ // src/react/utility/audio/InputAudioController.ts
92
/** Base class for audio input controllers; provides a namespaced logger. */
var InputAudioController = class {
  constructor() {
    const loggerName = "@m4trix/core > InputAudioController";
    this.logger = new Logger(loggerName);
  }
};
97
+
98
+ // src/react/utility/audio/WebAudioInputAudioController.ts
99
// Timeslice (ms) passed to MediaRecorder.start(): a chunk is emitted at this interval.
var DEFAULT_SLICING_INTERVAL = 3e3;
/**
 * Microphone recorder built on `getUserMedia` + `MediaRecorder`.
 *
 * Recorded chunks are accumulated in memory and handed back as a single
 * `audio/webm` Blob when recording stops. An `AudioContext` (with an analyser
 * node) is created alongside the recording and exposed via `audioContext`.
 */
var WebAudioInputAudioController = class extends InputAudioController {
  /**
   * @param audioConfig Optional settings; `sampleRate` is used for the
   *                    AudioContext (defaults to 16000 when falsy).
   */
  constructor(audioConfig = {}) {
    super();
    this.audioConfig = audioConfig;
    // ─── Recording state ─────────────────────────────────────────────────────
    this.audioContextState = {
      context: null,
      source: null,
      analyser: null
    };
    this.mediaRecorder = null;
    this.recordedChunks = [];
    this.recordingStream = null;
  }
  /** The current AudioContext, or `null` when none is open. */
  get audioContext() {
    return this.audioContextState.context;
  }
  /** Create a fresh AudioContext plus analyser; no source node is connected here. */
  async createAudioContext() {
    const context = new AudioContext({
      sampleRate: this.audioConfig.sampleRate || 16e3,
      latencyHint: "interactive"
    });
    const analyser = context.createAnalyser();
    analyser.fftSize = 2048;
    return { context, source: null, analyser };
  }
  /**
   * Disconnect the source node (if any) and close the AudioContext.
   * State is reset before closing so that overlapping cleanups never try to
   * close the same context twice.
   */
  async cleanupAudioContext() {
    this.logger.debug("Cleaning up audio context");
    const { source, context } = this.audioContextState;
    this.audioContextState = { context: null, source: null, analyser: null };
    if (source)
      source.disconnect();
    // close() rejects on an already-closed context, so check the state first.
    if (context && context.state !== "closed")
      await context.close();
  }
  /** Stop every track of the active microphone stream and forget it. */
  releaseRecordingStream() {
    this.recordingStream?.getTracks().forEach((t) => t.stop());
    this.recordingStream = null;
  }
  /**
   * Ask for microphone access and start recording.
   *
   * @param onRecordedChunk Called with every non-empty chunk the recorder emits.
   * @param onError         Called with an `Error` when starting fails; this
   *                        method itself does not reject for those failures.
   */
  async startRecording({
    onRecordedChunk,
    onError
  } = {}) {
    let stream = null;
    try {
      this.logger.debug("Starting recording");
      this.recordedChunks = [];
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      this.recordingStream = stream;
      if (!this.audioContextState.context) {
        this.audioContextState = await this.createAudioContext();
      }
      this.mediaRecorder = new MediaRecorder(stream, {
        mimeType: "audio/webm;codecs=opus"
      });
      this.mediaRecorder.ondataavailable = (e) => {
        if (e.data.size > 0) {
          this.recordedChunks.push(e.data);
          onRecordedChunk?.(e.data);
          this.logger.debug("Recorded chunk", e.data.size);
        }
      };
      this.mediaRecorder.start(DEFAULT_SLICING_INTERVAL);
      this.logger.debug("MediaRecorder started");
    } catch (err) {
      // If the microphone was already acquired (e.g. the MediaRecorder
      // constructor rejected the mime type), release it so the device is not
      // left open after a failed start.
      if (stream) {
        stream.getTracks().forEach((t) => t.stop());
        if (this.recordingStream === stream) {
          this.recordingStream = null;
        }
      }
      const error = err instanceof Error ? err : new Error("Failed to start recording");
      this.logger.error(error);
      onError?.(error);
    }
  }
  /**
   * Stop the active recording and release its resources.
   *
   * Resolves once the recorder's `stop` event has been handled. Returns
   * immediately when there is no active recorder.
   *
   * @param onRecordingCompleted Called (not awaited) with one `audio/webm`
   *                             Blob built from all recorded chunks, if any.
   */
  async stopRecording({
    onRecordingCompleted
  } = {}) {
    this.logger.debug("Stopping recording");
    const recorder = this.mediaRecorder;
    if (!recorder || recorder.state === "inactive")
      return;
    await new Promise((resolve) => {
      recorder.onstop = async () => {
        // Every step is isolated so that a throwing callback or a failing
        // context close can neither skip the microphone release nor leave the
        // returned promise pending forever.
        try {
          if (this.recordedChunks.length) {
            const blob = new Blob(this.recordedChunks, { type: "audio/webm" });
            onRecordingCompleted?.(blob);
            this.logger.debug("Recording completed", blob.size);
          }
        } catch (err) {
          this.logger.error("Recording completion callback failed", err);
        }
        this.releaseRecordingStream();
        try {
          await this.cleanupAudioContext();
        } catch (err) {
          this.logger.error("Failed to clean up audio context", err);
        }
        resolve();
      };
      recorder.stop();
    });
  }
  /**
   * Cleans up all audio recording resources.
   */
  cleanup() {
    this.cleanupAudioContext().catch((err) => {
      this.logger.error("Failed to clean up audio context", err);
    });
    if (this.mediaRecorder && this.mediaRecorder.state !== "inactive") {
      this.mediaRecorder.stop();
    }
    if (this.recordingStream) {
      this.releaseRecordingStream();
    }
  }
};
200
+
201
+ // src/react/utility/audio/OutputAudioController.ts
202
/** Base class for audio output controllers; provides a logger named by the subclass. */
var OutputAudioController = class {
  /** @param loggerName Namespace handed to the controller's `Logger`. */
  constructor(loggerName) {
    const logger = new Logger(loggerName);
    this.logger = logger;
  }
};
207
+
208
+ // src/react/utility/audio/AudioElementOutputAudioController.ts
209
/**
 * Audio output built on `HTMLAudioElement`, with MediaSource Extensions for
 * streamed playback. Only one playback is tracked at a time: starting a new
 * one stops the previous element and revokes its object URL.
 */
var AudioElementOutputAudioController = class extends OutputAudioController {
  constructor() {
    super("@m4trix/core > WebApiOutputAudioController");
    // ─── Playback state ──────────────────────────────────────────────────────
    this.currentHtmlAudio = null;
    this.currentAudioUrl = null;
  }
  // ─── One-shot playback ────────────────────────────────────────────────────
  /**
   * Play either a Blob or a URL string.
   * Uses <audio> under the hood for maximum browser compatibility.
   *
   * @param source     Blob (played through a temporary object URL) or URL string.
   * @param onComplete Called when playback reaches the end, for both source kinds.
   */
  async playAudio({
    source,
    onComplete
  }) {
    // Always release the previous element and any object URL this controller
    // owns, regardless of what kind of source comes next.
    await this.stopPlayback();
    const audio = new Audio();
    this.currentHtmlAudio = audio;
    const ownsUrl = source instanceof Blob;
    const url = ownsUrl ? URL.createObjectURL(source) : source;
    if (ownsUrl) {
      this.currentAudioUrl = url;
    }
    audio.onended = () => {
      if (ownsUrl) {
        URL.revokeObjectURL(url);
        if (this.currentAudioUrl === url) {
          this.currentAudioUrl = null;
        }
      }
      onComplete?.();
    };
    audio.src = url;
    try {
      await audio.play();
    } catch (err) {
      this.logger.error("Playback failed, user gesture may be required", err);
    }
  }
  // ─── Streaming playback ──────────────────────────────────────────────────
  /**
   * Stream audio from a Response via MediaSource Extensions.
   * @param params.response The fetch Response whose body is an audio stream
   * @param params.mimeCodec MIME type+codec string, e.g. 'audio/mpeg'
   * @param params.onComplete Optional callback once the stream ends
   * @throws Error when the response is not OK / has no body, or when the
   *         MIME type is not supported by MediaSource.
   */
  async playAudioStream({
    response,
    mimeCodec = "audio/mpeg",
    onComplete
  }) {
    if (!response.ok || !response.body) {
      throw new Error(`Invalid response (${response.status})`);
    }
    if (typeof MediaSource === "undefined" || !MediaSource.isTypeSupported(mimeCodec)) {
      throw new Error(`Unsupported MIME type or codec: ${mimeCodec}`);
    }
    await this.stopPlayback();
    const mediaSource = new MediaSource();
    const url = URL.createObjectURL(mediaSource);
    this.currentAudioUrl = url;
    const audio = new Audio(url);
    this.currentHtmlAudio = audio;
    audio.autoplay = true;
    audio.onended = () => {
      URL.revokeObjectURL(url);
      if (this.currentAudioUrl === url) {
        this.currentAudioUrl = null;
      }
      onComplete?.();
    };
    // Reads the response body and appends it to the SourceBuffer one chunk at
    // a time. All failures are logged here: the pump runs detached from the
    // caller, so a rejection would otherwise go unhandled.
    const pump = async () => {
      try {
        const sourceBuffer = mediaSource.addSourceBuffer(mimeCodec);
        const reader = response.body.getReader();
        for (; ; ) {
          const { done, value } = await reader.read();
          if (done)
            break;
          if (!value || value.byteLength === 0)
            continue;
          if (mediaSource.readyState !== "open") {
            // Playback was stopped meanwhile; stop downloading as well.
            await reader.cancel();
            return;
          }
          sourceBuffer.appendBuffer(value);
          if (sourceBuffer.updating) {
            await new Promise((resolve) => {
              sourceBuffer.addEventListener("updateend", () => resolve(), {
                once: true
              });
            });
          }
        }
        if (mediaSource.readyState === "open") {
          mediaSource.endOfStream();
        }
      } catch (err) {
        this.logger.error("Error while streaming audio", err);
      }
    };
    mediaSource.addEventListener(
      "sourceopen",
      () => {
        void pump();
      },
      { once: true }
    );
    try {
      await audio.play();
    } catch (err) {
      this.logger.error(
        "Streaming playback failed, user gesture may be required",
        err
      );
    }
  }
  // ─── Chunk-based streaming playback ─────────────────────────────────────
  /**
   * Initialize a streaming audio context for chunk-based playback.
   * This creates the necessary MediaSource and SourceBuffer for subsequent chunk additions.
   * Returns functions to add chunks and end the stream, encapsulated in a closure.
   *
   * `endChunkStream` signals that no more chunks will arrive. If buffered
   * audio is still to be played, resources are released (and `onComplete`
   * is called) when the element fires `ended`; otherwise they are released
   * immediately.
   *
   * @param mimeCodec MIME type+codec string, e.g. 'audio/mpeg'
   * @param onComplete Optional callback once playback of the stream has ended
   * @returns Object containing functions to add chunks and end the stream
   * @throws Error when MediaSource is unavailable, no usable codec exists, or
   *         the MediaSource does not open within 5 seconds.
   */
  async initializeChunkStream({
    onComplete,
    mimeCodec = "audio/mpeg"
  }) {
    this.logger.debug(`Initializing chunk stream with codec: ${mimeCodec}`);
    if (typeof MediaSource === "undefined") {
      throw new Error("MediaSource API is not supported in this browser");
    }
    if (!MediaSource.isTypeSupported(mimeCodec)) {
      this.logger.warn(
        `Codec ${mimeCodec} not supported, falling back to standard audio/mpeg`
      );
      mimeCodec = "audio/mpeg";
      if (!MediaSource.isTypeSupported(mimeCodec)) {
        throw new Error(
          "Neither the specified codec nor the fallback codec are supported"
        );
      }
    }
    await this.stopPlayback();
    const logger = this.logger;
    const mediaSource = new MediaSource();
    let sourceBuffer = null;
    const url = URL.createObjectURL(mediaSource);
    this.currentAudioUrl = url;
    const audio = new Audio(url);
    this.currentHtmlAudio = audio;
    audio.autoplay = false;
    audio.controls = true;
    audio.style.display = "none";
    document.body.appendChild(audio);
    let playbackStarted = false;
    let hasReceivedFirstChunk = false;
    let receivedChunksCount = 0;
    const pendingChunks = [];
    let isProcessingQueue = false;
    let noMoreChunks = false;
    let tornDown = false;
    // Release everything this stream owns. Idempotent.
    const teardown = () => {
      if (tornDown)
        return;
      tornDown = true;
      audio.onended = null;
      audio.removeEventListener("emptied", teardown);
      if (audio.parentNode) {
        audio.parentNode.removeChild(audio);
      }
      if (this.currentAudioUrl === url) {
        this.currentAudioUrl = null;
        URL.revokeObjectURL(url);
      }
      sourceBuffer = null;
    };
    logger.debug("Waiting for MediaSource to open...");
    try {
      await new Promise((resolve, reject) => {
        const timeout = setTimeout(() => {
          reject(new Error("MediaSource failed to open (timeout)"));
        }, 5e3);
        mediaSource.addEventListener(
          "sourceopen",
          () => {
            clearTimeout(timeout);
            if (tornDown)
              return;
            logger.debug("MediaSource open event received");
            try {
              sourceBuffer = mediaSource.addSourceBuffer(mimeCodec);
              if (mediaSource.duration === Infinity || isNaN(mediaSource.duration)) {
                mediaSource.duration = 1e3;
              }
              logger.debug("SourceBuffer created successfully");
              resolve();
            } catch (err) {
              reject(new Error(`Failed to create SourceBuffer: ${err}`));
            }
          },
          { once: true }
        );
      });
    } catch (err) {
      // Do not leave the hidden <audio> element and object URL behind.
      teardown();
      throw err;
    }
    // stopPlayback() resets the element's src, which empties the element;
    // use that to release this stream's resources when it is stopped
    // externally instead of through endChunkStream().
    audio.addEventListener("emptied", teardown, { once: true });
    // Resolves once the SourceBuffer is idle. Checks `updating` first because
    // `updateend` may already have fired by the time we get here.
    const waitForUpdateEnd = () => new Promise((resolve) => {
      const buffer = sourceBuffer;
      if (!buffer || !buffer.updating) {
        resolve();
        return;
      }
      buffer.addEventListener("updateend", () => resolve(), { once: true });
    });
    const tryStartPlayback = async () => {
      if (playbackStarted)
        return;
      // Hold back while very little is buffered, unless the stream is already
      // complete. The started flag is only set once we really go ahead, so a
      // delayed start is retried on later chunks.
      if (!noMoreChunks && receivedChunksCount < 3 && audio.buffered.length > 0 && audio.buffered.end(0) < 0.5) {
        logger.debug("Not enough data buffered yet, delaying playback");
        return;
      }
      playbackStarted = true;
      logger.debug("Attempting to start audio playback...");
      try {
        if (audio.readyState === 0) {
          logger.debug(
            "Audio element not ready yet, waiting for canplay event"
          );
          await new Promise((resolve) => {
            audio.addEventListener("canplay", () => resolve(), { once: true });
          });
        }
        await audio.play();
        logger.debug("Successfully started audio playback");
      } catch (err) {
        logger.error("Failed to start playback", err);
        // Autoplay may be blocked; retry on the next user click.
        document.addEventListener(
          "click",
          async () => {
            try {
              await audio.play();
              logger.debug("Started playback after user interaction");
            } catch (innerErr) {
              logger.error(
                "Still failed to play after user interaction",
                innerErr
              );
            }
          },
          { once: true }
        );
      }
    };
    const processQueue = async () => {
      if (!sourceBuffer || pendingChunks.length === 0 || isProcessingQueue) {
        return;
      }
      isProcessingQueue = true;
      try {
        while (pendingChunks.length > 0) {
          await waitForUpdateEnd();
          if (!sourceBuffer) {
            // Stream was torn down while waiting; drop what is left.
            pendingChunks.length = 0;
            break;
          }
          const nextChunk = pendingChunks.shift();
          if (!nextChunk)
            continue;
          try {
            sourceBuffer.appendBuffer(nextChunk);
            logger.debug(
              `Processed queued chunk of size ${nextChunk.byteLength}`
            );
            if (!playbackStarted && hasReceivedFirstChunk) {
              // Not awaited: starting playback waits for `canplay`, which
              // must not block further appends.
              void tryStartPlayback();
            }
            await waitForUpdateEnd();
          } catch (err) {
            logger.error("Error appending queued chunk to source buffer", err);
          }
        }
      } finally {
        isProcessingQueue = false;
      }
    };
    const addChunkToStream = async (chunk) => {
      if (!sourceBuffer) {
        throw new Error(
          "Streaming context was closed or not properly initialized."
        );
      }
      let arrayBufferChunk;
      if (chunk instanceof Blob) {
        logger.debug("Converting Blob to ArrayBuffer");
        arrayBufferChunk = await chunk.arrayBuffer();
      } else {
        arrayBufferChunk = chunk;
      }
      if (!arrayBufferChunk || arrayBufferChunk.byteLength === 0) {
        logger.warn("Received empty chunk, skipping");
        return;
      }
      if (!hasReceivedFirstChunk) {
        hasReceivedFirstChunk = true;
        logger.debug(
          `First chunk received, size: ${arrayBufferChunk.byteLength} bytes`
        );
      }
      receivedChunksCount++;
      pendingChunks.push(arrayBufferChunk);
      logger.debug(
        `Added chunk #${receivedChunksCount} to queue (size: ${arrayBufferChunk.byteLength} bytes)`
      );
      await processQueue();
      if (!playbackStarted && hasReceivedFirstChunk && receivedChunksCount >= 3) {
        void tryStartPlayback();
      }
    };
    const endChunkStream = () => {
      if (tornDown)
        return;
      if (mediaSource.readyState === "open") {
        if (pendingChunks.length > 0 || sourceBuffer && sourceBuffer.updating) {
          logger.debug("Waiting for pending chunks before ending stream");
          setTimeout(() => endChunkStream(), 200);
          return;
        }
        try {
          if (hasReceivedFirstChunk) {
            mediaSource.endOfStream();
            logger.debug("MediaSource stream ended successfully");
          } else {
            logger.warn("Stream ended without receiving any chunks");
          }
        } catch (err) {
          logger.error("Error ending MediaSource stream", err);
        }
      }
      noMoreChunks = true;
      if (hasReceivedFirstChunk && mediaSource.readyState === "ended" && !audio.ended) {
        // Buffered audio has yet to finish playing. Keep the element alive
        // so its `ended` handler can release resources and report
        // completion; tearing down here would silence `onComplete`.
        void tryStartPlayback();
        return;
      }
      teardown();
    };
    audio.onended = () => {
      logger.debug("Audio playback completed");
      teardown();
      onComplete?.();
    };
    return {
      addChunkToStream,
      endChunkStream
    };
  }
  /**
   * Stop any ongoing HTMLAudioElement playback.
   */
  async stopPlayback() {
    if (this.currentHtmlAudio) {
      try {
        this.currentHtmlAudio.pause();
        this.currentHtmlAudio.src = "";
      } catch (err) {
        this.logger.error("Error stopping playback", err);
      }
      this.currentHtmlAudio = null;
    }
    if (this.currentAudioUrl) {
      URL.revokeObjectURL(this.currentAudioUrl);
      this.currentAudioUrl = null;
    }
  }
  /**
   * Cleans up all audio playback resources.
   */
  cleanup() {
    void this.stopPlayback();
  }
};
558
+
559
+ // src/react/hooks/use-conversation/useConversation.ts
560
Logger.enableGlobalLogging();
/**
 * React hook for a record → upload → play-back voice conversation over HTTP.
 *
 * @param endpoint Path of the voice endpoint, passed to the default adapter.
 * @param options.onStartRecording Called after recording has been requested.
 * @param options.onStopRecording  Called after the recorder has stopped.
 * @param options.onReceive        Called once a response arrived, with the
 *        recorded Blob, a function that plays the response, and a function
 *        that stops playback.
 * @param options.autoPlay         Play the response automatically (default true).
 * @param options.downstreamMode   `"STREAM"` (default) plays via MediaSource,
 *        `"DOWNLOAD"` downloads the full body first.
 * @param options.onError          Called with the state name and the Error.
 * @param options.audioConfig      Passed to the input audio controller.
 * @param options.requestData      Sent as request metadata with the recording.
 * @param options.endpointConfig   `baseUrl`, `headers`, or a custom
 *        `endpointAdapter` replacing the default one.
 * @returns `startRecording`, `stopRecording`, the current `voiceAgentState`,
 *          the last `error`, and the input controller's `audioContext`.
 */
function useConversation(endpoint, {
  onStartRecording,
  onStopRecording,
  onReceive,
  autoPlay = true,
  downstreamMode = "STREAM",
  onError,
  audioConfig = {},
  requestData = {},
  endpointConfig = {}
}) {
  const { current: logger } = useRef(
    new Logger("@m4trix/core > useConversation")
  );
  const inputAudioControllerRef = useRef(void 0);
  const outputAudioControllerRef = useRef(
    void 0
  );
  const endpointAdapterRef = useRef(
    void 0
  );
  const [voiceAgentState, setVoiceAgentState] = useState("READY");
  const [error, setError] = useState(null);
  // Anything can be thrown; normalise so failures are always reported
  // instead of being dropped when they are not Error instances.
  const toError = (err) => err instanceof Error ? err : new Error(String(err));
  const handleError = useCallback(
    (state, err) => {
      setError(err);
      logger.error(`Error during ${state}:`, err);
      onError?.(state, err);
    },
    [onError]
  );
  const startRecording = useCallback(() => {
    const input = inputAudioControllerRef.current;
    if (!input) {
      return;
    }
    try {
      logger.debug("Starting recording");
      setVoiceAgentState("RECORDING");
      input.startRecording({
        onError: (err) => {
          handleError("RECORDING", err);
          // Recording never started, so do not stay in "RECORDING"; this
          // mirrors how processing failures fall back to "READY".
          setVoiceAgentState("READY");
        }
      });
      onStartRecording?.();
    } catch (err) {
      handleError("RECORDING", toError(err));
    }
  }, [onStartRecording, handleError]);
  const stopRecording = useCallback(async () => {
    const input = inputAudioControllerRef.current;
    if (!input) {
      return;
    }
    // Play a response through the output controller in the configured mode;
    // the agent returns to "READY" when playback completes.
    const playResponse = async (response) => {
      const output = outputAudioControllerRef.current;
      if (!output) {
        return;
      }
      const onComplete = () => {
        setVoiceAgentState("READY");
      };
      if (downstreamMode === "STREAM") {
        return output.playAudioStream({ response, onComplete });
      }
      const responseBlob = await response.blob();
      return output.playAudio({ source: responseBlob, onComplete });
    };
    try {
      logger.debug("Stopping recording");
      await input.stopRecording({
        onRecordingCompleted: async (allData) => {
          setVoiceAgentState("PROCESSING");
          try {
            const response = await endpointAdapterRef.current?.sendVoiceFile({
              blob: allData,
              metadata: requestData
            });
            if (!response) {
              throw new Error("No response received from endpoint");
            }
            setVoiceAgentState("RESPONDING");
            if (autoPlay) {
              // Unknown modes are ignored here, as before.
              if (downstreamMode === "STREAM" || downstreamMode === "DOWNLOAD") {
                await playResponse(response);
              }
            } else {
              setVoiceAgentState("READY");
            }
            // NOTE(review): with autoPlay enabled the response body has
            // already been consumed above, so the play callback handed to
            // onReceive looks like it can only work when autoPlay is false;
            // confirm the intended contract.
            onReceive?.(
              allData,
              async () => playResponse(response),
              async () => {
                if (outputAudioControllerRef.current) {
                  return outputAudioControllerRef.current.stopPlayback();
                }
              }
            );
          } catch (err) {
            handleError("PROCESSING", toError(err));
            setVoiceAgentState("READY");
          }
        }
      });
      onStopRecording?.();
    } catch (err) {
      handleError("RECORDING", toError(err));
    }
  }, [
    onStopRecording,
    requestData,
    autoPlay,
    downstreamMode,
    handleError,
    onReceive
  ]);
  // One-time setup of the endpoint adapter and audio controllers. The ref
  // guard makes later runs of this effect a no-op.
  useEffect(() => {
    if (endpointAdapterRef.current) {
      return;
    }
    try {
      const endpointAdapter = endpointConfig.endpointAdapter ? endpointConfig.endpointAdapter : new BaseVoiceEndpointAdapter({
        baseUrl: endpointConfig.baseUrl,
        endpoint,
        headers: endpointConfig.headers
      });
      endpointAdapterRef.current = endpointAdapter;
      if (!inputAudioControllerRef.current) {
        inputAudioControllerRef.current = new WebAudioInputAudioController(
          audioConfig
        );
      }
      if (!outputAudioControllerRef.current) {
        outputAudioControllerRef.current = new AudioElementOutputAudioController();
      }
    } catch (err) {
      handleError("READY", toError(err));
    }
  }, [endpoint, endpointConfig, audioConfig, handleError]);
  // Release audio resources on unmount.
  useEffect(() => {
    return () => {
      inputAudioControllerRef.current?.cleanup();
      outputAudioControllerRef.current?.cleanup();
    };
  }, []);
  return {
    startRecording,
    stopRecording,
    voiceAgentState,
    error,
    audioContext: inputAudioControllerRef.current?.audioContext || null
  };
}
736
+
737
+ // src/react/adapter/socket/VoiceSocketAdapter.ts
738
/**
 * Base class for socket-based voice adapters.
 * Holds the connection flag, configuration and logger, and exposes a small
 * event API that delegates to an internal `Emitter`.
 */
var VoiceSocketAdapter = class {
  /** @param config Adapter configuration, kept as-is on `this.config`. */
  constructor(config) {
    this._isConnected = false;
    this.logger = new Logger("@m4trix/core > VoiceSocketAdapter");
    this.emitter = new Emitter();
    this.config = config;
  }
  /** Subscribe `listener` to `event`. */
  on(event, listener) {
    const { emitter } = this;
    emitter.on(event, listener);
  }
  /** Remove a listener previously added with `on`. */
  off(event, listener) {
    const { emitter } = this;
    emitter.off(event, listener);
  }
  /** Subscribe `listener` to the next occurrence of `event` only. */
  once(event, listener) {
    const { emitter } = this;
    emitter.once(event, listener);
  }
  /** Dispatch `event` with `data` to the adapter's listeners. */
  emit(event, data) {
    const { emitter } = this;
    emitter.emit(event, data);
  }
  /** @returns Whether the adapter currently considers itself connected. */
  isConnected() {
    const connected = this._isConnected;
    return connected;
  }
};
761
+ var Emitter = class {
762
+ constructor() {
763
+ this.target = new EventTarget();
764
+ }
765
+ on(type, listener) {
766
+ this.target.addEventListener(type, listener);
767
+ }
768
+ off(type, listener) {
769
+ this.target.removeEventListener(type, listener);
770
+ }
771
+ once(type, listener) {
772
+ const wrapper = (event) => {
773
+ this.off(type, wrapper);
774
+ listener(event.detail);
775
+ };
776
+ this.on(type, wrapper);
777
+ }
778
+ emit(type, detail) {
779
+ this.target.dispatchEvent(new CustomEvent(type, { detail }));
780
+ }
781
+ };
782
/**
 * Socket.IO implementation of the voice socket adapter.
 *
 * Socket events are translated into adapter events (`connect`, `disconnect`,
 * `error`, `chunk-received`, `file-received`,
 * `received-end-of-response-stream`, `control-message`).
 */
var VoiceSocketIOAdapter = class extends VoiceSocketAdapter {
  constructor(config) {
    super(config);
    this.socket = null;
  }
  /**
   * Open the socket (creating it on first use) and wait for the connection.
   *
   * Event handlers are attached once per socket, so calling `connect()`
   * repeatedly does not duplicate them.
   *
   * @returns Resolves on `connect`; rejects with the first `connect_error`.
   */
  async connect() {
    if (!this.socket) {
      this.socket = io(this.config.baseUrl, {
        extraHeaders: this.config.headers,
        autoConnect: true
      });
      this.attachSocketHandlers(this.socket);
    }
    const socket = this.socket;
    if (socket.connected) {
      this._isConnected = true;
      return;
    }
    await new Promise((resolve, reject) => {
      const onConnect = () => {
        socket.off("connect_error", onConnectError);
        resolve();
      };
      const onConnectError = (error) => {
        socket.off("connect", onConnect);
        reject(error);
      };
      socket.once("connect", onConnect);
      socket.once("connect_error", onConnectError);
      // `active === false` means the socket is neither connected nor going
      // to reconnect by itself, so it has to be opened explicitly.
      if (socket.active === false) {
        socket.connect();
      }
    });
  }
  /** Wire the persistent socket → adapter event translation for `socket`. */
  attachSocketHandlers(socket) {
    socket.on("connect", () => {
      this._isConnected = true;
      this.logger.debug("Connected to socket");
      this.emit("connect");
    });
    socket.on("disconnect", (reason) => {
      this._isConnected = false;
      this.emit("disconnect");
      this.logger.debug("Disconnected from socket");
      // After a server-forced disconnect Socket.IO does not reconnect by
      // itself, so reopen explicitly. A client-initiated disconnect is
      // intentional and is left alone.
      if (this.config.autoReconnect && reason === "io server disconnect") {
        socket.connect();
      }
    });
    socket.on("connect_error", (error) => {
      this.logger.error("Error connecting to socket", error);
      this.emit("error", error);
    });
    socket.on("voice:chunk_received", (chunk) => {
      this.logger.debug("Received voice chunk", chunk.byteLength);
      this.onVoiceChunkReceived(chunk);
    });
    socket.on("voice:received_end_of_response_stream", () => {
      this.logger.debug("Received end of response stream");
      this.onReceivedEndOfResponseStream();
    });
    socket.on("voice:file_received", (blob) => {
      this.logger.debug("Received voice file");
      this.onVoiceFileReceived(blob);
    });
    socket.on("control-message", (message) => {
      this.logger.debug("Received control message", message);
      this.emit("control-message", message);
    });
  }
  /** Close the socket and drop the reference to it. */
  disconnect() {
    this.socket?.disconnect();
    this.socket = null;
    this._isConnected = false;
  }
  /** @returns The underlying Socket.IO socket, or `null` when none exists. */
  exposeSocket() {
    return this.socket;
  }
  /**
   * Send one audio chunk as `voice:send_chunk`; Blobs are converted to an
   * ArrayBuffer first. Emits `chunk_sent` with the original chunk.
   * @throws Error when the socket is not connected.
   */
  async sendVoiceChunk(chunk, metadata) {
    this.logger.debug(
      "Sending voice chunk %i",
      chunk instanceof Blob ? chunk.size : chunk.byteLength
    );
    // isConnected is a method: it must be called, not tested for truthiness.
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    let chunkToSend;
    if (chunk instanceof Blob) {
      chunkToSend = await chunk.arrayBuffer();
    } else {
      chunkToSend = chunk;
    }
    this.logger.debug("[Socket] Sending voice chunk", chunkToSend.byteLength);
    this.socket.emit("voice:send_chunk", chunkToSend, metadata);
    this.emit("chunk_sent", chunk);
  }
  /**
   * Send a complete recording as `voice:send_file`. Emits `file-sent`.
   * @throws Error when the socket is not connected.
   */
  sendVoiceFile(blob, metadata) {
    this.logger.debug("Sending voice file", blob, metadata);
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    this.socket.emit("voice:send_file", blob, metadata);
    this.emit("file-sent", blob);
  }
  /**
   * Emit `voice:commit` on the socket.
   * @throws Error when the socket is not connected.
   */
  commitVoiceMessage() {
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    this.socket.emit("voice:commit");
  }
  onVoiceChunkReceived(chunk) {
    this.emit("chunk-received", chunk);
  }
  onVoiceFileReceived(blob) {
    this.emit("file-received", blob);
  }
  onReceivedEndOfResponseStream() {
    this.emit("received-end-of-response-stream");
  }
};
878
+
879
+ // src/react/utility/audio/WebAudioOutputAudioController.ts
880
// Sample rate used for AudioBuffers created from streamed PCM.
var STREAM_SAMPLE_RATE = 24e3;
var CHANNELS = 1;
// Streamed PCM is scheduled in slices of this many seconds.
var SLICE_DURATION_S = 0.25;
var FRAMES_PER_SLICE = Math.floor(STREAM_SAMPLE_RATE * SLICE_DURATION_S);
// Two bytes per frame: samples are read as 16-bit integers, one channel.
var BYTES_PER_SLICE = FRAMES_PER_SLICE * 2;
// Minimum lead time (seconds) between "now" and a newly scheduled buffer.
var SCHED_TOLERANCE = 0.05;
/**
 * Audio output built on the Web Audio API.
 *
 * Supports one-shot playback of encoded audio and gapless playback of a
 * stream of raw 16-bit PCM chunks, scheduled back to back on the context's
 * clock.
 */
var WebAudioOutputAudioController = class extends OutputAudioController {
  constructor() {
    super("@m4trix/core > WebAudioOutputAudioController");
    this.audioCtx = new AudioContext();
    this.gain = this.audioCtx.createGain();
    this.nextPlayTime = 0;
    this.activeSources = /* @__PURE__ */ new Set();
    this.userGestureHookAttached = false;
    this.gain.connect(this.audioCtx.destination);
    this.resetScheduler();
  }
  // ─────────────────────────────────────────────────────────────────────
  // One‑shot playback
  // ─────────────────────────────────────────────────────────────────────
  /**
   * Decode and play a Blob or the audio behind a URL string.
   * @param source     Blob, or URL string that is fetched first.
   * @param onComplete Called when the source node ends.
   */
  async playAudio({
    source,
    onComplete
  }) {
    await this.stopPlayback();
    const buf = await this.sourceToArrayBuffer(source);
    const decoded = await this.decode(buf);
    await this.ensureContextRunning();
    const src = this.createSource(decoded, this.audioCtx.currentTime);
    src.onended = () => {
      this.activeSources.delete(src);
      onComplete?.();
    };
  }
  /**
   * Not implemented for this controller: nothing is played and no callback
   * is invoked. A warning is logged so the no-op is visible.
   */
  async playAudioStream() {
    this.logger.warn("playAudioStream is not implemented for WebAudioOutputAudioController");
  }
  // ─────────────────────────────────────────────────────────────────────
  // PCM streaming
  // ─────────────────────────────────────────────────────────────────────
  /**
   * Start a PCM chunk stream.
   *
   * Incoming bytes are buffered and scheduled in fixed-size slices; whatever
   * remains when the stream ends is scheduled as a final, shorter slice.
   *
   * @param onComplete Called when the stream has ended and the last
   *                   scheduled source finished (or immediately if nothing
   *                   is playing).
   * @returns `addChunkToStream(chunk)` and `endChunkStream()`.
   */
  async initializeChunkStream({
    onComplete
  }) {
    await this.stopPlayback();
    await this.ensureContextRunning();
    this.resetScheduler();
    let streamEnded = false;
    let pending = new Uint8Array(0);
    const addChunkToStream = async (pkt) => {
      if (streamEnded) {
        this.logger.warn("Attempt to add chunk after stream ended \u2013 ignoring.");
        return;
      }
      const bytes = new Uint8Array(
        pkt instanceof Blob ? await pkt.arrayBuffer() : pkt
      );
      if (bytes.length === 0)
        return;
      const merged = new Uint8Array(pending.length + bytes.length);
      merged.set(pending);
      merged.set(bytes, pending.length);
      pending = merged;
      // Slices have an even byte length, so sample alignment is preserved
      // even when the total received so far is odd; the odd byte simply
      // stays in `pending` until its second half arrives.
      while (pending.length >= BYTES_PER_SLICE) {
        this.schedulePcmBytes(pending.subarray(0, BYTES_PER_SLICE));
        pending = pending.slice(BYTES_PER_SLICE);
      }
    };
    const endChunkStream = () => {
      if (streamEnded)
        return;
      streamEnded = true;
      // Flush the remainder that never filled a whole slice; otherwise the
      // tail of the audio would be dropped. A dangling odd byte cannot form
      // a sample and is discarded.
      const tailLength = pending.length - pending.length % 2;
      if (tailLength > 0) {
        this.schedulePcmBytes(pending.subarray(0, tailLength));
      }
      pending = new Uint8Array(0);
      if (!onComplete)
        return;
      if (this.activeSources.size === 0) {
        onComplete();
        return;
      }
      // The Set keeps insertion order, so the last entry is the source that
      // was scheduled last; chain onComplete onto its existing handler.
      const last = Array.from(this.activeSources).pop();
      if (last) {
        const prev = last.onended;
        last.onended = (e) => {
          if (prev)
            prev.call(last, e);
          onComplete();
        };
      }
    };
    return { addChunkToStream, endChunkStream };
  }
  /**
   * Convert 16-bit PCM bytes into a mono AudioBuffer and schedule it.
   * NOTE(review): samples are read as little-endian, which matches what
   * Int16Array yields on little-endian hosts — confirm against the server's
   * wire format.
   */
  schedulePcmBytes(bytes) {
    const frameCount = bytes.byteLength >> 1;
    if (frameCount === 0)
      return;
    const view = new DataView(bytes.buffer, bytes.byteOffset, frameCount * 2);
    const buf = this.audioCtx.createBuffer(
      CHANNELS,
      frameCount,
      STREAM_SAMPLE_RATE
    );
    const data = buf.getChannelData(0);
    for (let i = 0; i < frameCount; i++)
      data[i] = view.getInt16(i * 2, true) / 32768;
    this.scheduleBuffer(buf);
  }
  // ─────────────────────────────────────────────────────────────────────
  // Buffer scheduling helpers
  // ─────────────────────────────────────────────────────────────────────
  /** Queue `buf` directly after the previously scheduled buffer, never in the past. */
  scheduleBuffer(buf) {
    if (this.nextPlayTime < this.audioCtx.currentTime + SCHED_TOLERANCE) {
      this.nextPlayTime = this.audioCtx.currentTime + SCHED_TOLERANCE;
    }
    this.createSource(buf, this.nextPlayTime);
    this.nextPlayTime += buf.duration;
  }
  /** Create, connect and start a buffer source at `when`; tracks it until it ends. */
  createSource(buf, when) {
    const src = this.audioCtx.createBufferSource();
    src.buffer = buf;
    src.connect(this.gain);
    src.start(when);
    this.activeSources.add(src);
    src.onended = () => {
      this.activeSources.delete(src);
    };
    return src;
  }
  resetScheduler() {
    this.nextPlayTime = this.audioCtx.currentTime;
  }
  // ─── External resource helpers ───────────────────────────────────────
  /** Resolve a URL string (via fetch) or a Blob to an ArrayBuffer. */
  sourceToArrayBuffer(src) {
    return typeof src === "string" ? fetch(src).then((r) => {
      if (!r.ok)
        throw new Error(`${r.status}`);
      return r.arrayBuffer();
    }) : src.arrayBuffer();
  }
  /** Promise wrapper around the callback form of `decodeAudioData`. */
  decode(buf) {
    return new Promise(
      (res, rej) => this.audioCtx.decodeAudioData(buf, res, rej)
    );
  }
  // ─── Lifecycle methods ───────────────────────────────────────────────
  /** Stop and disconnect every tracked source and reset the schedule. */
  async stopPlayback() {
    for (const src of this.activeSources) {
      try {
        src.stop();
      } catch {
        // Best effort: stopping a source can throw; it is discarded below
        // either way.
      }
      src.disconnect();
    }
    this.activeSources.clear();
    this.resetScheduler();
  }
  /** Stop playback and close the AudioContext. */
  cleanup() {
    void this.stopPlayback();
    if (this.audioCtx.state !== "closed") {
      this.audioCtx.close().catch((err) => {
        this.logger.error("Failed to close audio context", err);
      });
    }
  }
  // ─── Autoplay‑policy helper ──────────────────────────────────────────
  /**
   * Try to resume a suspended context. If it still is not running, retry on
   * every document click until it is.
   */
  async ensureContextRunning() {
    if (this.audioCtx.state !== "suspended")
      return;
    try {
      await this.audioCtx.resume();
    } catch {
      // Best effort: fall through to the click-based retry below.
    }
    if (this.audioCtx.state === "running")
      return;
    if (!this.userGestureHookAttached) {
      this.userGestureHookAttached = true;
      const resume = async () => {
        try {
          await this.audioCtx.resume();
        } catch {
          // Best effort: the listener stays attached and retries on the
          // next click.
        }
        if (this.audioCtx.state === "running")
          document.removeEventListener("click", resume);
      };
      document.addEventListener("click", resume);
    }
  }
};
1062
+
1063
+ // src/react/hooks/use-conversation/useSocketConversation.ts
1064
// Module-level side effect: flips the global Logger flag, so every Logger
// instance starts writing to the console once this module is loaded.
Logger.enableGlobalLogging();
/**
 * React hook that drives a voice conversation over a socket adapter.
 *
 * It owns an input audio controller (recording), an output audio
 * controller (chunked playback) and a socket adapter, and moves
 * `voiceAgentState` through "READY" -> "RECORDING" -> "PROCESSING" ->
 * "DOWNSTREAMING" -> "READY".
 *
 * @param options.scope            Passed to the default `VoiceSocketIOAdapter`.
 * @param options.onStartRecording Called after recording has been started.
 * @param options.onStopRecording  Called after recording has been stopped.
 * @param options.onReceive        Called with the recorded data and two
 *                                 callbacks that both stop playback.
 * @param options.upstreamMode     "STREAM_WHILE_TALK" (default) sends every
 *                                 recorded chunk and commits on stop; any
 *                                 other value sends the whole recording on stop.
 * @param options.onError          Called with the state label and the Error.
 * @param options.audioConfig      Passed to `WebAudioInputAudioController`.
 * @param options.socketConfig     `socketAdapter` to reuse, or `baseUrl` /
 *                                 `headers` for the default adapter.
 * @returns `{ startRecording, stopRecording, voiceAgentState, error, audioContext, socket }`.
 */
function useSocketConversation({
  scope,
  onStartRecording,
  onStopRecording,
  onReceive,
  upstreamMode = "STREAM_WHILE_TALK",
  onError,
  audioConfig = {},
  socketConfig = {}
}) {
  const { current: logger } = useRef(
    new Logger("SuTr > useSocketConversation")
  );
  const inputAudioControllerRef = useRef(void 0);
  const outputAudioControllerRef = useRef(void 0);
  const socketAdapterRef = useRef(void 0);
  // Removes the downstream listeners registered by the previous turn.
  const detachDownstreamListenersRef = useRef(void 0);
  const [socket, setSocket] = useState(null);
  const [voiceAgentState, setVoiceAgentState] = useState("READY");
  const [error, setError] = useState(null);
  // Mirror of the latest state for long-lived socket handlers, which would
  // otherwise keep reporting the state captured when they were registered.
  const voiceAgentStateRef = useRef(voiceAgentState);
  useEffect(() => {
    voiceAgentStateRef.current = voiceAgentState;
  }, [voiceAgentState]);
  const shouldStreamWhileTalk = upstreamMode === "STREAM_WHILE_TALK";
  const handleError = useCallback(
    (state, err) => {
      setError(err);
      logger.error(`Error during ${state}:`, err);
      onError?.(state, err);
    },
    [onError]
  );
  const subscribeToSocketEventsForChunkDownstreaming = useCallback(
    async (socketAdapter) => {
      logger.debug("Setting up audio stream for receiving chunks");
      try {
        // Drop listeners left over from an earlier turn; otherwise every
        // incoming chunk would be delivered to all previous handlers too.
        detachDownstreamListenersRef.current?.();
        detachDownstreamListenersRef.current = void 0;
        const { addChunkToStream, endChunkStream } = await outputAudioControllerRef.current.initializeChunkStream({
          mimeCodec: "audio/mpeg",
          onComplete: () => {
            logger.debug("Audio stream playback completed");
            setVoiceAgentState("READY");
          }
        });
        let chunkCount = 0;
        const chunkReceivedEmitter = async (chunk) => {
          // Non-ArrayBuffer payloads are ignored.
          if (!(chunk instanceof ArrayBuffer)) {
            return;
          }
          chunkCount++;
          logger.debug(
            `Received voice chunk #${chunkCount} from socket, size: ${chunk.byteLength} bytes`
          );
          if (chunk.byteLength === 0) {
            logger.warn("Received empty chunk, skipping");
            return;
          }
          try {
            await addChunkToStream(chunk);
            logger.debug(
              `Successfully added chunk #${chunkCount} to audio stream`
            );
          } catch (err) {
            logger.error(
              `Failed to add chunk #${chunkCount} to audio stream`,
              err
            );
            if (err instanceof Error) {
              handleError("DOWNSTREAMING", err);
            }
          }
        };
        const endOfStreamEmitter = () => {
          logger.debug(
            `Received end of stream signal after ${chunkCount} chunks, ending chunk stream`
          );
          endChunkStream();
          setVoiceAgentState("READY");
        };
        socketAdapter.on("chunk-received", chunkReceivedEmitter);
        socketAdapter.on("received-end-of-response-stream", endOfStreamEmitter);
        const detachListeners = () => {
          socketAdapter.off("chunk-received", chunkReceivedEmitter);
          socketAdapter.off(
            "received-end-of-response-stream",
            endOfStreamEmitter
          );
        };
        detachDownstreamListenersRef.current = detachListeners;
        return () => {
          logger.debug("Cleaning up socket event listeners");
          detachListeners();
          if (detachDownstreamListenersRef.current === detachListeners) {
            detachDownstreamListenersRef.current = void 0;
          }
          endChunkStream();
        };
      } catch (err) {
        if (err instanceof Error) {
          handleError("DOWNSTREAMING", err);
        }
        return () => {
        };
      }
    },
    [handleError]
  );
  const hookupSocketAdapter = useCallback(
    async (socketAdapter) => {
      logger.debug("Connecting to socket...");
      try {
        await socketAdapter.connect();
        socketAdapter.on("connect", () => {
          logger.debug("Socket adapter connected");
          setVoiceAgentState("READY");
        });
        socketAdapter.on("disconnect", () => {
          logger.debug("Socket adapter disconnected");
        });
        socketAdapter.on("error", (err) => {
          // Read the state through the ref so the report reflects the state
          // at the time of the error, not at the time of registration.
          const currentState = voiceAgentStateRef.current;
          if (err instanceof Error) {
            handleError(currentState, err);
          } else {
            handleError(currentState, new Error("Unknown error"));
          }
        });
        setSocket(socketAdapter.exposeSocket());
      } catch (err) {
        if (err instanceof Error) {
          handleError("READY", err);
        }
      }
    },
    [handleError]
  );
  const startRecording = useCallback(() => {
    if (!inputAudioControllerRef.current) {
      return;
    }
    try {
      logger.debug("Starting recording");
      setVoiceAgentState("RECORDING");
      inputAudioControllerRef.current.startRecording({
        onRecordedChunk: async (chunk) => {
          // Chunks are only forwarded live in STREAM_WHILE_TALK mode.
          if (!shouldStreamWhileTalk) {
            return;
          }
          try {
            await socketAdapterRef.current?.sendVoiceChunk(chunk);
          } catch (err) {
            if (err instanceof Error) {
              handleError("RECORDING", err);
            }
          }
        }
      });
      onStartRecording?.();
    } catch (err) {
      if (err instanceof Error) {
        handleError("RECORDING", err);
      }
    }
  }, [onStartRecording, shouldStreamWhileTalk, handleError]);
  const stopRecording = useCallback(async () => {
    if (!inputAudioControllerRef.current) {
      return;
    }
    try {
      logger.debug("Stopping recording");
      await inputAudioControllerRef.current.stopRecording({
        onRecordingCompleted: async (allData) => {
          setVoiceAgentState("PROCESSING");
          try {
            if (shouldStreamWhileTalk) {
              logger.debug("Committing voice message");
              await socketAdapterRef.current?.commitVoiceMessage();
            } else {
              await socketAdapterRef.current?.sendVoiceFile(allData);
            }
            setVoiceAgentState("DOWNSTREAMING");
            await subscribeToSocketEventsForChunkDownstreaming(
              socketAdapterRef.current
            );
            const stopAudioPlayback = async () => {
              if (outputAudioControllerRef.current) {
                return outputAudioControllerRef.current.stopPlayback();
              }
            };
            // Both callback slots stop playback, matching the original call.
            onReceive?.(
              allData,
              stopAudioPlayback,
              async () => stopAudioPlayback()
            );
          } catch (err) {
            if (err instanceof Error) {
              handleError("PROCESSING", err);
            }
          }
        }
      });
      onStopRecording?.();
    } catch (err) {
      if (err instanceof Error) {
        handleError("RECORDING", err);
      }
    }
  }, [
    onStopRecording,
    handleError,
    subscribeToSocketEventsForChunkDownstreaming,
    onReceive,
    // The callback branches on the upstream mode, so it must be rebuilt
    // when the mode changes.
    shouldStreamWhileTalk
  ]);
  useEffect(() => {
    // Initialise only once; later runs (caused by changing deps) are no-ops.
    if (socketAdapterRef.current) {
      return;
    }
    try {
      const socketAdapter = socketConfig.socketAdapter ? socketConfig.socketAdapter : new VoiceSocketIOAdapter({
        scope,
        baseUrl: socketConfig.baseUrl || "",
        headers: socketConfig.headers
      });
      socketAdapterRef.current = socketAdapter;
      if (!socketAdapter.isConnected()) {
        hookupSocketAdapter(socketAdapter);
      }
      if (!inputAudioControllerRef.current) {
        inputAudioControllerRef.current = new WebAudioInputAudioController(
          audioConfig
        );
      }
      if (!outputAudioControllerRef.current) {
        outputAudioControllerRef.current = new WebAudioOutputAudioController();
      }
    } catch (err) {
      if (err instanceof Error) {
        handleError("READY", err);
      }
    }
  }, [scope, socketConfig, hookupSocketAdapter, audioConfig, handleError]);
  useEffect(() => {
    return () => {
      // Clear the controller refs after cleanup so that a re-run of the
      // init effect builds fresh controllers instead of reusing ones that
      // have already been cleaned up.
      inputAudioControllerRef.current?.cleanup();
      inputAudioControllerRef.current = void 0;
      outputAudioControllerRef.current?.cleanup();
      outputAudioControllerRef.current = void 0;
      if (socketAdapterRef.current) {
        detachDownstreamListenersRef.current?.();
        detachDownstreamListenersRef.current = void 0;
        socketAdapterRef.current.disconnect();
        socketAdapterRef.current = void 0;
      }
    };
  }, []);
  return {
    startRecording,
    stopRecording,
    voiceAgentState,
    error,
    audioContext: inputAudioControllerRef.current?.audioContext || null,
    socket
  };
}
1313
+
1314
+ export { BaseVoiceEndpointAdapter, Emitter, InputAudioController, VoiceEndpointAdapter, VoiceSocketAdapter, useConversation, useSocketConversation };
1315
+ //# sourceMappingURL=out.js.map
1316
+ //# sourceMappingURL=index.js.map