@m4trix/core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1324 @@
1
+ 'use strict';
2
+
3
+ var react = require('react');
4
+ var socket_ioClient = require('socket.io-client');
5
+
6
+ // src/react/hooks/use-conversation/useConversation.ts
7
+
8
+ // src/utility/Logger.ts
9
var _Logger = class _Logger {
  /**
   * Namespaced console logger with a single global on/off switch shared by
   * every instance. Silent until `enableGlobalLogging()` is called.
   * @param {string} [namespace] Label prepended to each message as "[namespace]".
   */
  constructor(namespace = "") {
    this.namespace = namespace;
  }
  /** Turn logging on for all Logger instances. */
  static enableGlobalLogging() {
    _Logger.globalEnabled = true;
  }
  /** Turn logging off for all Logger instances. */
  static disableGlobalLogging() {
    _Logger.globalEnabled = false;
  }
  /** @returns {string} "[namespace]" when a namespace is set, otherwise "". */
  formatPrefix() {
    if (!this.namespace) {
      return "";
    }
    return `[${this.namespace}]`;
  }
  /**
   * Forward arguments to the matching console method while logging is enabled.
   * @param {"log"|"debug"|"info"|"warn"|"error"} level Console method name.
   * @param {...*} args Values passed straight through to the console.
   */
  logIfEnabled(level, ...args) {
    if (!_Logger.globalEnabled) {
      return;
    }
    const prefix = this.formatPrefix();
    if (prefix) {
      console[level](prefix, ...args);
    } else {
      console[level](...args);
    }
  }
  log(...args) {
    this.logIfEnabled("log", ...args);
  }
  debug(...args) {
    this.logIfEnabled("debug", ...args);
  }
  info(...args) {
    this.logIfEnabled("info", ...args);
  }
  warn(...args) {
    this.logIfEnabled("warn", ...args);
  }
  error(...args) {
    this.logIfEnabled("error", ...args);
  }
};
// Logging is globally disabled by default.
_Logger.globalEnabled = false;
var Logger = _Logger;
50
+
51
+ // src/react/adapter/VoiceEndpointAdapter.ts
52
var VoiceEndpointAdapter = class {
  /**
   * Base adapter binding an endpoint configuration to a namespaced logger.
   * Concrete subclasses implement the actual transport.
   * @param {{baseUrl?: string, endpoint: string, headers?: object}} config
   */
  constructor(config) {
    this.logger = new Logger("SuTr > EndpointAdapter");
    this.config = config;
  }
};
58
var BaseVoiceEndpointAdapter = class extends VoiceEndpointAdapter {
  constructor(config) {
    super(config);
  }
  /**
   * POST a recorded voice Blob (plus optional JSON metadata) to the configured
   * endpoint as multipart form data and hand back the raw fetch Response so
   * callers can consume its body as a stream.
   * @param {{blob: Blob, metadata?: object}} params
   * @returns {Promise<Response>} The successful Response with a readable body.
   * @throws {Error} On a non-2xx status (message includes the response text)
   *   or when the response has no body.
   */
  async sendVoiceFile({
    blob,
    metadata
  }) {
    const formData = new FormData();
    formData.append("audio", blob);
    if (metadata) {
      formData.append("metadata", JSON.stringify(metadata));
    }
    this.logger.debug("Sending voice file to", this.config.endpoint, formData);
    const targetUrl = `${this.config.baseUrl || ""}${this.config.endpoint}`;
    const response = await fetch(targetUrl, {
      method: "POST",
      headers: this.config.headers,
      body: formData
    });
    if (!response.ok) {
      throw new Error(`API error: ${response.status} ${await response.text()}`);
    }
    if (!response.body) {
      throw new Error("No response body");
    }
    return response;
  }
};
92
+
93
+ // src/react/utility/audio/InputAudioController.ts
94
var InputAudioController = class {
  /**
   * Abstract base for audio-capture controllers; only provides the shared
   * namespaced logger used by subclasses.
   */
  constructor() {
    this.logger = new Logger("@m4trix/core > InputAudioController");
  }
};
99
+
100
+ // src/react/utility/audio/WebAudioInputAudioController.ts
101
// Interval (ms) at which MediaRecorder emits `dataavailable` chunks.
var DEFAULT_SLICING_INTERVAL = 3e3;
/**
 * Microphone capture built on getUserMedia + MediaRecorder (webm/opus),
 * keeping an AudioContext + AnalyserNode alongside the recording.
 */
var WebAudioInputAudioController = class extends InputAudioController {
  constructor(audioConfig = {}) {
    super();
    // Optional settings (e.g. sampleRate) forwarded to the AudioContext.
    this.audioConfig = audioConfig;
    // ─── Recording state ─────────────────────────────────────────────────────
    this.audioContextState = {
      context: null,
      source: null,
      analyser: null
    };
    this.mediaRecorder = null;
    this.recordedChunks = [];
    this.recordingStream = null;
  }
  // Currently active AudioContext, or null when none has been created.
  get audioContext() {
    return this.audioContextState.context;
  }
  // Create a fresh AudioContext (defaults to 16 kHz) plus an AnalyserNode.
  // NOTE(review): the returned `source` is always null, and nothing in this
  // class ever connects a MediaStreamSource to the analyser — confirm intent.
  async createAudioContext() {
    const context = new AudioContext({
      sampleRate: this.audioConfig.sampleRate || 16e3,
      latencyHint: "interactive"
    });
    const analyser = context.createAnalyser();
    analyser.fftSize = 2048;
    return { context, source: null, analyser };
  }
  // Disconnect the source node (if any), close the context, and reset state.
  async cleanupAudioContext() {
    this.logger.debug("Cleaning up audio context");
    const { source, context } = this.audioContextState;
    if (source)
      source.disconnect();
    if (context)
      await context.close();
    this.audioContextState = { context: null, source: null, analyser: null };
  }
  /**
   * Request microphone access and start a MediaRecorder that slices the
   * stream into webm/opus chunks every DEFAULT_SLICING_INTERVAL ms.
   * Failures are routed to `onError` instead of being thrown.
   * @param {{onRecordedChunk?: (chunk: Blob) => void,
   *          onError?: (err: Error) => void}} [callbacks]
   */
  async startRecording({
    onRecordedChunk,
    onError
  } = {}) {
    try {
      this.logger.debug("Starting recording");
      this.recordedChunks = [];
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      this.recordingStream = stream;
      // Reuse an existing context if one survived a previous session.
      if (!this.audioContextState.context) {
        this.audioContextState = await this.createAudioContext();
      }
      this.mediaRecorder = new MediaRecorder(stream, {
        mimeType: "audio/webm;codecs=opus"
      });
      this.mediaRecorder.ondataavailable = (e) => {
        // Empty chunks are dropped; non-empty ones are kept for the final Blob
        // and also surfaced incrementally via onRecordedChunk.
        if (e.data.size > 0) {
          this.recordedChunks.push(e.data);
          onRecordedChunk?.(e.data);
          this.logger.debug("Recorded chunk", e.data.size);
        }
      };
      this.mediaRecorder.start(DEFAULT_SLICING_INTERVAL);
      this.logger.debug("MediaRecorder started");
    } catch (err) {
      const error = err instanceof Error ? err : new Error("Failed to start recording");
      this.logger.error(error);
      onError?.(error);
    }
  }
  /**
   * Stop the recorder, assemble all captured chunks into one webm Blob for
   * `onRecordingCompleted`, then release the mic tracks and the AudioContext.
   * Resolves only after the recorder's onstop handler has fully run.
   * No-op when nothing is recording.
   * @param {{onRecordingCompleted?: (blob: Blob) => void}} [callbacks]
   */
  async stopRecording({
    onRecordingCompleted
  } = {}) {
    this.logger.debug("Stopping recording");
    if (!this.mediaRecorder || this.mediaRecorder.state === "inactive")
      return;
    await new Promise((resolve) => {
      this.mediaRecorder.onstop = async () => {
        if (this.recordedChunks.length) {
          const blob = new Blob(this.recordedChunks, { type: "audio/webm" });
          onRecordingCompleted?.(blob);
          this.logger.debug("Recording completed", blob.size);
        }
        // Release the microphone before tearing down the audio graph.
        this.recordingStream?.getTracks().forEach((t) => t.stop());
        this.recordingStream = null;
        await this.cleanupAudioContext();
        resolve();
      };
      this.mediaRecorder.stop();
    });
  }
  /**
   * Cleans up all audio recording resources.
   */
  cleanup() {
    // NOTE(review): cleanupAudioContext() is async but not awaited, so
    // cleanup() may return before the AudioContext has actually closed.
    this.cleanupAudioContext();
    if (this.mediaRecorder && this.mediaRecorder.state !== "inactive") {
      this.mediaRecorder.stop();
    }
    if (this.recordingStream) {
      this.recordingStream.getTracks().forEach((t) => t.stop());
      this.recordingStream = null;
    }
  }
};
202
+
203
+ // src/react/utility/audio/OutputAudioController.ts
204
var OutputAudioController = class {
  /**
   * Abstract base for audio-playback controllers.
   * @param {string} loggerName Namespace used for this controller's logger.
   */
  constructor(loggerName) {
    this.logger = new Logger(loggerName);
  }
};
209
+
210
+ // src/react/utility/audio/AudioElementOutputAudioController.ts
211
/**
 * Playback controller built on HTMLAudioElement, with three modes:
 * one-shot (Blob/URL), Response streaming via MediaSource, and push-style
 * chunk streaming (caller feeds chunks through a returned closure).
 */
var AudioElementOutputAudioController = class extends OutputAudioController {
  constructor() {
    super("@m4trix/core > WebApiOutputAudioController");
    // ─── Playback state ──────────────────────────────────────────────────────
    this.currentHtmlAudio = null;
    this.currentAudioUrl = null;
  }
  // ─── One-shot playback ────────────────────────────────────────────────────
  /**
   * Play either a Blob or a URL string.
   * Uses <audio> under the hood for maximum browser compatibility.
   * NOTE(review): for URL-string sources no `onended` handler is installed,
   * so `onComplete` is only ever invoked for Blob sources — confirm intent.
   */
  async playAudio({
    source,
    onComplete
  }) {
    // Stop any prior playback and release its object URL if we created one.
    if (this.currentHtmlAudio) {
      this.currentHtmlAudio.pause();
      this.currentHtmlAudio.src = "";
      if (this.currentAudioUrl && source instanceof Blob) {
        URL.revokeObjectURL(this.currentAudioUrl);
      }
    }
    const audio = new Audio();
    this.currentHtmlAudio = audio;
    let url;
    if (source instanceof Blob) {
      url = URL.createObjectURL(source);
      this.currentAudioUrl = url;
      audio.onended = () => {
        URL.revokeObjectURL(url);
        onComplete?.();
      };
    } else {
      url = source;
    }
    audio.src = url;
    try {
      await audio.play();
    } catch (err) {
      // Autoplay policies can block play() until a user gesture occurs.
      this.logger.error("Playback failed, user gesture may be required", err);
    }
  }
  // ─── Streaming playback ──────────────────────────────────────────────────
  /**
   * Stream audio from a Response via MediaSource Extensions.
   * @param params.response The fetch Response whose body is an audio stream
   * @param params.mimeCodec MIME type+codec string, e.g. 'audio/mpeg'
   * @param params.onComplete Optional callback once the stream ends
   */
  async playAudioStream({
    response,
    mimeCodec = "audio/mpeg",
    onComplete
  }) {
    if (!response.ok || !response.body) {
      throw new Error(`Invalid response (${response.status})`);
    }
    if (typeof MediaSource === "undefined" || !MediaSource.isTypeSupported(mimeCodec)) {
      throw new Error(`Unsupported MIME type or codec: ${mimeCodec}`);
    }
    await this.stopPlayback();
    const mediaSource = new MediaSource();
    const url = URL.createObjectURL(mediaSource);
    this.currentAudioUrl = url;
    const audio = new Audio(url);
    this.currentHtmlAudio = audio;
    audio.autoplay = true;
    audio.onended = () => {
      URL.revokeObjectURL(url);
      this.currentAudioUrl = null;
      onComplete?.();
    };
    mediaSource.addEventListener(
      "sourceopen",
      () => {
        const sourceBuffer = mediaSource.addSourceBuffer(mimeCodec);
        const reader = response.body.getReader();
        // Recursively pump body chunks into the SourceBuffer; when a chunk is
        // still being appended, wait for `updateend` before reading the next.
        const pump = async () => {
          const { done, value } = await reader.read();
          if (done) {
            mediaSource.endOfStream();
            return;
          }
          if (value) {
            sourceBuffer.appendBuffer(value);
          }
          if (sourceBuffer.updating) {
            sourceBuffer.addEventListener("updateend", pump, { once: true });
          } else {
            pump();
          }
        };
        pump();
      },
      { once: true }
    );
    try {
      await audio.play();
    } catch (err) {
      this.logger.error(
        "Streaming playback failed, user gesture may be required",
        err
      );
    }
  }
  // ─── Chunk-based streaming playback ─────────────────────────────────────
  /**
   * Initialize a streaming audio context for chunk-based playback.
   * This creates the necessary MediaSource and SourceBuffer for subsequent chunk additions.
   * Returns functions to add chunks and end the stream, encapsulated in a closure.
   *
   * @param mimeCodec MIME type+codec string, e.g. 'audio/mpeg'
   * @param onComplete Optional callback once the stream ends
   * @returns Object containing functions to add chunks and end the stream
   */
  async initializeChunkStream({
    onComplete,
    mimeCodec = "audio/mpeg"
  }) {
    this.logger.debug(`Initializing chunk stream with codec: ${mimeCodec}`);
    if (typeof MediaSource === "undefined") {
      throw new Error("MediaSource API is not supported in this browser");
    }
    // Fall back to plain audio/mpeg when the requested codec is unsupported.
    if (!MediaSource.isTypeSupported(mimeCodec)) {
      this.logger.warn(
        `Codec ${mimeCodec} not supported, falling back to standard audio/mpeg`
      );
      mimeCodec = "audio/mpeg";
      if (!MediaSource.isTypeSupported(mimeCodec)) {
        throw new Error(
          "Neither the specified codec nor the fallback codec are supported"
        );
      }
    }
    await this.stopPlayback();
    const mediaSource = new MediaSource();
    let sourceBuffer = null;
    const url = URL.createObjectURL(mediaSource);
    this.currentAudioUrl = url;
    // Hidden <audio> element appended to the DOM drives the playback.
    const audio = new Audio(url);
    this.currentHtmlAudio = audio;
    audio.autoplay = false;
    audio.controls = true;
    audio.style.display = "none";
    document.body.appendChild(audio);
    // Closure-local streaming state shared by the returned functions.
    let playbackStarted = false;
    let hasReceivedFirstChunk = false;
    let receivedChunksCount = 0;
    const pendingChunks = [];
    let isProcessingQueue = false;
    this.logger.debug("Waiting for MediaSource to open...");
    // Block until the SourceBuffer exists (or fail after 5 s).
    await new Promise((resolve, reject) => {
      const timeout = setTimeout(() => {
        reject(new Error("MediaSource failed to open (timeout)"));
      }, 5e3);
      mediaSource.addEventListener(
        "sourceopen",
        () => {
          clearTimeout(timeout);
          this.logger.debug("MediaSource open event received");
          try {
            sourceBuffer = mediaSource.addSourceBuffer(mimeCodec);
            // Give the stream a finite placeholder duration when unknown.
            if (mediaSource.duration === Infinity || isNaN(mediaSource.duration)) {
              mediaSource.duration = 1e3;
            }
            this.logger.debug("SourceBuffer created successfully");
            resolve();
          } catch (err) {
            reject(new Error(`Failed to create SourceBuffer: ${err}`));
          }
        },
        { once: true }
      );
    });
    const logger = this.logger;
    // Drain pendingChunks into the SourceBuffer one at a time, waiting for
    // `updateend` around each append; re-entrant calls bail out via the flag.
    const processQueue = async () => {
      if (!sourceBuffer || pendingChunks.length === 0 || isProcessingQueue) {
        return;
      }
      isProcessingQueue = true;
      try {
        while (pendingChunks.length > 0) {
          if (sourceBuffer.updating) {
            await new Promise((resolve) => {
              sourceBuffer.addEventListener("updateend", () => resolve(), {
                once: true
              });
            });
          }
          const nextChunk = pendingChunks.shift();
          if (!nextChunk)
            continue;
          try {
            sourceBuffer.appendBuffer(nextChunk);
            logger.debug(
              `Processed queued chunk of size ${nextChunk.byteLength}`
            );
            if (!playbackStarted && hasReceivedFirstChunk) {
              await tryStartPlayback();
            }
            await new Promise((resolve) => {
              sourceBuffer.addEventListener("updateend", () => resolve(), {
                once: true
              });
            });
          } catch (err) {
            logger.error("Error appending queued chunk to source buffer", err);
          }
        }
      } finally {
        isProcessingQueue = false;
      }
    };
    // Start the <audio> element once; falls back to a one-time click handler
    // when autoplay policy rejects play().
    const tryStartPlayback = async () => {
      if (playbackStarted)
        return;
      playbackStarted = true;
      logger.debug("Attempting to start audio playback...");
      // Delay until a minimal amount of media is buffered.
      if (receivedChunksCount < 3 && audio.buffered.length > 0 && audio.buffered.end(0) < 0.5) {
        logger.debug("Not enough data buffered yet, delaying playback");
        return;
      }
      try {
        if (audio.readyState === 0) {
          logger.debug(
            "Audio element not ready yet, waiting for canplay event"
          );
          await new Promise((resolve) => {
            audio.addEventListener("canplay", () => resolve(), { once: true });
          });
        }
        await audio.play();
        logger.debug("Successfully started audio playback");
      } catch (err) {
        logger.error("Failed to start playback", err);
        document.addEventListener(
          "click",
          async () => {
            try {
              await audio.play();
              logger.debug("Started playback after user interaction");
            } catch (innerErr) {
              logger.error(
                "Still failed to play after user interaction",
                innerErr
              );
            }
          },
          { once: true }
        );
      }
    };
    // Queue one chunk (Blob or ArrayBuffer) and kick the processing loop.
    const addChunkToStream = async (chunk) => {
      if (!sourceBuffer) {
        throw new Error(
          "Streaming context was closed or not properly initialized."
        );
      }
      let arrayBufferChunk;
      if (chunk instanceof Blob) {
        logger.debug("Converting Blob to ArrayBuffer");
        arrayBufferChunk = await chunk.arrayBuffer();
      } else {
        arrayBufferChunk = chunk;
      }
      if (!arrayBufferChunk || arrayBufferChunk.byteLength === 0) {
        logger.warn("Received empty chunk, skipping");
        return;
      }
      if (!hasReceivedFirstChunk) {
        hasReceivedFirstChunk = true;
        logger.debug(
          `First chunk received, size: ${arrayBufferChunk.byteLength} bytes`
        );
      }
      receivedChunksCount++;
      pendingChunks.push(arrayBufferChunk);
      logger.debug(
        `Added chunk #${receivedChunksCount} to queue (size: ${arrayBufferChunk.byteLength} bytes)`
      );
      await processQueue();
      // Start playback once a few chunks have arrived.
      if (!playbackStarted && hasReceivedFirstChunk && receivedChunksCount >= 3) {
        await tryStartPlayback();
      }
    };
    // Finish the MediaSource stream and release the hidden element + URL.
    // Retries every 200 ms while appends are still pending.
    const endChunkStream = () => {
      if (mediaSource && mediaSource.readyState === "open") {
        try {
          if (pendingChunks.length > 0 || sourceBuffer && sourceBuffer.updating) {
            logger.debug("Waiting for pending chunks before ending stream");
            setTimeout(() => endChunkStream(), 200);
            return;
          }
          if (hasReceivedFirstChunk) {
            mediaSource.endOfStream();
            logger.debug("MediaSource stream ended successfully");
          } else {
            logger.warn("Stream ended without receiving any chunks");
          }
        } catch (err) {
          logger.error("Error ending MediaSource stream", err);
        }
      }
      audio.onended = null;
      if (audio.parentNode) {
        audio.parentNode.removeChild(audio);
      }
      // Only revoke if no newer stream has replaced our URL in the meantime.
      if (this.currentAudioUrl === url) {
        this.currentAudioUrl = null;
        URL.revokeObjectURL(url);
      }
      sourceBuffer = null;
    };
    audio.onended = () => {
      logger.debug("Audio playback completed");
      endChunkStream();
      onComplete?.();
    };
    return {
      addChunkToStream,
      endChunkStream
    };
  }
  /**
   * Stop any ongoing HTMLAudioElement playback.
   */
  async stopPlayback() {
    if (this.currentHtmlAudio) {
      try {
        this.currentHtmlAudio.pause();
        this.currentHtmlAudio.src = "";
      } catch (err) {
        this.logger.error("Error stopping playback", err);
      }
      this.currentHtmlAudio = null;
    }
    if (this.currentAudioUrl) {
      URL.revokeObjectURL(this.currentAudioUrl);
      this.currentAudioUrl = null;
    }
  }
  /**
   * Cleans up all audio playback resources.
   */
  cleanup() {
    this.stopPlayback();
  }
};
560
+
561
+ // src/react/hooks/use-conversation/useConversation.ts
562
// NOTE(review): enabling global logging at module scope turns on console
// output for every consumer of this library — confirm this is intentional
// and not leftover debugging.
Logger.enableGlobalLogging();
/**
 * React hook implementing a record → upload → play-response voice loop over
 * a plain HTTP endpoint.
 *
 * State machine (voiceAgentState): READY → RECORDING → PROCESSING →
 * RESPONDING → READY.
 *
 * @param endpoint Path posted to (combined with endpointConfig.baseUrl).
 * @param options.autoPlay        Play the server response automatically (default true).
 * @param options.downstreamMode  "STREAM" (MediaSource streaming) or "DOWNLOAD"
 *                                (buffer the whole Blob first). Default "STREAM".
 * @param options.requestData     Extra metadata sent with each recording.
 * @param options.endpointConfig  baseUrl/headers or a custom endpointAdapter.
 * @returns startRecording / stopRecording controls plus voiceAgentState,
 *          last error, and the active input AudioContext (or null).
 */
function useConversation(endpoint, {
  onStartRecording,
  onStopRecording,
  onReceive,
  autoPlay = true,
  downstreamMode = "STREAM",
  onError,
  audioConfig = {},
  requestData = {},
  endpointConfig = {}
}) {
  // Logger instance is created once and kept stable across renders.
  const { current: logger } = react.useRef(
    new Logger("@m4trix/core > useConversation")
  );
  const inputAudioControllerRef = react.useRef(void 0);
  const outputAudioControllerRef = react.useRef(
    void 0
  );
  const endpointAdapterRef = react.useRef(
    void 0
  );
  const [voiceAgentState, setVoiceAgentState] = react.useState("READY");
  const [error, setError] = react.useState(null);
  // Record the error in state, log it, and notify the caller.
  const handleError = react.useCallback(
    (state, err) => {
      setError(err);
      logger.error(`Error during ${state}:`, err);
      onError?.(state, err);
    },
    [onError]
  );
  // Begin microphone capture; errors surface through handleError("RECORDING").
  const startRecording = react.useCallback(() => {
    if (inputAudioControllerRef.current) {
      try {
        logger.debug("Starting recording");
        setVoiceAgentState("RECORDING");
        inputAudioControllerRef.current.startRecording({
          onError: (err) => {
            handleError("RECORDING", err);
          }
        });
        onStartRecording?.();
      } catch (err) {
        if (err instanceof Error) {
          handleError("RECORDING", err);
        }
      }
    }
  }, [onStartRecording, handleError]);
  // Stop capture, upload the recording, and (optionally) play the response.
  const stopRecording = react.useCallback(async () => {
    if (inputAudioControllerRef.current) {
      try {
        logger.debug("Stopping recording");
        await inputAudioControllerRef.current.stopRecording({
          onRecordingCompleted: async (allData) => {
            setVoiceAgentState("PROCESSING");
            try {
              const response = await endpointAdapterRef.current?.sendVoiceFile({
                blob: allData,
                metadata: requestData
              });
              if (!response) {
                throw new Error("No response received from endpoint");
              }
              setVoiceAgentState("RESPONDING");
              if (autoPlay) {
                if (downstreamMode === "STREAM") {
                  await outputAudioControllerRef.current?.playAudioStream({
                    response,
                    onComplete: () => {
                      setVoiceAgentState("READY");
                    }
                  });
                } else if (downstreamMode === "DOWNLOAD") {
                  const responseBlob = await response.blob();
                  await outputAudioControllerRef.current?.playAudio({
                    source: responseBlob,
                    onComplete: () => {
                      setVoiceAgentState("READY");
                    }
                  });
                }
              } else {
                setVoiceAgentState("READY");
              }
              // onReceive(recording, play, stop): hands the caller the raw
              // recording plus manual play/stop controls for the response.
              // NOTE(review): when autoPlay already streamed the response,
              // the Response body is consumed — calling the play callback
              // again would re-read an exhausted stream; confirm intent.
              onReceive?.(
                allData,
                async () => {
                  if (outputAudioControllerRef.current) {
                    if (downstreamMode === "STREAM") {
                      return outputAudioControllerRef.current.playAudioStream({
                        response,
                        onComplete: () => {
                          setVoiceAgentState("READY");
                        }
                      });
                    } else {
                      const responseBlob = await response.blob();
                      return outputAudioControllerRef.current.playAudio({
                        source: responseBlob,
                        onComplete: () => {
                          setVoiceAgentState("READY");
                        }
                      });
                    }
                  }
                },
                async () => {
                  if (outputAudioControllerRef.current) {
                    return outputAudioControllerRef.current.stopPlayback();
                  }
                }
              );
            } catch (err) {
              if (err instanceof Error) {
                handleError("PROCESSING", err);
              }
              setVoiceAgentState("READY");
            }
          }
        });
        onStopRecording?.();
      } catch (err) {
        if (err instanceof Error) {
          handleError("RECORDING", err);
        }
      }
    }
  }, [
    onStopRecording,
    requestData,
    autoPlay,
    downstreamMode,
    handleError,
    onReceive
  ]);
  // Lazily construct the adapter and audio controllers exactly once.
  react.useEffect(() => {
    if (endpointAdapterRef.current) {
      return;
    }
    try {
      const endpointAdapter = endpointConfig.endpointAdapter ? endpointConfig.endpointAdapter : new BaseVoiceEndpointAdapter({
        baseUrl: endpointConfig.baseUrl,
        endpoint,
        headers: endpointConfig.headers
      });
      endpointAdapterRef.current = endpointAdapter;
      if (!inputAudioControllerRef.current) {
        inputAudioControllerRef.current = new WebAudioInputAudioController(
          audioConfig
        );
      }
      if (!outputAudioControllerRef.current) {
        outputAudioControllerRef.current = new AudioElementOutputAudioController();
      }
    } catch (err) {
      if (err instanceof Error) {
        handleError("READY", err);
      }
    }
  }, [endpoint, endpointConfig, audioConfig, handleError]);
  // Release mic/playback resources on unmount.
  react.useEffect(() => {
    return () => {
      inputAudioControllerRef.current?.cleanup();
      outputAudioControllerRef.current?.cleanup();
    };
  }, []);
  return {
    startRecording,
    stopRecording,
    voiceAgentState,
    error,
    audioContext: inputAudioControllerRef.current?.audioContext || null
  };
}
738
+
739
+ // src/react/adapter/socket/VoiceSocketAdapter.ts
740
var VoiceSocketAdapter = class {
  /**
   * Base class for socket-backed voice transports: stores the connection
   * config and exposes a small pub/sub surface delegating to an Emitter.
   * @param {{baseUrl: string, headers?: object, autoReconnect?: boolean}} config
   */
  constructor(config) {
    this._isConnected = false;
    this.logger = new Logger("@m4trix/core > VoiceSocketAdapter");
    this.emitter = new Emitter();
    this.config = config;
  }
  /** Subscribe `listener` to `event`. */
  on(event, listener) {
    this.emitter.on(event, listener);
  }
  /** Remove a previously registered listener. */
  off(event, listener) {
    this.emitter.off(event, listener);
  }
  /** Subscribe for a single delivery of `event`. */
  once(event, listener) {
    this.emitter.once(event, listener);
  }
  /** Publish `data` under `event` to local subscribers. */
  emit(event, data) {
    this.emitter.emit(event, data);
  }
  /** @returns {boolean} Whether the underlying socket is currently connected. */
  isConnected() {
    return this._isConnected;
  }
};
763
+ var Emitter = class {
764
+ constructor() {
765
+ this.target = new EventTarget();
766
+ }
767
+ on(type, listener) {
768
+ this.target.addEventListener(type, listener);
769
+ }
770
+ off(type, listener) {
771
+ this.target.removeEventListener(type, listener);
772
+ }
773
+ once(type, listener) {
774
+ const wrapper = (event) => {
775
+ this.off(type, wrapper);
776
+ listener(event.detail);
777
+ };
778
+ this.on(type, wrapper);
779
+ }
780
+ emit(type, detail) {
781
+ this.target.dispatchEvent(new CustomEvent(type, { detail }));
782
+ }
783
+ };
784
var VoiceSocketIOAdapter = class extends VoiceSocketAdapter {
  /**
   * Socket.IO implementation of the voice transport.
   * @param {{baseUrl: string, headers?: object, autoReconnect?: boolean}} config
   */
  constructor(config) {
    super(config);
    this.socket = null;
  }
  /**
   * Open (or reuse) the Socket.IO connection. Resolves on the next
   * successful "connect", rejects on "connect_error"; resolves immediately
   * when already connected.
   *
   * BUGFIX: previously every connect() call re-registered all persistent
   * handlers, so each autoReconnect cycle stacked duplicate listeners
   * (duplicated chunk/file events, recursive reconnects). Handlers are now
   * installed once per socket instance.
   */
  async connect() {
    return new Promise((resolve, reject) => {
      if (!this.socket) {
        this.socket = socket_ioClient.io(this.config.baseUrl, {
          extraHeaders: this.config.headers,
          autoConnect: true
        });
        this.registerSocketHandlers(this.socket);
      }
      if (this._isConnected) {
        resolve();
        return;
      }
      // One-shot settlement for this particular connect() call.
      this.socket.once("connect", () => resolve());
      this.socket.once("connect_error", (error) => reject(error));
    });
  }
  // Install the persistent Socket.IO handlers (exactly once per socket).
  registerSocketHandlers(socket) {
    socket.on("connect", () => {
      this._isConnected = true;
      this.logger.debug("Connected to socket");
      this.emit("connect");
    });
    socket.on("disconnect", () => {
      this._isConnected = false;
      this.emit("disconnect");
      this.logger.debug("Disconnected from socket");
      if (this.config.autoReconnect)
        this.connect();
    });
    socket.on("connect_error", (error) => {
      this.logger.error("Error connecting to socket", error);
      this.emit("error", error);
    });
    socket.on("voice:chunk_received", (chunk) => {
      this.logger.debug("Received voice chunk", chunk.byteLength);
      this.onVoiceChunkReceived(chunk);
    });
    socket.on("voice:received_end_of_response_stream", () => {
      this.logger.debug("Received end of response stream");
      this.onReceivedEndOfResponseStream();
    });
    socket.on("voice:file_received", (blob) => {
      this.logger.debug("Received voice file");
      this.onVoiceFileReceived(blob);
    });
    socket.on("control-message", (message) => {
      this.logger.debug("Received control message", message);
      this.emit("control-message", message);
    });
  }
  /** Tear down the socket and mark the adapter disconnected. */
  disconnect() {
    this.socket?.disconnect();
    this.socket = null;
    this._isConnected = false;
  }
  /** @returns The raw Socket.IO socket (or null) for advanced use. */
  exposeSocket() {
    return this.socket;
  }
  /**
   * Send one audio chunk; Blobs are converted to ArrayBuffer first.
   * BUGFIX: the guard previously tested `!this.isConnected` — a truthy
   * method reference — so the connected-state check never fired; it now
   * calls the method.
   * @throws {Error} When the socket is missing or not connected.
   */
  async sendVoiceChunk(chunk, metadata) {
    this.logger.debug(
      "Sending voice chunk %i",
      chunk instanceof Blob ? chunk.size : chunk.byteLength
    );
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    let chunkToSend;
    if (chunk instanceof Blob) {
      chunkToSend = await chunk.arrayBuffer();
    } else {
      chunkToSend = chunk;
    }
    this.logger.debug("[Socket] Sending voice chunk", chunkToSend.byteLength);
    this.socket.emit("voice:send_chunk", chunkToSend, metadata);
    this.emit("chunk_sent", chunk);
  }
  /**
   * Send a complete voice recording in one message.
   * @throws {Error} When the socket is missing or not connected.
   */
  sendVoiceFile(blob, metadata) {
    this.logger.debug("Sending voice file", blob, metadata);
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    this.socket.emit("voice:send_file", blob, metadata);
    this.emit("file-sent", blob);
  }
  /**
   * Tell the server the current voice message is complete.
   * @throws {Error} When the socket is missing or not connected.
   */
  commitVoiceMessage() {
    if (!this.socket || !this.isConnected())
      throw new Error("Socket not connected");
    this.socket.emit("voice:commit");
  }
  // ── Incoming-event fan-out to local subscribers ─────────────────────────
  onVoiceChunkReceived(chunk) {
    this.emit("chunk-received", chunk);
  }
  onVoiceFileReceived(blob) {
    this.emit("file-received", blob);
  }
  onReceivedEndOfResponseStream() {
    this.emit("received-end-of-response-stream");
  }
};
880
+
881
+ // src/react/utility/audio/WebAudioOutputAudioController.ts
882
// PCM stream format: 24 kHz sample rate, mono.
var STREAM_SAMPLE_RATE = 24e3;
var CHANNELS = 1;
// Incoming PCM bytes are accumulated and scheduled in 250 ms slices.
var SLICE_DURATION_S = 0.25;
var FRAMES_PER_SLICE = Math.floor(STREAM_SAMPLE_RATE * SLICE_DURATION_S);
// 2 bytes per 16-bit sample.
var BYTES_PER_SLICE = FRAMES_PER_SLICE * 2;
// Safety margin (seconds) so buffers are never scheduled in the past.
var SCHED_TOLERANCE = 0.05;
/**
 * Playback controller built directly on the Web Audio API: decodes one-shot
 * sources and schedules raw 16-bit mono PCM chunks gaplessly on a shared
 * AudioContext. Assumes incoming stream data is 24 kHz s16le PCM — TODO
 * confirm against the producing server.
 */
var WebAudioOutputAudioController = class extends OutputAudioController {
  constructor() {
    super("@m4trix/core > WebAudioOutputAudioController");
    this.audioCtx = new AudioContext();
    this.gain = this.audioCtx.createGain();
    // Absolute AudioContext time at which the next buffer should start.
    this.nextPlayTime = 0;
    this.activeSources = /* @__PURE__ */ new Set();
    this.userGestureHookAttached = false;
    this.gain.connect(this.audioCtx.destination);
    this.resetScheduler();
  }
  // ─────────────────────────────────────────────────────────────────────
  // One‑shot playback
  // ─────────────────────────────────────────────────────────────────────
  /**
   * Decode and play a complete Blob or URL source; onComplete fires when
   * the decoded buffer finishes.
   */
  async playAudio({
    source,
    onComplete
  }) {
    await this.stopPlayback();
    const buf = await this.sourceToArrayBuffer(source);
    const decoded = await this.decode(buf);
    await this.ensureContextRunning();
    const src = this.createSource(decoded, this.audioCtx.currentTime);
    // Replaces the tracking handler set by createSource; also untracks.
    src.onended = () => {
      this.activeSources.delete(src);
      onComplete?.();
    };
  }
  // Intentionally a no-op: Response streaming is handled by the
  // MediaSource-based controller; this class only supports chunk streaming.
  async playAudioStream() {
  }
  // ─────────────────────────────────────────────────────────────────────
  // PCM streaming
  // ─────────────────────────────────────────────────────────────────────
  /**
   * Begin a PCM chunk stream. Returns closures to feed chunks
   * (Blob/ArrayBuffer of s16le samples) and to end the stream; onComplete
   * fires after the last scheduled buffer finishes.
   */
  async initializeChunkStream({
    onComplete
  }) {
    await this.stopPlayback();
    await this.ensureContextRunning();
    this.resetScheduler();
    let streamEnded = false;
    // Bytes carried over between chunks until a full slice accumulates.
    let pending = new Uint8Array(0);
    const addChunkToStream = async (pkt) => {
      if (streamEnded) {
        this.logger.warn("Attempt to add chunk after stream ended \u2013 ignoring.");
        return;
      }
      const bytes = new Uint8Array(
        pkt instanceof Blob ? await pkt.arrayBuffer() : pkt
      );
      if (bytes.length === 0)
        return;
      // Append the new bytes to the carry-over buffer.
      const merged = new Uint8Array(pending.length + bytes.length);
      merged.set(pending);
      merged.set(bytes, pending.length);
      pending = merged;
      // Odd total length means a 16-bit sample is split across chunks;
      // wait for the next chunk to complete it.
      if (pending.length % 2 === 1)
        return;
      // Schedule every complete slice; the remainder stays in `pending`.
      while (pending.length >= BYTES_PER_SLICE) {
        const sliceBytes = pending.slice(0, BYTES_PER_SLICE);
        pending = pending.slice(BYTES_PER_SLICE);
        // Copy into a standalone ArrayBuffer so the Int16Array view is
        // correctly aligned at offset 0.
        const aligned = sliceBytes.buffer.slice(
          sliceBytes.byteOffset,
          sliceBytes.byteOffset + sliceBytes.byteLength
        );
        const int16 = new Int16Array(aligned);
        const buf = this.audioCtx.createBuffer(
          CHANNELS,
          int16.length,
          STREAM_SAMPLE_RATE
        );
        const data = buf.getChannelData(0);
        // Convert s16 to float32 in [-1, 1).
        for (let i = 0; i < int16.length; i++)
          data[i] = int16[i] / 32768;
        this.scheduleBuffer(buf);
      }
    };
    const endChunkStream = () => {
      if (streamEnded)
        return;
      streamEnded = true;
      // NOTE(review): any bytes still in `pending` (less than one full
      // slice) are dropped here — confirm that is acceptable.
      if (onComplete) {
        if (this.activeSources.size === 0)
          onComplete();
        else {
          // Chain onComplete after the last scheduled source's handler.
          const last = Array.from(this.activeSources).pop();
          if (last) {
            const prev = last.onended;
            last.onended = (e) => {
              if (prev)
                prev.call(last, e);
              onComplete();
            };
          }
        }
      }
    };
    return { addChunkToStream, endChunkStream };
  }
  // ─────────────────────────────────────────────────────────────────────
  // Buffer scheduling helpers
  // ─────────────────────────────────────────────────────────────────────
  // Queue a buffer back-to-back with the previous one, clamping the start
  // time so it is never scheduled in the past.
  scheduleBuffer(buf) {
    if (this.nextPlayTime < this.audioCtx.currentTime + SCHED_TOLERANCE) {
      this.nextPlayTime = this.audioCtx.currentTime + SCHED_TOLERANCE;
    }
    this.createSource(buf, this.nextPlayTime);
    this.nextPlayTime += buf.duration;
  }
  // Start a BufferSource at `when`, tracking it until it ends.
  createSource(buf, when) {
    const src = this.audioCtx.createBufferSource();
    src.buffer = buf;
    src.connect(this.gain);
    src.start(when);
    this.activeSources.add(src);
    src.onended = () => {
      this.activeSources.delete(src);
    };
    return src;
  }
  // Reset the gapless-scheduling clock to "now".
  resetScheduler() {
    this.nextPlayTime = this.audioCtx.currentTime;
  }
  // ─── External resource helpers ───────────────────────────────────────
  // Resolve a URL (via fetch) or Blob to an ArrayBuffer.
  sourceToArrayBuffer(src) {
    return typeof src === "string" ? fetch(src).then((r) => {
      if (!r.ok)
        throw new Error(`${r.status}`);
      return r.arrayBuffer();
    }) : src.arrayBuffer();
  }
  // Promise wrapper around the callback form of decodeAudioData.
  decode(buf) {
    return new Promise(
      (res, rej) => this.audioCtx.decodeAudioData(buf, res, rej)
    );
  }
  // ─── Lifecycle methods ───────────────────────────────────────────────
  /** Stop and disconnect every scheduled source, then reset the clock. */
  async stopPlayback() {
    for (const src of this.activeSources) {
      try {
        src.stop();
      } catch {
        // stop() throws if the source never started or already ended.
      }
      src.disconnect();
    }
    this.activeSources.clear();
    this.resetScheduler();
  }
  /** Stop playback and close the AudioContext for good. */
  cleanup() {
    this.stopPlayback();
    if (this.audioCtx.state !== "closed")
      this.audioCtx.close();
  }
  // ─── Autoplay‑policy helper ──────────────────────────────────────────
  // Resume a suspended AudioContext; if the browser's autoplay policy
  // blocks it, attach a one-time click listener that retries on gesture.
  async ensureContextRunning() {
    if (this.audioCtx.state !== "suspended")
      return;
    try {
      await this.audioCtx.resume();
    } catch {
    }
    if (this.audioCtx.state === "running")
      return;
    if (!this.userGestureHookAttached) {
      this.userGestureHookAttached = true;
      const resume = async () => {
        try {
          await this.audioCtx.resume();
        } catch {
        }
        if (this.audioCtx.state === "running")
          document.removeEventListener("click", resume);
      };
      document.addEventListener("click", resume);
    }
  }
};
1064
+
1065
+ // src/react/hooks/use-conversation/useSocketConversation.ts
1066
+ Logger.enableGlobalLogging();
1067
/**
 * React hook wiring a push-to-talk voice conversation over a socket:
 * microphone capture (input controller) -> socket upstream -> streamed
 * audio response (output controller) with playback.
 *
 * @param scope - conversation scope passed to the socket adapter.
 * @param onStartRecording - invoked after recording successfully starts.
 * @param onStopRecording - invoked after the stop request is issued.
 * @param onReceive - invoked with the full recorded data plus two async
 *   stop-playback callbacks (both currently do the same thing).
 * @param upstreamMode - "STREAM_WHILE_TALK" (default; chunks are sent live,
 *   then committed) or any other value (the full file is sent on stop).
 * @param onError - invoked as (stateAtError, Error) on any failure.
 * @param audioConfig - forwarded to the input audio controller.
 * @param socketConfig - may supply a ready-made socketAdapter, or
 *   baseUrl/headers for the default VoiceSocketIOAdapter.
 * @returns { startRecording, stopRecording, voiceAgentState, error,
 *   audioContext, socket }
 */
function useSocketConversation({
  scope,
  onStartRecording,
  onStopRecording,
  onReceive,
  upstreamMode = "STREAM_WHILE_TALK",
  onError,
  audioConfig = {},
  socketConfig = {}
}) {
  // Logger instance is created once and kept stable via the ref's initial value.
  const { current: logger } = react.useRef(
    new Logger("SuTr > useSocketConversation")
  );
  // Controllers and adapter live in refs: they are stateful objects whose
  // identity must survive re-renders without triggering them.
  const inputAudioControllerRef = react.useRef(void 0);
  const outputAudioControllerRef = react.useRef(
    void 0
  );
  const socketAdapterRef = react.useRef(void 0);
  const [socket, setSocket] = react.useState(null);
  // State machine: READY -> RECORDING -> PROCESSING -> DOWNSTREAMING -> READY.
  const [voiceAgentState, setVoiceAgentState] = react.useState("READY");
  const [error, setError] = react.useState(null);
  const shouldStreamWhileTalk = upstreamMode === "STREAM_WHILE_TALK";
  // Central error funnel: records the error, logs it, and notifies the caller
  // with the state the failure occurred in.
  const handleError = react.useCallback(
    (state, err) => {
      setError(err);
      logger.error(`Error during ${state}:`, err);
      onError?.(state, err);
    },
    [onError]
  );
  // Prepares the output controller's chunk stream and attaches socket
  // listeners that feed received audio chunks into it. Returns a cleanup
  // function that detaches the listeners and ends the stream.
  const subscribeToSocketEventsForChunkDownstreaming = react.useCallback(
    async (socketAdapter) => {
      logger.debug("Setting up audio stream for receiving chunks");
      try {
        const { addChunkToStream, endChunkStream } = await outputAudioControllerRef.current.initializeChunkStream({
          mimeCodec: "audio/mpeg",
          onComplete: () => {
            logger.debug("Audio stream playback completed");
            setVoiceAgentState("READY");
          }
        });
        let chunkCount = 0;
        const chunkReceivedEmitter = async (chunk) => {
          // Only ArrayBuffer payloads are audio; anything else is ignored.
          if (chunk instanceof ArrayBuffer) {
            chunkCount++;
            logger.debug(
              `Received voice chunk #${chunkCount} from socket, size: ${chunk.byteLength} bytes`
            );
            if (!chunk || chunk.byteLength === 0) {
              logger.warn("Received empty chunk, skipping");
              return;
            }
            try {
              await addChunkToStream(chunk);
              logger.debug(
                `Successfully added chunk #${chunkCount} to audio stream`
              );
            } catch (err) {
              logger.error(
                `Failed to add chunk #${chunkCount} to audio stream`,
                err
              );
              if (err instanceof Error) {
                handleError("DOWNSTREAMING", err);
              }
            }
          }
        };
        socketAdapter.on("chunk-received", chunkReceivedEmitter);
        const endOfStreamEmitter = () => {
          logger.debug(
            `Received end of stream signal after ${chunkCount} chunks, ending chunk stream`
          );
          endChunkStream();
          setVoiceAgentState("READY");
        };
        socketAdapter.on("received-end-of-response-stream", endOfStreamEmitter);
        // NOTE(review): the caller (stopRecording) awaits this promise but
        // discards the returned cleanup function, so these listeners appear
        // to accumulate across turns — verify against the adapter's off/on
        // semantics.
        return () => {
          logger.debug("Cleaning up socket event listeners");
          socketAdapter.off("chunk-received", chunkReceivedEmitter);
          socketAdapter.off(
            "received-end-of-response-stream",
            endOfStreamEmitter
          );
          endChunkStream();
        };
      } catch (err) {
        if (err instanceof Error) {
          handleError("DOWNSTREAMING", err);
        }
        // On setup failure, return a no-op cleanup so callers need not branch.
        return () => {
        };
      }
    },
    [handleError]
  );
  // Connects the adapter, mirrors its lifecycle into hook state, and exposes
  // the raw socket to consumers.
  const hookupSocketAdapter = react.useCallback(
    async (socketAdapter) => {
      logger.debug("Connecting to socket...");
      try {
        await socketAdapter.connect();
        socketAdapter.on("connect", () => {
          logger.debug("Socket adapter connected");
          setVoiceAgentState("READY");
        });
        socketAdapter.on("disconnect", () => {
          logger.debug("Socket adapter disconnected");
        });
        socketAdapter.on("error", (err) => {
          // NOTE(review): `voiceAgentState` here is captured at callback
          // creation time, so late socket errors may report a stale state —
          // confirm whether a ref-based read is intended instead.
          if (err instanceof Error) {
            handleError(voiceAgentState, err);
          } else {
            handleError(voiceAgentState, new Error("Unknown error"));
          }
        });
        setSocket(socketAdapter.exposeSocket());
      } catch (err) {
        if (err instanceof Error) {
          handleError("READY", err);
        }
      }
    },
    [handleError, voiceAgentState]
  );
  // Begins microphone capture. In STREAM_WHILE_TALK mode each recorded chunk
  // is pushed upstream immediately as it is produced.
  const startRecording = react.useCallback(() => {
    if (inputAudioControllerRef.current) {
      try {
        logger.debug("Starting recording");
        setVoiceAgentState("RECORDING");
        inputAudioControllerRef.current.startRecording({
          onRecordedChunk: async (chunk) => {
            if (shouldStreamWhileTalk) {
              try {
                await socketAdapterRef.current?.sendVoiceChunk(chunk);
              } catch (err) {
                if (err instanceof Error) {
                  handleError("RECORDING", err);
                }
              }
            }
          }
        });
        onStartRecording?.();
      } catch (err) {
        if (err instanceof Error) {
          handleError("RECORDING", err);
        }
      }
    }
  }, [onStartRecording, shouldStreamWhileTalk, handleError]);
  // Ends capture, ships the utterance upstream (commit or full-file upload
  // depending on mode), then subscribes for the downstreamed response.
  const stopRecording = react.useCallback(async () => {
    if (inputAudioControllerRef.current) {
      try {
        logger.debug("Stopping recording");
        await inputAudioControllerRef.current.stopRecording({
          onRecordingCompleted: async (allData) => {
            setVoiceAgentState("PROCESSING");
            try {
              if (shouldStreamWhileTalk) {
                logger.debug("Committing voice message");
                await socketAdapterRef.current?.commitVoiceMessage();
              } else {
                await socketAdapterRef.current?.sendVoiceFile(allData);
              }
              setVoiceAgentState("DOWNSTREAMING");
              await subscribeToSocketEventsForChunkDownstreaming(
                socketAdapterRef.current
              );
              // Caller gets the recorded data and two stop-playback hooks
              // (currently identical in behavior).
              onReceive?.(
                allData,
                async () => {
                  if (outputAudioControllerRef.current) {
                    return outputAudioControllerRef.current.stopPlayback();
                  }
                },
                async () => {
                  if (outputAudioControllerRef.current) {
                    return outputAudioControllerRef.current.stopPlayback();
                  }
                }
              );
            } catch (err) {
              if (err instanceof Error) {
                handleError("PROCESSING", err);
              }
            }
          }
        });
        onStopRecording?.();
      } catch (err) {
        if (err instanceof Error) {
          handleError("RECORDING", err);
        }
      }
    }
  }, [
    onStopRecording,
    handleError,
    subscribeToSocketEventsForChunkDownstreaming,
    onReceive
  ]);
  // One-time construction of the socket adapter and audio controllers;
  // guarded by the ref so re-renders (and dep changes) do not rebuild them.
  react.useEffect(() => {
    if (socketAdapterRef.current) {
      return;
    }
    try {
      const socketAdapter = socketConfig.socketAdapter ? socketConfig.socketAdapter : new VoiceSocketIOAdapter({
        scope,
        baseUrl: socketConfig.baseUrl || "",
        headers: socketConfig.headers
      });
      socketAdapterRef.current = socketAdapter;
      if (!socketAdapter.isConnected()) {
        hookupSocketAdapter(socketAdapter);
      }
      if (!inputAudioControllerRef.current) {
        inputAudioControllerRef.current = new WebAudioInputAudioController(
          audioConfig
        );
      }
      if (!outputAudioControllerRef.current) {
        outputAudioControllerRef.current = new WebAudioOutputAudioController();
      }
    } catch (err) {
      if (err instanceof Error) {
        handleError("READY", err);
      }
    }
  }, [scope, socketConfig, hookupSocketAdapter, audioConfig, handleError]);
  // Unmount teardown: release audio resources and disconnect the socket.
  react.useEffect(() => {
    return () => {
      inputAudioControllerRef.current?.cleanup();
      outputAudioControllerRef.current?.cleanup();
      if (socketAdapterRef.current) {
        socketAdapterRef.current.disconnect();
        socketAdapterRef.current = void 0;
      }
    };
  }, []);
  return {
    startRecording,
    stopRecording,
    voiceAgentState,
    error,
    // audioContext is null until the first effect has created the input
    // controller (i.e. on the very first render).
    audioContext: inputAudioControllerRef.current?.audioContext || null,
    socket
  };
}
1315
+
1316
// Public CommonJS surface of the bundle (emitted by the bundler).
exports.BaseVoiceEndpointAdapter = BaseVoiceEndpointAdapter;
exports.Emitter = Emitter;
exports.InputAudioController = InputAudioController;
exports.VoiceEndpointAdapter = VoiceEndpointAdapter;
exports.VoiceSocketAdapter = VoiceSocketAdapter;
exports.useConversation = useConversation;
exports.useSocketConversation = useSocketConversation;
// NOTE(review): two sourceMappingURL comments below — tools honor the last
// one, so the "out.js.map" line is dead; likely a bundler artifact.
//# sourceMappingURL=out.js.map
//# sourceMappingURL=index.cjs.map