@pompeii-labs/audio 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/voice.d.mts +9 -11
- package/dist/voice.d.ts +9 -11
- package/dist/voice.js +30 -27
- package/dist/voice.mjs +30 -27
- package/package.json +1 -1
package/dist/voice.d.mts
CHANGED
|
@@ -15,9 +15,9 @@ declare abstract class MagmaFlowSpeechToText {
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
declare abstract class MagmaFlowTextToSpeech {
|
|
18
|
-
abstract input(text: string): void;
|
|
18
|
+
abstract input(text: string, requestId: string): void;
|
|
19
19
|
abstract kill(): void;
|
|
20
|
-
onOutput(audio: Buffer | null): void;
|
|
20
|
+
onOutput(audio: Buffer | null, requestId: string): void;
|
|
21
21
|
constructor();
|
|
22
22
|
}
|
|
23
23
|
|
|
@@ -44,13 +44,15 @@ declare class MagmaFlow {
|
|
|
44
44
|
private textBuffer;
|
|
45
45
|
private textQueue;
|
|
46
46
|
private generatingAudio;
|
|
47
|
+
private currentRequestId;
|
|
47
48
|
private audioBuffer;
|
|
48
49
|
private config;
|
|
49
50
|
constructor(args: MagmaFlowArgs);
|
|
50
51
|
inputAudio(audio: Buffer): void;
|
|
51
|
-
inputText(text: string): void;
|
|
52
|
+
inputText(text: string | null): void;
|
|
52
53
|
private generateAudio;
|
|
53
54
|
private sendAudio;
|
|
55
|
+
interruptTTS(): void;
|
|
54
56
|
kill(): void;
|
|
55
57
|
}
|
|
56
58
|
|
|
@@ -88,8 +90,7 @@ declare class DeepgramTTS extends MagmaFlowTextToSpeech {
|
|
|
88
90
|
private client;
|
|
89
91
|
constructor(args: DeepgramTTSArgs);
|
|
90
92
|
setup(): Promise<void>;
|
|
91
|
-
input(text: string | null): void;
|
|
92
|
-
private output;
|
|
93
|
+
input(text: string | null, requestId: string): void;
|
|
93
94
|
kill(): void;
|
|
94
95
|
reset(): void;
|
|
95
96
|
}
|
|
@@ -115,8 +116,7 @@ declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
|
|
|
115
116
|
private config;
|
|
116
117
|
constructor(args: ElevenLabsTTSArgs);
|
|
117
118
|
setup(): Promise<void>;
|
|
118
|
-
input(text: string | null): void;
|
|
119
|
-
private output;
|
|
119
|
+
input(text: string | null, requestId: string): void;
|
|
120
120
|
kill(): void;
|
|
121
121
|
reset(): void;
|
|
122
122
|
}
|
|
@@ -128,8 +128,7 @@ declare class HumeTTS extends MagmaFlowTextToSpeech {
|
|
|
128
128
|
private client;
|
|
129
129
|
constructor(args: HumeTTSArgs);
|
|
130
130
|
setup(): Promise<void>;
|
|
131
|
-
input(text: string | null): void;
|
|
132
|
-
private output;
|
|
131
|
+
input(text: string | null, requestId: string): void;
|
|
133
132
|
kill(): void;
|
|
134
133
|
reset(): void;
|
|
135
134
|
}
|
|
@@ -141,8 +140,7 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
|
|
|
141
140
|
private client;
|
|
142
141
|
constructor(args: WhisperTTSArgs);
|
|
143
142
|
setup(): Promise<void>;
|
|
144
|
-
input(text: string | null): void;
|
|
145
|
-
private output;
|
|
143
|
+
input(text: string | null, requestId: string): void;
|
|
146
144
|
kill(): void;
|
|
147
145
|
reset(): void;
|
|
148
146
|
}
|
package/dist/voice.d.ts
CHANGED
|
@@ -15,9 +15,9 @@ declare abstract class MagmaFlowSpeechToText {
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
declare abstract class MagmaFlowTextToSpeech {
|
|
18
|
-
abstract input(text: string): void;
|
|
18
|
+
abstract input(text: string, requestId: string): void;
|
|
19
19
|
abstract kill(): void;
|
|
20
|
-
onOutput(audio: Buffer | null): void;
|
|
20
|
+
onOutput(audio: Buffer | null, requestId: string): void;
|
|
21
21
|
constructor();
|
|
22
22
|
}
|
|
23
23
|
|
|
@@ -44,13 +44,15 @@ declare class MagmaFlow {
|
|
|
44
44
|
private textBuffer;
|
|
45
45
|
private textQueue;
|
|
46
46
|
private generatingAudio;
|
|
47
|
+
private currentRequestId;
|
|
47
48
|
private audioBuffer;
|
|
48
49
|
private config;
|
|
49
50
|
constructor(args: MagmaFlowArgs);
|
|
50
51
|
inputAudio(audio: Buffer): void;
|
|
51
|
-
inputText(text: string): void;
|
|
52
|
+
inputText(text: string | null): void;
|
|
52
53
|
private generateAudio;
|
|
53
54
|
private sendAudio;
|
|
55
|
+
interruptTTS(): void;
|
|
54
56
|
kill(): void;
|
|
55
57
|
}
|
|
56
58
|
|
|
@@ -88,8 +90,7 @@ declare class DeepgramTTS extends MagmaFlowTextToSpeech {
|
|
|
88
90
|
private client;
|
|
89
91
|
constructor(args: DeepgramTTSArgs);
|
|
90
92
|
setup(): Promise<void>;
|
|
91
|
-
input(text: string | null): void;
|
|
92
|
-
private output;
|
|
93
|
+
input(text: string | null, requestId: string): void;
|
|
93
94
|
kill(): void;
|
|
94
95
|
reset(): void;
|
|
95
96
|
}
|
|
@@ -115,8 +116,7 @@ declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
|
|
|
115
116
|
private config;
|
|
116
117
|
constructor(args: ElevenLabsTTSArgs);
|
|
117
118
|
setup(): Promise<void>;
|
|
118
|
-
input(text: string | null): void;
|
|
119
|
-
private output;
|
|
119
|
+
input(text: string | null, requestId: string): void;
|
|
120
120
|
kill(): void;
|
|
121
121
|
reset(): void;
|
|
122
122
|
}
|
|
@@ -128,8 +128,7 @@ declare class HumeTTS extends MagmaFlowTextToSpeech {
|
|
|
128
128
|
private client;
|
|
129
129
|
constructor(args: HumeTTSArgs);
|
|
130
130
|
setup(): Promise<void>;
|
|
131
|
-
input(text: string | null): void;
|
|
132
|
-
private output;
|
|
131
|
+
input(text: string | null, requestId: string): void;
|
|
133
132
|
kill(): void;
|
|
134
133
|
reset(): void;
|
|
135
134
|
}
|
|
@@ -141,8 +140,7 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
|
|
|
141
140
|
private client;
|
|
142
141
|
constructor(args: WhisperTTSArgs);
|
|
143
142
|
setup(): Promise<void>;
|
|
144
|
-
input(text: string | null): void;
|
|
145
|
-
private output;
|
|
143
|
+
input(text: string | null, requestId: string): void;
|
|
146
144
|
kill(): void;
|
|
147
145
|
reset(): void;
|
|
148
146
|
}
|
package/dist/voice.js
CHANGED
|
@@ -391,6 +391,7 @@ var MagmaFlow = class {
|
|
|
391
391
|
this.textBuffer = "";
|
|
392
392
|
this.textQueue = [];
|
|
393
393
|
this.generatingAudio = false;
|
|
394
|
+
this.currentRequestId = null;
|
|
394
395
|
this.audioBuffer = [];
|
|
395
396
|
this.config = {
|
|
396
397
|
pauseDurationMs: 500,
|
|
@@ -402,7 +403,11 @@ var MagmaFlow = class {
|
|
|
402
403
|
this.outputFormat = args.outputFormat;
|
|
403
404
|
this.onAudioOutput = args.onAudioOutput;
|
|
404
405
|
this.config = { ...this.config, ...args.config };
|
|
405
|
-
this.tts.onOutput = (audio) => {
|
|
406
|
+
this.tts.onOutput = (audio, requestId) => {
|
|
407
|
+
if (this.currentRequestId !== requestId) {
|
|
408
|
+
console.log("[MagmaFlow] Skipping output for cancelled request");
|
|
409
|
+
return;
|
|
410
|
+
}
|
|
406
411
|
if (!audio) {
|
|
407
412
|
const lastChunk = this.audioBuffer[this.audioBuffer.length - 1];
|
|
408
413
|
if (lastChunk) {
|
|
@@ -453,7 +458,10 @@ var MagmaFlow = class {
|
|
|
453
458
|
const chunk = this.textQueue.shift();
|
|
454
459
|
if (!chunk) return;
|
|
455
460
|
this.generatingAudio = true;
|
|
456
|
-
this.tts.input(chunk);
|
|
461
|
+
if (!this.currentRequestId) {
|
|
462
|
+
this.currentRequestId = crypto.randomUUID();
|
|
463
|
+
}
|
|
464
|
+
this.tts.input(chunk, this.currentRequestId);
|
|
457
465
|
}
|
|
458
466
|
sendAudio() {
|
|
459
467
|
if (this.audioBuffer.length === 0) return;
|
|
@@ -471,6 +479,13 @@ var MagmaFlow = class {
|
|
|
471
479
|
console.error("Audio output callback error:", error);
|
|
472
480
|
}
|
|
473
481
|
}
|
|
482
|
+
interruptTTS() {
|
|
483
|
+
this.textQueue = [];
|
|
484
|
+
this.textBuffer = "";
|
|
485
|
+
this.audioBuffer = [];
|
|
486
|
+
this.generatingAudio = false;
|
|
487
|
+
this.currentRequestId = null;
|
|
488
|
+
}
|
|
474
489
|
kill() {
|
|
475
490
|
this.stt.kill();
|
|
476
491
|
this.tts.kill();
|
|
@@ -587,7 +602,7 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
|
|
|
587
602
|
|
|
588
603
|
// src/voice/textToSpeech/base.ts
|
|
589
604
|
var MagmaFlowTextToSpeech = class {
|
|
590
|
-
onOutput(audio) {
|
|
605
|
+
onOutput(audio, requestId) {
|
|
591
606
|
console.log("[Default TTS] Output:", audio);
|
|
592
607
|
}
|
|
593
608
|
constructor() {
|
|
@@ -600,7 +615,7 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
|
|
|
600
615
|
}
|
|
601
616
|
async setup() {
|
|
602
617
|
}
|
|
603
|
-
input(text) {
|
|
618
|
+
input(text, requestId) {
|
|
604
619
|
if (!text) {
|
|
605
620
|
return;
|
|
606
621
|
}
|
|
@@ -620,15 +635,12 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
|
|
|
620
635
|
return;
|
|
621
636
|
}
|
|
622
637
|
for await (const chunk of stream) {
|
|
623
|
-
this.output(Buffer.from(chunk));
|
|
638
|
+
this.onOutput(Buffer.from(chunk), requestId);
|
|
624
639
|
}
|
|
625
|
-
this.output(null);
|
|
640
|
+
this.onOutput(null, requestId);
|
|
626
641
|
console.log("[Deepgram] Finished:", text);
|
|
627
642
|
});
|
|
628
643
|
}
|
|
629
|
-
output(audio) {
|
|
630
|
-
this.onOutput(audio);
|
|
631
|
-
}
|
|
632
644
|
kill() {
|
|
633
645
|
}
|
|
634
646
|
reset() {
|
|
@@ -654,7 +666,7 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
|
654
666
|
}
|
|
655
667
|
async setup() {
|
|
656
668
|
}
|
|
657
|
-
input(text) {
|
|
669
|
+
input(text, requestId) {
|
|
658
670
|
if (!text) {
|
|
659
671
|
return;
|
|
660
672
|
}
|
|
@@ -665,15 +677,12 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
|
665
677
|
...this.config
|
|
666
678
|
}).then(async (stream) => {
|
|
667
679
|
for await (const chunk of stream) {
|
|
668
|
-
this.output(chunk);
|
|
680
|
+
this.onOutput(chunk, requestId);
|
|
669
681
|
}
|
|
670
|
-
this.output(null);
|
|
682
|
+
this.onOutput(null, requestId);
|
|
671
683
|
console.log("[ElevenLabs] Finished:", text);
|
|
672
684
|
});
|
|
673
685
|
}
|
|
674
|
-
output(audio) {
|
|
675
|
-
this.onOutput(audio);
|
|
676
|
-
}
|
|
677
686
|
kill() {
|
|
678
687
|
}
|
|
679
688
|
reset() {
|
|
@@ -686,7 +695,7 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
|
|
|
686
695
|
}
|
|
687
696
|
async setup() {
|
|
688
697
|
}
|
|
689
|
-
input(text) {
|
|
698
|
+
input(text, requestId) {
|
|
690
699
|
if (!text) {
|
|
691
700
|
return;
|
|
692
701
|
}
|
|
@@ -702,15 +711,12 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
|
|
|
702
711
|
instantMode: true
|
|
703
712
|
}).then(async (stream) => {
|
|
704
713
|
for await (const chunk of stream) {
|
|
705
|
-
this.output(Buffer.from(chunk.audio, "base64"));
|
|
714
|
+
this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
|
|
706
715
|
}
|
|
707
|
-
this.output(null);
|
|
716
|
+
this.onOutput(null, requestId);
|
|
708
717
|
console.log("[Hume] Finished:", text);
|
|
709
718
|
});
|
|
710
719
|
}
|
|
711
|
-
output(audio) {
|
|
712
|
-
this.onOutput(audio);
|
|
713
|
-
}
|
|
714
720
|
kill() {
|
|
715
721
|
}
|
|
716
722
|
reset() {
|
|
@@ -723,7 +729,7 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
|
|
|
723
729
|
}
|
|
724
730
|
async setup() {
|
|
725
731
|
}
|
|
726
|
-
input(text) {
|
|
732
|
+
input(text, requestId) {
|
|
727
733
|
if (!text) {
|
|
728
734
|
return;
|
|
729
735
|
}
|
|
@@ -739,14 +745,11 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
|
|
|
739
745
|
24e3,
|
|
740
746
|
48e3
|
|
741
747
|
);
|
|
742
|
-
this.output(int16ArrayToBuffer(resampledPCM));
|
|
743
|
-
this.output(null);
|
|
748
|
+
this.onOutput(int16ArrayToBuffer(resampledPCM), requestId);
|
|
749
|
+
this.onOutput(null, requestId);
|
|
744
750
|
console.log("[Whisper] Finished:", text);
|
|
745
751
|
});
|
|
746
752
|
}
|
|
747
|
-
output(audio) {
|
|
748
|
-
this.onOutput(audio);
|
|
749
|
-
}
|
|
750
753
|
kill() {
|
|
751
754
|
}
|
|
752
755
|
reset() {
|
package/dist/voice.mjs
CHANGED
|
@@ -385,6 +385,7 @@ var MagmaFlow = class {
|
|
|
385
385
|
this.textBuffer = "";
|
|
386
386
|
this.textQueue = [];
|
|
387
387
|
this.generatingAudio = false;
|
|
388
|
+
this.currentRequestId = null;
|
|
388
389
|
this.audioBuffer = [];
|
|
389
390
|
this.config = {
|
|
390
391
|
pauseDurationMs: 500,
|
|
@@ -396,7 +397,11 @@ var MagmaFlow = class {
|
|
|
396
397
|
this.outputFormat = args.outputFormat;
|
|
397
398
|
this.onAudioOutput = args.onAudioOutput;
|
|
398
399
|
this.config = { ...this.config, ...args.config };
|
|
399
|
-
this.tts.onOutput = (audio) => {
|
|
400
|
+
this.tts.onOutput = (audio, requestId) => {
|
|
401
|
+
if (this.currentRequestId !== requestId) {
|
|
402
|
+
console.log("[MagmaFlow] Skipping output for cancelled request");
|
|
403
|
+
return;
|
|
404
|
+
}
|
|
400
405
|
if (!audio) {
|
|
401
406
|
const lastChunk = this.audioBuffer[this.audioBuffer.length - 1];
|
|
402
407
|
if (lastChunk) {
|
|
@@ -447,7 +452,10 @@ var MagmaFlow = class {
|
|
|
447
452
|
const chunk = this.textQueue.shift();
|
|
448
453
|
if (!chunk) return;
|
|
449
454
|
this.generatingAudio = true;
|
|
450
|
-
this.tts.input(chunk);
|
|
455
|
+
if (!this.currentRequestId) {
|
|
456
|
+
this.currentRequestId = crypto.randomUUID();
|
|
457
|
+
}
|
|
458
|
+
this.tts.input(chunk, this.currentRequestId);
|
|
451
459
|
}
|
|
452
460
|
sendAudio() {
|
|
453
461
|
if (this.audioBuffer.length === 0) return;
|
|
@@ -465,6 +473,13 @@ var MagmaFlow = class {
|
|
|
465
473
|
console.error("Audio output callback error:", error);
|
|
466
474
|
}
|
|
467
475
|
}
|
|
476
|
+
interruptTTS() {
|
|
477
|
+
this.textQueue = [];
|
|
478
|
+
this.textBuffer = "";
|
|
479
|
+
this.audioBuffer = [];
|
|
480
|
+
this.generatingAudio = false;
|
|
481
|
+
this.currentRequestId = null;
|
|
482
|
+
}
|
|
468
483
|
kill() {
|
|
469
484
|
this.stt.kill();
|
|
470
485
|
this.tts.kill();
|
|
@@ -581,7 +596,7 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
|
|
|
581
596
|
|
|
582
597
|
// src/voice/textToSpeech/base.ts
|
|
583
598
|
var MagmaFlowTextToSpeech = class {
|
|
584
|
-
onOutput(audio) {
|
|
599
|
+
onOutput(audio, requestId) {
|
|
585
600
|
console.log("[Default TTS] Output:", audio);
|
|
586
601
|
}
|
|
587
602
|
constructor() {
|
|
@@ -594,7 +609,7 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
|
|
|
594
609
|
}
|
|
595
610
|
async setup() {
|
|
596
611
|
}
|
|
597
|
-
input(text) {
|
|
612
|
+
input(text, requestId) {
|
|
598
613
|
if (!text) {
|
|
599
614
|
return;
|
|
600
615
|
}
|
|
@@ -614,15 +629,12 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
|
|
|
614
629
|
return;
|
|
615
630
|
}
|
|
616
631
|
for await (const chunk of stream) {
|
|
617
|
-
this.output(Buffer.from(chunk));
|
|
632
|
+
this.onOutput(Buffer.from(chunk), requestId);
|
|
618
633
|
}
|
|
619
|
-
this.output(null);
|
|
634
|
+
this.onOutput(null, requestId);
|
|
620
635
|
console.log("[Deepgram] Finished:", text);
|
|
621
636
|
});
|
|
622
637
|
}
|
|
623
|
-
output(audio) {
|
|
624
|
-
this.onOutput(audio);
|
|
625
|
-
}
|
|
626
638
|
kill() {
|
|
627
639
|
}
|
|
628
640
|
reset() {
|
|
@@ -648,7 +660,7 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
|
648
660
|
}
|
|
649
661
|
async setup() {
|
|
650
662
|
}
|
|
651
|
-
input(text) {
|
|
663
|
+
input(text, requestId) {
|
|
652
664
|
if (!text) {
|
|
653
665
|
return;
|
|
654
666
|
}
|
|
@@ -659,15 +671,12 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
|
659
671
|
...this.config
|
|
660
672
|
}).then(async (stream) => {
|
|
661
673
|
for await (const chunk of stream) {
|
|
662
|
-
this.output(chunk);
|
|
674
|
+
this.onOutput(chunk, requestId);
|
|
663
675
|
}
|
|
664
|
-
this.output(null);
|
|
676
|
+
this.onOutput(null, requestId);
|
|
665
677
|
console.log("[ElevenLabs] Finished:", text);
|
|
666
678
|
});
|
|
667
679
|
}
|
|
668
|
-
output(audio) {
|
|
669
|
-
this.onOutput(audio);
|
|
670
|
-
}
|
|
671
680
|
kill() {
|
|
672
681
|
}
|
|
673
682
|
reset() {
|
|
@@ -680,7 +689,7 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
|
|
|
680
689
|
}
|
|
681
690
|
async setup() {
|
|
682
691
|
}
|
|
683
|
-
input(text) {
|
|
692
|
+
input(text, requestId) {
|
|
684
693
|
if (!text) {
|
|
685
694
|
return;
|
|
686
695
|
}
|
|
@@ -696,15 +705,12 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
|
|
|
696
705
|
instantMode: true
|
|
697
706
|
}).then(async (stream) => {
|
|
698
707
|
for await (const chunk of stream) {
|
|
699
|
-
this.output(Buffer.from(chunk.audio, "base64"));
|
|
708
|
+
this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
|
|
700
709
|
}
|
|
701
|
-
this.output(null);
|
|
710
|
+
this.onOutput(null, requestId);
|
|
702
711
|
console.log("[Hume] Finished:", text);
|
|
703
712
|
});
|
|
704
713
|
}
|
|
705
|
-
output(audio) {
|
|
706
|
-
this.onOutput(audio);
|
|
707
|
-
}
|
|
708
714
|
kill() {
|
|
709
715
|
}
|
|
710
716
|
reset() {
|
|
@@ -717,7 +723,7 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
|
|
|
717
723
|
}
|
|
718
724
|
async setup() {
|
|
719
725
|
}
|
|
720
|
-
input(text) {
|
|
726
|
+
input(text, requestId) {
|
|
721
727
|
if (!text) {
|
|
722
728
|
return;
|
|
723
729
|
}
|
|
@@ -733,14 +739,11 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
|
|
|
733
739
|
24e3,
|
|
734
740
|
48e3
|
|
735
741
|
);
|
|
736
|
-
this.output(int16ArrayToBuffer(resampledPCM));
|
|
737
|
-
this.output(null);
|
|
742
|
+
this.onOutput(int16ArrayToBuffer(resampledPCM), requestId);
|
|
743
|
+
this.onOutput(null, requestId);
|
|
738
744
|
console.log("[Whisper] Finished:", text);
|
|
739
745
|
});
|
|
740
746
|
}
|
|
741
|
-
output(audio) {
|
|
742
|
-
this.onOutput(audio);
|
|
743
|
-
}
|
|
744
747
|
kill() {
|
|
745
748
|
}
|
|
746
749
|
reset() {
|