@pompeii-labs/audio 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/voice.d.mts CHANGED
@@ -15,9 +15,9 @@ declare abstract class MagmaFlowSpeechToText {
15
15
  }
16
16
 
17
17
  declare abstract class MagmaFlowTextToSpeech {
18
- abstract input(text: string): void;
18
+ abstract input(text: string, requestId: string): void;
19
19
  abstract kill(): void;
20
- onOutput(audio: Buffer | null): void;
20
+ onOutput(audio: Buffer | null, requestId: string): void;
21
21
  constructor();
22
22
  }
23
23
 
@@ -44,13 +44,15 @@ declare class MagmaFlow {
44
44
  private textBuffer;
45
45
  private textQueue;
46
46
  private generatingAudio;
47
+ private currentRequestId;
47
48
  private audioBuffer;
48
49
  private config;
49
50
  constructor(args: MagmaFlowArgs);
50
51
  inputAudio(audio: Buffer): void;
51
- inputText(text: string): void;
52
+ inputText(text: string | null): void;
52
53
  private generateAudio;
53
54
  private sendAudio;
55
+ interruptTTS(): void;
54
56
  kill(): void;
55
57
  }
56
58
 
@@ -88,8 +90,7 @@ declare class DeepgramTTS extends MagmaFlowTextToSpeech {
88
90
  private client;
89
91
  constructor(args: DeepgramTTSArgs);
90
92
  setup(): Promise<void>;
91
- input(text: string | null): void;
92
- private output;
93
+ input(text: string | null, requestId: string): void;
93
94
  kill(): void;
94
95
  reset(): void;
95
96
  }
@@ -115,8 +116,7 @@ declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
115
116
  private config;
116
117
  constructor(args: ElevenLabsTTSArgs);
117
118
  setup(): Promise<void>;
118
- input(text: string | null): void;
119
- private output;
119
+ input(text: string | null, requestId: string): void;
120
120
  kill(): void;
121
121
  reset(): void;
122
122
  }
@@ -128,8 +128,7 @@ declare class HumeTTS extends MagmaFlowTextToSpeech {
128
128
  private client;
129
129
  constructor(args: HumeTTSArgs);
130
130
  setup(): Promise<void>;
131
- input(text: string | null): void;
132
- private output;
131
+ input(text: string | null, requestId: string): void;
133
132
  kill(): void;
134
133
  reset(): void;
135
134
  }
@@ -141,8 +140,7 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
141
140
  private client;
142
141
  constructor(args: WhisperTTSArgs);
143
142
  setup(): Promise<void>;
144
- input(text: string | null): void;
145
- private output;
143
+ input(text: string | null, requestId: string): void;
146
144
  kill(): void;
147
145
  reset(): void;
148
146
  }
package/dist/voice.d.ts CHANGED
@@ -15,9 +15,9 @@ declare abstract class MagmaFlowSpeechToText {
15
15
  }
16
16
 
17
17
  declare abstract class MagmaFlowTextToSpeech {
18
- abstract input(text: string): void;
18
+ abstract input(text: string, requestId: string): void;
19
19
  abstract kill(): void;
20
- onOutput(audio: Buffer | null): void;
20
+ onOutput(audio: Buffer | null, requestId: string): void;
21
21
  constructor();
22
22
  }
23
23
 
@@ -44,13 +44,15 @@ declare class MagmaFlow {
44
44
  private textBuffer;
45
45
  private textQueue;
46
46
  private generatingAudio;
47
+ private currentRequestId;
47
48
  private audioBuffer;
48
49
  private config;
49
50
  constructor(args: MagmaFlowArgs);
50
51
  inputAudio(audio: Buffer): void;
51
- inputText(text: string): void;
52
+ inputText(text: string | null): void;
52
53
  private generateAudio;
53
54
  private sendAudio;
55
+ interruptTTS(): void;
54
56
  kill(): void;
55
57
  }
56
58
 
@@ -88,8 +90,7 @@ declare class DeepgramTTS extends MagmaFlowTextToSpeech {
88
90
  private client;
89
91
  constructor(args: DeepgramTTSArgs);
90
92
  setup(): Promise<void>;
91
- input(text: string | null): void;
92
- private output;
93
+ input(text: string | null, requestId: string): void;
93
94
  kill(): void;
94
95
  reset(): void;
95
96
  }
@@ -115,8 +116,7 @@ declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
115
116
  private config;
116
117
  constructor(args: ElevenLabsTTSArgs);
117
118
  setup(): Promise<void>;
118
- input(text: string | null): void;
119
- private output;
119
+ input(text: string | null, requestId: string): void;
120
120
  kill(): void;
121
121
  reset(): void;
122
122
  }
@@ -128,8 +128,7 @@ declare class HumeTTS extends MagmaFlowTextToSpeech {
128
128
  private client;
129
129
  constructor(args: HumeTTSArgs);
130
130
  setup(): Promise<void>;
131
- input(text: string | null): void;
132
- private output;
131
+ input(text: string | null, requestId: string): void;
133
132
  kill(): void;
134
133
  reset(): void;
135
134
  }
@@ -141,8 +140,7 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
141
140
  private client;
142
141
  constructor(args: WhisperTTSArgs);
143
142
  setup(): Promise<void>;
144
- input(text: string | null): void;
145
- private output;
143
+ input(text: string | null, requestId: string): void;
146
144
  kill(): void;
147
145
  reset(): void;
148
146
  }
package/dist/voice.js CHANGED
@@ -391,6 +391,7 @@ var MagmaFlow = class {
391
391
  this.textBuffer = "";
392
392
  this.textQueue = [];
393
393
  this.generatingAudio = false;
394
+ this.currentRequestId = null;
394
395
  this.audioBuffer = [];
395
396
  this.config = {
396
397
  pauseDurationMs: 500,
@@ -402,7 +403,11 @@ var MagmaFlow = class {
402
403
  this.outputFormat = args.outputFormat;
403
404
  this.onAudioOutput = args.onAudioOutput;
404
405
  this.config = { ...this.config, ...args.config };
405
- this.tts.onOutput = (audio) => {
406
+ this.tts.onOutput = (audio, requestId) => {
407
+ if (this.currentRequestId !== requestId) {
408
+ console.log("[MagmaFlow] Skipping output for cancelled request");
409
+ return;
410
+ }
406
411
  if (!audio) {
407
412
  const lastChunk = this.audioBuffer[this.audioBuffer.length - 1];
408
413
  if (lastChunk) {
@@ -453,7 +458,10 @@ var MagmaFlow = class {
453
458
  const chunk = this.textQueue.shift();
454
459
  if (!chunk) return;
455
460
  this.generatingAudio = true;
456
- this.tts.input(chunk);
461
+ if (!this.currentRequestId) {
462
+ this.currentRequestId = crypto.randomUUID();
463
+ }
464
+ this.tts.input(chunk, this.currentRequestId);
457
465
  }
458
466
  sendAudio() {
459
467
  if (this.audioBuffer.length === 0) return;
@@ -471,6 +479,13 @@ var MagmaFlow = class {
471
479
  console.error("Audio output callback error:", error);
472
480
  }
473
481
  }
482
+ interruptTTS() {
483
+ this.textQueue = [];
484
+ this.textBuffer = "";
485
+ this.audioBuffer = [];
486
+ this.generatingAudio = false;
487
+ this.currentRequestId = null;
488
+ }
474
489
  kill() {
475
490
  this.stt.kill();
476
491
  this.tts.kill();
@@ -587,7 +602,7 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
587
602
 
588
603
  // src/voice/textToSpeech/base.ts
589
604
  var MagmaFlowTextToSpeech = class {
590
- onOutput(audio) {
605
+ onOutput(audio, requestId) {
591
606
  console.log("[Default TTS] Output:", audio);
592
607
  }
593
608
  constructor() {
@@ -600,7 +615,7 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
600
615
  }
601
616
  async setup() {
602
617
  }
603
- input(text) {
618
+ input(text, requestId) {
604
619
  if (!text) {
605
620
  return;
606
621
  }
@@ -620,15 +635,12 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
620
635
  return;
621
636
  }
622
637
  for await (const chunk of stream) {
623
- this.output(Buffer.from(chunk));
638
+ this.onOutput(Buffer.from(chunk), requestId);
624
639
  }
625
- this.output(null);
640
+ this.onOutput(null, requestId);
626
641
  console.log("[Deepgram] Finished:", text);
627
642
  });
628
643
  }
629
- output(audio) {
630
- this.onOutput(audio);
631
- }
632
644
  kill() {
633
645
  }
634
646
  reset() {
@@ -654,7 +666,7 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
654
666
  }
655
667
  async setup() {
656
668
  }
657
- input(text) {
669
+ input(text, requestId) {
658
670
  if (!text) {
659
671
  return;
660
672
  }
@@ -665,15 +677,12 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
665
677
  ...this.config
666
678
  }).then(async (stream) => {
667
679
  for await (const chunk of stream) {
668
- this.output(chunk);
680
+ this.onOutput(chunk, requestId);
669
681
  }
670
- this.output(null);
682
+ this.onOutput(null, requestId);
671
683
  console.log("[ElevenLabs] Finished:", text);
672
684
  });
673
685
  }
674
- output(audio) {
675
- this.onOutput(audio);
676
- }
677
686
  kill() {
678
687
  }
679
688
  reset() {
@@ -686,7 +695,7 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
686
695
  }
687
696
  async setup() {
688
697
  }
689
- input(text) {
698
+ input(text, requestId) {
690
699
  if (!text) {
691
700
  return;
692
701
  }
@@ -702,15 +711,12 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
702
711
  instantMode: true
703
712
  }).then(async (stream) => {
704
713
  for await (const chunk of stream) {
705
- this.output(Buffer.from(chunk.audio, "base64"));
714
+ this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
706
715
  }
707
- this.output(null);
716
+ this.onOutput(null, requestId);
708
717
  console.log("[Hume] Finished:", text);
709
718
  });
710
719
  }
711
- output(audio) {
712
- this.onOutput(audio);
713
- }
714
720
  kill() {
715
721
  }
716
722
  reset() {
@@ -723,7 +729,7 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
723
729
  }
724
730
  async setup() {
725
731
  }
726
- input(text) {
732
+ input(text, requestId) {
727
733
  if (!text) {
728
734
  return;
729
735
  }
@@ -739,14 +745,11 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
739
745
  24e3,
740
746
  48e3
741
747
  );
742
- this.output(int16ArrayToBuffer(resampledPCM));
743
- this.output(null);
748
+ this.onOutput(int16ArrayToBuffer(resampledPCM), requestId);
749
+ this.onOutput(null, requestId);
744
750
  console.log("[Whisper] Finished:", text);
745
751
  });
746
752
  }
747
- output(audio) {
748
- this.onOutput(audio);
749
- }
750
753
  kill() {
751
754
  }
752
755
  reset() {
package/dist/voice.mjs CHANGED
@@ -385,6 +385,7 @@ var MagmaFlow = class {
385
385
  this.textBuffer = "";
386
386
  this.textQueue = [];
387
387
  this.generatingAudio = false;
388
+ this.currentRequestId = null;
388
389
  this.audioBuffer = [];
389
390
  this.config = {
390
391
  pauseDurationMs: 500,
@@ -396,7 +397,11 @@ var MagmaFlow = class {
396
397
  this.outputFormat = args.outputFormat;
397
398
  this.onAudioOutput = args.onAudioOutput;
398
399
  this.config = { ...this.config, ...args.config };
399
- this.tts.onOutput = (audio) => {
400
+ this.tts.onOutput = (audio, requestId) => {
401
+ if (this.currentRequestId !== requestId) {
402
+ console.log("[MagmaFlow] Skipping output for cancelled request");
403
+ return;
404
+ }
400
405
  if (!audio) {
401
406
  const lastChunk = this.audioBuffer[this.audioBuffer.length - 1];
402
407
  if (lastChunk) {
@@ -447,7 +452,10 @@ var MagmaFlow = class {
447
452
  const chunk = this.textQueue.shift();
448
453
  if (!chunk) return;
449
454
  this.generatingAudio = true;
450
- this.tts.input(chunk);
455
+ if (!this.currentRequestId) {
456
+ this.currentRequestId = crypto.randomUUID();
457
+ }
458
+ this.tts.input(chunk, this.currentRequestId);
451
459
  }
452
460
  sendAudio() {
453
461
  if (this.audioBuffer.length === 0) return;
@@ -465,6 +473,13 @@ var MagmaFlow = class {
465
473
  console.error("Audio output callback error:", error);
466
474
  }
467
475
  }
476
+ interruptTTS() {
477
+ this.textQueue = [];
478
+ this.textBuffer = "";
479
+ this.audioBuffer = [];
480
+ this.generatingAudio = false;
481
+ this.currentRequestId = null;
482
+ }
468
483
  kill() {
469
484
  this.stt.kill();
470
485
  this.tts.kill();
@@ -581,7 +596,7 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
581
596
 
582
597
  // src/voice/textToSpeech/base.ts
583
598
  var MagmaFlowTextToSpeech = class {
584
- onOutput(audio) {
599
+ onOutput(audio, requestId) {
585
600
  console.log("[Default TTS] Output:", audio);
586
601
  }
587
602
  constructor() {
@@ -594,7 +609,7 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
594
609
  }
595
610
  async setup() {
596
611
  }
597
- input(text) {
612
+ input(text, requestId) {
598
613
  if (!text) {
599
614
  return;
600
615
  }
@@ -614,15 +629,12 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
614
629
  return;
615
630
  }
616
631
  for await (const chunk of stream) {
617
- this.output(Buffer.from(chunk));
632
+ this.onOutput(Buffer.from(chunk), requestId);
618
633
  }
619
- this.output(null);
634
+ this.onOutput(null, requestId);
620
635
  console.log("[Deepgram] Finished:", text);
621
636
  });
622
637
  }
623
- output(audio) {
624
- this.onOutput(audio);
625
- }
626
638
  kill() {
627
639
  }
628
640
  reset() {
@@ -648,7 +660,7 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
648
660
  }
649
661
  async setup() {
650
662
  }
651
- input(text) {
663
+ input(text, requestId) {
652
664
  if (!text) {
653
665
  return;
654
666
  }
@@ -659,15 +671,12 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
659
671
  ...this.config
660
672
  }).then(async (stream) => {
661
673
  for await (const chunk of stream) {
662
- this.output(chunk);
674
+ this.onOutput(chunk, requestId);
663
675
  }
664
- this.output(null);
676
+ this.onOutput(null, requestId);
665
677
  console.log("[ElevenLabs] Finished:", text);
666
678
  });
667
679
  }
668
- output(audio) {
669
- this.onOutput(audio);
670
- }
671
680
  kill() {
672
681
  }
673
682
  reset() {
@@ -680,7 +689,7 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
680
689
  }
681
690
  async setup() {
682
691
  }
683
- input(text) {
692
+ input(text, requestId) {
684
693
  if (!text) {
685
694
  return;
686
695
  }
@@ -696,15 +705,12 @@ var HumeTTS = class extends MagmaFlowTextToSpeech {
696
705
  instantMode: true
697
706
  }).then(async (stream) => {
698
707
  for await (const chunk of stream) {
699
- this.output(Buffer.from(chunk.audio, "base64"));
708
+ this.onOutput(Buffer.from(chunk.audio, "base64"), requestId);
700
709
  }
701
- this.output(null);
710
+ this.onOutput(null, requestId);
702
711
  console.log("[Hume] Finished:", text);
703
712
  });
704
713
  }
705
- output(audio) {
706
- this.onOutput(audio);
707
- }
708
714
  kill() {
709
715
  }
710
716
  reset() {
@@ -717,7 +723,7 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
717
723
  }
718
724
  async setup() {
719
725
  }
720
- input(text) {
726
+ input(text, requestId) {
721
727
  if (!text) {
722
728
  return;
723
729
  }
@@ -733,14 +739,11 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
733
739
  24e3,
734
740
  48e3
735
741
  );
736
- this.output(int16ArrayToBuffer(resampledPCM));
737
- this.output(null);
742
+ this.onOutput(int16ArrayToBuffer(resampledPCM), requestId);
743
+ this.onOutput(null, requestId);
738
744
  console.log("[Whisper] Finished:", text);
739
745
  });
740
746
  }
741
- output(audio) {
742
- this.onOutput(audio);
743
- }
744
747
  kill() {
745
748
  }
746
749
  reset() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pompeii-labs/audio",
3
- "version": "0.0.3",
3
+ "version": "0.0.4",
4
4
  "description": "The Audio SDK from Pompeii Labs",
5
5
  "keywords": [
6
6
  "Pompeii",