@pompeii-labs/audio 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/voice.d.mts +29 -8
- package/dist/voice.d.ts +29 -8
- package/dist/voice.js +45 -27
- package/dist/voice.mjs +45 -27
- package/package.json +1 -2
package/dist/voice.d.mts
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import { A as AudioFormat } from './index-o4B-ThOL.mjs';
|
|
2
2
|
import { DeepgramClient, LiveSchema } from '@deepgram/sdk';
|
|
3
|
-
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js';
|
|
4
|
-
import { StreamTextToSpeechRequest } from '@elevenlabs/elevenlabs-js/api/index.js';
|
|
5
3
|
import { HumeClient } from 'hume';
|
|
6
4
|
import OpenAI from 'openai';
|
|
7
5
|
|
|
@@ -58,6 +56,7 @@ declare class MagmaFlow {
|
|
|
58
56
|
private generatingAudio;
|
|
59
57
|
private currentRequestId;
|
|
60
58
|
private audioBuffer;
|
|
59
|
+
private lastChunk;
|
|
61
60
|
private config;
|
|
62
61
|
constructor(args: MagmaFlowArgs);
|
|
63
62
|
inputAudio(audio: Buffer): void;
|
|
@@ -120,22 +119,44 @@ declare class DeepgramTTS extends MagmaFlowTextToSpeech {
|
|
|
120
119
|
reset(): void;
|
|
121
120
|
}
|
|
122
121
|
|
|
123
|
-
declare enum
|
|
122
|
+
declare enum ElevenLabsVoice {
|
|
124
123
|
chris = "iP95p4xoKVk53GoZ742B",
|
|
125
124
|
josh = "TxGEqnHWrfWFTfGW9XjX",
|
|
126
125
|
rachel = "21m00Tcm4TlvDq8ikWAM",
|
|
127
126
|
laura = "FGY2WhTYpPnrIDTdsKH5",
|
|
128
127
|
felicity = "aTbnroHRGIomiKpqAQR8"
|
|
129
128
|
}
|
|
130
|
-
type
|
|
129
|
+
type StreamSpeechConfig = {
|
|
130
|
+
text: string;
|
|
131
|
+
model_id?: string;
|
|
132
|
+
language_code?: string;
|
|
133
|
+
voice_settings?: {
|
|
134
|
+
stability?: number;
|
|
135
|
+
use_speaker_boost?: boolean;
|
|
136
|
+
similarity_boost?: number;
|
|
137
|
+
style?: number;
|
|
138
|
+
speed?: number;
|
|
139
|
+
};
|
|
140
|
+
pronunciation_dictionary_locators?: {
|
|
141
|
+
pronunciation_dictionary_id: string;
|
|
142
|
+
version_id?: string;
|
|
143
|
+
}[];
|
|
144
|
+
seed?: number;
|
|
145
|
+
next_text?: string;
|
|
146
|
+
previous_request_ids?: string[];
|
|
147
|
+
next_request_ids?: string[];
|
|
148
|
+
apply_text_normalization?: 'auto' | 'on' | 'off';
|
|
149
|
+
apply_language_text_normalization?: boolean;
|
|
150
|
+
};
|
|
151
|
+
type ElevenLabsConfig = Omit<StreamSpeechConfig, 'text' | 'model_id'>;
|
|
131
152
|
type ElevenLabsTTSArgs = {
|
|
132
|
-
client?: ElevenLabsClient;
|
|
133
153
|
model: string;
|
|
134
|
-
voice:
|
|
154
|
+
voice: ElevenLabsVoice | string;
|
|
135
155
|
config?: ElevenLabsConfig;
|
|
156
|
+
apiKey?: string;
|
|
136
157
|
};
|
|
137
158
|
declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
|
|
138
|
-
private
|
|
159
|
+
private apiKey;
|
|
139
160
|
private model;
|
|
140
161
|
private voice;
|
|
141
162
|
private config;
|
|
@@ -172,4 +193,4 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
|
|
|
172
193
|
|
|
173
194
|
declare function splitTextIntoChunks(text: string, targetLength?: number): string[];
|
|
174
195
|
|
|
175
|
-
export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs,
|
|
196
|
+
export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };
|
package/dist/voice.d.ts
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import { A as AudioFormat } from './index-o4B-ThOL.js';
|
|
2
2
|
import { DeepgramClient, LiveSchema } from '@deepgram/sdk';
|
|
3
|
-
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js';
|
|
4
|
-
import { StreamTextToSpeechRequest } from '@elevenlabs/elevenlabs-js/api/index.js';
|
|
5
3
|
import { HumeClient } from 'hume';
|
|
6
4
|
import OpenAI from 'openai';
|
|
7
5
|
|
|
@@ -58,6 +56,7 @@ declare class MagmaFlow {
|
|
|
58
56
|
private generatingAudio;
|
|
59
57
|
private currentRequestId;
|
|
60
58
|
private audioBuffer;
|
|
59
|
+
private lastChunk;
|
|
61
60
|
private config;
|
|
62
61
|
constructor(args: MagmaFlowArgs);
|
|
63
62
|
inputAudio(audio: Buffer): void;
|
|
@@ -120,22 +119,44 @@ declare class DeepgramTTS extends MagmaFlowTextToSpeech {
|
|
|
120
119
|
reset(): void;
|
|
121
120
|
}
|
|
122
121
|
|
|
123
|
-
declare enum
|
|
122
|
+
declare enum ElevenLabsVoice {
|
|
124
123
|
chris = "iP95p4xoKVk53GoZ742B",
|
|
125
124
|
josh = "TxGEqnHWrfWFTfGW9XjX",
|
|
126
125
|
rachel = "21m00Tcm4TlvDq8ikWAM",
|
|
127
126
|
laura = "FGY2WhTYpPnrIDTdsKH5",
|
|
128
127
|
felicity = "aTbnroHRGIomiKpqAQR8"
|
|
129
128
|
}
|
|
130
|
-
type
|
|
129
|
+
type StreamSpeechConfig = {
|
|
130
|
+
text: string;
|
|
131
|
+
model_id?: string;
|
|
132
|
+
language_code?: string;
|
|
133
|
+
voice_settings?: {
|
|
134
|
+
stability?: number;
|
|
135
|
+
use_speaker_boost?: boolean;
|
|
136
|
+
similarity_boost?: number;
|
|
137
|
+
style?: number;
|
|
138
|
+
speed?: number;
|
|
139
|
+
};
|
|
140
|
+
pronunciation_dictionary_locators?: {
|
|
141
|
+
pronunciation_dictionary_id: string;
|
|
142
|
+
version_id?: string;
|
|
143
|
+
}[];
|
|
144
|
+
seed?: number;
|
|
145
|
+
next_text?: string;
|
|
146
|
+
previous_request_ids?: string[];
|
|
147
|
+
next_request_ids?: string[];
|
|
148
|
+
apply_text_normalization?: 'auto' | 'on' | 'off';
|
|
149
|
+
apply_language_text_normalization?: boolean;
|
|
150
|
+
};
|
|
151
|
+
type ElevenLabsConfig = Omit<StreamSpeechConfig, 'text' | 'model_id'>;
|
|
131
152
|
type ElevenLabsTTSArgs = {
|
|
132
|
-
client?: ElevenLabsClient;
|
|
133
153
|
model: string;
|
|
134
|
-
voice:
|
|
154
|
+
voice: ElevenLabsVoice | string;
|
|
135
155
|
config?: ElevenLabsConfig;
|
|
156
|
+
apiKey?: string;
|
|
136
157
|
};
|
|
137
158
|
declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
|
|
138
|
-
private
|
|
159
|
+
private apiKey;
|
|
139
160
|
private model;
|
|
140
161
|
private voice;
|
|
141
162
|
private config;
|
|
@@ -172,4 +193,4 @@ declare class WhisperTTS extends MagmaFlowTextToSpeech {
|
|
|
172
193
|
|
|
173
194
|
declare function splitTextIntoChunks(text: string, targetLength?: number): string[];
|
|
174
195
|
|
|
175
|
-
export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs,
|
|
196
|
+
export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, type DeepgramWord, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenLabsVoice, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, type MagmaFlowSTTOutput, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, type Turn, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };
|
package/dist/voice.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var sdk = require('@deepgram/sdk');
|
|
4
|
-
var elevenlabsJs = require('@elevenlabs/elevenlabs-js');
|
|
5
4
|
var hume = require('hume');
|
|
6
5
|
var OpenAI = require('openai');
|
|
7
6
|
|
|
@@ -429,6 +428,7 @@ var MagmaFlow = class {
|
|
|
429
428
|
generatingAudio = false;
|
|
430
429
|
currentRequestId = null;
|
|
431
430
|
audioBuffer = [];
|
|
431
|
+
lastChunk = null;
|
|
432
432
|
config = {
|
|
433
433
|
pauseDurationMs: 500,
|
|
434
434
|
sentenceChunkLength: 50
|
|
@@ -446,9 +446,8 @@ var MagmaFlow = class {
|
|
|
446
446
|
return;
|
|
447
447
|
}
|
|
448
448
|
if (!audio) {
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
const lastChunkSamples = bufferToInt16Array(lastChunk);
|
|
449
|
+
if (this.lastChunk) {
|
|
450
|
+
const lastChunkSamples = bufferToInt16Array(this.lastChunk);
|
|
452
451
|
const lastSampleValue = lastChunkSamples[lastChunkSamples.length - 1];
|
|
453
452
|
this.audioBuffer.push(
|
|
454
453
|
Buffer.from(
|
|
@@ -462,10 +461,15 @@ var MagmaFlow = class {
|
|
|
462
461
|
}
|
|
463
462
|
this.sendAudio();
|
|
464
463
|
this.generatingAudio = false;
|
|
464
|
+
this.lastChunk = null;
|
|
465
465
|
this.generateAudio();
|
|
466
466
|
return;
|
|
467
467
|
}
|
|
468
468
|
this.audioBuffer.push(audio);
|
|
469
|
+
this.lastChunk = audio;
|
|
470
|
+
if (this.audioBuffer.reduce((acc, curr) => acc + curr.length, 0) % (2 * this.outputFormat.channels) === 0) {
|
|
471
|
+
this.sendAudio();
|
|
472
|
+
}
|
|
469
473
|
};
|
|
470
474
|
this.stt.onOutput = args.onTranscription;
|
|
471
475
|
this.stt.onSpeechDetected = args.onSpeechDetected;
|
|
@@ -505,13 +509,13 @@ var MagmaFlow = class {
|
|
|
505
509
|
sendAudio() {
|
|
506
510
|
if (this.audioBuffer.length === 0) return;
|
|
507
511
|
const concatenatedBuffer = Buffer.concat(this.audioBuffer);
|
|
512
|
+
this.audioBuffer = [];
|
|
508
513
|
const resampledPCM = resamplePcm(
|
|
509
514
|
bufferToInt16Array(concatenatedBuffer),
|
|
510
515
|
uniformSampleRate,
|
|
511
516
|
this.outputFormat.sampleRate
|
|
512
517
|
);
|
|
513
518
|
const encodedAudio = encodePcm(resampledPCM, this.outputFormat.encoding);
|
|
514
|
-
this.audioBuffer = [];
|
|
515
519
|
try {
|
|
516
520
|
this.onAudioOutput(encodedAudio);
|
|
517
521
|
} catch (error) {
|
|
@@ -642,7 +646,7 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
|
|
|
642
646
|
}
|
|
643
647
|
const text = this.turnBuffer.map((turn) => turn.text).join(" ");
|
|
644
648
|
let turns = void 0;
|
|
645
|
-
if (this.turnBuffer.every((turn) =>
|
|
649
|
+
if (this.turnBuffer.every((turn) => turn.speaker !== void 0 && turn.speaker !== null)) {
|
|
646
650
|
turns = this.turnBuffer.reduce((acc, turn) => {
|
|
647
651
|
if (acc.at(-1)?.speaker === turn.speaker) {
|
|
648
652
|
acc.at(-1).text += turn.text;
|
|
@@ -760,24 +764,24 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
|
|
|
760
764
|
reset() {
|
|
761
765
|
}
|
|
762
766
|
};
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
767
|
+
|
|
768
|
+
// src/voice/textToSpeech/elevenlabs.ts
|
|
769
|
+
var ElevenLabsVoice = /* @__PURE__ */ ((ElevenLabsVoice2) => {
|
|
770
|
+
ElevenLabsVoice2["chris"] = "iP95p4xoKVk53GoZ742B";
|
|
771
|
+
ElevenLabsVoice2["josh"] = "TxGEqnHWrfWFTfGW9XjX";
|
|
772
|
+
ElevenLabsVoice2["rachel"] = "21m00Tcm4TlvDq8ikWAM";
|
|
773
|
+
ElevenLabsVoice2["laura"] = "FGY2WhTYpPnrIDTdsKH5";
|
|
774
|
+
ElevenLabsVoice2["felicity"] = "aTbnroHRGIomiKpqAQR8";
|
|
775
|
+
return ElevenLabsVoice2;
|
|
776
|
+
})(ElevenLabsVoice || {});
|
|
771
777
|
var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
772
|
-
|
|
778
|
+
apiKey;
|
|
773
779
|
model;
|
|
774
780
|
voice;
|
|
775
781
|
config;
|
|
776
782
|
constructor(args) {
|
|
777
783
|
super();
|
|
778
|
-
this.
|
|
779
|
-
apiKey: process.env.ELEVENLABS_API_KEY
|
|
780
|
-
});
|
|
784
|
+
this.apiKey = args.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
781
785
|
this.model = args.model;
|
|
782
786
|
this.voice = args.voice;
|
|
783
787
|
this.config = args.config ?? {};
|
|
@@ -789,14 +793,28 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
|
789
793
|
return;
|
|
790
794
|
}
|
|
791
795
|
const textToSend = text.replaceAll(/([A-Z])-([A-Z])/g, "$1 - $2").replaceAll(/([0-9])-([0-9])/g, "$1 - $2").replaceAll(/(-\s*[A-Z])\s+([A-Z]\s*-)/g, "$1 - $2").replaceAll(/(-\s*[0-9])\s+([0-9]\s*-)/g, "$1 - $2");
|
|
792
|
-
|
|
793
|
-
text
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
796
|
+
fetch(
|
|
797
|
+
`https://api.elevenlabs.io/v1/text-to-speech/${this.voice}/stream?output_format=pcm_48000`,
|
|
798
|
+
{
|
|
799
|
+
method: "POST",
|
|
800
|
+
headers: {
|
|
801
|
+
"Content-Type": "application/json",
|
|
802
|
+
"xi-api-key": this.apiKey
|
|
803
|
+
},
|
|
804
|
+
body: JSON.stringify({
|
|
805
|
+
text: textToSend,
|
|
806
|
+
model_id: this.model,
|
|
807
|
+
...this.config
|
|
808
|
+
})
|
|
809
|
+
}
|
|
810
|
+
).then(async (response) => {
|
|
811
|
+
const reader = response.body?.getReader();
|
|
812
|
+
if (!reader) return;
|
|
813
|
+
new TextDecoder();
|
|
814
|
+
while (true) {
|
|
815
|
+
const { done, value } = await reader.read();
|
|
816
|
+
if (done) break;
|
|
817
|
+
this.onOutput(Buffer.from(value), requestId);
|
|
800
818
|
}
|
|
801
819
|
this.onOutput(null, requestId);
|
|
802
820
|
console.log("[ElevenLabs] Finished:", textToSend);
|
|
@@ -882,7 +900,7 @@ exports.DeepgramModel = DeepgramModel;
|
|
|
882
900
|
exports.DeepgramSTT = DeepgramSTT;
|
|
883
901
|
exports.DeepgramTTS = DeepgramTTS;
|
|
884
902
|
exports.ElevenLabsTTS = ElevenLabsTTS;
|
|
885
|
-
exports.
|
|
903
|
+
exports.ElevenLabsVoice = ElevenLabsVoice;
|
|
886
904
|
exports.HumeTTS = HumeTTS;
|
|
887
905
|
exports.MagmaFlow = MagmaFlow;
|
|
888
906
|
exports.MagmaFlowSpeechToText = MagmaFlowSpeechToText;
|
package/dist/voice.mjs
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { DeepgramClient, LiveTranscriptionEvents } from '@deepgram/sdk';
|
|
2
|
-
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js';
|
|
3
2
|
import { HumeClient } from 'hume';
|
|
4
3
|
import OpenAI from 'openai';
|
|
5
4
|
|
|
@@ -423,6 +422,7 @@ var MagmaFlow = class {
|
|
|
423
422
|
generatingAudio = false;
|
|
424
423
|
currentRequestId = null;
|
|
425
424
|
audioBuffer = [];
|
|
425
|
+
lastChunk = null;
|
|
426
426
|
config = {
|
|
427
427
|
pauseDurationMs: 500,
|
|
428
428
|
sentenceChunkLength: 50
|
|
@@ -440,9 +440,8 @@ var MagmaFlow = class {
|
|
|
440
440
|
return;
|
|
441
441
|
}
|
|
442
442
|
if (!audio) {
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
const lastChunkSamples = bufferToInt16Array(lastChunk);
|
|
443
|
+
if (this.lastChunk) {
|
|
444
|
+
const lastChunkSamples = bufferToInt16Array(this.lastChunk);
|
|
446
445
|
const lastSampleValue = lastChunkSamples[lastChunkSamples.length - 1];
|
|
447
446
|
this.audioBuffer.push(
|
|
448
447
|
Buffer.from(
|
|
@@ -456,10 +455,15 @@ var MagmaFlow = class {
|
|
|
456
455
|
}
|
|
457
456
|
this.sendAudio();
|
|
458
457
|
this.generatingAudio = false;
|
|
458
|
+
this.lastChunk = null;
|
|
459
459
|
this.generateAudio();
|
|
460
460
|
return;
|
|
461
461
|
}
|
|
462
462
|
this.audioBuffer.push(audio);
|
|
463
|
+
this.lastChunk = audio;
|
|
464
|
+
if (this.audioBuffer.reduce((acc, curr) => acc + curr.length, 0) % (2 * this.outputFormat.channels) === 0) {
|
|
465
|
+
this.sendAudio();
|
|
466
|
+
}
|
|
463
467
|
};
|
|
464
468
|
this.stt.onOutput = args.onTranscription;
|
|
465
469
|
this.stt.onSpeechDetected = args.onSpeechDetected;
|
|
@@ -499,13 +503,13 @@ var MagmaFlow = class {
|
|
|
499
503
|
sendAudio() {
|
|
500
504
|
if (this.audioBuffer.length === 0) return;
|
|
501
505
|
const concatenatedBuffer = Buffer.concat(this.audioBuffer);
|
|
506
|
+
this.audioBuffer = [];
|
|
502
507
|
const resampledPCM = resamplePcm(
|
|
503
508
|
bufferToInt16Array(concatenatedBuffer),
|
|
504
509
|
uniformSampleRate,
|
|
505
510
|
this.outputFormat.sampleRate
|
|
506
511
|
);
|
|
507
512
|
const encodedAudio = encodePcm(resampledPCM, this.outputFormat.encoding);
|
|
508
|
-
this.audioBuffer = [];
|
|
509
513
|
try {
|
|
510
514
|
this.onAudioOutput(encodedAudio);
|
|
511
515
|
} catch (error) {
|
|
@@ -636,7 +640,7 @@ var DeepgramSTT = class extends MagmaFlowSpeechToText {
|
|
|
636
640
|
}
|
|
637
641
|
const text = this.turnBuffer.map((turn) => turn.text).join(" ");
|
|
638
642
|
let turns = void 0;
|
|
639
|
-
if (this.turnBuffer.every((turn) =>
|
|
643
|
+
if (this.turnBuffer.every((turn) => turn.speaker !== void 0 && turn.speaker !== null)) {
|
|
640
644
|
turns = this.turnBuffer.reduce((acc, turn) => {
|
|
641
645
|
if (acc.at(-1)?.speaker === turn.speaker) {
|
|
642
646
|
acc.at(-1).text += turn.text;
|
|
@@ -754,24 +758,24 @@ var DeepgramTTS = class extends MagmaFlowTextToSpeech {
|
|
|
754
758
|
reset() {
|
|
755
759
|
}
|
|
756
760
|
};
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
761
|
+
|
|
762
|
+
// src/voice/textToSpeech/elevenlabs.ts
|
|
763
|
+
var ElevenLabsVoice = /* @__PURE__ */ ((ElevenLabsVoice2) => {
|
|
764
|
+
ElevenLabsVoice2["chris"] = "iP95p4xoKVk53GoZ742B";
|
|
765
|
+
ElevenLabsVoice2["josh"] = "TxGEqnHWrfWFTfGW9XjX";
|
|
766
|
+
ElevenLabsVoice2["rachel"] = "21m00Tcm4TlvDq8ikWAM";
|
|
767
|
+
ElevenLabsVoice2["laura"] = "FGY2WhTYpPnrIDTdsKH5";
|
|
768
|
+
ElevenLabsVoice2["felicity"] = "aTbnroHRGIomiKpqAQR8";
|
|
769
|
+
return ElevenLabsVoice2;
|
|
770
|
+
})(ElevenLabsVoice || {});
|
|
765
771
|
var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
766
|
-
|
|
772
|
+
apiKey;
|
|
767
773
|
model;
|
|
768
774
|
voice;
|
|
769
775
|
config;
|
|
770
776
|
constructor(args) {
|
|
771
777
|
super();
|
|
772
|
-
this.
|
|
773
|
-
apiKey: process.env.ELEVENLABS_API_KEY
|
|
774
|
-
});
|
|
778
|
+
this.apiKey = args.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
775
779
|
this.model = args.model;
|
|
776
780
|
this.voice = args.voice;
|
|
777
781
|
this.config = args.config ?? {};
|
|
@@ -783,14 +787,28 @@ var ElevenLabsTTS = class extends MagmaFlowTextToSpeech {
|
|
|
783
787
|
return;
|
|
784
788
|
}
|
|
785
789
|
const textToSend = text.replaceAll(/([A-Z])-([A-Z])/g, "$1 - $2").replaceAll(/([0-9])-([0-9])/g, "$1 - $2").replaceAll(/(-\s*[A-Z])\s+([A-Z]\s*-)/g, "$1 - $2").replaceAll(/(-\s*[0-9])\s+([0-9]\s*-)/g, "$1 - $2");
|
|
786
|
-
|
|
787
|
-
text
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
790
|
+
fetch(
|
|
791
|
+
`https://api.elevenlabs.io/v1/text-to-speech/${this.voice}/stream?output_format=pcm_48000`,
|
|
792
|
+
{
|
|
793
|
+
method: "POST",
|
|
794
|
+
headers: {
|
|
795
|
+
"Content-Type": "application/json",
|
|
796
|
+
"xi-api-key": this.apiKey
|
|
797
|
+
},
|
|
798
|
+
body: JSON.stringify({
|
|
799
|
+
text: textToSend,
|
|
800
|
+
model_id: this.model,
|
|
801
|
+
...this.config
|
|
802
|
+
})
|
|
803
|
+
}
|
|
804
|
+
).then(async (response) => {
|
|
805
|
+
const reader = response.body?.getReader();
|
|
806
|
+
if (!reader) return;
|
|
807
|
+
new TextDecoder();
|
|
808
|
+
while (true) {
|
|
809
|
+
const { done, value } = await reader.read();
|
|
810
|
+
if (done) break;
|
|
811
|
+
this.onOutput(Buffer.from(value), requestId);
|
|
794
812
|
}
|
|
795
813
|
this.onOutput(null, requestId);
|
|
796
814
|
console.log("[ElevenLabs] Finished:", textToSend);
|
|
@@ -871,4 +889,4 @@ var WhisperTTS = class extends MagmaFlowTextToSpeech {
|
|
|
871
889
|
}
|
|
872
890
|
};
|
|
873
891
|
|
|
874
|
-
export { DeepgramLanguage, DeepgramModel, DeepgramSTT, DeepgramTTS, ElevenLabsTTS,
|
|
892
|
+
export { DeepgramLanguage, DeepgramModel, DeepgramSTT, DeepgramTTS, ElevenLabsTTS, ElevenLabsVoice, HumeTTS, MagmaFlow, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, WhisperTTS, splitTextIntoChunks };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pompeii-labs/audio",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "The Audio SDK from Pompeii Labs",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"Pompeii",
|
|
@@ -42,7 +42,6 @@
|
|
|
42
42
|
},
|
|
43
43
|
"dependencies": {
|
|
44
44
|
"@deepgram/sdk": "4.2.0",
|
|
45
|
-
"@elevenlabs/elevenlabs-js": "2.2.0",
|
|
46
45
|
"hume": "0.11.1",
|
|
47
46
|
"openai": "4.86.2"
|
|
48
47
|
},
|