voicemix 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +94 -0
- package/demo/after_the_funera_i_was_in_my_1fr49si.mp3 +0 -0
- package/demo/after_the_funera_i_was_in_my_1uppdqg.mp3 +0 -0
- package/demo/grandm_maggie_did_everyt_she_14brhqm.mp3 +0 -0
- package/demo/grandm_maggie_did_everyt_she_1bjlr65.mp3 +0 -0
- package/demo/index.js +42 -0
- package/demo/lines.json +21 -0
- package/demo/my_parent_were_gone_and_no_1v79j9e.mp3 +0 -0
- package/demo/my_parent_were_gone_and_no_1vyohw8.mp3 +0 -0
- package/demo/package-lock.json +28 -0
- package/demo/package.json +16 -0
- package/errors.js +48 -0
- package/index.js +289 -0
- package/package.json +29 -0
- package/providers/elevenlabs.js +116 -0
- package/providers/resemble.js +138 -0
package/README.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# VoiceMix
|
|
2
|
+
|
|
3
|
+
VoiceMix is a chainable text-to-speech library for Node.js. It generates speech audio files from text using ElevenLabs (the default provider) or Resemble AI, with selectable voices, languages, and style prompts.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install voicemix dotenv
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Environment Setup
|
|
12
|
+
|
|
13
|
+
Create a `.env` file in your project root with your API keys:
|
|
14
|
+
|
|
15
|
+
```plaintext
|
|
16
|
+
ELEVENLABS_API_KEY=your_elevenlabs_api_key_here
|
|
17
|
+
RESEMBLE_API_KEY=your_resemble_api_key_here
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
### Basic Example
|
|
23
|
+
|
|
24
|
+
```javascript
|
|
25
|
+
import { VoiceMix } from 'voicemix';
|
|
26
|
+
import dotenv from 'dotenv';
|
|
27
|
+
dotenv.config();
|
|
28
|
+
|
|
29
|
+
const voiceMix = new VoiceMix();
|
|
30
|
+
|
|
31
|
+
voiceMix
|
|
32
|
+
.voice('EbhcCfMvNsbvjN6OhjpJ')
|
|
33
|
+
.say('Hello, world!')
|
|
34
|
+
.save();
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Advanced Usage
|
|
38
|
+
|
|
39
|
+
```javascript
|
|
40
|
+
const voiceMix = new VoiceMix();
|
|
41
|
+
|
|
42
|
+
voiceMix
|
|
43
|
+
.useResemble() // https://www.resemble.ai/
|
|
44
|
+
.prompt('Friendly and conversational tone') // Set voice prompt/style
|
|
45
|
+
.voice('ba875a0a') // Select specific voice
|
|
46
|
+
.lang('en-US') // Set language
|
|
47
|
+
.say('Your text here')
|
|
48
|
+
.save();
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Batch Processing
|
|
52
|
+
|
|
53
|
+
You can synthesize multiple lines of text by looping over entries loaded from a JSON file:
|
|
54
|
+
|
|
55
|
+
```javascript
|
|
56
|
+
import { VoiceMix } from 'voicemix';
|
|
57
|
+
import fs from 'fs';
|
|
58
|
+
|
|
59
|
+
// Read script from JSON file
|
|
60
|
+
const script = JSON.parse(fs.readFileSync('./lines.json', 'utf8'));
|
|
61
|
+
const voiceMix = new VoiceMix();
|
|
62
|
+
|
|
63
|
+
// Process each line
|
|
64
|
+
for (const entry of script) {
|
|
65
|
+
voiceMix
|
|
66
|
+
.prompt(entry.prompt || 'Friendly and conversational tone')
|
|
67
|
+
.voice(entry.voiceId)
|
|
68
|
+
.say(entry.english)
|
|
69
|
+
.save();
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Example `lines.json`:
|
|
74
|
+
```json
|
|
75
|
+
[
|
|
76
|
+
{
|
|
77
|
+
"prompt": "Friendly and conversational tone",
|
|
78
|
+
"english": "Hello, how are you today?",
|
|
79
|
+
"voiceId": "EbhcCfMvNsbvjN6OhjpJ"
|
|
80
|
+
}
|
|
81
|
+
]
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Features
|
|
85
|
+
|
|
86
|
+
- Multiple voice support
|
|
87
|
+
- Language selection
|
|
88
|
+
- Voice prompts for style control (Resemble AI only; `prompt()` is ignored unless `useResemble()` was called first)
|
|
89
|
+
- Support for different TTS engines
|
|
90
|
+
- Simple chainable API
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
MIT
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/demo/index.js
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { VoiceMix } from "../index.js";
|
|
2
|
+
import dotenv from "dotenv";
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
|
|
5
|
+
dotenv.config();
|
|
6
|
+
|
|
7
|
+
// Read and parse the lines.json file
|
|
8
|
+
// Resolve lines.json next to this file so the demo does not depend on the
// directory it is launched from.
const script = JSON.parse(
  fs.readFileSync(new URL('./lines.json', import.meta.url), 'utf8')
);

// ElevenLabs is the default provider. To try Resemble instead, chain
// `.useResemble().lang('en-US')` here and pick a Resemble voice below.
const voiceMix = new VoiceMix();

// Queue one synthesis request per script entry. save() returns a promise, so
// keep them all and wait for the whole batch before reporting.
const pending = script.map((entry) =>
  voiceMix
    // prompt() only has an effect when the Resemble provider is active.
    .prompt(entry.prompt || 'Friendly and conversational tone')
    .voice('EbhcCfMvNsbvjN6OhjpJ')
    .say(entry.english)
    .save()
);

// allSettled: one failed line must not hide the outcome of the others, and no
// rejection is left unhandled.
const outcomes = await Promise.allSettled(pending);
const failed = outcomes.filter((outcome) => outcome.status === 'rejected');

if (failed.length > 0) {
  console.error(`${failed.length} of ${outcomes.length} lines could not be synthesized.`);
  process.exitCode = 1;
}
|
|
21
|
+
|
|
22
|
+
// const voiceMix = new VoiceMix();
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
// voiceMix.voice("dBRpMbVO8Bjs9wBZC6cb")
|
|
26
|
+
// voiceMix.say(`As soon as someone picked up, he said: "Dad, it's me, listen carefully. I'm at DAF2... with Liev Thibot."`);
|
|
27
|
+
// voiceMix.save();
|
|
28
|
+
|
|
29
|
+
// let line = `As soon as someone picked up, he said: "Dad, it's me, listen carefully. I'm at DAF2... with Liev Thibot."`;
|
|
30
|
+
|
|
31
|
+
// // line = `Pero recuerdo la sensación. Fue como si alguien hubiera sacado todo el aire de la habitación.`;
|
|
32
|
+
|
|
33
|
+
// const voiceMix = new VoiceMix();
|
|
34
|
+
// voiceMix.useResemble()
|
|
35
|
+
// .prompt('clears his throat before starting')
|
|
36
|
+
// // .voice('0a73c559')
|
|
37
|
+
// // .lang('es-us')
|
|
38
|
+
// // .voice('f23e8ffe')
|
|
39
|
+
// // .voice('fcf8490c')
|
|
40
|
+
// .voice('ba875a0a') // Peter v2
|
|
41
|
+
// .say(line)
|
|
42
|
+
// .save();
|
package/demo/lines.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[
|
|
2
|
+
|
|
3
|
+
{
|
|
4
|
+
"speaker": "@PeterCabot",
|
|
5
|
+
"spanish": "La abuela Maggie hizo todo lo posible para mantener las cosas normales, pero nada era normal.",
|
|
6
|
+
"english": "Grandma Maggie did everything she could to keep things normal, but nothing was normal.",
|
|
7
|
+
"prompt": "Appreciative but resigned tone"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"speaker": "@PeterCabot",
|
|
11
|
+
"spanish": "Mis padres se habían ido y no importaba cuánto lo deseara, no iban a volver.",
|
|
12
|
+
"english": "My parents were gone and no matter how much I wished it, they weren't coming back.",
|
|
13
|
+
"prompt": ""
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"speaker": "@PeterCabot",
|
|
17
|
+
"spanish": "Después del funeral, estaba en mi habitación, jugando con el camión rojo que mamá me había regalado para mi cumpleaños.",
|
|
18
|
+
"english": "After the funeral, I was in my room, playing with the red truck that Mom had given me for my birthday.",
|
|
19
|
+
"prompt": ""
|
|
20
|
+
}
|
|
21
|
+
]
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "demo",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"lockfileVersion": 3,
|
|
5
|
+
"requires": true,
|
|
6
|
+
"packages": {
|
|
7
|
+
"": {
|
|
8
|
+
"name": "demo",
|
|
9
|
+
"version": "1.0.0",
|
|
10
|
+
"license": "ISC",
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"dotenv": "^16.5.0"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"node_modules/dotenv": {
|
|
16
|
+
"version": "16.5.0",
|
|
17
|
+
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz",
|
|
18
|
+
"integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==",
|
|
19
|
+
"license": "BSD-2-Clause",
|
|
20
|
+
"engines": {
|
|
21
|
+
"node": ">=12"
|
|
22
|
+
},
|
|
23
|
+
"funding": {
|
|
24
|
+
"url": "https://dotenvx.com"
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "demo",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
8
|
+
},
|
|
9
|
+
"keywords": [],
|
|
10
|
+
"author": "",
|
|
11
|
+
"license": "ISC",
|
|
12
|
+
"description": "",
|
|
13
|
+
"dependencies": {
|
|
14
|
+
"dotenv": "^16.5.0"
|
|
15
|
+
}
|
|
16
|
+
}
|
package/errors.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Base class for the errors raised by this package.
 *
 * @param {string} message - Human-readable description of the failure.
 * @param {object} [details={}] - Extra key/value context attached to the
 *   error as `details`.
 */
export class VoiceMixError extends Error {
  constructor(message, details = {}) {
    super(message);
    this.name = 'VoiceMixError';
    // The default parameter only covers `undefined`. Normalise an explicit
    // `null` as well so `details` can always be enumerated safely.
    this.details = details ?? {};
  }
}
|
|
8
|
+
|
|
9
|
+
/**
 * Error raised for failures attributed to a TTS provider.
 *
 * @param {string} message - Human-readable description of the failure.
 * @param {string} provider - Identifier of the provider, stored as `provider`.
 * @param {object} [details={}] - Extra context forwarded to the base class.
 */
export class ProviderError extends VoiceMixError {
  constructor(message, provider, details = {}) {
    super(message, details);
    // Override the name set by the base class and record the provider.
    Object.assign(this, { name: 'ProviderError', provider });
  }
}
|
|
16
|
+
|
|
17
|
+
/**
 * Error raised when a caller-supplied value is rejected.
 *
 * @param {string} message - Human-readable description of the failure.
 * @param {string} field - Name of the offending field, stored as `field`.
 * @param {object} [details={}] - Extra context forwarded to the base class.
 */
export class ValidationError extends VoiceMixError {
  constructor(message, field, details = {}) {
    super(message, details);
    // Override the name set by the base class and record the field.
    Object.assign(this, { name: 'ValidationError', field });
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Render an error as a human-readable, possibly multi-line string.
 *
 * For a `VoiceMixError` the result starts with `name: message`, followed by a
 * `Provider:` line (ProviderError), a `Field:` line (ValidationError) and a
 * `Details:` section listing every entry of `error.details`. Anything else is
 * reduced to its `message`, or a generic fallback when there is none.
 *
 * @param {unknown} error - The value that was thrown.
 * @returns {string} The formatted description.
 */
export function formatError(error) {
  if (!(error instanceof VoiceMixError)) {
    // Optional chaining: a thrown `null`/`undefined` must not make the
    // formatter itself throw.
    return error?.message || 'An unknown error occurred';
  }

  const lines = [`${error.name}: ${error.message}`];

  if (error instanceof ProviderError) {
    lines.push(`Provider: ${error.provider}`);
  }

  if (error instanceof ValidationError) {
    lines.push(`Field: ${error.field}`);
  }

  // `details` may have been reassigned to null after construction.
  const detailEntries = Object.entries(error.details ?? {});
  if (detailEntries.length > 0) {
    lines.push('Details:');
    for (const [key, value] of detailEntries) {
      lines.push(` ${key}: ${describeDetailValue(value)}`);
    }
  }

  return lines.join('\n');
}

/**
 * Turn one detail value into text. Primitives keep their plain string form;
 * objects are serialised as JSON so they do not collapse to "[object Object]".
 */
function describeDetailValue(value) {
  if (value === null || typeof value !== 'object') {
    return `${value}`;
  }
  if (value instanceof Error) {
    // JSON.stringify(error) is "{}"; the string form carries name and message.
    return String(value);
  }
  try {
    // JSON.stringify returns undefined when toJSON() yields undefined.
    return JSON.stringify(value) ?? String(value);
  } catch {
    // Circular structures (e.g. stream objects) cannot be serialised.
    return String(value);
  }
}
|
package/index.js
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import hashFactory from 'hash-factory';
|
|
4
|
+
import { ElevenLabsProvider } from './providers/elevenlabs.js';
|
|
5
|
+
import { ResembleProvider } from './providers/resemble.js';
|
|
6
|
+
import { ValidationError, formatError } from './errors.js';
|
|
7
|
+
|
|
8
|
+
// Hash helper used by VoiceMix#_filename to derive an output file name from the text and voice settings.
const hash = hashFactory({ words: true, alpha: true });
|
|
9
|
+
// Variant created with `now: true`; VoiceMix#_filename uses it when `randPosfix` is set (presumably so repeated calls get distinct names — confirm against the hash-factory docs).
const hashNow = hashFactory({ words: true, alpha: true, now: true });
|
|
10
|
+
|
|
11
|
+
/**
 * Chainable text-to-speech client.
 *
 * Configure a request with the fluent setters (`voice`, `lang`, `prompt`,
 * `say`, ...) and call `save()` to queue it. Queued requests are sent to the
 * active provider in batches of `batchSize`; each `save()` call returns a
 * promise that settles with that request's own result.
 */
export class VoiceMix {
  /** Voice aliases consulted by `actor(key)`: alias -> voice ID. */
  voices = {};

  /**
   * @param {object} [opts] - Overrides copied onto the instance after the
   *   defaults are set (e.g. `filePath`, `format`, `batchSize`, `drymode`).
   *   `opts.apiKey` is handed to the default ElevenLabs provider.
   * @throws {ProviderError} When no ElevenLabs API key is available.
   */
  constructor(opts = {}) {
    this.text = 'string';
    this.ttsId = null;
    this.format = 'mp3';
    this.filePath = './';
    this.fileOutput = 'speech';
    this.filePrefix = '';
    this.randPosfix = false;
    this.drymode = false;
    this.promptText = null;
    this.xmlLang = 'en-us';

    this.temperature = "0.8";
    this.exaggeration = "0";
    this.prosodyRate = "100%";
    // this.prosodyPitch = "medium";

    this.requestsQueue = [];   // Requests waiting to be sent
    this.isProcessing = false; // True while a batch is in flight
    this.batchSize = 3;        // Maximum number of requests sent concurrently

    Object.assign(this, opts);

    // Default provider (ElevenLabs).
    // NOTE(review): this is created eagerly, so construction throws when no
    // ElevenLabs key is available even if the caller then calls useResemble().
    this.provider = new ElevenLabsProvider(opts.apiKey);
    this.providerType = 'elevenlabs';

    // Make sure filePath is an absolute path.
    this.filePath = path.resolve(this.filePath);
  }

  /**
   * Switch to a fresh ElevenLabs provider.
   * @param {string} [apiKey] - Falls back to the provider's own default.
   * @returns {this}
   */
  useElevenLabs(apiKey) {
    this.provider = new ElevenLabsProvider(apiKey);
    this.providerType = 'elevenlabs';
    return this;
  }

  /**
   * Switch to a fresh Resemble provider.
   * @param {string} [apiKey] - Falls back to the provider's own default.
   * @returns {this}
   */
  useResemble(apiKey) {
    this.provider = new ResembleProvider(apiKey);
    this.providerType = 'resemble';
    return this;
  }

  /** Select the ElevenLabs monolingual v1 model (no-op for other providers). */
  monolingual_v1() {
    if (this.providerType === 'elevenlabs') {
      this.provider.monolingual_v1();
    }
    return this;
  }

  /** Select the ElevenLabs multilingual v1 model (no-op for other providers). */
  multilingual_v1() {
    if (this.providerType === 'elevenlabs') {
      this.provider.multilingual_v1();
    }
    return this;
  }

  /** Select the ElevenLabs multilingual v2 model (no-op for other providers). */
  multilingual_v2() {
    if (this.providerType === 'elevenlabs') {
      this.provider.multilingual_v2();
    }
    return this;
  }

  /** Set the Resemble sample rate (no-op for other providers). */
  setSampleRate(rate) {
    if (this.providerType === 'resemble') {
      this.provider.setSampleRate(rate);
    }
    return this;
  }

  /** Set the Resemble sample precision (no-op for other providers). */
  setPrecision(precision) {
    if (this.providerType === 'resemble') {
      this.provider.setPrecision(precision);
    }
    return this;
  }

  /**
   * Set the Resemble output format (no-op for other providers).
   * The provider validates the value and throws on an unsupported format.
   */
  setOutputFormat(format) {
    if (this.providerType === 'resemble') {
      this.provider.setOutputFormat(format);
      // save() sends `this.format` to the provider and uses it as the file
      // extension, so it has to follow the provider setting; otherwise this
      // setter has no effect on the generated audio.
      this.format = format;
    }
    return this;
  }

  /** Set the voice ID (same effect as `voice`). */
  id(id) {
    this.ttsId = id;
    return this;
  }

  /** Select a voice through its alias in `voices`; unknown aliases are ignored. */
  actor(key) {
    if (!this.voices[key]) return this;
    this.ttsId = this.voices[key];
    return this;
  }

  /** Set the output directory; stored as an absolute path. */
  path(v) {
    this.filePath = path.resolve(v);
    return this;
  }

  /** Set the prefix prepended to generated file names. */
  prefix(v) {
    this.filePrefix = v;
    return this;
  }

  /** Set the voice ID (same effect as `id`). */
  voice(id) {
    this.ttsId = id;
    return this;
  }

  /** @returns {boolean} Whether the current output file already exists. */
  exists() {
    return fs.existsSync(this.fullPath());
  }

  /** @returns {string} Output directory joined with the current file name. */
  fullPath() {
    return path.join(this.filePath, this.fileName());
  }

  /** @returns {string} Current output name plus the format extension. */
  fileName() {
    return this.fileOutput + "." + this.format;
  }

  /**
   * Derive the output base name for `text` from the text and the current
   * prompt, language, voice and provider.
   */
  _filename(text) {
    // This exact string (an unset prompt contributes "null") is what gets
    // hashed; changing its shape would rename previously generated files.
    const seed = text + " " + this.promptText + " " + this.xmlLang + " " + this.ttsId
      + " " + this.providerType;
    const filename = this.randPosfix ? hashNow(seed) : hash(seed);
    return this.filePrefix + filename;
  }

  /**
   * Set the text to synthesize and derive the output file name from it.
   * Call after `voice`, `lang` and `prompt`, since those feed the name.
   */
  say(text) {
    this.text = text;
    return this.file(this._filename(text));
  }

  /** Set the output base name (without extension). */
  file(fileName) {
    this.fileOutput = fileName;
    return this;
  }

  /**
   * Queue the current configuration for synthesis.
   *
   * @returns {Promise<string>} Settles with the provider's result for this
   *   request. In dry mode it resolves immediately with the output base name
   *   and nothing is sent.
   */
  save() {
    if (this.drymode) return Promise.resolve(this.fileOutput);

    // Snapshot everything the request depends on, including the provider:
    // only the first request is sent synchronously, so later chained calls
    // (new text, voice, or even a provider switch) must not leak into
    // requests that are still waiting in the queue.
    const requestSnapshot = {
      fileName: this.fileName(),
      text: this.text,
      ttsId: this.ttsId,
      format: this.format,
      filePath: this.filePath,
      promptText: this.promptText,
      xmlLang: this.xmlLang,
      shouldAddBreakTags: this.shouldAddBreakTags, // not read by this class
      temperature: this.temperature,
      exaggeration: this.exaggeration,
      prosodyRate: this.prosodyRate,
      prosodyPitch: this.prosodyPitch,
      provider: this.provider,
      providerType: this.providerType
    };

    return new Promise((resolve, reject) => {
      this.requestsQueue.push({ ...requestSnapshot, resolve, reject });
      this._processBatch();
    });
  }

  /**
   * Send the next batch of queued requests, then recurse until the queue is
   * empty. Does nothing while another batch is already in flight.
   */
  _processBatch() {
    if (this.isProcessing || this.requestsQueue.length === 0) return;

    // A non-positive or non-integer batch size would take nothing off the
    // queue and loop forever; fall back to one request at a time.
    const size = Number.isInteger(this.batchSize) && this.batchSize > 0 ? this.batchSize : 1;

    this.isProcessing = true;
    const batch = this.requestsQueue.splice(0, size);

    // Every request settles its own promise, so Promise.all never rejects.
    Promise.all(batch.map((request) =>
      this._sendRequest(request).then(
        (fileName) => request.resolve(fileName),
        (error) => request.reject(error)
      )
    )).then(() => {
      this.isProcessing = false;
      this._processBatch();
    });
  }

  /**
   * Send one queued request to its provider.
   *
   * @param {object} request - Snapshot created by `save()`.
   * @returns {Promise<string>} Whatever the provider's `save` resolves with.
   * @throws {ValidationError} When the request has no voice ID.
   */
  async _sendRequest(request) {
    if (this.drymode) return request.fileName;

    try {
      if (!request.ttsId) {
        throw new ValidationError('Voice ID is required', 'ttsId');
      }

      // Prefer the provider captured at save() time; fall back to the
      // current one for requests built without a snapshot.
      const provider = request.provider ?? this.provider;

      const text = this.addBreakTags(request.text, {
        promptText: request.promptText,
        xmlLang: request.xmlLang,
        temperature: request.temperature,
        exaggeration: request.exaggeration,
        prosodyRate: request.prosodyRate,
        prosodyPitch: request.prosodyPitch,
        providerType: request.providerType ?? this.providerType
      });

      return await provider.save(
        request.ttsId,
        text,
        request.format,
        request.filePath,
        request.fileName
      );
    } catch (error) {
      console.error(formatError(error));
      throw error;
    }
  }

  /** Set the language used for the `xml:lang` wrapper. */
  lang(lang) {
    this.xmlLang = lang;
    return this;
  }

  /**
   * Wrap `text` in the markup used for Resemble requests: an optional
   * `<prosody>` element, an optional `<speak>` element carrying the prompt,
   * and an optional `<lang>` element. Other providers get `text` unchanged.
   * The text itself is not escaped, so callers may embed their own tags.
   *
   * @param {string} text - Text to wrap.
   * @param {object} [options] - `promptText`, `xmlLang`, `temperature`,
   *   `exaggeration`, `prosodyRate`, `prosodyPitch`, and optionally
   *   `providerType` (defaults to the instance's current provider type).
   *   The object is not modified.
   * @returns {string} The wrapped text.
   */
  addBreakTags(text, options = {}) {
    const providerType = options.providerType ?? this.providerType;
    if (providerType !== 'resemble') return text;

    // Attributes for the speak element.
    const speakAttributes = [];
    let prosodyRate = options.prosodyRate;

    if (options.promptText) {
      // Prompted speech is always rendered at 110% (existing behavior); the
      // override stays local instead of being written into `options`.
      prosodyRate = "110%";
      speakAttributes.push(`prompt="${escapeMarkupAttribute(options.promptText)}"`);
      // speakAttributes.push(`xml:lang="en-us"`);
      if (isAttributeSet(options.temperature)) {
        speakAttributes.push(`temperature="${escapeMarkupAttribute(options.temperature)}"`);
      }
      if (isAttributeSet(options.exaggeration)) {
        speakAttributes.push(`exaggeration="${escapeMarkupAttribute(options.exaggeration)}"`);
      }
    }

    // Attributes for the prosody element.
    const prosodyAttributes = [];
    if (prosodyRate) prosodyAttributes.push(`rate="${escapeMarkupAttribute(prosodyRate)}"`);
    if (options.prosodyPitch) prosodyAttributes.push(`pitch="${escapeMarkupAttribute(options.prosodyPitch)}"`);

    if (prosodyAttributes.length > 0) {
      text = `<prosody ${prosodyAttributes.join(' ')}>${text}</prosody>`;
    }

    // Only wrap in a speak element when it would carry attributes.
    if (speakAttributes.length > 0) {
      text = `<speak ${speakAttributes.join(' ')}>${text}</speak>`;
    }

    if (options.xmlLang) {
      text = `<lang xml:lang="${escapeMarkupAttribute(options.xmlLang)}">${text}</lang>`;
    }

    return text;
  }

  /**
   * Set the style prompt. Ignored unless the Resemble provider is active, so
   * call `useResemble()` first.
   */
  prompt(promptText) {
    if (this.providerType !== 'resemble') return this;
    this.promptText = promptText;
    return this;
  }
}

/** True for any truthy value and for numeric 0, which is a meaningful setting. */
function isAttributeSet(value) {
  return value === 0 || Boolean(value);
}

/** Escape a value for use inside a double-quoted markup attribute. */
function escapeMarkupAttribute(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
|
|
289
|
+
|
package/package.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "voicemix",
|
|
3
|
+
"type": "module",
|
|
4
|
+
"version": "1.0.0",
|
|
5
|
+
"description": "🗣️ text-to-speech tool using ElevenLabs and Resemble AI APIs.",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
9
|
+
},
|
|
10
|
+
"author": "Martin Clasen",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"dependencies": {
|
|
13
|
+
"axios": "^1.9.0",
|
|
14
|
+
"hash-factory": "^1.1.0"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"text-to-speech",
|
|
18
|
+
"tts",
|
|
19
|
+
"elevenlabs",
|
|
20
|
+
"resemble-ai",
|
|
21
|
+
"voice",
|
|
22
|
+
"speech",
|
|
23
|
+
"audio",
|
|
24
|
+
"synthesis",
|
|
25
|
+
"prompt",
|
|
26
|
+
"tone",
|
|
27
|
+
"clasen"
|
|
28
|
+
]
|
|
29
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { ProviderError } from '../errors.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Text-to-speech provider backed by the ElevenLabs HTTP API.
 */
export class ElevenLabsProvider {
  /**
   * @param {string} [apiKey] - API key; falls back to the
   *   `ELEVENLABS_API_KEY` environment variable.
   * @throws {ProviderError} When no API key is available.
   */
  constructor(apiKey) {
    this.apiKey = apiKey || process.env.ELEVENLABS_API_KEY;
    this.model_id = 'eleven_multilingual_v2';
    this.voice_settings = {
      stability: 0.5,
      similarity_boost: 0.5,
      style: 0.4,
      use_speaker_boost: true
    };

    if (!this.apiKey) {
      throw new ProviderError('ElevenLabs API key is required', 'elevenlabs');
    }
  }

  /** Use the `eleven_monolingual_v1` model. @returns {this} */
  monolingual_v1() {
    this.model_id = 'eleven_monolingual_v1';
    return this;
  }

  /** Use the `eleven_multilingual_v1` model. @returns {this} */
  multilingual_v1() {
    this.model_id = 'eleven_multilingual_v1';
    return this;
  }

  /** Use the `eleven_multilingual_v2` model. @returns {this} */
  multilingual_v2() {
    this.model_id = 'eleven_multilingual_v2';
    return this;
  }

  /**
   * Build the axios request configuration for one synthesis call.
   *
   * @param {string} voiceId - Voice to synthesize with.
   * @param {string} text - Text to synthesize.
   * @param {string} format - Audio format; "mp3" is requested as audio/mpeg.
   * @returns {object} Request configuration with a streamed response.
   */
  _getRequestOptions(voiceId, text, format) {
    return {
      method: "post",
      // Encode the ID so an unexpected character cannot alter the request path.
      url: `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
      headers: {
        accept: "audio/" + (format === "mp3" ? "mpeg" : format),
        "xi-api-key": this.apiKey,
        "Content-Type": "application/json",
      },
      data: {
        text,
        model_id: this.model_id,
        voice_settings: this.voice_settings,
      },
      responseType: "stream",
    };
  }

  /**
   * Synthesize `text` and stream the audio into `filePath/fileName`.
   *
   * @param {string} voiceId - Voice to synthesize with.
   * @param {string} text - Text to synthesize.
   * @param {string} format - Audio format passed to the request.
   * @param {string} filePath - Output directory; created when missing.
   * @param {string} fileName - Output file name.
   * @returns {Promise<string>} Full path of the written file.
   * @throws {ProviderError} On invalid arguments, request failure, download
   *   failure or write failure.
   */
  async save(voiceId, text, format, filePath, fileName) {
    try {
      if (!voiceId) {
        throw new ProviderError('Voice ID is required', 'elevenlabs');
      }
      if (!text) {
        throw new ProviderError('Text is required', 'elevenlabs');
      }
      if (!filePath || !fileName) {
        throw new ProviderError('File path and name are required', 'elevenlabs');
      }

      const response = await axios(this._getRequestOptions(voiceId, text, format));

      // `recursive: true` already tolerates an existing directory.
      fs.mkdirSync(filePath, { recursive: true });

      const fullPath = path.join(filePath, fileName);
      const source = response.data;

      return await new Promise((resolve, reject) => {
        const writer = fs.createWriteStream(fullPath);

        // Tear down both ends so neither the socket nor the file descriptor
        // outlives a failed transfer.
        const fail = (message, err) => {
          source.destroy();
          writer.destroy();
          reject(new ProviderError(message, 'elevenlabs', { path: fullPath, error: err.message }));
        };

        writer.on('finish', () => resolve(fullPath));
        writer.on('error', (err) => fail('Failed to write audio file', err));
        // pipe() does not forward source errors; without this handler a
        // dropped connection would leave the promise pending forever.
        source.on('error', (err) => fail('Failed to download audio stream', err));

        source.pipe(writer);
      });
    } catch (error) {
      if (error instanceof ProviderError) {
        throw error;
      }

      if (axios.isAxiosError(error)) {
        // NOTE(review): with a streamed response the error body is presumably
        // a stream rather than parsed JSON — confirm before relying on `data`.
        const details = {
          status: error.response?.status,
          statusText: error.response?.statusText,
          data: error.response?.data
        };

        throw new ProviderError(
          'Failed to save audio from ElevenLabs API',
          'elevenlabs',
          details
        );
      }

      const wrapped = new ProviderError(
        'An unexpected error occurred while saving audio',
        'elevenlabs',
        { originalError: error.message }
      );
      // Keep the original error (and its stack) reachable for debugging.
      wrapped.cause = error;
      throw wrapped;
    }
  }
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { ProviderError, ValidationError } from '../errors.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Text-to-speech provider backed by the Resemble synthesis endpoint.
 */
export class ResembleProvider {
  /**
   * @param {string} [apiKey] - API key; falls back to the
   *   `RESEMBLE_API_KEY` environment variable.
   * @throws {ProviderError} When no API key is available.
   */
  constructor(apiKey) {
    this.apiKey = apiKey || process.env.RESEMBLE_API_KEY;
    this.baseUrl = 'https://f.cluster.resemble.ai';
    this.defaultSettings = {
      sample_rate: 48000,
      output_format: 'mp3',
      precision: 'PCM_16'
    };

    if (!this.apiKey) {
      throw new ProviderError('Resemble API key is required', 'resemble');
    }
  }

  /**
   * Build the axios request configuration for a POST to `endpoint`.
   *
   * @param {string} endpoint - Path appended to the base URL.
   * @param {object} data - JSON request body.
   * @returns {object} Request configuration.
   */
  _getRequestOptions(endpoint, data) {
    return {
      method: 'post',
      url: `${this.baseUrl}${endpoint}`,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json',
        'Accept-Encoding': 'gzip, deflate, br'
      },
      data
    };
  }

  /**
   * Synthesize `text` and write the decoded audio to `filePath/fileName`.
   *
   * @param {string} voiceId - Voice UUID to synthesize with.
   * @param {string} text - Text (or markup) to synthesize.
   * @param {string} [format] - Output format; when omitted, the value set
   *   through `setOutputFormat` is used.
   * @param {string} filePath - Output directory; created when missing.
   * @param {string} fileName - Output file name.
   * @returns {Promise<string>} Full path of the written file.
   * @throws {ProviderError} On invalid arguments, request failure, an
   *   unsuccessful or incomplete response, or write failure.
   */
  async save(voiceId, text, format, filePath, fileName) {
    try {
      if (!voiceId) {
        throw new ProviderError('Voice ID is required', 'resemble');
      }
      if (!text) {
        throw new ProviderError('Text is required', 'resemble');
      }
      if (!filePath || !fileName) {
        throw new ProviderError('File path and name are required', 'resemble');
      }

      const response = await axios(this._getRequestOptions('/synthesize', {
        voice_uuid: voiceId,
        data: text,
        sample_rate: this.defaultSettings.sample_rate,
        // An explicit format wins; otherwise honour setOutputFormat().
        output_format: format ?? this.defaultSettings.output_format,
        precision: this.defaultSettings.precision
      }));

      const payload = response.data;

      if (!payload?.success) {
        // `issues` is joined when it is a list; any other shape is used as-is.
        const issues = payload?.issues;
        const reason = Array.isArray(issues) ? issues.join(', ') : issues;
        throw new ProviderError(
          reason || 'Synthesis failed',
          'resemble',
          payload ?? {}
        );
      }

      // Fail with a specific message rather than letting Buffer.from() throw
      // on a missing field.
      if (typeof payload.audio_content !== 'string' || payload.audio_content.length === 0) {
        throw new ProviderError(
          'Resemble response did not include audio content',
          'resemble',
          { responseKeys: Object.keys(payload).join(', ') }
        );
      }

      // `recursive: true` already tolerates an existing directory.
      fs.mkdirSync(filePath, { recursive: true });

      const fullPath = path.join(filePath, fileName);
      const audioBuffer = Buffer.from(payload.audio_content, 'base64');

      try {
        await fs.promises.writeFile(fullPath, audioBuffer);
      } catch (err) {
        throw new ProviderError(
          'Failed to write audio file',
          'resemble',
          { path: fullPath, error: err.message }
        );
      }

      return fullPath;
    } catch (error) {
      if (error instanceof ProviderError) {
        throw error;
      }

      if (axios.isAxiosError(error)) {
        const details = {
          status: error.response?.status,
          statusText: error.response?.statusText,
          data: error.response?.data
        };

        throw new ProviderError(
          'Failed to save audio from Resemble API',
          'resemble',
          details
        );
      }

      const wrapped = new ProviderError(
        'An unexpected error occurred while saving audio',
        'resemble',
        { originalError: error.message }
      );
      // Keep the original error (and its stack) reachable for debugging.
      wrapped.cause = error;
      throw wrapped;
    }
  }

  // Additional Resemble-specific methods

  /** Set the sample rate sent with each request. @returns {this} */
  setSampleRate(rate) {
    this.defaultSettings.sample_rate = rate;
    return this;
  }

  /**
   * Set the sample precision sent with each request.
   * @throws {ValidationError} When `precision` is not a supported value.
   * @returns {this}
   */
  setPrecision(precision) {
    const validPrecisions = ['MULAW', 'PCM_16', 'PCM_24', 'PCM_32'];
    if (!validPrecisions.includes(precision)) {
      throw new ValidationError(
        `Invalid precision. Must be one of: ${validPrecisions.join(', ')}`,
        'precision'
      );
    }
    this.defaultSettings.precision = precision;
    return this;
  }

  /**
   * Set the output format used when `save` is called without a format.
   * @throws {ValidationError} When `format` is not a supported value.
   * @returns {this}
   */
  setOutputFormat(format) {
    const validFormats = ['mp3', 'wav'];
    if (!validFormats.includes(format)) {
      throw new ValidationError(
        `Invalid output format. Must be one of: ${validFormats.join(', ')}`,
        'output_format'
      );
    }
    this.defaultSettings.output_format = format;
    return this;
  }
}
|