voicemix 1.0.6 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -1
- package/demo/emotio_valueh_hmm_okay_let_me_di5rff.mp3 +0 -0
- package/demo/grandm_maggie_did_everyt_she_14brhqm.mp3 +0 -0
- package/demo/index.js +24 -9
- package/demo/sorpre_gracia_risas_dos_10ykkn7.mp3 +0 -0
- package/index.js +14 -0
- package/package.json +3 -2
- package/providers/cartesia.js +131 -0
- package/providers/elevenlabs.js +5 -0
package/README.md
CHANGED
|
@@ -15,6 +15,7 @@ Create a `.env` file in your project root with your API keys:
|
|
|
15
15
|
```plaintext
|
|
16
16
|
ELEVENLABS_API_KEY="6e04xxxxxxxxxxxxxxxxxxxxxxxxa9da"
|
|
17
17
|
RESEMBLE_API_KEY="9YWxxxxxxxxxxxxxxxxxmgtt"
|
|
18
|
+
CARTESIA_API_KEY="sk_car_xxxxxxxxxxxxxxxxxxxxjr"
|
|
18
19
|
```
|
|
19
20
|
|
|
20
21
|
## Usage
|
|
@@ -34,8 +35,24 @@ voiceMix
|
|
|
34
35
|
.save();
|
|
35
36
|
```
|
|
36
37
|
|
|
38
|
+
### Using ElevenLabs v3 Model
|
|
39
|
+
|
|
40
|
+
```javascript
|
|
41
|
+
const voiceMix = new VoiceMix();
|
|
42
|
+
|
|
43
|
+
voiceMix
|
|
44
|
+
.v3() // Use the latest ElevenLabs v3 model
|
|
45
|
+
.voice('EbhcCfMvNsbvjN6OhjpJ')
|
|
46
|
+
.say('Hello! This is using the ElevenLabs v3 model.')
|
|
47
|
+
.save();
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
The v3 model is the latest and most advanced model from ElevenLabs, providing the most natural and expressive voice generation.
|
|
51
|
+
|
|
37
52
|
### Advanced Usage
|
|
38
53
|
|
|
54
|
+
#### Using Resemble AI
|
|
55
|
+
|
|
39
56
|
```javascript
|
|
40
57
|
const voiceMix = new VoiceMix();
|
|
41
58
|
|
|
@@ -48,6 +65,20 @@ voiceMix
|
|
|
48
65
|
.save();
|
|
49
66
|
```
|
|
50
67
|
|
|
68
|
+
#### Using Cartesia
|
|
69
|
+
|
|
70
|
+
```javascript
|
|
71
|
+
const voiceMix = new VoiceMix();
|
|
72
|
+
|
|
73
|
+
voiceMix
|
|
74
|
+
.useCartesia() // https://cartesia.ai/
|
|
75
|
+
.voice('your-cartesia-voice-id') // Select specific voice from your Cartesia account
|
|
76
|
+
.say('Your text here')
|
|
77
|
+
.save();
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Note:** You need to use a valid voice ID from your Cartesia account. You can find available voices in your Cartesia dashboard.
|
|
81
|
+
|
|
51
82
|
### Batch Processing
|
|
52
83
|
|
|
53
84
|
You can process multiple lines of text using a JSON configuration:
|
|
@@ -86,7 +117,15 @@ Example `lines.json`:
|
|
|
86
117
|
- Multiple voice support
|
|
87
118
|
- Language selection
|
|
88
119
|
- Voice prompts for style control (Resemble AI)
|
|
89
|
-
- Support for
|
|
120
|
+
- Support for multiple TTS providers:
|
|
121
|
+
- ElevenLabs (including v3 model)
|
|
122
|
+
- Resemble AI
|
|
123
|
+
- Cartesia
|
|
124
|
+
- Support for different ElevenLabs models:
|
|
125
|
+
- `monolingual_v1()` - Original English model
|
|
126
|
+
- `multilingual_v1()` - First multilingual model
|
|
127
|
+
- `multilingual_v2()` - Improved multilingual model (default)
|
|
128
|
+
- `v3()` - Latest and most advanced model
|
|
90
129
|
- Simple chainable API
|
|
91
130
|
|
|
92
131
|
## License
|
|
Binary file
|
|
Binary file
|
package/demo/index.js
CHANGED
|
@@ -9,15 +9,15 @@ const script = JSON.parse(fs.readFileSync('./lines.json', 'utf8'));
|
|
|
9
9
|
const voiceMix = new VoiceMix()//.useResemble().lang('en-US');
|
|
10
10
|
|
|
11
11
|
// Process each line in the script
|
|
12
|
-
for (const entry of script) {
|
|
12
|
+
// for (const entry of script) {
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
14
|
+
// voiceMix
|
|
15
|
+
// .prompt(entry.prompt || 'Friendly and conversational tone')
|
|
16
|
+
// // .voice('ba875a0a') // Peter v2
|
|
17
|
+
// .voice('EbhcCfMvNsbvjN6OhjpJ')
|
|
18
|
+
// .say(entry.english)
|
|
19
|
+
// .save();
|
|
20
|
+
// }
|
|
21
21
|
|
|
22
22
|
// const voiceMix = new VoiceMix();
|
|
23
23
|
|
|
@@ -39,4 +39,19 @@ for (const entry of script) {
|
|
|
39
39
|
// // .voice('fcf8490c')
|
|
40
40
|
// .voice('ba875a0a') // Peter v2
|
|
41
41
|
// .say(line)
|
|
42
|
-
// .save();
|
|
42
|
+
// .save();
|
|
43
|
+
|
|
44
|
+
// Example using Cartesia
|
|
45
|
+
// Note: Replace with your actual Cartesia voice ID from your account
|
|
46
|
+
// voiceMix.useCartesia()
|
|
47
|
+
// .voice('6ccbfb76-1fc6-48f7-b71d-91ac6298247b') // Cartesia voice ID from your account
|
|
48
|
+
// .say('<emotion value="happy" />Hmm… okay, let me think… yeah, this is actually kind of fun. [laughter] Let\'s dive in.')
|
|
49
|
+
// .save();
|
|
50
|
+
|
|
51
|
+
// Example using ElevenLabs v3 model
|
|
52
|
+
// v3 is the latest and most advanced model from ElevenLabs
|
|
53
|
+
voiceMix
|
|
54
|
+
.v3() // Use the ElevenLabs v3 model
|
|
55
|
+
.voice('dxvGlXoa4TLMyfYR6uC9') // ElevenLabs voice ID
|
|
56
|
+
.say('[sorprendida] gracias! [risas] dos personas creyeron en mí cuando nadie más lo hizo!')
|
|
57
|
+
.save();
|
|
Binary file
|
package/index.js
CHANGED
|
@@ -3,6 +3,7 @@ import path from 'path';
|
|
|
3
3
|
import hashFactory from 'hash-factory';
|
|
4
4
|
import { ElevenLabsProvider } from './providers/elevenlabs.js';
|
|
5
5
|
import { ResembleProvider } from './providers/resemble.js';
|
|
6
|
+
import { CartesiaProvider } from './providers/cartesia.js';
|
|
6
7
|
import { ValidationError, formatError } from './errors.js';
|
|
7
8
|
|
|
8
9
|
const hash = hashFactory({ words: true, alpha: true });
|
|
@@ -54,6 +55,12 @@ export class VoiceMix {
|
|
|
54
55
|
return this;
|
|
55
56
|
}
|
|
56
57
|
|
|
58
|
+
useCartesia(apiKey) {
|
|
59
|
+
this.provider = new CartesiaProvider(apiKey);
|
|
60
|
+
this.providerType = 'cartesia';
|
|
61
|
+
return this;
|
|
62
|
+
}
|
|
63
|
+
|
|
57
64
|
monolingual_v1() {
|
|
58
65
|
if (this.providerType === 'elevenlabs') {
|
|
59
66
|
this.provider.monolingual_v1();
|
|
@@ -75,6 +82,13 @@ export class VoiceMix {
|
|
|
75
82
|
return this;
|
|
76
83
|
}
|
|
77
84
|
|
|
85
|
+
v3() {
|
|
86
|
+
if (this.providerType === 'elevenlabs') {
|
|
87
|
+
this.provider.v3();
|
|
88
|
+
}
|
|
89
|
+
return this;
|
|
90
|
+
}
|
|
91
|
+
|
|
78
92
|
setSampleRate(rate) {
|
|
79
93
|
if (this.providerType === 'resemble') {
|
|
80
94
|
this.provider.setSampleRate(rate);
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "voicemix",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "1.0.6",
|
|
5
|
-
"description": "🗣️ VoiceMix - A simple text-to-speech tool using ElevenLabs and Resemble AI APIs.",
|
|
4
|
+
"version": "1.1.4",
|
|
5
|
+
"description": "🗣️ VoiceMix - A simple text-to-speech tool using ElevenLabs, Cartesia and Resemble AI APIs.",
|
|
6
6
|
"main": "index.js",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -28,6 +28,7 @@
|
|
|
28
28
|
"synthesis",
|
|
29
29
|
"prompt",
|
|
30
30
|
"tone",
|
|
31
|
+
"cartesia",
|
|
31
32
|
"clasen"
|
|
32
33
|
]
|
|
33
34
|
}
|
package/providers/cartesia.js
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { ProviderError } from '../errors.js';
|
|
5
|
+
|
|
6
|
+
export class CartesiaProvider {
|
|
7
|
+
constructor(apiKey) {
|
|
8
|
+
this.apiKey = apiKey || process.env.CARTESIA_API_KEY;
|
|
9
|
+
this.baseUrl = 'https://api.cartesia.ai';
|
|
10
|
+
this.defaultSettings = {
|
|
11
|
+
model_id: 'sonic-3',
|
|
12
|
+
speed: 'normal',
|
|
13
|
+
generation_config: {
|
|
14
|
+
speed: 1,
|
|
15
|
+
volume: 1
|
|
16
|
+
}
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
if (!this.apiKey) {
|
|
20
|
+
throw new ProviderError('Cartesia API key is required', 'cartesia');
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
_getRequestOptions(voiceId, text, format = 'mp3') {
|
|
25
|
+
// Configure output format based on requested format
|
|
26
|
+
const outputFormat = format === 'mp3'
|
|
27
|
+
? { container: 'mp3', encoding: 'mp3', sample_rate: 44100 }
|
|
28
|
+
: { container: 'wav', encoding: 'pcm_f32le', sample_rate: 44100 };
|
|
29
|
+
|
|
30
|
+
return {
|
|
31
|
+
method: 'post',
|
|
32
|
+
url: `${this.baseUrl}/tts/bytes`,
|
|
33
|
+
headers: {
|
|
34
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
35
|
+
'Cartesia-Version': '2024-06-10',
|
|
36
|
+
'Content-Type': 'application/json'
|
|
37
|
+
},
|
|
38
|
+
data: {
|
|
39
|
+
model_id: this.defaultSettings.model_id,
|
|
40
|
+
transcript: text,
|
|
41
|
+
voice: {
|
|
42
|
+
mode: 'id',
|
|
43
|
+
id: voiceId
|
|
44
|
+
},
|
|
45
|
+
output_format: outputFormat,
|
|
46
|
+
speed: this.defaultSettings.speed,
|
|
47
|
+
generation_config: this.defaultSettings.generation_config
|
|
48
|
+
},
|
|
49
|
+
responseType: 'stream'
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
async save(voiceId, text, format, filePath, fileName) {
|
|
54
|
+
try {
|
|
55
|
+
if (!voiceId) {
|
|
56
|
+
throw new ProviderError('Voice ID is required', 'cartesia');
|
|
57
|
+
}
|
|
58
|
+
if (!text) {
|
|
59
|
+
throw new ProviderError('Text is required', 'cartesia');
|
|
60
|
+
}
|
|
61
|
+
if (!filePath || !fileName) {
|
|
62
|
+
throw new ProviderError('File path and name are required', 'cartesia');
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
const response = await axios(this._getRequestOptions(voiceId, text, format));
|
|
66
|
+
|
|
67
|
+
if (!fs.existsSync(filePath)) {
|
|
68
|
+
fs.mkdirSync(filePath, { recursive: true });
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const fullPath = path.join(filePath, fileName);
|
|
72
|
+
const writer = fs.createWriteStream(fullPath);
|
|
73
|
+
response.data.pipe(writer);
|
|
74
|
+
|
|
75
|
+
return new Promise((resolve, reject) => {
|
|
76
|
+
writer.on('finish', () => resolve(fullPath));
|
|
77
|
+
writer.on('error', (err) => {
|
|
78
|
+
reject(new ProviderError(
|
|
79
|
+
'Failed to write audio file',
|
|
80
|
+
'cartesia',
|
|
81
|
+
{ path: fullPath, error: err.message }
|
|
82
|
+
));
|
|
83
|
+
});
|
|
84
|
+
});
|
|
85
|
+
} catch (error) {
|
|
86
|
+
if (error instanceof ProviderError) {
|
|
87
|
+
throw error;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (axios.isAxiosError(error)) {
|
|
91
|
+
// Try to read the error message from the response stream
|
|
92
|
+
let errorMessage = 'Failed to save audio from Cartesia API';
|
|
93
|
+
|
|
94
|
+
if (error.response?.data) {
|
|
95
|
+
try {
|
|
96
|
+
// If data is a stream/buffer, try to read it
|
|
97
|
+
if (typeof error.response.data.read === 'function') {
|
|
98
|
+
const errorData = error.response.data.read();
|
|
99
|
+
if (errorData) {
|
|
100
|
+
errorMessage = errorData.toString('utf-8');
|
|
101
|
+
}
|
|
102
|
+
} else if (typeof error.response.data === 'string') {
|
|
103
|
+
errorMessage = error.response.data;
|
|
104
|
+
}
|
|
105
|
+
} catch (readError) {
|
|
106
|
+
// Keep default error message
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const details = {
|
|
111
|
+
status: error.response?.status,
|
|
112
|
+
statusText: error.response?.statusText,
|
|
113
|
+
message: errorMessage
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
throw new ProviderError(
|
|
117
|
+
errorMessage,
|
|
118
|
+
'cartesia',
|
|
119
|
+
details
|
|
120
|
+
);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
throw new ProviderError(
|
|
124
|
+
'An unexpected error occurred while saving audio',
|
|
125
|
+
'cartesia',
|
|
126
|
+
{ originalError: error.message }
|
|
127
|
+
);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|