voicemix 1.0.6 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,6 +15,7 @@ Create a `.env` file in your project root with your API keys:
15
15
  ```plaintext
16
16
  ELEVENLABS_API_KEY="6e04xxxxxxxxxxxxxxxxxxxxxxxxa9da"
17
17
  RESEMBLE_API_KEY="9YWxxxxxxxxxxxxxxxxxmgtt"
18
+ CARTESIA_API_KEY="sk_car_xxxxxxxxxxxxxxxxxxxxjr"
18
19
  ```
19
20
 
20
21
  ## Usage
@@ -34,8 +35,24 @@ voiceMix
34
35
  .save();
35
36
  ```
36
37
 
38
+ ### Using ElevenLabs v3 Model
39
+
40
+ ```javascript
41
+ const voiceMix = new VoiceMix();
42
+
43
+ voiceMix
44
+ .v3() // Use the latest ElevenLabs v3 model
45
+ .voice('EbhcCfMvNsbvjN6OhjpJ')
46
+ .say('Hello! This is using the ElevenLabs v3 model.')
47
+ .save();
48
+ ```
49
+
50
+ The v3 model is the latest and most advanced model from ElevenLabs, providing the most natural and expressive voice generation.
51
+
37
52
  ### Advanced Usage
38
53
 
54
+ #### Using Resemble AI
55
+
39
56
  ```javascript
40
57
  const voiceMix = new VoiceMix();
41
58
 
@@ -48,6 +65,20 @@ voiceMix
48
65
  .save();
49
66
  ```
50
67
 
68
+ #### Using Cartesia
69
+
70
+ ```javascript
71
+ const voiceMix = new VoiceMix();
72
+
73
+ voiceMix
74
+ .useCartesia() // https://cartesia.ai/
75
+ .voice('your-cartesia-voice-id') // Select specific voice from your Cartesia account
76
+ .say('Your text here')
77
+ .save();
78
+ ```
79
+
80
+ **Note:** You need to use a valid voice ID from your Cartesia account. You can find available voices in your Cartesia dashboard.
81
+
51
82
  ### Batch Processing
52
83
 
53
84
  You can process multiple lines of text using a JSON configuration:
@@ -86,7 +117,15 @@ Example `lines.json`:
86
117
  - Multiple voice support
87
118
  - Language selection
88
119
  - Voice prompts for style control (Resemble AI)
89
- - Support for different TTS engines
120
+ - Support for multiple TTS providers:
121
+ - ElevenLabs (including v3 model)
122
+ - Resemble AI
123
+ - Cartesia
124
+ - Support for different ElevenLabs models:
125
+ - `monolingual_v1()` - Original English model
126
+ - `multilingual_v1()` - First multilingual model
127
+ - `multilingual_v2()` - Improved multilingual model (default)
128
+ - `v3()` - Latest and most advanced model
90
129
  - Simple chainable API
91
130
 
92
131
  ## License
package/demo/index.js CHANGED
@@ -9,15 +9,15 @@ const script = JSON.parse(fs.readFileSync('./lines.json', 'utf8'));
9
9
  const voiceMix = new VoiceMix()//.useResemble().lang('en-US');
10
10
 
11
11
  // Process each line in the script
12
- for (const entry of script) {
12
+ // for (const entry of script) {
13
13
 
14
- voiceMix
15
- .prompt(entry.prompt || 'Friendly and conversational tone')
16
- // .voice('ba875a0a') // Peter v2
17
- .voice('EbhcCfMvNsbvjN6OhjpJ')
18
- .say(entry.english)
19
- .save();
20
- }
14
+ // voiceMix
15
+ // .prompt(entry.prompt || 'Friendly and conversational tone')
16
+ // // .voice('ba875a0a') // Peter v2
17
+ // .voice('EbhcCfMvNsbvjN6OhjpJ')
18
+ // .say(entry.english)
19
+ // .save();
20
+ // }
21
21
 
22
22
  // const voiceMix = new VoiceMix();
23
23
 
@@ -39,4 +39,19 @@ for (const entry of script) {
39
39
  // // .voice('fcf8490c')
40
40
  // .voice('ba875a0a') // Peter v2
41
41
  // .say(line)
42
- // .save();
42
+ // .save();
43
+
44
+ // Example using Cartesia
45
+ // Note: Replace with your actual Cartesia voice ID from your account
46
+ // voiceMix.useCartesia()
47
+ // .voice('6ccbfb76-1fc6-48f7-b71d-91ac6298247b') // Cartesia voice ID from your account
48
+ // .say('<emotion value="happy" />Hmm… okay, let me think… yeah, this is actually kind of fun. [laughter] Let\'s dive in.')
49
+ // .save();
50
+
51
+ // Example using ElevenLabs v3 model
52
+ // v3 is the latest and most advanced model from ElevenLabs
53
+ voiceMix
54
+ .v3() // Use the ElevenLabs v3 model
55
+ .voice('dxvGlXoa4TLMyfYR6uC9') // ElevenLabs voice ID
56
+ .say('[sorprendida] gracias! [risas] dos personas creyeron en mí cuando nadie más lo hizo!')
57
+ .save();
package/index.js CHANGED
@@ -3,6 +3,7 @@ import path from 'path';
3
3
  import hashFactory from 'hash-factory';
4
4
  import { ElevenLabsProvider } from './providers/elevenlabs.js';
5
5
  import { ResembleProvider } from './providers/resemble.js';
6
+ import { CartesiaProvider } from './providers/cartesia.js';
6
7
  import { ValidationError, formatError } from './errors.js';
7
8
 
8
9
  const hash = hashFactory({ words: true, alpha: true });
@@ -54,6 +55,12 @@ export class VoiceMix {
54
55
  return this;
55
56
  }
56
57
 
58
+ useCartesia(apiKey) {
59
+ this.provider = new CartesiaProvider(apiKey);
60
+ this.providerType = 'cartesia';
61
+ return this;
62
+ }
63
+
57
64
  monolingual_v1() {
58
65
  if (this.providerType === 'elevenlabs') {
59
66
  this.provider.monolingual_v1();
@@ -75,6 +82,13 @@ export class VoiceMix {
75
82
  return this;
76
83
  }
77
84
 
85
+ v3() {
86
+ if (this.providerType === 'elevenlabs') {
87
+ this.provider.v3();
88
+ }
89
+ return this;
90
+ }
91
+
78
92
  setSampleRate(rate) {
79
93
  if (this.providerType === 'resemble') {
80
94
  this.provider.setSampleRate(rate);
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "voicemix",
3
3
  "type": "module",
4
- "version": "1.0.6",
5
- "description": "🗣️ VoiceMix - A simple text-to-speech tool using ElevenLabs and Resemble AI APIs.",
4
+ "version": "1.1.4",
5
+ "description": "🗣️ VoiceMix - A simple text-to-speech tool using ElevenLabs, Cartesia and Resemble AI APIs.",
6
6
  "main": "index.js",
7
7
  "repository": {
8
8
  "type": "git",
@@ -28,6 +28,7 @@
28
28
  "synthesis",
29
29
  "prompt",
30
30
  "tone",
31
+ "cartesia",
31
32
  "clasen"
32
33
  ]
33
34
  }
@@ -0,0 +1,131 @@
1
+ import axios from 'axios';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import { ProviderError } from '../errors.js';
5
+
6
/**
 * Text-to-speech provider backed by the Cartesia HTTP API.
 *
 * Posts a transcript to the `/tts/bytes` endpoint and streams the returned
 * audio into a file on disk.
 */
export class CartesiaProvider {
  /**
   * @param {string} [apiKey] - Cartesia API key. Falls back to the
   *   `CARTESIA_API_KEY` environment variable when omitted or empty.
   * @throws {ProviderError} When no API key is available.
   */
  constructor(apiKey) {
    this.apiKey = apiKey || process.env.CARTESIA_API_KEY;
    this.baseUrl = 'https://api.cartesia.ai';
    this.defaultSettings = {
      model_id: 'sonic-3',
      speed: 'normal',
      generation_config: {
        speed: 1,
        volume: 1
      }
    };

    if (!this.apiKey) {
      throw new ProviderError('Cartesia API key is required', 'cartesia');
    }
  }

  /**
   * Build the axios request configuration for a single synthesis call.
   *
   * @param {string} voiceId - Voice identifier sent as `voice.id`.
   * @param {string} text - Transcript to synthesize.
   * @param {string} [format='mp3'] - `'mp3'` selects the MP3 container; any
   *   other value selects WAV with 32-bit float PCM.
   * @returns {object} Axios request options with `responseType: 'stream'`.
   */
  _getRequestOptions(voiceId, text, format = 'mp3') {
    // Configure output format based on requested format
    const outputFormat = format === 'mp3'
      ? { container: 'mp3', encoding: 'mp3', sample_rate: 44100 }
      : { container: 'wav', encoding: 'pcm_f32le', sample_rate: 44100 };

    return {
      method: 'post',
      url: `${this.baseUrl}/tts/bytes`,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Cartesia-Version': '2024-06-10',
        'Content-Type': 'application/json'
      },
      data: {
        model_id: this.defaultSettings.model_id,
        transcript: text,
        voice: {
          mode: 'id',
          id: voiceId
        },
        output_format: outputFormat,
        speed: this.defaultSettings.speed,
        generation_config: this.defaultSettings.generation_config
      },
      responseType: 'stream'
    };
  }

  /**
   * Extract a textual error body from an axios error response payload.
   *
   * Because requests use `responseType: 'stream'`, an error payload arrives
   * as a stream whose bytes may not be buffered yet. A synchronous `read()`
   * would usually return `null`, so the stream is drained asynchronously.
   *
   * @param {*} data - `error.response.data`: a stream, Buffer, string, or
   *   anything else.
   * @returns {Promise<string|null>} The body text, or `null` when nothing
   *   usable could be read.
   */
  async _readErrorBody(data) {
    if (typeof data === 'string') {
      return data || null;
    }
    if (Buffer.isBuffer(data)) {
      return data.toString('utf-8') || null;
    }
    if (data && typeof data[Symbol.asyncIterator] === 'function') {
      try {
        const chunks = [];
        for await (const chunk of data) {
          chunks.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk);
        }
        return Buffer.concat(chunks).toString('utf-8') || null;
      } catch {
        // Best effort only: the caller falls back to its default message.
        return null;
      }
    }
    return null;
  }

  /**
   * Synthesize `text` with the given voice and write the audio to disk.
   *
   * @param {string} voiceId - Cartesia voice identifier.
   * @param {string} text - Transcript to synthesize.
   * @param {string} [format] - Output format; see `_getRequestOptions`.
   * @param {string} filePath - Directory to write into (created if missing).
   * @param {string} fileName - File name inside `filePath`.
   * @returns {Promise<string>} Full path of the written file.
   * @throws {ProviderError} On invalid arguments, API failure, download
   *   failure, or file-system write failure.
   */
  async save(voiceId, text, format, filePath, fileName) {
    try {
      if (!voiceId) {
        throw new ProviderError('Voice ID is required', 'cartesia');
      }
      if (!text) {
        throw new ProviderError('Text is required', 'cartesia');
      }
      if (!filePath || !fileName) {
        throw new ProviderError('File path and name are required', 'cartesia');
      }

      const response = await axios(this._getRequestOptions(voiceId, text, format));

      if (!fs.existsSync(filePath)) {
        fs.mkdirSync(filePath, { recursive: true });
      }

      const fullPath = path.join(filePath, fileName);
      const audioStream = response.data;
      const writer = fs.createWriteStream(fullPath);

      await new Promise((resolve, reject) => {
        writer.on('finish', resolve);
        writer.on('error', (err) => {
          // Stop pulling audio nobody can store any more.
          audioStream.destroy();
          reject(new ProviderError(
            'Failed to write audio file',
            'cartesia',
            { path: fullPath, error: err.message }
          ));
        });
        // pipe() does not forward source errors to the destination; without
        // this listener a broken download would leave the promise pending.
        audioStream.on('error', (err) => {
          writer.destroy();
          reject(new ProviderError(
            'Failed to download audio stream from Cartesia API',
            'cartesia',
            { path: fullPath, error: err.message }
          ));
        });
        audioStream.pipe(writer);
      });

      return fullPath;
    } catch (error) {
      if (error instanceof ProviderError) {
        throw error;
      }

      if (axios.isAxiosError(error)) {
        const fallbackMessage = 'Failed to save audio from Cartesia API';
        const body = await this._readErrorBody(error.response?.data);
        const errorMessage = body ?? fallbackMessage;

        throw new ProviderError(
          errorMessage,
          'cartesia',
          {
            status: error.response?.status,
            statusText: error.response?.statusText,
            message: errorMessage
          }
        );
      }

      throw new ProviderError(
        'An unexpected error occurred while saving audio',
        'cartesia',
        { originalError: error?.message ?? String(error) }
      );
    }
  }
}
131
+
@@ -34,6 +34,11 @@ export class ElevenLabsProvider {
34
34
  return this;
35
35
  }
36
36
 
37
+ v3() {
38
+ this.model_id = 'eleven_v3';
39
+ return this;
40
+ }
41
+
37
42
  _getRequestOptions(voiceId, text, format) {
38
43
  return {
39
44
  method: "post",