voicemix 1.0.2 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,6 +15,7 @@ Create a `.env` file in your project root with your API keys:
15
15
  ```plaintext
16
16
  ELEVENLABS_API_KEY="6e04xxxxxxxxxxxxxxxxxxxxxxxxa9da"
17
17
  RESEMBLE_API_KEY="9YWxxxxxxxxxxxxxxxxxmgtt"
18
+ CARTESIA_API_KEY="sk_car_xxxxxxxxxxxxxxxxxxxxjr"
18
19
  ```
19
20
 
20
21
  ## Usage
@@ -36,6 +37,8 @@ voiceMix
36
37
 
37
38
  ### Advanced Usage
38
39
 
40
+ #### Using Resemble AI
41
+
39
42
  ```javascript
40
43
  const voiceMix = new VoiceMix();
41
44
 
@@ -48,6 +51,20 @@ voiceMix
48
51
  .save();
49
52
  ```
50
53
 
54
+ #### Using Cartesia
55
+
56
+ ```javascript
57
+ const voiceMix = new VoiceMix();
58
+
59
+ voiceMix
60
+ .useCartesia() // https://cartesia.ai/
61
+ .voice('your-cartesia-voice-id') // Select specific voice from your Cartesia account
62
+ .say('Your text here')
63
+ .save();
64
+ ```
65
+
66
+ **Note:** You need to use a valid voice ID from your Cartesia account. You can find available voices in your Cartesia dashboard.
67
+
51
68
  ### Batch Processing
52
69
 
53
70
  You can process multiple lines of text using a JSON configuration:
@@ -86,7 +103,10 @@ Example `lines.json`:
86
103
  - Multiple voice support
87
104
  - Language selection
88
105
  - Voice prompts for style control (Resemble AI)
89
- - Support for different TTS engines
106
+ - Support for multiple TTS providers:
107
+ - ElevenLabs
108
+ - Resemble AI
109
+ - Cartesia
90
110
  - Simple chainable API
91
111
 
92
112
  ## License
package/demo/index.js CHANGED
@@ -9,15 +9,15 @@ const script = JSON.parse(fs.readFileSync('./lines.json', 'utf8'));
9
9
  const voiceMix = new VoiceMix()//.useResemble().lang('en-US');
10
10
 
11
11
  // Process each line in the script
12
- for (const entry of script) {
12
+ // for (const entry of script) {
13
13
 
14
- voiceMix
15
- .prompt(entry.prompt || 'Friendly and conversational tone')
16
- // .voice('ba875a0a') // Peter v2
17
- .voice('EbhcCfMvNsbvjN6OhjpJ')
18
- .say(entry.english)
19
- .save();
20
- }
14
+ // voiceMix
15
+ // .prompt(entry.prompt || 'Friendly and conversational tone')
16
+ // // .voice('ba875a0a') // Peter v2
17
+ // .voice('EbhcCfMvNsbvjN6OhjpJ')
18
+ // .say(entry.english)
19
+ // .save();
20
+ // }
21
21
 
22
22
  // const voiceMix = new VoiceMix();
23
23
 
@@ -39,4 +39,11 @@ for (const entry of script) {
39
39
  // // .voice('fcf8490c')
40
40
  // .voice('ba875a0a') // Peter v2
41
41
  // .say(line)
42
- // .save();
42
+ // .save();
43
+
44
+ // Example using Cartesia
45
+ // Note: Replace with your actual Cartesia voice ID from your account
46
+ voiceMix.useCartesia()
47
+ .voice('6ccbfb76-1fc6-48f7-b71d-91ac6298247b') // Cartesia voice ID from your account
48
+ .say('<emotion value="happy" />Hmm… okay, let me think… yeah, this is actually kind of fun. [laughter] Let\'s dive in.')
49
+ .save();
package/demo/package.json CHANGED
@@ -9,7 +9,6 @@
9
9
  "keywords": [],
10
10
  "author": "",
11
11
  "license": "ISC",
12
- "description": "",
13
12
  "dependencies": {
14
13
  "dotenv": "^16.5.0"
15
14
  }
package/index.js CHANGED
@@ -3,6 +3,7 @@ import path from 'path';
3
3
  import hashFactory from 'hash-factory';
4
4
  import { ElevenLabsProvider } from './providers/elevenlabs.js';
5
5
  import { ResembleProvider } from './providers/resemble.js';
6
+ import { CartesiaProvider } from './providers/cartesia.js';
6
7
  import { ValidationError, formatError } from './errors.js';
7
8
 
8
9
  const hash = hashFactory({ words: true, alpha: true });
@@ -54,6 +55,12 @@ export class VoiceMix {
54
55
  return this;
55
56
  }
56
57
 
58
+ useCartesia(apiKey) {
59
+ this.provider = new CartesiaProvider(apiKey);
60
+ this.providerType = 'cartesia';
61
+ return this;
62
+ }
63
+
57
64
  monolingual_v1() {
58
65
  if (this.providerType === 'elevenlabs') {
59
66
  this.provider.monolingual_v1();
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "voicemix",
3
3
  "type": "module",
4
- "version": "1.0.2",
5
- "description": "🗣️ text-to-speech tool using ElevenLabs and Resemble AI APIs.",
4
+ "version": "1.1.2",
5
+ "description": "🗣️ VoiceMix - A simple text-to-speech tool using ElevenLabs, Resemble AI, and Cartesia APIs.",
6
6
  "main": "index.js",
7
7
  "repository": {
8
8
  "type": "git",
@@ -15,7 +15,7 @@
15
15
  "license": "MIT",
16
16
  "dependencies": {
17
17
  "axios": "^1.9.0",
18
- "hash-factory": "^1.1.0"
18
+ "hash-factory": "^1.1.2"
19
19
  },
20
20
  "keywords": [
21
21
  "text-to-speech",
@@ -28,6 +28,7 @@
28
28
  "synthesis",
29
29
  "prompt",
30
30
  "tone",
31
+ "cartesia",
31
32
  "clasen"
32
33
  ]
33
34
  }
@@ -0,0 +1,131 @@
1
+ import axios from 'axios';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import { ProviderError } from '../errors.js';
5
+
6
/**
 * Text-to-speech provider backed by the Cartesia HTTP API.
 *
 * Builds requests for the `/tts/bytes` endpoint and streams the returned
 * audio to a file on disk.
 */
export class CartesiaProvider {
  /**
   * @param {string} [apiKey] - Cartesia API key. When omitted, the
   *   `CARTESIA_API_KEY` environment variable is used instead.
   * @throws {ProviderError} When no API key is available from either source.
   */
  constructor(apiKey) {
    this.apiKey = apiKey || process.env.CARTESIA_API_KEY;
    this.baseUrl = 'https://api.cartesia.ai';
    this.defaultSettings = {
      model_id: 'sonic-3',
      speed: 'normal',
      generation_config: {
        speed: 1,
        volume: 1
      }
    };

    if (!this.apiKey) {
      throw new ProviderError('Cartesia API key is required', 'cartesia');
    }
  }

  /**
   * Build the axios request configuration for a single synthesis call.
   *
   * @param {string} voiceId - Cartesia voice identifier.
   * @param {string} text - Transcript to synthesize.
   * @param {string} [format='mp3'] - `'mp3'` selects MP3 output; any other
   *   value selects a WAV container with `pcm_f32le` encoding.
   * @returns {object} axios request options with `responseType: 'stream'`.
   */
  _getRequestOptions(voiceId, text, format = 'mp3') {
    // Configure output format based on requested format
    const outputFormat = format === 'mp3'
      ? { container: 'mp3', encoding: 'mp3', sample_rate: 44100 }
      : { container: 'wav', encoding: 'pcm_f32le', sample_rate: 44100 };

    return {
      method: 'post',
      url: `${this.baseUrl}/tts/bytes`,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Cartesia-Version': '2024-06-10',
        'Content-Type': 'application/json'
      },
      data: {
        model_id: this.defaultSettings.model_id,
        transcript: text,
        voice: {
          mode: 'id',
          id: voiceId
        },
        output_format: outputFormat,
        speed: this.defaultSettings.speed,
        generation_config: this.defaultSettings.generation_config
      },
      responseType: 'stream'
    };
  }

  /**
   * Read the body of a failed HTTP response as UTF-8 text.
   *
   * Because requests use `responseType: 'stream'`, an error body arrives as a
   * stream. A synchronous `read()` returns `null` until data has been
   * buffered, so the stream is consumed asynchronously instead.
   *
   * @param {*} data - `error.response.data` from axios (stream, Buffer,
   *   string, or anything else).
   * @returns {Promise<string>} The body text, or `''` when it cannot be
   *   interpreted as text.
   */
  async _readErrorBody(data) {
    if (typeof data === 'string') {
      return data;
    }
    if (Buffer.isBuffer(data)) {
      return data.toString('utf-8');
    }
    if (data && typeof data[Symbol.asyncIterator] === 'function') {
      const chunks = [];
      for await (const chunk of data) {
        chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, 'utf-8') : chunk);
      }
      return Buffer.concat(chunks).toString('utf-8');
    }
    return '';
  }

  /**
   * Synthesize `text` with the given voice and write the audio to
   * `filePath/fileName`, creating the directory when it does not exist.
   *
   * @param {string} voiceId - Cartesia voice identifier.
   * @param {string} text - Transcript to synthesize.
   * @param {string} [format] - Output format, see `_getRequestOptions`.
   * @param {string} filePath - Directory that receives the audio file.
   * @param {string} fileName - Name of the audio file inside `filePath`.
   * @returns {Promise<string>} Full path of the written file.
   * @throws {ProviderError} On missing arguments, HTTP failures, a broken
   *   download stream, file-system write errors, or any other failure.
   */
  async save(voiceId, text, format, filePath, fileName) {
    try {
      if (!voiceId) {
        throw new ProviderError('Voice ID is required', 'cartesia');
      }
      if (!text) {
        throw new ProviderError('Text is required', 'cartesia');
      }
      if (!filePath || !fileName) {
        throw new ProviderError('File path and name are required', 'cartesia');
      }

      const response = await axios(this._getRequestOptions(voiceId, text, format));

      if (!fs.existsSync(filePath)) {
        fs.mkdirSync(filePath, { recursive: true });
      }

      const fullPath = path.join(filePath, fileName);
      const writer = fs.createWriteStream(fullPath);

      return await new Promise((resolve, reject) => {
        writer.on('finish', () => resolve(fullPath));
        writer.on('error', (err) => {
          reject(new ProviderError(
            'Failed to write audio file',
            'cartesia',
            { path: fullPath, error: err.message }
          ));
        });
        // pipe() does not forward source errors to the destination. Without
        // this listener a connection dropped mid-download raises an unhandled
        // 'error' event and the promise never settles.
        response.data.on('error', (err) => {
          writer.destroy();
          reject(new ProviderError(
            'Failed to read audio stream from Cartesia API',
            'cartesia',
            { path: fullPath, error: err.message }
          ));
        });
        // Listeners are attached before data starts flowing.
        response.data.pipe(writer);
      });
    } catch (error) {
      if (error instanceof ProviderError) {
        throw error;
      }

      if (axios.isAxiosError(error)) {
        let errorMessage = 'Failed to save audio from Cartesia API';

        try {
          const body = await this._readErrorBody(error.response?.data);
          if (body) {
            errorMessage = body;
          }
        } catch (readError) {
          // Best effort only: if the error body itself cannot be read, report
          // the default message together with the HTTP status below.
        }

        const details = {
          status: error.response?.status,
          statusText: error.response?.statusText,
          message: errorMessage
        };

        throw new ProviderError(
          errorMessage,
          'cartesia',
          details
        );
      }

      throw new ProviderError(
        'An unexpected error occurred while saving audio',
        'cartesia',
        { originalError: error.message }
      );
    }
  }
}
131
+