@mastra/voice-elevenlabs 0.1.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +18 -0
- package/CHANGELOG.md +27 -0
- package/LICENSE +44 -0
- package/README.md +88 -0
- package/dist/_tsup-dts-rollup.d.ts +56 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +94 -0
- package/eslint.config.js +6 -0
- package/package.json +37 -0
- package/src/index.test.ts +113 -0
- package/src/index.ts +116 -0
- package/tsconfig.json +5 -0
- package/vitest.config.ts +8 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
|
|
2
|
+
> @mastra/voice-elevenlabs@0.1.0-alpha.2 build /home/runner/work/mastra/mastra/voice/elevenlabs
|
|
3
|
+
> tsup src/index.ts --format esm --experimental-dts --clean --treeshake
|
|
4
|
+
|
|
5
|
+
[34mCLI[39m Building entry: src/index.ts
|
|
6
|
+
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
|
+
[34mCLI[39m tsup v8.3.6
|
|
8
|
+
[34mTSC[39m Build start
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 7374ms
|
|
10
|
+
[34mDTS[39m Build start
|
|
11
|
+
[34mCLI[39m Target: es2022
|
|
12
|
+
Analysis will use the bundled TypeScript version 5.7.3
|
|
13
|
+
[36mWriting package typings: /home/runner/work/mastra/mastra/voice/elevenlabs/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
|
+
[32mDTS[39m ⚡️ Build success in 5344ms
|
|
15
|
+
[34mCLI[39m Cleaning output folder
|
|
16
|
+
[34mESM[39m Build start
|
|
17
|
+
[32mESM[39m [1mdist/index.js [22m[32m3.36 KB[39m
|
|
18
|
+
[32mESM[39m ⚡️ Build success in 300ms
|
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# @mastra/voice-elevenlabs
|
|
2
|
+
|
|
3
|
+
## 0.1.0-alpha.2
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- f626fbb: deprecate @mastra/speech-deepgram for @mastra/voice-deepgram
|
|
8
|
+
- Updated dependencies [7fceae1]
|
|
9
|
+
- Updated dependencies [f626fbb]
|
|
10
|
+
- @mastra/core@0.4.2-alpha.0
|
|
11
|
+
|
|
12
|
+
## 0.1.0 (2024-XX-XX)
|
|
13
|
+
|
|
14
|
+
This package replaces the deprecated @mastra/speech-elevenlabs package. All functionality has been migrated to this new package with a more consistent naming scheme.
|
|
15
|
+
|
|
16
|
+
### Changes from @mastra/speech-elevenlabs
|
|
17
|
+
|
|
18
|
+
- Package renamed from @mastra/speech-elevenlabs to @mastra/voice-elevenlabs
|
|
19
|
+
- API changes:
|
|
20
|
+
- `ElevenLabsTTS` class renamed to `ElevenLabsVoice`
|
|
21
|
+
- `generate()` method renamed to `speak()`
|
|
22
|
+
- `voices()` method renamed to `getSpeakers()`
|
|
23
|
+
- Constructor configuration simplified
|
|
24
|
+
- All core functionality remains the same
|
|
25
|
+
- Import paths should be updated from '@mastra/speech-elevenlabs' to '@mastra/voice-elevenlabs'
|
|
26
|
+
|
|
27
|
+
For a complete history of changes prior to the rename, please see the changelog of the original package.
|
package/LICENSE
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Elastic License 2.0 (ELv2)
|
|
2
|
+
|
|
3
|
+
**Acceptance**
|
|
4
|
+
By using the software, you agree to all of the terms and conditions below.
|
|
5
|
+
|
|
6
|
+
**Copyright License**
|
|
7
|
+
The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below
|
|
8
|
+
|
|
9
|
+
**Limitations**
|
|
10
|
+
You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software.
|
|
11
|
+
|
|
12
|
+
You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key.
|
|
13
|
+
|
|
14
|
+
You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law.
|
|
15
|
+
|
|
16
|
+
**Patents**
|
|
17
|
+
The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
|
|
18
|
+
|
|
19
|
+
**Notices**
|
|
20
|
+
You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms.
|
|
21
|
+
|
|
22
|
+
If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software.
|
|
23
|
+
|
|
24
|
+
**No Other Rights**
|
|
25
|
+
These terms do not imply any licenses other than those expressly granted in these terms.
|
|
26
|
+
|
|
27
|
+
**Termination**
|
|
28
|
+
If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently.
|
|
29
|
+
|
|
30
|
+
**No Liability**
|
|
31
|
+
As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
|
|
32
|
+
|
|
33
|
+
**Definitions**
|
|
34
|
+
The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it.
|
|
35
|
+
|
|
36
|
+
_you_ refers to the individual or entity agreeing to these terms.
|
|
37
|
+
|
|
38
|
+
_your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
|
|
39
|
+
|
|
40
|
+
_your licenses_ are all the licenses granted to you for the software under these terms.
|
|
41
|
+
|
|
42
|
+
_use_ means anything you do with the software requiring one of your licenses.
|
|
43
|
+
|
|
44
|
+
_trademark_ means trademarks, service marks, and similar rights.
|
package/README.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# @mastra/voice-elevenlabs
|
|
2
|
+
|
|
3
|
+
ElevenLabs Voice integration for Mastra, providing Text-to-Speech (TTS) capabilities using ElevenLabs' advanced AI voice technology.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @mastra/voice-elevenlabs
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Configuration
|
|
12
|
+
|
|
13
|
+
The module requires the following environment variable:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
ELEVENLABS_API_KEY=your_api_key
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
```typescript
|
|
22
|
+
import { ElevenLabsVoice } from '@mastra/voice-elevenlabs';
|
|
23
|
+
|
|
24
|
+
// Initialize with configuration
|
|
25
|
+
const voice = new ElevenLabsVoice({
|
|
26
|
+
speechModel: {
|
|
27
|
+
name: 'eleven_multilingual_v2',
|
|
28
|
+
apiKey: 'your-api-key', // Optional, can use ELEVENLABS_API_KEY env var
|
|
29
|
+
},
|
|
30
|
+
speaker: 'Adam', // Default speaker
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
// List available speakers
|
|
34
|
+
const speakers = await voice.getSpeakers();
|
|
35
|
+
|
|
36
|
+
// Generate speech
|
|
37
|
+
const stream = await voice.speak('Hello from Mastra!', {
|
|
38
|
+
speaker: 'Adam', // Optional, defaults to constructor speaker
|
|
39
|
+
});
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Features
|
|
43
|
+
|
|
44
|
+
- High-fidelity Text-to-Speech synthesis
|
|
45
|
+
|
|
46
|
+
## Voice Options
|
|
47
|
+
|
|
48
|
+
ElevenLabs provides a variety of premium voices with different characteristics:
|
|
49
|
+
|
|
50
|
+
- Adam (Male)
|
|
51
|
+
- Antoni (Male)
|
|
52
|
+
- Arnold (Male)
|
|
53
|
+
- Bella (Female)
|
|
54
|
+
- Dorothy (Female)
|
|
55
|
+
- Elli (Female)
|
|
56
|
+
- Josh (Male)
|
|
57
|
+
- Rachel (Female)
|
|
58
|
+
- Sam (Male)
|
|
59
|
+
|
|
60
|
+
View the complete list of voices through the `getSpeakers()` method or in [ElevenLabs' documentation](https://docs.elevenlabs.io/api-reference/voices).
|
|
61
|
+
|
|
62
|
+
## API Reference
|
|
63
|
+
|
|
64
|
+
### Constructor
|
|
65
|
+
|
|
66
|
+
```typescript
|
|
67
|
+
new ElevenLabsVoice({
|
|
68
|
+
speechModel?: {
|
|
69
|
+
name?: ElevenLabsModel, // Default: 'eleven_multilingual_v2'
|
|
70
|
+
apiKey?: string, // Optional, can use ELEVENLABS_API_KEY env var
|
|
71
|
+
},
|
|
72
|
+
speaker?: string // Default speaker ID
|
|
73
|
+
})
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Methods
|
|
77
|
+
|
|
78
|
+
#### `getSpeakers()`
|
|
79
|
+
|
|
80
|
+
Returns a list of available speakers with their details.
|
|
81
|
+
|
|
82
|
+
#### `speak(input: string | NodeJS.ReadableStream, options?: { speaker?: string })`
|
|
83
|
+
|
|
84
|
+
Converts text to speech. Returns a readable stream of audio data.
|
|
85
|
+
|
|
86
|
+
#### `listen()`
|
|
87
|
+
|
|
88
|
+
Not supported - ElevenLabs does not provide speech recognition.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
2
|
+
|
|
3
|
+
declare type ElevenLabsModel = 'eleven_multilingual_v2' | 'eleven_flash_v2_5' | 'eleven_flash_v2' | 'eleven_multilingual_sts_v2' | 'eleven_english_sts_v2';
|
|
4
|
+
|
|
5
|
+
/**
 * Public type surface for the ElevenLabs voice provider.
 * NOTE(review): this file is generated by tsup's dts rollup — keep it in
 * sync with src/index.ts rather than editing declarations by hand.
 */
export declare class ElevenLabsVoice extends MastraVoice {
    private client;
    /**
     * Creates an instance of the ElevenLabsVoice class.
     *
     * @param {Object} options - The options for the voice configuration.
     * @param {ElevenLabsVoiceConfig} [options.speechModel] - The configuration for the speech model, including the model name and API key.
     * @param {string} [options.speaker] - The ID of the speaker to use. If not provided, a default speaker will be used.
     *
     * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
     */
    constructor({ speechModel, speaker }?: {
        speechModel?: ElevenLabsVoiceConfig;
        speaker?: string;
    });
    /**
     * Retrieves a list of available speakers from the Eleven Labs API.
     * Each speaker includes their ID, name, language, and gender.
     *
     * @returns {Promise<Array<{ voiceId: string, name: string, language: string, gender: string }>>}
     * A promise that resolves to an array of speaker objects.
     */
    getSpeakers(): Promise<{
        voiceId: string;
        name: string | undefined;
        language: string;
        gender: string;
    }[]>;
    private streamToString;
    /**
     * Converts text or audio input into speech using the Eleven Labs API.
     *
     * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.
     * @param {Object} [options] - Optional parameters for the speech generation.
     * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.
     *
     * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.
     *
     * @throws {Error} If no speaker is specified or if no speech model is set.
     */
    speak(input: string | NodeJS.ReadableStream, options?: {
        speaker?: string;
    }): Promise<NodeJS.ReadableStream>;
    /** Always rejects — ElevenLabs offers no speech-to-text endpoint. */
    listen(_input: NodeJS.ReadableStream | Buffer, _options?: Record<string, unknown>): Promise<string>;
}

/** Speech-model settings accepted by the ElevenLabsVoice constructor. */
declare interface ElevenLabsVoiceConfig {
    name?: ElevenLabsModel;
    apiKey?: string;
}

export { }
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { ElevenLabsVoice } from './_tsup-dts-rollup.js';
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
2
|
+
import { ElevenLabsClient } from 'elevenlabs';
|
|
3
|
+
|
|
4
|
+
// src/index.ts
|
|
5
|
+
// NOTE(review): compiled output of src/index.ts (tsup, ESM). Edit the source,
// not this file; comments below only annotate the emitted code.
var ElevenLabsVoice = class extends MastraVoice {
  client;
  /**
   * Creates an instance of the ElevenLabsVoice class.
   *
   * @param {Object} options - The options for the voice configuration.
   * @param {ElevenLabsVoiceConfig} [options.speechModel] - The configuration for the speech model, including the model name and API key.
   * @param {string} [options.speaker] - The ID of the speaker to use. If not provided, a default speaker will be used.
   *
   * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
   */
  constructor({ speechModel, speaker } = {}) {
    // Explicit key wins; otherwise fall back to the environment variable.
    const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;
    super({
      speechModel: {
        name: speechModel?.name ?? "eleven_multilingual_v2",
        apiKey: speechModel?.apiKey
      },
      speaker
    });
    if (!apiKey) {
      throw new Error("ELEVENLABS_API_KEY is not set");
    }
    this.client = new ElevenLabsClient({
      apiKey
    });
    // Built-in default voice ID used when the caller supplies none.
    this.speaker = speaker || "9BWtsMINqrJLrRacOk9x";
  }
  /**
   * Retrieves a list of available speakers from the Eleven Labs API.
   * Each speaker includes their ID, name, language, and gender.
   *
   * @returns {Promise<Array<{ voiceId: string, name: string, language: string, gender: string }>>}
   * A promise that resolves to an array of speaker objects.
   */
  async getSpeakers() {
    const res = await this.traced(async () => {
      const voices = await this.client.voices.getAll();
      return voices?.voices?.map((voice) => ({
        voiceId: voice.voice_id,
        name: voice.name,
        // Labels are optional on the API response; default to neutral English.
        language: voice.labels?.language || "en",
        gender: voice.labels?.gender || "neutral"
      })) ?? [];
    }, "voice.elevenlabs.voices")();
    return res;
  }
  // Drains a readable stream and decodes the collected bytes as UTF-8 text.
  async streamToString(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(Buffer.from(chunk));
    }
    return Buffer.concat(chunks).toString("utf-8");
  }
  /**
   * Converts text or audio input into speech using the Eleven Labs API.
   *
   * @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.
   * @param {Object} [options] - Optional parameters for the speech generation.
   * @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.
   *
   * @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.
   *
   * @throws {Error} If no speaker is specified or if no speech model is set.
   */
  async speak(input, options) {
    const speaker = options?.speaker || this.speaker;
    if (!speaker) {
      throw new Error("No speaker specified");
    }
    if (!this.speechModel?.name) {
      throw new Error("No speech model specified");
    }
    // Stream input is assumed to carry text to synthesize, not audio bytes.
    const text = typeof input === "string" ? input : await this.streamToString(input);
    const res = await this.traced(async () => {
      return await this.client.generate({
        text,
        voice: speaker,
        model_id: this.speechModel?.name,
        stream: true
      });
    }, "voice.elevenlabs.speak")();
    return res;
  }
  /** Always rejects — ElevenLabs offers no speech-to-text endpoint. */
  async listen(_input, _options) {
    throw new Error("ElevenLabs does not support transcription");
  }
};

export { ElevenLabsVoice };
|
package/eslint.config.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@mastra/voice-elevenlabs",
|
|
3
|
+
"version": "0.1.0-alpha.2",
|
|
4
|
+
"description": "Mastra ElevenLabs voice integration",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"default": "./dist/index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"./package.json": "./package.json"
|
|
16
|
+
},
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"elevenlabs": "^1.50.2",
|
|
19
|
+
"zod": "^3.24.1",
|
|
20
|
+
"@mastra/core": "^0.4.2-alpha.0"
|
|
21
|
+
},
|
|
22
|
+
"devDependencies": {
|
|
23
|
+
"@microsoft/api-extractor": "^7.49.2",
|
|
24
|
+
"@types/node": "^22.13.1",
|
|
25
|
+
"tsup": "^8.0.1",
|
|
26
|
+
"typescript": "^5.7.3",
|
|
27
|
+
"vitest": "^2.1.8",
|
|
28
|
+
"eslint": "^9.20.1",
|
|
29
|
+
"@internal/lint": "0.0.0"
|
|
30
|
+
},
|
|
31
|
+
"scripts": {
|
|
32
|
+
"build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
|
|
33
|
+
"build:watch": "tsup build --watch",
|
|
34
|
+
"test": "vitest run",
|
|
35
|
+
"lint": "eslint ."
|
|
36
|
+
}
|
|
37
|
+
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { createWriteStream, writeFileSync, mkdirSync } from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { Readable } from 'stream';
|
|
4
|
+
import { describe, expect, it, beforeAll } from 'vitest';
|
|
5
|
+
|
|
6
|
+
import { ElevenLabsVoice } from './index.js';
|
|
7
|
+
|
|
8
|
+
describe('ElevenLabsVoice Integration Tests', () => {
|
|
9
|
+
let voice: ElevenLabsVoice;
|
|
10
|
+
const outputDir = path.join(process.cwd(), 'test-outputs');
|
|
11
|
+
|
|
12
|
+
beforeAll(() => {
|
|
13
|
+
// Create output directory if it doesn't exist
|
|
14
|
+
try {
|
|
15
|
+
mkdirSync(outputDir, { recursive: true });
|
|
16
|
+
} catch (err) {
|
|
17
|
+
console.log('Directory already exists: ', err);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
voice = new ElevenLabsVoice({
|
|
21
|
+
speechModel: {
|
|
22
|
+
name: 'eleven_multilingual_v2',
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
describe('getSpeakers', () => {
|
|
28
|
+
it('should list available speakers', async () => {
|
|
29
|
+
const speakers = await voice.getSpeakers();
|
|
30
|
+
console.log(speakers);
|
|
31
|
+
expect(speakers.length).toBeGreaterThan(0);
|
|
32
|
+
expect(speakers[0]).toHaveProperty('voiceId');
|
|
33
|
+
expect(speakers[0]).toHaveProperty('name');
|
|
34
|
+
expect(speakers[0]).toHaveProperty('language');
|
|
35
|
+
expect(speakers[0]).toHaveProperty('gender');
|
|
36
|
+
});
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
describe('speak', () => {
|
|
40
|
+
it('should speak with default values', async () => {
|
|
41
|
+
const defaultVoice = new ElevenLabsVoice();
|
|
42
|
+
const audioStream = await defaultVoice.speak('Hello World, how are you?');
|
|
43
|
+
|
|
44
|
+
const outputPath = path.join(outputDir, 'elevenlabs-speech-test-default.mp3');
|
|
45
|
+
const fileStream = createWriteStream(outputPath);
|
|
46
|
+
const chunks: Buffer[] = [];
|
|
47
|
+
|
|
48
|
+
audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
|
|
49
|
+
audioStream.pipe(fileStream);
|
|
50
|
+
writeFileSync(outputPath, Buffer.concat(chunks));
|
|
51
|
+
}, 10000);
|
|
52
|
+
|
|
53
|
+
it('should generate audio from text and save to file', async () => {
|
|
54
|
+
const speakers = await voice.getSpeakers();
|
|
55
|
+
const speaker = speakers[0].voiceId;
|
|
56
|
+
|
|
57
|
+
const audioStream = await voice.speak('Hello World', { speaker });
|
|
58
|
+
|
|
59
|
+
return new Promise((resolve, reject) => {
|
|
60
|
+
const outputPath = path.join(outputDir, 'elevenlabs-speech-test.mp3');
|
|
61
|
+
const fileStream = createWriteStream(outputPath);
|
|
62
|
+
const chunks: Buffer[] = [];
|
|
63
|
+
|
|
64
|
+
audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
|
|
65
|
+
audioStream.pipe(fileStream);
|
|
66
|
+
|
|
67
|
+
fileStream.on('finish', () => {
|
|
68
|
+
expect(chunks.length).toBeGreaterThan(0);
|
|
69
|
+
resolve(undefined);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
audioStream.on('error', reject);
|
|
73
|
+
fileStream.on('error', reject);
|
|
74
|
+
});
|
|
75
|
+
}, 10000);
|
|
76
|
+
|
|
77
|
+
it('should work with different speaker', async () => {
|
|
78
|
+
const speakers = await voice.getSpeakers();
|
|
79
|
+
const speaker = speakers[1]?.voiceId;
|
|
80
|
+
|
|
81
|
+
const audioStream = await voice.speak('Test with different speaker', { speaker });
|
|
82
|
+
|
|
83
|
+
return new Promise((resolve, reject) => {
|
|
84
|
+
const outputPath = path.join(outputDir, 'elevenlabs-speech-test-params.mp3');
|
|
85
|
+
const fileStream = createWriteStream(outputPath);
|
|
86
|
+
const chunks: Buffer[] = [];
|
|
87
|
+
|
|
88
|
+
audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
|
|
89
|
+
audioStream.pipe(fileStream);
|
|
90
|
+
|
|
91
|
+
fileStream.on('finish', () => {
|
|
92
|
+
expect(chunks.length).toBeGreaterThan(0);
|
|
93
|
+
resolve(undefined);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
audioStream.on('error', reject);
|
|
97
|
+
fileStream.on('error', reject);
|
|
98
|
+
});
|
|
99
|
+
}, 10000);
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
describe('listen', () => {
|
|
103
|
+
it('should throw error as transcription is not supported', async () => {
|
|
104
|
+
const dummyStream = new Readable({
|
|
105
|
+
read() {
|
|
106
|
+
this.push(null);
|
|
107
|
+
},
|
|
108
|
+
});
|
|
109
|
+
|
|
110
|
+
await expect(voice.listen(dummyStream)).rejects.toThrow('ElevenLabs does not support transcription');
|
|
111
|
+
});
|
|
112
|
+
});
|
|
113
|
+
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
2
|
+
import { ElevenLabsClient } from 'elevenlabs';
|
|
3
|
+
|
|
4
|
+
/**
 * Model identifiers accepted by the ElevenLabs API.
 * The `_sts_` variants are speech-to-speech models; the others are TTS.
 */
type ElevenLabsModel =
  | 'eleven_multilingual_v2'
  | 'eleven_flash_v2_5'
  | 'eleven_flash_v2'
  | 'eleven_multilingual_sts_v2'
  | 'eleven_english_sts_v2';

/** Speech-model settings accepted by the ElevenLabsVoice constructor. */
interface ElevenLabsVoiceConfig {
  // Model to synthesize with; defaults to 'eleven_multilingual_v2'.
  name?: ElevenLabsModel;
  // API key; falls back to the ELEVENLABS_API_KEY environment variable.
  apiKey?: string;
}
|
|
15
|
+
|
|
16
|
+
export class ElevenLabsVoice extends MastraVoice {
|
|
17
|
+
private client: ElevenLabsClient;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Creates an instance of the ElevenLabsVoice class.
|
|
21
|
+
*
|
|
22
|
+
* @param {Object} options - The options for the voice configuration.
|
|
23
|
+
* @param {ElevenLabsVoiceConfig} [options.speechModel] - The configuration for the speech model, including the model name and API key.
|
|
24
|
+
* @param {string} [options.speaker] - The ID of the speaker to use. If not provided, a default speaker will be used.
|
|
25
|
+
*
|
|
26
|
+
* @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
|
|
27
|
+
*/
|
|
28
|
+
constructor({ speechModel, speaker }: { speechModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {
|
|
29
|
+
const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
30
|
+
super({
|
|
31
|
+
speechModel: {
|
|
32
|
+
name: speechModel?.name ?? 'eleven_multilingual_v2',
|
|
33
|
+
apiKey: speechModel?.apiKey,
|
|
34
|
+
},
|
|
35
|
+
speaker,
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
if (!apiKey) {
|
|
39
|
+
throw new Error('ELEVENLABS_API_KEY is not set');
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
this.client = new ElevenLabsClient({
|
|
43
|
+
apiKey,
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
this.speaker = speaker || '9BWtsMINqrJLrRacOk9x'; // Aria is the default speaker
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Retrieves a list of available speakers from the Eleven Labs API.
|
|
51
|
+
* Each speaker includes their ID, name, language, and gender.
|
|
52
|
+
*
|
|
53
|
+
* @returns {Promise<Array<{ voiceId: string, name: string, language: string, gender: string }>>}
|
|
54
|
+
* A promise that resolves to an array of speaker objects.
|
|
55
|
+
*/
|
|
56
|
+
async getSpeakers() {
|
|
57
|
+
const res = await this.traced(async () => {
|
|
58
|
+
const voices = await this.client.voices.getAll();
|
|
59
|
+
return (
|
|
60
|
+
voices?.voices?.map(voice => ({
|
|
61
|
+
voiceId: voice.voice_id,
|
|
62
|
+
name: voice.name,
|
|
63
|
+
language: voice.labels?.language || 'en',
|
|
64
|
+
gender: voice.labels?.gender || 'neutral',
|
|
65
|
+
})) ?? []
|
|
66
|
+
);
|
|
67
|
+
}, 'voice.elevenlabs.voices')();
|
|
68
|
+
|
|
69
|
+
return res;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {
|
|
73
|
+
const chunks: Buffer[] = [];
|
|
74
|
+
for await (const chunk of stream) {
|
|
75
|
+
chunks.push(Buffer.from(chunk));
|
|
76
|
+
}
|
|
77
|
+
return Buffer.concat(chunks).toString('utf-8');
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Converts text or audio input into speech using the Eleven Labs API.
|
|
82
|
+
*
|
|
83
|
+
* @param {string | NodeJS.ReadableStream} input - The text to be converted to speech or a stream containing audio data.
|
|
84
|
+
* @param {Object} [options] - Optional parameters for the speech generation.
|
|
85
|
+
* @param {string} [options.speaker] - The ID of the speaker to use for the speech. If not provided, the default speaker will be used.
|
|
86
|
+
*
|
|
87
|
+
* @returns {Promise<NodeJS.ReadableStream>} A promise that resolves to a readable stream of the generated speech.
|
|
88
|
+
*
|
|
89
|
+
* @throws {Error} If no speaker is specified or if no speech model is set.
|
|
90
|
+
*/
|
|
91
|
+
async speak(input: string | NodeJS.ReadableStream, options?: { speaker?: string }): Promise<NodeJS.ReadableStream> {
|
|
92
|
+
const speaker = options?.speaker || this.speaker;
|
|
93
|
+
if (!speaker) {
|
|
94
|
+
throw new Error('No speaker specified');
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (!this.speechModel?.name) {
|
|
98
|
+
throw new Error('No speech model specified');
|
|
99
|
+
}
|
|
100
|
+
const text = typeof input === 'string' ? input : await this.streamToString(input);
|
|
101
|
+
const res = await this.traced(async () => {
|
|
102
|
+
return await this.client.generate({
|
|
103
|
+
text,
|
|
104
|
+
voice: speaker,
|
|
105
|
+
model_id: this.speechModel?.name as ElevenLabsModel,
|
|
106
|
+
stream: true,
|
|
107
|
+
});
|
|
108
|
+
}, 'voice.elevenlabs.speak')();
|
|
109
|
+
|
|
110
|
+
return res;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
async listen(_input: NodeJS.ReadableStream | Buffer, _options?: Record<string, unknown>): Promise<string> {
|
|
114
|
+
throw new Error('ElevenLabs does not support transcription');
|
|
115
|
+
}
|
|
116
|
+
}
|
package/tsconfig.json
ADDED