@mastra/voice-elevenlabs 0.1.0 → 0.1.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -5
- package/CHANGELOG.md +20 -0
- package/dist/_tsup-dts-rollup.d.ts +37 -3
- package/dist/index.js +48 -3
- package/package.json +2 -2
- package/src/index.test.ts +36 -8
- package/src/index.ts +73 -4
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
|
|
2
|
-
> @mastra/voice-elevenlabs@0.1.0 build /home/runner/work/mastra/mastra/voice/elevenlabs
|
|
2
|
+
> @mastra/voice-elevenlabs@0.1.1-alpha.1 build /home/runner/work/mastra/mastra/voice/elevenlabs
|
|
3
3
|
> tsup src/index.ts --format esm --experimental-dts --clean --treeshake
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
6
6
|
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
7
|
[34mCLI[39m tsup v8.3.6
|
|
8
8
|
[34mTSC[39m Build start
|
|
9
|
-
[32mTSC[39m ⚡️ Build success in
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 8591ms
|
|
10
10
|
[34mDTS[39m Build start
|
|
11
11
|
[34mCLI[39m Target: es2022
|
|
12
12
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
13
13
|
[36mWriting package typings: /home/runner/work/mastra/mastra/voice/elevenlabs/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
|
-
[32mDTS[39m ⚡️ Build success in
|
|
14
|
+
[32mDTS[39m ⚡️ Build success in 5735ms
|
|
15
15
|
[34mCLI[39m Cleaning output folder
|
|
16
16
|
[34mESM[39m Build start
|
|
17
|
-
[32mESM[39m [1mdist/index.js [22m[
|
|
18
|
-
[32mESM[39m ⚡️ Build success in
|
|
17
|
+
[32mESM[39m [1mdist/index.js [22m[32m5.00 KB[39m
|
|
18
|
+
[32mESM[39m ⚡️ Build success in 286ms
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# @mastra/voice-elevenlabs
|
|
2
2
|
|
|
3
|
+
## 0.1.1-alpha.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 705d69b: Add STT for ElevenlabsVoice
|
|
8
|
+
- Updated dependencies [0d185b1]
|
|
9
|
+
- Updated dependencies [ed55f1d]
|
|
10
|
+
- Updated dependencies [8d13b14]
|
|
11
|
+
- Updated dependencies [3ee4831]
|
|
12
|
+
- Updated dependencies [108793c]
|
|
13
|
+
- Updated dependencies [5f28f44]
|
|
14
|
+
- @mastra/core@0.4.3-alpha.1
|
|
15
|
+
|
|
16
|
+
## 0.1.1-alpha.0
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- Updated dependencies [06aa827]
|
|
21
|
+
- @mastra/core@0.4.3-alpha.0
|
|
22
|
+
|
|
3
23
|
## 0.1.0
|
|
4
24
|
|
|
5
25
|
### Patch Changes
|
package/dist/_tsup-dts-rollup.d.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { MastraVoice } from '@mastra/core/voice';
|
|
2
2
|
|
|
3
|
-
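declare type ElevenLabsModel = 'eleven_multilingual_v2' | 'eleven_flash_v2_5' | 'eleven_flash_v2' | 'eleven_multilingual_sts_v2' | 'eleven_english_sts_v2';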
|
|
3
|
+
declare type ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;
|
|
4
|
+
|
|
5
|
+
declare type ElevenLabsModel = 'eleven_multilingual_v2' | 'eleven_flash_v2_5' | 'eleven_flash_v2' | 'eleven_multilingual_sts_v2' | 'eleven_english_sts_v2' | 'scribe_v1';
|
|
4
6
|
|
|
5
7
|
export declare class ElevenLabsVoice extends MastraVoice {
|
|
6
8
|
private client;
|
|
@@ -13,8 +15,9 @@ export declare class ElevenLabsVoice extends MastraVoice {
|
|
|
13
15
|
*
|
|
14
16
|
* @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
|
|
15
17
|
*/
|
|
16
|
-
constructor({ speechModel, speaker }?: {
|
|
18
|
+
constructor({ speechModel, listeningModel, speaker, }?: {
|
|
17
19
|
speechModel?: ElevenLabsVoiceConfig;
|
|
20
|
+
listeningModel?: ElevenLabsVoiceConfig;
|
|
18
21
|
speaker?: string;
|
|
19
22
|
});
|
|
20
23
|
/**
|
|
@@ -45,7 +48,23 @@ export declare class ElevenLabsVoice extends MastraVoice {
|
|
|
45
48
|
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
46
49
|
speaker?: string;
|
|
47
50
|
}): Promise<NodeJS.ReadableStream>;
|
|
48
|
-
|
|
51
|
+
/**
|
|
52
|
+
* Converts audio input to text using ElevenLabs Speech-to-Text API.
|
|
53
|
+
*
|
|
54
|
+
* @param input - A readable stream containing the audio data to transcribe
|
|
55
|
+
* @param options - Configuration options for the transcription
|
|
56
|
+
* @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')
|
|
57
|
+
* @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.
|
|
58
|
+
* @param options.num_speakers - Number of speakers to detect in the audio
|
|
59
|
+
* @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg')
|
|
60
|
+
* @param options.timeoutInSeconds - Request timeout in seconds
|
|
61
|
+
* @param options.maxRetries - Maximum number of retry attempts
|
|
62
|
+
* @param options.abortSignal - Signal to abort the request
|
|
63
|
+
*
|
|
64
|
+
* @returns A Promise that resolves to the transcribed text
|
|
65
|
+
*
|
|
66
|
+
*/
|
|
67
|
+
listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string>;
|
|
49
68
|
}
|
|
50
69
|
|
|
51
70
|
declare interface ElevenLabsVoiceConfig {
|
|
@@ -53,4 +72,19 @@ declare interface ElevenLabsVoiceConfig {
|
|
|
53
72
|
apiKey?: string;
|
|
54
73
|
}
|
|
55
74
|
|
|
75
|
+
declare interface RequestOptions {
|
|
76
|
+
timeoutInSeconds?: number;
|
|
77
|
+
maxRetries?: number;
|
|
78
|
+
abortSignal?: AbortSignal;
|
|
79
|
+
apiKey?: string | undefined;
|
|
80
|
+
headers?: Record<string, string>;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
declare interface SpeechToTextOptions {
|
|
84
|
+
language_code?: string;
|
|
85
|
+
tag_audio_events?: boolean;
|
|
86
|
+
num_speakers?: number;
|
|
87
|
+
filetype?: string;
|
|
88
|
+
}
|
|
89
|
+
|
|
56
90
|
export { }
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { File } from 'node:buffer';
|
|
1
2
|
import { MastraVoice } from '@mastra/core/voice';
|
|
2
3
|
import { ElevenLabsClient } from 'elevenlabs';
|
|
3
4
|
|
|
@@ -13,13 +14,21 @@ var ElevenLabsVoice = class extends MastraVoice {
|
|
|
13
14
|
*
|
|
14
15
|
* @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
|
|
15
16
|
*/
|
|
16
|
-
constructor({
|
|
17
|
+
constructor({
|
|
18
|
+
speechModel,
|
|
19
|
+
listeningModel,
|
|
20
|
+
speaker
|
|
21
|
+
} = {}) {
|
|
17
22
|
const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
18
23
|
super({
|
|
19
24
|
speechModel: {
|
|
20
25
|
name: speechModel?.name ?? "eleven_multilingual_v2",
|
|
21
26
|
apiKey: speechModel?.apiKey
|
|
22
27
|
},
|
|
28
|
+
listeningModel: {
|
|
29
|
+
name: listeningModel?.name ?? "scribe_v1",
|
|
30
|
+
apiKey: listeningModel?.apiKey
|
|
31
|
+
},
|
|
23
32
|
speaker
|
|
24
33
|
});
|
|
25
34
|
if (!apiKey) {
|
|
@@ -86,8 +95,44 @@ var ElevenLabsVoice = class extends MastraVoice {
|
|
|
86
95
|
}, "voice.elevenlabs.speak")();
|
|
87
96
|
return res;
|
|
88
97
|
}
|
|
89
|
-
|
|
90
|
-
|
|
98
|
+
/**
|
|
99
|
+
* Converts audio input to text using ElevenLabs Speech-to-Text API.
|
|
100
|
+
*
|
|
101
|
+
* @param input - A readable stream containing the audio data to transcribe
|
|
102
|
+
* @param options - Configuration options for the transcription
|
|
103
|
+
* @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')
|
|
104
|
+
* @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.
|
|
105
|
+
* @param options.num_speakers - Number of speakers to detect in the audio
|
|
106
|
+
* @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg')
|
|
107
|
+
* @param options.timeoutInSeconds - Request timeout in seconds
|
|
108
|
+
* @param options.maxRetries - Maximum number of retry attempts
|
|
109
|
+
* @param options.abortSignal - Signal to abort the request
|
|
110
|
+
*
|
|
111
|
+
* @returns A Promise that resolves to the transcribed text
|
|
112
|
+
*
|
|
113
|
+
*/
|
|
114
|
+
async listen(input, options) {
|
|
115
|
+
const res = await this.traced(async () => {
|
|
116
|
+
const chunks = [];
|
|
117
|
+
for await (const chunk of input) {
|
|
118
|
+
chunks.push(Buffer.from(chunk));
|
|
119
|
+
}
|
|
120
|
+
const buffer = Buffer.concat(chunks);
|
|
121
|
+
const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};
|
|
122
|
+
const file = new File([buffer], `audio.${filetype || "mp3"}`);
|
|
123
|
+
const transcription = await this.client.speechToText.convert(
|
|
124
|
+
{
|
|
125
|
+
file,
|
|
126
|
+
model_id: this.listeningModel?.name,
|
|
127
|
+
language_code,
|
|
128
|
+
tag_audio_events,
|
|
129
|
+
num_speakers
|
|
130
|
+
},
|
|
131
|
+
requestOptions
|
|
132
|
+
);
|
|
133
|
+
return transcription.text;
|
|
134
|
+
}, "voice.elevenlabs.listen")();
|
|
135
|
+
return res;
|
|
91
136
|
}
|
|
92
137
|
};
|
|
93
138
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/voice-elevenlabs",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1-alpha.1",
|
|
4
4
|
"description": "Mastra ElevenLabs voice integration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"dependencies": {
|
|
18
18
|
"elevenlabs": "^1.50.2",
|
|
19
19
|
"zod": "^3.24.1",
|
|
20
|
-
"@mastra/core": "^0.4.
|
|
20
|
+
"@mastra/core": "^0.4.3-alpha.1"
|
|
21
21
|
},
|
|
22
22
|
"devDependencies": {
|
|
23
23
|
"@microsoft/api-extractor": "^7.49.2",
|
package/src/index.test.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { createWriteStream, writeFileSync, mkdirSync } from 'fs';
|
|
1
|
+
import { createWriteStream, writeFileSync, mkdirSync, createReadStream } from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
|
-
import { Readable } from 'stream';
|
|
4
3
|
import { describe, expect, it, beforeAll } from 'vitest';
|
|
5
4
|
|
|
6
5
|
import { ElevenLabsVoice } from './index.js';
|
|
@@ -10,7 +9,6 @@ describe('ElevenLabsVoice Integration Tests', () => {
|
|
|
10
9
|
const outputDir = path.join(process.cwd(), 'test-outputs');
|
|
11
10
|
|
|
12
11
|
beforeAll(() => {
|
|
13
|
-
// Create output directory if it doesn't exist
|
|
14
12
|
try {
|
|
15
13
|
mkdirSync(outputDir, { recursive: true });
|
|
16
14
|
} catch (err) {
|
|
@@ -100,14 +98,44 @@ describe('ElevenLabsVoice Integration Tests', () => {
|
|
|
100
98
|
});
|
|
101
99
|
|
|
102
100
|
describe('listen', () => {
|
|
103
|
-
it('should
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
101
|
+
it('should convert audio to text', async () => {
|
|
102
|
+
const outputPath = path.join(outputDir, 'elevenlabs-speech-test-params.mp3');
|
|
103
|
+
const audio = createReadStream(outputPath);
|
|
104
|
+
const result = await voice.listen(audio);
|
|
105
|
+
|
|
106
|
+
if (typeof result !== 'string') {
|
|
107
|
+
return expect(result).toBeInstanceOf(String);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
expect(typeof result).toBe('string');
|
|
111
|
+
expect(result.length).toBeGreaterThan(0);
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
it('should handle API errors gracefully', async () => {
|
|
115
|
+
// Create a voice instance with an invalid API key to force an error
|
|
116
|
+
const invalidVoice = new ElevenLabsVoice({
|
|
117
|
+
listeningModel: {
|
|
118
|
+
name: 'eleven_multilingual_v2',
|
|
119
|
+
apiKey: 'invalid-api-key',
|
|
107
120
|
},
|
|
108
121
|
});
|
|
109
122
|
|
|
110
|
-
|
|
123
|
+
const outputPath = path.join(outputDir, 'elevenlabs-speech-test-params.mp3');
|
|
124
|
+
const audio = createReadStream(outputPath);
|
|
125
|
+
|
|
126
|
+
// The API call should fail with an authentication error
|
|
127
|
+
await expect(invalidVoice.listen(audio)).rejects.toThrow();
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
it('should handle invalid audio input', async () => {
|
|
131
|
+
// Create a path to a non-existent file
|
|
132
|
+
const nonExistentPath = path.join(outputDir, 'non-existent-file.mp3');
|
|
133
|
+
|
|
134
|
+
// Attempting to create a read stream from a non-existent file should throw
|
|
135
|
+
await expect(async () => {
|
|
136
|
+
const audio = createReadStream(nonExistentPath);
|
|
137
|
+
await voice.listen(audio);
|
|
138
|
+
}).rejects.toThrow();
|
|
111
139
|
});
|
|
112
140
|
});
|
|
113
141
|
});
|
package/src/index.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { File } from 'node:buffer';
|
|
1
2
|
import { MastraVoice } from '@mastra/core/voice';
|
|
2
3
|
import { ElevenLabsClient } from 'elevenlabs';
|
|
3
4
|
|
|
@@ -6,13 +7,32 @@ type ElevenLabsModel =
|
|
|
6
7
|
| 'eleven_flash_v2_5'
|
|
7
8
|
| 'eleven_flash_v2'
|
|
8
9
|
| 'eleven_multilingual_sts_v2'
|
|
9
|
-
| 'eleven_english_sts_v2';
|
|
10
|
+
| 'eleven_english_sts_v2'
|
|
11
|
+
| 'scribe_v1';
|
|
10
12
|
|
|
11
13
|
interface ElevenLabsVoiceConfig {
|
|
12
14
|
name?: ElevenLabsModel;
|
|
13
15
|
apiKey?: string;
|
|
14
16
|
}
|
|
15
17
|
|
|
18
|
+
interface SpeechToTextOptions {
|
|
19
|
+
language_code?: string;
|
|
20
|
+
tag_audio_events?: boolean;
|
|
21
|
+
num_speakers?: number;
|
|
22
|
+
filetype?: string;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
interface RequestOptions {
|
|
26
|
+
timeoutInSeconds?: number;
|
|
27
|
+
maxRetries?: number;
|
|
28
|
+
abortSignal?: AbortSignal;
|
|
29
|
+
apiKey?: string | undefined;
|
|
30
|
+
headers?: Record<string, string>;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// Combined options type
|
|
34
|
+
type ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;
|
|
35
|
+
|
|
16
36
|
export class ElevenLabsVoice extends MastraVoice {
|
|
17
37
|
private client: ElevenLabsClient;
|
|
18
38
|
|
|
@@ -25,13 +45,21 @@ export class ElevenLabsVoice extends MastraVoice {
|
|
|
25
45
|
*
|
|
26
46
|
* @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
|
|
27
47
|
*/
|
|
28
|
-
constructor({
|
|
48
|
+
constructor({
|
|
49
|
+
speechModel,
|
|
50
|
+
listeningModel,
|
|
51
|
+
speaker,
|
|
52
|
+
}: { speechModel?: ElevenLabsVoiceConfig; listeningModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {
|
|
29
53
|
const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
30
54
|
super({
|
|
31
55
|
speechModel: {
|
|
32
56
|
name: speechModel?.name ?? 'eleven_multilingual_v2',
|
|
33
57
|
apiKey: speechModel?.apiKey,
|
|
34
58
|
},
|
|
59
|
+
listeningModel: {
|
|
60
|
+
name: listeningModel?.name ?? 'scribe_v1',
|
|
61
|
+
apiKey: listeningModel?.apiKey,
|
|
62
|
+
},
|
|
35
63
|
speaker,
|
|
36
64
|
});
|
|
37
65
|
|
|
@@ -110,7 +138,48 @@ export class ElevenLabsVoice extends MastraVoice {
|
|
|
110
138
|
return res;
|
|
111
139
|
}
|
|
112
140
|
|
|
113
|
-
|
|
114
|
-
|
|
141
|
+
/**
|
|
142
|
+
* Converts audio input to text using ElevenLabs Speech-to-Text API.
|
|
143
|
+
*
|
|
144
|
+
* @param input - A readable stream containing the audio data to transcribe
|
|
145
|
+
* @param options - Configuration options for the transcription
|
|
146
|
+
* @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')
|
|
147
|
+
* @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.
|
|
148
|
+
* @param options.num_speakers - Number of speakers to detect in the audio
|
|
149
|
+
* @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg')
|
|
150
|
+
* @param options.timeoutInSeconds - Request timeout in seconds
|
|
151
|
+
* @param options.maxRetries - Maximum number of retry attempts
|
|
152
|
+
* @param options.abortSignal - Signal to abort the request
|
|
153
|
+
*
|
|
154
|
+
* @returns A Promise that resolves to the transcribed text
|
|
155
|
+
*
|
|
156
|
+
*/
|
|
157
|
+
async listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string> {
|
|
158
|
+
const res = await this.traced(async () => {
|
|
159
|
+
const chunks: Buffer[] = [];
|
|
160
|
+
for await (const chunk of input) {
|
|
161
|
+
chunks.push(Buffer.from(chunk));
|
|
162
|
+
}
|
|
163
|
+
const buffer = Buffer.concat(chunks);
|
|
164
|
+
|
|
165
|
+
const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};
|
|
166
|
+
|
|
167
|
+
const file = new File([buffer], `audio.${filetype || 'mp3'}`);
|
|
168
|
+
|
|
169
|
+
const transcription = await this.client.speechToText.convert(
|
|
170
|
+
{
|
|
171
|
+
file: file,
|
|
172
|
+
model_id: this.listeningModel?.name as ElevenLabsModel,
|
|
173
|
+
language_code,
|
|
174
|
+
tag_audio_events,
|
|
175
|
+
num_speakers,
|
|
176
|
+
},
|
|
177
|
+
requestOptions,
|
|
178
|
+
);
|
|
179
|
+
|
|
180
|
+
return transcription.text;
|
|
181
|
+
}, 'voice.elevenlabs.listen')();
|
|
182
|
+
|
|
183
|
+
return res;
|
|
115
184
|
}
|
|
116
185
|
}
|