@mastra/voice-elevenlabs 0.1.0 → 0.1.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,18 @@
1
1
 
2
- > @mastra/voice-elevenlabs@0.1.0-alpha.4 build /home/runner/work/mastra/mastra/voice/elevenlabs
2
+ > @mastra/voice-elevenlabs@0.1.1-alpha.1 build /home/runner/work/mastra/mastra/voice/elevenlabs
3
3
  > tsup src/index.ts --format esm --experimental-dts --clean --treeshake
4
4
 
5
5
  CLI Building entry: src/index.ts
6
6
  CLI Using tsconfig: tsconfig.json
7
7
  CLI tsup v8.3.6
8
8
  TSC Build start
9
- TSC ⚡️ Build success in 7370ms
9
+ TSC ⚡️ Build success in 8591ms
10
10
  DTS Build start
11
11
  CLI Target: es2022
12
12
  Analysis will use the bundled TypeScript version 5.7.3
13
13
  Writing package typings: /home/runner/work/mastra/mastra/voice/elevenlabs/dist/_tsup-dts-rollup.d.ts
14
- DTS ⚡️ Build success in 5039ms
14
+ DTS ⚡️ Build success in 5735ms
15
15
  CLI Cleaning output folder
16
16
  ESM Build start
17
- ESM dist/index.js 3.36 KB
18
- ESM ⚡️ Build success in 287ms
17
+ ESM dist/index.js 5.00 KB
18
+ ESM ⚡️ Build success in 286ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @mastra/voice-elevenlabs
2
2
 
3
+ ## 0.1.1-alpha.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 705d69b: Add STT (speech-to-text) support to ElevenLabsVoice
8
+ - Updated dependencies [0d185b1]
9
+ - Updated dependencies [ed55f1d]
10
+ - Updated dependencies [8d13b14]
11
+ - Updated dependencies [3ee4831]
12
+ - Updated dependencies [108793c]
13
+ - Updated dependencies [5f28f44]
14
+ - @mastra/core@0.4.3-alpha.1
15
+
16
+ ## 0.1.1-alpha.0
17
+
18
+ ### Patch Changes
19
+
20
+ - Updated dependencies [06aa827]
21
+ - @mastra/core@0.4.3-alpha.0
22
+
3
23
  ## 0.1.0
4
24
 
5
25
  ### Patch Changes
@@ -1,6 +1,8 @@
1
1
  import { MastraVoice } from '@mastra/core/voice';
2
2
 
3
- declare type ElevenLabsModel = 'eleven_multilingual_v2' | 'eleven_flash_v2_5' | 'eleven_flash_v2' | 'eleven_multilingual_sts_v2' | 'eleven_english_sts_v2';
3
+ declare type ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;
4
+
5
+ declare type ElevenLabsModel = 'eleven_multilingual_v2' | 'eleven_flash_v2_5' | 'eleven_flash_v2' | 'eleven_multilingual_sts_v2' | 'eleven_english_sts_v2' | 'scribe_v1';
4
6
 
5
7
  export declare class ElevenLabsVoice extends MastraVoice {
6
8
  private client;
@@ -13,8 +15,9 @@ export declare class ElevenLabsVoice extends MastraVoice {
13
15
  *
14
16
  * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
15
17
  */
16
- constructor({ speechModel, speaker }?: {
18
+ constructor({ speechModel, listeningModel, speaker, }?: {
17
19
  speechModel?: ElevenLabsVoiceConfig;
20
+ listeningModel?: ElevenLabsVoiceConfig;
18
21
  speaker?: string;
19
22
  });
20
23
  /**
@@ -45,7 +48,23 @@ export declare class ElevenLabsVoice extends MastraVoice {
45
48
  speak(input: string | NodeJS.ReadableStream, options?: {
46
49
  speaker?: string;
47
50
  }): Promise<NodeJS.ReadableStream>;
48
- listen(_input: NodeJS.ReadableStream | Buffer, _options?: Record<string, unknown>): Promise<string>;
51
+ /**
52
+ * Converts audio input to text using ElevenLabs Speech-to-Text API.
53
+ *
54
+ * @param input - A readable stream containing the audio data to transcribe; it is read to completion and buffered in memory before upload
55
+ * @param options - Configuration options for the transcription
56
+ * @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')
57
+ * @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.
58
+ * @param options.num_speakers - Number of speakers to detect in the audio
59
+ * @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg'); used only as the extension of the uploaded file name, defaults to 'mp3'
60
+ * @param options.timeoutInSeconds - Request timeout in seconds
61
+ * @param options.maxRetries - Maximum number of retry attempts
62
+ * @param options.abortSignal - Signal to abort the request
63
+ *
64
+ * @returns A Promise that resolves to the transcribed text
65
+ *
66
+ */
67
+ listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string>;
49
68
  }
50
69
 
51
70
  declare interface ElevenLabsVoiceConfig {
@@ -53,4 +72,19 @@ declare interface ElevenLabsVoiceConfig {
53
72
  apiKey?: string;
54
73
  }
55
74
 
75
+ declare interface RequestOptions {
76
+ timeoutInSeconds?: number;
77
+ maxRetries?: number;
78
+ abortSignal?: AbortSignal;
79
+ apiKey?: string | undefined;
80
+ headers?: Record<string, string>;
81
+ }
82
+
83
+ declare interface SpeechToTextOptions {
84
+ language_code?: string;
85
+ tag_audio_events?: boolean;
86
+ num_speakers?: number;
87
+ filetype?: string;
88
+ }
89
+
56
90
  export { }
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
1
+ import { File } from 'node:buffer';
1
2
  import { MastraVoice } from '@mastra/core/voice';
2
3
  import { ElevenLabsClient } from 'elevenlabs';
3
4
 
@@ -13,13 +14,21 @@ var ElevenLabsVoice = class extends MastraVoice {
13
14
  *
14
15
  * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
15
16
  */
16
- constructor({ speechModel, speaker } = {}) {
17
+ constructor({
18
+ speechModel,
19
+ listeningModel,
20
+ speaker
21
+ } = {}) {
17
22
  const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;
18
23
  super({
19
24
  speechModel: {
20
25
  name: speechModel?.name ?? "eleven_multilingual_v2",
21
26
  apiKey: speechModel?.apiKey
22
27
  },
28
+ listeningModel: {
29
+ name: listeningModel?.name ?? "scribe_v1",
30
+ apiKey: listeningModel?.apiKey
31
+ },
23
32
  speaker
24
33
  });
25
34
  if (!apiKey) {
@@ -86,8 +95,44 @@ var ElevenLabsVoice = class extends MastraVoice {
86
95
  }, "voice.elevenlabs.speak")();
87
96
  return res;
88
97
  }
89
- async listen(_input, _options) {
90
- throw new Error("ElevenLabs does not support transcription");
98
+ /**
99
+ * Converts audio input to text using ElevenLabs Speech-to-Text API.
100
+ *
101
+ * @param input - A readable stream containing the audio data to transcribe; it is read to completion and buffered in memory before upload
102
+ * @param options - Configuration options for the transcription
103
+ * @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')
104
+ * @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.
105
+ * @param options.num_speakers - Number of speakers to detect in the audio
106
+ * @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg'); used only as the extension of the uploaded file name, defaults to 'mp3'
107
+ * @param options.timeoutInSeconds - Request timeout in seconds
108
+ * @param options.maxRetries - Maximum number of retry attempts
109
+ * @param options.abortSignal - Signal to abort the request
110
+ *
111
+ * @returns A Promise that resolves to the transcribed text
112
+ *
113
+ */
114
+ async listen(input, options) {
115
+ const res = await this.traced(async () => {
116
+ const chunks = [];
117
+ for await (const chunk of input) {
118
+ chunks.push(Buffer.from(chunk));
119
+ }
120
+ const buffer = Buffer.concat(chunks);
121
+ const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};
122
+ const file = new File([buffer], `audio.${filetype || "mp3"}`);
123
+ const transcription = await this.client.speechToText.convert(
124
+ {
125
+ file,
126
+ model_id: this.listeningModel?.name,
127
+ language_code,
128
+ tag_audio_events,
129
+ num_speakers
130
+ },
131
+ requestOptions
132
+ );
133
+ return transcription.text;
134
+ }, "voice.elevenlabs.listen")();
135
+ return res;
91
136
  }
92
137
  };
93
138
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/voice-elevenlabs",
3
- "version": "0.1.0",
3
+ "version": "0.1.1-alpha.1",
4
4
  "description": "Mastra ElevenLabs voice integration",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -17,7 +17,7 @@
17
17
  "dependencies": {
18
18
  "elevenlabs": "^1.50.2",
19
19
  "zod": "^3.24.1",
20
- "@mastra/core": "^0.4.2"
20
+ "@mastra/core": "^0.4.3-alpha.1"
21
21
  },
22
22
  "devDependencies": {
23
23
  "@microsoft/api-extractor": "^7.49.2",
package/src/index.test.ts CHANGED
@@ -1,6 +1,5 @@
1
- import { createWriteStream, writeFileSync, mkdirSync } from 'fs';
1
+ import { createWriteStream, writeFileSync, mkdirSync, createReadStream } from 'fs';
2
2
  import path from 'path';
3
- import { Readable } from 'stream';
4
3
  import { describe, expect, it, beforeAll } from 'vitest';
5
4
 
6
5
  import { ElevenLabsVoice } from './index.js';
@@ -10,7 +9,6 @@ describe('ElevenLabsVoice Integration Tests', () => {
10
9
  const outputDir = path.join(process.cwd(), 'test-outputs');
11
10
 
12
11
  beforeAll(() => {
13
- // Create output directory if it doesn't exist
14
12
  try {
15
13
  mkdirSync(outputDir, { recursive: true });
16
14
  } catch (err) {
@@ -100,14 +98,44 @@ describe('ElevenLabsVoice Integration Tests', () => {
100
98
  });
101
99
 
102
100
  describe('listen', () => {
103
- it('should throw error as transcription is not supported', async () => {
104
- const dummyStream = new Readable({
105
- read() {
106
- this.push(null);
101
+ it('should convert audio to text', async () => {
102
+ const outputPath = path.join(outputDir, 'elevenlabs-speech-test-params.mp3');
103
+ const audio = createReadStream(outputPath);
104
+ const result = await voice.listen(audio);
105
+
106
+ if (typeof result !== 'string') {
107
+ return expect(result).toBeInstanceOf(String);
108
+ }
109
+
110
+ expect(typeof result).toBe('string');
111
+ expect(result.length).toBeGreaterThan(0);
112
+ });
113
+
114
+ it('should handle API errors gracefully', async () => {
115
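+ // Create a voice instance with an invalid listeningModel API key to force an error (NOTE(review): the constructor's visible key lookup reads only speechModel.apiKey / ELEVENLABS_API_KEY — confirm this key actually reaches the client)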
116
+ const invalidVoice = new ElevenLabsVoice({
117
+ listeningModel: {
118
+ name: 'eleven_multilingual_v2',
119
+ apiKey: 'invalid-api-key',
107
120
  },
108
121
  });
109
122
 
110
- await expect(voice.listen(dummyStream)).rejects.toThrow('ElevenLabs does not support transcription');
123
+ const outputPath = path.join(outputDir, 'elevenlabs-speech-test-params.mp3');
124
+ const audio = createReadStream(outputPath);
125
+
126
+ // The API call should fail with an authentication error
127
+ await expect(invalidVoice.listen(audio)).rejects.toThrow();
128
+ });
129
+
130
+ it('should handle invalid audio input', async () => {
131
+ // Create a path to a non-existent file
132
+ const nonExistentPath = path.join(outputDir, 'non-existent-file.mp3');
133
+
134
+ // createReadStream() does not throw synchronously for a missing file; the open error surfaces as a rejection once listen() iterates the stream
135
+ await expect(async () => {
136
+ const audio = createReadStream(nonExistentPath);
137
+ await voice.listen(audio);
138
+ }).rejects.toThrow();
111
139
  });
112
140
  });
113
141
  });
package/src/index.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import { File } from 'node:buffer';
1
2
  import { MastraVoice } from '@mastra/core/voice';
2
3
  import { ElevenLabsClient } from 'elevenlabs';
3
4
 
@@ -6,13 +7,32 @@ type ElevenLabsModel =
6
7
  | 'eleven_flash_v2_5'
7
8
  | 'eleven_flash_v2'
8
9
  | 'eleven_multilingual_sts_v2'
9
- | 'eleven_english_sts_v2';
10
+ | 'eleven_english_sts_v2'
11
+ | 'scribe_v1';
10
12
 
11
13
  interface ElevenLabsVoiceConfig {
12
14
  name?: ElevenLabsModel;
13
15
  apiKey?: string;
14
16
  }
15
17
 
18
+ interface SpeechToTextOptions {
19
+ language_code?: string;
20
+ tag_audio_events?: boolean;
21
+ num_speakers?: number;
22
+ filetype?: string;
23
+ }
24
+
25
+ interface RequestOptions {
26
+ timeoutInSeconds?: number;
27
+ maxRetries?: number;
28
+ abortSignal?: AbortSignal;
29
+ apiKey?: string | undefined;
30
+ headers?: Record<string, string>;
31
+ }
32
+
33
+ // Options accepted by listen(): transcription parameters plus per-request client options
34
+ type ElevenLabsListenOptions = SpeechToTextOptions & RequestOptions;
35
+
16
36
  export class ElevenLabsVoice extends MastraVoice {
17
37
  private client: ElevenLabsClient;
18
38
 
@@ -25,13 +45,21 @@ export class ElevenLabsVoice extends MastraVoice {
25
45
  *
26
46
  * @throws {Error} If the ELEVENLABS_API_KEY is not set in the environment variables.
27
47
  */
28
- constructor({ speechModel, speaker }: { speechModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {
48
+ constructor({
49
+ speechModel,
50
+ listeningModel,
51
+ speaker,
52
+ }: { speechModel?: ElevenLabsVoiceConfig; listeningModel?: ElevenLabsVoiceConfig; speaker?: string } = {}) {
29
53
  const apiKey = speechModel?.apiKey ?? process.env.ELEVENLABS_API_KEY;
30
54
  super({
31
55
  speechModel: {
32
56
  name: speechModel?.name ?? 'eleven_multilingual_v2',
33
57
  apiKey: speechModel?.apiKey,
34
58
  },
59
+ listeningModel: {
60
+ name: listeningModel?.name ?? 'scribe_v1',
61
+ apiKey: listeningModel?.apiKey,
62
+ },
35
63
  speaker,
36
64
  });
37
65
 
@@ -110,7 +138,48 @@ export class ElevenLabsVoice extends MastraVoice {
110
138
  return res;
111
139
  }
112
140
 
113
- async listen(_input: NodeJS.ReadableStream | Buffer, _options?: Record<string, unknown>): Promise<string> {
114
- throw new Error('ElevenLabs does not support transcription');
141
+ /**
142
+ * Converts audio input to text using ElevenLabs Speech-to-Text API.
143
+ *
144
+ * @param input - A readable stream containing the audio data to transcribe
145
+ * @param options - Configuration options for the transcription
146
+ * @param options.language_code - ISO language code (e.g., 'en', 'fr', 'es')
147
+ * @param options.tag_audio_events - Whether to tag audio events like [MUSIC], [LAUGHTER], etc.
148
+ * @param options.num_speakers - Number of speakers to detect in the audio
149
+ * @param options.filetype - Audio file format (e.g., 'mp3', 'wav', 'ogg'); used only as the extension of the uploaded file name, defaults to 'mp3'
150
+ * @param options.timeoutInSeconds - Request timeout in seconds
151
+ * @param options.maxRetries - Maximum number of retry attempts
152
+ * @param options.abortSignal - Signal to abort the request
153
+ *
154
+ * @returns A Promise that resolves to the transcribed text
155
+ *
156
+ */
157
+ async listen(input: NodeJS.ReadableStream, options?: ElevenLabsListenOptions): Promise<string> {
158
+ const res = await this.traced(async () => {
159
+ const chunks: Buffer[] = [];
160
+ for await (const chunk of input) {
161
+ chunks.push(Buffer.from(chunk));
162
+ }
163
+ const buffer = Buffer.concat(chunks);
164
+
165
+ const { language_code, tag_audio_events, num_speakers, filetype, ...requestOptions } = options || {};
166
+
167
+ const file = new File([buffer], `audio.${filetype || 'mp3'}`);
168
+
169
+ const transcription = await this.client.speechToText.convert(
170
+ {
171
+ file: file,
172
+ model_id: this.listeningModel?.name as ElevenLabsModel,
173
+ language_code,
174
+ tag_audio_events,
175
+ num_speakers,
176
+ },
177
+ requestOptions,
178
+ );
179
+
180
+ return transcription.text;
181
+ }, 'voice.elevenlabs.listen')();
182
+
183
+ return res;
115
184
  }
116
185
  }