@mastra/voice-deepgram 0.1.0-alpha.2

@@ -0,0 +1,18 @@
+
+ > @mastra/voice-deepgram@0.1.0-alpha.2 build /home/runner/work/mastra/mastra/voice/deepgram
+ > tsup src/index.ts --format esm --experimental-dts --clean --treeshake
+
+ CLI Building entry: src/index.ts
+ CLI Using tsconfig: tsconfig.json
+ CLI tsup v8.3.6
+ TSC Build start
+ TSC ⚡️ Build success in 6794ms
+ DTS Build start
+ CLI Target: es2022
+ Analysis will use the bundled TypeScript version 5.7.3
+ Writing package typings: /home/runner/work/mastra/mastra/voice/deepgram/dist/_tsup-dts-rollup.d.ts
+ DTS ⚡️ Build success in 4973ms
+ CLI Cleaning output folder
+ ESM Build start
+ ESM dist/index.js 4.50 KB
+ ESM ⚡️ Build success in 262ms
package/CHANGELOG.md ADDED
@@ -0,0 +1,22 @@
+ # Changelog
+
+ ## 0.1.0-alpha.2
+
+ ### Patch Changes
+
+ - abe4600: deprecate @mastra/speech-deepgram for @mastra/voice-deepgram
+ - Updated dependencies [7fceae1]
+ - Updated dependencies [f626fbb]
+   - @mastra/core@0.4.2-alpha.0
+
+ ## 0.1.0 (2024-XX-XX)
+
+ This package replaces the deprecated @mastra/speech-deepgram package. All functionality has been migrated to this new package with a more consistent naming scheme.
+
+ ### Changes from @mastra/speech-deepgram
+
+ - Package renamed from @mastra/speech-deepgram to @mastra/voice-deepgram
+ - All functionality remains the same
+ - Import paths should be updated from '@mastra/speech-deepgram' to '@mastra/voice-deepgram'
+
+ For a complete history of changes prior to the rename, please see the changelog of the original package.
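For consumers following the rename, the migration is a one-line import change; a minimal sketch (the export name is assumed to carry over unchanged, since the notes above state functionality is identical):

```typescript
// Before (deprecated package; export name assumed identical per the notes above):
// import { DeepgramVoice } from '@mastra/speech-deepgram';

// After (this package):
import { DeepgramVoice } from '@mastra/voice-deepgram';

// Assumes DEEPGRAM_API_KEY is set in the environment.
const voice = new DeepgramVoice();
```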
package/LICENSE ADDED
@@ -0,0 +1,44 @@
+ Elastic License 2.0 (ELv2)
+
+ **Acceptance**
+ By using the software, you agree to all of the terms and conditions below.
+
+ **Copyright License**
+ The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below.
+
+ **Limitations**
+ You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software.
+
+ You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key.
+
+ You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law.
+
+ **Patents**
+ The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
+
+ **Notices**
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms.
+
+ If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software.
+
+ **No Other Rights**
+ These terms do not imply any licenses other than those expressly granted in these terms.
+
+ **Termination**
+ If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently.
+
+ **No Liability**
+ As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
+
+ **Definitions**
+ The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it.
+
+ _you_ refers to the individual or entity agreeing to these terms.
+
+ _your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
+
+ _your licenses_ are all the licenses granted to you for the software under these terms.
+
+ _use_ means anything you do with the software requiring one of your licenses.
+
+ _trademark_ means trademarks, service marks, and similar rights.
package/README.md ADDED
@@ -0,0 +1,71 @@
+ # @mastra/voice-deepgram
+
+ Deepgram voice integration for Mastra, providing both Text-to-Speech (TTS) and Speech-to-Text (STT) capabilities using Deepgram's AI models.
+
+ ## Installation
+
+ ```bash
+ npm install @mastra/voice-deepgram
+ ```
+
+ ## Configuration
+
+ The module requires a Deepgram API key, which can be provided through the DEEPGRAM_API_KEY environment variable or directly in the configuration:
+
+ ```bash
+ DEEPGRAM_API_KEY=your_api_key
+ ```
+
+ ## Usage
+
+ ```typescript
+ import { DeepgramVoice } from '@mastra/voice-deepgram';
+
+ // Create voice with both speech and listening capabilities
+ const voice = new DeepgramVoice({
+   speechModel: {
+     name: 'aura', // TTS model family; joined with the speaker id to form e.g. aura-asteria-en
+     apiKey: 'your-api-key', // Optional, can use DEEPGRAM_API_KEY env var
+   },
+   listeningModel: {
+     name: 'nova', // Optional, specify a listening model
+     apiKey: 'your-api-key', // Optional, can use DEEPGRAM_API_KEY env var
+   },
+   speaker: 'athena-en', // Optional, specify a speaker voice (defaults to asteria-en)
+ });
+
+ // List available voices
+ const voices = await voice.getSpeakers();
+
+ // Generate speech
+ const audioStream = await voice.speak('Hello from Mastra!', {
+   speaker: 'athena-en', // Optional: override the default speaker
+   // Any additional options are passed through to Deepgram's speak request
+ });
+
+ // Convert speech to text
+ const text = await voice.listen(audioStream);
+ ```
+
+ ## Features
+
+ - High-quality Text-to-Speech synthesis
+ - Accurate Speech-to-Text transcription
+
+ ## Voice Options
+
+ Deepgram provides several AI voices with different characteristics:
+
+ - aura-asteria-en (Female, American)
+ - aura-athena-en (Female, American)
+ - aura-zeus-en (Male, American)
+ - aura-hera-en (Female, American)
+ - aura-orion-en (Male, American)
+
+ Note that the `speaker` option takes the voice id without the model prefix (e.g. `asteria-en`, not `aura-asteria-en`); the package joins it with the speech model name internally.
+
+ View the complete list in the `voices.ts` file or [Deepgram's documentation](https://developers.deepgram.com/docs/tts-models).
+
+ ### New Features
+
+ - **Speaker Selection**: You can now specify a speaker voice when initializing the `DeepgramVoice` class, allowing for more personalized speech generation.
+
+ - **Updated `speak` Method**: The `speak` method now accepts an optional `speaker` parameter in its options, letting you choose the voice for speech synthesis dynamically.
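Since `speak()` resolves to a Node readable stream, a common next step is persisting the audio. A minimal sketch using only Node's standard `fs` and `stream/promises` modules (the filename and default-constructor usage are illustrative, and DEEPGRAM_API_KEY is assumed to be set):

```typescript
import { createWriteStream } from 'fs';
import { pipeline } from 'stream/promises';
import { DeepgramVoice } from '@mastra/voice-deepgram';

const voice = new DeepgramVoice(); // reads DEEPGRAM_API_KEY from the environment

// speak() resolves to a NodeJS.ReadableStream of encoded audio.
const audio = await voice.speak('Saving speech to a file');

// Pipe the audio into a file; pipeline() handles backpressure and cleanup.
await pipeline(audio, createWriteStream('speech.mp3'));
```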
Binary file
package/dist/_tsup-dts-rollup.d.ts ADDED
@@ -0,0 +1,50 @@
+ import { MastraVoice } from '@mastra/core/voice';
+
+ /**
+  * List of available Deepgram models for text-to-speech and speech-to-text
+  */
+ export declare const DEEPGRAM_MODELS: readonly ["aura", "whisper", "base", "enhanced", "nova", "nova-2", "nova-3"];
+
+ /**
+  * List of available Deepgram voice models for text-to-speech
+  * Each voice is designed for specific use cases and languages
+  * Format: {name}-{language} (e.g. asteria-en)
+  */
+ export declare const DEEPGRAM_VOICES: readonly ["asteria-en", "luna-en", "stella-en", "athena-en", "hera-en", "orion-en", "arcas-en", "perseus-en", "angus-en", "orpheus-en", "helios-en", "zeus-en"];
+
+ declare type DeepgramModel = (typeof DEEPGRAM_MODELS)[number];
+ export { DeepgramModel }
+ export { DeepgramModel as DeepgramModel_alias_1 }
+
+ export declare class DeepgramVoice extends MastraVoice {
+     private speechClient?;
+     private listeningClient?;
+     constructor({ speechModel, listeningModel, speaker, }?: {
+         speechModel?: DeepgramVoiceConfig;
+         listeningModel?: DeepgramVoiceConfig;
+         speaker?: DeepgramVoiceId;
+     });
+     getSpeakers(): Promise<{
+         voiceId: "asteria-en" | "luna-en" | "stella-en" | "athena-en" | "hera-en" | "orion-en" | "arcas-en" | "perseus-en" | "angus-en" | "orpheus-en" | "helios-en" | "zeus-en";
+     }[]>;
+     speak(input: string | NodeJS.ReadableStream, options?: {
+         speaker?: string;
+         [key: string]: any;
+     }): Promise<NodeJS.ReadableStream>;
+     listen(audioStream: NodeJS.ReadableStream, options?: {
+         [key: string]: any;
+     }): Promise<string>;
+ }
+
+ export declare interface DeepgramVoiceConfig {
+     name?: DeepgramModel;
+     apiKey?: string;
+     properties?: Record<string, any>;
+     language?: string;
+ }
+
+ declare type DeepgramVoiceId = (typeof DEEPGRAM_VOICES)[number];
+ export { DeepgramVoiceId }
+ export { DeepgramVoiceId as DeepgramVoiceId_alias_1 }
+
+ export { }
package/dist/index.d.ts ADDED
@@ -0,0 +1,4 @@
+ export { DeepgramVoice } from './_tsup-dts-rollup.js';
+ export { DeepgramVoiceConfig } from './_tsup-dts-rollup.js';
+ export { DeepgramVoiceId } from './_tsup-dts-rollup.js';
+ export { DeepgramModel } from './_tsup-dts-rollup.js';
package/dist/index.js ADDED
@@ -0,0 +1,163 @@
+ import { createClient } from '@deepgram/sdk';
+ import { MastraVoice } from '@mastra/core/voice';
+ import { PassThrough } from 'stream';
+
+ // src/index.ts
+
+ // src/voices.ts
+ var DEEPGRAM_VOICES = [
+   "asteria-en",
+   "luna-en",
+   "stella-en",
+   "athena-en",
+   "hera-en",
+   "orion-en",
+   "arcas-en",
+   "perseus-en",
+   "angus-en",
+   "orpheus-en",
+   "helios-en",
+   "zeus-en"
+ ];
+
+ // src/index.ts
+ var DeepgramVoice = class extends MastraVoice {
+   speechClient;
+   listeningClient;
+   constructor({
+     speechModel,
+     listeningModel,
+     speaker
+   } = {}) {
+     const defaultApiKey = process.env.DEEPGRAM_API_KEY;
+     const defaultSpeechModel = {
+       name: "aura",
+       apiKey: defaultApiKey
+     };
+     const defaultListeningModel = {
+       name: "nova",
+       apiKey: defaultApiKey
+     };
+     super({
+       speechModel: {
+         name: speechModel?.name ?? defaultSpeechModel.name,
+         apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
+       },
+       listeningModel: {
+         name: listeningModel?.name ?? defaultListeningModel.name,
+         apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey
+       },
+       speaker
+     });
+     const speechApiKey = speechModel?.apiKey || defaultApiKey;
+     const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
+     console.log("speechApiKey", speechApiKey);
+     console.log("listeningApiKey", listeningApiKey);
+     if (!speechApiKey && !listeningApiKey) {
+       throw new Error("At least one of DEEPGRAM_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set");
+     }
+     if (speechApiKey) {
+       this.speechClient = createClient(speechApiKey);
+     }
+     if (listeningApiKey) {
+       this.listeningClient = createClient(listeningApiKey);
+     }
+     this.speaker = speaker || "asteria-en";
+   }
+   async getSpeakers() {
+     return this.traced(async () => {
+       return DEEPGRAM_VOICES.map((voice) => ({
+         voiceId: voice
+       }));
+     }, "voice.deepgram.getSpeakers")();
+   }
+   async speak(input, options) {
+     if (!this.speechClient) {
+       throw new Error("Deepgram speech client not configured");
+     }
+     let text;
+     if (typeof input !== "string") {
+       const chunks = [];
+       for await (const chunk of input) {
+         chunks.push(Buffer.from(chunk));
+       }
+       text = Buffer.concat(chunks).toString("utf-8");
+     } else {
+       text = input;
+     }
+     if (text.trim().length === 0) {
+       throw new Error("Input text is empty");
+     }
+     return this.traced(async () => {
+       if (!this.speechClient) {
+         throw new Error("No speech client configured");
+       }
+       let model;
+       if (options?.speaker) {
+         model = this.speechModel?.name + "-" + options.speaker;
+       } else if (this.speaker) {
+         model = this.speechModel?.name + "-" + this.speaker;
+       }
+       const speakClient = this.speechClient.speak;
+       const response = await speakClient.request(
+         { text },
+         {
+           model,
+           ...options
+         }
+       );
+       const webStream = await response.getStream();
+       if (!webStream) {
+         throw new Error("No stream returned from Deepgram");
+       }
+       const reader = webStream.getReader();
+       const nodeStream = new PassThrough();
+       (async () => {
+         try {
+           while (true) {
+             const { done, value } = await reader.read();
+             if (done) {
+               nodeStream.end();
+               break;
+             }
+             nodeStream.write(value);
+           }
+         } catch (error) {
+           nodeStream.destroy(error);
+         }
+       })().catch((error) => {
+         nodeStream.destroy(error);
+       });
+       return nodeStream;
+     }, "voice.deepgram.speak")();
+   }
+   async listen(audioStream, options) {
+     if (!this.listeningClient) {
+       throw new Error("Deepgram listening client not configured");
+     }
+     const chunks = [];
+     for await (const chunk of audioStream) {
+       chunks.push(Buffer.from(chunk));
+     }
+     const buffer = Buffer.concat(chunks);
+     return this.traced(async () => {
+       if (!this.listeningClient) {
+         throw new Error("No listening client configured");
+       }
+       const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {
+         model: this.listeningModel?.name,
+         ...options
+       });
+       if (error) {
+         throw error;
+       }
+       const transcript = result.results?.channels?.[0]?.alternatives?.[0]?.transcript;
+       if (!transcript) {
+         throw new Error("No transcript found in Deepgram response");
+       }
+       return transcript;
+     }, "voice.deepgram.listen")();
+   }
+ };
+
+ export { DeepgramVoice };
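A note on the stream bridging in `speak()` above: the bundle pumps Deepgram's Web `ReadableStream` into a Node `PassThrough` by hand. On Node 17+, `Readable.fromWeb` performs the same Web-to-Node conversion in one call; a minimal sketch of that standard-library alternative (not what this package ships, just an equivalent):

```typescript
import { Readable } from 'stream';
import type { ReadableStream } from 'stream/web';

// Wraps a Web ReadableStream (such as the one returned by response.getStream())
// as a Node readable stream, with backpressure handled by Node itself.
function webToNodeStream(webStream: ReadableStream): NodeJS.ReadableStream {
  return Readable.fromWeb(webStream);
}
```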
package/eslint.config.js ADDED
@@ -0,0 +1,6 @@
+ import { createConfig } from '@internal/lint/eslint';
+
+ const config = await createConfig();
+
+ /** @type {import("eslint").Linter.Config[]} */
+ export default [...config];
package/package.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "name": "@mastra/voice-deepgram",
+   "version": "0.1.0-alpha.2",
+   "description": "Mastra Deepgram voice integration",
+   "type": "module",
+   "main": "dist/index.js",
+   "types": "dist/index.d.ts",
+   "exports": {
+     ".": {
+       "import": {
+         "types": "./dist/index.d.ts",
+         "default": "./dist/index.js"
+       }
+     },
+     "./package.json": "./package.json"
+   },
+   "dependencies": {
+     "@deepgram/sdk": "^3.9.0",
+     "zod": "^3.24.1",
+     "@mastra/core": "^0.4.2-alpha.0"
+   },
+   "devDependencies": {
+     "@microsoft/api-extractor": "^7.49.2",
+     "@types/node": "^22.13.1",
+     "tsup": "^8.3.6",
+     "typescript": "^5.7.3",
+     "vitest": "^2.1.8",
+     "eslint": "^9.20.1",
+     "@internal/lint": "0.0.0"
+   },
+   "scripts": {
+     "build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
+     "build:watch": "pnpm build --watch",
+     "test": "vitest run",
+     "lint": "eslint ."
+   }
+ }
package/src/index.test.ts ADDED
@@ -0,0 +1,134 @@
+ import { writeFileSync, mkdirSync, createReadStream } from 'fs';
+ import path from 'path';
+ import { PassThrough } from 'stream';
+ import { describe, expect, it, beforeAll } from 'vitest';
+
+ import { DeepgramVoice } from './index.js';
+
+ describe('DeepgramVoice Integration Tests', () => {
+   let voice: DeepgramVoice;
+   const outputDir = path.join(process.cwd(), 'test-outputs');
+
+   beforeAll(() => {
+     try {
+       mkdirSync(outputDir, { recursive: true });
+     } catch (err) {
+       console.log('Directory already exists: ', err);
+     }
+
+     voice = new DeepgramVoice({
+       speechModel: {
+         name: 'aura',
+       },
+       listeningModel: {
+         name: 'whisper',
+       },
+       speaker: 'asteria-en',
+     });
+   });
+
+   describe('getSpeakers', () => {
+     it('should list available voices', async () => {
+       const speakers = await voice.getSpeakers();
+       const expectedVoiceIds = ['asteria-en', 'stella-en', 'luna-en'];
+       expectedVoiceIds.forEach(voiceId => {
+         expect(speakers.some(s => s.voiceId === voiceId)).toBe(true);
+       });
+     });
+   });
+
+   describe('speak', () => {
+     it('should generate audio and save to file', async () => {
+       const audioResult = await voice.speak('Hello World', {
+         text: 'Hello World',
+       });
+
+       const chunks: Buffer[] = [];
+       for await (const chunk of audioResult) {
+         chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+       }
+       const audioBuffer = Buffer.concat(chunks);
+       const outputPath = path.join(outputDir, 'deepgram-speech-test.mp3');
+       writeFileSync(outputPath, audioBuffer);
+       expect(audioBuffer.length).toBeGreaterThan(0);
+     }, 10000);
+
+     it('should work with different parameters', async () => {
+       const audioResult = await voice.speak('Hello World', {
+         text: 'Test with parameters',
+         speaker: 'luna-en',
+       });
+
+       const chunks: Buffer[] = [];
+       for await (const chunk of audioResult) {
+         chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+       }
+       const audioBuffer = Buffer.concat(chunks);
+       const outputPath = path.join(outputDir, 'deepgram-speech-test-params.mp3');
+       writeFileSync(outputPath, audioBuffer);
+       expect(audioBuffer.length).toBeGreaterThan(0);
+     }, 10000);
+   });
+
+   // Error cases
+   describe('error handling', () => {
+     it('should handle invalid voice names', async () => {
+       await expect(voice.speak('Test', { speaker: 'invalid_voice' })).rejects.toThrow();
+     });
+
+     it('should handle empty text', async () => {
+       await expect(voice.speak('', { speaker: 'asteria-en' })).rejects.toThrow('Input text is empty');
+     });
+
+     it('should handle whitespace-only text', async () => {
+       await expect(voice.speak(' \n\t ', { speaker: 'asteria-en' })).rejects.toThrow('Input text is empty');
+     });
+   });
+
+   describe('listen', () => {
+     it('should transcribe audio buffer', async () => {
+       // First generate some audio to transcribe
+       const audioResult = await voice.speak('This is a test of transcription');
+
+       // Collect audio chunks
+       const chunks: Buffer[] = [];
+       for await (const chunk of audioResult) {
+         chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+       }
+       const audioBuffer = Buffer.concat(chunks);
+
+       // Create stream from the buffer
+       const stream = new PassThrough();
+       stream.end(audioBuffer);
+       const result = await voice.listen(stream);
+
+       expect(typeof result).toBe('string');
+       expect(result.toLowerCase()).toContain('test');
+       expect(result.toLowerCase()).toContain('transcription');
+     }, 15000);
+
+     it('should transcribe audio from fixture file', async () => {
+       const fixturePath = path.join(process.cwd(), '__fixtures__', 'voice-test.m4a');
+       const audioStream = createReadStream(fixturePath);
+
+       console.log('listening to audio stream');
+       const text = await voice.listen(audioStream, {
+         filetype: 'm4a',
+       });
+       console.log('text', text);
+
+       expect(text).toBeTruthy();
+       console.log(text);
+       expect(typeof text).toBe('string');
+       expect(text.length).toBeGreaterThan(0);
+     }, 15000);
+
+     it('should handle invalid audio', async () => {
+       const invalidAudio = Buffer.from('not valid audio');
+       const stream = new PassThrough();
+       stream.end(invalidAudio);
+
+       await expect(voice.listen(stream)).rejects.toThrow();
+     });
+   });
+ });
package/src/index.ts ADDED
@@ -0,0 +1,190 @@
+ import { createClient } from '@deepgram/sdk';
+ import { MastraVoice } from '@mastra/core/voice';
+ import { PassThrough } from 'stream';
+
+ import { DEEPGRAM_VOICES } from './voices';
+ import type { DeepgramVoiceId, DeepgramModel } from './voices';
+
+ interface DeepgramVoiceConfig {
+   name?: DeepgramModel;
+   apiKey?: string;
+   properties?: Record<string, any>;
+   language?: string;
+ }
+
+ export class DeepgramVoice extends MastraVoice {
+   private speechClient?: ReturnType<typeof createClient>;
+   private listeningClient?: ReturnType<typeof createClient>;
+
+   constructor({
+     speechModel,
+     listeningModel,
+     speaker,
+   }: { speechModel?: DeepgramVoiceConfig; listeningModel?: DeepgramVoiceConfig; speaker?: DeepgramVoiceId } = {}) {
+     const defaultApiKey = process.env.DEEPGRAM_API_KEY;
+
+     const defaultSpeechModel = {
+       name: 'aura',
+       apiKey: defaultApiKey,
+     };
+
+     const defaultListeningModel = {
+       name: 'nova',
+       apiKey: defaultApiKey,
+     };
+
+     super({
+       speechModel: {
+         name: speechModel?.name ?? defaultSpeechModel.name,
+         apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,
+       },
+       listeningModel: {
+         name: listeningModel?.name ?? defaultListeningModel.name,
+         apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,
+       },
+       speaker,
+     });
+
+     const speechApiKey = speechModel?.apiKey || defaultApiKey;
+     const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
+     console.log('speechApiKey', speechApiKey);
+     console.log('listeningApiKey', listeningApiKey);
+     if (!speechApiKey && !listeningApiKey) {
+       throw new Error('At least one of DEEPGRAM_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');
+     }
+
+     if (speechApiKey) {
+       this.speechClient = createClient(speechApiKey);
+     }
+     if (listeningApiKey) {
+       this.listeningClient = createClient(listeningApiKey);
+     }
+
+     this.speaker = speaker || 'asteria-en';
+   }
+
+   async getSpeakers() {
+     return this.traced(async () => {
+       return DEEPGRAM_VOICES.map(voice => ({
+         voiceId: voice,
+       }));
+     }, 'voice.deepgram.getSpeakers')();
+   }
+
+   async speak(
+     input: string | NodeJS.ReadableStream,
+     options?: {
+       speaker?: string;
+       [key: string]: any;
+     },
+   ): Promise<NodeJS.ReadableStream> {
+     if (!this.speechClient) {
+       throw new Error('Deepgram speech client not configured');
+     }
+
+     let text: string;
+     if (typeof input !== 'string') {
+       const chunks: Buffer[] = [];
+       for await (const chunk of input) {
+         chunks.push(Buffer.from(chunk));
+       }
+       text = Buffer.concat(chunks).toString('utf-8');
+     } else {
+       text = input;
+     }
+
+     if (text.trim().length === 0) {
+       throw new Error('Input text is empty');
+     }
+
+     return this.traced(async () => {
+       if (!this.speechClient) {
+         throw new Error('No speech client configured');
+       }
+
+       let model;
+       if (options?.speaker) {
+         model = this.speechModel?.name + '-' + options.speaker;
+       } else if (this.speaker) {
+         model = this.speechModel?.name + '-' + this.speaker;
+       }
+
+       const speakClient = this.speechClient.speak;
+       const response = await speakClient.request(
+         { text },
+         {
+           model,
+           ...options,
+         },
+       );
+
+       const webStream = await response.getStream();
+       if (!webStream) {
+         throw new Error('No stream returned from Deepgram');
+       }
+
+       const reader = webStream.getReader();
+       const nodeStream = new PassThrough();
+
+       // Add error handling for the stream processing
+       (async () => {
+         try {
+           while (true) {
+             const { done, value } = await reader.read();
+             if (done) {
+               nodeStream.end();
+               break;
+             }
+             nodeStream.write(value);
+           }
+         } catch (error) {
+           nodeStream.destroy(error as Error);
+         }
+       })().catch(error => {
+         nodeStream.destroy(error as Error);
+       });
+
+       return nodeStream;
+     }, 'voice.deepgram.speak')();
+   }
+
+   async listen(
+     audioStream: NodeJS.ReadableStream,
+     options?: {
+       [key: string]: any;
+     },
+   ): Promise<string> {
+     if (!this.listeningClient) {
+       throw new Error('Deepgram listening client not configured');
+     }
+
+     const chunks: Buffer[] = [];
+     for await (const chunk of audioStream) {
+       chunks.push(Buffer.from(chunk));
+     }
+     const buffer = Buffer.concat(chunks);
+
+     return this.traced(async () => {
+       if (!this.listeningClient) {
+         throw new Error('No listening client configured');
+       }
+       const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {
+         model: this.listeningModel?.name,
+         ...options,
+       });
+
+       if (error) {
+         throw error;
+       }
+
+       const transcript = result.results?.channels?.[0]?.alternatives?.[0]?.transcript;
+       if (!transcript) {
+         throw new Error('No transcript found in Deepgram response');
+       }
+
+       return transcript;
+     }, 'voice.deepgram.listen')();
+   }
+ }
+
+ export type { DeepgramVoiceConfig, DeepgramVoiceId, DeepgramModel };
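One detail worth calling out in `speak()` above: the model string sent to Deepgram is assembled by joining the speech model name and the speaker id with a hyphen, which is why the configuration is split across two fields. A small sketch of that composition in isolation:

```typescript
// Mirrors the composition in speak(): `${speechModel.name}-${speaker}`.
// 'aura' (model family) + 'asteria-en' (speaker id) yields 'aura-asteria-en',
// the full TTS model name Deepgram expects.
const speechModelName = 'aura';
const speaker = 'asteria-en';
const model = `${speechModelName}-${speaker}`;
console.log(model); // "aura-asteria-en"
```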
package/src/voices.ts ADDED
@@ -0,0 +1,28 @@
+ /**
+  * List of available Deepgram voice models for text-to-speech
+  * Each voice is designed for specific use cases and languages
+  * Format: {name}-{language} (e.g. asteria-en)
+  */
+ export const DEEPGRAM_VOICES = [
+   'asteria-en',
+   'luna-en',
+   'stella-en',
+   'athena-en',
+   'hera-en',
+   'orion-en',
+   'arcas-en',
+   'perseus-en',
+   'angus-en',
+   'orpheus-en',
+   'helios-en',
+   'zeus-en',
+ ] as const;
+
+ export type DeepgramVoiceId = (typeof DEEPGRAM_VOICES)[number];
+
+ /**
+  * List of available Deepgram models for text-to-speech and speech-to-text
+  */
+ export const DEEPGRAM_MODELS = ['aura', 'whisper', 'base', 'enhanced', 'nova', 'nova-2', 'nova-3'] as const;
+
+ export type DeepgramModel = (typeof DEEPGRAM_MODELS)[number];
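Because `DEEPGRAM_VOICES` is a readonly tuple, it can double as a runtime validator. A minimal sketch of a type guard built on it (the helper name is hypothetical, not part of this package):

```typescript
import { DEEPGRAM_VOICES } from './voices';
import type { DeepgramVoiceId } from './voices';

// Hypothetical helper: narrows an arbitrary string (e.g. user input)
// to the DeepgramVoiceId union before using it as a speaker.
function isDeepgramVoiceId(value: string): value is DeepgramVoiceId {
  return (DEEPGRAM_VOICES as readonly string[]).includes(value);
}
```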
package/tsconfig.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "extends": "../../tsconfig.node.json",
+   "include": ["src/**/*"],
+   "exclude": ["node_modules", "**/*.test.ts"]
+ }
package/vitest.config.ts ADDED
@@ -0,0 +1,9 @@
+ import { defineConfig } from 'vitest/config';
+
+ export default defineConfig({
+   test: {
+     globals: true,
+     environment: 'node',
+     include: ['src/**/*.test.ts'],
+   },
+ });