@mastra/voice-deepgram 0.0.0-storage-20250225005900 → 0.0.0-vnextWorkflows-20250416071310
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{LICENSE → LICENSE.md} +3 -1
- package/dist/_tsup-dts-rollup.d.cts +50 -0
- package/dist/index.cjs +171 -0
- package/dist/index.d.cts +4 -0
- package/dist/index.js +11 -5
- package/package.json +20 -12
- package/.turbo/turbo-build.log +0 -19
- package/CHANGELOG.md +0 -32
- package/__fixtures__/voice-test.m4a +0 -0
- package/eslint.config.js +0 -6
- package/src/index.test.ts +0 -134
- package/src/index.ts +0 -190
- package/src/voices.ts +0 -28
- package/tsconfig.json +0 -5
- package/vitest.config.ts +0 -9
package/{LICENSE → LICENSE.md}
RENAMED
package/dist/_tsup-dts-rollup.d.cts
ADDED
@@ -0,0 +1,50 @@
+import { MastraVoice } from '@mastra/core/voice';
+
+/**
+ * List of available Deepgram models for text-to-speech and speech-to-text
+ */
+export declare const DEEPGRAM_MODELS: readonly ["aura", "whisper", "base", "enhanced", "nova", "nova-2", "nova-3"];
+
+/**
+ * List of available Deepgram voice models for text-to-speech
+ * Each voice is designed for specific use cases and languages
+ * Format: {name}-{language} (e.g. asteria-en)
+ */
+export declare const DEEPGRAM_VOICES: readonly ["asteria-en", "luna-en", "stella-en", "athena-en", "hera-en", "orion-en", "arcas-en", "perseus-en", "angus-en", "orpheus-en", "helios-en", "zeus-en"];
+
+declare type DeepgramModel = (typeof DEEPGRAM_MODELS)[number];
+export { DeepgramModel }
+export { DeepgramModel as DeepgramModel_alias_1 }
+
+export declare class DeepgramVoice extends MastraVoice {
+    private speechClient?;
+    private listeningClient?;
+    constructor({ speechModel, listeningModel, speaker, }?: {
+        speechModel?: DeepgramVoiceConfig;
+        listeningModel?: DeepgramVoiceConfig;
+        speaker?: DeepgramVoiceId;
+    });
+    getSpeakers(): Promise<{
+        voiceId: "asteria-en" | "luna-en" | "stella-en" | "athena-en" | "hera-en" | "orion-en" | "arcas-en" | "perseus-en" | "angus-en" | "orpheus-en" | "helios-en" | "zeus-en";
+    }[]>;
+    speak(input: string | NodeJS.ReadableStream, options?: {
+        speaker?: string;
+        [key: string]: any;
+    }): Promise<NodeJS.ReadableStream>;
+    listen(audioStream: NodeJS.ReadableStream, options?: {
+        [key: string]: any;
+    }): Promise<string>;
+}
+
+export declare interface DeepgramVoiceConfig {
+    name?: DeepgramModel;
+    apiKey?: string;
+    properties?: Record<string, any>;
+    language?: string;
+}
+
+declare type DeepgramVoiceId = (typeof DEEPGRAM_VOICES)[number];
+export { DeepgramVoiceId }
+export { DeepgramVoiceId as DeepgramVoiceId_alias_1 }
+
+export { }
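The rolled-up declarations above define the public API surface (constructor options, getSpeakers, speak, listen). A minimal usage sketch, assuming DEEPGRAM_API_KEY is set in the environment and a local file audio.m4a exists (both are assumptions, not part of the diff):

import { createReadStream } from 'fs';
import { DeepgramVoice } from '@mastra/voice-deepgram';

// Defaults (see dist/index.cjs below): speech model "aura", listening model "nova", speaker "asteria-en".
const voice = new DeepgramVoice();

// Text-to-speech: resolves to a NodeJS.ReadableStream of audio.
const audio = await voice.speak('Hello from Deepgram', { speaker: 'luna-en' });

// Speech-to-text: accepts a readable stream and resolves to the transcript string.
const transcript = await voice.listen(createReadStream('audio.m4a'), { filetype: 'm4a' });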
package/dist/index.cjs
ADDED
@@ -0,0 +1,171 @@
+'use strict';
+
+var stream = require('stream');
+var sdk = require('@deepgram/sdk');
+var voice = require('@mastra/core/voice');
+
+// src/index.ts
+
+// src/voices.ts
+var DEEPGRAM_VOICES = [
+  "asteria-en",
+  "luna-en",
+  "stella-en",
+  "athena-en",
+  "hera-en",
+  "orion-en",
+  "arcas-en",
+  "perseus-en",
+  "angus-en",
+  "orpheus-en",
+  "helios-en",
+  "zeus-en"
+];
+
+// src/index.ts
+var DeepgramVoice = class extends voice.MastraVoice {
+  speechClient;
+  listeningClient;
+  constructor({
+    speechModel,
+    listeningModel,
+    speaker
+  } = {}) {
+    const defaultApiKey = process.env.DEEPGRAM_API_KEY;
+    const defaultSpeechModel = {
+      name: "aura",
+      apiKey: defaultApiKey
+    };
+    const defaultListeningModel = {
+      name: "nova",
+      apiKey: defaultApiKey
+    };
+    super({
+      speechModel: {
+        name: speechModel?.name ?? defaultSpeechModel.name,
+        apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
+      },
+      listeningModel: {
+        name: listeningModel?.name ?? defaultListeningModel.name,
+        apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey
+      },
+      speaker
+    });
+    const speechApiKey = speechModel?.apiKey || defaultApiKey;
+    const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
+    if (!speechApiKey && !listeningApiKey) {
+      throw new Error("At least one of DEEPGRAM_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set");
+    }
+    if (speechApiKey) {
+      this.speechClient = sdk.createClient(speechApiKey);
+    }
+    if (listeningApiKey) {
+      this.listeningClient = sdk.createClient(listeningApiKey);
+    }
+    this.speaker = speaker || "asteria-en";
+  }
+  async getSpeakers() {
+    return this.traced(async () => {
+      return DEEPGRAM_VOICES.map((voice) => ({
+        voiceId: voice
+      }));
+    }, "voice.deepgram.getSpeakers")();
+  }
+  async speak(input, options) {
+    if (!this.speechClient) {
+      throw new Error("Deepgram speech client not configured");
+    }
+    let text;
+    if (typeof input !== "string") {
+      const chunks = [];
+      for await (const chunk of input) {
+        if (typeof chunk === "string") {
+          chunks.push(Buffer.from(chunk));
+        } else {
+          chunks.push(chunk);
+        }
+      }
+      text = Buffer.concat(chunks).toString("utf-8");
+    } else {
+      text = input;
+    }
+    if (text.trim().length === 0) {
+      throw new Error("Input text is empty");
+    }
+    return this.traced(async () => {
+      if (!this.speechClient) {
+        throw new Error("No speech client configured");
+      }
+      let model;
+      if (options?.speaker) {
+        model = this.speechModel?.name + "-" + options.speaker;
+      } else if (this.speaker) {
+        model = this.speechModel?.name + "-" + this.speaker;
+      }
+      const speakClient = this.speechClient.speak;
+      const response = await speakClient.request(
+        { text },
+        {
+          model,
+          ...options
+        }
+      );
+      const webStream = await response.getStream();
+      if (!webStream) {
+        throw new Error("No stream returned from Deepgram");
+      }
+      const reader = webStream.getReader();
+      const nodeStream = new stream.PassThrough();
+      (async () => {
+        try {
+          while (true) {
+            const { done, value } = await reader.read();
+            if (done) {
+              nodeStream.end();
+              break;
+            }
+            nodeStream.write(value);
+          }
+        } catch (error) {
+          nodeStream.destroy(error);
+        }
+      })().catch((error) => {
+        nodeStream.destroy(error);
+      });
+      return nodeStream;
+    }, "voice.deepgram.speak")();
+  }
+  async listen(audioStream, options) {
+    if (!this.listeningClient) {
+      throw new Error("Deepgram listening client not configured");
+    }
+    const chunks = [];
+    for await (const chunk of audioStream) {
+      if (typeof chunk === "string") {
+        chunks.push(Buffer.from(chunk));
+      } else {
+        chunks.push(chunk);
+      }
+    }
+    const buffer = Buffer.concat(chunks);
+    return this.traced(async () => {
+      if (!this.listeningClient) {
+        throw new Error("No listening client configured");
+      }
+      const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {
+        model: this.listeningModel?.name,
+        ...options
+      });
+      if (error) {
+        throw error;
+      }
+      const transcript = result.results?.channels?.[0]?.alternatives?.[0]?.transcript;
+      if (!transcript) {
+        throw new Error("No transcript found in Deepgram response");
+      }
+      return transcript;
+    }, "voice.deepgram.listen")();
+  }
+};
+
+exports.DeepgramVoice = DeepgramVoice;
package/dist/index.d.cts
ADDED
package/dist/index.js
CHANGED
@@ -1,6 +1,6 @@
+import { PassThrough } from 'stream';
 import { createClient } from '@deepgram/sdk';
 import { MastraVoice } from '@mastra/core/voice';
-import { PassThrough } from 'stream';
 
 // src/index.ts
 
@@ -51,8 +51,6 @@ var DeepgramVoice = class extends MastraVoice {
     });
     const speechApiKey = speechModel?.apiKey || defaultApiKey;
     const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
-    console.log("speechApiKey", speechApiKey);
-    console.log("listeningApiKey", listeningApiKey);
     if (!speechApiKey && !listeningApiKey) {
       throw new Error("At least one of DEEPGRAM_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set");
     }
@@ -79,7 +77,11 @@ var DeepgramVoice = class extends MastraVoice {
     if (typeof input !== "string") {
       const chunks = [];
      for await (const chunk of input) {
-        chunks.push(Buffer.from(chunk));
+        if (typeof chunk === "string") {
+          chunks.push(Buffer.from(chunk));
+        } else {
+          chunks.push(chunk);
+        }
       }
       text = Buffer.concat(chunks).toString("utf-8");
     } else {
@@ -137,7 +139,11 @@ var DeepgramVoice = class extends MastraVoice {
     }
     const chunks = [];
     for await (const chunk of audioStream) {
-      chunks.push(Buffer.from(chunk));
+      if (typeof chunk === "string") {
+        chunks.push(Buffer.from(chunk));
+      } else {
+        chunks.push(chunk);
+      }
     }
     const buffer = Buffer.concat(chunks);
     return this.traced(async () => {
package/package.json
CHANGED
@@ -1,8 +1,11 @@
 {
   "name": "@mastra/voice-deepgram",
-  "version": "0.0.0-storage-20250225005900",
+  "version": "0.0.0-vnextWorkflows-20250416071310",
   "description": "Mastra Deepgram voice integration",
   "type": "module",
+  "files": [
+    "dist"
+  ],
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "exports": {
@@ -10,26 +13,31 @@
       "import": {
         "types": "./dist/index.d.ts",
         "default": "./dist/index.js"
+      },
+      "require": {
+        "types": "./dist/index.d.cts",
+        "default": "./dist/index.cjs"
       }
     },
     "./package.json": "./package.json"
   },
+  "license": "Elastic-2.0",
   "dependencies": {
-    "@deepgram/sdk": "^3.
-    "zod": "^3.24.
-    "@mastra/core": "
+    "@deepgram/sdk": "^3.11.2",
+    "zod": "^3.24.2",
+    "@mastra/core": "0.0.0-vnextWorkflows-20250416071310"
   },
   "devDependencies": {
-    "@microsoft/api-extractor": "^7.
-    "@types/node": "^
-    "
-    "
-    "
-    "
-    "@internal/lint": "0.0.
+    "@microsoft/api-extractor": "^7.52.2",
+    "@types/node": "^20.17.27",
+    "eslint": "^9.23.0",
+    "tsup": "^8.4.0",
+    "typescript": "^5.8.2",
+    "vitest": "^2.1.9",
+    "@internal/lint": "0.0.2"
   },
   "scripts": {
-    "build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
+    "build": "tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting",
     "build:watch": "pnpm build --watch",
     "test": "vitest run",
     "lint": "eslint ."
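The exports map now carries a "require" condition alongside "import", so CommonJS consumers can load the CJS bundle added in this release. A minimal sketch of a CJS consumer (file name and option values are illustrative, not taken from the diff):

// consumer.cjs: require() resolves to ./dist/index.cjs, with types from ./dist/index.d.cts
const { DeepgramVoice } = require('@mastra/voice-deepgram');

const voice = new DeepgramVoice({
  listeningModel: { name: 'nova-3', apiKey: process.env.DEEPGRAM_API_KEY },
});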
package/.turbo/turbo-build.log
DELETED
@@ -1,19 +0,0 @@
-
-
-> @mastra/voice-deepgram@0.1.0-alpha.2 build /Users/ward/projects/mastra/mastra/voice/deepgram
-> tsup src/index.ts --format esm --experimental-dts --clean --treeshake
-
-CLI Building entry: src/index.ts
-CLI Using tsconfig: tsconfig.json
-CLI tsup v8.3.6
-TSC Build start
-TSC ⚡️ Build success in 2750ms
-DTS Build start
-CLI Target: es2022
-Analysis will use the bundled TypeScript version 5.7.3
-Writing package typings: /Users/ward/projects/mastra/mastra/voice/deepgram/dist/_tsup-dts-rollup.d.ts
-DTS ⚡️ Build success in 1597ms
-CLI Cleaning output folder
-ESM Build start
-ESM dist/index.js 4.50 KB
-ESM ⚡️ Build success in 115ms
package/CHANGELOG.md
DELETED
@@ -1,32 +0,0 @@
-# Changelog
-
-## 0.0.0-storage-20250225005900
-
-### Patch Changes
-
-- abe4600: deprecate @mastra/speech-deepgram for @mastra/voice-deepgram
-- Updated dependencies [7fceae1]
-- Updated dependencies [f626fbb]
-- Updated dependencies [8db2a28]
-  - @mastra/core@0.0.0-storage-20250225005900
-
-## 0.1.0-alpha.2
-
-### Patch Changes
-
-- abe4600: deprecate @mastra/speech-deepgram for @mastra/voice-deepgram
-- Updated dependencies [7fceae1]
-- Updated dependencies [f626fbb]
-  - @mastra/core@0.4.2-alpha.0
-
-## 0.1.0 (2024-XX-XX)
-
-This package replaces the deprecated @mastra/speech-deepgram package. All functionality has been migrated to this new package with a more consistent naming scheme.
-
-### Changes from @mastra/speech-deepgram
-
-- Package renamed from @mastra/speech-deepgram to @mastra/voice-deepgram
-- All functionality remains the same
-- Import paths should be updated from '@mastra/speech-deepgram' to '@mastra/voice-deepgram'
-
-For a complete history of changes prior to the rename, please see the changelog of the original package.
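The migration note in the deleted changelog amounts to swapping the package specifier; assuming the old package exposed the same export name (an assumption, not confirmed by the diff), the change looks like:

- import { DeepgramVoice } from '@mastra/speech-deepgram';
+ import { DeepgramVoice } from '@mastra/voice-deepgram';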
package/__fixtures__/voice-test.m4a
DELETED
Binary file
package/eslint.config.js
DELETED
package/src/index.test.ts
DELETED
@@ -1,134 +0,0 @@
-import { writeFileSync, mkdirSync, createReadStream } from 'fs';
-import path from 'path';
-import { PassThrough } from 'stream';
-import { describe, expect, it, beforeAll } from 'vitest';
-
-import { DeepgramVoice } from './index.js';
-
-describe('DeepgramVoice Integration Tests', () => {
-  let voice: DeepgramVoice;
-  const outputDir = path.join(process.cwd(), 'test-outputs');
-
-  beforeAll(() => {
-    try {
-      mkdirSync(outputDir, { recursive: true });
-    } catch (err) {
-      console.log('Directory already exists: ', err);
-    }
-
-    voice = new DeepgramVoice({
-      speechModel: {
-        name: 'aura',
-      },
-      listeningModel: {
-        name: 'whisper',
-      },
-      speaker: 'asteria-en',
-    });
-  });
-
-  describe('getSpeakers', () => {
-    it('should list available voices', async () => {
-      const speakers = await voice.getSpeakers();
-      const expectedVoiceIds = ['asteria-en', 'stella-en', 'luna-en'];
-      expectedVoiceIds.forEach(voiceId => {
-        expect(speakers.some(s => s.voiceId === voiceId)).toBe(true);
-      });
-    });
-  });
-
-  describe('speak', () => {
-    it('should generate audio and save to file', async () => {
-      const audioResult = await voice.speak('Hello World', {
-        text: 'Hello World',
-      });
-
-      const chunks: Buffer[] = [];
-      for await (const chunk of audioResult) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-      }
-      const audioBuffer = Buffer.concat(chunks);
-      const outputPath = path.join(outputDir, 'deepgram-speech-test.mp3');
-      writeFileSync(outputPath, audioBuffer);
-      expect(audioBuffer.length).toBeGreaterThan(0);
-    }, 10000);
-
-    it('should work with different parameters', async () => {
-      const audioResult = await voice.speak('Hello World', {
-        text: 'Test with parameters',
-        speaker: 'luna-en',
-      });
-
-      const chunks: Buffer[] = [];
-      for await (const chunk of audioResult) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-      }
-      const audioBuffer = Buffer.concat(chunks);
-      const outputPath = path.join(outputDir, 'deepgram-speech-test-params.mp3');
-      writeFileSync(outputPath, audioBuffer);
-      expect(audioBuffer.length).toBeGreaterThan(0);
-    }, 10000);
-  });
-
-  // Error cases
-  describe('error handling', () => {
-    it('should handle invalid voice names', async () => {
-      await expect(voice.speak('Test', { speaker: 'invalid_voice' })).rejects.toThrow();
-    });
-
-    it('should handle empty text', async () => {
-      await expect(voice.speak('', { speaker: 'asteria-en' })).rejects.toThrow('Input text is empty');
-    });
-
-    it('should handle whitespace-only text', async () => {
-      await expect(voice.speak(' \n\t ', { speaker: 'asteria-en' })).rejects.toThrow('Input text is empty');
-    });
-  });
-
-  describe('listen', () => {
-    it('should transcribe audio buffer', async () => {
-      // First generate some audio to transcribe
-      const audioResult = await voice.speak('This is a test of transcription');
-
-      // Collect audio chunks
-      const chunks: Buffer[] = [];
-      for await (const chunk of audioResult) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
-      }
-      const audioBuffer = Buffer.concat(chunks);
-
-      // Create stream from the buffer
-      const stream = new PassThrough();
-      stream.end(audioBuffer);
-      const result = await voice.listen(stream);
-
-      expect(typeof result).toBe('string');
-      expect(result.toLowerCase()).toContain('test');
-      expect(result.toLowerCase()).toContain('transcription');
-    }, 15000);
-
-    it('should transcribe audio from fixture file', async () => {
-      const fixturePath = path.join(process.cwd(), '__fixtures__', 'voice-test.m4a');
-      const audioStream = createReadStream(fixturePath);
-
-      console.log('listening to audio stream');
-      const text = await voice.listen(audioStream, {
-        filetype: 'm4a',
-      });
-      console.log('text', text);
-
-      expect(text).toBeTruthy();
-      console.log(text);
-      expect(typeof text).toBe('string');
-      expect(text.length).toBeGreaterThan(0);
-    }, 15000);
-
-    it('should handle invalid audio', async () => {
-      const invalidAudio = Buffer.from('not valid audio');
-      const stream = new PassThrough();
-      stream.end(invalidAudio);
-
-      await expect(voice.listen(stream)).rejects.toThrow();
-    });
-  });
-});
package/src/index.ts
DELETED
@@ -1,190 +0,0 @@
-import { createClient } from '@deepgram/sdk';
-import { MastraVoice } from '@mastra/core/voice';
-import { PassThrough } from 'stream';
-
-import { DEEPGRAM_VOICES } from './voices';
-import type { DeepgramVoiceId, DeepgramModel } from './voices';
-
-interface DeepgramVoiceConfig {
-  name?: DeepgramModel;
-  apiKey?: string;
-  properties?: Record<string, any>;
-  language?: string;
-}
-
-export class DeepgramVoice extends MastraVoice {
-  private speechClient?: ReturnType<typeof createClient>;
-  private listeningClient?: ReturnType<typeof createClient>;
-
-  constructor({
-    speechModel,
-    listeningModel,
-    speaker,
-  }: { speechModel?: DeepgramVoiceConfig; listeningModel?: DeepgramVoiceConfig; speaker?: DeepgramVoiceId } = {}) {
-    const defaultApiKey = process.env.DEEPGRAM_API_KEY;
-
-    const defaultSpeechModel = {
-      name: 'aura',
-      apiKey: defaultApiKey,
-    };
-
-    const defaultListeningModel = {
-      name: 'nova',
-      apiKey: defaultApiKey,
-    };
-
-    super({
-      speechModel: {
-        name: speechModel?.name ?? defaultSpeechModel.name,
-        apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,
-      },
-      listeningModel: {
-        name: listeningModel?.name ?? defaultListeningModel.name,
-        apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,
-      },
-      speaker,
-    });
-
-    const speechApiKey = speechModel?.apiKey || defaultApiKey;
-    const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
-    console.log('speechApiKey', speechApiKey);
-    console.log('listeningApiKey', listeningApiKey);
-    if (!speechApiKey && !listeningApiKey) {
-      throw new Error('At least one of DEEPGRAM_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');
-    }
-
-    if (speechApiKey) {
-      this.speechClient = createClient(speechApiKey);
-    }
-    if (listeningApiKey) {
-      this.listeningClient = createClient(listeningApiKey);
-    }
-
-    this.speaker = speaker || 'asteria-en';
-  }
-
-  async getSpeakers() {
-    return this.traced(async () => {
-      return DEEPGRAM_VOICES.map(voice => ({
-        voiceId: voice,
-      }));
-    }, 'voice.deepgram.getSpeakers')();
-  }
-
-  async speak(
-    input: string | NodeJS.ReadableStream,
-    options?: {
-      speaker?: string;
-      [key: string]: any;
-    },
-  ): Promise<NodeJS.ReadableStream> {
-    if (!this.speechClient) {
-      throw new Error('Deepgram speech client not configured');
-    }
-
-    let text: string;
-    if (typeof input !== 'string') {
-      const chunks: Buffer[] = [];
-      for await (const chunk of input) {
-        chunks.push(Buffer.from(chunk));
-      }
-      text = Buffer.concat(chunks).toString('utf-8');
-    } else {
-      text = input;
-    }
-
-    if (text.trim().length === 0) {
-      throw new Error('Input text is empty');
-    }
-
-    return this.traced(async () => {
-      if (!this.speechClient) {
-        throw new Error('No speech client configured');
-      }
-
-      let model;
-      if (options?.speaker) {
-        model = this.speechModel?.name + '-' + options.speaker;
-      } else if (this.speaker) {
-        model = this.speechModel?.name + '-' + this.speaker;
-      }
-
-      const speakClient = this.speechClient.speak;
-      const response = await speakClient.request(
-        { text },
-        {
-          model,
-          ...options,
-        },
-      );
-
-      const webStream = await response.getStream();
-      if (!webStream) {
-        throw new Error('No stream returned from Deepgram');
-      }
-
-      const reader = webStream.getReader();
-      const nodeStream = new PassThrough();
-
-      // Add error handling for the stream processing
-      (async () => {
-        try {
-          while (true) {
-            const { done, value } = await reader.read();
-            if (done) {
-              nodeStream.end();
-              break;
-            }
-            nodeStream.write(value);
-          }
-        } catch (error) {
-          nodeStream.destroy(error as Error);
-        }
-      })().catch(error => {
-        nodeStream.destroy(error as Error);
-      });
-
-      return nodeStream;
-    }, 'voice.deepgram.speak')();
-  }
-
-  async listen(
-    audioStream: NodeJS.ReadableStream,
-    options?: {
-      [key: string]: any;
-    },
-  ): Promise<string> {
-    if (!this.listeningClient) {
-      throw new Error('Deepgram listening client not configured');
-    }
-
-    const chunks: Buffer[] = [];
-    for await (const chunk of audioStream) {
-      chunks.push(Buffer.from(chunk));
-    }
-    const buffer = Buffer.concat(chunks);
-
-    return this.traced(async () => {
-      if (!this.listeningClient) {
-        throw new Error('No listening client configured');
-      }
-      const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {
-        model: this.listeningModel?.name,
-        ...options,
-      });
-
-      if (error) {
-        throw error;
-      }
-
-      const transcript = result.results?.channels?.[0]?.alternatives?.[0]?.transcript;
-      if (!transcript) {
-        throw new Error('No transcript found in Deepgram response');
-      }
-
-      return transcript;
-    }, 'voice.deepgram.listen')();
-  }
-}
-
-export type { DeepgramVoiceConfig, DeepgramVoiceId, DeepgramModel };
package/src/voices.ts
DELETED
@@ -1,28 +0,0 @@
-/**
- * List of available Deepgram voice models for text-to-speech
- * Each voice is designed for specific use cases and languages
- * Format: {name}-{language} (e.g. asteria-en)
- */
-export const DEEPGRAM_VOICES = [
-  'asteria-en',
-  'luna-en',
-  'stella-en',
-  'athena-en',
-  'hera-en',
-  'orion-en',
-  'arcas-en',
-  'perseus-en',
-  'angus-en',
-  'orpheus-en',
-  'helios-en',
-  'zeus-en',
-] as const;
-
-export type DeepgramVoiceId = (typeof DEEPGRAM_VOICES)[number];
-
-/**
- * List of available Deepgram models for text-to-speech and speech-to-text
- */
-export const DEEPGRAM_MODELS = ['aura', 'whisper', 'base', 'enhanced', 'nova', 'nova-2', 'nova-3'] as const;
-
-export type DeepgramModel = (typeof DEEPGRAM_MODELS)[number];
package/tsconfig.json
DELETED