@mastra/voice-deepgram 0.0.0-mastra-3338-mastra-memory-pinecone-20250507174328 → 0.0.0-mastra-auto-detect-server-20260108233416
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1739 -0
- package/LICENSE.md +11 -42
- package/README.md +10 -7
- package/dist/docs/README.md +32 -0
- package/dist/docs/SKILL.md +33 -0
- package/dist/docs/SOURCE_MAP.json +6 -0
- package/dist/docs/agents/01-adding-voice.md +352 -0
- package/dist/docs/voice/01-overview.md +1019 -0
- package/dist/docs/voice/02-reference.md +62 -0
- package/dist/index.cjs +98 -61
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +52 -4
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +98 -61
- package/dist/index.js.map +1 -0
- package/dist/voices.d.ts +13 -0
- package/dist/voices.d.ts.map +1 -0
- package/package.json +35 -16
- package/dist/_tsup-dts-rollup.d.cts +0 -50
- package/dist/_tsup-dts-rollup.d.ts +0 -50
- package/dist/index.d.cts +0 -4
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Voice API Reference
|
|
2
|
+
|
|
3
|
+
> API reference for voice - 1 entry
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Reference: Deepgram
|
|
9
|
+
|
|
10
|
+
> Documentation for the Deepgram voice implementation, providing text-to-speech and speech-to-text capabilities with multiple voice models and languages.
|
|
11
|
+
|
|
12
|
+
The Deepgram voice implementation in Mastra provides text-to-speech (TTS) and speech-to-text (STT) capabilities using Deepgram's API. It supports multiple voice models and languages, with configurable options for both speech synthesis and transcription.
|
|
13
|
+
|
|
14
|
+
## Usage Example
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
import { DeepgramVoice } from "@mastra/voice-deepgram";
|
|
18
|
+
|
|
19
|
+
// Initialize with default configuration (uses DEEPGRAM_API_KEY environment variable)
|
|
20
|
+
const voice = new DeepgramVoice();
|
|
21
|
+
|
|
22
|
+
// Initialize with custom configuration
|
|
23
|
+
const customVoice = new DeepgramVoice({
|
|
24
|
+
speechModel: {
|
|
25
|
+
name: "aura",
|
|
26
|
+
apiKey: "your-api-key",
|
|
27
|
+
},
|
|
28
|
+
listeningModel: {
|
|
29
|
+
name: "nova-2",
|
|
30
|
+
apiKey: "your-api-key",
|
|
31
|
+
},
|
|
32
|
+
speaker: "asteria-en",
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
// Text-to-Speech
|
|
36
|
+
const audioStream = await voice.speak("Hello, world!");
|
|
37
|
+
|
|
38
|
+
// Speech-to-Text
|
|
39
|
+
const transcript = await voice.listen(audioStream);
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Constructor Parameters
|
|
43
|
+
|
|
44
|
+
### DeepgramVoiceConfig
|
|
45
|
+
|
|
46
|
+
## Methods
|
|
47
|
+
|
|
48
|
+
### speak()
|
|
49
|
+
|
|
50
|
+
Converts text to speech using the configured speech model and voice.
|
|
51
|
+
|
|
52
|
+
Returns: `Promise<NodeJS.ReadableStream>`
|
|
53
|
+
|
|
54
|
+
### listen()
|
|
55
|
+
|
|
56
|
+
Converts speech to text using the configured listening model.
|
|
57
|
+
|
|
58
|
+
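Returns: `Promise<any>` — resolves to an object with `transcript` (the full transcript string), `words` (word objects with timing and confidence), and `raw` (the complete Deepgram API response), plus `speakerSegments` when the `diarize` option is true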
|
|
59
|
+
|
|
60
|
+
### getSpeakers()
|
|
61
|
+
|
|
62
|
+
Returns a list of available voice options, each an object with a `voiceId` field (for example `"asteria-en"`).
|
package/dist/index.cjs
CHANGED
|
@@ -26,6 +26,9 @@ var DEEPGRAM_VOICES = [
|
|
|
26
26
|
var DeepgramVoice = class extends voice.MastraVoice {
|
|
27
27
|
speechClient;
|
|
28
28
|
listeningClient;
|
|
29
|
+
storedSpeechModel;
|
|
30
|
+
storedListeningModel;
|
|
31
|
+
storedSpeaker;
|
|
29
32
|
constructor({
|
|
30
33
|
speechModel,
|
|
31
34
|
listeningModel,
|
|
@@ -51,6 +54,14 @@ var DeepgramVoice = class extends voice.MastraVoice {
|
|
|
51
54
|
},
|
|
52
55
|
speaker
|
|
53
56
|
});
|
|
57
|
+
this.storedSpeechModel = {
|
|
58
|
+
name: speechModel?.name ?? defaultSpeechModel.name,
|
|
59
|
+
apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
|
|
60
|
+
};
|
|
61
|
+
this.storedListeningModel = {
|
|
62
|
+
name: listeningModel?.name ?? defaultListeningModel.name,
|
|
63
|
+
apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey
|
|
64
|
+
};
|
|
54
65
|
const speechApiKey = speechModel?.apiKey || defaultApiKey;
|
|
55
66
|
const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
|
|
56
67
|
if (!speechApiKey && !listeningApiKey) {
|
|
@@ -62,14 +73,12 @@ var DeepgramVoice = class extends voice.MastraVoice {
|
|
|
62
73
|
if (listeningApiKey) {
|
|
63
74
|
this.listeningClient = sdk.createClient(listeningApiKey);
|
|
64
75
|
}
|
|
65
|
-
this.
|
|
76
|
+
this.storedSpeaker = speaker || "asteria-en";
|
|
66
77
|
}
|
|
67
78
|
async getSpeakers() {
|
|
68
|
-
return
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
}));
|
|
72
|
-
}, "voice.deepgram.getSpeakers")();
|
|
79
|
+
return DEEPGRAM_VOICES.map((voice) => ({
|
|
80
|
+
voiceId: voice
|
|
81
|
+
}));
|
|
73
82
|
}
|
|
74
83
|
async speak(input, options) {
|
|
75
84
|
if (!this.speechClient) {
|
|
@@ -92,49 +101,60 @@ var DeepgramVoice = class extends voice.MastraVoice {
|
|
|
92
101
|
if (text.trim().length === 0) {
|
|
93
102
|
throw new Error("Input text is empty");
|
|
94
103
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
}
|
|
105
|
-
const speakClient = this.speechClient.speak;
|
|
106
|
-
const response = await speakClient.request(
|
|
107
|
-
{ text },
|
|
108
|
-
{
|
|
109
|
-
model,
|
|
110
|
-
...options
|
|
111
|
-
}
|
|
112
|
-
);
|
|
113
|
-
const webStream = await response.getStream();
|
|
114
|
-
if (!webStream) {
|
|
115
|
-
throw new Error("No stream returned from Deepgram");
|
|
104
|
+
const baseModel = this.storedSpeechModel?.name;
|
|
105
|
+
const speakerId = options?.speaker || this.storedSpeaker;
|
|
106
|
+
const modelName = baseModel && speakerId ? speakerId.startsWith(`${baseModel}-`) ? speakerId : `${baseModel}-${speakerId}` : baseModel || speakerId;
|
|
107
|
+
const speakClient = this.speechClient.speak;
|
|
108
|
+
const response = await speakClient.request(
|
|
109
|
+
{ text },
|
|
110
|
+
{
|
|
111
|
+
model: modelName,
|
|
112
|
+
...Object.fromEntries(Object.entries(options ?? {}).filter(([k]) => k !== "speaker"))
|
|
116
113
|
}
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
114
|
+
);
|
|
115
|
+
const webStream = await response.getStream();
|
|
116
|
+
if (!webStream) {
|
|
117
|
+
throw new Error("No stream returned from Deepgram");
|
|
118
|
+
}
|
|
119
|
+
const reader = webStream.getReader();
|
|
120
|
+
const nodeStream = new stream.PassThrough();
|
|
121
|
+
(async () => {
|
|
122
|
+
try {
|
|
123
|
+
while (true) {
|
|
124
|
+
const { done, value } = await reader.read();
|
|
125
|
+
if (done) {
|
|
126
|
+
nodeStream.end();
|
|
127
|
+
break;
|
|
128
128
|
}
|
|
129
|
-
|
|
130
|
-
nodeStream.destroy(error);
|
|
129
|
+
nodeStream.write(value);
|
|
131
130
|
}
|
|
132
|
-
}
|
|
131
|
+
} catch (error) {
|
|
133
132
|
nodeStream.destroy(error);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
|
|
133
|
+
}
|
|
134
|
+
})().catch((error) => {
|
|
135
|
+
nodeStream.destroy(error);
|
|
136
|
+
});
|
|
137
|
+
return nodeStream;
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* Checks if listening capabilities are enabled.
|
|
141
|
+
*
|
|
142
|
+
* @returns {Promise<{ enabled: boolean }>}
|
|
143
|
+
*/
|
|
144
|
+
async getListener() {
|
|
145
|
+
return { enabled: true };
|
|
137
146
|
}
|
|
147
|
+
/**
|
|
148
|
+
* Transcribes audio with optional speaker diarization.
|
|
149
|
+
*
|
|
150
|
+
* @param audioStream - Audio input stream
|
|
151
|
+
* @param options - Transcription options (diarize, language, etc.)
|
|
152
|
+
* @returns Promise resolving to:
|
|
153
|
+
* - transcript: Full transcript string
|
|
154
|
+
* - words: Array of word objects with timing and confidence
|
|
155
|
+
* - raw: Complete Deepgram API response
|
|
156
|
+
* - speakerSegments: (when diarize=true) Array of {word, speaker, start, end}
|
|
157
|
+
*/
|
|
138
158
|
async listen(audioStream, options) {
|
|
139
159
|
if (!this.listeningClient) {
|
|
140
160
|
throw new Error("Deepgram listening client not configured");
|
|
@@ -148,24 +168,41 @@ var DeepgramVoice = class extends voice.MastraVoice {
|
|
|
148
168
|
}
|
|
149
169
|
}
|
|
150
170
|
const buffer = Buffer.concat(chunks);
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
171
|
+
const { diarize, diarize_speaker_count: _, ...restOptions } = options ?? {};
|
|
172
|
+
const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {
|
|
173
|
+
...restOptions,
|
|
174
|
+
model: this.storedListeningModel?.name,
|
|
175
|
+
diarize
|
|
176
|
+
});
|
|
177
|
+
if (error) {
|
|
178
|
+
throw error;
|
|
179
|
+
}
|
|
180
|
+
const channel = result.results?.channels?.[0];
|
|
181
|
+
const alt = channel?.alternatives?.[0];
|
|
182
|
+
if (!alt) {
|
|
183
|
+
return {
|
|
184
|
+
transcript: "",
|
|
185
|
+
words: [],
|
|
186
|
+
raw: result
|
|
187
|
+
};
|
|
188
|
+
}
|
|
189
|
+
const response = {
|
|
190
|
+
transcript: alt.transcript,
|
|
191
|
+
words: alt.words,
|
|
192
|
+
raw: result
|
|
193
|
+
};
|
|
194
|
+
if (diarize && alt.words) {
|
|
195
|
+
response.speakerSegments = alt.words.map((w) => ({
|
|
196
|
+
word: w.word,
|
|
197
|
+
speaker: w.speaker,
|
|
198
|
+
start: w.start,
|
|
199
|
+
end: w.end
|
|
200
|
+
}));
|
|
201
|
+
}
|
|
202
|
+
return response;
|
|
168
203
|
}
|
|
169
204
|
};
|
|
170
205
|
|
|
171
206
|
exports.DeepgramVoice = DeepgramVoice;
|
|
207
|
+
//# sourceMappingURL=index.cjs.map
|
|
208
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/voices.ts","../src/index.ts"],"names":["MastraVoice","createClient","PassThrough"],"mappings":";;;;;;;;;AAKO,IAAM,eAAA,GAAkB;AAAA,EAC7B,YAAA;AAAA,EACA,SAAA;AAAA,EACA,WAAA;AAAA,EACA,WAAA;AAAA,EACA,SAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,YAAA;AAAA,EACA,UAAA;AAAA,EACA,YAAA;AAAA,EACA,WAAA;AAAA,EACA;AACF,CAAA;;;ACKO,IAAM,aAAA,GAAN,cAA4BA,iBAAA,CAAY;AAAA,EACrC,YAAA;AAAA,EACA,eAAA;AAAA,EACA,iBAAA;AAAA,EACA,oBAAA;AAAA,EACA,aAAA;AAAA,EAER,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF,GAA4G,EAAC,EAAG;AAC9G,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,gBAAA;AAElC,IAAA,MAAM,kBAAA,GAA+D;AAAA,MACnE,IAAA,EAAM,MAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,MAAM,qBAAA,GAAkE;AAAA,MACtE,IAAA,EAAM,MAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,iBAAA,GAAoB;AAAA,MACvB,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,MAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,KACpD;AACA,IAAA,IAAA,CAAK,oBAAA,GAAuB;AAAA,MAC1B,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,MACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,KAC1D;AAEA,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAElD,IAAA,IAAI,CAAC,YAAA,IAAgB,CAAC,eAAA,EAAiB;AACrC,MAAA,MAAM,IAAI,MAAM,4FAA4F,CAAA;AAAA,IAC9G;AAEA,IAAA,IAAI,YAAA,EAAc;AAChB,MAAA,IAAA,CAAK,YAAA,GAAeC,iBAAa,YAAY,CAAA;AAAA,IAC/C;AACA,IAAA,IAAI,eAAA,EAAiB;AACnB,MAAA,IAAA,CAAK,eAAA,GAAkBA,iBAAa,eAAe,CAAA;AAAA,IACrD;AAEA,IAAA,IAAA,CAAK,gBAAgB,OAAA,IAAW,YAAA;AAAA,EAClC;AAAA,EAEA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,eAAA,CAAgB,IAAI,CAAA,KAAA,MAAU;AAAA,MACnC,OAAA,EAAS;AAAA,KACX,CAAE,CAAA;AAAA,EACJ;AAAA,EAEA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAIgC;AAChC,IAAA,IAAI
,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,uCAAuC,CAAA;AAAA,IACzD;AAEA,IAAA,IAAI,IAAA;AACJ,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,IAAA,GAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAC/C,CAAA,MAAO;AACL,MAAA,IAAA,GAAO,KAAA;AAAA,IACT;AAEA,IAAA,IAAI,IAAA,CAAK,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC5B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,SAAA,GAAY,KAAK,iBAAA,EAAmB,IAAA;AAC1C,IAAA,MAAM,SAAA,GAAY,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,aAAA;AAE3C,IAAA,MAAM,SAAA,GACJ,SAAA,IAAa,SAAA,GACT,SAAA,CAAU,WAAW,CAAA,EAAG,SAAS,CAAA,CAAA,CAAG,CAAA,GAClC,YACA,CAAA,EAAG,SAAS,CAAA,CAAA,EAAI,SAAS,KAC3B,SAAA,IAAa,SAAA;AAEnB,IAAA,MAAM,WAAA,GAAc,KAAK,YAAA,CAAa,KAAA;AACtC,IAAA,MAAM,QAAA,GAAW,MAAM,WAAA,CAAY,OAAA;AAAA,MACjC,EAAE,IAAA,EAAK;AAAA,MACP;AAAA,QACE,KAAA,EAAO,SAAA;AAAA,QACP,GAAG,MAAA,CAAO,WAAA,CAAY,MAAA,CAAO,OAAA,CAAQ,WAAW,EAAE,CAAA,CAAE,MAAA,CAAO,CAAC,CAAC,CAAC,CAAA,KAAM,CAAA,KAAM,SAAS,CAAC;AAAA;AACtF,KACF;AAEA,IAAA,MAAM,SAAA,GAAY,MAAM,QAAA,CAAS,SAAA,EAAU;AAC3C,IAAA,IAAI,CAAC,SAAA,EAAW;AACd,MAAA,MAAM,IAAI,MAAM,kCAAkC,CAAA;AAAA,IACpD;AAEA,IAAA,MAAM,MAAA,GAAS,UAAU,SAAA,EAAU;AACnC,IAAA,MAAM,UAAA,GAAa,IAAIC,kBAAA,EAAY;AAGnC,IAAA,CAAC,YAAY;AACX,MAAA,IAAI;AACF,QAAA,OAAO,IAAA,EAAM;AACX,UAAA,MAAM,EAAE,IAAA,EAAM,KAAA,EAAM,GAAI,MAAM,OAAO,IAAA,EAAK;AAC1C,UAAA,IAAI,IAAA,EAAM;AACR,YAAA,UAAA,CAAW,GAAA,EAAI;AACf,YAAA;AAAA,UACF;AACA,UAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,QACxB;AAAA,MACF,SAAS,KAAA,EAAO;AACd,QAAA,UAAA,CAAW,QAAQ,KAAc,CAAA;AAAA,MACnC;AAAA,IACF,CAAA,GAAG,CAAE,KAAA,CAAM,CAAA,KAAA,KAAS;AAClB,MAAA,UAAA,CAAW,QAAQ,KAAc,CAAA;AAAA,IACnC,CAAC,CAAA;AAED,IAAA,OAAO,UAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA
aA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIc;AACd,IAAA,IAAI,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,0CAA0C,CAAA;AAAA,IAC5D;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AACnC,IAAA,MAAM,EAAE,SAAS,qBAAA,EAAuB,CAAA,EAAG,GAAG,WAAA,EAAY,GAAI,WAAW,EAAC;AAC1E,IAAA,MAAM,EAAE,MAAA,EAAQ,KAAA,EAAM,GAAI,MAAM,KAAK,eAAA,CAAgB,MAAA,CAAO,WAAA,CAAY,cAAA,CAAe,MAAA,EAAQ;AAAA,MAC7F,GAAG,WAAA;AAAA,MACH,KAAA,EAAO,KAAK,oBAAA,EAAsB,IAAA;AAAA,MAClC;AAAA,KACD,CAAA;AACD,IAAA,IAAI,KAAA,EAAO;AACT,MAAA,MAAM,KAAA;AAAA,IACR;AAEA,IAAA,MAAM,OAAA,GAAU,MAAA,CAAO,OAAA,EAAS,QAAA,GAAW,CAAC,CAAA;AAC5C,IAAA,MAAM,GAAA,GAKU,OAAA,EAAS,YAAA,GAAe,CAAC,CAAA;AAEzC,IAAA,IAAI,CAAC,GAAA,EAAK;AACR,MAAA,OAAO;AAAA,QACL,UAAA,EAAY,EAAA;AAAA,QACZ,OAAO,EAAC;AAAA,QACR,GAAA,EAAK;AAAA,OACP;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAgB;AAAA,MACpB,YAAY,GAAA,CAAI,UAAA;AAAA,MAChB,OAAO,GAAA,CAAI,KAAA;AAAA,MACX,GAAA,EAAK;AAAA,KACP;AAEA,IAAA,IAAI,OAAA,IAAW,IAAI,KAAA,EAAO;AACxB,MAAA,QAAA,CAAS,eAAA,GAAkB,GAAA,CAAI,KAAA,CAAM,GAAA,CAAI,CAAC,CAAA,MAAqB;AAAA,QAC7D,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,OAAO,CAAA,CAAE,KAAA;AAAA,QACT,KAAK,CAAA,CAAE;AAAA,OACT,CAAE,CAAA;AAAA,IACJ;AAEA,IAAA,OAAO,QAAA;AAAA,EACT;AACF","file":"index.cjs","sourcesContent":["/**\n * List of available Deepgram voice models for text-to-speech\n * Each voice is designed for specific use cases and languages\n * Format: {name}-{language} (e.g. 
asteria-en)\n */\nexport const DEEPGRAM_VOICES = [\n 'asteria-en',\n 'luna-en',\n 'stella-en',\n 'athena-en',\n 'hera-en',\n 'orion-en',\n 'arcas-en',\n 'perseus-en',\n 'angus-en',\n 'orpheus-en',\n 'helios-en',\n 'zeus-en',\n] as const;\n\nexport type DeepgramVoiceId = (typeof DEEPGRAM_VOICES)[number];\n\n/**\n * List of available Deepgram models for text-to-speech and speech-to-text\n */\nexport const DEEPGRAM_MODELS = ['aura', 'whisper', 'base', 'enhanced', 'nova', 'nova-2', 'nova-3'] as const;\n\nexport type DeepgramModel = (typeof DEEPGRAM_MODELS)[number];\n","import { PassThrough } from 'node:stream';\n\nimport { createClient } from '@deepgram/sdk';\nimport { MastraVoice } from '@mastra/core/voice';\n\nimport { DEEPGRAM_VOICES } from './voices';\nimport type { DeepgramVoiceId, DeepgramModel } from './voices';\n\ninterface DeepgramVoiceConfig {\n name?: DeepgramModel;\n apiKey?: string;\n properties?: Record<string, any>;\n language?: string;\n}\n\ninterface DeepgramWord {\n word: string;\n start?: number;\n end?: number;\n confidence?: number;\n speaker?: number;\n}\n\nexport class DeepgramVoice extends MastraVoice {\n private speechClient?: ReturnType<typeof createClient>;\n private listeningClient?: ReturnType<typeof createClient>;\n private storedSpeechModel?: { name: DeepgramModel; apiKey?: string };\n private storedListeningModel?: { name: DeepgramModel; apiKey?: string };\n private storedSpeaker?: DeepgramVoiceId;\n\n constructor({\n speechModel,\n listeningModel,\n speaker,\n }: { speechModel?: DeepgramVoiceConfig; listeningModel?: DeepgramVoiceConfig; speaker?: DeepgramVoiceId } = {}) {\n const defaultApiKey = process.env.DEEPGRAM_API_KEY;\n\n const defaultSpeechModel: { name: DeepgramModel; apiKey?: string } = {\n name: 'aura',\n apiKey: defaultApiKey,\n };\n\n const defaultListeningModel: { name: DeepgramModel; apiKey?: string } = {\n name: 'nova',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? 
defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker,\n });\n\n this.storedSpeechModel = {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n };\n this.storedListeningModel = {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n };\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n\n if (!speechApiKey && !listeningApiKey) {\n throw new Error('At least one of DEEPGRAM_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n\n if (speechApiKey) {\n this.speechClient = createClient(speechApiKey);\n }\n if (listeningApiKey) {\n this.listeningClient = createClient(listeningApiKey);\n }\n\n this.storedSpeaker = speaker || 'asteria-en';\n }\n\n async getSpeakers() {\n return DEEPGRAM_VOICES.map(voice => ({\n voiceId: voice,\n }));\n }\n\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Deepgram speech client not configured');\n }\n\n let text: string;\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n text = Buffer.concat(chunks).toString('utf-8');\n } else {\n text = input;\n }\n\n if (text.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const baseModel = this.storedSpeechModel?.name;\n const speakerId = options?.speaker || this.storedSpeaker;\n\n const modelName =\n baseModel && speakerId\n ? 
speakerId.startsWith(`${baseModel}-`)\n ? speakerId\n : `${baseModel}-${speakerId}`\n : baseModel || speakerId;\n\n const speakClient = this.speechClient.speak;\n const response = await speakClient.request(\n { text },\n {\n model: modelName,\n ...Object.fromEntries(Object.entries(options ?? {}).filter(([k]) => k !== 'speaker')),\n },\n );\n\n const webStream = await response.getStream();\n if (!webStream) {\n throw new Error('No stream returned from Deepgram');\n }\n\n const reader = webStream.getReader();\n const nodeStream = new PassThrough();\n\n // Add error handling for the stream processing\n (async () => {\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) {\n nodeStream.end();\n break;\n }\n nodeStream.write(value);\n }\n } catch (error) {\n nodeStream.destroy(error as Error);\n }\n })().catch(error => {\n nodeStream.destroy(error as Error);\n });\n\n return nodeStream;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n /**\n * Transcribes audio with optional speaker diarization.\n *\n * @param audioStream - Audio input stream\n * @param options - Transcription options (diarize, language, etc.)\n * @returns Promise resolving to:\n * - transcript: Full transcript string\n * - words: Array of word objects with timing and confidence\n * - raw: Complete Deepgram API response\n * - speakerSegments: (when diarize=true) Array of {word, speaker, start, end}\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n diarize?: boolean;\n [key: string]: any;\n },\n ): Promise<any> {\n if (!this.listeningClient) {\n throw new Error('Deepgram listening client not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const buffer = 
Buffer.concat(chunks);\n const { diarize, diarize_speaker_count: _, ...restOptions } = options ?? {};\n const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {\n ...restOptions,\n model: this.storedListeningModel?.name,\n diarize,\n });\n if (error) {\n throw error;\n }\n\n const channel = result.results?.channels?.[0];\n const alt:\n | {\n transcript?: string;\n words?: DeepgramWord[];\n }\n | undefined = channel?.alternatives?.[0];\n\n if (!alt) {\n return {\n transcript: '',\n words: [],\n raw: result,\n };\n }\n\n const response: any = {\n transcript: alt.transcript,\n words: alt.words,\n raw: result,\n };\n\n if (diarize && alt.words) {\n response.speakerSegments = alt.words.map((w: DeepgramWord) => ({\n word: w.word,\n speaker: w.speaker,\n start: w.start,\n end: w.end,\n }));\n }\n\n return response;\n }\n}\n\nexport type { DeepgramVoiceConfig, DeepgramVoiceId, DeepgramModel };\n"]}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,52 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
2
|
+
import type { DeepgramVoiceId, DeepgramModel } from './voices.js';
|
|
3
|
+
interface DeepgramVoiceConfig {
|
|
4
|
+
name?: DeepgramModel;
|
|
5
|
+
apiKey?: string;
|
|
6
|
+
properties?: Record<string, any>;
|
|
7
|
+
language?: string;
|
|
8
|
+
}
|
|
9
|
+
export declare class DeepgramVoice extends MastraVoice {
|
|
10
|
+
private speechClient?;
|
|
11
|
+
private listeningClient?;
|
|
12
|
+
private storedSpeechModel?;
|
|
13
|
+
private storedListeningModel?;
|
|
14
|
+
private storedSpeaker?;
|
|
15
|
+
constructor({ speechModel, listeningModel, speaker, }?: {
|
|
16
|
+
speechModel?: DeepgramVoiceConfig;
|
|
17
|
+
listeningModel?: DeepgramVoiceConfig;
|
|
18
|
+
speaker?: DeepgramVoiceId;
|
|
19
|
+
});
|
|
20
|
+
getSpeakers(): Promise<{
|
|
21
|
+
voiceId: "asteria-en" | "luna-en" | "stella-en" | "athena-en" | "hera-en" | "orion-en" | "arcas-en" | "perseus-en" | "angus-en" | "orpheus-en" | "helios-en" | "zeus-en";
|
|
22
|
+
}[]>;
|
|
23
|
+
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
24
|
+
speaker?: string;
|
|
25
|
+
[key: string]: any;
|
|
26
|
+
}): Promise<NodeJS.ReadableStream>;
|
|
27
|
+
/**
|
|
28
|
+
* Checks if listening capabilities are enabled.
|
|
29
|
+
*
|
|
30
|
+
* @returns {Promise<{ enabled: boolean }>}
|
|
31
|
+
*/
|
|
32
|
+
getListener(): Promise<{
|
|
33
|
+
enabled: boolean;
|
|
34
|
+
}>;
|
|
35
|
+
/**
|
|
36
|
+
* Transcribes audio with optional speaker diarization.
|
|
37
|
+
*
|
|
38
|
+
* @param audioStream - Audio input stream
|
|
39
|
+
* @param options - Transcription options (diarize, language, etc.)
|
|
40
|
+
* @returns Promise resolving to:
|
|
41
|
+
* - transcript: Full transcript string
|
|
42
|
+
* - words: Array of word objects with timing and confidence
|
|
43
|
+
* - raw: Complete Deepgram API response
|
|
44
|
+
* - speakerSegments: (when diarize=true) Array of {word, speaker, start, end}
|
|
45
|
+
*/
|
|
46
|
+
listen(audioStream: NodeJS.ReadableStream, options?: {
|
|
47
|
+
diarize?: boolean;
|
|
48
|
+
[key: string]: any;
|
|
49
|
+
}): Promise<any>;
|
|
50
|
+
}
|
|
51
|
+
export type { DeepgramVoiceConfig, DeepgramVoiceId, DeepgramModel };
|
|
52
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGjD,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAE/D,UAAU,mBAAmB;IAC3B,IAAI,CAAC,EAAE,aAAa,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACjC,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAUD,qBAAa,aAAc,SAAQ,WAAW;IAC5C,OAAO,CAAC,YAAY,CAAC,CAAkC;IACvD,OAAO,CAAC,eAAe,CAAC,CAAkC;IAC1D,OAAO,CAAC,iBAAiB,CAAC,CAA2C;IACrE,OAAO,CAAC,oBAAoB,CAAC,CAA2C;IACxE,OAAO,CAAC,aAAa,CAAC,CAAkB;gBAE5B,EACV,WAAW,EACX,cAAc,EACd,OAAO,GACR,GAAE;QAAE,WAAW,CAAC,EAAE,mBAAmB,CAAC;QAAC,cAAc,CAAC,EAAE,mBAAmB,CAAC;QAAC,OAAO,CAAC,EAAE,eAAe,CAAA;KAAO;IAmDxG,WAAW;;;IAMX,KAAK,CACT,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC,cAAc,EACrC,OAAO,CAAC,EAAE;QACR,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;KACpB,GACA,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC;IAwEjC;;;;OAIG;IACG,WAAW;;;IAIjB;;;;;;;;;;OAUG;IACG,MAAM,CACV,WAAW,EAAE,MAAM,CAAC,cAAc,EAClC,OAAO,CAAC,EAAE;QACR,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;KACpB,GACA,OAAO,CAAC,GAAG,CAAC;CAyDhB;AAED,YAAY,EAAE,mBAAmB,EAAE,eAAe,EAAE,aAAa,EAAE,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -24,6 +24,9 @@ var DEEPGRAM_VOICES = [
|
|
|
24
24
|
var DeepgramVoice = class extends MastraVoice {
|
|
25
25
|
speechClient;
|
|
26
26
|
listeningClient;
|
|
27
|
+
storedSpeechModel;
|
|
28
|
+
storedListeningModel;
|
|
29
|
+
storedSpeaker;
|
|
27
30
|
constructor({
|
|
28
31
|
speechModel,
|
|
29
32
|
listeningModel,
|
|
@@ -49,6 +52,14 @@ var DeepgramVoice = class extends MastraVoice {
|
|
|
49
52
|
},
|
|
50
53
|
speaker
|
|
51
54
|
});
|
|
55
|
+
this.storedSpeechModel = {
|
|
56
|
+
name: speechModel?.name ?? defaultSpeechModel.name,
|
|
57
|
+
apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey
|
|
58
|
+
};
|
|
59
|
+
this.storedListeningModel = {
|
|
60
|
+
name: listeningModel?.name ?? defaultListeningModel.name,
|
|
61
|
+
apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey
|
|
62
|
+
};
|
|
52
63
|
const speechApiKey = speechModel?.apiKey || defaultApiKey;
|
|
53
64
|
const listeningApiKey = listeningModel?.apiKey || defaultApiKey;
|
|
54
65
|
if (!speechApiKey && !listeningApiKey) {
|
|
@@ -60,14 +71,12 @@ var DeepgramVoice = class extends MastraVoice {
|
|
|
60
71
|
if (listeningApiKey) {
|
|
61
72
|
this.listeningClient = createClient(listeningApiKey);
|
|
62
73
|
}
|
|
63
|
-
this.
|
|
74
|
+
this.storedSpeaker = speaker || "asteria-en";
|
|
64
75
|
}
|
|
65
76
|
async getSpeakers() {
|
|
66
|
-
return
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
}));
|
|
70
|
-
}, "voice.deepgram.getSpeakers")();
|
|
77
|
+
return DEEPGRAM_VOICES.map((voice) => ({
|
|
78
|
+
voiceId: voice
|
|
79
|
+
}));
|
|
71
80
|
}
|
|
72
81
|
async speak(input, options) {
|
|
73
82
|
if (!this.speechClient) {
|
|
@@ -90,49 +99,60 @@ var DeepgramVoice = class extends MastraVoice {
|
|
|
90
99
|
if (text.trim().length === 0) {
|
|
91
100
|
throw new Error("Input text is empty");
|
|
92
101
|
}
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
}
|
|
103
|
-
const speakClient = this.speechClient.speak;
|
|
104
|
-
const response = await speakClient.request(
|
|
105
|
-
{ text },
|
|
106
|
-
{
|
|
107
|
-
model,
|
|
108
|
-
...options
|
|
109
|
-
}
|
|
110
|
-
);
|
|
111
|
-
const webStream = await response.getStream();
|
|
112
|
-
if (!webStream) {
|
|
113
|
-
throw new Error("No stream returned from Deepgram");
|
|
102
|
+
const baseModel = this.storedSpeechModel?.name;
|
|
103
|
+
const speakerId = options?.speaker || this.storedSpeaker;
|
|
104
|
+
const modelName = baseModel && speakerId ? speakerId.startsWith(`${baseModel}-`) ? speakerId : `${baseModel}-${speakerId}` : baseModel || speakerId;
|
|
105
|
+
const speakClient = this.speechClient.speak;
|
|
106
|
+
const response = await speakClient.request(
|
|
107
|
+
{ text },
|
|
108
|
+
{
|
|
109
|
+
model: modelName,
|
|
110
|
+
...Object.fromEntries(Object.entries(options ?? {}).filter(([k]) => k !== "speaker"))
|
|
114
111
|
}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
112
|
+
);
|
|
113
|
+
const webStream = await response.getStream();
|
|
114
|
+
if (!webStream) {
|
|
115
|
+
throw new Error("No stream returned from Deepgram");
|
|
116
|
+
}
|
|
117
|
+
const reader = webStream.getReader();
|
|
118
|
+
const nodeStream = new PassThrough();
|
|
119
|
+
(async () => {
|
|
120
|
+
try {
|
|
121
|
+
while (true) {
|
|
122
|
+
const { done, value } = await reader.read();
|
|
123
|
+
if (done) {
|
|
124
|
+
nodeStream.end();
|
|
125
|
+
break;
|
|
126
126
|
}
|
|
127
|
-
|
|
128
|
-
nodeStream.destroy(error);
|
|
127
|
+
nodeStream.write(value);
|
|
129
128
|
}
|
|
130
|
-
}
|
|
129
|
+
} catch (error) {
|
|
131
130
|
nodeStream.destroy(error);
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
|
|
131
|
+
}
|
|
132
|
+
})().catch((error) => {
|
|
133
|
+
nodeStream.destroy(error);
|
|
134
|
+
});
|
|
135
|
+
return nodeStream;
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* Checks if listening capabilities are enabled.
|
|
139
|
+
*
|
|
140
|
+
* @returns {Promise<{ enabled: boolean }>}
|
|
141
|
+
*/
|
|
142
|
+
async getListener() {
|
|
143
|
+
return { enabled: true };
|
|
135
144
|
}
|
|
145
|
+
/**
|
|
146
|
+
* Transcribes audio with optional speaker diarization.
|
|
147
|
+
*
|
|
148
|
+
* @param audioStream - Audio input stream
|
|
149
|
+
* @param options - Transcription options (diarize, language, etc.)
|
|
150
|
+
* @returns Promise resolving to:
|
|
151
|
+
* - transcript: Full transcript string
|
|
152
|
+
* - words: Array of word objects with timing and confidence
|
|
153
|
+
* - raw: Complete Deepgram API response
|
|
154
|
+
* - speakerSegments: (when diarize=true) Array of {word, speaker, start, end}
|
|
155
|
+
*/
|
|
136
156
|
async listen(audioStream, options) {
|
|
137
157
|
if (!this.listeningClient) {
|
|
138
158
|
throw new Error("Deepgram listening client not configured");
|
|
@@ -146,24 +166,41 @@ var DeepgramVoice = class extends MastraVoice {
|
|
|
146
166
|
}
|
|
147
167
|
}
|
|
148
168
|
const buffer = Buffer.concat(chunks);
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
169
|
+
const { diarize, diarize_speaker_count: _, ...restOptions } = options ?? {};
|
|
170
|
+
const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {
|
|
171
|
+
...restOptions,
|
|
172
|
+
model: this.storedListeningModel?.name,
|
|
173
|
+
diarize
|
|
174
|
+
});
|
|
175
|
+
if (error) {
|
|
176
|
+
throw error;
|
|
177
|
+
}
|
|
178
|
+
const channel = result.results?.channels?.[0];
|
|
179
|
+
const alt = channel?.alternatives?.[0];
|
|
180
|
+
if (!alt) {
|
|
181
|
+
return {
|
|
182
|
+
transcript: "",
|
|
183
|
+
words: [],
|
|
184
|
+
raw: result
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
const response = {
|
|
188
|
+
transcript: alt.transcript,
|
|
189
|
+
words: alt.words,
|
|
190
|
+
raw: result
|
|
191
|
+
};
|
|
192
|
+
if (diarize && alt.words) {
|
|
193
|
+
response.speakerSegments = alt.words.map((w) => ({
|
|
194
|
+
word: w.word,
|
|
195
|
+
speaker: w.speaker,
|
|
196
|
+
start: w.start,
|
|
197
|
+
end: w.end
|
|
198
|
+
}));
|
|
199
|
+
}
|
|
200
|
+
return response;
|
|
166
201
|
}
|
|
167
202
|
};
|
|
168
203
|
|
|
169
204
|
export { DeepgramVoice };
|
|
205
|
+
//# sourceMappingURL=index.js.map
|
|
206
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/voices.ts","../src/index.ts"],"names":[],"mappings":";;;;;;;AAKO,IAAM,eAAA,GAAkB;AAAA,EAC7B,YAAA;AAAA,EACA,SAAA;AAAA,EACA,WAAA;AAAA,EACA,WAAA;AAAA,EACA,SAAA;AAAA,EACA,UAAA;AAAA,EACA,UAAA;AAAA,EACA,YAAA;AAAA,EACA,UAAA;AAAA,EACA,YAAA;AAAA,EACA,WAAA;AAAA,EACA;AACF,CAAA;;;ACKO,IAAM,aAAA,GAAN,cAA4B,WAAA,CAAY;AAAA,EACrC,YAAA;AAAA,EACA,eAAA;AAAA,EACA,iBAAA;AAAA,EACA,oBAAA;AAAA,EACA,aAAA;AAAA,EAER,WAAA,CAAY;AAAA,IACV,WAAA;AAAA,IACA,cAAA;AAAA,IACA;AAAA,GACF,GAA4G,EAAC,EAAG;AAC9G,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,gBAAA;AAElC,IAAA,MAAM,kBAAA,GAA+D;AAAA,MACnE,IAAA,EAAM,MAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,MAAM,qBAAA,GAAkE;AAAA,MACtE,IAAA,EAAM,MAAA;AAAA,MACN,MAAA,EAAQ;AAAA,KACV;AAEA,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,QAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,OACpD;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,QACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,OAC1D;AAAA,MACA;AAAA,KACD,CAAA;AAED,IAAA,IAAA,CAAK,iBAAA,GAAoB;AAAA,MACvB,IAAA,EAAM,WAAA,EAAa,IAAA,IAAQ,kBAAA,CAAmB,IAAA;AAAA,MAC9C,MAAA,EAAQ,WAAA,EAAa,MAAA,IAAU,kBAAA,CAAmB;AAAA,KACpD;AACA,IAAA,IAAA,CAAK,oBAAA,GAAuB;AAAA,MAC1B,IAAA,EAAM,cAAA,EAAgB,IAAA,IAAQ,qBAAA,CAAsB,IAAA;AAAA,MACpD,MAAA,EAAQ,cAAA,EAAgB,MAAA,IAAU,qBAAA,CAAsB;AAAA,KAC1D;AAEA,IAAA,MAAM,YAAA,GAAe,aAAa,MAAA,IAAU,aAAA;AAC5C,IAAA,MAAM,eAAA,GAAkB,gBAAgB,MAAA,IAAU,aAAA;AAElD,IAAA,IAAI,CAAC,YAAA,IAAgB,CAAC,eAAA,EAAiB;AACrC,MAAA,MAAM,IAAI,MAAM,4FAA4F,CAAA;AAAA,IAC9G;AAEA,IAAA,IAAI,YAAA,EAAc;AAChB,MAAA,IAAA,CAAK,YAAA,GAAe,aAAa,YAAY,CAAA;AAAA,IAC/C;AACA,IAAA,IAAI,eAAA,EAAiB;AACnB,MAAA,IAAA,CAAK,eAAA,GAAkB,aAAa,eAAe,CAAA;AAAA,IACrD;AAEA,IAAA,IAAA,CAAK,gBAAgB,OAAA,IAAW,YAAA;AAAA,EAClC;AAAA,EAEA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,eAAA,CAAgB,IAAI,CAAA,KAAA,MAAU;AAAA,MACnC,OAAA,EAAS;AAAA,KACX,CAAE,CAAA;AAAA,EACJ;AAAA,EAEA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAIgC;AAChC,IAAA,IAAI,CAAC,KAAK,YAAA,EAAc;AACtB,MAAA,MAAM,IAAI,MAAM,uCA
AuC,CAAA;AAAA,IACzD;AAEA,IAAA,IAAI,IAAA;AACJ,IAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,MAAA,MAAM,SAAmB,EAAC;AAC1B,MAAA,WAAA,MAAiB,SAAS,KAAA,EAAO;AAC/B,QAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,UAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,QAChC,CAAA,MAAO;AACL,UAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,QACnB;AAAA,MACF;AACA,MAAA,IAAA,GAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,IAC/C,CAAA,MAAO;AACL,MAAA,IAAA,GAAO,KAAA;AAAA,IACT;AAEA,IAAA,IAAI,IAAA,CAAK,IAAA,EAAK,CAAE,MAAA,KAAW,CAAA,EAAG;AAC5B,MAAA,MAAM,IAAI,MAAM,qBAAqB,CAAA;AAAA,IACvC;AAEA,IAAA,MAAM,SAAA,GAAY,KAAK,iBAAA,EAAmB,IAAA;AAC1C,IAAA,MAAM,SAAA,GAAY,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,aAAA;AAE3C,IAAA,MAAM,SAAA,GACJ,SAAA,IAAa,SAAA,GACT,SAAA,CAAU,WAAW,CAAA,EAAG,SAAS,CAAA,CAAA,CAAG,CAAA,GAClC,YACA,CAAA,EAAG,SAAS,CAAA,CAAA,EAAI,SAAS,KAC3B,SAAA,IAAa,SAAA;AAEnB,IAAA,MAAM,WAAA,GAAc,KAAK,YAAA,CAAa,KAAA;AACtC,IAAA,MAAM,QAAA,GAAW,MAAM,WAAA,CAAY,OAAA;AAAA,MACjC,EAAE,IAAA,EAAK;AAAA,MACP;AAAA,QACE,KAAA,EAAO,SAAA;AAAA,QACP,GAAG,MAAA,CAAO,WAAA,CAAY,MAAA,CAAO,OAAA,CAAQ,WAAW,EAAE,CAAA,CAAE,MAAA,CAAO,CAAC,CAAC,CAAC,CAAA,KAAM,CAAA,KAAM,SAAS,CAAC;AAAA;AACtF,KACF;AAEA,IAAA,MAAM,SAAA,GAAY,MAAM,QAAA,CAAS,SAAA,EAAU;AAC3C,IAAA,IAAI,CAAC,SAAA,EAAW;AACd,MAAA,MAAM,IAAI,MAAM,kCAAkC,CAAA;AAAA,IACpD;AAEA,IAAA,MAAM,MAAA,GAAS,UAAU,SAAA,EAAU;AACnC,IAAA,MAAM,UAAA,GAAa,IAAI,WAAA,EAAY;AAGnC,IAAA,CAAC,YAAY;AACX,MAAA,IAAI;AACF,QAAA,OAAO,IAAA,EAAM;AACX,UAAA,MAAM,EAAE,IAAA,EAAM,KAAA,EAAM,GAAI,MAAM,OAAO,IAAA,EAAK;AAC1C,UAAA,IAAI,IAAA,EAAM;AACR,YAAA,UAAA,CAAW,GAAA,EAAI;AACf,YAAA;AAAA,UACF;AACA,UAAA,UAAA,CAAW,MAAM,KAAK,CAAA;AAAA,QACxB;AAAA,MACF,SAAS,KAAA,EAAO;AACd,QAAA,UAAA,CAAW,QAAQ,KAAc,CAAA;AAAA,MACnC;AAAA,IACF,CAAA,GAAG,CAAE,KAAA,CAAM,CAAA,KAAA,KAAS;AAClB,MAAA,UAAA,CAAW,QAAQ,KAAc,CAAA;AAAA,IACnC,CAAC,CAAA;AAED,IAAA,OAAO,UAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EAIc;AACd,IAAA,IAAI
,CAAC,KAAK,eAAA,EAAiB;AACzB,MAAA,MAAM,IAAI,MAAM,0CAA0C,CAAA;AAAA,IAC5D;AAEA,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,WAAA,EAAa;AACrC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AACnC,IAAA,MAAM,EAAE,SAAS,qBAAA,EAAuB,CAAA,EAAG,GAAG,WAAA,EAAY,GAAI,WAAW,EAAC;AAC1E,IAAA,MAAM,EAAE,MAAA,EAAQ,KAAA,EAAM,GAAI,MAAM,KAAK,eAAA,CAAgB,MAAA,CAAO,WAAA,CAAY,cAAA,CAAe,MAAA,EAAQ;AAAA,MAC7F,GAAG,WAAA;AAAA,MACH,KAAA,EAAO,KAAK,oBAAA,EAAsB,IAAA;AAAA,MAClC;AAAA,KACD,CAAA;AACD,IAAA,IAAI,KAAA,EAAO;AACT,MAAA,MAAM,KAAA;AAAA,IACR;AAEA,IAAA,MAAM,OAAA,GAAU,MAAA,CAAO,OAAA,EAAS,QAAA,GAAW,CAAC,CAAA;AAC5C,IAAA,MAAM,GAAA,GAKU,OAAA,EAAS,YAAA,GAAe,CAAC,CAAA;AAEzC,IAAA,IAAI,CAAC,GAAA,EAAK;AACR,MAAA,OAAO;AAAA,QACL,UAAA,EAAY,EAAA;AAAA,QACZ,OAAO,EAAC;AAAA,QACR,GAAA,EAAK;AAAA,OACP;AAAA,IACF;AAEA,IAAA,MAAM,QAAA,GAAgB;AAAA,MACpB,YAAY,GAAA,CAAI,UAAA;AAAA,MAChB,OAAO,GAAA,CAAI,KAAA;AAAA,MACX,GAAA,EAAK;AAAA,KACP;AAEA,IAAA,IAAI,OAAA,IAAW,IAAI,KAAA,EAAO;AACxB,MAAA,QAAA,CAAS,eAAA,GAAkB,GAAA,CAAI,KAAA,CAAM,GAAA,CAAI,CAAC,CAAA,MAAqB;AAAA,QAC7D,MAAM,CAAA,CAAE,IAAA;AAAA,QACR,SAAS,CAAA,CAAE,OAAA;AAAA,QACX,OAAO,CAAA,CAAE,KAAA;AAAA,QACT,KAAK,CAAA,CAAE;AAAA,OACT,CAAE,CAAA;AAAA,IACJ;AAEA,IAAA,OAAO,QAAA;AAAA,EACT;AACF","file":"index.js","sourcesContent":["/**\n * List of available Deepgram voice models for text-to-speech\n * Each voice is designed for specific use cases and languages\n * Format: {name}-{language} (e.g. 
asteria-en)\n */\nexport const DEEPGRAM_VOICES = [\n 'asteria-en',\n 'luna-en',\n 'stella-en',\n 'athena-en',\n 'hera-en',\n 'orion-en',\n 'arcas-en',\n 'perseus-en',\n 'angus-en',\n 'orpheus-en',\n 'helios-en',\n 'zeus-en',\n] as const;\n\nexport type DeepgramVoiceId = (typeof DEEPGRAM_VOICES)[number];\n\n/**\n * List of available Deepgram models for text-to-speech and speech-to-text\n */\nexport const DEEPGRAM_MODELS = ['aura', 'whisper', 'base', 'enhanced', 'nova', 'nova-2', 'nova-3'] as const;\n\nexport type DeepgramModel = (typeof DEEPGRAM_MODELS)[number];\n","import { PassThrough } from 'node:stream';\n\nimport { createClient } from '@deepgram/sdk';\nimport { MastraVoice } from '@mastra/core/voice';\n\nimport { DEEPGRAM_VOICES } from './voices';\nimport type { DeepgramVoiceId, DeepgramModel } from './voices';\n\ninterface DeepgramVoiceConfig {\n name?: DeepgramModel;\n apiKey?: string;\n properties?: Record<string, any>;\n language?: string;\n}\n\ninterface DeepgramWord {\n word: string;\n start?: number;\n end?: number;\n confidence?: number;\n speaker?: number;\n}\n\nexport class DeepgramVoice extends MastraVoice {\n private speechClient?: ReturnType<typeof createClient>;\n private listeningClient?: ReturnType<typeof createClient>;\n private storedSpeechModel?: { name: DeepgramModel; apiKey?: string };\n private storedListeningModel?: { name: DeepgramModel; apiKey?: string };\n private storedSpeaker?: DeepgramVoiceId;\n\n constructor({\n speechModel,\n listeningModel,\n speaker,\n }: { speechModel?: DeepgramVoiceConfig; listeningModel?: DeepgramVoiceConfig; speaker?: DeepgramVoiceId } = {}) {\n const defaultApiKey = process.env.DEEPGRAM_API_KEY;\n\n const defaultSpeechModel: { name: DeepgramModel; apiKey?: string } = {\n name: 'aura',\n apiKey: defaultApiKey,\n };\n\n const defaultListeningModel: { name: DeepgramModel; apiKey?: string } = {\n name: 'nova',\n apiKey: defaultApiKey,\n };\n\n super({\n speechModel: {\n name: speechModel?.name ?? 
defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n },\n listeningModel: {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n },\n speaker,\n });\n\n this.storedSpeechModel = {\n name: speechModel?.name ?? defaultSpeechModel.name,\n apiKey: speechModel?.apiKey ?? defaultSpeechModel.apiKey,\n };\n this.storedListeningModel = {\n name: listeningModel?.name ?? defaultListeningModel.name,\n apiKey: listeningModel?.apiKey ?? defaultListeningModel.apiKey,\n };\n\n const speechApiKey = speechModel?.apiKey || defaultApiKey;\n const listeningApiKey = listeningModel?.apiKey || defaultApiKey;\n\n if (!speechApiKey && !listeningApiKey) {\n throw new Error('At least one of DEEPGRAM_API_KEY, speechModel.apiKey, or listeningModel.apiKey must be set');\n }\n\n if (speechApiKey) {\n this.speechClient = createClient(speechApiKey);\n }\n if (listeningApiKey) {\n this.listeningClient = createClient(listeningApiKey);\n }\n\n this.storedSpeaker = speaker || 'asteria-en';\n }\n\n async getSpeakers() {\n return DEEPGRAM_VOICES.map(voice => ({\n voiceId: voice,\n }));\n }\n\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n [key: string]: any;\n },\n ): Promise<NodeJS.ReadableStream> {\n if (!this.speechClient) {\n throw new Error('Deepgram speech client not configured');\n }\n\n let text: string;\n if (typeof input !== 'string') {\n const chunks: Buffer[] = [];\n for await (const chunk of input) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n text = Buffer.concat(chunks).toString('utf-8');\n } else {\n text = input;\n }\n\n if (text.trim().length === 0) {\n throw new Error('Input text is empty');\n }\n\n const baseModel = this.storedSpeechModel?.name;\n const speakerId = options?.speaker || this.storedSpeaker;\n\n const modelName =\n baseModel && speakerId\n ? 
speakerId.startsWith(`${baseModel}-`)\n ? speakerId\n : `${baseModel}-${speakerId}`\n : baseModel || speakerId;\n\n const speakClient = this.speechClient.speak;\n const response = await speakClient.request(\n { text },\n {\n model: modelName,\n ...Object.fromEntries(Object.entries(options ?? {}).filter(([k]) => k !== 'speaker')),\n },\n );\n\n const webStream = await response.getStream();\n if (!webStream) {\n throw new Error('No stream returned from Deepgram');\n }\n\n const reader = webStream.getReader();\n const nodeStream = new PassThrough();\n\n // Add error handling for the stream processing\n (async () => {\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) {\n nodeStream.end();\n break;\n }\n nodeStream.write(value);\n }\n } catch (error) {\n nodeStream.destroy(error as Error);\n }\n })().catch(error => {\n nodeStream.destroy(error as Error);\n });\n\n return nodeStream;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n /**\n * Transcribes audio with optional speaker diarization.\n *\n * @param audioStream - Audio input stream\n * @param options - Transcription options (diarize, language, etc.)\n * @returns Promise resolving to:\n * - transcript: Full transcript string\n * - words: Array of word objects with timing and confidence\n * - raw: Complete Deepgram API response\n * - speakerSegments: (when diarize=true) Array of {word, speaker, start, end}\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: {\n diarize?: boolean;\n [key: string]: any;\n },\n ): Promise<any> {\n if (!this.listeningClient) {\n throw new Error('Deepgram listening client not configured');\n }\n\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const buffer = 
Buffer.concat(chunks);\n const { diarize, diarize_speaker_count: _, ...restOptions } = options ?? {};\n const { result, error } = await this.listeningClient.listen.prerecorded.transcribeFile(buffer, {\n ...restOptions,\n model: this.storedListeningModel?.name,\n diarize,\n });\n if (error) {\n throw error;\n }\n\n const channel = result.results?.channels?.[0];\n const alt:\n | {\n transcript?: string;\n words?: DeepgramWord[];\n }\n | undefined = channel?.alternatives?.[0];\n\n if (!alt) {\n return {\n transcript: '',\n words: [],\n raw: result,\n };\n }\n\n const response: any = {\n transcript: alt.transcript,\n words: alt.words,\n raw: result,\n };\n\n if (diarize && alt.words) {\n response.speakerSegments = alt.words.map((w: DeepgramWord) => ({\n word: w.word,\n speaker: w.speaker,\n start: w.start,\n end: w.end,\n }));\n }\n\n return response;\n }\n}\n\nexport type { DeepgramVoiceConfig, DeepgramVoiceId, DeepgramModel };\n"]}
|
package/dist/voices.d.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* List of available Deepgram voice models for text-to-speech
|
|
3
|
+
* Each voice is designed for specific use cases and languages
|
|
4
|
+
* Format: {name}-{language} (e.g. asteria-en)
|
|
5
|
+
*/
|
|
6
|
+
export declare const DEEPGRAM_VOICES: readonly ["asteria-en", "luna-en", "stella-en", "athena-en", "hera-en", "orion-en", "arcas-en", "perseus-en", "angus-en", "orpheus-en", "helios-en", "zeus-en"];
|
|
7
|
+
export type DeepgramVoiceId = (typeof DEEPGRAM_VOICES)[number];
|
|
8
|
+
/**
|
|
9
|
+
* List of available Deepgram models for text-to-speech and speech-to-text
|
|
10
|
+
*/
|
|
11
|
+
export declare const DEEPGRAM_MODELS: readonly ["aura", "whisper", "base", "enhanced", "nova", "nova-2", "nova-3"];
|
|
12
|
+
export type DeepgramModel = (typeof DEEPGRAM_MODELS)[number];
|
|
13
|
+
//# sourceMappingURL=voices.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voices.d.ts","sourceRoot":"","sources":["../src/voices.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,eAAO,MAAM,eAAe,iKAalB,CAAC;AAEX,MAAM,MAAM,eAAe,GAAG,CAAC,OAAO,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC;AAE/D;;GAEG;AACH,eAAO,MAAM,eAAe,8EAA+E,CAAC;AAE5G,MAAM,MAAM,aAAa,GAAG,CAAC,OAAO,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC"}
|