@mastra/voice-google 0.0.0-vnext-inngest-20250508122351 → 0.0.0-vnext-20251104230439
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1752 -0
- package/LICENSE.md +11 -42
- package/README.md +6 -3
- package/dist/index.cjs +106 -77
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +85 -2
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +106 -77
- package/dist/index.js.map +1 -0
- package/package.json +28 -14
- package/dist/_tsup-dts-rollup.d.cts +0 -73
- package/dist/_tsup-dts-rollup.d.ts +0 -73
- package/dist/index.d.cts +0 -2
package/dist/index.js
CHANGED
|
@@ -4,6 +4,34 @@ import { TextToSpeechClient } from '@google-cloud/text-to-speech';
|
|
|
4
4
|
import { MastraVoice } from '@mastra/core/voice';
|
|
5
5
|
|
|
6
6
|
// src/index.ts
|
|
7
|
+
var resolveAuthConfig = (modelConfig, fallback) => {
|
|
8
|
+
const resolved = {};
|
|
9
|
+
const apiKey = modelConfig?.apiKey ?? fallback.apiKey;
|
|
10
|
+
if (apiKey) {
|
|
11
|
+
resolved.apiKey = apiKey;
|
|
12
|
+
}
|
|
13
|
+
const keyFilename = modelConfig?.keyFilename ?? fallback.keyFilename;
|
|
14
|
+
if (keyFilename) {
|
|
15
|
+
resolved.keyFilename = keyFilename;
|
|
16
|
+
}
|
|
17
|
+
const credentials = modelConfig?.credentials ?? fallback.credentials;
|
|
18
|
+
if (credentials) {
|
|
19
|
+
resolved.credentials = credentials;
|
|
20
|
+
}
|
|
21
|
+
return resolved;
|
|
22
|
+
};
|
|
23
|
+
var buildAuthOptions = (config) => {
|
|
24
|
+
if (config.credentials) {
|
|
25
|
+
return { credentials: config.credentials };
|
|
26
|
+
}
|
|
27
|
+
if (config.keyFilename) {
|
|
28
|
+
return { keyFilename: config.keyFilename };
|
|
29
|
+
}
|
|
30
|
+
if (config.apiKey) {
|
|
31
|
+
return { apiKey: config.apiKey };
|
|
32
|
+
}
|
|
33
|
+
return {};
|
|
34
|
+
};
|
|
7
35
|
var DEFAULT_VOICE = "en-US-Casual-K";
|
|
8
36
|
var GoogleVoice = class extends MastraVoice {
|
|
9
37
|
ttsClient;
|
|
@@ -14,7 +42,6 @@ var GoogleVoice = class extends MastraVoice {
|
|
|
14
42
|
* @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
|
|
15
43
|
* @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
|
|
16
44
|
* @param {string} [config.speaker] - Default voice ID to use for speech synthesis
|
|
17
|
-
* @throws {Error} If no API key is provided via config or environment variable
|
|
18
45
|
*/
|
|
19
46
|
constructor({
|
|
20
47
|
listeningModel,
|
|
@@ -22,43 +49,41 @@ var GoogleVoice = class extends MastraVoice {
|
|
|
22
49
|
speaker
|
|
23
50
|
} = {}) {
|
|
24
51
|
const defaultApiKey = process.env.GOOGLE_API_KEY;
|
|
52
|
+
const defaultKeyFilename = process.env.GOOGLE_APPLICATION_CREDENTIALS;
|
|
25
53
|
const defaultSpeaker = DEFAULT_VOICE;
|
|
54
|
+
const sharedFallback = {
|
|
55
|
+
apiKey: defaultApiKey ?? speechModel?.apiKey ?? listeningModel?.apiKey,
|
|
56
|
+
keyFilename: defaultKeyFilename ?? speechModel?.keyFilename ?? listeningModel?.keyFilename,
|
|
57
|
+
credentials: speechModel?.credentials ?? listeningModel?.credentials
|
|
58
|
+
};
|
|
59
|
+
const speechAuthConfig = resolveAuthConfig(speechModel, sharedFallback);
|
|
60
|
+
const listeningAuthConfig = resolveAuthConfig(listeningModel, sharedFallback);
|
|
26
61
|
super({
|
|
27
62
|
speechModel: {
|
|
28
63
|
name: "",
|
|
29
|
-
apiKey:
|
|
64
|
+
apiKey: speechAuthConfig.apiKey ?? defaultApiKey
|
|
30
65
|
},
|
|
31
66
|
listeningModel: {
|
|
32
67
|
name: "",
|
|
33
|
-
apiKey:
|
|
68
|
+
apiKey: listeningAuthConfig.apiKey ?? defaultApiKey
|
|
34
69
|
},
|
|
35
70
|
speaker: speaker ?? defaultSpeaker
|
|
36
71
|
});
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
);
|
|
42
|
-
}
|
|
43
|
-
this.ttsClient = new TextToSpeechClient({
|
|
44
|
-
apiKey: this.speechModel?.apiKey || defaultApiKey
|
|
45
|
-
});
|
|
46
|
-
this.speechClient = new SpeechClient({
|
|
47
|
-
apiKey: this.listeningModel?.apiKey || defaultApiKey
|
|
48
|
-
});
|
|
72
|
+
const ttsOptions = buildAuthOptions(speechAuthConfig);
|
|
73
|
+
const speechOptions = buildAuthOptions(listeningAuthConfig);
|
|
74
|
+
this.ttsClient = new TextToSpeechClient(ttsOptions);
|
|
75
|
+
this.speechClient = new SpeechClient(speechOptions);
|
|
49
76
|
}
|
|
50
77
|
/**
|
|
51
78
|
* Gets a list of available voices
|
|
52
79
|
* @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
|
|
53
80
|
*/
|
|
54
81
|
async getSpeakers({ languageCode = "en-US" } = {}) {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
}));
|
|
61
|
-
}, "voice.google.getSpeakers")();
|
|
82
|
+
const [response] = await this.ttsClient.listVoices({ languageCode });
|
|
83
|
+
return (response?.voices || []).filter((voice) => voice.name && voice.languageCodes).map((voice) => ({
|
|
84
|
+
voiceId: voice.name,
|
|
85
|
+
languageCodes: voice.languageCodes
|
|
86
|
+
}));
|
|
62
87
|
}
|
|
63
88
|
async streamToString(stream) {
|
|
64
89
|
const chunks = [];
|
|
@@ -81,27 +106,33 @@ var GoogleVoice = class extends MastraVoice {
|
|
|
81
106
|
* @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
|
|
82
107
|
*/
|
|
83
108
|
async speak(input, options) {
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
109
|
+
const text = typeof input === "string" ? input : await this.streamToString(input);
|
|
110
|
+
const request = {
|
|
111
|
+
input: { text },
|
|
112
|
+
voice: {
|
|
113
|
+
name: options?.speaker || this.speaker,
|
|
114
|
+
languageCode: options?.languageCode || options?.speaker?.split("-").slice(0, 2).join("-") || "en-US"
|
|
115
|
+
},
|
|
116
|
+
audioConfig: options?.audioConfig || { audioEncoding: "LINEAR16" }
|
|
117
|
+
};
|
|
118
|
+
const [response] = await this.ttsClient.synthesizeSpeech(request);
|
|
119
|
+
if (!response.audioContent) {
|
|
120
|
+
throw new Error("No audio content returned.");
|
|
121
|
+
}
|
|
122
|
+
if (typeof response.audioContent === "string") {
|
|
123
|
+
throw new Error("Audio content is a string.");
|
|
124
|
+
}
|
|
125
|
+
const stream = new PassThrough();
|
|
126
|
+
stream.end(Buffer.from(response.audioContent));
|
|
127
|
+
return stream;
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Checks if listening capabilities are enabled.
|
|
131
|
+
*
|
|
132
|
+
* @returns {Promise<{ enabled: boolean }>}
|
|
133
|
+
*/
|
|
134
|
+
async getListener() {
|
|
135
|
+
return { enabled: true };
|
|
105
136
|
}
|
|
106
137
|
/**
|
|
107
138
|
* Converts speech to text
|
|
@@ -111,44 +142,42 @@ var GoogleVoice = class extends MastraVoice {
|
|
|
111
142
|
* @returns {Promise<string>} Transcribed text
|
|
112
143
|
*/
|
|
113
144
|
async listen(audioStream, options) {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
chunks.push(chunk);
|
|
121
|
-
}
|
|
145
|
+
const chunks = [];
|
|
146
|
+
for await (const chunk of audioStream) {
|
|
147
|
+
if (typeof chunk === "string") {
|
|
148
|
+
chunks.push(Buffer.from(chunk));
|
|
149
|
+
} else {
|
|
150
|
+
chunks.push(chunk);
|
|
122
151
|
}
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
};
|
|
134
|
-
console.log(`BEFORE REQUEST`);
|
|
135
|
-
const [response] = await this.speechClient.recognize(request);
|
|
136
|
-
console.log(`AFTER REQUEST`);
|
|
137
|
-
if (!response.results || response.results.length === 0) {
|
|
138
|
-
throw new Error("No transcription results returned");
|
|
152
|
+
}
|
|
153
|
+
const buffer = Buffer.concat(chunks);
|
|
154
|
+
let request = {
|
|
155
|
+
config: {
|
|
156
|
+
encoding: "LINEAR16",
|
|
157
|
+
languageCode: "en-US",
|
|
158
|
+
...options?.config
|
|
159
|
+
},
|
|
160
|
+
audio: {
|
|
161
|
+
content: buffer.toString("base64")
|
|
139
162
|
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
if (!
|
|
147
|
-
|
|
163
|
+
};
|
|
164
|
+
const [response] = await this.speechClient.recognize(request);
|
|
165
|
+
if (!response.results || response.results.length === 0) {
|
|
166
|
+
throw new Error("No transcription results returned");
|
|
167
|
+
}
|
|
168
|
+
const transcription = response.results.map((result) => {
|
|
169
|
+
if (!result.alternatives || result.alternatives.length === 0) {
|
|
170
|
+
return "";
|
|
148
171
|
}
|
|
149
|
-
return
|
|
150
|
-
}
|
|
172
|
+
return result.alternatives[0].transcript || "";
|
|
173
|
+
}).filter((text) => text.length > 0).join(" ");
|
|
174
|
+
if (!transcription) {
|
|
175
|
+
throw new Error("No valid transcription found in results");
|
|
176
|
+
}
|
|
177
|
+
return transcription;
|
|
151
178
|
}
|
|
152
179
|
};
|
|
153
180
|
|
|
154
181
|
export { GoogleVoice };
|
|
182
|
+
//# sourceMappingURL=index.js.map
|
|
183
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"names":[],"mappings":";;;;;;AA6BA,IAAM,iBAAA,GAAoB,CAAC,WAAA,EAA4C,QAAA,KAAqC;AAC1G,EAAA,MAAM,WAAuB,EAAC;AAE9B,EAAA,MAAM,MAAA,GAAS,WAAA,EAAa,MAAA,IAAU,QAAA,CAAS,MAAA;AAC/C,EAAA,IAAI,MAAA,EAAQ;AACV,IAAA,QAAA,CAAS,MAAA,GAAS,MAAA;AAAA,EACpB;AAEA,EAAA,MAAM,WAAA,GAAc,WAAA,EAAa,WAAA,IAAe,QAAA,CAAS,WAAA;AACzD,EAAA,IAAI,WAAA,EAAa;AACf,IAAA,QAAA,CAAS,WAAA,GAAc,WAAA;AAAA,EACzB;AAEA,EAAA,MAAM,WAAA,GAAc,WAAA,EAAa,WAAA,IAAe,QAAA,CAAS,WAAA;AACzD,EAAA,IAAI,WAAA,EAAa;AACf,IAAA,QAAA,CAAS,WAAA,GAAc,WAAA;AAAA,EACzB;AAEA,EAAA,OAAO,QAAA;AACT,CAAA;AAEA,IAAM,gBAAA,GAAmB,CAAC,MAAA,KAA4C;AACpE,EAAA,IAAI,OAAO,WAAA,EAAa;AACtB,IAAA,OAAO,EAAE,WAAA,EAAa,MAAA,CAAO,WAAA,EAAY;AAAA,EAC3C;AAEA,EAAA,IAAI,OAAO,WAAA,EAAa;AACtB,IAAA,OAAO,EAAE,WAAA,EAAa,MAAA,CAAO,WAAA,EAAY;AAAA,EAC3C;AAEA,EAAA,IAAI,OAAO,MAAA,EAAQ;AACjB,IAAA,OAAO,EAAE,MAAA,EAAQ,MAAA,CAAO,MAAA,EAAO;AAAA,EACjC;AAEA,EAAA,OAAO,EAAC;AACV,CAAA;AAEA,IAAM,aAAA,GAAgB,gBAAA;AAOf,IAAM,WAAA,GAAN,cAA0B,WAAA,CAAY;AAAA,EACnC,SAAA;AAAA,EACA,YAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASR,WAAA,CAAY;AAAA,IACV,cAAA;AAAA,IACA,WAAA;AAAA,IACA;AAAA,GACF,GAII,EAAC,EAAG;AACN,IAAA,MAAM,aAAA,GAAgB,QAAQ,GAAA,CAAI,cAAA;AAClC,IAAA,MAAM,kBAAA,GAAqB,QAAQ,GAAA,CAAI,8BAAA;AACvC,IAAA,MAAM,cAAA,GAAiB,aAAA;AAEvB,IAAA,MAAM,cAAA,GAA6B;AAAA,MACjC,MAAA,EAAQ,aAAA,IAAiB,WAAA,EAAa,MAAA,IAAU,cAAA,EAAgB,MAAA;AAAA,MAChE,WAAA,EAAa,kBAAA,IAAsB,WAAA,EAAa,WAAA,IAAe,cAAA,EAAgB,WAAA;AAAA,MAC/E,WAAA,EAAa,WAAA,EAAa,WAAA,IAAe,cAAA,EAAgB;AAAA,KAC3D;AAEA,IAAA,MAAM,gBAAA,GAAmB,iBAAA,CAAkB,WAAA,EAAa,cAAc,CAAA;AACtE,IAAA,MAAM,mBAAA,GAAsB,iBAAA,CAAkB,cAAA,EAAgB,cAAc,CAAA;AAE5E,IAAA,KAAA,CAAM;AAAA,MACJ,WAAA,EAAa;AAAA,QACX,IAAA,EAAM,EAAA;AAAA,QACN,MAAA,EAAQ,iBAAiB,MAAA,IAAU;AAAA,OACrC;AAAA,MACA,cAAA,EAAgB;AAAA,QACd,IAAA,EAAM,EAAA;AAAA,QACN,MAAA,EAAQ,oBAAoB,MAAA,IAAU;AAAA,OACxC;AAAA,MACA,SAAS,OAAA,IAAW;AAAA,KACrB,CAAA;AAED,IAAA,MAAM,UAAA,GAAa,iBAAiB,gBAAgB,CAAA;AACpD,IAAA,MAAM,aAAA,GAAgB,iBAAiB,mBAAmB,CAAA;AAE1D,IAAA,IAAA,CAAK,SAAA,GAAY,IAAI,kBAAA,CAAmB,UAAU
,CAAA;AAElD,IAAA,IAAA,CAAK,YAAA,GAAe,IAAI,YAAA,CAAa,aAAa,CAAA;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,WAAA,CAAY,EAAE,eAAe,OAAA,EAAQ,GAA+B,EAAC,EAAG;AAC5E,IAAA,MAAM,CAAC,QAAQ,CAAA,GAAI,MAAM,KAAK,SAAA,CAAU,UAAA,CAAW,EAAE,YAAA,EAA4B,CAAA;AACjF,IAAA,OAAA,CAAQ,QAAA,EAAU,MAAA,IAAU,EAAC,EAC1B,MAAA,CAAO,CAAA,KAAA,KAAS,KAAA,CAAM,IAAA,IAAQ,KAAA,CAAM,aAAa,CAAA,CACjD,GAAA,CAAI,CAAA,KAAA,MAAU;AAAA,MACb,SAAS,KAAA,CAAM,IAAA;AAAA,MACf,eAAe,KAAA,CAAM;AAAA,KACvB,CAAE,CAAA;AAAA,EACN;AAAA,EAEA,MAAc,eAAe,MAAA,EAAgD;AAC3E,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,MAAA,EAAQ;AAChC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,OAAO,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA,CAAE,SAAS,OAAO,CAAA;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,KAAA,CACJ,KAAA,EACA,OAAA,EAKgC;AAChC,IAAA,MAAM,IAAA,GAAO,OAAO,KAAA,KAAU,QAAA,GAAW,QAAQ,MAAM,IAAA,CAAK,eAAe,KAAK,CAAA;AAEhF,IAAA,MAAM,OAAA,GAA4E;AAAA,MAChF,KAAA,EAAO,EAAE,IAAA,EAAK;AAAA,MACd,KAAA,EAAO;AAAA,QACL,IAAA,EAAM,OAAA,EAAS,OAAA,IAAW,IAAA,CAAK,OAAA;AAAA,QAC/B,YAAA,EAAc,OAAA,EAAS,YAAA,IAAgB,OAAA,EAAS,SAAS,KAAA,CAAM,GAAG,CAAA,CAAE,KAAA,CAAM,CAAA,EAAG,CAAC,CAAA,CAAE,IAAA,CAAK,GAAG,CAAA,IAAK;AAAA,OAC/F;AAAA,MACA,WAAA,EAAa,OAAA,EAAS,WAAA,IAAe,EAAE,eAAe,UAAA;AAAW,KACnE;AAEA,IAAA,MAAM,CAAC,QAAQ,CAAA,GAAI,MAAM,IAAA,CAAK,SAAA,CAAU,iBAAiB,OAAO,CAAA;AAEhE,IAAA,IAAI,CAAC,SAAS,YAAA,EAAc;AAC1B,MAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,IAC9C;AAEA,IAAA,IAAI,OAAO,QAAA,CAAS,YAAA,KAAiB,QAAA,EAAU;AAC7C,MAAA,MAAM,IAAI,MAAM,4BAA4B,CAAA;AAAA,IAC9C;AAEA,IAAA,MAAM,MAAA,GAAS,IAAI,WAAA,EAAY;AAC/B,IAAA,MAAA,CAAO,GAAA,CAAI,MAAA,CAAO,IAAA,CAAK,QAAA,CAAS,YAAY,CAAC,CAAA;AAC7C,IAAA,OAAO,MAAA;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,WAAA,GAAc;AAClB,IAAA,OAAO,EAAE,SAAS,IAAA,EAAK;AAAA,EACzB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MAAA,CACJ,WAAA,EACA,OAAA,EACiB;AACjB,IAAA,MAAM,SAAmB,EAAC;AAC1B,IAAA,WAAA,MAAiB,SAAS,
WAAA,EAAa;AACrC,MAAA,IAAI,OAAO,UAAU,QAAA,EAAU;AAC7B,QAAA,MAAA,CAAO,IAAA,CAAK,MAAA,CAAO,IAAA,CAAK,KAAK,CAAC,CAAA;AAAA,MAChC,CAAA,MAAO;AACL,QAAA,MAAA,CAAO,KAAK,KAAK,CAAA;AAAA,MACnB;AAAA,IACF;AACA,IAAA,MAAM,MAAA,GAAS,MAAA,CAAO,MAAA,CAAO,MAAM,CAAA;AAEnC,IAAA,IAAI,OAAA,GAAU;AAAA,MACZ,MAAA,EAAQ;AAAA,QACN,QAAA,EAAU,UAAA;AAAA,QACV,YAAA,EAAc,OAAA;AAAA,QACd,GAAG,OAAA,EAAS;AAAA,OACd;AAAA,MACA,KAAA,EAAO;AAAA,QACL,OAAA,EAAS,MAAA,CAAO,QAAA,CAAS,QAAQ;AAAA;AACnC,KACF;AACA,IAAA,MAAM,CAAC,QAAQ,CAAA,GAAI,MAAM,IAAA,CAAK,YAAA,CAAa,UAAU,OAAwD,CAAA;AAE7G,IAAA,IAAI,CAAC,QAAA,CAAS,OAAA,IAAW,QAAA,CAAS,OAAA,CAAQ,WAAW,CAAA,EAAG;AACtD,MAAA,MAAM,IAAI,MAAM,mCAAmC,CAAA;AAAA,IACrD;AAEA,IAAA,MAAM,aAAA,GAAgB,QAAA,CAAS,OAAA,CAC5B,GAAA,CAAI,CAAC,MAAA,KAAgB;AACpB,MAAA,IAAI,CAAC,MAAA,CAAO,YAAA,IAAgB,MAAA,CAAO,YAAA,CAAa,WAAW,CAAA,EAAG;AAC5D,QAAA,OAAO,EAAA;AAAA,MACT;AACA,MAAA,OAAO,MAAA,CAAO,YAAA,CAAa,CAAC,CAAA,CAAE,UAAA,IAAc,EAAA;AAAA,IAC9C,CAAC,CAAA,CACA,MAAA,CAAO,CAAC,IAAA,KAAiB,KAAK,MAAA,GAAS,CAAC,CAAA,CACxC,IAAA,CAAK,GAAG,CAAA;AAEX,IAAA,IAAI,CAAC,aAAA,EAAe;AAClB,MAAA,MAAM,IAAI,MAAM,yCAAyC,CAAA;AAAA,IAC3D;AAEA,IAAA,OAAO,aAAA;AAAA,EACT;AACF","file":"index.js","sourcesContent":["import { PassThrough } from 'stream';\n\nimport { SpeechClient } from '@google-cloud/speech';\nimport type { google as SpeechTypes } from '@google-cloud/speech/build/protos/protos';\nimport { TextToSpeechClient } from '@google-cloud/text-to-speech';\nimport type { google as TextToSpeechTypes } from '@google-cloud/text-to-speech/build/protos/protos';\nimport { MastraVoice } from '@mastra/core/voice';\n\n/**\n * Configuration for Google Cloud Voice models\n * @interface GoogleModelConfig\n * @property {string} [apiKey] - Optional Google Cloud API key. If not provided, will use GOOGLE_API_KEY environment variable\n * @property {string} [keyFilename] - Optional path to a service account key file. 
If not provided, will use GOOGLE_APPLICATION_CREDENTIALS environment variable\n * @property {{ client_email?: string; private_key?: string }} [credentials] - Optional in-memory service account credentials\n */\nexport interface GoogleModelConfig {\n apiKey?: string;\n keyFilename?: string;\n credentials?: {\n client_email?: string;\n private_key?: string;\n [key: string]: unknown;\n };\n}\n\ntype AuthConfig = Pick<GoogleModelConfig, 'apiKey' | 'keyFilename' | 'credentials'>;\n\ntype GoogleClientOptions = AuthConfig;\n\nconst resolveAuthConfig = (modelConfig: GoogleModelConfig | undefined, fallback: AuthConfig): AuthConfig => {\n const resolved: AuthConfig = {};\n\n const apiKey = modelConfig?.apiKey ?? fallback.apiKey;\n if (apiKey) {\n resolved.apiKey = apiKey;\n }\n\n const keyFilename = modelConfig?.keyFilename ?? fallback.keyFilename;\n if (keyFilename) {\n resolved.keyFilename = keyFilename;\n }\n\n const credentials = modelConfig?.credentials ?? fallback.credentials;\n if (credentials) {\n resolved.credentials = credentials;\n }\n\n return resolved;\n};\n\nconst buildAuthOptions = (config: AuthConfig): GoogleClientOptions => {\n if (config.credentials) {\n return { credentials: config.credentials };\n }\n\n if (config.keyFilename) {\n return { keyFilename: config.keyFilename };\n }\n\n if (config.apiKey) {\n return { apiKey: config.apiKey };\n }\n\n return {};\n};\n\nconst DEFAULT_VOICE = 'en-US-Casual-K';\n\n/**\n * GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services\n * @class GoogleVoice\n * @extends MastraVoice\n */\nexport class GoogleVoice extends MastraVoice {\n private ttsClient: TextToSpeechClient;\n private speechClient: SpeechClient;\n\n /**\n * Creates an instance of GoogleVoice\n * @param {Object} config - Configuration options\n * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis\n * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech 
recognition\n * @param {string} [config.speaker] - Default voice ID to use for speech synthesis\n */\n constructor({\n listeningModel,\n speechModel,\n speaker,\n }: {\n listeningModel?: GoogleModelConfig;\n speechModel?: GoogleModelConfig;\n speaker?: string;\n } = {}) {\n const defaultApiKey = process.env.GOOGLE_API_KEY;\n const defaultKeyFilename = process.env.GOOGLE_APPLICATION_CREDENTIALS;\n const defaultSpeaker = DEFAULT_VOICE;\n\n const sharedFallback: AuthConfig = {\n apiKey: defaultApiKey ?? speechModel?.apiKey ?? listeningModel?.apiKey,\n keyFilename: defaultKeyFilename ?? speechModel?.keyFilename ?? listeningModel?.keyFilename,\n credentials: speechModel?.credentials ?? listeningModel?.credentials,\n };\n\n const speechAuthConfig = resolveAuthConfig(speechModel, sharedFallback);\n const listeningAuthConfig = resolveAuthConfig(listeningModel, sharedFallback);\n\n super({\n speechModel: {\n name: '',\n apiKey: speechAuthConfig.apiKey ?? defaultApiKey,\n },\n listeningModel: {\n name: '',\n apiKey: listeningAuthConfig.apiKey ?? defaultApiKey,\n },\n speaker: speaker ?? defaultSpeaker,\n });\n\n const ttsOptions = buildAuthOptions(speechAuthConfig);\n const speechOptions = buildAuthOptions(listeningAuthConfig);\n\n this.ttsClient = new TextToSpeechClient(ttsOptions);\n\n this.speechClient = new SpeechClient(speechOptions);\n }\n\n /**\n * Gets a list of available voices\n * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. 
Default language is en-US.\n */\n async getSpeakers({ languageCode = 'en-US' }: { languageCode?: string } = {}) {\n const [response] = await this.ttsClient.listVoices({ languageCode: languageCode });\n return (response?.voices || [])\n .filter(voice => voice.name && voice.languageCodes)\n .map(voice => ({\n voiceId: voice.name!,\n languageCodes: voice.languageCodes!,\n }));\n }\n\n private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of stream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n return Buffer.concat(chunks).toString('utf-8');\n }\n\n /**\n * Converts text to speech\n * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech\n * @param {Object} [options] - Speech synthesis options\n * @param {string} [options.speaker] - Voice ID to use\n * @param {string} [options.languageCode] - Language code for the voice\n * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options\n * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.\n */\n async speak(\n input: string | NodeJS.ReadableStream,\n options?: {\n speaker?: string;\n languageCode?: string;\n audioConfig?: TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];\n },\n ): Promise<NodeJS.ReadableStream> {\n const text = typeof input === 'string' ? 
input : await this.streamToString(input);\n\n const request: TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {\n input: { text },\n voice: {\n name: options?.speaker || this.speaker,\n languageCode: options?.languageCode || options?.speaker?.split('-').slice(0, 2).join('-') || 'en-US',\n },\n audioConfig: options?.audioConfig || { audioEncoding: 'LINEAR16' },\n };\n\n const [response] = await this.ttsClient.synthesizeSpeech(request);\n\n if (!response.audioContent) {\n throw new Error('No audio content returned.');\n }\n\n if (typeof response.audioContent === 'string') {\n throw new Error('Audio content is a string.');\n }\n\n const stream = new PassThrough();\n stream.end(Buffer.from(response.audioContent));\n return stream;\n }\n\n /**\n * Checks if listening capabilities are enabled.\n *\n * @returns {Promise<{ enabled: boolean }>}\n */\n async getListener() {\n return { enabled: true };\n }\n\n /**\n * Converts speech to text\n * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. 
Default encoding is LINEAR16.\n * @param {Object} [options] - Recognition options\n * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration\n * @returns {Promise<string>} Transcribed text\n */\n async listen(\n audioStream: NodeJS.ReadableStream,\n options?: { stream?: boolean; config?: SpeechTypes.cloud.speech.v1.IRecognitionConfig },\n ): Promise<string> {\n const chunks: Buffer[] = [];\n for await (const chunk of audioStream) {\n if (typeof chunk === 'string') {\n chunks.push(Buffer.from(chunk));\n } else {\n chunks.push(chunk);\n }\n }\n const buffer = Buffer.concat(chunks);\n\n let request = {\n config: {\n encoding: 'LINEAR16',\n languageCode: 'en-US',\n ...options?.config,\n },\n audio: {\n content: buffer.toString('base64'),\n },\n };\n const [response] = await this.speechClient.recognize(request as SpeechTypes.cloud.speech.v1.IRecognizeRequest);\n\n if (!response.results || response.results.length === 0) {\n throw new Error('No transcription results returned');\n }\n\n const transcription = response.results\n .map((result: any) => {\n if (!result.alternatives || result.alternatives.length === 0) {\n return '';\n }\n return result.alternatives[0].transcript || '';\n })\n .filter((text: string) => text.length > 0)\n .join(' ');\n\n if (!transcription) {\n throw new Error('No valid transcription found in results');\n }\n\n return transcription;\n }\n}\n"]}
|
package/package.json
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/voice-google",
|
|
3
|
-
"version": "0.0.0-vnext-inngest-20250508122351",
|
|
3
|
+
"version": "0.0.0-vnext-20251104230439",
|
|
4
4
|
"description": "Mastra Google voice integration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"files": [
|
|
7
|
-
"dist"
|
|
7
|
+
"dist",
|
|
8
|
+
"CHANGELOG.md"
|
|
8
9
|
],
|
|
9
10
|
"main": "dist/index.js",
|
|
10
11
|
"types": "dist/index.d.ts",
|
|
@@ -15,30 +16,43 @@
|
|
|
15
16
|
"default": "./dist/index.js"
|
|
16
17
|
},
|
|
17
18
|
"require": {
|
|
18
|
-
"types": "./dist/index.d.cts",
|
|
19
|
+
"types": "./dist/index.d.ts",
|
|
19
20
|
"default": "./dist/index.cjs"
|
|
20
21
|
}
|
|
21
22
|
},
|
|
22
23
|
"./package.json": "./package.json"
|
|
23
24
|
},
|
|
24
|
-
"license": "
|
|
25
|
+
"license": "Apache-2.0",
|
|
25
26
|
"dependencies": {
|
|
26
27
|
"@google-cloud/speech": "^6.7.1",
|
|
27
|
-
"@google-cloud/text-to-speech": "^6.
|
|
28
|
-
"zod": "^3.24.3",
|
|
29
|
-
"@mastra/core": "0.0.0-vnext-inngest-20250508122351"
|
|
28
|
+
"@google-cloud/text-to-speech": "^6.3.1"
|
|
30
29
|
},
|
|
31
30
|
"devDependencies": {
|
|
32
|
-
"@types/node": "^20.
|
|
33
|
-
"eslint": "^9.
|
|
34
|
-
"tsup": "^8.
|
|
31
|
+
"@types/node": "^20.19.0",
|
|
32
|
+
"eslint": "^9.37.0",
|
|
33
|
+
"tsup": "^8.5.0",
|
|
35
34
|
"typescript": "^5.8.3",
|
|
36
|
-
"vitest": "^2.
|
|
37
|
-
"@internal/lint": "0.0.0-vnext-inngest-20250508122351"
|
|
35
|
+
"vitest": "^3.2.4",
|
|
36
|
+
"@internal/lint": "0.0.0-vnext-20251104230439",
|
|
37
|
+
"@internal/types-builder": "0.0.0-vnext-20251104230439",
|
|
38
|
+
"@mastra/core": "0.0.0-vnext-20251104230439"
|
|
39
|
+
},
|
|
40
|
+
"peerDependencies": {
|
|
41
|
+
"zod": "^3.25.0 || ^4.0.0",
|
|
42
|
+
"@mastra/core": "0.0.0-vnext-20251104230439"
|
|
43
|
+
},
|
|
44
|
+
"homepage": "https://mastra.ai",
|
|
45
|
+
"repository": {
|
|
46
|
+
"type": "git",
|
|
47
|
+
"url": "git+https://github.com/mastra-ai/mastra.git",
|
|
48
|
+
"directory": "voice/google"
|
|
49
|
+
},
|
|
50
|
+
"bugs": {
|
|
51
|
+
"url": "https://github.com/mastra-ai/mastra/issues"
|
|
38
52
|
},
|
|
39
53
|
"scripts": {
|
|
40
|
-
"build": "tsup
|
|
41
|
-
"build:watch": "
|
|
54
|
+
"build": "tsup --silent --config tsup.config.ts",
|
|
55
|
+
"build:watch": "tsup --watch --silent --config tsup.config.ts",
|
|
42
56
|
"test": "vitest run",
|
|
43
57
|
"lint": "eslint ."
|
|
44
58
|
}
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
import type { google } from '@google-cloud/text-to-speech/build/protos/protos';
|
|
2
|
-
import type { google as google_2 } from '@google-cloud/speech/build/protos/protos';
|
|
3
|
-
import { MastraVoice } from '@mastra/core/voice';
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Configuration for Google Cloud Voice models
|
|
7
|
-
* @interface GoogleModelConfig
|
|
8
|
-
* @property {string} [apiKey] - Optional Google Cloud API key. If not provided, will use GOOGLE_API_KEY environment variable
|
|
9
|
-
*/
|
|
10
|
-
export declare interface GoogleModelConfig {
|
|
11
|
-
apiKey?: string;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
|
|
16
|
-
* @class GoogleVoice
|
|
17
|
-
* @extends MastraVoice
|
|
18
|
-
*/
|
|
19
|
-
export declare class GoogleVoice extends MastraVoice {
|
|
20
|
-
private ttsClient;
|
|
21
|
-
private speechClient;
|
|
22
|
-
/**
|
|
23
|
-
* Creates an instance of GoogleVoice
|
|
24
|
-
* @param {Object} config - Configuration options
|
|
25
|
-
* @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
|
|
26
|
-
* @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
|
|
27
|
-
* @param {string} [config.speaker] - Default voice ID to use for speech synthesis
|
|
28
|
-
* @throws {Error} If no API key is provided via config or environment variable
|
|
29
|
-
*/
|
|
30
|
-
constructor({ listeningModel, speechModel, speaker, }?: {
|
|
31
|
-
listeningModel?: GoogleModelConfig;
|
|
32
|
-
speechModel?: GoogleModelConfig;
|
|
33
|
-
speaker?: string;
|
|
34
|
-
});
|
|
35
|
-
/**
|
|
36
|
-
* Gets a list of available voices
|
|
37
|
-
* @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
|
|
38
|
-
*/
|
|
39
|
-
getSpeakers({ languageCode }?: {
|
|
40
|
-
languageCode?: string;
|
|
41
|
-
}): Promise<{
|
|
42
|
-
voiceId: string;
|
|
43
|
-
languageCodes: string[];
|
|
44
|
-
}[]>;
|
|
45
|
-
private streamToString;
|
|
46
|
-
/**
|
|
47
|
-
* Converts text to speech
|
|
48
|
-
* @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
|
|
49
|
-
* @param {Object} [options] - Speech synthesis options
|
|
50
|
-
* @param {string} [options.speaker] - Voice ID to use
|
|
51
|
-
* @param {string} [options.languageCode] - Language code for the voice
|
|
52
|
-
* @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
|
|
53
|
-
* @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
|
|
54
|
-
*/
|
|
55
|
-
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
56
|
-
speaker?: string;
|
|
57
|
-
languageCode?: string;
|
|
58
|
-
audioConfig?: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
|
|
59
|
-
}): Promise<NodeJS.ReadableStream>;
|
|
60
|
-
/**
|
|
61
|
-
* Converts speech to text
|
|
62
|
-
* @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
|
|
63
|
-
* @param {Object} [options] - Recognition options
|
|
64
|
-
* @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
|
|
65
|
-
* @returns {Promise<string>} Transcribed text
|
|
66
|
-
*/
|
|
67
|
-
listen(audioStream: NodeJS.ReadableStream, options?: {
|
|
68
|
-
stream?: boolean;
|
|
69
|
-
config?: google_2.cloud.speech.v1.IRecognitionConfig;
|
|
70
|
-
}): Promise<string>;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
export { }
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
import type { google } from '@google-cloud/text-to-speech/build/protos/protos';
|
|
2
|
-
import type { google as google_2 } from '@google-cloud/speech/build/protos/protos';
|
|
3
|
-
import { MastraVoice } from '@mastra/core/voice';
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Configuration for Google Cloud Voice models
|
|
7
|
-
* @interface GoogleModelConfig
|
|
8
|
-
* @property {string} [apiKey] - Optional Google Cloud API key. If not provided, will use GOOGLE_API_KEY environment variable
|
|
9
|
-
*/
|
|
10
|
-
export declare interface GoogleModelConfig {
|
|
11
|
-
apiKey?: string;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
|
|
16
|
-
* @class GoogleVoice
|
|
17
|
-
* @extends MastraVoice
|
|
18
|
-
*/
|
|
19
|
-
export declare class GoogleVoice extends MastraVoice {
|
|
20
|
-
private ttsClient;
|
|
21
|
-
private speechClient;
|
|
22
|
-
/**
|
|
23
|
-
* Creates an instance of GoogleVoice
|
|
24
|
-
* @param {Object} config - Configuration options
|
|
25
|
-
* @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
|
|
26
|
-
* @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
|
|
27
|
-
* @param {string} [config.speaker] - Default voice ID to use for speech synthesis
|
|
28
|
-
* @throws {Error} If no API key is provided via config or environment variable
|
|
29
|
-
*/
|
|
30
|
-
constructor({ listeningModel, speechModel, speaker, }?: {
|
|
31
|
-
listeningModel?: GoogleModelConfig;
|
|
32
|
-
speechModel?: GoogleModelConfig;
|
|
33
|
-
speaker?: string;
|
|
34
|
-
});
|
|
35
|
-
/**
|
|
36
|
-
* Gets a list of available voices
|
|
37
|
-
* @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
|
|
38
|
-
*/
|
|
39
|
-
getSpeakers({ languageCode }?: {
|
|
40
|
-
languageCode?: string;
|
|
41
|
-
}): Promise<{
|
|
42
|
-
voiceId: string;
|
|
43
|
-
languageCodes: string[];
|
|
44
|
-
}[]>;
|
|
45
|
-
private streamToString;
|
|
46
|
-
/**
|
|
47
|
-
* Converts text to speech
|
|
48
|
-
* @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
|
|
49
|
-
* @param {Object} [options] - Speech synthesis options
|
|
50
|
-
* @param {string} [options.speaker] - Voice ID to use
|
|
51
|
-
* @param {string} [options.languageCode] - Language code for the voice
|
|
52
|
-
* @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
|
|
53
|
-
* @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
|
|
54
|
-
*/
|
|
55
|
-
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
56
|
-
speaker?: string;
|
|
57
|
-
languageCode?: string;
|
|
58
|
-
audioConfig?: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
|
|
59
|
-
}): Promise<NodeJS.ReadableStream>;
|
|
60
|
-
/**
|
|
61
|
-
* Converts speech to text
|
|
62
|
-
* @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
|
|
63
|
-
* @param {Object} [options] - Recognition options
|
|
64
|
-
* @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
|
|
65
|
-
* @returns {Promise<string>} Transcribed text
|
|
66
|
-
*/
|
|
67
|
-
listen(audioStream: NodeJS.ReadableStream, options?: {
|
|
68
|
-
stream?: boolean;
|
|
69
|
-
config?: google_2.cloud.speech.v1.IRecognitionConfig;
|
|
70
|
-
}): Promise<string>;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
export { }
|
package/dist/index.d.cts
DELETED