@mastra/voice-google 0.0.0-commonjs-20250227130920

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+
2
+ > @mastra/voice-google@0.1.1-alpha.0 build C:\Users\Ward\projects\mastra\mastra\voice\google
3
+ > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
4
+
5
+ CLI Building entry: src/index.ts
6
+ CLI Using tsconfig: tsconfig.json
7
+ CLI tsup v8.3.6
8
+ TSC Build start
9
+ TSC ⚡️ Build success in 2797ms
10
+ DTS Build start
11
+ CLI Target: es2022
12
+ Analysis will use the bundled TypeScript version 5.7.3
13
+ Writing package typings: C:\Users\Ward\projects\mastra\mastra\voice\google\dist\_tsup-dts-rollup.d.ts
14
+ Analysis will use the bundled TypeScript version 5.7.3
15
+ Writing package typings: C:\Users\Ward\projects\mastra\mastra\voice\google\dist\_tsup-dts-rollup.d.cts
16
+ DTS ⚡️ Build success in 5320ms
17
+ CLI Cleaning output folder
18
+ ESM Build start
19
+ CJS Build start
20
+ ESM dist\index.js 5.45 KB
21
+ ESM ⚡️ Build success in 141ms
22
+ CJS dist\index.cjs 5.48 KB
23
+ CJS ⚡️ Build success in 142ms
package/CHANGELOG.md ADDED
@@ -0,0 +1,83 @@
1
+ # @mastra/voice-google
2
+
3
+ ## 0.0.0-commonjs-20250227130920
4
+
5
+ ### Patch Changes
6
+
7
+ - 4a712fc: Add support for commonjs
8
+ - Updated dependencies [ed55f1d]
9
+ - Updated dependencies [06aa827]
10
+ - Updated dependencies [8d13b14]
11
+ - Updated dependencies [4a712fc]
12
+ - Updated dependencies [108793c]
13
+ - Updated dependencies [5f28f44]
14
+ - @mastra/core@0.0.0-commonjs-20250227130920
15
+
16
+ ## 0.1.1-alpha.0
17
+
18
+ ### Patch Changes
19
+
20
+ - Updated dependencies [06aa827]
21
+ - @mastra/core@0.4.3-alpha.0
22
+
23
+ ## 0.1.0
24
+
25
+ ### Patch Changes
26
+
27
+ - 5e0f727: deprecate @mastra/speech-google for @mastra/voice-google
28
+ - Updated dependencies [7fceae1]
29
+ - Updated dependencies [8d94c3e]
30
+ - Updated dependencies [99dcdb5]
31
+ - Updated dependencies [6cb63e0]
32
+ - Updated dependencies [f626fbb]
33
+ - Updated dependencies [e752340]
34
+ - Updated dependencies [eb91535]
35
+ - @mastra/core@0.4.2
36
+
37
+ ## 0.1.0-alpha.4
38
+
39
+ ### Patch Changes
40
+
41
+ - Updated dependencies [8d94c3e]
42
+ - Updated dependencies [99dcdb5]
43
+ - Updated dependencies [e752340]
44
+ - Updated dependencies [eb91535]
45
+ - @mastra/core@0.4.2-alpha.2
46
+
47
+ ## 0.1.0-alpha.3
48
+
49
+ ### Patch Changes
50
+
51
+ - Updated dependencies [6cb63e0]
52
+ - @mastra/core@0.4.2-alpha.1
53
+
54
+ ## 0.1.0-alpha.2
55
+
56
+ ### Patch Changes
57
+
58
+ - 5e0f727: deprecate @mastra/speech-google for @mastra/voice-google
59
+
60
+ ## 0.1.0
61
+
62
+ ### Minor Changes
63
+
64
+ - Initial release of @mastra/voice-google
65
+ - Combines functionality from deprecated @mastra/speech-google
66
+ - Adds Speech-to-Text capabilities
67
+ - Implements new MastraVoice interface from @mastra/core
68
+
69
+ ### Notes
70
+
71
+ This package replaces @mastra/speech-google, which reached version 0.1.3-alpha.1. Key features from the previous package:
72
+
73
+ - Neural Text-to-Speech synthesis
74
+ - Multiple voice options
75
+ - Streaming support
76
+ - Integration with Google Cloud services
77
+
78
+ The new package adds:
79
+
80
+ - Speech-to-Text recognition
81
+ - Combined speech and listening models
82
+ - Improved voice management
83
+ - Better type safety and error handling
package/LICENSE ADDED
@@ -0,0 +1,44 @@
1
+ Elastic License 2.0 (ELv2)
2
+
3
+ **Acceptance**
4
+ By using the software, you agree to all of the terms and conditions below.
5
+
6
+ **Copyright License**
7
+ The licensor grants you a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable license to use, copy, distribute, make available, and prepare derivative works of the software, in each case subject to the limitations and conditions below.
8
+
9
+ **Limitations**
10
+ You may not provide the software to third parties as a hosted or managed service, where the service provides users with access to any substantial set of the features or functionality of the software.
11
+
12
+ You may not move, change, disable, or circumvent the license key functionality in the software, and you may not remove or obscure any functionality in the software that is protected by the license key.
13
+
14
+ You may not alter, remove, or obscure any licensing, copyright, or other notices of the licensor in the software. Any use of the licensor’s trademarks is subject to applicable law.
15
+
16
+ **Patents**
17
+ The licensor grants you a license, under any patent claims the licensor can license, or becomes able to license, to make, have made, use, sell, offer for sale, import and have imported the software, in each case subject to the limitations and conditions in this license. This license does not cover any patent claims that you cause to be infringed by modifications or additions to the software. If you or your company make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
18
+
19
+ **Notices**
20
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms.
21
+
22
+ If you modify the software, you must include in any modified copies of the software prominent notices stating that you have modified the software.
23
+
24
+ **No Other Rights**
25
+ These terms do not imply any licenses other than those expressly granted in these terms.
26
+
27
+ **Termination**
28
+ If you use the software in violation of these terms, such use is not licensed, and your licenses will automatically terminate. If the licensor provides you with a notice of your violation, and you cease all violation of this license no later than 30 days after you receive that notice, your licenses will be reinstated retroactively. However, if you violate these terms after such reinstatement, any additional violation of these terms will cause your licenses to terminate automatically and permanently.
29
+
30
+ **No Liability**
31
+ As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
32
+
33
+ **Definitions**
34
+ The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it.
35
+
36
+ _you_ refers to the individual or entity agreeing to these terms.
37
+
38
+ _your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
39
+
40
+ _your licenses_ are all the licenses granted to you for the software under these terms.
41
+
42
+ _use_ means anything you do with the software requiring one of your licenses.
43
+
44
+ _trademark_ means trademarks, service marks, and similar rights.
package/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # @mastra/voice-google
2
+
3
+ Google Cloud Voice integration for Mastra, providing both Text-to-Speech (TTS) and Speech-to-Text capabilities.
4
+
5
+ > Note: This package replaces the deprecated @mastra/speech-google package, combining both speech synthesis and recognition capabilities.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install @mastra/voice-google
11
+ ```
12
+
13
+ ## Configuration
14
+
15
+ The module reads its API key from the following environment variable, unless an `apiKey` is passed to the constructor:
16
+
17
+ ```bash
18
+ GOOGLE_API_KEY=your_api_key
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ ```typescript
24
+ import { GoogleVoice } from '@mastra/voice-google';
25
+
26
+ // Initialize with configuration
27
+ const voice = new GoogleVoice({
28
+ speechModel: {
29
+ apiKey: 'your-api-key', // Optional, can use GOOGLE_API_KEY env var
30
+ },
31
+ listeningModel: {
32
+ apiKey: 'your-api-key', // Optional, can use GOOGLE_API_KEY env var
33
+ },
34
+ speaker: 'en-US-Standard-F', // Default voice for this instance (falls back to 'en-US-Casual-K' when omitted)
35
+ });
36
+
37
+ // List available voices
38
+ const voices = await voice.getSpeakers();
39
+
40
+ // Generate speech
41
+ const audioStream = await voice.speak('Hello from Mastra!', {
42
+ speaker: 'en-US-Standard-F',
43
+ languageCode: 'en-US',
44
+ });
45
+
46
+ // Transcribe speech
47
+ const text = await voice.listen(audioStream);
48
+ ```
49
+
50
+ ## Features
51
+
52
+ - Neural Text-to-Speech synthesis
53
+ - Speech-to-Text recognition
54
+ - Multiple voice options across different languages
55
+ - Streaming support for both speech and transcription
56
+ - High-quality audio processing
57
+ - Natural-sounding voice synthesis
58
+
59
+ ## Voice Options
60
+
61
+ View the complete list using the `getSpeakers()` method or [Google Cloud's documentation](https://cloud.google.com/text-to-speech/docs/voices).
@@ -0,0 +1,73 @@
1
+ import type { google } from '@google-cloud/text-to-speech/build/protos/protos';
2
+ import type { google as google_2 } from '@google-cloud/speech/build/protos/protos';
3
+ import { MastraVoice } from '@mastra/core/voice';
4
+
5
+ /**
6
+ * Configuration for Google Cloud Voice models
7
+ * @interface GoogleModelConfig
8
+ * @property {string} [apiKey] - Optional Google Cloud API key. If not provided, will use GOOGLE_API_KEY environment variable
9
+ */
10
+ export declare interface GoogleModelConfig {
11
+ apiKey?: string;
12
+ }
13
+
14
+ /**
15
+ * GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
16
+ * @class GoogleVoice
17
+ * @extends MastraVoice
18
+ */
19
+ export declare class GoogleVoice extends MastraVoice {
20
+ private ttsClient;
21
+ private speechClient;
22
+ /**
23
+ * Creates an instance of GoogleVoice
24
+ * @param {Object} config - Configuration options
25
+ * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
26
+ * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
27
+ * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
28
+ * @throws {Error} If no API key is provided via config or environment variable
29
+ */
30
+ constructor({ listeningModel, speechModel, speaker, }?: {
31
+ listeningModel?: GoogleModelConfig;
32
+ speechModel?: GoogleModelConfig;
33
+ speaker?: string;
34
+ });
35
+ /**
36
+ * Gets a list of available voices
37
+ * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
38
+ */
39
+ getSpeakers({ languageCode }?: {
40
+ languageCode?: string;
41
+ }): Promise<{
42
+ voiceId: string;
43
+ languageCodes: string[];
44
+ }[]>;
45
+ private streamToString;
46
+ /**
47
+ * Converts text to speech
48
+ * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
49
+ * @param {Object} [options] - Speech synthesis options
50
+ * @param {string} [options.speaker] - Voice ID to use
51
+ * @param {string} [options.languageCode] - Language code for the voice
52
+ * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
53
+ * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
54
+ */
55
+ speak(input: string | NodeJS.ReadableStream, options?: {
56
+ speaker?: string;
57
+ languageCode?: string;
58
+ audioConfig?: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
59
+ }): Promise<NodeJS.ReadableStream>;
60
+ /**
61
+ * Converts speech to text
62
+ * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
63
+ * @param {Object} [options] - Recognition options
64
+ * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
65
+ * @returns {Promise<string>} Transcribed text
66
+ */
67
+ listen(audioStream: NodeJS.ReadableStream, options?: {
68
+ stream?: boolean;
69
+ config?: google_2.cloud.speech.v1.IRecognitionConfig;
70
+ }): Promise<string>;
71
+ }
72
+
73
+ export { }
@@ -0,0 +1,73 @@
1
+ import type { google } from '@google-cloud/text-to-speech/build/protos/protos';
2
+ import type { google as google_2 } from '@google-cloud/speech/build/protos/protos';
3
+ import { MastraVoice } from '@mastra/core/voice';
4
+
5
+ /**
6
+ * Configuration for Google Cloud Voice models
7
+ * @interface GoogleModelConfig
8
+ * @property {string} [apiKey] - Optional Google Cloud API key. If not provided, will use GOOGLE_API_KEY environment variable
9
+ */
10
+ export declare interface GoogleModelConfig {
11
+ apiKey?: string;
12
+ }
13
+
14
+ /**
15
+ * GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
16
+ * @class GoogleVoice
17
+ * @extends MastraVoice
18
+ */
19
+ export declare class GoogleVoice extends MastraVoice {
20
+ private ttsClient;
21
+ private speechClient;
22
+ /**
23
+ * Creates an instance of GoogleVoice
24
+ * @param {Object} config - Configuration options
25
+ * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
26
+ * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
27
+ * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
28
+ * @throws {Error} If no API key is provided via config or environment variable
29
+ */
30
+ constructor({ listeningModel, speechModel, speaker, }?: {
31
+ listeningModel?: GoogleModelConfig;
32
+ speechModel?: GoogleModelConfig;
33
+ speaker?: string;
34
+ });
35
+ /**
36
+ * Gets a list of available voices
37
+ * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
38
+ */
39
+ getSpeakers({ languageCode }?: {
40
+ languageCode?: string;
41
+ }): Promise<{
42
+ voiceId: string;
43
+ languageCodes: string[];
44
+ }[]>;
45
+ private streamToString;
46
+ /**
47
+ * Converts text to speech
48
+ * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
49
+ * @param {Object} [options] - Speech synthesis options
50
+ * @param {string} [options.speaker] - Voice ID to use
51
+ * @param {string} [options.languageCode] - Language code for the voice
52
+ * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
53
+ * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
54
+ */
55
+ speak(input: string | NodeJS.ReadableStream, options?: {
56
+ speaker?: string;
57
+ languageCode?: string;
58
+ audioConfig?: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
59
+ }): Promise<NodeJS.ReadableStream>;
60
+ /**
61
+ * Converts speech to text
62
+ * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
63
+ * @param {Object} [options] - Recognition options
64
+ * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
65
+ * @returns {Promise<string>} Transcribed text
66
+ */
67
+ listen(audioStream: NodeJS.ReadableStream, options?: {
68
+ stream?: boolean;
69
+ config?: google_2.cloud.speech.v1.IRecognitionConfig;
70
+ }): Promise<string>;
71
+ }
72
+
73
+ export { }
package/dist/index.cjs ADDED
@@ -0,0 +1,148 @@
1
+ 'use strict';
2
+
3
+ var stream = require('stream');
4
+ var speech = require('@google-cloud/speech');
5
+ var textToSpeech = require('@google-cloud/text-to-speech');
6
+ var voice = require('@mastra/core/voice');
7
+
8
+ // src/index.ts
9
+ var DEFAULT_VOICE = "en-US-Casual-K";
10
+ var GoogleVoice = class extends voice.MastraVoice {
11
+ ttsClient;
12
+ speechClient;
13
+ /**
14
+ * Creates an instance of GoogleVoice
15
+ * @param {Object} config - Configuration options
16
+ * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
17
+ * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
18
+ * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
19
+ * @throws {Error} If no API key is provided via config or environment variable
20
+ */
21
+ constructor({
22
+ listeningModel,
23
+ speechModel,
24
+ speaker
25
+ } = {}) {
26
+ const defaultApiKey = process.env.GOOGLE_API_KEY;
27
+ const defaultSpeaker = DEFAULT_VOICE;
28
+ super({
29
+ speechModel: {
30
+ name: "",
31
+ apiKey: speechModel?.apiKey ?? defaultApiKey
32
+ },
33
+ listeningModel: {
34
+ name: "",
35
+ apiKey: listeningModel?.apiKey ?? defaultApiKey
36
+ },
37
+ speaker: speaker ?? defaultSpeaker
38
+ });
39
+ const apiKey = defaultApiKey || speechModel?.apiKey || listeningModel?.apiKey;
40
+ if (!apiKey) {
41
+ throw new Error(
42
+ "Google API key is not set, set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
43
+ );
44
+ }
45
+ this.ttsClient = new textToSpeech.TextToSpeechClient({
46
+ apiKey: this.speechModel?.apiKey || defaultApiKey
47
+ });
48
+ this.speechClient = new speech.SpeechClient({
49
+ apiKey: this.listeningModel?.apiKey || defaultApiKey
50
+ });
51
+ }
52
+ /**
53
+ * Gets a list of available voices
54
+ * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
55
+ */
56
+ async getSpeakers({ languageCode = "en-US" } = {}) {
57
+ return this.traced(async () => {
58
+ const [response] = await this.ttsClient.listVoices({ languageCode });
59
+ return (response?.voices || []).filter((voice) => voice.name && voice.languageCodes).map((voice) => ({
60
+ voiceId: voice.name,
61
+ languageCodes: voice.languageCodes
62
+ }));
63
+ }, "voice.google.getSpeakers")();
64
+ }
65
+ async streamToString(stream) {
66
+ const chunks = [];
67
+ for await (const chunk of stream) {
68
+ chunks.push(Buffer.from(chunk));
69
+ }
70
+ return Buffer.concat(chunks).toString("utf-8");
71
+ }
72
+ /**
73
+ * Converts text to speech
74
+ * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
75
+ * @param {Object} [options] - Speech synthesis options
76
+ * @param {string} [options.speaker] - Voice ID to use
77
+ * @param {string} [options.languageCode] - Language code for the voice
78
+ * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
79
+ * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
80
+ */
81
+ async speak(input, options) {
82
+ return this.traced(async () => {
83
+ const text = typeof input === "string" ? input : await this.streamToString(input);
84
+ const request = {
85
+ input: { text },
86
+ voice: {
87
+ name: options?.speaker || this.speaker,
88
+ languageCode: options?.languageCode || options?.speaker?.split("-").slice(0, 2).join("-") || "en-US"
89
+ },
90
+ audioConfig: options?.audioConfig || { audioEncoding: "LINEAR16" }
91
+ };
92
+ const [response] = await this.ttsClient.synthesizeSpeech(request);
93
+ if (!response.audioContent) {
94
+ throw new Error("No audio content returned.");
95
+ }
96
+ if (typeof response.audioContent === "string") {
97
+ throw new Error("Audio content is a string.");
98
+ }
99
+ const stream$1 = new stream.PassThrough();
100
+ stream$1.end(Buffer.from(response.audioContent));
101
+ return stream$1;
102
+ }, "voice.google.speak")();
103
+ }
104
+ /**
105
+ * Converts speech to text
106
+ * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
107
+ * @param {Object} [options] - Recognition options
108
+ * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
109
+ * @returns {Promise<string>} Transcribed text
110
+ */
111
+ async listen(audioStream, options) {
112
+ return this.traced(async () => {
113
+ const chunks = [];
114
+ for await (const chunk of audioStream) {
115
+ chunks.push(Buffer.from(chunk));
116
+ }
117
+ const buffer = Buffer.concat(chunks);
118
+ let request = {
119
+ config: {
120
+ encoding: "LINEAR16",
121
+ languageCode: "en-US",
122
+ ...options?.config
123
+ },
124
+ audio: {
125
+ content: buffer.toString("base64")
126
+ }
127
+ };
128
+ console.log(`BEFORE REQUEST`);
129
+ const [response] = await this.speechClient.recognize(request);
130
+ console.log(`AFTER REQUEST`);
131
+ if (!response.results || response.results.length === 0) {
132
+ throw new Error("No transcription results returned");
133
+ }
134
+ const transcription = response.results.map((result) => {
135
+ if (!result.alternatives || result.alternatives.length === 0) {
136
+ return "";
137
+ }
138
+ return result.alternatives[0].transcript || "";
139
+ }).filter((text) => text.length > 0).join(" ");
140
+ if (!transcription) {
141
+ throw new Error("No valid transcription found in results");
142
+ }
143
+ return transcription;
144
+ }, "voice.google.listen")();
145
+ }
146
+ };
147
+
148
+ exports.GoogleVoice = GoogleVoice;
@@ -0,0 +1,2 @@
1
+ export { GoogleModelConfig } from './_tsup-dts-rollup.cjs';
2
+ export { GoogleVoice } from './_tsup-dts-rollup.cjs';
@@ -0,0 +1,2 @@
1
+ export { GoogleModelConfig } from './_tsup-dts-rollup.js';
2
+ export { GoogleVoice } from './_tsup-dts-rollup.js';
package/dist/index.js ADDED
@@ -0,0 +1,146 @@
1
+ import { PassThrough } from 'stream';
2
+ import { SpeechClient } from '@google-cloud/speech';
3
+ import { TextToSpeechClient } from '@google-cloud/text-to-speech';
4
+ import { MastraVoice } from '@mastra/core/voice';
5
+
6
+ // src/index.ts
7
+ var DEFAULT_VOICE = "en-US-Casual-K";
8
+ var GoogleVoice = class extends MastraVoice {
9
+ ttsClient;
10
+ speechClient;
11
+ /**
12
+ * Creates an instance of GoogleVoice
13
+ * @param {Object} config - Configuration options
14
+ * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
15
+ * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
16
+ * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
17
+ * @throws {Error} If no API key is provided via config or environment variable
18
+ */
19
+ constructor({
20
+ listeningModel,
21
+ speechModel,
22
+ speaker
23
+ } = {}) {
24
+ const defaultApiKey = process.env.GOOGLE_API_KEY;
25
+ const defaultSpeaker = DEFAULT_VOICE;
26
+ super({
27
+ speechModel: {
28
+ name: "",
29
+ apiKey: speechModel?.apiKey ?? defaultApiKey
30
+ },
31
+ listeningModel: {
32
+ name: "",
33
+ apiKey: listeningModel?.apiKey ?? defaultApiKey
34
+ },
35
+ speaker: speaker ?? defaultSpeaker
36
+ });
37
+ const apiKey = defaultApiKey || speechModel?.apiKey || listeningModel?.apiKey;
38
+ if (!apiKey) {
39
+ throw new Error(
40
+ "Google API key is not set, set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
41
+ );
42
+ }
43
+ this.ttsClient = new TextToSpeechClient({
44
+ apiKey: this.speechModel?.apiKey || defaultApiKey
45
+ });
46
+ this.speechClient = new SpeechClient({
47
+ apiKey: this.listeningModel?.apiKey || defaultApiKey
48
+ });
49
+ }
50
+ /**
51
+ * Gets a list of available voices
52
+ * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
53
+ */
54
+ async getSpeakers({ languageCode = "en-US" } = {}) {
55
+ return this.traced(async () => {
56
+ const [response] = await this.ttsClient.listVoices({ languageCode });
57
+ return (response?.voices || []).filter((voice) => voice.name && voice.languageCodes).map((voice) => ({
58
+ voiceId: voice.name,
59
+ languageCodes: voice.languageCodes
60
+ }));
61
+ }, "voice.google.getSpeakers")();
62
+ }
63
+ async streamToString(stream) {
64
+ const chunks = [];
65
+ for await (const chunk of stream) {
66
+ chunks.push(Buffer.from(chunk));
67
+ }
68
+ return Buffer.concat(chunks).toString("utf-8");
69
+ }
70
+ /**
71
+ * Converts text to speech
72
+ * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
73
+ * @param {Object} [options] - Speech synthesis options
74
+ * @param {string} [options.speaker] - Voice ID to use
75
+ * @param {string} [options.languageCode] - Language code for the voice
76
+ * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
77
+ * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
78
+ */
79
+ async speak(input, options) {
80
+ return this.traced(async () => {
81
+ const text = typeof input === "string" ? input : await this.streamToString(input);
82
+ const request = {
83
+ input: { text },
84
+ voice: {
85
+ name: options?.speaker || this.speaker,
86
+ languageCode: options?.languageCode || options?.speaker?.split("-").slice(0, 2).join("-") || "en-US"
87
+ },
88
+ audioConfig: options?.audioConfig || { audioEncoding: "LINEAR16" }
89
+ };
90
+ const [response] = await this.ttsClient.synthesizeSpeech(request);
91
+ if (!response.audioContent) {
92
+ throw new Error("No audio content returned.");
93
+ }
94
+ if (typeof response.audioContent === "string") {
95
+ throw new Error("Audio content is a string.");
96
+ }
97
+ const stream = new PassThrough();
98
+ stream.end(Buffer.from(response.audioContent));
99
+ return stream;
100
+ }, "voice.google.speak")();
101
+ }
102
+ /**
103
+ * Converts speech to text
104
+ * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
105
+ * @param {Object} [options] - Recognition options
106
+ * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
107
+ * @returns {Promise<string>} Transcribed text
108
+ */
109
+ async listen(audioStream, options) {
110
+ return this.traced(async () => {
111
+ const chunks = [];
112
+ for await (const chunk of audioStream) {
113
+ chunks.push(Buffer.from(chunk));
114
+ }
115
+ const buffer = Buffer.concat(chunks);
116
+ let request = {
117
+ config: {
118
+ encoding: "LINEAR16",
119
+ languageCode: "en-US",
120
+ ...options?.config
121
+ },
122
+ audio: {
123
+ content: buffer.toString("base64")
124
+ }
125
+ };
126
+ console.log(`BEFORE REQUEST`);
127
+ const [response] = await this.speechClient.recognize(request);
128
+ console.log(`AFTER REQUEST`);
129
+ if (!response.results || response.results.length === 0) {
130
+ throw new Error("No transcription results returned");
131
+ }
132
+ const transcription = response.results.map((result) => {
133
+ if (!result.alternatives || result.alternatives.length === 0) {
134
+ return "";
135
+ }
136
+ return result.alternatives[0].transcript || "";
137
+ }).filter((text) => text.length > 0).join(" ");
138
+ if (!transcription) {
139
+ throw new Error("No valid transcription found in results");
140
+ }
141
+ return transcription;
142
+ }, "voice.google.listen")();
143
+ }
144
+ };
145
+
146
+ export { GoogleVoice };
@@ -0,0 +1,6 @@
1
+ import { createConfig } from '@internal/lint/eslint';
2
+
3
+ const config = await createConfig();
4
+
5
+ /** @type {import("eslint").Linter.Config[]} */
6
+ export default [...config];
package/package.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "@mastra/voice-google",
3
+ "version": "0.0.0-commonjs-20250227130920",
4
+ "description": "Mastra Google voice integration",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "import": {
11
+ "types": "./dist/index.d.ts",
12
+ "default": "./dist/index.js"
13
+ },
14
+ "require": {
15
+ "types": "./dist/index.d.cts",
16
+ "default": "./dist/index.cjs"
17
+ }
18
+ },
19
+ "./package.json": "./package.json"
20
+ },
21
+ "dependencies": {
22
+ "@google-cloud/speech": "^6.7.0",
23
+ "@google-cloud/text-to-speech": "^5.0.1",
24
+ "zod": "^3.24.1",
25
+ "@mastra/core": "^0.0.0-commonjs-20250227130920"
26
+ },
27
+ "devDependencies": {
28
+ "@types/node": "^22.13.1",
29
+ "tsup": "^8.0.1",
30
+ "typescript": "^5.7.3",
31
+ "vitest": "^2.1.8",
32
+ "eslint": "^9.20.1",
33
+ "@internal/lint": "0.0.0"
34
+ },
35
+ "scripts": {
36
+ "build": "tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake",
37
+ "build:watch": "pnpm build --watch",
38
+ "test": "vitest run",
39
+ "lint": "eslint ."
40
+ }
41
+ }
@@ -0,0 +1,133 @@
1
+ import { createWriteStream, mkdirSync, readFileSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ import { Readable } from 'node:stream';
4
+ import { describe, expect, it, beforeAll } from 'vitest';
5
+
6
+ import { GoogleVoice } from './index';
7
+
8
+ describe('GoogleVoice Integration Tests', () => {
9
+ let voice: GoogleVoice;
10
+ const outputDir = join(process.cwd(), 'test-outputs');
11
+
12
+ beforeAll(() => {
13
+ // Create output directory if it doesn't exist
14
+ try {
15
+ mkdirSync(outputDir, { recursive: true });
16
+ } catch (err) {
17
+ console.error(err);
18
+ // Ignore if directory already exists
19
+ }
20
+
21
+ voice = new GoogleVoice();
22
+ });
23
+
24
+ describe('getSpeakers', () => {
25
+ it('should list available voices', async () => {
26
+ const voices = await voice.getSpeakers();
27
+ expect(voices.length).toBeGreaterThan(0);
28
+ expect(voices[0]).toHaveProperty('voiceId');
29
+ expect(voices[0]).toHaveProperty('languageCodes');
30
+ }, 10000);
31
+ });
32
+
33
+ describe('speak', () => {
34
+ it('should generate audio from text and save to file', async () => {
35
+ const audioStream = await voice.speak('Hello World', {
36
+ speaker: 'en-US-Standard-F',
37
+ });
38
+
39
+ return new Promise((resolve, reject) => {
40
+ const outputPath = join(outputDir, 'speech-test.wav');
41
+ const fileStream = createWriteStream(outputPath);
42
+ const chunks: Buffer[] = [];
43
+
44
+ audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
45
+ audioStream.pipe(fileStream);
46
+
47
+ fileStream.on('finish', () => {
48
+ expect(chunks.length).toBeGreaterThan(0);
49
+ resolve(undefined);
50
+ });
51
+
52
+ audioStream.on('error', reject);
53
+ fileStream.on('error', reject);
54
+ });
55
+ }, 10000);
56
+
57
+ it('should work with default voice', async () => {
58
+ const audioStream = await voice.speak('Test with default voice');
59
+
60
+ return new Promise((resolve, reject) => {
61
+ const outputPath = join(outputDir, 'speech-test-default.wav');
62
+ const fileStream = createWriteStream(outputPath);
63
+ const chunks: Buffer[] = [];
64
+
65
+ audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
66
+ audioStream.pipe(fileStream);
67
+
68
+ fileStream.on('finish', () => {
69
+ expect(chunks.length).toBeGreaterThan(0);
70
+ resolve(undefined);
71
+ });
72
+
73
+ audioStream.on('error', reject);
74
+ fileStream.on('error', reject);
75
+ });
76
+ }, 10000);
77
+
78
+ it('should handle stream input', async () => {
79
+ const textStream = Readable.from(['Hello', ' from', ' stream', ' input!']);
80
+
81
+ const audioStream = await voice.speak(textStream);
82
+
83
+ return new Promise((resolve, reject) => {
84
+ const outputPath = join(outputDir, 'speech-stream-input-test.wav');
85
+ const fileStream = createWriteStream(outputPath);
86
+ const chunks: Buffer[] = [];
87
+
88
+ audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
89
+ audioStream.pipe(fileStream);
90
+
91
+ fileStream.on('finish', () => {
92
+ expect(chunks.length).toBeGreaterThan(0);
93
+ resolve(undefined);
94
+ });
95
+
96
+ audioStream.on('error', reject);
97
+ fileStream.on('error', reject);
98
+ });
99
+ }, 10000);
100
+ });
101
+
102
+ describe('listen', () => {
103
+ it('should transcribe audio stream to text', async () => {
104
+ // Reads speech-test.wav, which the 'speak' test above writes into outputDir; the file must already exist
105
+ const audioStream = Readable.from(readFileSync(join(outputDir, 'speech-test.wav')));
106
+ const result = await voice.listen(audioStream);
107
+ expect(typeof result).toBe('string');
108
+ // Transcript casing may vary, so compare case-insensitively
109
+ expect(result.toLowerCase()).toContain('hello world');
110
+ }, 10000);
111
+
112
+ // it('should support streaming transcription', async () => {
113
+ // const audioStream = Readable.from(
114
+ // readFileSync(join(outputDir, 'speech-test.mp3'))
115
+ // );
116
+
117
+ // const outputStream = await voice.listen(audioStream, { stream: true });
118
+ // expect(outputStream).toBeInstanceOf(PassThrough);
119
+
120
+ // return new Promise((resolve, reject) => {
121
+ // const chunks: string[] = [];
122
+ // (outputStream as PassThrough).on('data', (chunk: string) => chunks.push(chunk));
123
+ // (outputStream as PassThrough).on('end', () => {
124
+ // expect(chunks.length).toBeGreaterThan(0);
125
+ // const transcription = chunks.join('');
126
+ // expect(transcription).toContain('hello world');
127
+ // resolve(undefined);
128
+ // });
129
+ // (outputStream as PassThrough).on('error', reject);
130
+ // });
131
+ // });
132
+ });
133
+ });
package/src/index.ts ADDED
@@ -0,0 +1,199 @@
1
+ import { PassThrough } from 'stream';
2
+
3
+ import { SpeechClient } from '@google-cloud/speech';
4
+ import type { google as SpeechTypes } from '@google-cloud/speech/build/protos/protos';
5
+ import { TextToSpeechClient } from '@google-cloud/text-to-speech';
6
+ import type { google as TextToSpeechTypes } from '@google-cloud/text-to-speech/build/protos/protos';
7
+ import { MastraVoice } from '@mastra/core/voice';
8
+
9
/**
 * Per-model settings accepted by the GoogleVoice constructor
 * @interface GoogleModelConfig
 * @property {string} [apiKey] - Google Cloud API key for this model. When omitted, the GOOGLE_API_KEY environment variable is used instead
 */
export interface GoogleModelConfig {
  apiKey?: string;
}
17
+
18
/** Voice ID used for speech synthesis when the caller does not supply a speaker. */
const DEFAULT_VOICE = 'en-US-Casual-K';
19
+
20
/**
 * GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
 * @class GoogleVoice
 * @extends MastraVoice
 */
export class GoogleVoice extends MastraVoice {
  private ttsClient: TextToSpeechClient;
  private speechClient: SpeechClient;

  /**
   * Creates an instance of GoogleVoice
   * @param {Object} config - Configuration options
   * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
   * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
   * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
   * @throws {Error} If no API key is provided via config or environment variable
   */
  constructor({
    listeningModel,
    speechModel,
    speaker,
  }: {
    listeningModel?: GoogleModelConfig;
    speechModel?: GoogleModelConfig;
    speaker?: string;
  } = {}) {
    const defaultApiKey = process.env.GOOGLE_API_KEY;
    const defaultSpeaker = DEFAULT_VOICE;

    super({
      speechModel: {
        name: '',
        apiKey: speechModel?.apiKey ?? defaultApiKey,
      },
      listeningModel: {
        name: '',
        apiKey: listeningModel?.apiKey ?? defaultApiKey,
      },
      speaker: speaker ?? defaultSpeaker,
    });

    // At least one key (environment or either model config) must be available.
    const apiKey = defaultApiKey || speechModel?.apiKey || listeningModel?.apiKey;
    if (!apiKey) {
      throw new Error(
        'Google API key is not set, set GOOGLE_API_KEY environment variable or pass apiKey to constructor',
      );
    }

    this.ttsClient = new TextToSpeechClient({
      apiKey: this.speechModel?.apiKey || defaultApiKey,
    });

    this.speechClient = new SpeechClient({
      apiKey: this.listeningModel?.apiKey || defaultApiKey,
    });
  }

  /**
   * Gets a list of available voices
   * @param {Object} [options] - Lookup options
   * @param {string} [options.languageCode] - Language code passed to the voice listing request. Default is en-US.
   * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages
   */
  async getSpeakers({ languageCode = 'en-US' }: { languageCode?: string } = {}) {
    return this.traced(async () => {
      const [response] = await this.ttsClient.listVoices({ languageCode: languageCode });
      // Entries missing a name or language list are dropped, which makes the assertions below safe.
      return (response?.voices || [])
        .filter(voice => voice.name && voice.languageCodes)
        .map(voice => ({
          voiceId: voice.name!,
          languageCodes: voice.languageCodes!,
        }));
    }, 'voice.google.getSpeakers')();
  }

  /**
   * Reads a stream to completion and decodes the collected bytes as UTF-8 text
   * @param {NodeJS.ReadableStream} stream - Stream to drain
   * @returns {Promise<string>} The concatenated stream contents
   */
  private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {
    const chunks: Buffer[] = [];
    for await (const chunk of stream) {
      chunks.push(Buffer.from(chunk));
    }
    // Decode once after concatenation rather than per chunk.
    return Buffer.concat(chunks).toString('utf-8');
  }

  /**
   * Converts text to speech
   * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
   * @param {Object} [options] - Speech synthesis options
   * @param {string} [options.speaker] - Voice ID to use. Falls back to the speaker configured on this instance.
   * @param {string} [options.languageCode] - Language code for the voice. When omitted, it is derived from the first two dash-separated segments of the voice ID in use, falling back to en-US.
   * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
   * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
   * @throws {Error} If the response has no audio content or the audio content is a string
   */
  async speak(
    input: string | NodeJS.ReadableStream,
    options?: {
      speaker?: string;
      languageCode?: string;
      audioConfig?: TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
    },
  ): Promise<NodeJS.ReadableStream> {
    return this.traced(async () => {
      const text = typeof input === 'string' ? input : await this.streamToString(input);

      // Resolve the voice first so the language code is derived from the voice
      // actually sent, including the instance-level default speaker.
      const voiceName = options?.speaker || this.speaker;
      const languageCode = options?.languageCode || voiceName?.split('-').slice(0, 2).join('-') || 'en-US';

      const request: TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {
        input: { text },
        voice: {
          name: voiceName,
          languageCode,
        },
        audioConfig: options?.audioConfig || { audioEncoding: 'LINEAR16' },
      };

      const [response] = await this.ttsClient.synthesizeSpeech(request);

      if (!response.audioContent) {
        throw new Error('No audio content returned.');
      }

      if (typeof response.audioContent === 'string') {
        throw new Error('Audio content is a string.');
      }

      // The whole payload arrives at once; wrap it in a stream for the caller.
      const stream = new PassThrough();
      stream.end(Buffer.from(response.audioContent));
      return stream;
    }, 'voice.google.speak')();
  }

  /**
   * Converts speech to text
   * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
   * @param {Object} [options] - Recognition options
   * @param {boolean} [options.stream] - Accepted but not read by this implementation; the full transcript is always returned
   * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration, merged over the defaults (LINEAR16, en-US)
   * @returns {Promise<string>} Transcribed text
   * @throws {Error} If the service returns no results or no non-empty transcript
   */
  async listen(
    audioStream: NodeJS.ReadableStream,
    options?: { stream?: boolean; config?: SpeechTypes.cloud.speech.v1.IRecognitionConfig },
  ): Promise<string> {
    return this.traced(async () => {
      // The audio is buffered completely before the recognition request is sent.
      const chunks: Buffer[] = [];
      for await (const chunk of audioStream) {
        chunks.push(Buffer.from(chunk));
      }
      const buffer = Buffer.concat(chunks);

      const request: SpeechTypes.cloud.speech.v1.IRecognizeRequest = {
        config: {
          encoding: 'LINEAR16',
          languageCode: 'en-US',
          ...options?.config,
        },
        audio: {
          content: buffer.toString('base64'),
        },
      };

      const [response] = await this.speechClient.recognize(request);

      if (!response.results || response.results.length === 0) {
        throw new Error('No transcription results returned');
      }

      // Take the first alternative of each result and skip empty transcripts.
      const transcription = response.results
        .map(result => result.alternatives?.[0]?.transcript || '')
        .filter((text: string) => text.length > 0)
        .join(' ');

      if (!transcription) {
        throw new Error('No valid transcription found in results');
      }

      return transcription;
    }, 'voice.google.listen')();
  }
}
package/tsconfig.json ADDED
@@ -0,0 +1,5 @@
1
+ {
2
+ "extends": "../../tsconfig.node.json",
3
+ "include": ["src/**/*"],
4
+ "exclude": ["node_modules", "**/*.test.ts"]
5
+ }
@@ -0,0 +1,8 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
// Vitest settings: run tests in a Node environment with the test API exposed as globals.
export default defineConfig({
  test: { environment: 'node', globals: true },
});