@mastra/voice-google 0.1.1-alpha.0 → 0.1.1-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -5
- package/CHANGELOG.md +33 -0
- package/dist/_tsup-dts-rollup.d.cts +73 -0
- package/dist/index.cjs +148 -0
- package/dist/index.d.cts +2 -0
- package/package.json +7 -3
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
|
|
2
|
-
> @mastra/voice-google@0.1.1-alpha.0 build /home/runner/work/mastra/mastra/voice/google
|
|
3
|
-
> tsup src/index.ts --format esm --experimental-dts --clean --treeshake
|
|
2
|
+
> @mastra/voice-google@0.1.1-alpha.3 build /home/runner/work/mastra/mastra/voice/google
|
|
3
|
+
> tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
|
|
4
4
|
|
|
5
5
|
[34mCLI[39m Building entry: src/index.ts
|
|
6
6
|
[34mCLI[39m Using tsconfig: tsconfig.json
|
|
7
7
|
[34mCLI[39m tsup v8.3.6
|
|
8
8
|
[34mTSC[39m Build start
|
|
9
|
-
[32mTSC[39m ⚡️ Build success in
|
|
9
|
+
[32mTSC[39m ⚡️ Build success in 10277ms
|
|
10
10
|
[34mDTS[39m Build start
|
|
11
11
|
[34mCLI[39m Target: es2022
|
|
12
12
|
Analysis will use the bundled TypeScript version 5.7.3
|
|
13
13
|
[36mWriting package typings: /home/runner/work/mastra/mastra/voice/google/dist/_tsup-dts-rollup.d.ts[39m
|
|
14
|
-
|
|
14
|
+
Analysis will use the bundled TypeScript version 5.7.3
|
|
15
|
+
[36mWriting package typings: /home/runner/work/mastra/mastra/voice/google/dist/_tsup-dts-rollup.d.cts[39m
|
|
16
|
+
[32mDTS[39m ⚡️ Build success in 15583ms
|
|
15
17
|
[34mCLI[39m Cleaning output folder
|
|
16
18
|
[34mESM[39m Build start
|
|
19
|
+
[34mCJS[39m Build start
|
|
20
|
+
[32mCJS[39m [1mdist/index.cjs [22m[32m5.48 KB[39m
|
|
21
|
+
[32mCJS[39m ⚡️ Build success in 550ms
|
|
17
22
|
[32mESM[39m [1mdist/index.js [22m[32m5.45 KB[39m
|
|
18
|
-
[32mESM[39m ⚡️ Build success in
|
|
23
|
+
[32mESM[39m ⚡️ Build success in 556ms
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
# @mastra/voice-google
|
|
2
2
|
|
|
3
|
+
## 0.1.1-alpha.3
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- bb4f447: Add support for commonjs
|
|
8
|
+
- Updated dependencies [0fd78ac]
|
|
9
|
+
- Updated dependencies [0d25b75]
|
|
10
|
+
- Updated dependencies [fd14a3f]
|
|
11
|
+
- Updated dependencies [3f369a2]
|
|
12
|
+
- Updated dependencies [4d4e1e1]
|
|
13
|
+
- Updated dependencies [bb4f447]
|
|
14
|
+
- @mastra/core@0.4.3-alpha.3
|
|
15
|
+
|
|
16
|
+
## 0.1.1-alpha.2
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- Updated dependencies [2512a93]
|
|
21
|
+
- Updated dependencies [e62de74]
|
|
22
|
+
- @mastra/core@0.4.3-alpha.2
|
|
23
|
+
|
|
24
|
+
## 0.1.1-alpha.1
|
|
25
|
+
|
|
26
|
+
### Patch Changes
|
|
27
|
+
|
|
28
|
+
- Updated dependencies [0d185b1]
|
|
29
|
+
- Updated dependencies [ed55f1d]
|
|
30
|
+
- Updated dependencies [8d13b14]
|
|
31
|
+
- Updated dependencies [3ee4831]
|
|
32
|
+
- Updated dependencies [108793c]
|
|
33
|
+
- Updated dependencies [5f28f44]
|
|
34
|
+
- @mastra/core@0.4.3-alpha.1
|
|
35
|
+
|
|
3
36
|
## 0.1.1-alpha.0
|
|
4
37
|
|
|
5
38
|
### Patch Changes
|
package/dist/_tsup-dts-rollup.d.cts
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import type { google } from '@google-cloud/text-to-speech/build/protos/protos';
|
|
2
|
+
import type { google as google_2 } from '@google-cloud/speech/build/protos/protos';
|
|
3
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Configuration for a Google Cloud voice model; the same shape is accepted for both `speechModel` and `listeningModel`
|
|
7
|
+
* @interface GoogleModelConfig
|
|
8
|
+
* @property {string} [apiKey] - Optional Google Cloud API key. If omitted, the GOOGLE_API_KEY environment variable is used instead
|
|
9
|
+
*/
|
|
10
|
+
export declare interface GoogleModelConfig {
|
|
11
|
+
apiKey?: string;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
|
|
16
|
+
* @class GoogleVoice
|
|
17
|
+
* @extends MastraVoice
|
|
18
|
+
*/
|
|
19
|
+
export declare class GoogleVoice extends MastraVoice {
|
|
20
|
+
private ttsClient;
|
|
21
|
+
private speechClient;
|
|
22
|
+
/**
|
|
23
|
+
* Creates an instance of GoogleVoice
|
|
24
|
+
* @param {Object} config - Configuration options
|
|
25
|
+
* @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
|
|
26
|
+
* @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
|
|
27
|
+
* @param {string} [config.speaker] - Default voice ID to use for speech synthesis
|
|
28
|
+
* @throws {Error} If no API key is provided via config or environment variable
|
|
29
|
+
*/
|
|
30
|
+
constructor({ listeningModel, speechModel, speaker, }?: {
|
|
31
|
+
listeningModel?: GoogleModelConfig;
|
|
32
|
+
speechModel?: GoogleModelConfig;
|
|
33
|
+
speaker?: string;
|
|
34
|
+
});
|
|
35
|
+
/**
|
|
36
|
+
* Gets a list of available voices, optionally restricted to the given `languageCode`
|
|
37
|
+
* @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
|
|
38
|
+
*/
|
|
39
|
+
getSpeakers({ languageCode }?: {
|
|
40
|
+
languageCode?: string;
|
|
41
|
+
}): Promise<{
|
|
42
|
+
voiceId: string;
|
|
43
|
+
languageCodes: string[];
|
|
44
|
+
}[]>;
|
|
45
|
+
private streamToString;
|
|
46
|
+
/**
|
|
47
|
+
* Converts text to speech
|
|
48
|
+
* @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
|
|
49
|
+
* @param {Object} [options] - Speech synthesis options
|
|
50
|
+
* @param {string} [options.speaker] - Voice ID to use
|
|
51
|
+
* @param {string} [options.languageCode] - Language code for the voice
|
|
52
|
+
* @param {google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
|
|
53
|
+
* @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
|
|
54
|
+
*/
|
|
55
|
+
speak(input: string | NodeJS.ReadableStream, options?: {
|
|
56
|
+
speaker?: string;
|
|
57
|
+
languageCode?: string;
|
|
58
|
+
audioConfig?: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
|
|
59
|
+
}): Promise<NodeJS.ReadableStream>;
|
|
60
|
+
/**
|
|
61
|
+
* Converts speech to text
|
|
62
|
+
* @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
|
|
63
|
+
* @param {Object} [options] - Recognition options (an optional `stream` flag and an optional `config`)
|
|
64
|
+
* @param {google_2.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
|
|
65
|
+
* @returns {Promise<string>} Transcribed text
|
|
66
|
+
*/
|
|
67
|
+
listen(audioStream: NodeJS.ReadableStream, options?: {
|
|
68
|
+
stream?: boolean;
|
|
69
|
+
config?: google_2.cloud.speech.v1.IRecognitionConfig;
|
|
70
|
+
}): Promise<string>;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
export { }
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var stream = require('stream');
|
|
4
|
+
var speech = require('@google-cloud/speech');
|
|
5
|
+
var textToSpeech = require('@google-cloud/text-to-speech');
|
|
6
|
+
var voice = require('@mastra/core/voice');
|
|
7
|
+
|
|
8
|
+
// src/index.ts
|
|
9
|
+
var DEFAULT_VOICE = "en-US-Casual-K";
|
|
10
|
+
/**
 * GoogleVoice provides Text-to-Speech and Speech-to-Text on top of the
 * Google Cloud `TextToSpeechClient` and `SpeechClient`.
 * @class GoogleVoice
 * @extends MastraVoice
 */
var GoogleVoice = class extends voice.MastraVoice {
  /** Google Cloud Text-to-Speech client used by `getSpeakers` and `speak`. */
  ttsClient;
  /** Google Cloud Speech-to-Text client used by `listen`. */
  speechClient;
  /**
   * Creates an instance of GoogleVoice
   * @param {Object} config - Configuration options
   * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
   * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
   * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
   * @throws {Error} If no API key is provided via config or environment variable
   */
  constructor({
    listeningModel,
    speechModel,
    speaker
  } = {}) {
    const defaultApiKey = process.env.GOOGLE_API_KEY;
    const defaultSpeaker = DEFAULT_VOICE;
    super({
      speechModel: {
        name: "",
        apiKey: speechModel?.apiKey ?? defaultApiKey
      },
      listeningModel: {
        name: "",
        apiKey: listeningModel?.apiKey ?? defaultApiKey
      },
      speaker: speaker ?? defaultSpeaker
    });
    // At least one key source (environment or either model config) must be present.
    const apiKey = defaultApiKey || speechModel?.apiKey || listeningModel?.apiKey;
    if (!apiKey) {
      throw new Error(
        "Google API key is not set, set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
      );
    }
    this.ttsClient = new textToSpeech.TextToSpeechClient({
      apiKey: this.speechModel?.apiKey || defaultApiKey
    });
    this.speechClient = new speech.SpeechClient({
      apiKey: this.listeningModel?.apiKey || defaultApiKey
    });
  }
  /**
   * Gets a list of available voices
   * @param {Object} [options] - Lookup options
   * @param {string} [options.languageCode] - Language to list voices for. Default language is en-US.
   * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages.
   */
  async getSpeakers({ languageCode = "en-US" } = {}) {
    return this.traced(async () => {
      const [response] = await this.ttsClient.listVoices({ languageCode });
      // Entries without a name or language list cannot be selected later, so drop them.
      return (response?.voices || []).filter((entry) => entry.name && entry.languageCodes).map((entry) => ({
        voiceId: entry.name,
        languageCodes: entry.languageCodes
      }));
    }, "voice.google.getSpeakers")();
  }
  /**
   * Drains a readable stream and decodes the collected bytes as UTF-8 text
   * @param {NodeJS.ReadableStream} stream - Stream to read to completion
   * @returns {Promise<string>} The stream contents as a string
   */
  async streamToString(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(Buffer.from(chunk));
    }
    return Buffer.concat(chunks).toString("utf-8");
  }
  /**
   * Converts text to speech
   * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
   * @param {Object} [options] - Speech synthesis options
   * @param {string} [options.speaker] - Voice ID to use; falls back to the instance's default speaker
   * @param {string} [options.languageCode] - Language code for the voice; when omitted it is derived from the first two dash-separated segments of the voice ID, then falls back to en-US
   * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
   * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
   * @throws {Error} If the service returns no audio content, or returns it as a string
   */
  async speak(input, options) {
    return this.traced(async () => {
      const text = typeof input === "string" ? input : await this.streamToString(input);
      // Resolve the voice once so the language code is derived from the voice that is
      // actually sent, including the instance default when no per-call speaker is given.
      const speaker = options?.speaker || this.speaker;
      const request = {
        input: { text },
        voice: {
          name: speaker,
          languageCode: options?.languageCode || speaker?.split("-").slice(0, 2).join("-") || "en-US"
        },
        audioConfig: options?.audioConfig || { audioEncoding: "LINEAR16" }
      };
      const [response] = await this.ttsClient.synthesizeSpeech(request);
      if (!response.audioContent) {
        throw new Error("No audio content returned.");
      }
      if (typeof response.audioContent === "string") {
        throw new Error("Audio content is a string.");
      }
      // Hand the whole audio payload to the caller through a stream.
      const output = new stream.PassThrough();
      output.end(Buffer.from(response.audioContent));
      return output;
    }, "voice.google.speak")();
  }
  /**
   * Converts speech to text
   * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
   * @param {Object} [options] - Recognition options
   * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration; its fields override the LINEAR16 / en-US defaults
   * @returns {Promise<string>} Transcribed text
   * @throws {Error} If no results are returned or none of them contains a transcript
   */
  async listen(audioStream, options) {
    return this.traced(async () => {
      // The whole stream is buffered and sent as one base64 payload.
      const chunks = [];
      for await (const chunk of audioStream) {
        chunks.push(Buffer.from(chunk));
      }
      const buffer = Buffer.concat(chunks);
      const request = {
        config: {
          encoding: "LINEAR16",
          languageCode: "en-US",
          ...options?.config
        },
        audio: {
          content: buffer.toString("base64")
        }
      };
      const [response] = await this.speechClient.recognize(request);
      if (!response.results || response.results.length === 0) {
        throw new Error("No transcription results returned");
      }
      // Take the first alternative of every result and join the non-empty transcripts.
      const transcription = response.results.map((result) => {
        if (!result.alternatives || result.alternatives.length === 0) {
          return "";
        }
        return result.alternatives[0].transcript || "";
      }).filter((text) => text.length > 0).join(" ");
      if (!transcription) {
        throw new Error("No valid transcription found in results");
      }
      return transcription;
    }, "voice.google.listen")();
  }
};
|
|
147
|
+
|
|
148
|
+
exports.GoogleVoice = GoogleVoice;
|
package/dist/index.d.cts
ADDED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mastra/voice-google",
|
|
3
|
-
"version": "0.1.1-alpha.0",
|
|
3
|
+
"version": "0.1.1-alpha.3",
|
|
4
4
|
"description": "Mastra Google voice integration",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -10,6 +10,10 @@
|
|
|
10
10
|
"import": {
|
|
11
11
|
"types": "./dist/index.d.ts",
|
|
12
12
|
"default": "./dist/index.js"
|
|
13
|
+
},
|
|
14
|
+
"require": {
|
|
15
|
+
"types": "./dist/index.d.cts",
|
|
16
|
+
"default": "./dist/index.cjs"
|
|
13
17
|
}
|
|
14
18
|
},
|
|
15
19
|
"./package.json": "./package.json"
|
|
@@ -18,7 +22,7 @@
|
|
|
18
22
|
"@google-cloud/speech": "^6.7.0",
|
|
19
23
|
"@google-cloud/text-to-speech": "^5.0.1",
|
|
20
24
|
"zod": "^3.24.1",
|
|
21
|
-
"@mastra/core": "^0.4.3-alpha.0"
|
|
25
|
+
"@mastra/core": "^0.4.3-alpha.3"
|
|
22
26
|
},
|
|
23
27
|
"devDependencies": {
|
|
24
28
|
"@types/node": "^22.13.1",
|
|
@@ -29,7 +33,7 @@
|
|
|
29
33
|
"@internal/lint": "0.0.0"
|
|
30
34
|
},
|
|
31
35
|
"scripts": {
|
|
32
|
-
"build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
|
|
36
|
+
"build": "tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake",
|
|
33
37
|
"build:watch": "pnpm build --watch",
|
|
34
38
|
"test": "vitest run",
|
|
35
39
|
"lint": "eslint ."
|