@mastra/voice-google 0.1.1-alpha.0 → 0.1.1-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,23 @@
1
1
 
2
- > @mastra/voice-google@0.1.1-alpha.0 build /home/runner/work/mastra/mastra/voice/google
3
- > tsup src/index.ts --format esm --experimental-dts --clean --treeshake
2
+ > @mastra/voice-google@0.1.1-alpha.3 build /home/runner/work/mastra/mastra/voice/google
3
+ > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
4
4
 
5
5
  CLI Building entry: src/index.ts
6
6
  CLI Using tsconfig: tsconfig.json
7
7
  CLI tsup v8.3.6
8
8
  TSC Build start
9
- TSC ⚡️ Build success in 10798ms
9
+ TSC ⚡️ Build success in 10277ms
10
10
  DTS Build start
11
11
  CLI Target: es2022
12
12
  Analysis will use the bundled TypeScript version 5.7.3
13
13
  Writing package typings: /home/runner/work/mastra/mastra/voice/google/dist/_tsup-dts-rollup.d.ts
14
- DTS ⚡️ Build success in 9155ms
14
+ Analysis will use the bundled TypeScript version 5.7.3
15
+ Writing package typings: /home/runner/work/mastra/mastra/voice/google/dist/_tsup-dts-rollup.d.cts
16
+ DTS ⚡️ Build success in 15583ms
15
17
  CLI Cleaning output folder
16
18
  ESM Build start
19
+ CJS Build start
20
+ CJS dist/index.cjs 5.48 KB
21
+ CJS ⚡️ Build success in 550ms
17
22
  ESM dist/index.js 5.45 KB
18
- ESM ⚡️ Build success in 290ms
23
+ ESM ⚡️ Build success in 556ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,38 @@
1
1
  # @mastra/voice-google
2
2
 
3
+ ## 0.1.1-alpha.3
4
+
5
+ ### Patch Changes
6
+
7
+ - bb4f447: Add support for commonjs
8
+ - Updated dependencies [0fd78ac]
9
+ - Updated dependencies [0d25b75]
10
+ - Updated dependencies [fd14a3f]
11
+ - Updated dependencies [3f369a2]
12
+ - Updated dependencies [4d4e1e1]
13
+ - Updated dependencies [bb4f447]
14
+ - @mastra/core@0.4.3-alpha.3
15
+
16
+ ## 0.1.1-alpha.2
17
+
18
+ ### Patch Changes
19
+
20
+ - Updated dependencies [2512a93]
21
+ - Updated dependencies [e62de74]
22
+ - @mastra/core@0.4.3-alpha.2
23
+
24
+ ## 0.1.1-alpha.1
25
+
26
+ ### Patch Changes
27
+
28
+ - Updated dependencies [0d185b1]
29
+ - Updated dependencies [ed55f1d]
30
+ - Updated dependencies [8d13b14]
31
+ - Updated dependencies [3ee4831]
32
+ - Updated dependencies [108793c]
33
+ - Updated dependencies [5f28f44]
34
+ - @mastra/core@0.4.3-alpha.1
35
+
3
36
  ## 0.1.1-alpha.0
4
37
 
5
38
  ### Patch Changes
@@ -0,0 +1,73 @@
1
+ import type { google } from '@google-cloud/text-to-speech/build/protos/protos';
2
+ import type { google as google_2 } from '@google-cloud/speech/build/protos/protos';
3
+ import { MastraVoice } from '@mastra/core/voice';
4
+
5
/**
 * Configuration for a single Google Cloud voice model. The same shape is used
 * for both the speech-synthesis model and the speech-recognition model.
 * @interface GoogleModelConfig
 * @property {string} [apiKey] - Optional Google Cloud API key. If not provided, will use GOOGLE_API_KEY environment variable
 */
export declare interface GoogleModelConfig {
    apiKey?: string;
}
13
+
14
/**
 * GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
 * @class GoogleVoice
 * @extends MastraVoice
 */
export declare class GoogleVoice extends MastraVoice {
    /** Google Cloud Text-to-Speech client; used by `getSpeakers` and `speak`. */
    private ttsClient;
    /** Google Cloud Speech client; used by `listen`. */
    private speechClient;
    /**
     * Creates an instance of GoogleVoice
     * @param {Object} config - Configuration options
     * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
     * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
     * @param {string} [config.speaker] - Default voice ID to use for speech synthesis. Defaults to "en-US-Casual-K".
     * @throws {Error} If no API key is provided via config or environment variable
     */
    constructor({ listeningModel, speechModel, speaker, }?: {
        listeningModel?: GoogleModelConfig;
        speechModel?: GoogleModelConfig;
        speaker?: string;
    });
    /**
     * Gets a list of available voices
     * @param {Object} [options] - Lookup options
     * @param {string} [options.languageCode] - Language code to list voices for. Default language is en-US.
     * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages.
     * Voices that lack a name or language codes are left out.
     */
    getSpeakers({ languageCode }?: {
        languageCode?: string;
    }): Promise<{
        voiceId: string;
        languageCodes: string[];
    }[]>;
    /** Reads a stream to its end and returns the collected bytes decoded as UTF-8 text. */
    private streamToString;
    /**
     * Converts text to speech
     * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech. A stream is read to its end before synthesis starts.
     * @param {Object} [options] - Speech synthesis options
     * @param {string} [options.speaker] - Voice ID to use
     * @param {string} [options.languageCode] - Language code for the voice
     * @param {google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
     * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
     * @throws {Error} If the service returns no audio content, or returns it as a string
     */
    speak(input: string | NodeJS.ReadableStream, options?: {
        speaker?: string;
        languageCode?: string;
        audioConfig?: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
    }): Promise<NodeJS.ReadableStream>;
    /**
     * Converts speech to text
     * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
     * The stream is buffered completely and sent in a single recognize request.
     * @param {Object} [options] - Recognition options
     * @param {boolean} [options.stream] - Accepted by the signature.
     * NOTE(review): the implementation shipped in this version never reads it - confirm intended use.
     * @param {google_2.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration.
     * Merged over the defaults (encoding LINEAR16, languageCode en-US).
     * @returns {Promise<string>} Transcribed text (first alternative of each result, joined with spaces)
     * @throws {Error} If no results are returned or none of them contains a transcript
     */
    listen(audioStream: NodeJS.ReadableStream, options?: {
        stream?: boolean;
        config?: google_2.cloud.speech.v1.IRecognitionConfig;
    }): Promise<string>;
}
72
+
73
+ export { }
package/dist/index.cjs ADDED
@@ -0,0 +1,148 @@
1
+ 'use strict';
2
+
3
+ var stream = require('stream');
4
+ var speech = require('@google-cloud/speech');
5
+ var textToSpeech = require('@google-cloud/text-to-speech');
6
+ var voice = require('@mastra/core/voice');
7
+
8
+ // src/index.ts
9
// Voice name used whenever the caller does not supply a speaker.
var DEFAULT_VOICE = "en-US-Casual-K";
/**
 * GoogleVoice provides Text-to-Speech and Speech-to-Text on top of the
 * Google Cloud `TextToSpeechClient` and `SpeechClient`.
 * @class GoogleVoice
 * @extends MastraVoice
 */
var GoogleVoice = class extends voice.MastraVoice {
  /** Text-to-Speech client; used by getSpeakers() and speak(). */
  ttsClient;
  /** Speech-to-Text client; used by listen(). */
  speechClient;
  /**
   * Creates an instance of GoogleVoice
   * @param {Object} config - Configuration options
   * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
   * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
   * @param {string} [config.speaker] - Default voice ID to use for speech synthesis. Defaults to DEFAULT_VOICE.
   * @throws {Error} If no API key is provided via config or environment variable
   */
  constructor({
    listeningModel,
    speechModel,
    speaker
  } = {}) {
    const defaultApiKey = process.env.GOOGLE_API_KEY;
    super({
      speechModel: {
        name: "",
        apiKey: speechModel?.apiKey ?? defaultApiKey
      },
      listeningModel: {
        name: "",
        apiKey: listeningModel?.apiKey ?? defaultApiKey
      },
      speaker: speaker ?? DEFAULT_VOICE
    });
    // At least one key must be available from the environment or either model config.
    const apiKey = defaultApiKey || speechModel?.apiKey || listeningModel?.apiKey;
    if (!apiKey) {
      throw new Error(
        "Google API key is not set, set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
      );
    }
    // NOTE(review): when only one model config carries a key and GOOGLE_API_KEY is
    // unset, the other client below is constructed with `apiKey: undefined`.
    // Presumably this.speechModel / this.listeningModel are populated by the base
    // class from the config passed to super() - confirm against MastraVoice.
    this.ttsClient = new textToSpeech.TextToSpeechClient({
      apiKey: this.speechModel?.apiKey || defaultApiKey
    });
    this.speechClient = new speech.SpeechClient({
      apiKey: this.listeningModel?.apiKey || defaultApiKey
    });
  }
  /**
   * Gets a list of available voices
   * @param {Object} [options] - Lookup options
   * @param {string} [options.languageCode] - Language code to list voices for. Default language is en-US.
   * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages.
   */
  async getSpeakers({ languageCode = "en-US" } = {}) {
    return this.traced(async () => {
      const [response] = await this.ttsClient.listVoices({ languageCode });
      // Entries without a name or language codes cannot be represented in the result shape.
      return (response?.voices || []).filter((candidate) => candidate.name && candidate.languageCodes).map((candidate) => ({
        voiceId: candidate.name,
        languageCodes: candidate.languageCodes
      }));
    }, "voice.google.getSpeakers")();
  }
  /**
   * Reads a stream to its end and returns its content decoded as UTF-8.
   * @param {NodeJS.ReadableStream} stream - Stream to drain
   * @returns {Promise<string>} Concatenated stream content
   */
  async streamToString(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      chunks.push(Buffer.from(chunk));
    }
    return Buffer.concat(chunks).toString("utf-8");
  }
  /**
   * Converts text to speech
   * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
   * @param {Object} [options] - Speech synthesis options
   * @param {string} [options.speaker] - Voice ID to use. Falls back to the instance's default speaker.
   * @param {string} [options.languageCode] - Language code for the voice. When omitted it is taken from
   * the first two dash-separated segments of the voice ID in use, and "en-US" as a last resort.
   * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
   * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
   * @throws {Error} If the response carries no audio content, or carries it as a string
   */
  async speak(input, options) {
    return this.traced(async () => {
      const text = typeof input === "string" ? input : await this.streamToString(input);
      // Resolve the voice first so the language code is derived from the voice that is
      // actually requested, including the default speaker set in the constructor.
      const voiceName = options?.speaker || this.speaker;
      const request = {
        input: { text },
        voice: {
          name: voiceName,
          languageCode: options?.languageCode || voiceName?.split("-").slice(0, 2).join("-") || "en-US"
        },
        audioConfig: options?.audioConfig || { audioEncoding: "LINEAR16" }
      };
      const [response] = await this.ttsClient.synthesizeSpeech(request);
      if (!response.audioContent) {
        throw new Error("No audio content returned.");
      }
      if (typeof response.audioContent === "string") {
        throw new Error("Audio content is a string.");
      }
      // Hand the complete audio buffer back through a stream that is already ended.
      const stream$1 = new stream.PassThrough();
      stream$1.end(Buffer.from(response.audioContent));
      return stream$1;
    }, "voice.google.speak")();
  }
  /**
   * Converts speech to text
   * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
   * @param {Object} [options] - Recognition options
   * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration,
   * merged over the defaults (encoding LINEAR16, languageCode en-US)
   * @returns {Promise<string>} Transcribed text
   * @throws {Error} If no results are returned or no result contains a transcript
   */
  async listen(audioStream, options) {
    return this.traced(async () => {
      // The whole stream is buffered because the audio is sent inline in one request.
      const chunks = [];
      for await (const chunk of audioStream) {
        chunks.push(Buffer.from(chunk));
      }
      const buffer = Buffer.concat(chunks);
      const request = {
        config: {
          encoding: "LINEAR16",
          languageCode: "en-US",
          ...options?.config
        },
        audio: {
          content: buffer.toString("base64")
        }
      };
      const [response] = await this.speechClient.recognize(request);
      if (!response.results || response.results.length === 0) {
        throw new Error("No transcription results returned");
      }
      // Keep the first alternative of every result and drop empty transcripts.
      const transcription = response.results.map((result) => {
        if (!result.alternatives || result.alternatives.length === 0) {
          return "";
        }
        return result.alternatives[0].transcript || "";
      }).filter((text) => text.length > 0).join(" ");
      if (!transcription) {
        throw new Error("No valid transcription found in results");
      }
      return transcription;
    }, "voice.google.listen")();
  }
};
147
+
148
+ exports.GoogleVoice = GoogleVoice;
@@ -0,0 +1,2 @@
1
+ export { GoogleModelConfig } from './_tsup-dts-rollup.cjs';
2
+ export { GoogleVoice } from './_tsup-dts-rollup.cjs';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mastra/voice-google",
3
- "version": "0.1.1-alpha.0",
3
+ "version": "0.1.1-alpha.3",
4
4
  "description": "Mastra Google voice integration",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -10,6 +10,10 @@
10
10
  "import": {
11
11
  "types": "./dist/index.d.ts",
12
12
  "default": "./dist/index.js"
13
+ },
14
+ "require": {
15
+ "types": "./dist/index.d.cts",
16
+ "default": "./dist/index.cjs"
13
17
  }
14
18
  },
15
19
  "./package.json": "./package.json"
@@ -18,7 +22,7 @@
18
22
  "@google-cloud/speech": "^6.7.0",
19
23
  "@google-cloud/text-to-speech": "^5.0.1",
20
24
  "zod": "^3.24.1",
21
- "@mastra/core": "^0.4.3-alpha.0"
25
+ "@mastra/core": "^0.4.3-alpha.3"
22
26
  },
23
27
  "devDependencies": {
24
28
  "@types/node": "^22.13.1",
@@ -29,7 +33,7 @@
29
33
  "@internal/lint": "0.0.0"
30
34
  },
31
35
  "scripts": {
32
- "build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
36
+ "build": "tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake",
33
37
  "build:watch": "pnpm build --watch",
34
38
  "test": "vitest run",
35
39
  "lint": "eslint ."