@mastra/voice-google 0.0.0-storage-20250225005900 → 0.0.0-vnextWorkflows-20250417075051

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
- Elastic License 2.0 (ELv2)
1
+ # Elastic License 2.0 (ELv2)
2
+
3
+ Copyright (c) 2025 Mastra AI, Inc.
2
4
 
3
5
  **Acceptance**
4
6
  By using the software, you agree to all of the terms and conditions below.
@@ -0,0 +1,73 @@
1
+ import type { google } from '@google-cloud/text-to-speech/build/protos/protos';
2
+ import type { google as google_2 } from '@google-cloud/speech/build/protos/protos';
3
+ import { MastraVoice } from '@mastra/core/voice';
4
+
5
/**
 * Per-model options for the Google Cloud voice provider.
 * The same shape is used for both the speech (synthesis) model and the
 * listening (recognition) model.
 */
export declare interface GoogleModelConfig {
    /**
     * Optional Google Cloud API key. If not provided, the GOOGLE_API_KEY
     * environment variable is used instead.
     */
    apiKey?: string;
}
13
+
14
/**
 * GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
 * @class GoogleVoice
 * @extends MastraVoice
 */
export declare class GoogleVoice extends MastraVoice {
    private ttsClient;
    private speechClient;
    /**
     * Creates an instance of GoogleVoice
     * @param {Object} config - Configuration options
     * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
     * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
     * @param {string} [config.speaker] - Default voice ID to use for speech synthesis (defaults to "en-US-Casual-K")
     * @throws {Error} If no API key is provided via config or the GOOGLE_API_KEY environment variable
     */
    constructor({ listeningModel, speechModel, speaker, }?: {
        listeningModel?: GoogleModelConfig;
        speechModel?: GoogleModelConfig;
        speaker?: string;
    });
    /**
     * Gets a list of available voices
     * @param {Object} [options] - Listing options
     * @param {string} [options.languageCode] - Language to list voices for. Default language is en-US.
     * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages.
     */
    getSpeakers({ languageCode }?: {
        languageCode?: string;
    }): Promise<{
        voiceId: string;
        languageCodes: string[];
    }[]>;
    private streamToString;
    /**
     * Converts text to speech
     * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech. A stream is read to completion and decoded as UTF-8 before synthesis.
     * @param {Object} [options] - Speech synthesis options
     * @param {string} [options.speaker] - Voice ID to use. Falls back to the speaker configured on the instance.
     * @param {string} [options.languageCode] - Language code for the voice. When omitted, it is derived from `options.speaker` if that is supplied.
     * @param {google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
     * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
     * @throws {Error} If the service returns no audio content, or returns it as a string
     */
    speak(input: string | NodeJS.ReadableStream, options?: {
        speaker?: string;
        languageCode?: string;
        audioConfig?: google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
    }): Promise<NodeJS.ReadableStream>;
    /**
     * Converts speech to text
     * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
     * @param {Object} [options] - Recognition options
     * @param {boolean} [options.stream] - Accepted by the signature; the implementation bundled in this package does not read it and always resolves to a string.
     * @param {google_2.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration, merged over the defaults (LINEAR16, en-US)
     * @returns {Promise<string>} Transcribed text
     * @throws {Error} If the service returns no results or no non-empty transcript
     */
    listen(audioStream: NodeJS.ReadableStream, options?: {
        stream?: boolean;
        config?: google_2.cloud.speech.v1.IRecognitionConfig;
    }): Promise<string>;
}
72
+
73
+ export { }
package/dist/index.cjs ADDED
@@ -0,0 +1,156 @@
1
+ 'use strict';
2
+
3
+ var stream = require('stream');
4
+ var speech = require('@google-cloud/speech');
5
+ var textToSpeech = require('@google-cloud/text-to-speech');
6
+ var voice = require('@mastra/core/voice');
7
+
8
+ // src/index.ts
9
// Voice used when the caller does not pass `speaker` to the constructor.
var DEFAULT_VOICE = "en-US-Casual-K";
/**
 * GoogleVoice provides Text-to-Speech and Speech-to-Text capabilities on top of
 * the Google Cloud `TextToSpeechClient` and `SpeechClient`.
 * @class GoogleVoice
 * @extends MastraVoice
 */
var GoogleVoice = class extends voice.MastraVoice {
  ttsClient;
  speechClient;
  /**
   * Creates an instance of GoogleVoice
   * @param {Object} config - Configuration options
   * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
   * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
   * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
   * @throws {Error} If no API key is provided via config or environment variable
   */
  constructor({
    listeningModel,
    speechModel,
    speaker
  } = {}) {
    const defaultApiKey = process.env.GOOGLE_API_KEY;
    const defaultSpeaker = DEFAULT_VOICE;
    super({
      speechModel: {
        name: "",
        apiKey: speechModel?.apiKey ?? defaultApiKey
      },
      listeningModel: {
        name: "",
        apiKey: listeningModel?.apiKey ?? defaultApiKey
      },
      speaker: speaker ?? defaultSpeaker
    });
    // The check passes as soon as ANY key is available (environment or either model).
    const apiKey = defaultApiKey || speechModel?.apiKey || listeningModel?.apiKey;
    if (!apiKey) {
      throw new Error(
        "Google API key is not set, set GOOGLE_API_KEY environment variable or pass apiKey to constructor"
      );
    }
    // Each client gets its own model key, falling back to the environment key.
    this.ttsClient = new textToSpeech.TextToSpeechClient({
      apiKey: this.speechModel?.apiKey || defaultApiKey
    });
    this.speechClient = new speech.SpeechClient({
      apiKey: this.listeningModel?.apiKey || defaultApiKey
    });
  }
  /**
   * Gets a list of available voices
   * @param {Object} [options] - Listing options
   * @param {string} [options.languageCode] - Language to list voices for. Default language is en-US.
   * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages.
   */
  async getSpeakers({ languageCode = "en-US" } = {}) {
    return this.traced(async () => {
      const [response] = await this.ttsClient.listVoices({ languageCode });
      // Entries lacking a name or language list are dropped rather than returned half-filled.
      return (response?.voices || []).filter((voice) => voice.name && voice.languageCodes).map((voice) => ({
        voiceId: voice.name,
        languageCodes: voice.languageCodes
      }));
    }, "voice.google.getSpeakers")();
  }
  /**
   * Reads a stream to completion and decodes the collected bytes as UTF-8.
   * @param {NodeJS.ReadableStream} stream - Stream yielding strings or Buffers
   * @returns {Promise<string>} The concatenated stream content
   */
  async streamToString(stream) {
    const chunks = [];
    for await (const chunk of stream) {
      if (typeof chunk === "string") {
        chunks.push(Buffer.from(chunk));
      } else {
        chunks.push(chunk);
      }
    }
    return Buffer.concat(chunks).toString("utf-8");
  }
  /**
   * Converts text to speech
   * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
   * @param {Object} [options] - Speech synthesis options
   * @param {string} [options.speaker] - Voice ID to use. Falls back to the instance speaker.
   * @param {string} [options.languageCode] - Language code for the voice. When omitted it is taken from the
   *   first two dash-separated segments of the effective voice ID, and finally defaults to en-US.
   * @param {google.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
   * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
   * @throws {Error} If the service returns no audio content, or returns it as a string
   */
  async speak(input, options) {
    return this.traced(async () => {
      const text = typeof input === "string" ? input : await this.streamToString(input);
      // Resolve the voice once so the language code always matches the voice that is
      // actually requested, including a non-English default speaker set in the constructor.
      const voiceName = options?.speaker || this.speaker;
      const languageCode = options?.languageCode || voiceName?.split("-").slice(0, 2).join("-") || "en-US";
      const request = {
        input: { text },
        voice: {
          name: voiceName,
          languageCode
        },
        audioConfig: options?.audioConfig || { audioEncoding: "LINEAR16" }
      };
      const [response] = await this.ttsClient.synthesizeSpeech(request);
      if (!response.audioContent) {
        throw new Error("No audio content returned.");
      }
      if (typeof response.audioContent === "string") {
        throw new Error("Audio content is a string.");
      }
      // The whole payload is already in memory; expose it through a stream to satisfy the return type.
      const output = new stream.PassThrough();
      output.end(Buffer.from(response.audioContent));
      return output;
    }, "voice.google.speak")();
  }
  /**
   * Converts speech to text
   * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
   * @param {Object} [options] - Recognition options
   * @param {boolean} [options.stream] - Accepted by the signature but not read by this implementation
   * @param {google.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration, merged over the defaults
   * @returns {Promise<string>} Transcribed text
   * @throws {Error} If the service returns no results or no non-empty transcript
   */
  async listen(audioStream, options) {
    return this.traced(async () => {
      // Buffer the entire stream: the recognize call below takes the audio as one base64 payload.
      const chunks = [];
      for await (const chunk of audioStream) {
        if (typeof chunk === "string") {
          chunks.push(Buffer.from(chunk));
        } else {
          chunks.push(chunk);
        }
      }
      const buffer = Buffer.concat(chunks);
      const request = {
        config: {
          encoding: "LINEAR16",
          languageCode: "en-US",
          // Caller-supplied settings override the defaults above.
          ...options?.config
        },
        audio: {
          content: buffer.toString("base64")
        }
      };
      const [response] = await this.speechClient.recognize(request);
      if (!response.results || response.results.length === 0) {
        throw new Error("No transcription results returned");
      }
      // Take the first alternative of every result and join the non-empty transcripts.
      const transcription = response.results.map((result) => {
        if (!result.alternatives || result.alternatives.length === 0) {
          return "";
        }
        return result.alternatives[0].transcript || "";
      }).filter((text) => text.length > 0).join(" ");
      if (!transcription) {
        throw new Error("No valid transcription found in results");
      }
      return transcription;
    }, "voice.google.listen")();
  }
};
155
+
156
+ exports.GoogleVoice = GoogleVoice;
@@ -0,0 +1,2 @@
1
+ export { GoogleModelConfig } from './_tsup-dts-rollup.cjs';
2
+ export { GoogleVoice } from './_tsup-dts-rollup.cjs';
package/dist/index.js CHANGED
@@ -63,7 +63,11 @@ var GoogleVoice = class extends MastraVoice {
63
63
  async streamToString(stream) {
64
64
  const chunks = [];
65
65
  for await (const chunk of stream) {
66
- chunks.push(Buffer.from(chunk));
66
+ if (typeof chunk === "string") {
67
+ chunks.push(Buffer.from(chunk));
68
+ } else {
69
+ chunks.push(chunk);
70
+ }
67
71
  }
68
72
  return Buffer.concat(chunks).toString("utf-8");
69
73
  }
@@ -110,7 +114,11 @@ var GoogleVoice = class extends MastraVoice {
110
114
  return this.traced(async () => {
111
115
  const chunks = [];
112
116
  for await (const chunk of audioStream) {
113
- chunks.push(Buffer.from(chunk));
117
+ if (typeof chunk === "string") {
118
+ chunks.push(Buffer.from(chunk));
119
+ } else {
120
+ chunks.push(chunk);
121
+ }
114
122
  }
115
123
  const buffer = Buffer.concat(chunks);
116
124
  let request = {
package/package.json CHANGED
@@ -1,8 +1,11 @@
1
1
  {
2
2
  "name": "@mastra/voice-google",
3
- "version": "0.0.0-storage-20250225005900",
3
+ "version": "0.0.0-vnextWorkflows-20250417075051",
4
4
  "description": "Mastra Google voice integration",
5
5
  "type": "module",
6
+ "files": [
7
+ "dist"
8
+ ],
6
9
  "main": "dist/index.js",
7
10
  "types": "dist/index.d.ts",
8
11
  "exports": {
@@ -10,26 +13,31 @@
10
13
  "import": {
11
14
  "types": "./dist/index.d.ts",
12
15
  "default": "./dist/index.js"
16
+ },
17
+ "require": {
18
+ "types": "./dist/index.d.cts",
19
+ "default": "./dist/index.cjs"
13
20
  }
14
21
  },
15
22
  "./package.json": "./package.json"
16
23
  },
24
+ "license": "Elastic-2.0",
17
25
  "dependencies": {
18
- "@google-cloud/speech": "^6.7.0",
19
- "@google-cloud/text-to-speech": "^5.0.1",
20
- "zod": "^3.24.1",
21
- "@mastra/core": "^0.0.0-storage-20250225005900"
26
+ "@google-cloud/speech": "^6.7.1",
27
+ "@google-cloud/text-to-speech": "^6.0.1",
28
+ "zod": "^3.24.2",
29
+ "@mastra/core": "0.0.0-vnextWorkflows-20250417075051"
22
30
  },
23
31
  "devDependencies": {
24
- "@types/node": "^22.13.1",
25
- "tsup": "^8.0.1",
26
- "typescript": "^5.7.3",
27
- "vitest": "^2.1.8",
28
- "eslint": "^9.20.1",
29
- "@internal/lint": "0.0.0"
32
+ "@types/node": "^20.17.27",
33
+ "eslint": "^9.23.0",
34
+ "tsup": "^8.4.0",
35
+ "typescript": "^5.8.2",
36
+ "vitest": "^2.1.9",
37
+ "@internal/lint": "0.0.2"
30
38
  },
31
39
  "scripts": {
32
- "build": "tsup src/index.ts --format esm --experimental-dts --clean --treeshake",
40
+ "build": "tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake=smallest --splitting",
33
41
  "build:watch": "pnpm build --watch",
34
42
  "test": "vitest run",
35
43
  "lint": "eslint ."
@@ -1,19 +0,0 @@
1
-
2
- 
3
- > @mastra/voice-google@0.1.0-alpha.2 build /Users/ward/projects/mastra/mastra/voice/google
4
- > tsup src/index.ts --format esm --experimental-dts --clean --treeshake
5
-
6
- CLI Building entry: src/index.ts
7
- CLI Using tsconfig: tsconfig.json
8
- CLI tsup v8.3.6
9
- TSC Build start
10
- TSC ⚡️ Build success in 2292ms
11
- DTS Build start
12
- CLI Target: es2022
13
- Analysis will use the bundled TypeScript version 5.7.3
14
- Writing package typings: /Users/ward/projects/mastra/mastra/voice/google/dist/_tsup-dts-rollup.d.ts
15
- DTS ⚡️ Build success in 1858ms
16
- CLI Cleaning output folder
17
- ESM Build start
18
- ESM dist/index.js 5.45 KB
19
- ESM ⚡️ Build success in 86ms
package/CHANGELOG.md DELETED
@@ -1,42 +0,0 @@
1
- # @mastra/voice-google
2
-
3
- ## 0.0.0-storage-20250225005900
4
-
5
- ### Patch Changes
6
-
7
- - 5e0f727: deprecate @mastra/speech-google for @mastra/voice-google
8
- - Updated dependencies [7fceae1]
9
- - Updated dependencies [f626fbb]
10
- - Updated dependencies [8db2a28]
11
- - @mastra/core@0.0.0-storage-20250225005900
12
-
13
- ## 0.1.0-alpha.2
14
-
15
- ### Patch Changes
16
-
17
- - 5e0f727: deprecate @mastra/speech-google for @mastra/voice-google
18
-
19
- ## 0.1.0
20
-
21
- ### Minor Changes
22
-
23
- - Initial release of @mastra/voice-google
24
- - Combines functionality from deprecated @mastra/speech-google
25
- - Adds Speech-to-Text capabilities
26
- - Implements new MastraVoice interface from @mastra/core
27
-
28
- ### Notes
29
-
30
- This package replaces @mastra/speech-google, which reached version 0.1.3-alpha.1. Key features from the previous package:
31
-
32
- - Neural Text-to-Speech synthesis
33
- - Multiple voice options
34
- - Streaming support
35
- - Integration with Google Cloud services
36
-
37
- The new package adds:
38
-
39
- - Speech-to-Text recognition
40
- - Combined speech and listening models
41
- - Improved voice management
42
- - Better type safety and error handling
package/eslint.config.js DELETED
@@ -1,6 +0,0 @@
1
- import { createConfig } from '@internal/lint/eslint';
2
-
3
- const config = await createConfig();
4
-
5
- /** @type {import("eslint").Linter.Config[]} */
6
- export default [...config];
package/src/index.test.ts DELETED
@@ -1,133 +0,0 @@
1
- import { createWriteStream, mkdirSync, readFileSync } from 'node:fs';
2
- import { join } from 'node:path';
3
- import { Readable } from 'node:stream';
4
- import { describe, expect, it, beforeAll } from 'vitest';
5
-
6
- import { GoogleVoice } from './index';
7
-
8
- describe('GoogleVoice Integration Tests', () => {
9
- let voice: GoogleVoice;
10
- const outputDir = join(process.cwd(), 'test-outputs');
11
-
12
- beforeAll(() => {
13
- // Create output directory if it doesn't exist
14
- try {
15
- mkdirSync(outputDir, { recursive: true });
16
- } catch (err) {
17
- console.error(err);
18
- // Ignore if directory already exists
19
- }
20
-
21
- voice = new GoogleVoice();
22
- });
23
-
24
- describe('getSpeakers', () => {
25
- it('should list available voices', async () => {
26
- const voices = await voice.getSpeakers();
27
- expect(voices.length).toBeGreaterThan(0);
28
- expect(voices[0]).toHaveProperty('voiceId');
29
- expect(voices[0]).toHaveProperty('languageCodes');
30
- }, 10000);
31
- });
32
-
33
- describe('speak', () => {
34
- it('should generate audio from text and save to file', async () => {
35
- const audioStream = await voice.speak('Hello World', {
36
- speaker: 'en-US-Standard-F',
37
- });
38
-
39
- return new Promise((resolve, reject) => {
40
- const outputPath = join(outputDir, 'speech-test.wav');
41
- const fileStream = createWriteStream(outputPath);
42
- const chunks: Buffer[] = [];
43
-
44
- audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
45
- audioStream.pipe(fileStream);
46
-
47
- fileStream.on('finish', () => {
48
- expect(chunks.length).toBeGreaterThan(0);
49
- resolve(undefined);
50
- });
51
-
52
- audioStream.on('error', reject);
53
- fileStream.on('error', reject);
54
- });
55
- }, 10000);
56
-
57
- it('should work with default voice', async () => {
58
- const audioStream = await voice.speak('Test with default voice');
59
-
60
- return new Promise((resolve, reject) => {
61
- const outputPath = join(outputDir, 'speech-test-default.wav');
62
- const fileStream = createWriteStream(outputPath);
63
- const chunks: Buffer[] = [];
64
-
65
- audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
66
- audioStream.pipe(fileStream);
67
-
68
- fileStream.on('finish', () => {
69
- expect(chunks.length).toBeGreaterThan(0);
70
- resolve(undefined);
71
- });
72
-
73
- audioStream.on('error', reject);
74
- fileStream.on('error', reject);
75
- });
76
- }, 10000);
77
-
78
- it('should handle stream input', async () => {
79
- const textStream = Readable.from(['Hello', ' from', ' stream', ' input!']);
80
-
81
- const audioStream = await voice.speak(textStream);
82
-
83
- return new Promise((resolve, reject) => {
84
- const outputPath = join(outputDir, 'speech-stream-input-test.wav');
85
- const fileStream = createWriteStream(outputPath);
86
- const chunks: Buffer[] = [];
87
-
88
- audioStream.on('data', (chunk: Buffer) => chunks.push(chunk));
89
- audioStream.pipe(fileStream);
90
-
91
- fileStream.on('finish', () => {
92
- expect(chunks.length).toBeGreaterThan(0);
93
- resolve(undefined);
94
- });
95
-
96
- audioStream.on('error', reject);
97
- fileStream.on('error', reject);
98
- });
99
- }, 10000);
100
- });
101
-
102
- describe('listen', () => {
103
- it('should transcribe audio stream to text', async () => {
104
- const audioStream = Readable.from(readFileSync(join(outputDir, 'speech-test.wav')));
105
-
106
- const result = await voice.listen(audioStream);
107
- console.log(result);
108
- expect(typeof result).toBe('string');
109
- expect(result).toContain('hello world');
110
- }, 10000);
111
-
112
- // it('should support streaming transcription', async () => {
113
- // const audioStream = Readable.from(
114
- // readFileSync(join(outputDir, 'speech-test.mp3'))
115
- // );
116
-
117
- // const outputStream = await voice.listen(audioStream, { stream: true });
118
- // expect(outputStream).toBeInstanceOf(PassThrough);
119
-
120
- // return new Promise((resolve, reject) => {
121
- // const chunks: string[] = [];
122
- // (outputStream as PassThrough).on('data', (chunk: string) => chunks.push(chunk));
123
- // (outputStream as PassThrough).on('end', () => {
124
- // expect(chunks.length).toBeGreaterThan(0);
125
- // const transcription = chunks.join('');
126
- // expect(transcription).toContain('hello world');
127
- // resolve(undefined);
128
- // });
129
- // (outputStream as PassThrough).on('error', reject);
130
- // });
131
- // });
132
- });
133
- });
package/src/index.ts DELETED
@@ -1,199 +0,0 @@
1
- import { PassThrough } from 'stream';
2
-
3
- import { SpeechClient } from '@google-cloud/speech';
4
- import type { google as SpeechTypes } from '@google-cloud/speech/build/protos/protos';
5
- import { TextToSpeechClient } from '@google-cloud/text-to-speech';
6
- import type { google as TextToSpeechTypes } from '@google-cloud/text-to-speech/build/protos/protos';
7
- import { MastraVoice } from '@mastra/core/voice';
8
-
9
- /**
10
- * Configuration for Google Cloud Voice models
11
- * @interface GoogleModelConfig
12
- * @property {string} [apiKey] - Optional Google Cloud API key. If not provided, will use GOOGLE_API_KEY environment variable
13
- */
14
- export interface GoogleModelConfig {
15
- apiKey?: string;
16
- }
17
-
18
- const DEFAULT_VOICE = 'en-US-Casual-K';
19
-
20
- /**
21
- * GoogleVoice class provides Text-to-Speech and Speech-to-Text capabilities using Google Cloud services
22
- * @class GoogleVoice
23
- * @extends MastraVoice
24
- */
25
- export class GoogleVoice extends MastraVoice {
26
- private ttsClient: TextToSpeechClient;
27
- private speechClient: SpeechClient;
28
-
29
- /**
30
- * Creates an instance of GoogleVoice
31
- * @param {Object} config - Configuration options
32
- * @param {GoogleModelConfig} [config.speechModel] - Configuration for speech synthesis
33
- * @param {GoogleModelConfig} [config.listeningModel] - Configuration for speech recognition
34
- * @param {string} [config.speaker] - Default voice ID to use for speech synthesis
35
- * @throws {Error} If no API key is provided via config or environment variable
36
- */
37
- constructor({
38
- listeningModel,
39
- speechModel,
40
- speaker,
41
- }: {
42
- listeningModel?: GoogleModelConfig;
43
- speechModel?: GoogleModelConfig;
44
- speaker?: string;
45
- } = {}) {
46
- const defaultApiKey = process.env.GOOGLE_API_KEY;
47
- const defaultSpeaker = DEFAULT_VOICE;
48
-
49
- super({
50
- speechModel: {
51
- name: '',
52
- apiKey: speechModel?.apiKey ?? defaultApiKey,
53
- },
54
- listeningModel: {
55
- name: '',
56
- apiKey: listeningModel?.apiKey ?? defaultApiKey,
57
- },
58
- speaker: speaker ?? defaultSpeaker,
59
- });
60
-
61
- const apiKey = defaultApiKey || speechModel?.apiKey || listeningModel?.apiKey;
62
- if (!apiKey) {
63
- throw new Error(
64
- 'Google API key is not set, set GOOGLE_API_KEY environment variable or pass apiKey to constructor',
65
- );
66
- }
67
-
68
- this.ttsClient = new TextToSpeechClient({
69
- apiKey: this.speechModel?.apiKey || defaultApiKey,
70
- });
71
-
72
- this.speechClient = new SpeechClient({
73
- apiKey: this.listeningModel?.apiKey || defaultApiKey,
74
- });
75
- }
76
-
77
- /**
78
- * Gets a list of available voices
79
- * @returns {Promise<Array<{voiceId: string, languageCodes: string[]}>>} List of available voices and their supported languages. Default language is en-US.
80
- */
81
- async getSpeakers({ languageCode = 'en-US' }: { languageCode?: string } = {}) {
82
- return this.traced(async () => {
83
- const [response] = await this.ttsClient.listVoices({ languageCode: languageCode });
84
- return (response?.voices || [])
85
- .filter(voice => voice.name && voice.languageCodes)
86
- .map(voice => ({
87
- voiceId: voice.name!,
88
- languageCodes: voice.languageCodes!,
89
- }));
90
- }, 'voice.google.getSpeakers')();
91
- }
92
-
93
- private async streamToString(stream: NodeJS.ReadableStream): Promise<string> {
94
- const chunks: Buffer[] = [];
95
- for await (const chunk of stream) {
96
- chunks.push(Buffer.from(chunk));
97
- }
98
- return Buffer.concat(chunks).toString('utf-8');
99
- }
100
-
101
- /**
102
- * Converts text to speech
103
- * @param {string | NodeJS.ReadableStream} input - Text or stream to convert to speech
104
- * @param {Object} [options] - Speech synthesis options
105
- * @param {string} [options.speaker] - Voice ID to use
106
- * @param {string} [options.languageCode] - Language code for the voice
107
- * @param {TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig']} [options.audioConfig] - Audio configuration options
108
- * @returns {Promise<NodeJS.ReadableStream>} Stream of synthesized audio. Default encoding is LINEAR16.
109
- */
110
- async speak(
111
- input: string | NodeJS.ReadableStream,
112
- options?: {
113
- speaker?: string;
114
- languageCode?: string;
115
- audioConfig?: TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest['audioConfig'];
116
- },
117
- ): Promise<NodeJS.ReadableStream> {
118
- return this.traced(async () => {
119
- const text = typeof input === 'string' ? input : await this.streamToString(input);
120
-
121
- const request: TextToSpeechTypes.cloud.texttospeech.v1.ISynthesizeSpeechRequest = {
122
- input: { text },
123
- voice: {
124
- name: options?.speaker || this.speaker,
125
- languageCode: options?.languageCode || options?.speaker?.split('-').slice(0, 2).join('-') || 'en-US',
126
- },
127
- audioConfig: options?.audioConfig || { audioEncoding: 'LINEAR16' },
128
- };
129
-
130
- const [response] = await this.ttsClient.synthesizeSpeech(request);
131
-
132
- if (!response.audioContent) {
133
- throw new Error('No audio content returned.');
134
- }
135
-
136
- if (typeof response.audioContent === 'string') {
137
- throw new Error('Audio content is a string.');
138
- }
139
-
140
- const stream = new PassThrough();
141
- stream.end(Buffer.from(response.audioContent));
142
- return stream;
143
- }, 'voice.google.speak')();
144
- }
145
-
146
- /**
147
- * Converts speech to text
148
- * @param {NodeJS.ReadableStream} audioStream - Audio stream to transcribe. Default encoding is LINEAR16.
149
- * @param {Object} [options] - Recognition options
150
- * @param {SpeechTypes.cloud.speech.v1.IRecognitionConfig} [options.config] - Recognition configuration
151
- * @returns {Promise<string>} Transcribed text
152
- */
153
- async listen(
154
- audioStream: NodeJS.ReadableStream,
155
- options?: { stream?: boolean; config?: SpeechTypes.cloud.speech.v1.IRecognitionConfig },
156
- ): Promise<string> {
157
- return this.traced(async () => {
158
- const chunks: Buffer[] = [];
159
- for await (const chunk of audioStream) {
160
- chunks.push(Buffer.from(chunk));
161
- }
162
- const buffer = Buffer.concat(chunks);
163
-
164
- let request = {
165
- config: {
166
- encoding: 'LINEAR16',
167
- languageCode: 'en-US',
168
- ...options?.config,
169
- },
170
- audio: {
171
- content: buffer.toString('base64'),
172
- },
173
- };
174
- console.log(`BEFORE REQUEST`);
175
- const [response] = await this.speechClient.recognize(request as SpeechTypes.cloud.speech.v1.IRecognizeRequest);
176
- console.log(`AFTER REQUEST`);
177
-
178
- if (!response.results || response.results.length === 0) {
179
- throw new Error('No transcription results returned');
180
- }
181
-
182
- const transcription = response.results
183
- .map((result: any) => {
184
- if (!result.alternatives || result.alternatives.length === 0) {
185
- return '';
186
- }
187
- return result.alternatives[0].transcript || '';
188
- })
189
- .filter((text: string) => text.length > 0)
190
- .join(' ');
191
-
192
- if (!transcription) {
193
- throw new Error('No valid transcription found in results');
194
- }
195
-
196
- return transcription;
197
- }, 'voice.google.listen')();
198
- }
199
- }
package/tsconfig.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "extends": "../../tsconfig.node.json",
3
- "include": ["src/**/*"],
4
- "exclude": ["node_modules", "**/*.test.ts"]
5
- }
package/vitest.config.ts DELETED
@@ -1,8 +0,0 @@
1
- import { defineConfig } from 'vitest/config';
2
-
3
- export default defineConfig({
4
- test: {
5
- globals: true,
6
- environment: 'node',
7
- },
8
- });