@mastra/voice-google 0.12.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/LICENSE.md +15 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_ai-sdk-v5/dist/index.d.ts +8888 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/base/index.d.ts +31 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/logger/index.d.ts +217 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/request-context/index.d.ts +147 -0
- package/dist/_types/@internal_voice/dist/_types/@internal_core/dist/types/index.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/ZodError.d.ts +164 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/errors.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/external.d.ts +6 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/enumUtil.d.ts +8 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/errorUtil.d.ts +9 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/parseUtil.d.ts +78 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/partialUtil.d.ts +8 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/typeAliases.d.ts +2 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/helpers/util.d.ts +85 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.cts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/index.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/locales/en.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/standard-schema.d.ts +102 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v3/types.d.ts +1034 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/checks.d.ts +1 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/coerce.d.ts +17 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/compat.d.ts +50 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/errors.d.ts +30 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/external.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/from-json-schema.d.ts +12 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/index.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/iso.d.ts +22 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/parse.d.ts +31 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/classic/schemas.d.ts +767 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/api.d.ts +325 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/checks.d.ts +278 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/core.d.ts +70 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/doc.d.ts +14 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/errors.d.ts +221 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/index.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-generator.d.ts +65 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema-processors.d.ts +49 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/json-schema.d.ts +88 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/parse.d.ts +49 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/regexes.d.ts +85 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/registries.d.ts +35 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/schemas.d.ts +1184 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/standard-schema.d.ts +126 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/to-json-schema.d.ts +114 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/util.d.ts +200 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/core/versions.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/index.d.cts +3 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ar.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/az.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/be.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/bg.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ca.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/cs.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/da.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/de.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/el.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/en.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/eo.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/es.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fa.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fi.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr-CA.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/fr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/he.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hu.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/hy.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/id.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/index.d.ts +52 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/is.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/it.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ja.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ka.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/kh.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/km.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ko.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/lt.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/mk.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ms.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/nl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/no.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ota.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ps.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/pt.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ro.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ru.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sl.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/sv.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ta.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/th.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/tr.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ua.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uk.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/ur.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/uz.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/vi.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/yo.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-CN.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/_types/zod/v4/locales/zh-TW.d.ts +4 -0
- package/dist/_types/@internal_voice/dist/index.d.ts +16 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/index.d.ts +3 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/speech.d.ts +23 -0
- package/dist/_types/@internal_voice/dist/voice/aisdk/transcription.d.ts +22 -0
- package/dist/_types/@internal_voice/dist/voice/composite-voice.d.ts +72 -0
- package/dist/_types/@internal_voice/dist/voice/default-voice.d.ts +13 -0
- package/dist/_types/@internal_voice/dist/voice/index.d.ts +5 -0
- package/dist/_types/@internal_voice/dist/voice/voice.d.ts +172 -0
- package/dist/docs/SKILL.md +15 -21
- package/dist/docs/{SOURCE_MAP.json → assets/SOURCE_MAP.json} +1 -1
- package/dist/docs/references/docs-agents-adding-voice.md +381 -0
- package/dist/docs/references/docs-voice-overview.md +1250 -0
- package/dist/docs/{voice/02-reference.md → references/reference-voice-google.md} +98 -50
- package/dist/index.cjs +262 -2
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +261 -1
- package/dist/index.js.map +1 -1
- package/package.json +12 -14
- package/dist/docs/README.md +0 -32
- package/dist/docs/agents/01-adding-voice.md +0 -352
- package/dist/docs/voice/01-overview.md +0 -1019
|
@@ -1,79 +1,114 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
> API reference for voice - 1 entries
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
---
|
|
7
|
-
|
|
8
|
-
## Reference: Google
|
|
9
|
-
|
|
10
|
-
> Documentation for the Google Voice implementation, providing text-to-speech and speech-to-text capabilities with support for both API key and Vertex AI authentication.
|
|
1
|
+
# Google
|
|
11
2
|
|
|
12
3
|
The Google Voice implementation in Mastra provides both text-to-speech (TTS) and speech-to-text (STT) capabilities using Google Cloud services. It supports multiple voices, languages, advanced audio configuration options, and both standard API key authentication and Vertex AI mode for enterprise deployments.
|
|
13
4
|
|
|
14
|
-
## Usage
|
|
5
|
+
## Usage example
|
|
15
6
|
|
|
16
7
|
```typescript
|
|
17
|
-
import { GoogleVoice } from
|
|
8
|
+
import { GoogleVoice } from '@mastra/voice-google'
|
|
18
9
|
|
|
19
10
|
// Initialize with default configuration (uses GOOGLE_API_KEY environment variable)
|
|
20
|
-
const voice = new GoogleVoice()
|
|
11
|
+
const voice = new GoogleVoice()
|
|
21
12
|
|
|
22
13
|
// Text-to-Speech
|
|
23
|
-
const audioStream = await voice.speak(
|
|
24
|
-
languageCode:
|
|
14
|
+
const audioStream = await voice.speak('Hello, world!', {
|
|
15
|
+
languageCode: 'en-US',
|
|
25
16
|
audioConfig: {
|
|
26
|
-
audioEncoding:
|
|
17
|
+
audioEncoding: 'LINEAR16',
|
|
27
18
|
},
|
|
28
|
-
})
|
|
19
|
+
})
|
|
29
20
|
|
|
30
21
|
// Speech-to-Text
|
|
31
22
|
const transcript = await voice.listen(audioStream, {
|
|
32
23
|
config: {
|
|
33
|
-
encoding:
|
|
34
|
-
languageCode:
|
|
24
|
+
encoding: 'LINEAR16',
|
|
25
|
+
languageCode: 'en-US',
|
|
35
26
|
},
|
|
36
|
-
})
|
|
27
|
+
})
|
|
37
28
|
|
|
38
29
|
// Get available voices for a specific language
|
|
39
|
-
const voices = await voice.getSpeakers({ languageCode:
|
|
30
|
+
const voices = await voice.getSpeakers({ languageCode: 'en-US' })
|
|
40
31
|
```
|
|
41
32
|
|
|
42
|
-
## Constructor
|
|
33
|
+
## Constructor parameters
|
|
34
|
+
|
|
35
|
+
**speechModel** (`GoogleModelConfig`): Configuration for text-to-speech functionality (Default: `{ apiKey: process.env.GOOGLE_API_KEY }`)
|
|
36
|
+
|
|
37
|
+
**speechModel.apiKey** (`string`): Google Cloud API key. Falls back to GOOGLE\_API\_KEY environment variable. Not used when vertexAI is true.
|
|
38
|
+
|
|
39
|
+
**speechModel.keyFilename** (`string`): Path to service account JSON key file. Falls back to GOOGLE\_APPLICATION\_CREDENTIALS environment variable.
|
|
40
|
+
|
|
41
|
+
**speechModel.credentials** (`object`): In-memory service account credentials object with client\_email and private\_key properties.
|
|
42
|
+
|
|
43
|
+
**listeningModel** (`GoogleModelConfig`): Configuration for speech-to-text functionality (Default: `{ apiKey: process.env.GOOGLE_API_KEY }`)
|
|
43
44
|
|
|
44
|
-
|
|
45
|
+
**listeningModel.apiKey** (`string`): Google Cloud API key. Falls back to GOOGLE\_API\_KEY environment variable. Not used when vertexAI is true.
|
|
46
|
+
|
|
47
|
+
**listeningModel.keyFilename** (`string`): Path to service account JSON key file. Falls back to GOOGLE\_APPLICATION\_CREDENTIALS environment variable.
|
|
48
|
+
|
|
49
|
+
**listeningModel.credentials** (`object`): In-memory service account credentials object with client\_email and private\_key properties.
|
|
50
|
+
|
|
51
|
+
**speaker** (`string`): Default voice ID to use for text-to-speech (Default: `'en-US-Casual-K'`)
|
|
52
|
+
|
|
53
|
+
**vertexAI** (`boolean`): Enable Vertex AI mode for enterprise deployments. Uses project-based authentication instead of API keys. Requires 'project' to be set. (Default: `false`)
|
|
54
|
+
|
|
55
|
+
**project** (`string`): Google Cloud project ID (required when vertexAI is true). Falls back to GOOGLE\_CLOUD\_PROJECT environment variable.
|
|
56
|
+
|
|
57
|
+
**location** (`string`): Google Cloud region for Vertex AI. Falls back to GOOGLE\_CLOUD\_LOCATION environment variable. (Default: `'us-central1'`)
|
|
45
58
|
|
|
46
59
|
## Methods
|
|
47
60
|
|
|
48
|
-
### speak()
|
|
61
|
+
### `speak()`
|
|
49
62
|
|
|
50
63
|
Converts text to speech using Google Cloud Text-to-Speech service.
|
|
51
64
|
|
|
65
|
+
**input** (`string | NodeJS.ReadableStream`): Text to convert to speech. If a stream is provided, it will be converted to text first.
|
|
66
|
+
|
|
67
|
+
**options** (`object`): Speech synthesis options
|
|
68
|
+
|
|
69
|
+
**options.speaker** (`string`): Voice ID to use for this request
|
|
70
|
+
|
|
71
|
+
**options.languageCode** (`string`): Language code for the voice (e.g., 'en-US'). Defaults to the language code from the speaker ID or 'en-US'
|
|
72
|
+
|
|
73
|
+
**options.audioConfig** (`ISynthesizeSpeechRequest['audioConfig']`): Audio configuration options from Google Cloud Text-to-Speech API
|
|
74
|
+
|
|
52
75
|
Returns: `Promise<NodeJS.ReadableStream>`
|
|
53
76
|
|
|
54
|
-
### listen()
|
|
77
|
+
### `listen()`
|
|
55
78
|
|
|
56
79
|
Converts speech to text using Google Cloud Speech-to-Text service.
|
|
57
80
|
|
|
81
|
+
**audioStream** (`NodeJS.ReadableStream`): Audio stream to transcribe
|
|
82
|
+
|
|
83
|
+
**options** (`object`): Recognition options
|
|
84
|
+
|
|
85
|
+
**options.stream** (`boolean`): Whether to use streaming recognition
|
|
86
|
+
|
|
87
|
+
**options.config** (`IRecognitionConfig`): Recognition configuration from Google Cloud Speech-to-Text API
|
|
88
|
+
|
|
58
89
|
Returns: `Promise<string>`
|
|
59
90
|
|
|
60
|
-
### getSpeakers()
|
|
91
|
+
### `getSpeakers()`
|
|
61
92
|
|
|
62
93
|
Returns an array of available voice options, where each entry contains:
|
|
63
94
|
|
|
64
|
-
|
|
95
|
+
**voiceId** (`string`): Unique identifier for the voice
|
|
96
|
+
|
|
97
|
+
**languageCodes** (`string[]`): List of language codes supported by this voice
|
|
98
|
+
|
|
99
|
+
### `isUsingVertexAI()`
|
|
65
100
|
|
|
66
101
|
Checks if Vertex AI mode is enabled.
|
|
67
102
|
|
|
68
103
|
Returns: `boolean` - `true` if using Vertex AI, `false` otherwise
|
|
69
104
|
|
|
70
|
-
### getProject()
|
|
105
|
+
### `getProject()`
|
|
71
106
|
|
|
72
107
|
Gets the configured Google Cloud project ID.
|
|
73
108
|
|
|
74
109
|
Returns: `string | undefined` - The project ID or `undefined` if not set
|
|
75
110
|
|
|
76
|
-
### getLocation()
|
|
111
|
+
### `getLocation()`
|
|
77
112
|
|
|
78
113
|
Gets the configured Google Cloud location/region.
|
|
79
114
|
|
|
@@ -85,18 +120,18 @@ The Google Voice provider supports two authentication methods:
|
|
|
85
120
|
|
|
86
121
|
### Standard Mode (API Key)
|
|
87
122
|
|
|
88
|
-
Uses a Google Cloud API key for authentication. Suitable for development and
|
|
123
|
+
Uses a Google Cloud API key for authentication. Suitable for development and basic use cases.
|
|
89
124
|
|
|
90
125
|
```typescript
|
|
91
126
|
// Using environment variable (GOOGLE_API_KEY)
|
|
92
|
-
const voice = new GoogleVoice()
|
|
127
|
+
const voice = new GoogleVoice()
|
|
93
128
|
|
|
94
129
|
// Using explicit API key
|
|
95
130
|
const voice = new GoogleVoice({
|
|
96
|
-
speechModel: { apiKey:
|
|
97
|
-
listeningModel: { apiKey:
|
|
98
|
-
speaker:
|
|
99
|
-
})
|
|
131
|
+
speechModel: { apiKey: 'your-api-key' },
|
|
132
|
+
listeningModel: { apiKey: 'your-api-key' },
|
|
133
|
+
speaker: 'en-US-Casual-K',
|
|
134
|
+
})
|
|
100
135
|
```
|
|
101
136
|
|
|
102
137
|
### Vertex AI Mode (Service Account)
|
|
@@ -104,6 +139,7 @@ const voice = new GoogleVoice({
|
|
|
104
139
|
Uses Google Cloud project-based authentication with service accounts. Recommended for production and enterprise deployments.
|
|
105
140
|
|
|
106
141
|
**Benefits:**
|
|
142
|
+
|
|
107
143
|
- Better security (no API keys in code)
|
|
108
144
|
- IAM-based access control
|
|
109
145
|
- Project-level billing and quotas
|
|
@@ -117,68 +153,80 @@ Uses Google Cloud project-based authentication with service accounts. Recommende
|
|
|
117
153
|
// Set GOOGLE_APPLICATION_CREDENTIALS and GOOGLE_CLOUD_PROJECT env vars
|
|
118
154
|
const voice = new GoogleVoice({
|
|
119
155
|
vertexAI: true,
|
|
120
|
-
project:
|
|
121
|
-
location:
|
|
122
|
-
})
|
|
156
|
+
project: 'your-gcp-project',
|
|
157
|
+
location: 'us-central1', // Optional, defaults to 'us-central1'
|
|
158
|
+
})
|
|
123
159
|
|
|
124
160
|
// Using service account key file
|
|
125
161
|
const voice = new GoogleVoice({
|
|
126
162
|
vertexAI: true,
|
|
127
|
-
project:
|
|
163
|
+
project: 'your-gcp-project',
|
|
128
164
|
speechModel: {
|
|
129
|
-
keyFilename:
|
|
165
|
+
keyFilename: '/path/to/service-account.json',
|
|
130
166
|
},
|
|
131
167
|
listeningModel: {
|
|
132
|
-
keyFilename:
|
|
168
|
+
keyFilename: '/path/to/service-account.json',
|
|
133
169
|
},
|
|
134
|
-
})
|
|
170
|
+
})
|
|
135
171
|
|
|
136
172
|
// Using in-memory credentials
|
|
137
173
|
const voice = new GoogleVoice({
|
|
138
174
|
vertexAI: true,
|
|
139
|
-
project:
|
|
175
|
+
project: 'your-gcp-project',
|
|
140
176
|
speechModel: {
|
|
141
177
|
credentials: {
|
|
142
|
-
client_email:
|
|
143
|
-
private_key:
|
|
178
|
+
client_email: 'service-account@project.iam.gserviceaccount.com',
|
|
179
|
+
private_key: '-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----',
|
|
144
180
|
},
|
|
145
181
|
},
|
|
146
|
-
})
|
|
182
|
+
})
|
|
147
183
|
```
|
|
148
184
|
|
|
149
|
-
|
|
185
|
+
#### Required Permissions
|
|
150
186
|
|
|
151
|
-
|
|
187
|
+
#### IAM Roles
|
|
152
188
|
|
|
153
189
|
For Text-to-Speech:
|
|
190
|
+
|
|
154
191
|
- `roles/texttospeech.admin` - Text-to-Speech Admin (full access)
|
|
155
192
|
- `roles/texttospeech.editor` - Text-to-Speech Editor (create and manage)
|
|
156
193
|
- `roles/texttospeech.viewer` - Text-to-Speech Viewer (read-only)
|
|
157
194
|
|
|
158
195
|
For Speech-to-Text:
|
|
196
|
+
|
|
159
197
|
- `roles/speech.client` - Speech-to-Text Client
|
|
160
198
|
|
|
161
|
-
|
|
199
|
+
#### OAuth Scopes
|
|
162
200
|
|
|
163
201
|
For synchronous Text-to-Speech synthesis:
|
|
202
|
+
|
|
164
203
|
- `https://www.googleapis.com/auth/cloud-platform` - Full access to Google Cloud Platform services
|
|
165
204
|
|
|
166
205
|
For long-audio Text-to-Speech operations:
|
|
206
|
+
|
|
167
207
|
- `locations.longAudioSynthesize` - Create long-audio synthesis operations
|
|
168
208
|
- `operations.get` - Get operation status
|
|
169
209
|
- `operations.list` - List operations
|
|
170
210
|
|
|
171
|
-
## Important
|
|
211
|
+
## Important notes
|
|
172
212
|
|
|
173
213
|
1. **Authentication**: Either a Google Cloud API key (standard mode) or service account credentials (Vertex AI mode) is required.
|
|
214
|
+
|
|
174
215
|
2. **Environment Variables**:
|
|
216
|
+
|
|
175
217
|
- `GOOGLE_API_KEY` - API key for standard mode
|
|
176
218
|
- `GOOGLE_CLOUD_PROJECT` - Project ID for Vertex AI mode
|
|
177
219
|
- `GOOGLE_CLOUD_LOCATION` - Location for Vertex AI mode (defaults to 'us-central1')
|
|
178
220
|
- `GOOGLE_APPLICATION_CREDENTIALS` - Path to service account key file
|
|
221
|
+
|
|
179
222
|
3. The default voice is set to `'en-US-Casual-K'`.
|
|
223
|
+
|
|
180
224
|
4. Both text-to-speech and speech-to-text services use LINEAR16 as the default audio encoding.
|
|
225
|
+
|
|
181
226
|
5. The `speak()` method supports advanced audio configuration through the Google Cloud Text-to-Speech API.
|
|
227
|
+
|
|
182
228
|
6. The `listen()` method supports various recognition configurations through the Google Cloud Speech-to-Text API.
|
|
229
|
+
|
|
183
230
|
7. Available voices can be filtered by language code using the `getSpeakers()` method.
|
|
231
|
+
|
|
184
232
|
8. Vertex AI mode provides enterprise features including IAM control, audit logs, and project-level billing.
|
package/dist/index.cjs
CHANGED
|
@@ -3,7 +3,267 @@
|
|
|
3
3
|
var stream = require('stream');
|
|
4
4
|
var speech = require('@google-cloud/speech');
|
|
5
5
|
var textToSpeech = require('@google-cloud/text-to-speech');
|
|
6
|
-
|
|
6
|
+
|
|
7
|
+
// src/index.ts
|
|
8
|
+
|
|
9
|
+
// ../../packages/_internal-core/dist/chunk-HDURQPU2.js
|
|
10
|
+
var RegisteredLogger = {
|
|
11
|
+
LLM: "LLM"};
|
|
12
|
+
var LogLevel = {
|
|
13
|
+
DEBUG: "debug",
|
|
14
|
+
INFO: "info",
|
|
15
|
+
WARN: "warn",
|
|
16
|
+
ERROR: "error"};
|
|
17
|
+
var MastraLogger = class {
|
|
18
|
+
name;
|
|
19
|
+
level;
|
|
20
|
+
transports;
|
|
21
|
+
constructor(options = {}) {
|
|
22
|
+
this.name = options.name || "Mastra";
|
|
23
|
+
this.level = options.level || LogLevel.ERROR;
|
|
24
|
+
this.transports = new Map(Object.entries(options.transports || {}));
|
|
25
|
+
}
|
|
26
|
+
getTransports() {
|
|
27
|
+
return this.transports;
|
|
28
|
+
}
|
|
29
|
+
trackException(_error, _metadata) {
|
|
30
|
+
}
|
|
31
|
+
async listLogs(transportId, params) {
|
|
32
|
+
if (!transportId || !this.transports.has(transportId)) {
|
|
33
|
+
return { logs: [], total: 0, page: params?.page ?? 1, perPage: params?.perPage ?? 100, hasMore: false };
|
|
34
|
+
}
|
|
35
|
+
return this.transports.get(transportId).listLogs?.(params) ?? {
|
|
36
|
+
logs: [],
|
|
37
|
+
total: 0,
|
|
38
|
+
page: params?.page ?? 1,
|
|
39
|
+
perPage: params?.perPage ?? 100,
|
|
40
|
+
hasMore: false
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
async listLogsByRunId({
|
|
44
|
+
transportId,
|
|
45
|
+
runId,
|
|
46
|
+
fromDate,
|
|
47
|
+
toDate,
|
|
48
|
+
logLevel,
|
|
49
|
+
filters,
|
|
50
|
+
page,
|
|
51
|
+
perPage
|
|
52
|
+
}) {
|
|
53
|
+
if (!transportId || !this.transports.has(transportId) || !runId) {
|
|
54
|
+
return { logs: [], total: 0, page: page ?? 1, perPage: perPage ?? 100, hasMore: false };
|
|
55
|
+
}
|
|
56
|
+
return this.transports.get(transportId).listLogsByRunId?.({ runId, fromDate, toDate, logLevel, filters, page, perPage }) ?? {
|
|
57
|
+
logs: [],
|
|
58
|
+
total: 0,
|
|
59
|
+
page: page ?? 1,
|
|
60
|
+
perPage: perPage ?? 100,
|
|
61
|
+
hasMore: false
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
var ConsoleLogger = class _ConsoleLogger extends MastraLogger {
|
|
66
|
+
component;
|
|
67
|
+
filter;
|
|
68
|
+
constructor(options = {}) {
|
|
69
|
+
super(options);
|
|
70
|
+
this.component = options.component;
|
|
71
|
+
this.filter = options.filter;
|
|
72
|
+
}
|
|
73
|
+
child(componentOrBindings) {
|
|
74
|
+
const component = typeof componentOrBindings === "string" ? componentOrBindings : componentOrBindings?.component ?? this.component;
|
|
75
|
+
return new _ConsoleLogger({
|
|
76
|
+
name: this.name,
|
|
77
|
+
level: this.level,
|
|
78
|
+
component,
|
|
79
|
+
filter: this.filter
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
shouldLog(level, message, args) {
|
|
83
|
+
if (!this.filter) return true;
|
|
84
|
+
try {
|
|
85
|
+
return this.filter({ component: this.component, level, message, args });
|
|
86
|
+
} catch (e) {
|
|
87
|
+
console.error(`[Logger] Filter error for component=${this.component} level=${level}:`, e);
|
|
88
|
+
return true;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
prefix() {
|
|
92
|
+
return this.component ? `[${this.component}] ` : "";
|
|
93
|
+
}
|
|
94
|
+
debug(message, ...args) {
|
|
95
|
+
if (this.level === LogLevel.DEBUG && this.shouldLog(LogLevel.DEBUG, message, args)) {
|
|
96
|
+
console.info(`${this.prefix()}${message}`, ...args);
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
info(message, ...args) {
|
|
100
|
+
if ((this.level === LogLevel.INFO || this.level === LogLevel.DEBUG) && this.shouldLog(LogLevel.INFO, message, args)) {
|
|
101
|
+
console.info(`${this.prefix()}${message}`, ...args);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
warn(message, ...args) {
|
|
105
|
+
if ((this.level === LogLevel.WARN || this.level === LogLevel.INFO || this.level === LogLevel.DEBUG) && this.shouldLog(LogLevel.WARN, message, args)) {
|
|
106
|
+
console.info(`${this.prefix()}${message}`, ...args);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
error(message, ...args) {
|
|
110
|
+
if ((this.level === LogLevel.ERROR || this.level === LogLevel.WARN || this.level === LogLevel.INFO || this.level === LogLevel.DEBUG) && this.shouldLog(LogLevel.ERROR, message, args)) {
|
|
111
|
+
console.error(`${this.prefix()}${message}`, ...args);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
async listLogs(_transportId, _params) {
|
|
115
|
+
return { logs: [], total: 0, page: _params?.page ?? 1, perPage: _params?.perPage ?? 100, hasMore: false };
|
|
116
|
+
}
|
|
117
|
+
async listLogsByRunId(_args) {
|
|
118
|
+
return { logs: [], total: 0, page: _args.page ?? 1, perPage: _args.perPage ?? 100, hasMore: false };
|
|
119
|
+
}
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
// ../../packages/_internal-core/dist/base/index.js
|
|
123
|
+
var MastraBase = class {
|
|
124
|
+
component = RegisteredLogger.LLM;
|
|
125
|
+
logger;
|
|
126
|
+
name;
|
|
127
|
+
#rawConfig;
|
|
128
|
+
constructor({
|
|
129
|
+
component,
|
|
130
|
+
name,
|
|
131
|
+
rawConfig
|
|
132
|
+
}) {
|
|
133
|
+
this.component = component || RegisteredLogger.LLM;
|
|
134
|
+
this.name = name;
|
|
135
|
+
this.#rawConfig = rawConfig;
|
|
136
|
+
this.logger = new ConsoleLogger({ name: `${this.component} - ${this.name}` });
|
|
137
|
+
}
|
|
138
|
+
/**
|
|
139
|
+
* Returns the raw storage configuration this primitive was created from,
|
|
140
|
+
* or undefined if it was created from code.
|
|
141
|
+
*/
|
|
142
|
+
toRawConfig() {
|
|
143
|
+
return this.#rawConfig;
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Sets the raw storage configuration for this primitive.
|
|
147
|
+
* @internal
|
|
148
|
+
*/
|
|
149
|
+
__setRawConfig(rawConfig) {
|
|
150
|
+
this.#rawConfig = rawConfig;
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* Set the logger for this primitive
|
|
154
|
+
* @param logger
|
|
155
|
+
*/
|
|
156
|
+
__setLogger(logger) {
|
|
157
|
+
this.logger = "child" in logger && typeof logger.child === "function" ? logger.child({ component: this.component }) : logger;
|
|
158
|
+
}
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
// ../../packages/_internals/voice/dist/chunk-NWNKSBZV.js
|
|
162
|
+
var MastraVoice = class extends MastraBase {
|
|
163
|
+
listeningModel;
|
|
164
|
+
speechModel;
|
|
165
|
+
speaker;
|
|
166
|
+
realtimeConfig;
|
|
167
|
+
constructor({ listeningModel, speechModel, speaker, realtimeConfig, name } = {}) {
|
|
168
|
+
super({
|
|
169
|
+
component: "VOICE",
|
|
170
|
+
name
|
|
171
|
+
});
|
|
172
|
+
this.listeningModel = listeningModel;
|
|
173
|
+
this.speechModel = speechModel;
|
|
174
|
+
this.speaker = speaker;
|
|
175
|
+
this.realtimeConfig = realtimeConfig;
|
|
176
|
+
}
|
|
177
|
+
/**
|
|
178
|
+
* Custom serialization for tracing/observability spans.
|
|
179
|
+
* Excludes `apiKey` from listeningModel / speechModel / realtimeConfig
|
|
180
|
+
* and any provider-specific state held by subclasses. Subclasses that
|
|
181
|
+
* need to expose additional non-sensitive fields can override.
|
|
182
|
+
*/
|
|
183
|
+
serializeForSpan() {
|
|
184
|
+
return {
|
|
185
|
+
component: "VOICE",
|
|
186
|
+
name: this.name,
|
|
187
|
+
speaker: this.speaker,
|
|
188
|
+
listeningModel: this.listeningModel ? { name: this.listeningModel.name } : void 0,
|
|
189
|
+
speechModel: this.speechModel ? { name: this.speechModel.name } : void 0,
|
|
190
|
+
realtimeModel: this.realtimeConfig?.model
|
|
191
|
+
};
|
|
192
|
+
}
|
|
193
|
+
updateConfig(_options) {
|
|
194
|
+
this.logger.debug("updateConfig not implemented by this voice provider");
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Initializes a WebSocket or WebRTC connection for real-time communication
|
|
198
|
+
* @returns Promise that resolves when the connection is established
|
|
199
|
+
*/
|
|
200
|
+
async connect(_options) {
|
|
201
|
+
this.logger.debug("connect not implemented by this voice provider");
|
|
202
|
+
}
|
|
203
|
+
/**
|
|
204
|
+
* Relay audio data to the voice provider for real-time processing
|
|
205
|
+
* @param audioData Audio data to relay
|
|
206
|
+
*/
|
|
207
|
+
async send(_audioData) {
|
|
208
|
+
this.logger.debug("relay not implemented by this voice provider");
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Trigger voice providers to respond
|
|
212
|
+
*/
|
|
213
|
+
async answer(_options) {
|
|
214
|
+
this.logger.debug("answer not implemented by this voice provider");
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Equip the voice provider with instructions
|
|
218
|
+
* @param instructions Instructions to add
|
|
219
|
+
*/
|
|
220
|
+
addInstructions(_instructions) {
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Equip the voice provider with tools
|
|
224
|
+
* @param tools Array of tools to add
|
|
225
|
+
*/
|
|
226
|
+
addTools(_tools) {
|
|
227
|
+
}
|
|
228
|
+
/**
|
|
229
|
+
* Disconnect from the WebSocket or WebRTC connection
|
|
230
|
+
*/
|
|
231
|
+
close() {
|
|
232
|
+
this.logger.debug("close not implemented by this voice provider");
|
|
233
|
+
}
|
|
234
|
+
/**
|
|
235
|
+
* Register an event listener
|
|
236
|
+
* @param event Event name (e.g., 'speaking', 'writing', 'error')
|
|
237
|
+
* @param callback Callback function that receives event data
|
|
238
|
+
*/
|
|
239
|
+
on(_event, _callback) {
|
|
240
|
+
this.logger.debug("on not implemented by this voice provider");
|
|
241
|
+
}
|
|
242
|
+
/**
|
|
243
|
+
* Remove an event listener
|
|
244
|
+
* @param event Event name (e.g., 'speaking', 'writing', 'error')
|
|
245
|
+
* @param callback Callback function to remove
|
|
246
|
+
*/
|
|
247
|
+
off(_event, _callback) {
|
|
248
|
+
this.logger.debug("off not implemented by this voice provider");
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Get available speakers/voices
|
|
252
|
+
* @returns Array of available voice IDs and their metadata
|
|
253
|
+
*/
|
|
254
|
+
getSpeakers() {
|
|
255
|
+
this.logger.debug("getSpeakers not implemented by this voice provider");
|
|
256
|
+
return Promise.resolve([]);
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* Get the listener status for this voice provider
|
|
260
|
+
* @returns Promise resolving to an object with an `enabled` flag (`{ enabled: false }` in this base implementation)
|
|
261
|
+
*/
|
|
262
|
+
getListener() {
|
|
263
|
+
this.logger.debug("getListener not implemented by this voice provider");
|
|
264
|
+
return Promise.resolve({ enabled: false });
|
|
265
|
+
}
|
|
266
|
+
};
|
|
7
267
|
|
|
8
268
|
// src/index.ts
|
|
9
269
|
var resolveAuthConfig = (modelConfig, fallback, vertexConfig) => {
|
|
@@ -45,7 +305,7 @@ var buildAuthOptions = (config, vertexConfig) => {
|
|
|
45
305
|
return options;
|
|
46
306
|
};
|
|
47
307
|
var DEFAULT_VOICE = "en-US-Casual-K";
|
|
48
|
-
var GoogleVoice = class extends
|
|
308
|
+
var GoogleVoice = class extends MastraVoice {
|
|
49
309
|
ttsClient;
|
|
50
310
|
speechClient;
|
|
51
311
|
vertexAI;
|