@pie-players/tts-server-google 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,532 @@
1
+ # Google Cloud TTS Integration Guide
2
+
3
+ This guide shows how to integrate the Google Cloud Text-to-Speech provider into your server-side application.
4
+
5
+ ## Prerequisites
6
+
7
+ 1. **Google Cloud Project**: Create a project at [console.cloud.google.com](https://console.cloud.google.com)
8
+ 2. **Enable Text-to-Speech API**: Go to APIs & Services → Enable "Cloud Text-to-Speech API"
9
+ 3. **Authentication**: Set up one of the authentication methods below
10
+
11
+ ## Authentication Setup
12
+
13
+ ### Option 1: Service Account (Recommended for Production)
14
+
15
+ 1. Go to IAM & Admin → Service Accounts
16
+ 2. Create a service account
17
+ 3. Grant the role: "Cloud Text-to-Speech User"
18
+ 4. Create and download a JSON key file
19
+
20
+ ```typescript
21
+ import { GoogleCloudTTSProvider } from '@pie-players/tts-server-google';
22
+
23
+ const provider = new GoogleCloudTTSProvider();
24
+
25
+ await provider.initialize({
26
+ projectId: 'your-project-id',
27
+ credentials: './config/service-account.json',
28
+ voiceType: 'wavenet',
29
+ });
30
+ ```
31
+
32
+ ### Option 2: API Key (Simple but Less Secure)
33
+
34
+ 1. Go to APIs & Services → Credentials
35
+ 2. Create credentials → API Key
36
+ 3. Restrict the key to "Cloud Text-to-Speech API"
37
+
38
+ ```typescript
39
+ await provider.initialize({
40
+ projectId: 'your-project-id',
41
+ credentials: {
42
+ apiKey: process.env.GOOGLE_TTS_API_KEY!,
43
+ },
44
+ });
45
+ ```
46
+
47
+ ### Option 3: Application Default Credentials (Local Development)
48
+
49
+ 1. Install the Google Cloud SDK (`gcloud` CLI)
50
+ 2. Run: `gcloud auth application-default login`
51
+
52
+ ```typescript
53
+ await provider.initialize({
54
+ projectId: 'your-project-id',
55
+ // No credentials needed - uses ADC
56
+ });
57
+ ```
58
+
59
+ ## SvelteKit Integration
60
+
61
+ ### 1. Install Dependencies
62
+
63
+ ```bash
64
+ npm install @pie-players/tts-server-google
65
+ ```
66
+
67
+ ### 2. Create TTS API Route
68
+
69
+ **File: `src/routes/api/tts/synthesize/+server.ts`**
70
+
71
+ ```typescript
72
+ import { GoogleCloudTTSProvider } from '@pie-players/tts-server-google';
73
+ import { json } from '@sveltejs/kit';
74
+ import type { RequestHandler } from './$types';
75
+
76
+ // Initialize provider once (singleton pattern)
77
+ let ttsProvider: GoogleCloudTTSProvider | null = null;
78
+
79
+ async function getTTSProvider() {
80
+ if (!ttsProvider) {
81
+ ttsProvider = new GoogleCloudTTSProvider();
82
+ await ttsProvider.initialize({
83
+ projectId: process.env.GOOGLE_CLOUD_PROJECT!,
84
+ credentials: process.env.GOOGLE_APPLICATION_CREDENTIALS,
85
+ voiceType: 'wavenet',
86
+ defaultVoice: 'en-US-Wavenet-A',
87
+ });
88
+ }
89
+ return ttsProvider;
90
+ }
91
+
92
+ export const POST: RequestHandler = async ({ request }) => {
93
+ try {
94
+ const { text, voice, includeSpeechMarks } = await request.json();
95
+
96
+ const provider = await getTTSProvider();
97
+
98
+ const result = await provider.synthesize({
99
+ text,
100
+ voice,
101
+ includeSpeechMarks: includeSpeechMarks ?? true,
102
+ });
103
+
104
+ // Convert audio buffer to base64 for JSON response
105
+ const audioBase64 = result.audio.toString('base64');
106
+
107
+ return json({
108
+ audio: audioBase64,
109
+ contentType: result.contentType,
110
+ speechMarks: result.speechMarks,
111
+ metadata: result.metadata,
112
+ });
113
+ } catch (error) {
114
+ console.error('TTS synthesis failed:', error);
115
+ return json(
116
+ { error: error instanceof Error ? error.message : 'TTS synthesis failed' },
117
+ { status: 500 }
118
+ );
119
+ }
120
+ };
121
+ ```
122
+
123
+ ### 3. Create Voices API Route
124
+
125
+ **File: `src/routes/api/tts/voices/+server.ts`**
126
+
127
+ ```typescript
128
+ import { GoogleCloudTTSProvider } from '@pie-players/tts-server-google';
129
+ import { json } from '@sveltejs/kit';
130
+ import type { RequestHandler } from './$types';
131
+
132
+ let ttsProvider: GoogleCloudTTSProvider | null = null;
133
+
134
+ async function getTTSProvider() {
135
+ if (!ttsProvider) {
136
+ ttsProvider = new GoogleCloudTTSProvider();
137
+ await ttsProvider.initialize({
138
+ projectId: process.env.GOOGLE_CLOUD_PROJECT!,
139
+ credentials: process.env.GOOGLE_APPLICATION_CREDENTIALS,
140
+ voiceType: 'wavenet',
141
+ });
142
+ }
143
+ return ttsProvider;
144
+ }
145
+
146
+ export const GET: RequestHandler = async ({ url }) => {
147
+ try {
148
+ const language = url.searchParams.get('language') || undefined;
149
+ const gender = url.searchParams.get('gender') as 'male' | 'female' | 'neutral' | undefined;
150
+
151
+ const provider = await getTTSProvider();
152
+
153
+ const voices = await provider.getVoices({ language, gender });
154
+
155
+ return json({ voices });
156
+ } catch (error) {
157
+ console.error('Failed to fetch voices:', error);
158
+ return json(
159
+ { error: error instanceof Error ? error.message : 'Failed to fetch voices' },
160
+ { status: 500 }
161
+ );
162
+ }
163
+ };
164
+ ```
165
+
166
+ ### 4. Environment Variables
167
+
168
+ **File: `.env`**
169
+
170
+ ```bash
171
+ # Google Cloud Configuration
172
+ GOOGLE_CLOUD_PROJECT=your-project-id
173
+ GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
174
+
175
+ # Or use an API key instead (pass it as `credentials: { apiKey }`, see Option 2 above)
176
+ # GOOGLE_TTS_API_KEY=AIza...
177
+ ```
178
+
179
+ ### 5. Client-Side Usage
180
+
181
+ **File: `src/lib/tts-client.ts`**
182
+
183
+ ```typescript
184
+ export interface TTSResult {
185
+ audio: string; // Base64 encoded
186
+ contentType: string;
187
+ speechMarks: Array<{
188
+ time: number;
189
+ type: string;
190
+ start: number;
191
+ end: number;
192
+ value: string;
193
+ }>;
194
+ metadata: {
195
+ providerId: string;
196
+ voice: string;
197
+ duration: number;
198
+ };
199
+ }
200
+
201
+ export async function synthesizeSpeech(
202
+ text: string,
203
+ voice?: string
204
+ ): Promise<TTSResult> {
205
+ const response = await fetch('/api/tts/synthesize', {
206
+ method: 'POST',
207
+ headers: { 'Content-Type': 'application/json' },
208
+ body: JSON.stringify({ text, voice, includeSpeechMarks: true }),
209
+ });
210
+
211
+ if (!response.ok) {
212
+ throw new Error(`TTS failed: ${response.statusText}`);
213
+ }
214
+
215
+ return response.json();
216
+ }
217
+
218
+ export async function playAudio(result: TTSResult): Promise<void> {
219
+ // Convert base64 to blob
220
+ const audioData = atob(result.audio);
221
+ const audioArray = new Uint8Array(audioData.length);
222
+ for (let i = 0; i < audioData.length; i++) {
223
+ audioArray[i] = audioData.charCodeAt(i);
224
+ }
225
+ const blob = new Blob([audioArray], { type: result.contentType });
226
+
227
+ // Play audio
228
+ const url = URL.createObjectURL(blob);
229
+ const audio = new Audio(url);
230
+ await audio.play();
231
+
232
+ // Clean up
233
+ audio.onended = () => URL.revokeObjectURL(url);
234
+ }
235
+
236
+ export async function getVoices(language?: string) {
237
+ const params = new URLSearchParams();
238
+ if (language) params.set('language', language);
239
+
240
+ const response = await fetch(`/api/tts/voices?${params}`);
241
+ const { voices } = await response.json();
242
+ return voices;
243
+ }
244
+ ```
245
+
246
+ **File: `src/routes/+page.svelte`**
247
+
248
+ ```svelte
249
+ <script lang="ts">
250
+ import { synthesizeSpeech, playAudio, getVoices } from '$lib/tts-client';
251
+ import { onMount } from 'svelte';
252
+
253
+ let text = 'Hello world, this is Google Cloud Text to Speech!';
254
+ let voice = 'en-US-Wavenet-A';
255
+ let voices: any[] = [];
256
+ let speechMarks: any[] = [];
257
+ let isPlaying = false;
258
+
259
+ onMount(async () => {
260
+ voices = await getVoices('en-US');
261
+ });
262
+
263
+ async function handleSpeak() {
264
+ try {
265
+ isPlaying = true;
266
+ const result = await synthesizeSpeech(text, voice);
267
+ speechMarks = result.speechMarks;
268
+ await playAudio(result);
269
+ } catch (error) {
270
+ console.error('Speech failed:', error);
271
+ alert('Speech synthesis failed');
272
+ } finally {
273
+ isPlaying = false;
274
+ }
275
+ }
276
+ </script>
277
+
278
+ <div class="container">
279
+ <h1>Google Cloud TTS Demo</h1>
280
+
281
+ <div class="controls">
282
+ <label>
283
+ Text to speak:
284
+ <textarea bind:value={text} rows="4"></textarea>
285
+ </label>
286
+
287
+ <label>
288
+ Voice:
289
+ <select bind:value={voice}>
290
+ {#each voices as v}
291
+ <option value={v.id}>{v.name} ({v.gender})</option>
292
+ {/each}
293
+ </select>
294
+ </label>
295
+
296
+ <button on:click={handleSpeak} disabled={isPlaying}>
297
+ {isPlaying ? 'Speaking...' : 'Speak'}
298
+ </button>
299
+ </div>
300
+
301
+ {#if speechMarks.length > 0}
302
+ <div class="speech-marks">
303
+ <h2>Speech Marks</h2>
304
+ <ul>
305
+ {#each speechMarks as mark}
306
+ <li>
307
+ {mark.value} ({mark.time}ms)
308
+ </li>
309
+ {/each}
310
+ </ul>
311
+ </div>
312
+ {/if}
313
+ </div>
314
+
315
+ <style>
316
+ .container {
317
+ max-width: 800px;
318
+ margin: 2rem auto;
319
+ padding: 2rem;
320
+ }
321
+
322
+ .controls {
323
+ display: flex;
324
+ flex-direction: column;
325
+ gap: 1rem;
326
+ }
327
+
328
+ textarea {
329
+ width: 100%;
330
+ padding: 0.5rem;
331
+ font-family: inherit;
332
+ }
333
+
334
+ select {
335
+ width: 100%;
336
+ padding: 0.5rem;
337
+ }
338
+
339
+ button {
340
+ padding: 0.75rem 1.5rem;
341
+ background: #4285f4;
342
+ color: white;
343
+ border: none;
344
+ border-radius: 4px;
345
+ cursor: pointer;
346
+ font-size: 1rem;
347
+ }
348
+
349
+ button:disabled {
350
+ background: #ccc;
351
+ cursor: not-allowed;
352
+ }
353
+
354
+ .speech-marks {
355
+ margin-top: 2rem;
356
+ padding: 1rem;
357
+ background: #f5f5f5;
358
+ border-radius: 4px;
359
+ }
360
+
361
+ .speech-marks ul {
362
+ list-style: none;
363
+ padding: 0;
364
+ }
365
+
366
+ .speech-marks li {
367
+ padding: 0.25rem;
368
+ font-family: monospace;
369
+ }
370
+ </style>
371
+ ```
372
+
373
+ ## Express.js Integration
374
+
375
+ ```typescript
376
+ import express from 'express';
377
+ import { GoogleCloudTTSProvider } from '@pie-players/tts-server-google';
378
+
379
+ const app = express();
380
+ app.use(express.json());
381
+
382
+ // Initialize provider
383
+ const ttsProvider = new GoogleCloudTTSProvider();
384
+ await ttsProvider.initialize({
385
+ projectId: process.env.GOOGLE_CLOUD_PROJECT!,
386
+ credentials: process.env.GOOGLE_APPLICATION_CREDENTIALS,
387
+ voiceType: 'wavenet',
388
+ });
389
+
390
+ // Synthesize endpoint
391
+ app.post('/api/tts/synthesize', async (req, res) => {
392
+ try {
393
+ const { text, voice, includeSpeechMarks } = req.body;
394
+
395
+ const result = await ttsProvider.synthesize({
396
+ text,
397
+ voice,
398
+ includeSpeechMarks: includeSpeechMarks ?? true,
399
+ });
400
+
401
+ // Return audio as a base64-encoded string (a JSON body cannot carry the raw buffer)
402
+ res.json({
403
+ audio: result.audio.toString('base64'),
404
+ contentType: result.contentType,
405
+ speechMarks: result.speechMarks,
406
+ metadata: result.metadata,
407
+ });
408
+ } catch (error) {
409
+ console.error('TTS synthesis failed:', error);
410
+ res.status(500).json({ error: 'TTS synthesis failed' });
411
+ }
412
+ });
413
+
414
+ // Voices endpoint
415
+ app.get('/api/tts/voices', async (req, res) => {
416
+ try {
417
+ const { language, gender } = req.query;
418
+
419
+ const voices = await ttsProvider.getVoices({
420
+ language: language as string,
421
+ gender: gender as 'male' | 'female' | 'neutral',
422
+ });
423
+
424
+ res.json({ voices });
425
+ } catch (error) {
426
+ console.error('Failed to fetch voices:', error);
427
+ res.status(500).json({ error: 'Failed to fetch voices' });
428
+ }
429
+ });
430
+
431
+ app.listen(3000, () => {
432
+ console.log('Server running on http://localhost:3000');
433
+ });
434
+ ```
435
+
436
+ ## Next.js App Router Integration
437
+
438
+ **File: `app/api/tts/synthesize/route.ts`**
439
+
440
+ ```typescript
441
+ import { GoogleCloudTTSProvider } from '@pie-players/tts-server-google';
442
+ import { NextResponse } from 'next/server';
443
+
444
+ let ttsProvider: GoogleCloudTTSProvider | null = null;
445
+
446
+ async function getTTSProvider() {
447
+ if (!ttsProvider) {
448
+ ttsProvider = new GoogleCloudTTSProvider();
449
+ await ttsProvider.initialize({
450
+ projectId: process.env.GOOGLE_CLOUD_PROJECT!,
451
+ credentials: process.env.GOOGLE_APPLICATION_CREDENTIALS,
452
+ voiceType: 'wavenet',
453
+ });
454
+ }
455
+ return ttsProvider;
456
+ }
457
+
458
+ export async function POST(request: Request) {
459
+ try {
460
+ const { text, voice, includeSpeechMarks } = await request.json();
461
+
462
+ const provider = await getTTSProvider();
463
+
464
+ const result = await provider.synthesize({
465
+ text,
466
+ voice,
467
+ includeSpeechMarks: includeSpeechMarks ?? true,
468
+ });
469
+
470
+ return NextResponse.json({
471
+ audio: result.audio.toString('base64'),
472
+ contentType: result.contentType,
473
+ speechMarks: result.speechMarks,
474
+ metadata: result.metadata,
475
+ });
476
+ } catch (error) {
477
+ console.error('TTS synthesis failed:', error);
478
+ return NextResponse.json(
479
+ { error: 'TTS synthesis failed' },
480
+ { status: 500 }
481
+ );
482
+ }
483
+ }
484
+ ```
485
+
486
+ ## Security Best Practices
487
+
488
+ 1. **Never expose API keys in client code** - Always use server-side endpoints
489
+ 2. **Restrict API keys** - Limit to specific APIs and IP addresses
490
+ 3. **Use service accounts in production** - More secure than API keys
491
+ 4. **Rate limiting** - Implement rate limiting to prevent abuse
492
+ 5. **Cache results** - Cache TTS output to reduce API calls and costs
493
+ 6. **Validate input** - Sanitize and validate user input before synthesis
494
+
495
+ ## Cost Optimization
496
+
497
+ 1. **Cache frequently used phrases** - Store audio for common text
498
+ 2. **Use standard voices when possible** - roughly $4 per 1M characters vs $16 per 1M characters for WaveNet/neural voices (check current Google Cloud pricing)
499
+ 3. **Batch requests** - Group multiple synthesis requests when feasible
500
+ 4. **Monitor usage** - Set up billing alerts in Google Cloud Console
501
+
502
+ ## Troubleshooting
503
+
504
+ ### Authentication Errors
505
+
506
+ ```
507
+ Error: Google Cloud authentication failed
508
+ ```
509
+
510
+ **Solution**: Verify your credentials are correct and the service account has the "Cloud Text-to-Speech User" role.
511
+
512
+ ### Rate Limit Exceeded
513
+
514
+ ```
515
+ Error: Google Cloud rate limit exceeded
516
+ ```
517
+
518
+ **Solution**: Implement exponential backoff and request rate limiting. Consider increasing your quota in Google Cloud Console.
519
+
520
+ ### No Audio Content
521
+
522
+ ```
523
+ Error: No audio content received from Google Cloud TTS
524
+ ```
525
+
526
+ **Solution**: Check that your project has the Text-to-Speech API enabled and your billing is active.
527
+
528
+ ## Support
529
+
530
+ For issues specific to this package, please file an issue on GitHub.
531
+
532
+ For Google Cloud TTS API issues, see the [official documentation](https://cloud.google.com/text-to-speech/docs).
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@pie-players/tts-server-google",
3
+ "version": "0.1.0",
4
+ "description": "Google Cloud Text-to-Speech provider for server-side TTS with speech marks support",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "scripts": {
15
+ "build": "tsc",
16
+ "dev": "tsc --watch",
17
+ "test": "vitest",
18
+ "test:coverage": "vitest --coverage"
19
+ },
20
+ "keywords": [
21
+ "tts",
22
+ "text-to-speech",
23
+ "google-cloud",
24
+ "wavenet",
25
+ "speech-marks",
26
+ "server-side"
27
+ ],
28
+ "author": "PIE Framework",
29
+ "license": "MIT",
30
+ "dependencies": {
31
+ "@google-cloud/text-to-speech": "^5.0.0",
32
+ "@pie-players/tts-server-core": "workspace:*"
33
+ },
34
+ "devDependencies": {
35
+ "typescript": "^5.3.3",
36
+ "vitest": "^1.0.4"
37
+ }
38
+ }