@pie-players/tts-server-polly 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -0
- package/README.md +181 -0
- package/dist/PollyServerProvider.d.ts +106 -0
- package/dist/PollyServerProvider.d.ts.map +1 -0
- package/dist/PollyServerProvider.js +284 -0
- package/dist/PollyServerProvider.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/examples/INTEGRATION-GUIDE.md +603 -0
- package/examples/sveltekit/synthesize-server.ts +110 -0
- package/examples/sveltekit/voices-server.ts +65 -0
- package/package.json +37 -0
- package/src/PollyServerProvider.ts +426 -0
- package/src/index.ts +7 -0
- package/tsconfig.json +9 -0
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
# TTS Server API Integration Guide
|
|
2
|
+
|
|
3
|
+
This guide shows how to integrate the server-side TTS with speech marks into your SvelteKit application.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The integration has three parts:
|
|
8
|
+
|
|
9
|
+
1. **Server-side packages** - Handle AWS Polly API calls
|
|
10
|
+
2. **SvelteKit API routes** - Expose TTS endpoints
|
|
11
|
+
3. **Client-side provider** - Call API from browser
|
|
12
|
+
|
|
13
|
+
## Architecture
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
Browser (Client)
|
|
17
|
+
↓
|
|
18
|
+
ServerTTSProvider (@pie-players/tts-client-server)
|
|
19
|
+
↓ HTTP POST
|
|
20
|
+
SvelteKit API Route (/api/tts/synthesize/+server.ts)
|
|
21
|
+
↓
|
|
22
|
+
PollyServerProvider (@pie-players/tts-server-polly)
|
|
23
|
+
↓
|
|
24
|
+
AWS Polly API (audio + speech marks)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Step 1: Install Packages
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
cd your-sveltekit-app
|
|
31
|
+
|
|
32
|
+
# Install server-side packages
|
|
33
|
+
bun add @pie-players/tts-server-core
|
|
34
|
+
bun add @pie-players/tts-server-polly
|
|
35
|
+
|
|
36
|
+
# Install client-side provider
|
|
37
|
+
bun add @pie-players/tts-client-server
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Step 2: Configure Environment Variables
|
|
41
|
+
|
|
42
|
+
Create or update `.env`:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# AWS Polly credentials
|
|
46
|
+
AWS_REGION=us-east-1
|
|
47
|
+
AWS_ACCESS_KEY_ID=your_access_key_id
|
|
48
|
+
AWS_SECRET_ACCESS_KEY=your_secret_access_key
|
|
49
|
+
|
|
50
|
+
# Optional: Redis for caching
|
|
51
|
+
REDIS_URL=redis://localhost:6379
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Important:** Never commit `.env` to git. Add these entries to `.gitignore`:
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
.env
|
|
58
|
+
.env.local
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Step 3: Create SvelteKit API Routes
|
|
62
|
+
|
|
63
|
+
### Create Directory Structure
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
mkdir -p src/routes/api/tts/synthesize
|
|
67
|
+
mkdir -p src/routes/api/tts/voices
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Synthesize Endpoint
|
|
71
|
+
|
|
72
|
+
Copy the example to: **`src/routes/api/tts/synthesize/+server.ts`**
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
import { json, error } from '@sveltejs/kit';
|
|
76
|
+
import type { RequestHandler } from './$types';
|
|
77
|
+
import { PollyServerProvider } from '@pie-players/tts-server-polly';
|
|
78
|
+
|
|
79
|
+
// Singleton provider instance
|
|
80
|
+
let pollyProvider: PollyServerProvider | null = null;
|
|
81
|
+
|
|
82
|
+
/**
 * Returns the module-level Polly provider, creating and initializing it on
 * first use.
 *
 * The instance is published to `pollyProvider` only after `initialize()` has
 * resolved. If initialization rejects, nothing is cached and the next call
 * retries, instead of handing out a provider that was never initialized.
 *
 * @returns The initialized provider shared by this route module.
 * @throws Whatever `PollyServerProvider.initialize()` rejects with.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
  if (!pollyProvider) {
    const provider = new PollyServerProvider();
    await provider.initialize({
      region: process.env.AWS_REGION || 'us-east-1',
      credentials: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
      },
      engine: 'neural',
      defaultVoice: 'Joanna',
    });
    // Cache only a fully initialized instance.
    pollyProvider = provider;
  }
  return pollyProvider;
}
|
|
97
|
+
|
|
98
|
+
/**
 * POST /api/tts/synthesize
 *
 * Reads `{ text, voice, language, rate, includeSpeechMarks }` from the JSON
 * body, synthesizes speech through the Polly provider and returns the audio
 * (base64-encoded when the provider returns a `Buffer`) together with the
 * content type, speech marks and metadata.
 *
 * Request validation runs outside the `try`/`catch` so that its 400 errors are
 * not caught and re-thrown as 500s.
 *
 * @throws 400 when the body is not a JSON object, `text` is missing or not a
 *   string, or `text` is longer than 3000 characters.
 * @throws 500 when provider initialization or synthesis fails.
 */
export const POST: RequestHandler = async ({ request }) => {
  // A malformed body is a client error, not a server failure.
  const body = await request.json().catch(() => null);
  if (body === null || typeof body !== 'object') {
    throw error(400, { message: 'Request body must be a JSON object' });
  }

  const { text, voice, language, rate, includeSpeechMarks = true } = body;

  if (!text || typeof text !== 'string') {
    throw error(400, { message: 'Text is required' });
  }

  if (text.length > 3000) {
    throw error(400, { message: 'Text too long (max 3000 characters)' });
  }

  try {
    const polly = await getPollyProvider();
    const result = await polly.synthesize({
      text,
      voice: voice || 'Joanna',
      language: language || 'en-US',
      rate,
      includeSpeechMarks,
    });

    return json({
      // JSON cannot carry raw bytes, so Buffers are sent as base64.
      audio: result.audio instanceof Buffer ? result.audio.toString('base64') : result.audio,
      contentType: result.contentType,
      speechMarks: result.speechMarks,
      metadata: result.metadata,
    });
  } catch (err) {
    console.error('[TTS API] Error:', err);
    throw error(500, { message: err instanceof Error ? err.message : 'Synthesis failed' });
  }
};
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Voices Endpoint
|
|
134
|
+
|
|
135
|
+
Copy the example to: **`src/routes/api/tts/voices/+server.ts`**
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
import { json, error } from '@sveltejs/kit';
|
|
139
|
+
import type { RequestHandler } from './$types';
|
|
140
|
+
import { PollyServerProvider } from '@pie-players/tts-server-polly';
|
|
141
|
+
|
|
142
|
+
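// Singleton provider for this route module (a separate instance from the synthesize route's; move getPollyProvider into a shared module such as $lib/server to reuse one)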
|
|
143
|
+
let pollyProvider: PollyServerProvider | null = null;
|
|
144
|
+
|
|
145
|
+
/**
 * Returns the module-level Polly provider, creating and initializing it on
 * first use.
 *
 * The instance is published to `pollyProvider` only after `initialize()` has
 * resolved. If initialization rejects, nothing is cached and the next call
 * retries, instead of handing out a provider that was never initialized.
 *
 * @returns The initialized provider shared by this route module.
 * @throws Whatever `PollyServerProvider.initialize()` rejects with.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
  if (!pollyProvider) {
    const provider = new PollyServerProvider();
    await provider.initialize({
      region: process.env.AWS_REGION || 'us-east-1',
      credentials: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
      },
      engine: 'neural',
    });
    // Cache only a fully initialized instance.
    pollyProvider = provider;
  }
  return pollyProvider;
}
|
|
159
|
+
|
|
160
|
+
/**
 * GET /api/tts/voices
 *
 * Lists the provider's voices, optionally filtered by the `language` and
 * `gender` query parameters.
 *
 * `URLSearchParams.get()` returns `string | null`, so `gender` is checked
 * against the allowed values instead of being cast. An absent or empty
 * parameter becomes `undefined`; any other value is rejected.
 *
 * @throws 400 when `gender` is present but not `male`, `female` or `neutral`.
 * @throws 500 when provider initialization or the voice lookup fails.
 */
export const GET: RequestHandler = async ({ url }) => {
  const language = url.searchParams.get('language') || undefined;

  const genderParam = url.searchParams.get('gender');
  let gender: 'male' | 'female' | 'neutral' | undefined;
  if (genderParam === 'male' || genderParam === 'female' || genderParam === 'neutral') {
    gender = genderParam;
  } else if (genderParam) {
    // Thrown outside the try/catch below so it stays a 400.
    throw error(400, { message: 'gender must be one of: male, female, neutral' });
  }

  try {
    const polly = await getPollyProvider();
    const voices = await polly.getVoices({ language, gender });

    return json({ voices });
  } catch (err) {
    console.error('[TTS API] Error:', err);
    throw error(500, { message: err instanceof Error ? err.message : 'Failed to get voices' });
  }
};
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Step 4: Use in Client Code
|
|
177
|
+
|
|
178
|
+
### Basic Usage
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
import { ServerTTSProvider } from '@pie-players/tts-client-server';
|
|
182
|
+
import { TTSService } from '@pie-players/pie-assessment-toolkit';
|
|
183
|
+
|
|
184
|
+
// Initialize TTS service with server provider
|
|
185
|
+
const provider = new ServerTTSProvider();
|
|
186
|
+
const ttsService = new TTSService();
|
|
187
|
+
|
|
188
|
+
await ttsService.initialize(provider, {
|
|
189
|
+
apiEndpoint: '/api/tts',
|
|
190
|
+
provider: 'polly',
|
|
191
|
+
voice: 'Joanna',
|
|
192
|
+
language: 'en-US',
|
|
193
|
+
rate: 1.0,
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
// Speak with word highlighting
|
|
197
|
+
await ttsService.speak('Hello world, this is a test.', {
|
|
198
|
+
contentElement: document.getElementById('content'),
|
|
199
|
+
});
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### With Svelte Component
|
|
203
|
+
|
|
204
|
+
```svelte
|
|
205
|
+
<script lang="ts">
|
|
206
|
+
import { ServerTTSProvider } from '@pie-players/tts-client-server';
|
|
207
|
+
import { TTSService } from '@pie-players/pie-assessment-toolkit';
|
|
208
|
+
import { onMount } from 'svelte';
|
|
209
|
+
|
|
210
|
+
let ttsService: TTSService;
|
|
211
|
+
let contentElement: HTMLElement;
|
|
212
|
+
|
|
213
|
+
onMount(async () => {
|
|
214
|
+
const provider = new ServerTTSProvider();
|
|
215
|
+
ttsService = new TTSService();
|
|
216
|
+
|
|
217
|
+
await ttsService.initialize(provider, {
|
|
218
|
+
apiEndpoint: '/api/tts',
|
|
219
|
+
provider: 'polly',
|
|
220
|
+
voice: 'Joanna',
|
|
221
|
+
});
|
|
222
|
+
});
|
|
223
|
+
|
|
224
|
+
async function handleSpeak() {
|
|
225
|
+
await ttsService.speak('Hello world', {
|
|
226
|
+
contentElement,
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
</script>
|
|
230
|
+
|
|
231
|
+
<div bind:this={contentElement}>
|
|
232
|
+
<p>Hello world, this is a test of text to speech.</p>
|
|
233
|
+
</div>
|
|
234
|
+
|
|
235
|
+
<button on:click={handleSpeak}>Speak</button>
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Step 5: Add Redis Caching (Optional)
|
|
239
|
+
|
|
240
|
+
### Install Redis
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
bun add ioredis
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### Update API Route with Caching
|
|
247
|
+
|
|
248
|
+
```typescript
|
|
249
|
+
import { json, error } from '@sveltejs/kit';
|
|
250
|
+
import type { RequestHandler } from './$types';
|
|
251
|
+
import { PollyServerProvider } from '@pie-players/tts-server-polly';
|
|
252
|
+
import { generateHashedCacheKey } from '@pie-players/tts-server-core';
|
|
253
|
+
import Redis from 'ioredis';
|
|
254
|
+
|
|
255
|
+
// Singleton instances
|
|
256
|
+
let pollyProvider: PollyServerProvider | null = null;
|
|
257
|
+
let redis: Redis | null = null;
|
|
258
|
+
|
|
259
|
+
/**
 * Returns the module-level Redis client, creating it from `REDIS_URL` on
 * first use.
 *
 * @returns The shared Redis client.
 * @throws Error when `REDIS_URL` is not set. Previously this case returned
 *   `null` typed as `Redis`; callers in this file already guard on
 *   `process.env.REDIS_URL` and wrap the call in `try`/`catch`.
 */
async function getRedis(): Promise<Redis> {
  if (!redis) {
    const url = process.env.REDIS_URL;
    if (!url) {
      throw new Error('REDIS_URL is not configured');
    }
    redis = new Redis(url);
  }
  return redis;
}
|
|
265
|
+
|
|
266
|
+
/**
 * Returns the module-level Polly provider, creating and initializing it on
 * first use.
 *
 * The instance is published to `pollyProvider` only after `initialize()` has
 * resolved. If initialization rejects, nothing is cached and the next call
 * retries, instead of handing out a provider that was never initialized.
 *
 * @returns The initialized provider shared by this route module.
 * @throws Whatever `PollyServerProvider.initialize()` rejects with.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
  if (!pollyProvider) {
    const provider = new PollyServerProvider();
    await provider.initialize({
      region: process.env.AWS_REGION || 'us-east-1',
      credentials: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
      },
      engine: 'neural',
    });
    // Cache only a fully initialized instance.
    pollyProvider = provider;
  }
  return pollyProvider;
}
|
|
280
|
+
|
|
281
|
+
/**
 * POST /api/tts/synthesize with an optional Redis cache.
 *
 * Flow: validate the request, look up a cached response, otherwise synthesize
 * with Polly and store the response for 24 hours. Cache reads and writes are
 * best-effort: a Redis failure is logged and the request continues without
 * the cache.
 *
 * Request validation runs outside the `try`/`catch` so that its 400 errors are
 * not caught and re-thrown as 500s.
 *
 * @throws 400 when the body is not a JSON object, `text` is missing or not a
 *   string, or `text` is longer than 3000 characters.
 * @throws 500 when key generation, provider initialization or synthesis fails.
 */
export const POST: RequestHandler = async ({ request }) => {
  // A malformed body is a client error, not a server failure.
  const body = await request.json().catch(() => null);
  if (body === null || typeof body !== 'object') {
    throw error(400, { message: 'Request body must be a JSON object' });
  }

  const { text, voice = 'Joanna', language = 'en-US', rate = 1.0, includeSpeechMarks = true } = body;

  if (!text || typeof text !== 'string') {
    throw error(400, { message: 'Text is required' });
  }

  if (text.length > 3000) {
    throw error(400, { message: 'Text too long (max 3000 characters)' });
  }

  try {
    // Generate cache key
    const baseKey = await generateHashedCacheKey({
      providerId: 'aws-polly',
      text,
      voice,
      language,
      rate,
      format: 'mp3',
    });
    // The response differs depending on whether speech marks were requested,
    // so that flag must be part of the key; otherwise a response cached
    // without marks could be served to a request that asked for them.
    const cacheKey = `${baseKey}:${includeSpeechMarks ? 'marks' : 'nomarks'}`;

    // Check Redis cache
    if (process.env.REDIS_URL) {
      try {
        const redisClient = await getRedis();
        const cached = await redisClient.get(cacheKey);

        if (cached) {
          console.log('[TTS API] Cache hit:', cacheKey);
          const result = JSON.parse(cached);
          result.metadata.cached = true;
          return json(result);
        }
      } catch (cacheError) {
        console.warn('[TTS API] Cache read error:', cacheError);
        // Continue without cache
      }
    }

    // Synthesize with Polly
    const polly = await getPollyProvider();
    const result = await polly.synthesize({
      text,
      voice,
      language,
      rate,
      includeSpeechMarks,
    });

    const response = {
      // JSON cannot carry raw bytes, so Buffers are sent as base64.
      audio: result.audio instanceof Buffer ? result.audio.toString('base64') : result.audio,
      contentType: result.contentType,
      speechMarks: result.speechMarks,
      metadata: result.metadata,
    };

    // Cache result (TTL: 24 hours, in seconds)
    if (process.env.REDIS_URL) {
      try {
        const redisClient = await getRedis();
        await redisClient.setex(cacheKey, 24 * 60 * 60, JSON.stringify(response));
        console.log('[TTS API] Cached result:', cacheKey);
      } catch (cacheError) {
        console.warn('[TTS API] Cache write error:', cacheError);
        // Non-fatal, continue
      }
    }

    return json(response);
  } catch (err) {
    console.error('[TTS API] Error:', err);
    throw error(500, { message: err instanceof Error ? err.message : 'Synthesis failed' });
  }
};
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
## Step 6: Test the Integration
|
|
360
|
+
|
|
361
|
+
### Test API Endpoints
|
|
362
|
+
|
|
363
|
+
```bash
|
|
364
|
+
# Test synthesize endpoint
|
|
365
|
+
curl -X POST http://localhost:5173/api/tts/synthesize \
|
|
366
|
+
-H "Content-Type: application/json" \
|
|
367
|
+
-d '{"text": "Hello world", "voice": "Joanna"}'
|
|
368
|
+
|
|
369
|
+
# Test voices endpoint
|
|
370
|
+
curl http://localhost:5173/api/tts/voices
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
### Test in Browser
|
|
374
|
+
|
|
375
|
+
```typescript
|
|
376
|
+
// In browser console
|
|
377
|
+
const response = await fetch('/api/tts/synthesize', {
|
|
378
|
+
method: 'POST',
|
|
379
|
+
headers: { 'Content-Type': 'application/json' },
|
|
380
|
+
body: JSON.stringify({
|
|
381
|
+
text: 'Hello world, this is a test.',
|
|
382
|
+
voice: 'Joanna',
|
|
383
|
+
}),
|
|
384
|
+
});
|
|
385
|
+
|
|
386
|
+
const data = await response.json();
|
|
387
|
+
console.log('Speech marks:', data.speechMarks);
|
|
388
|
+
console.log('Metadata:', data.metadata);
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
## Redis Caching Benefits
|
|
392
|
+
|
|
393
|
+
With Redis caching enabled:
|
|
394
|
+
|
|
395
|
+
- **First request:** Full Polly API call (~300-500ms)
|
|
396
|
+
- **Cached requests:** Redis retrieval (~10-20ms)
|
|
397
|
+
- **Cost savings:** 80-90% reduction in Polly API calls
|
|
398
|
+
- **TTL:** 24 hours (configurable)
|
|
399
|
+
|
|
400
|
+
### Cache Key Format
|
|
401
|
+
|
|
402
|
+
```
|
|
403
|
+
tts:aws-polly:Joanna:en-US:1.00:mp3:<sha256-hash-of-text>
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
## Security Considerations
|
|
407
|
+
|
|
408
|
+
### Credentials
|
|
409
|
+
|
|
410
|
+
- ✅ AWS credentials stay on server (never exposed to browser)
|
|
411
|
+
- ✅ Use IAM roles in production (no hardcoded credentials)
|
|
412
|
+
- ✅ Use environment variables for configuration
|
|
413
|
+
|
|
414
|
+
### Authentication (Optional)
|
|
415
|
+
|
|
416
|
+
Add authentication middleware to protect the API:
|
|
417
|
+
|
|
418
|
+
```typescript
|
|
419
|
+
// src/hooks.server.ts
|
|
420
|
+
import type { Handle } from '@sveltejs/kit';
|
|
421
|
+
|
|
422
|
+
/**
 * Server hook that gates every `/api/tts` request behind an `Authorization`
 * header accepted by `isValidToken`. All other requests, and authorized TTS
 * requests, are passed on to `resolve`.
 */
export const handle: Handle = async ({ event, resolve }) => {
  // Only the TTS API is protected; everything else goes straight through.
  const isTtsRequest = event.url.pathname.startsWith('/api/tts');
  if (!isTtsRequest) {
    return resolve(event);
  }

  // Verify JWT token or API key
  const authHeader = event.request.headers.get('Authorization');
  if (authHeader && isValidToken(authHeader)) {
    return resolve(event);
  }

  return new Response(JSON.stringify({ error: 'Unauthorized' }), {
    status: 401,
    headers: { 'Content-Type': 'application/json' },
  });
};
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
### Rate Limiting
|
|
440
|
+
|
|
441
|
+
Add rate limiting to prevent abuse:
|
|
442
|
+
|
|
443
|
+
```typescript
|
|
444
|
+
import { rateLimit } from '$lib/rate-limiter';
|
|
445
|
+
|
|
446
|
+
/**
 * Sketch of a rate-limited handler: each client address is allowed
 * 60 requests per 60000 ms window, and requests over the limit get a 429.
 */
export const POST: RequestHandler = async ({ request, getClientAddress }) => {
  // Check rate limit, keyed by the caller's address
  const withinLimit = await rateLimit.check(getClientAddress(), {
    maxRequests: 60, // 60 requests
    windowMs: 60000, // per minute
  });

  if (!withinLimit) {
    throw error(429, { message: 'Rate limit exceeded' });
  }

  // ... rest of handler
};
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
## Cost Optimization
|
|
463
|
+
|
|
464
|
+
### AWS Polly Pricing
|
|
465
|
+
|
|
466
|
+
- **Neural voices:** $16 per 1M characters
|
|
467
|
+
- **Standard voices:** $4 per 1M characters
|
|
468
|
+
|
|
469
|
+
### Example Costs
|
|
470
|
+
|
|
471
|
+
**Scenario:** 1000 students taking an assessment
|
|
472
|
+
|
|
473
|
+
- Average assessment: 5 passages × 500 words × 5 chars = 12,500 chars per student
|
|
474
|
+
- Total: 12.5M characters
|
|
475
|
+
- Cost without caching: $200 (neural) or $50 (standard)
|
|
476
|
+
- Cost with 80% cache hit rate: $40 (neural) or $10 (standard)
|
|
477
|
+
|
|
478
|
+
### Optimization Tips
|
|
479
|
+
|
|
480
|
+
1. **Use Redis caching** - 24-hour TTL captures repeated content
|
|
481
|
+
2. **Standard voices for development** - Switch to neural for production
|
|
482
|
+
3. **Monitor usage** - Track API calls and cache hit rates
|
|
483
|
+
4. **Pre-generate common content** - Cache frequently used passages
|
|
484
|
+
|
|
485
|
+
## Troubleshooting
|
|
486
|
+
|
|
487
|
+
### Error: "AWS credentials not found"
|
|
488
|
+
|
|
489
|
+
Check environment variables are set:
|
|
490
|
+
```bash
|
|
491
|
+
echo $AWS_REGION
|
|
492
|
+
echo $AWS_ACCESS_KEY_ID
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
### Error: "Text too long"
|
|
496
|
+
|
|
497
|
+
AWS Polly limit is 3000 characters. Split longer text:
|
|
498
|
+
|
|
499
|
+
```typescript
|
|
500
|
+
/**
 * Splits text into chunks of at most `maxLength` characters, breaking at
 * sentence boundaries (whitespace following `.`, `!` or `?`) where possible.
 *
 * Sentences are packed greedily and joined with a single space; the joining
 * space counts towards the limit. A sentence that is longer than `maxLength`
 * on its own is broken at the last whitespace that fits, or cut at exactly
 * `maxLength` characters when it contains no whitespace. No chunk is empty.
 *
 * @param text - Text to split. Empty or whitespace-only text yields `[]`.
 * @param maxLength - Maximum chunk length in characters (fractions are
 *   rounded down). Defaults to 2500.
 * @returns The chunks, in order.
 * @throws RangeError when `maxLength` is less than 1 or not a number.
 */
function splitText(text: string, maxLength = 2500): string[] {
  const limit = Math.floor(maxLength);
  if (!(limit >= 1)) {
    throw new RangeError('maxLength must be at least 1');
  }

  const chunks: string[] = [];
  let current = '';

  for (const sentence of text.trim().split(/(?<=[.!?])\s+/)) {
    if (!sentence) {
      continue;
    }

    if (sentence.length > limit) {
      // This sentence cannot fit in any chunk as a whole: flush what we have
      // and break the sentence itself. Its last piece stays open so that
      // following sentences can still be appended to it.
      if (current) {
        chunks.push(current);
      }
      const pieces = splitOversizedSentence(sentence, limit);
      current = pieces.pop() ?? '';
      chunks.push(...pieces);
      continue;
    }

    const candidate = current ? `${current} ${sentence}` : sentence;
    if (candidate.length > limit) {
      // `current` is non-empty here: a lone sentence that fits never exceeds
      // the limit.
      chunks.push(current);
      current = sentence;
    } else {
      current = candidate;
    }
  }

  if (current) {
    chunks.push(current);
  }

  return chunks;
}

/** Breaks one over-long sentence into pieces of at most `limit` characters. */
function splitOversizedSentence(sentence: string, limit: number): string[] {
  const pieces: string[] = [];
  let rest = sentence;

  while (rest.length > limit) {
    // Last whitespace at an index <= limit, so the piece before it fits.
    let cut = rest.slice(0, limit + 1).search(/\s\S*$/);
    if (cut <= 0) {
      cut = limit; // no usable whitespace: hard cut
    }
    pieces.push(rest.slice(0, cut).trimEnd());
    rest = rest.slice(cut).trimStart();
  }

  if (rest) {
    pieces.push(rest);
  }

  return pieces;
}
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
### Error: "Speech marks empty"
|
|
523
|
+
|
|
524
|
+
Check that:
|
|
525
|
+
1. Speech marks are requested in API call
|
|
526
|
+
2. Provider supports speech marks
|
|
527
|
+
3. Text is not empty
|
|
528
|
+
|
|
529
|
+
### Redis connection errors
|
|
530
|
+
|
|
531
|
+
If Redis is unavailable, the API will work without caching. Check Redis:
|
|
532
|
+
|
|
533
|
+
```bash
|
|
534
|
+
redis-cli ping
|
|
535
|
+
# Should return: PONG
|
|
536
|
+
```
|
|
537
|
+
|
|
538
|
+
## Production Deployment
|
|
539
|
+
|
|
540
|
+
### Environment Setup
|
|
541
|
+
|
|
542
|
+
```bash
|
|
543
|
+
# Production environment variables
|
|
544
|
+
export NODE_ENV=production
|
|
545
|
+
export AWS_REGION=us-east-1
|
|
546
|
+
export AWS_ACCESS_KEY_ID=xxx
|
|
547
|
+
export AWS_SECRET_ACCESS_KEY=yyy
|
|
548
|
+
export REDIS_URL=redis://your-redis-host:6379
|
|
549
|
+
```
|
|
550
|
+
|
|
551
|
+
### Docker Deployment
|
|
552
|
+
|
|
553
|
+
```dockerfile
|
|
554
|
+
FROM node:20-alpine
WORKDIR /app
COPY . .
# Install all dependencies: the build step typically needs devDependencies
# (the SvelteKit/Vite toolchain), which `npm ci --production` would omit.
RUN npm ci
RUN npm run build
# Remove devDependencies once the build output exists.
RUN npm prune --omit=dev
EXPOSE 3000
CMD ["node", "build"]
|
|
561
|
+
```
|
|
562
|
+
|
|
563
|
+
### Health Check
|
|
564
|
+
|
|
565
|
+
Add a health endpoint:
|
|
566
|
+
|
|
567
|
+
```typescript
|
|
568
|
+
// src/routes/api/health/+server.ts
|
|
569
|
+
import { json } from '@sveltejs/kit';
|
|
570
|
+
import type { RequestHandler } from './$types';
|
|
571
|
+
|
|
572
|
+
/**
 * GET /api/health
 *
 * Reports a timestamp and the results of `checkPolly()` and `checkRedis()`,
 * run one after the other.
 */
export const GET: RequestHandler = async () => {
  const timestamp = new Date().toISOString();
  const polly = await checkPolly();
  const redis = await checkRedis();

  return json({
    status: 'ok',
    timestamp,
    services: { polly, redis },
  });
};
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
## Next Steps
|
|
587
|
+
|
|
588
|
+
1. **Test in your app** - Create a demo page
|
|
589
|
+
2. **Monitor usage** - Track API calls and costs
|
|
590
|
+
3. **Add more providers** - Google Cloud TTS, ElevenLabs
|
|
591
|
+
4. **Optimize caching** - Fine-tune TTL and eviction
|
|
592
|
+
|
|
593
|
+
## Complete Example
|
|
594
|
+
|
|
595
|
+
See the section-player demo for a complete working example:
|
|
596
|
+
- `packages/section-player/demo.html` - Client-side usage
|
|
597
|
+
- API routes would be added to a SvelteKit app
|
|
598
|
+
|
|
599
|
+
## Support
|
|
600
|
+
|
|
601
|
+
For issues or questions:
|
|
602
|
+
- Check the [TTS Server API Architecture](../../../docs/tts-server-api-architecture.md)
|
|
603
|
+
- See [TTS Highlighting Implementation Plan](../../../docs/tts-highlighting-implementation-plan.md)
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SvelteKit API route for TTS synthesis
|
|
3
|
+
*
|
|
4
|
+
* Copy this file to your SvelteKit app:
|
|
5
|
+
* src/routes/api/tts/synthesize/+server.ts
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { generateHashedCacheKey } from "@pie-players/tts-server-core";
|
|
9
|
+
import { PollyServerProvider } from "@pie-players/tts-server-polly";
|
|
10
|
+
import { error, json } from "@sveltejs/kit";
|
|
11
|
+
import type { RequestHandler } from "./$types";
|
|
12
|
+
|
|
13
|
+
// Initialize Polly provider (singleton)
|
|
14
|
+
let pollyProvider: PollyServerProvider | null = null;
|
|
15
|
+
|
|
16
|
+
/**
 * Returns the module-level Polly provider, creating and initializing it on
 * first use.
 *
 * The instance is published to `pollyProvider` only after `initialize()` has
 * resolved. If initialization rejects, nothing is cached and the next call
 * retries, instead of handing out a provider that was never initialized.
 *
 * @returns The initialized provider shared by this route module.
 * @throws Whatever `PollyServerProvider.initialize()` rejects with.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
	if (!pollyProvider) {
		const provider = new PollyServerProvider();
		await provider.initialize({
			region: process.env.AWS_REGION || "us-east-1",
			credentials: {
				accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
				secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
			},
			engine: "neural",
			defaultVoice: "Joanna",
		});
		// Cache only a fully initialized instance.
		pollyProvider = provider;
	}
	return pollyProvider;
}
|
|
31
|
+
|
|
32
|
+
// Optional: Redis caching
|
|
33
|
+
// import { createClient } from 'redis';
|
|
34
|
+
// const redis = createClient({ url: process.env.REDIS_URL });
|
|
35
|
+
// await redis.connect();
|
|
36
|
+
|
|
37
|
+
/**
 * POST /api/tts/synthesize
 *
 * Reads `{ text, voice, language, rate, includeSpeechMarks }` from the JSON
 * body, synthesizes speech through the Polly provider and returns the audio
 * (base64-encoded when the provider returns a `Buffer`) together with the
 * content type, speech marks and metadata. The commented-out sections show
 * where an optional Redis cache would plug in.
 *
 * Request validation runs outside the `try`/`catch` so that its 400 errors are
 * not caught and re-thrown as 500s.
 *
 * @throws 400 when the body is not a JSON object, `text` is missing or not a
 *   string, or `text` is longer than 3000 characters.
 * @throws 500 when provider initialization or synthesis fails.
 */
export const POST: RequestHandler = async ({ request }) => {
	// A malformed body is a client error, not a server failure.
	const body = await request.json().catch(() => null);
	if (body === null || typeof body !== "object") {
		throw error(400, { message: "Request body must be a JSON object" });
	}

	const { text, voice, language, rate, includeSpeechMarks = true } = body;

	// Validate request
	if (!text || typeof text !== "string") {
		throw error(400, { message: "Text is required and must be a string" });
	}

	if (text.length > 3000) {
		throw error(400, { message: "Text too long (max 3000 characters)" });
	}

	try {
		// Optional: Check Redis cache
		// const cacheKey = await generateHashedCacheKey({
		//   providerId: 'aws-polly',
		//   text,
		//   voice: voice || 'Joanna',
		//   language: language || 'en-US',
		//   rate: rate || 1.0,
		//   format: 'mp3',
		// });
		//
		// const cached = await redis.get(cacheKey);
		// if (cached) {
		//   console.log('[TTS API] Cache hit:', cacheKey);
		//   return json(JSON.parse(cached));
		// }

		// Get Polly provider
		const polly = await getPollyProvider();

		// Synthesize speech
		const result = await polly.synthesize({
			text,
			voice: voice || "Joanna",
			language: language || "en-US",
			rate,
			includeSpeechMarks,
		});

		// Convert Buffer to base64 for JSON response
		const response = {
			audio:
				result.audio instanceof Buffer
					? result.audio.toString("base64")
					: result.audio,
			contentType: result.contentType,
			speechMarks: result.speechMarks,
			metadata: result.metadata,
		};

		// Optional: Cache result (the `redis` package's client names this method `setEx`)
		// await redis.setEx(cacheKey, 24 * 60 * 60, JSON.stringify(response));

		return json(response);
	} catch (err) {
		console.error("[TTS API] Synthesis error:", err);

		if (err instanceof Error) {
			throw error(500, { message: err.message });
		}

		throw error(500, { message: "Internal server error" });
	}
};
|