@pie-players/tts-server-polly 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,603 @@
1
+ # TTS Server API Integration Guide
2
+
3
+ This guide shows how to integrate the server-side TTS with speech marks into your SvelteKit application.
4
+
5
+ ## Overview
6
+
7
+ The integration has three parts:
8
+
9
+ 1. **Server-side packages** - Handle AWS Polly API calls
10
+ 2. **SvelteKit API routes** - Expose TTS endpoints
11
+ 3. **Client-side provider** - Call API from browser
12
+
13
+ ## Architecture
14
+
15
+ ```
16
+ Browser (Client)
17
+
18
+ ServerTTSProvider (@pie-players/tts-client-server)
19
+ ↓ HTTP POST
20
+ SvelteKit API Route (/api/tts/synthesize/+server.ts)
21
+
22
+ PollyServerProvider (@pie-players/tts-server-polly)
23
+
24
+ AWS Polly API (audio + speech marks)
25
+ ```
26
+
27
+ ## Step 1: Install Packages
28
+
29
+ ```bash
30
+ cd your-sveltekit-app
31
+
32
+ # Install server-side packages
33
+ bun add @pie-players/tts-server-core
34
+ bun add @pie-players/tts-server-polly
35
+
36
+ # Install client-side provider
37
+ bun add @pie-players/tts-client-server
38
+ ```
39
+
40
+ ## Step 2: Configure Environment Variables
41
+
42
+ Create or update `.env`:
43
+
44
+ ```bash
45
+ # AWS Polly credentials
46
+ AWS_REGION=us-east-1
47
+ AWS_ACCESS_KEY_ID=your_access_key_id
48
+ AWS_SECRET_ACCESS_KEY=your_secret_access_key
49
+
50
+ # Optional: Redis for caching
51
+ REDIS_URL=redis://localhost:6379
52
+ ```
53
+
54
+ **Important:** Never commit `.env` to git. Add it to `.gitignore`:
55
+
56
+ ```
57
+ .env
58
+ .env.local
59
+ ```
60
+
61
+ ## Step 3: Create SvelteKit API Routes
62
+
63
+ ### Create Directory Structure
64
+
65
+ ```bash
66
+ mkdir -p src/routes/api/tts/synthesize
67
+ mkdir -p src/routes/api/tts/voices
68
+ ```
69
+
70
+ ### Synthesize Endpoint
71
+
72
+ Copy the example to: **`src/routes/api/tts/synthesize/+server.ts`**
73
+
74
+ ```typescript
75
+ import { json, error } from '@sveltejs/kit';
76
+ import type { RequestHandler } from './$types';
77
+ import { PollyServerProvider } from '@pie-players/tts-server-polly';
78
+
79
// In-flight or completed initialization of the module-wide Polly provider.
// Caching the promise (rather than the instance) means concurrent requests
// all wait for the same initialize() call instead of receiving a provider
// that has been constructed but not yet initialized.
let pollyProviderInit: Promise<PollyServerProvider> | null = null;

/**
 * Returns the shared Polly provider for this route module, creating and
 * initializing it on first use.
 *
 * @returns the provider, once `initialize()` has resolved.
 * @throws whatever `initialize()` rejects with; a failed attempt is forgotten
 *   so the next call retries instead of reusing a broken instance.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
  if (!pollyProviderInit) {
    const pending = (async () => {
      const provider = new PollyServerProvider();
      await provider.initialize({
        region: process.env.AWS_REGION || 'us-east-1',
        credentials: {
          accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
          secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
        },
        engine: 'neural',
        defaultVoice: 'Joanna',
      });
      return provider;
    })();

    // Drop a rejected attempt so a later request can retry. The handler runs
    // asynchronously, i.e. after the assignment below.
    pending.catch(() => {
      if (pollyProviderInit === pending) {
        pollyProviderInit = null;
      }
    });

    pollyProviderInit = pending;
  }

  return pollyProviderInit;
}
97
+
98
/**
 * POST /api/tts/synthesize
 *
 * Expects a JSON object with `text` (required string, at most 3000
 * characters) and optional `voice`, `language`, `rate` and
 * `includeSpeechMarks` (defaults to true).
 *
 * Responds with `{ audio, contentType, speechMarks, metadata }`, where a
 * Buffer `audio` is base64-encoded.
 *
 * Request problems are reported as 400; failures while obtaining the provider
 * or synthesizing are logged and reported as 500.
 */
export const POST: RequestHandler = async ({ request }) => {
  // Parsing and validation happen outside the try/catch below so that their
  // 400 responses are not caught and re-reported as 500.
  const body = await request.json().catch(() => null);
  if (typeof body !== 'object' || body === null) {
    throw error(400, { message: 'Request body must be a JSON object' });
  }

  const { text, voice, language, rate, includeSpeechMarks = true } = body;

  if (!text || typeof text !== 'string') {
    throw error(400, { message: 'Text is required' });
  }

  if (text.length > 3000) {
    throw error(400, { message: 'Text too long (max 3000 characters)' });
  }

  try {
    const polly = await getPollyProvider();
    const result = await polly.synthesize({
      text,
      voice: voice || 'Joanna',
      language: language || 'en-US',
      rate,
      includeSpeechMarks,
    });

    return json({
      // JSON cannot carry raw bytes, so Buffer audio is sent as base64.
      audio: result.audio instanceof Buffer ? result.audio.toString('base64') : result.audio,
      contentType: result.contentType,
      speechMarks: result.speechMarks,
      metadata: result.metadata,
    });
  } catch (err) {
    console.error('[TTS API] Error:', err);
    throw error(500, { message: err instanceof Error ? err.message : 'Synthesis failed' });
  }
};
131
+ ```
132
+
133
+ ### Voices Endpoint
134
+
135
+ Copy the example to: **`src/routes/api/tts/voices/+server.ts`**
136
+
137
+ ```typescript
138
+ import { json, error } from '@sveltejs/kit';
139
+ import type { RequestHandler } from './$types';
140
+ import { PollyServerProvider } from '@pie-players/tts-server-polly';
141
+
142
// Module-wide provider for this route. Note that this variable is declared in
// this file, so it is a separate instance from the one in the synthesize
// route; move the factory into a shared module if a single instance is wanted.
//
// The initialization promise is cached (rather than the instance) so that
// concurrent requests all wait for the same initialize() call.
let pollyProviderInit: Promise<PollyServerProvider> | null = null;

/**
 * Returns this module's Polly provider, creating and initializing it on
 * first use.
 *
 * @returns the provider, once `initialize()` has resolved.
 * @throws whatever `initialize()` rejects with; a failed attempt is forgotten
 *   so the next call retries.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
  if (!pollyProviderInit) {
    const pending = (async () => {
      const provider = new PollyServerProvider();
      await provider.initialize({
        region: process.env.AWS_REGION || 'us-east-1',
        credentials: {
          accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
          secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
        },
        engine: 'neural',
      });
      return provider;
    })();

    // Drop a rejected attempt so a later request can retry. The handler runs
    // asynchronously, i.e. after the assignment below.
    pending.catch(() => {
      if (pollyProviderInit === pending) {
        pollyProviderInit = null;
      }
    });

    pollyProviderInit = pending;
  }

  return pollyProviderInit;
}
159
+
160
// Gender filter values accepted by this endpoint.
const VOICE_GENDERS = ['male', 'female', 'neutral'] as const;

/**
 * GET /api/tts/voices?language=…&gender=…
 *
 * Both query parameters are optional. `gender` must be one of `male`,
 * `female` or `neutral`; any other value is rejected with 400 instead of
 * being forwarded to the provider. Responds with `{ voices }`.
 *
 * Provider failures are logged and reported as 500.
 */
export const GET: RequestHandler = async ({ url }) => {
  // searchParams.get() yields null for a missing parameter; normalise both
  // "missing" and "empty" to undefined.
  const language = url.searchParams.get('language') || undefined;
  const genderParam = url.searchParams.get('gender') || undefined;

  // find() narrows the raw string to the literal union without a cast.
  const gender = VOICE_GENDERS.find((candidate) => candidate === genderParam);
  if (genderParam !== undefined && gender === undefined) {
    // Thrown outside the try/catch so it is not re-reported as 500.
    throw error(400, { message: "Invalid gender (expected 'male', 'female' or 'neutral')" });
  }

  try {
    const polly = await getPollyProvider();
    const voices = await polly.getVoices({ language, gender });

    return json({ voices });
  } catch (err) {
    console.error('[TTS API] Error:', err);
    throw error(500, { message: err instanceof Error ? err.message : 'Failed to get voices' });
  }
};
174
+ ```
175
+
176
+ ## Step 4: Use in Client Code
177
+
178
+ ### Basic Usage
179
+
180
+ ```typescript
181
+ import { ServerTTSProvider } from '@pie-players/tts-client-server';
182
+ import { TTSService } from '@pie-players/pie-assessment-toolkit';
183
+
184
+ // Initialize TTS service with server provider
185
+ const provider = new ServerTTSProvider();
186
+ const ttsService = new TTSService();
187
+
188
+ await ttsService.initialize(provider, {
189
+ apiEndpoint: '/api/tts',
190
+ provider: 'polly',
191
+ voice: 'Joanna',
192
+ language: 'en-US',
193
+ rate: 1.0,
194
+ });
195
+
196
+ // Speak with word highlighting
197
+ await ttsService.speak('Hello world, this is a test.', {
198
+ contentElement: document.getElementById('content'),
199
+ });
200
+ ```
201
+
202
+ ### With Svelte Component
203
+
204
+ ```svelte
205
<script lang="ts">
  import { ServerTTSProvider } from '@pie-players/tts-client-server';
  import { TTSService } from '@pie-players/pie-assessment-toolkit';
  import { onMount } from 'svelte';

  // Stays undefined until onMount has finished initializing the service.
  let ttsService: TTSService | undefined;
  let contentElement: HTMLElement;

  onMount(async () => {
    const provider = new ServerTTSProvider();
    const service = new TTSService();

    await service.initialize(provider, {
      apiEndpoint: '/api/tts',
      provider: 'polly',
      voice: 'Joanna',
    });

    // Publish only after initialize() resolved, so handleSpeak never sees a
    // service that is constructed but not yet initialized.
    ttsService = service;
  });

  async function handleSpeak() {
    // The button can be clicked before initialization has completed.
    if (!ttsService) {
      return;
    }

    await ttsService.speak('Hello world', {
      contentElement,
    });
  }
</script>
230
+
231
+ <div bind:this={contentElement}>
232
+ <p>Hello world, this is a test of text to speech.</p>
233
+ </div>
234
+
235
+ <button on:click={handleSpeak}>Speak</button>
236
+ ```
237
+
238
+ ## Step 5: Add Redis Caching (Optional)
239
+
240
+ ### Install Redis
241
+
242
+ ```bash
243
+ bun add ioredis
244
+ ```
245
+
246
+ ### Update API Route with Caching
247
+
248
+ ```typescript
249
+ import { json, error } from '@sveltejs/kit';
250
+ import type { RequestHandler } from './$types';
251
+ import { PollyServerProvider } from '@pie-players/tts-server-polly';
252
+ import { generateHashedCacheKey } from '@pie-players/tts-server-core';
253
+ import Redis from 'ioredis';
254
+
255
// Module-wide singletons.
// The Polly initialization promise is cached (rather than the instance) so
// that concurrent requests all wait for the same initialize() call.
let pollyProviderInit: Promise<PollyServerProvider> | null = null;
let redis: Redis | null = null;

/**
 * Returns the shared Redis client, creating it from `REDIS_URL` on first use.
 *
 * @throws Error when `REDIS_URL` is not set, instead of handing back a null
 *   client typed as non-null.
 */
async function getRedis(): Promise<Redis> {
  if (!redis) {
    const redisUrl = process.env.REDIS_URL;
    if (!redisUrl) {
      throw new Error('REDIS_URL is not configured');
    }
    redis = new Redis(redisUrl);
  }
  return redis;
}

/**
 * Returns the shared Polly provider, creating and initializing it on first use.
 *
 * @returns the provider, once `initialize()` has resolved.
 * @throws whatever `initialize()` rejects with; a failed attempt is forgotten
 *   so the next call retries.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
  if (!pollyProviderInit) {
    const pending = (async () => {
      const provider = new PollyServerProvider();
      await provider.initialize({
        region: process.env.AWS_REGION || 'us-east-1',
        credentials: {
          accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
          secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
        },
        engine: 'neural',
      });
      return provider;
    })();

    // Drop a rejected attempt so a later request can retry. The handler runs
    // asynchronously, i.e. after the assignment below.
    pending.catch(() => {
      if (pollyProviderInit === pending) {
        pollyProviderInit = null;
      }
    });

    pollyProviderInit = pending;
  }

  return pollyProviderInit;
}
280
+
281
/**
 * POST /api/tts/synthesize with optional Redis caching.
 *
 * Expects a JSON object with `text` (required string, at most 3000
 * characters) and optional `voice`, `language`, `rate` and
 * `includeSpeechMarks`. Responds with
 * `{ audio, contentType, speechMarks, metadata }`; responses served from the
 * cache carry `metadata.cached = true`.
 *
 * Caching is used only when `REDIS_URL` is set. Cache read and write failures
 * are logged and otherwise ignored. Request problems are reported as 400;
 * other failures are logged and reported as 500.
 */
export const POST: RequestHandler = async ({ request }) => {
  // Parsing and validation happen outside the try/catch below so that their
  // 400 responses are not caught and re-reported as 500.
  const body = await request.json().catch(() => null);
  if (typeof body !== 'object' || body === null) {
    throw error(400, { message: 'Request body must be a JSON object' });
  }

  const { text, voice = 'Joanna', language = 'en-US', rate = 1.0, includeSpeechMarks = true } = body;

  if (!text || typeof text !== 'string') {
    throw error(400, { message: 'Text is required' });
  }

  if (text.length > 3000) {
    throw error(400, { message: 'Text too long (max 3000 characters)' });
  }

  try {
    const baseKey = await generateHashedCacheKey({
      providerId: 'aws-polly',
      text,
      voice,
      language,
      rate,
      format: 'mp3',
    });
    // includeSpeechMarks is not part of the hashed parameters, so it is added
    // here; otherwise a response cached without speech marks could be served
    // to a request that asked for them.
    // NOTE(review): assumes generateHashedCacheKey resolves to a string — confirm.
    const cacheKey = `${baseKey}:${includeSpeechMarks ? 'marks' : 'nomarks'}`;
    const cacheEnabled = Boolean(process.env.REDIS_URL);

    if (cacheEnabled) {
      try {
        const redisClient = await getRedis();
        const cached = await redisClient.get(cacheKey);

        if (cached) {
          console.log('[TTS API] Cache hit:', cacheKey);
          const hit = JSON.parse(cached);
          // Spread instead of mutating so a cached entry without metadata
          // does not throw.
          return json({ ...hit, metadata: { ...hit.metadata, cached: true } });
        }
      } catch (cacheError) {
        console.warn('[TTS API] Cache read error:', cacheError);
        // Continue without cache
      }
    }

    const polly = await getPollyProvider();
    const result = await polly.synthesize({
      text,
      voice,
      language,
      rate,
      includeSpeechMarks,
    });

    const response = {
      // JSON cannot carry raw bytes, so Buffer audio is sent as base64.
      audio: result.audio instanceof Buffer ? result.audio.toString('base64') : result.audio,
      contentType: result.contentType,
      speechMarks: result.speechMarks,
      metadata: result.metadata,
    };

    if (cacheEnabled) {
      try {
        const redisClient = await getRedis();
        // 24-hour TTL, in seconds.
        await redisClient.setex(cacheKey, 24 * 60 * 60, JSON.stringify(response));
        console.log('[TTS API] Cached result:', cacheKey);
      } catch (cacheError) {
        console.warn('[TTS API] Cache write error:', cacheError);
        // Non-fatal, continue
      }
    }

    return json(response);
  } catch (err) {
    console.error('[TTS API] Error:', err);
    throw error(500, { message: err instanceof Error ? err.message : 'Synthesis failed' });
  }
};
357
+ ```
358
+
359
+ ## Step 6: Test the Integration
360
+
361
+ ### Test API Endpoints
362
+
363
+ ```bash
364
+ # Test synthesize endpoint
365
+ curl -X POST http://localhost:5173/api/tts/synthesize \
366
+ -H "Content-Type: application/json" \
367
+ -d '{"text": "Hello world", "voice": "Joanna"}'
368
+
369
+ # Test voices endpoint
370
+ curl http://localhost:5173/api/tts/voices
371
+ ```
372
+
373
+ ### Test in Browser
374
+
375
+ ```typescript
376
+ // In browser console
377
+ const response = await fetch('/api/tts/synthesize', {
378
+ method: 'POST',
379
+ headers: { 'Content-Type': 'application/json' },
380
+ body: JSON.stringify({
381
+ text: 'Hello world, this is a test.',
382
+ voice: 'Joanna',
383
+ }),
384
+ });
385
+
386
+ const data = await response.json();
387
+ console.log('Speech marks:', data.speechMarks);
388
+ console.log('Metadata:', data.metadata);
389
+ ```
390
+
391
+ ## Redis Caching Benefits
392
+
393
+ With Redis caching enabled:
394
+
395
+ - **First request:** Full Polly API call (~300-500ms)
396
+ - **Cached requests:** Redis retrieval (~10-20ms)
397
+ - **Cost savings:** 80-90% reduction in Polly API calls
398
+ - **TTL:** 24 hours (configurable)
399
+
400
+ ### Cache Key Format
401
+
402
+ ```
403
+ tts:aws-polly:Joanna:en-US:1.00:mp3:<sha256-hash-of-text>
404
+ ```
405
+
406
+ ## Security Considerations
407
+
408
+ ### Credentials
409
+
410
+ - ✅ AWS credentials stay on server (never exposed to browser)
411
+ - ✅ Use IAM roles in production (no hardcoded credentials)
412
+ - ✅ Use environment variables for configuration
413
+
414
+ ### Authentication (Optional)
415
+
416
+ Add authentication middleware to protect API:
417
+
418
+ ```typescript
419
+ // src/hooks.server.ts
420
+ import type { Handle } from '@sveltejs/kit';
421
+
422
/**
 * Server hook that requires a valid `Authorization` header on every request
 * whose path starts with `/api/tts`; all other requests pass straight through.
 *
 * `isValidToken` is not defined in this snippet — supply your own JWT or
 * API-key check.
 */
export const handle: Handle = async ({ event, resolve }) => {
  const targetsTtsApi = event.url.pathname.startsWith('/api/tts');
  if (!targetsTtsApi) {
    return resolve(event);
  }

  // Verify JWT token or API key
  const authHeader = event.request.headers.get('Authorization');
  if (authHeader && isValidToken(authHeader)) {
    return resolve(event);
  }

  const unauthorizedBody = JSON.stringify({ error: 'Unauthorized' });
  return new Response(unauthorizedBody, {
    status: 401,
    headers: { 'Content-Type': 'application/json' },
  });
};
437
+ ```
438
+
439
+ ### Rate Limiting
440
+
441
+ Add rate limiting to prevent abuse:
442
+
443
+ ```typescript
444
+ import { rateLimit } from '$lib/rate-limiter';
445
+
446
+ export const POST: RequestHandler = async ({ request, getClientAddress }) => {
447
+ // Check rate limit
448
+ const clientIP = getClientAddress();
449
+ const allowed = await rateLimit.check(clientIP, {
450
+ maxRequests: 60, // 60 requests
451
+ windowMs: 60000, // per minute
452
+ });
453
+
454
+ if (!allowed) {
455
+ throw error(429, { message: 'Rate limit exceeded' });
456
+ }
457
+
458
+ // ... rest of handler
459
+ };
460
+ ```
461
+
462
+ ## Cost Optimization
463
+
464
+ ### AWS Polly Pricing
465
+
466
+ - **Neural voices:** $16 per 1M characters
467
+ - **Standard voices:** $4 per 1M characters
468
+
469
+ ### Example Costs
470
+
471
+ **Scenario:** 1000 students taking an assessment
472
+
473
+ - Average assessment: 5 passages × 500 words × 5 chars = 12,500 chars per student
474
+ - Total: 12.5M characters
475
+ - Cost without caching: $200 (neural) or $50 (standard)
476
+ - Cost with 80% cache hit rate: $40 (neural) or $10 (standard)
477
+
478
+ ### Optimization Tips
479
+
480
+ 1. **Use Redis caching** - 24-hour TTL captures repeated content
481
+ 2. **Standard voices for development** - Switch to neural for production
482
+ 3. **Monitor usage** - Track API calls and cache hit rates
483
+ 4. **Pre-generate common content** - Cache frequently used passages
484
+
485
+ ## Troubleshooting
486
+
487
+ ### Error: "AWS credentials not found"
488
+
489
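+ Check that the environment variables are visible to the server process (when running under Node, Vite does not copy values from `.env` into `process.env`; export them in the shell or read them through SvelteKit's `$env/dynamic/private`):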
490
+ ```bash
491
+ echo $AWS_REGION
492
+ echo $AWS_ACCESS_KEY_ID
493
+ ```
494
+
495
+ ### Error: "Text too long"
496
+
497
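+ Amazon Polly's `SynthesizeSpeech` accepts at most 3000 billed characters per request (SSML tags are not billed). Split longer text into sentence-aligned chunks and synthesize each chunk separately: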
498
+
499
+ ```typescript
500
/**
 * Splits text into chunks of at most `maxLength` characters, breaking at
 * sentence boundaries (whitespace following `.`, `!` or `?`) where possible.
 *
 * Sentences within a chunk are joined by a single space. A sentence that is
 * itself longer than `maxLength` is broken at the last whitespace that fits,
 * or hard-cut at `maxLength` when it contains none.
 *
 * @param text - the text to split.
 * @param maxLength - maximum chunk length; values below 1 are treated as 1.
 * @returns the non-empty chunks in order; an empty array for blank input.
 */
function splitText(text: string, maxLength = 2500): string[] {
  // Guard against a zero or negative limit, which could never make progress.
  const limit = Math.max(1, Math.floor(maxLength));
  const sentences = text.split(/(?<=[.!?])\s+/);
  const chunks: string[] = [];
  let currentChunk = '';

  for (const rawSentence of sentences) {
    let sentence = rawSentence.trim();

    // A single sentence that cannot fit in any chunk is broken up on its own.
    while (sentence.length > limit) {
      if (currentChunk) {
        chunks.push(currentChunk);
        currentChunk = '';
      }
      // Look one character past the limit so a space exactly at the boundary counts.
      const lastSpace = sentence.slice(0, limit + 1).search(/\s\S*$/);
      const cut = lastSpace > 0 ? lastSpace : limit;
      chunks.push(sentence.slice(0, cut).trimEnd());
      sentence = sentence.slice(cut).trimStart();
    }

    if (!sentence) {
      continue;
    }

    // Measure the joined length, separator included, before committing.
    const candidate = currentChunk ? `${currentChunk} ${sentence}` : sentence;
    if (candidate.length > limit) {
      chunks.push(currentChunk);
      currentChunk = sentence;
    } else {
      currentChunk = candidate;
    }
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }

  return chunks;
}
520
+ ```
521
+
522
+ ### Error: "Speech marks empty"
523
+
524
+ Check that:
525
+ 1. Speech marks are requested in API call
526
+ 2. Provider supports speech marks
527
+ 3. Text is not empty
528
+
529
+ ### Redis connection errors
530
+
531
+ If Redis is unavailable, the API will work without caching. Check Redis:
532
+
533
+ ```bash
534
+ redis-cli ping
535
+ # Should return: PONG
536
+ ```
537
+
538
+ ## Production Deployment
539
+
540
+ ### Environment Setup
541
+
542
+ ```bash
543
+ # Production environment variables
544
+ export NODE_ENV=production
545
+ export AWS_REGION=us-east-1
546
+ export AWS_ACCESS_KEY_ID=xxx
547
+ export AWS_SECRET_ACCESS_KEY=yyy
548
+ export REDIS_URL=redis://your-redis-host:6379
549
+ ```
550
+
551
+ ### Docker Deployment
552
+
553
+ ```dockerfile
554
FROM node:20-alpine
WORKDIR /app
COPY . .
# Install all dependencies first: the build step typically needs
# devDependencies (e.g. Vite and SvelteKit), which --production would omit.
RUN npm ci
RUN npm run build
# Remove devDependencies again so they are not shipped in the runtime image.
RUN npm prune --omit=dev
EXPOSE 3000
CMD ["node", "build"]
561
+ ```
562
+
563
+ ### Health Check
564
+
565
+ Add a health endpoint:
566
+
567
+ ```typescript
568
+ // src/routes/api/health/+server.ts
569
+ import { json } from '@sveltejs/kit';
570
+ import type { RequestHandler } from './$types';
571
+
572
/**
 * GET /api/health
 *
 * Reports a timestamp plus the results of the Polly and Redis checks.
 * `checkPolly` and `checkRedis` are not defined in this snippet — supply your
 * own implementations.
 *
 * NOTE(review): `status` is always 'ok' regardless of the check results;
 * derive it from them once their return shape is known.
 */
export const GET: RequestHandler = async () => {
  // The two checks are independent, so run them concurrently.
  const [polly, redis] = await Promise.all([checkPolly(), checkRedis()]);

  const health = {
    status: 'ok',
    timestamp: new Date().toISOString(),
    services: {
      polly,
      redis,
    },
  };

  return json(health);
};
584
+ ```
585
+
586
+ ## Next Steps
587
+
588
+ 1. **Test in your app** - Create a demo page
589
+ 2. **Monitor usage** - Track API calls and costs
590
+ 3. **Add more providers** - Google Cloud TTS, ElevenLabs
591
+ 4. **Optimize caching** - Fine-tune TTL and eviction
592
+
593
+ ## Complete Example
594
+
595
+ See the section-player demo for a complete working example:
596
+ - `packages/section-player/demo.html` - Client-side usage
597
+ - API routes would be added to a SvelteKit app
598
+
599
+ ## Support
600
+
601
+ For issues or questions:
602
+ - Check the [TTS Server API Architecture](../../../docs/tts-server-api-architecture.md)
603
+ - See [TTS Highlighting Implementation Plan](../../../docs/tts-highlighting-implementation-plan.md)
@@ -0,0 +1,110 @@
1
+ /**
2
+ * SvelteKit API route for TTS synthesis
3
+ *
4
+ * Copy this file to your SvelteKit app:
5
+ * src/routes/api/tts/synthesize/+server.ts
6
+ */
7
+
8
+ import { generateHashedCacheKey } from "@pie-players/tts-server-core";
9
+ import { PollyServerProvider } from "@pie-players/tts-server-polly";
10
+ import { error, json } from "@sveltejs/kit";
11
+ import type { RequestHandler } from "./$types";
12
+
13
// In-flight or completed initialization of the module-wide Polly provider.
// Caching the promise (rather than the instance) means concurrent requests
// all wait for the same initialize() call instead of receiving a provider
// that has been constructed but not yet initialized.
let pollyProviderInit: Promise<PollyServerProvider> | null = null;

/**
 * Returns the shared Polly provider, creating and initializing it on first use.
 *
 * @returns the provider, once `initialize()` has resolved.
 * @throws whatever `initialize()` rejects with; a failed attempt is forgotten
 *   so the next call retries instead of reusing a broken instance.
 */
async function getPollyProvider(): Promise<PollyServerProvider> {
  if (!pollyProviderInit) {
    const pending = (async () => {
      const provider = new PollyServerProvider();
      await provider.initialize({
        region: process.env.AWS_REGION || "us-east-1",
        credentials: {
          accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
          secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
        },
        engine: "neural",
        defaultVoice: "Joanna",
      });
      return provider;
    })();

    // Drop a rejected attempt so a later request can retry. The handler runs
    // asynchronously, i.e. after the assignment below.
    pending.catch(() => {
      if (pollyProviderInit === pending) {
        pollyProviderInit = null;
      }
    });

    pollyProviderInit = pending;
  }

  return pollyProviderInit;
}
31
+
32
+ // Optional: Redis caching
33
+ // import { createClient } from 'redis';
34
+ // const redis = createClient({ url: process.env.REDIS_URL });
35
+ // await redis.connect();
36
+
37
/**
 * POST /api/tts/synthesize
 *
 * Expects a JSON object with `text` (required string, at most 3000
 * characters) and optional `voice`, `language`, `rate` and
 * `includeSpeechMarks` (defaults to true).
 *
 * Responds with `{ audio, contentType, speechMarks, metadata }`, where a
 * Buffer `audio` is base64-encoded.
 *
 * Request problems are reported as 400; failures while obtaining the provider
 * or synthesizing are logged and reported as 500.
 */
export const POST: RequestHandler = async ({ request }) => {
  // Parsing and validation happen outside the try/catch below so that their
  // 400 responses are not caught and re-reported as 500.
  const body = await request.json().catch(() => null);
  if (typeof body !== "object" || body === null) {
    throw error(400, { message: "Request body must be a JSON object" });
  }

  const { text, voice, language, rate, includeSpeechMarks = true } = body;

  // Validate request
  if (!text || typeof text !== "string") {
    throw error(400, { message: "Text is required and must be a string" });
  }

  if (text.length > 3000) {
    throw error(400, { message: "Text too long (max 3000 characters)" });
  }

  try {
    // Optional: Check Redis cache
    // const cacheKey = await generateHashedCacheKey({
    //   providerId: 'aws-polly',
    //   text,
    //   voice: voice || 'Joanna',
    //   language: language || 'en-US',
    //   rate: rate || 1.0,
    //   format: 'mp3',
    // });
    //
    // const cached = await redis.get(cacheKey);
    // if (cached) {
    //   console.log('[TTS API] Cache hit:', cacheKey);
    //   return json(JSON.parse(cached));
    // }

    // Get Polly provider
    const polly = await getPollyProvider();

    // Synthesize speech
    const result = await polly.synthesize({
      text,
      voice: voice || "Joanna",
      language: language || "en-US",
      rate,
      includeSpeechMarks,
    });

    // Convert Buffer to base64 for JSON response
    const response = {
      audio:
        result.audio instanceof Buffer
          ? result.audio.toString("base64")
          : result.audio,
      contentType: result.contentType,
      speechMarks: result.speechMarks,
      metadata: result.metadata,
    };

    // Optional: Cache result
    // await redis.setex(cacheKey, 24 * 60 * 60, JSON.stringify(response));

    return json(response);
  } catch (err) {
    console.error("[TTS API] Synthesis error:", err);

    if (err instanceof Error) {
      throw error(500, { message: err.message });
    }

    throw error(500, { message: "Internal server error" });
  }
};