kugelaudio 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -0
- package/dist/index.d.mts +80 -0
- package/dist/index.d.ts +80 -0
- package/dist/index.js +80 -3
- package/dist/index.mjs +80 -3
- package/package.json +1 -1
- package/src/client.ts +82 -0
- package/src/types.ts +19 -0
package/README.md
CHANGED
|
@@ -150,6 +150,8 @@ const audio = await client.tts.generate({
|
|
|
150
150
|
maxNewTokens: 2048, // Maximum tokens to generate
|
|
151
151
|
sampleRate: 24000, // Output sample rate
|
|
152
152
|
speakerPrefix: true, // Add speaker prefix for better quality
|
|
153
|
+
normalize: true, // Enable text normalization (see below)
|
|
154
|
+
language: 'en', // Language for normalization
|
|
153
155
|
});
|
|
154
156
|
|
|
155
157
|
// Audio properties
|
|
@@ -250,6 +252,53 @@ onChunk: (chunk) => {
|
|
|
250
252
|
}
|
|
251
253
|
```
|
|
252
254
|
|
|
255
|
+
## Text Normalization
|
|
256
|
+
|
|
257
|
+
Text normalization converts numbers, dates, times, currency amounts, and other non-word text into their spoken-word form. For example:
|
|
258
|
+
- "I have 3 apples" → "I have three apples"
|
|
259
|
+
- "The meeting is at 2:30 PM" → "The meeting is at two thirty PM"
|
|
260
|
+
- "€50.99" → "fifty euros and ninety-nine cents"
|
|
261
|
+
|
|
262
|
+
### Usage
|
|
263
|
+
|
|
264
|
+
```typescript
|
|
265
|
+
// With explicit language (recommended - fastest)
|
|
266
|
+
const audio = await client.tts.generate({
|
|
267
|
+
text: 'I bought 3 items for €50.99 on 01/15/2024.',
|
|
268
|
+
normalize: true,
|
|
269
|
+
language: 'en', // Specify language for best performance
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
// With auto-detection (adds ~150ms latency)
|
|
273
|
+
const autoDetectedAudio = await client.tts.generate({
|
|
274
|
+
text: 'Ich habe 3 Artikel für 50,99€ gekauft.',
|
|
275
|
+
normalize: true,
|
|
276
|
+
// language not specified - will auto-detect
|
|
277
|
+
});
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Supported Languages
|
|
281
|
+
|
|
282
|
+
| Code | Language | Code | Language |
|
|
283
|
+
|------|----------|------|----------|
|
|
284
|
+
| `de` | German | `nl` | Dutch |
|
|
285
|
+
| `en` | English | `pl` | Polish |
|
|
286
|
+
| `fr` | French | `sv` | Swedish |
|
|
287
|
+
| `es` | Spanish | `da` | Danish |
|
|
288
|
+
| `it` | Italian | `no` | Norwegian |
|
|
289
|
+
| `pt` | Portuguese | `fi` | Finnish |
|
|
290
|
+
| `cs` | Czech | `hu` | Hungarian |
|
|
291
|
+
| `ro` | Romanian | `el` | Greek |
|
|
292
|
+
| `uk` | Ukrainian | `bg` | Bulgarian |
|
|
293
|
+
| `tr` | Turkish | `vi` | Vietnamese |
|
|
294
|
+
| `ar` | Arabic | `hi` | Hindi |
|
|
295
|
+
| `zh` | Chinese | `ja` | Japanese |
|
|
296
|
+
| `ko` | Korean | | |
|
|
297
|
+
|
|
298
|
+
### Performance Warning
|
|
299
|
+
|
|
300
|
+
> ⚠️ **Latency Warning**: Using `normalize: true` without specifying `language` adds approximately **150ms latency** for language auto-detection. For best performance in latency-sensitive applications, always specify the `language` parameter.
|
|
301
|
+
|
|
253
302
|
## Error Handling
|
|
254
303
|
|
|
255
304
|
```typescript
|
|
@@ -306,9 +355,13 @@ interface GenerateOptions {
|
|
|
306
355
|
maxNewTokens?: number; // Default: 2048
|
|
307
356
|
sampleRate?: number; // Default: 24000
|
|
308
357
|
speakerPrefix?: boolean; // Default: true
|
|
358
|
+
normalize?: boolean; // Default: false - Enable text normalization
|
|
359
|
+
language?: string; // ISO 639-1 code for normalization (e.g., 'en', 'de')
|
|
309
360
|
}
|
|
310
361
|
```
|
|
311
362
|
|
|
363
|
+
> ⚠️ **Note**: Using `normalize: true` without `language` adds ~150ms latency for auto-detection.
|
|
364
|
+
|
|
312
365
|
### AudioChunk
|
|
313
366
|
|
|
314
367
|
```typescript
|
package/dist/index.d.mts
CHANGED
|
@@ -59,6 +59,25 @@ interface GenerateOptions {
|
|
|
59
59
|
sampleRate?: number;
|
|
60
60
|
/** Whether to add speaker prefix (default: true) */
|
|
61
61
|
speakerPrefix?: boolean;
|
|
62
|
+
/**
|
|
63
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
|
+
* When true, text will be normalized before TTS generation.
|
|
65
|
+
* Default: false
|
|
66
|
+
*
|
|
67
|
+
* ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
|
|
68
|
+
* latency for language auto-detection. For best performance, always specify
|
|
69
|
+
* the language parameter when using normalization.
|
|
70
|
+
*/
|
|
71
|
+
normalize?: boolean;
|
|
72
|
+
/**
|
|
73
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
+
* If not provided and normalize is true, language will be auto-detected
|
|
75
|
+
* (adds ~150ms latency).
|
|
76
|
+
*
|
|
77
|
+
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
78
|
+
* el, uk, bg, tr, vi, ar, hi, zh, ja, ko
|
|
79
|
+
*/
|
|
80
|
+
language?: string;
|
|
62
81
|
}
|
|
63
82
|
/**
|
|
64
83
|
* Streaming session configuration.
|
|
@@ -209,6 +228,28 @@ declare class TTSResource {
|
|
|
209
228
|
private pendingRequests;
|
|
210
229
|
private requestCounter;
|
|
211
230
|
constructor(client: KugelAudio);
|
|
231
|
+
/**
|
|
232
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
233
|
+
*
|
|
234
|
+
* Call this at application startup to eliminate cold start latency
|
|
235
|
+
* (~300-500ms) from your first TTS request.
|
|
236
|
+
*
|
|
237
|
+
* @example
|
|
238
|
+
* ```typescript
|
|
239
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
240
|
+
*
|
|
241
|
+
* // Pre-connect at startup
|
|
242
|
+
* await client.tts.connect();
|
|
243
|
+
*
|
|
244
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
245
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
246
|
+
* ```
|
|
247
|
+
*/
|
|
248
|
+
connect(): Promise<void>;
|
|
249
|
+
/**
|
|
250
|
+
* Check if WebSocket connection is established and open.
|
|
251
|
+
*/
|
|
252
|
+
isConnected(): boolean;
|
|
212
253
|
/**
|
|
213
254
|
* Generate audio from text with streaming via WebSocket.
|
|
214
255
|
* Returns complete audio after all chunks are received.
|
|
@@ -290,6 +331,23 @@ declare class KugelAudio {
|
|
|
290
331
|
/** TTS resource */
|
|
291
332
|
readonly tts: TTSResource;
|
|
292
333
|
constructor(options: KugelAudioOptions);
|
|
334
|
+
/**
|
|
335
|
+
* Create a pre-connected KugelAudio client.
|
|
336
|
+
*
|
|
337
|
+
* Use this factory method to get a client that's already connected
|
|
338
|
+
* and ready for fast TTS requests. This eliminates cold start latency
|
|
339
|
+
* (~300-500ms) from your first TTS request.
|
|
340
|
+
*
|
|
341
|
+
* @example
|
|
342
|
+
* ```typescript
|
|
343
|
+
* // Client is ready immediately - no cold start on first request
|
|
344
|
+
* const client = await KugelAudio.create({ apiKey: 'your_api_key' });
|
|
345
|
+
*
|
|
346
|
+
* // First request is fast (~100ms instead of ~500ms)
|
|
347
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
348
|
+
* ```
|
|
349
|
+
*/
|
|
350
|
+
static create(options: KugelAudioOptions): Promise<KugelAudio>;
|
|
293
351
|
/** Get API key */
|
|
294
352
|
get apiKey(): string;
|
|
295
353
|
/** Check if using master key authentication */
|
|
@@ -303,6 +361,28 @@ declare class KugelAudio {
|
|
|
303
361
|
* This closes any pooled WebSocket connections.
|
|
304
362
|
*/
|
|
305
363
|
close(): void;
|
|
364
|
+
/**
|
|
365
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
366
|
+
*
|
|
367
|
+
* Call this at application startup to eliminate cold start latency
|
|
368
|
+
* (~300-500ms) from your first TTS request.
|
|
369
|
+
*
|
|
370
|
+
* @example
|
|
371
|
+
* ```typescript
|
|
372
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
373
|
+
*
|
|
374
|
+
* // Pre-connect at startup
|
|
375
|
+
* await client.connect();
|
|
376
|
+
*
|
|
377
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
378
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
379
|
+
* ```
|
|
380
|
+
*/
|
|
381
|
+
connect(): Promise<void>;
|
|
382
|
+
/**
|
|
383
|
+
* Check if WebSocket connection is established and open.
|
|
384
|
+
*/
|
|
385
|
+
isConnected(): boolean;
|
|
306
386
|
/**
|
|
307
387
|
* Make an HTTP request to the API.
|
|
308
388
|
* @internal
|
package/dist/index.d.ts
CHANGED
|
@@ -59,6 +59,25 @@ interface GenerateOptions {
|
|
|
59
59
|
sampleRate?: number;
|
|
60
60
|
/** Whether to add speaker prefix (default: true) */
|
|
61
61
|
speakerPrefix?: boolean;
|
|
62
|
+
/**
|
|
63
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
|
+
* When true, text will be normalized before TTS generation.
|
|
65
|
+
* Default: false
|
|
66
|
+
*
|
|
67
|
+
* ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
|
|
68
|
+
* latency for language auto-detection. For best performance, always specify
|
|
69
|
+
* the language parameter when using normalization.
|
|
70
|
+
*/
|
|
71
|
+
normalize?: boolean;
|
|
72
|
+
/**
|
|
73
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
+
* If not provided and normalize is true, language will be auto-detected
|
|
75
|
+
* (adds ~150ms latency).
|
|
76
|
+
*
|
|
77
|
+
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
78
|
+
* el, uk, bg, tr, vi, ar, hi, zh, ja, ko
|
|
79
|
+
*/
|
|
80
|
+
language?: string;
|
|
62
81
|
}
|
|
63
82
|
/**
|
|
64
83
|
* Streaming session configuration.
|
|
@@ -209,6 +228,28 @@ declare class TTSResource {
|
|
|
209
228
|
private pendingRequests;
|
|
210
229
|
private requestCounter;
|
|
211
230
|
constructor(client: KugelAudio);
|
|
231
|
+
/**
|
|
232
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
233
|
+
*
|
|
234
|
+
* Call this at application startup to eliminate cold start latency
|
|
235
|
+
* (~300-500ms) from your first TTS request.
|
|
236
|
+
*
|
|
237
|
+
* @example
|
|
238
|
+
* ```typescript
|
|
239
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
240
|
+
*
|
|
241
|
+
* // Pre-connect at startup
|
|
242
|
+
* await client.tts.connect();
|
|
243
|
+
*
|
|
244
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
245
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
246
|
+
* ```
|
|
247
|
+
*/
|
|
248
|
+
connect(): Promise<void>;
|
|
249
|
+
/**
|
|
250
|
+
* Check if WebSocket connection is established and open.
|
|
251
|
+
*/
|
|
252
|
+
isConnected(): boolean;
|
|
212
253
|
/**
|
|
213
254
|
* Generate audio from text with streaming via WebSocket.
|
|
214
255
|
* Returns complete audio after all chunks are received.
|
|
@@ -290,6 +331,23 @@ declare class KugelAudio {
|
|
|
290
331
|
/** TTS resource */
|
|
291
332
|
readonly tts: TTSResource;
|
|
292
333
|
constructor(options: KugelAudioOptions);
|
|
334
|
+
/**
|
|
335
|
+
* Create a pre-connected KugelAudio client.
|
|
336
|
+
*
|
|
337
|
+
* Use this factory method to get a client that's already connected
|
|
338
|
+
* and ready for fast TTS requests. This eliminates cold start latency
|
|
339
|
+
* (~300-500ms) from your first TTS request.
|
|
340
|
+
*
|
|
341
|
+
* @example
|
|
342
|
+
* ```typescript
|
|
343
|
+
* // Client is ready immediately - no cold start on first request
|
|
344
|
+
* const client = await KugelAudio.create({ apiKey: 'your_api_key' });
|
|
345
|
+
*
|
|
346
|
+
* // First request is fast (~100ms instead of ~500ms)
|
|
347
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
348
|
+
* ```
|
|
349
|
+
*/
|
|
350
|
+
static create(options: KugelAudioOptions): Promise<KugelAudio>;
|
|
293
351
|
/** Get API key */
|
|
294
352
|
get apiKey(): string;
|
|
295
353
|
/** Check if using master key authentication */
|
|
@@ -303,6 +361,28 @@ declare class KugelAudio {
|
|
|
303
361
|
* This closes any pooled WebSocket connections.
|
|
304
362
|
*/
|
|
305
363
|
close(): void;
|
|
364
|
+
/**
|
|
365
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
366
|
+
*
|
|
367
|
+
* Call this at application startup to eliminate cold start latency
|
|
368
|
+
* (~300-500ms) from your first TTS request.
|
|
369
|
+
*
|
|
370
|
+
* @example
|
|
371
|
+
* ```typescript
|
|
372
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
373
|
+
*
|
|
374
|
+
* // Pre-connect at startup
|
|
375
|
+
* await client.connect();
|
|
376
|
+
*
|
|
377
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
378
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
379
|
+
* ```
|
|
380
|
+
*/
|
|
381
|
+
connect(): Promise<void>;
|
|
382
|
+
/**
|
|
383
|
+
* Check if WebSocket connection is established and open.
|
|
384
|
+
*/
|
|
385
|
+
isConnected(): boolean;
|
|
306
386
|
/**
|
|
307
387
|
* Make an HTTP request to the API.
|
|
308
388
|
* @internal
|
package/dist/index.js
CHANGED
|
@@ -217,6 +217,32 @@ var TTSResource = class {
|
|
|
217
217
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
218
218
|
this.requestCounter = 0;
|
|
219
219
|
}
|
|
220
|
+
/**
|
|
221
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
222
|
+
*
|
|
223
|
+
* Call this at application startup to eliminate cold start latency
|
|
224
|
+
* (~300-500ms) from your first TTS request.
|
|
225
|
+
*
|
|
226
|
+
* @example
|
|
227
|
+
* ```typescript
|
|
228
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
229
|
+
*
|
|
230
|
+
* // Pre-connect at startup
|
|
231
|
+
* await client.tts.connect();
|
|
232
|
+
*
|
|
233
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
234
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
235
|
+
* ```
|
|
236
|
+
*/
|
|
237
|
+
async connect() {
|
|
238
|
+
await this.getConnection();
|
|
239
|
+
}
|
|
240
|
+
/**
|
|
241
|
+
* Check if WebSocket connection is established and open.
|
|
242
|
+
*/
|
|
243
|
+
isConnected() {
|
|
244
|
+
return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
|
|
245
|
+
}
|
|
220
246
|
/**
|
|
221
247
|
* Generate audio from text with streaming via WebSocket.
|
|
222
248
|
* Returns complete audio after all chunks are received.
|
|
@@ -390,7 +416,9 @@ var TTSResource = class {
|
|
|
390
416
|
cfg_scale: options.cfgScale ?? 2,
|
|
391
417
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
392
418
|
sample_rate: options.sampleRate ?? 24e3,
|
|
393
|
-
speaker_prefix: options.speakerPrefix ?? true
|
|
419
|
+
speaker_prefix: options.speakerPrefix ?? true,
|
|
420
|
+
normalize: options.normalize ?? false,
|
|
421
|
+
...options.language && { language: options.language }
|
|
394
422
|
}));
|
|
395
423
|
});
|
|
396
424
|
}
|
|
@@ -410,7 +438,9 @@ var TTSResource = class {
|
|
|
410
438
|
cfg_scale: options.cfgScale ?? 2,
|
|
411
439
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
412
440
|
sample_rate: options.sampleRate ?? 24e3,
|
|
413
|
-
speaker_prefix: options.speakerPrefix ?? true
|
|
441
|
+
speaker_prefix: options.speakerPrefix ?? true,
|
|
442
|
+
normalize: options.normalize ?? false,
|
|
443
|
+
...options.language && { language: options.language }
|
|
414
444
|
}));
|
|
415
445
|
};
|
|
416
446
|
ws.onmessage = (event) => {
|
|
@@ -492,7 +522,7 @@ var TTSResource = class {
|
|
|
492
522
|
return new KugelAudioError(message);
|
|
493
523
|
}
|
|
494
524
|
};
|
|
495
|
-
var KugelAudio = class {
|
|
525
|
+
var KugelAudio = class _KugelAudio {
|
|
496
526
|
constructor(options) {
|
|
497
527
|
if (!options.apiKey) {
|
|
498
528
|
throw new Error("API key is required");
|
|
@@ -507,6 +537,27 @@ var KugelAudio = class {
|
|
|
507
537
|
this.voices = new VoicesResource(this);
|
|
508
538
|
this.tts = new TTSResource(this);
|
|
509
539
|
}
|
|
540
|
+
/**
|
|
541
|
+
* Create a pre-connected KugelAudio client.
|
|
542
|
+
*
|
|
543
|
+
* Use this factory method to get a client that's already connected
|
|
544
|
+
* and ready for fast TTS requests. This eliminates cold start latency
|
|
545
|
+
* (~300-500ms) from your first TTS request.
|
|
546
|
+
*
|
|
547
|
+
* @example
|
|
548
|
+
* ```typescript
|
|
549
|
+
* // Client is ready immediately - no cold start on first request
|
|
550
|
+
* const client = await KugelAudio.create({ apiKey: 'your_api_key' });
|
|
551
|
+
*
|
|
552
|
+
* // First request is fast (~100ms instead of ~500ms)
|
|
553
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
554
|
+
* ```
|
|
555
|
+
*/
|
|
556
|
+
static async create(options) {
|
|
557
|
+
const client = new _KugelAudio(options);
|
|
558
|
+
await client.connect();
|
|
559
|
+
return client;
|
|
560
|
+
}
|
|
510
561
|
/** Get API key */
|
|
511
562
|
get apiKey() {
|
|
512
563
|
return this._apiKey;
|
|
@@ -530,6 +581,32 @@ var KugelAudio = class {
|
|
|
530
581
|
close() {
|
|
531
582
|
this.tts.close();
|
|
532
583
|
}
|
|
584
|
+
/**
|
|
585
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
586
|
+
*
|
|
587
|
+
* Call this at application startup to eliminate cold start latency
|
|
588
|
+
* (~300-500ms) from your first TTS request.
|
|
589
|
+
*
|
|
590
|
+
* @example
|
|
591
|
+
* ```typescript
|
|
592
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
593
|
+
*
|
|
594
|
+
* // Pre-connect at startup
|
|
595
|
+
* await client.connect();
|
|
596
|
+
*
|
|
597
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
598
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
599
|
+
* ```
|
|
600
|
+
*/
|
|
601
|
+
async connect() {
|
|
602
|
+
await this.tts.connect();
|
|
603
|
+
}
|
|
604
|
+
/**
|
|
605
|
+
* Check if WebSocket connection is established and open.
|
|
606
|
+
*/
|
|
607
|
+
isConnected() {
|
|
608
|
+
return this.tts.isConnected();
|
|
609
|
+
}
|
|
533
610
|
/**
|
|
534
611
|
* Make an HTTP request to the API.
|
|
535
612
|
* @internal
|
package/dist/index.mjs
CHANGED
|
@@ -181,6 +181,32 @@ var TTSResource = class {
|
|
|
181
181
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
182
182
|
this.requestCounter = 0;
|
|
183
183
|
}
|
|
184
|
+
/**
|
|
185
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
186
|
+
*
|
|
187
|
+
* Call this at application startup to eliminate cold start latency
|
|
188
|
+
* (~300-500ms) from your first TTS request.
|
|
189
|
+
*
|
|
190
|
+
* @example
|
|
191
|
+
* ```typescript
|
|
192
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
193
|
+
*
|
|
194
|
+
* // Pre-connect at startup
|
|
195
|
+
* await client.tts.connect();
|
|
196
|
+
*
|
|
197
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
198
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
199
|
+
* ```
|
|
200
|
+
*/
|
|
201
|
+
async connect() {
|
|
202
|
+
await this.getConnection();
|
|
203
|
+
}
|
|
204
|
+
/**
|
|
205
|
+
* Check if WebSocket connection is established and open.
|
|
206
|
+
*/
|
|
207
|
+
isConnected() {
|
|
208
|
+
return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
|
|
209
|
+
}
|
|
184
210
|
/**
|
|
185
211
|
* Generate audio from text with streaming via WebSocket.
|
|
186
212
|
* Returns complete audio after all chunks are received.
|
|
@@ -354,7 +380,9 @@ var TTSResource = class {
|
|
|
354
380
|
cfg_scale: options.cfgScale ?? 2,
|
|
355
381
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
356
382
|
sample_rate: options.sampleRate ?? 24e3,
|
|
357
|
-
speaker_prefix: options.speakerPrefix ?? true
|
|
383
|
+
speaker_prefix: options.speakerPrefix ?? true,
|
|
384
|
+
normalize: options.normalize ?? false,
|
|
385
|
+
...options.language && { language: options.language }
|
|
358
386
|
}));
|
|
359
387
|
});
|
|
360
388
|
}
|
|
@@ -374,7 +402,9 @@ var TTSResource = class {
|
|
|
374
402
|
cfg_scale: options.cfgScale ?? 2,
|
|
375
403
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
376
404
|
sample_rate: options.sampleRate ?? 24e3,
|
|
377
|
-
speaker_prefix: options.speakerPrefix ?? true
|
|
405
|
+
speaker_prefix: options.speakerPrefix ?? true,
|
|
406
|
+
normalize: options.normalize ?? false,
|
|
407
|
+
...options.language && { language: options.language }
|
|
378
408
|
}));
|
|
379
409
|
};
|
|
380
410
|
ws.onmessage = (event) => {
|
|
@@ -456,7 +486,7 @@ var TTSResource = class {
|
|
|
456
486
|
return new KugelAudioError(message);
|
|
457
487
|
}
|
|
458
488
|
};
|
|
459
|
-
var KugelAudio = class {
|
|
489
|
+
var KugelAudio = class _KugelAudio {
|
|
460
490
|
constructor(options) {
|
|
461
491
|
if (!options.apiKey) {
|
|
462
492
|
throw new Error("API key is required");
|
|
@@ -471,6 +501,27 @@ var KugelAudio = class {
|
|
|
471
501
|
this.voices = new VoicesResource(this);
|
|
472
502
|
this.tts = new TTSResource(this);
|
|
473
503
|
}
|
|
504
|
+
/**
|
|
505
|
+
* Create a pre-connected KugelAudio client.
|
|
506
|
+
*
|
|
507
|
+
* Use this factory method to get a client that's already connected
|
|
508
|
+
* and ready for fast TTS requests. This eliminates cold start latency
|
|
509
|
+
* (~300-500ms) from your first TTS request.
|
|
510
|
+
*
|
|
511
|
+
* @example
|
|
512
|
+
* ```typescript
|
|
513
|
+
* // Client is ready immediately - no cold start on first request
|
|
514
|
+
* const client = await KugelAudio.create({ apiKey: 'your_api_key' });
|
|
515
|
+
*
|
|
516
|
+
* // First request is fast (~100ms instead of ~500ms)
|
|
517
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
518
|
+
* ```
|
|
519
|
+
*/
|
|
520
|
+
static async create(options) {
|
|
521
|
+
const client = new _KugelAudio(options);
|
|
522
|
+
await client.connect();
|
|
523
|
+
return client;
|
|
524
|
+
}
|
|
474
525
|
/** Get API key */
|
|
475
526
|
get apiKey() {
|
|
476
527
|
return this._apiKey;
|
|
@@ -494,6 +545,32 @@ var KugelAudio = class {
|
|
|
494
545
|
close() {
|
|
495
546
|
this.tts.close();
|
|
496
547
|
}
|
|
548
|
+
/**
|
|
549
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
550
|
+
*
|
|
551
|
+
* Call this at application startup to eliminate cold start latency
|
|
552
|
+
* (~300-500ms) from your first TTS request.
|
|
553
|
+
*
|
|
554
|
+
* @example
|
|
555
|
+
* ```typescript
|
|
556
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
557
|
+
*
|
|
558
|
+
* // Pre-connect at startup
|
|
559
|
+
* await client.connect();
|
|
560
|
+
*
|
|
561
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
562
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
563
|
+
* ```
|
|
564
|
+
*/
|
|
565
|
+
async connect() {
|
|
566
|
+
await this.tts.connect();
|
|
567
|
+
}
|
|
568
|
+
/**
|
|
569
|
+
* Check if WebSocket connection is established and open.
|
|
570
|
+
*/
|
|
571
|
+
isConnected() {
|
|
572
|
+
return this.tts.isConnected();
|
|
573
|
+
}
|
|
497
574
|
/**
|
|
498
575
|
* Make an HTTP request to the API.
|
|
499
576
|
* @internal
|
package/package.json
CHANGED
package/src/client.ts
CHANGED
|
@@ -122,6 +122,34 @@ class TTSResource {
|
|
|
122
122
|
|
|
123
123
|
constructor(private client: KugelAudio) {}
|
|
124
124
|
|
|
125
|
+
/**
|
|
126
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
127
|
+
*
|
|
128
|
+
* Call this at application startup to eliminate cold start latency
|
|
129
|
+
* (~300-500ms) from your first TTS request.
|
|
130
|
+
*
|
|
131
|
+
* @example
|
|
132
|
+
* ```typescript
|
|
133
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
134
|
+
*
|
|
135
|
+
* // Pre-connect at startup
|
|
136
|
+
* await client.tts.connect();
|
|
137
|
+
*
|
|
138
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
139
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
140
|
+
* ```
|
|
141
|
+
*/
|
|
142
|
+
async connect(): Promise<void> {
|
|
143
|
+
await this.getConnection();
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Check if WebSocket connection is established and open.
|
|
148
|
+
*/
|
|
149
|
+
isConnected(): boolean {
|
|
150
|
+
return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
|
|
151
|
+
}
|
|
152
|
+
|
|
125
153
|
/**
|
|
126
154
|
* Generate audio from text with streaming via WebSocket.
|
|
127
155
|
* Returns complete audio after all chunks are received.
|
|
@@ -342,6 +370,8 @@ class TTSResource {
|
|
|
342
370
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
343
371
|
sample_rate: options.sampleRate ?? 24000,
|
|
344
372
|
speaker_prefix: options.speakerPrefix ?? true,
|
|
373
|
+
normalize: options.normalize ?? false,
|
|
374
|
+
...(options.language && { language: options.language }),
|
|
345
375
|
}));
|
|
346
376
|
});
|
|
347
377
|
}
|
|
@@ -368,6 +398,8 @@ class TTSResource {
|
|
|
368
398
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
369
399
|
sample_rate: options.sampleRate ?? 24000,
|
|
370
400
|
speaker_prefix: options.speakerPrefix ?? true,
|
|
401
|
+
normalize: options.normalize ?? false,
|
|
402
|
+
...(options.language && { language: options.language }),
|
|
371
403
|
}));
|
|
372
404
|
};
|
|
373
405
|
|
|
@@ -518,6 +550,28 @@ export class KugelAudio {
|
|
|
518
550
|
this.tts = new TTSResource(this);
|
|
519
551
|
}
|
|
520
552
|
|
|
553
|
+
/**
|
|
554
|
+
* Create a pre-connected KugelAudio client.
|
|
555
|
+
*
|
|
556
|
+
* Use this factory method to get a client that's already connected
|
|
557
|
+
* and ready for fast TTS requests. This eliminates cold start latency
|
|
558
|
+
* (~300-500ms) from your first TTS request.
|
|
559
|
+
*
|
|
560
|
+
* @example
|
|
561
|
+
* ```typescript
|
|
562
|
+
* // Client is ready immediately - no cold start on first request
|
|
563
|
+
* const client = await KugelAudio.create({ apiKey: 'your_api_key' });
|
|
564
|
+
*
|
|
565
|
+
* // First request is fast (~100ms instead of ~500ms)
|
|
566
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
567
|
+
* ```
|
|
568
|
+
*/
|
|
569
|
+
static async create(options: KugelAudioOptions): Promise<KugelAudio> {
|
|
570
|
+
const client = new KugelAudio(options);
|
|
571
|
+
await client.connect();
|
|
572
|
+
return client;
|
|
573
|
+
}
|
|
574
|
+
|
|
521
575
|
/** Get API key */
|
|
522
576
|
get apiKey(): string {
|
|
523
577
|
return this._apiKey;
|
|
@@ -546,6 +600,34 @@ export class KugelAudio {
|
|
|
546
600
|
this.tts.close();
|
|
547
601
|
}
|
|
548
602
|
|
|
603
|
+
/**
|
|
604
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
605
|
+
*
|
|
606
|
+
* Call this at application startup to eliminate cold start latency
|
|
607
|
+
* (~300-500ms) from your first TTS request.
|
|
608
|
+
*
|
|
609
|
+
* @example
|
|
610
|
+
* ```typescript
|
|
611
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
612
|
+
*
|
|
613
|
+
* // Pre-connect at startup
|
|
614
|
+
* await client.connect();
|
|
615
|
+
*
|
|
616
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
617
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
618
|
+
* ```
|
|
619
|
+
*/
|
|
620
|
+
async connect(): Promise<void> {
|
|
621
|
+
await this.tts.connect();
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Check if WebSocket connection is established and open.
|
|
626
|
+
*/
|
|
627
|
+
isConnected(): boolean {
|
|
628
|
+
return this.tts.isConnected();
|
|
629
|
+
}
|
|
630
|
+
|
|
549
631
|
/**
|
|
550
632
|
* Make an HTTP request to the API.
|
|
551
633
|
* @internal
|
package/src/types.ts
CHANGED
|
@@ -65,6 +65,25 @@ export interface GenerateOptions {
|
|
|
65
65
|
sampleRate?: number;
|
|
66
66
|
/** Whether to add speaker prefix (default: true) */
|
|
67
67
|
speakerPrefix?: boolean;
|
|
68
|
+
/**
|
|
69
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
70
|
+
* When true, text will be normalized before TTS generation.
|
|
71
|
+
* Default: false
|
|
72
|
+
*
|
|
73
|
+
* ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
|
|
74
|
+
* latency for language auto-detection. For best performance, always specify
|
|
75
|
+
* the language parameter when using normalization.
|
|
76
|
+
*/
|
|
77
|
+
normalize?: boolean;
|
|
78
|
+
/**
|
|
79
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
80
|
+
* If not provided and normalize is true, language will be auto-detected
|
|
81
|
+
* (adds ~150ms latency).
|
|
82
|
+
*
|
|
83
|
+
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
84
|
+
* el, uk, bg, tr, vi, ar, hi, zh, ja, ko
|
|
85
|
+
*/
|
|
86
|
+
language?: string;
|
|
68
87
|
}
|
|
69
88
|
|
|
70
89
|
/**
|