kugelaudio 0.1.5 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -0
- package/dist/index.d.mts +238 -3
- package/dist/index.d.ts +238 -3
- package/dist/index.js +327 -4
- package/dist/index.mjs +327 -4
- package/package.json +1 -1
- package/src/client.ts +390 -1
- package/src/index.ts +4 -0
- package/src/types.ts +87 -0
package/README.md
CHANGED
|
@@ -150,6 +150,8 @@ const audio = await client.tts.generate({
|
|
|
150
150
|
maxNewTokens: 2048, // Maximum tokens to generate
|
|
151
151
|
sampleRate: 24000, // Output sample rate
|
|
152
152
|
speakerPrefix: true, // Add speaker prefix for better quality
|
|
153
|
+
normalize: true, // Enable text normalization (see below)
|
|
154
|
+
language: 'en', // Language for normalization
|
|
153
155
|
});
|
|
154
156
|
|
|
155
157
|
// Audio properties
|
|
@@ -250,6 +252,53 @@ onChunk: (chunk) => {
|
|
|
250
252
|
}
|
|
251
253
|
```
|
|
252
254
|
|
|
255
|
+
## Text Normalization
|
|
256
|
+
|
|
257
|
+
Text normalization converts numbers, dates, times, currency amounts, and other non-word tokens into their spoken-word form before synthesis. For example:
|
|
258
|
+
- "I have 3 apples" → "I have three apples"
|
|
259
|
+
- "The meeting is at 2:30 PM" → "The meeting is at two thirty PM"
|
|
260
|
+
- "€50.99" → "fifty euros and ninety-nine cents"
|
|
261
|
+
|
|
262
|
+
### Usage
|
|
263
|
+
|
|
264
|
+
```typescript
|
|
265
|
+
// With explicit language (recommended - fastest)
|
|
266
|
+
const audio = await client.tts.generate({
|
|
267
|
+
text: 'I bought 3 items for €50.99 on 01/15/2024.',
|
|
268
|
+
normalize: true,
|
|
269
|
+
language: 'en', // Specify language for best performance
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
// With auto-detection (adds ~150ms latency)
|
|
273
|
+
const autoDetectedAudio = await client.tts.generate({
|
|
274
|
+
text: 'Ich habe 3 Artikel für 50,99€ gekauft.',
|
|
275
|
+
normalize: true,
|
|
276
|
+
// language not specified - will auto-detect
|
|
277
|
+
});
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Supported Languages
|
|
281
|
+
|
|
282
|
+
| Code | Language | Code | Language |
|
|
283
|
+
|------|----------|------|----------|
|
|
284
|
+
| `de` | German | `nl` | Dutch |
|
|
285
|
+
| `en` | English | `pl` | Polish |
|
|
286
|
+
| `fr` | French | `sv` | Swedish |
|
|
287
|
+
| `es` | Spanish | `da` | Danish |
|
|
288
|
+
| `it` | Italian | `no` | Norwegian |
|
|
289
|
+
| `pt` | Portuguese | `fi` | Finnish |
|
|
290
|
+
| `cs` | Czech | `hu` | Hungarian |
|
|
291
|
+
| `ro` | Romanian | `el` | Greek |
|
|
292
|
+
| `uk` | Ukrainian | `bg` | Bulgarian |
|
|
293
|
+
| `tr` | Turkish | `vi` | Vietnamese |
|
|
294
|
+
| `ar` | Arabic | `hi` | Hindi |
|
|
295
|
+
| `zh` | Chinese | `ja` | Japanese |
|
|
296
|
+
| `ko` | Korean | | |
|
|
297
|
+
|
|
298
|
+
### Performance Warning
|
|
299
|
+
|
|
300
|
+
> ⚠️ **Latency Warning**: Using `normalize: true` without specifying `language` adds approximately **150ms latency** for language auto-detection. For best performance in latency-sensitive applications, always specify the `language` parameter.
|
|
301
|
+
|
|
253
302
|
## Error Handling
|
|
254
303
|
|
|
255
304
|
```typescript
|
|
@@ -306,9 +355,13 @@ interface GenerateOptions {
|
|
|
306
355
|
maxNewTokens?: number; // Default: 2048
|
|
307
356
|
sampleRate?: number; // Default: 24000
|
|
308
357
|
speakerPrefix?: boolean; // Default: true
|
|
358
|
+
normalize?: boolean; // Default: false - Enable text normalization
|
|
359
|
+
language?: string; // ISO 639-1 code for normalization (e.g., 'en', 'de')
|
|
309
360
|
}
|
|
310
361
|
```
|
|
311
362
|
|
|
363
|
+
> ⚠️ **Note**: Using `normalize: true` without `language` adds ~150ms latency for auto-detection.
|
|
364
|
+
|
|
312
365
|
### AudioChunk
|
|
313
366
|
|
|
314
367
|
```typescript
|
package/dist/index.d.mts
CHANGED
|
@@ -59,6 +59,25 @@ interface GenerateOptions {
|
|
|
59
59
|
sampleRate?: number;
|
|
60
60
|
/** Whether to add speaker prefix (default: true) */
|
|
61
61
|
speakerPrefix?: boolean;
|
|
62
|
+
/**
|
|
63
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
|
+
* When true, text will be normalized before TTS generation.
|
|
65
|
+
* Default: false
|
|
66
|
+
*
|
|
67
|
+
* ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
|
|
68
|
+
* latency for language auto-detection. For best performance, always specify
|
|
69
|
+
* the language parameter when using normalization.
|
|
70
|
+
*/
|
|
71
|
+
normalize?: boolean;
|
|
72
|
+
/**
|
|
73
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
+
* If not provided and normalize is true, language will be auto-detected
|
|
75
|
+
* (adds ~150ms latency).
|
|
76
|
+
*
|
|
77
|
+
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
78
|
+
* el, uk, bg, tr, vi, ar, hi, zh, ja, ko
|
|
79
|
+
*/
|
|
80
|
+
language?: string;
|
|
62
81
|
}
|
|
63
82
|
/**
|
|
64
83
|
* Streaming session configuration.
|
|
@@ -157,6 +176,8 @@ interface KugelAudioOptions {
|
|
|
157
176
|
isMasterKey?: boolean;
|
|
158
177
|
/** Whether apiKey is a JWT token (for user authentication). Takes precedence over isMasterKey. */
|
|
159
178
|
isToken?: boolean;
|
|
179
|
+
/** Organisation ID to bill usage against (required for token auth to enable usage recording). */
|
|
180
|
+
orgId?: number;
|
|
160
181
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
161
182
|
apiUrl?: string;
|
|
162
183
|
/** TTS server URL (default: https://eu.kugelaudio.com) */
|
|
@@ -164,10 +185,68 @@ interface KugelAudioOptions {
|
|
|
164
185
|
/** Request timeout in milliseconds (default: 60000) */
|
|
165
186
|
timeout?: number;
|
|
166
187
|
}
|
|
167
|
-
|
|
168
188
|
/**
|
|
169
|
-
*
|
|
189
|
+
* Multi-context session configuration.
|
|
190
|
+
*/
|
|
191
|
+
interface MultiContextConfig {
|
|
192
|
+
/** Default voice ID for new contexts */
|
|
193
|
+
defaultVoiceId?: number;
|
|
194
|
+
/** Output sample rate (default: 24000) */
|
|
195
|
+
sampleRate?: number;
|
|
196
|
+
/** CFG scale for generation (default: 2.0) */
|
|
197
|
+
cfgScale?: number;
|
|
198
|
+
/** Maximum tokens to generate (default: 2048) */
|
|
199
|
+
maxNewTokens?: number;
|
|
200
|
+
/** Enable text normalization (default: true) */
|
|
201
|
+
normalize?: boolean;
|
|
202
|
+
/** Add speaker prefix (default: true) */
|
|
203
|
+
speakerPrefix?: boolean;
|
|
204
|
+
/** Seconds before context auto-closes (default: 20.0) */
|
|
205
|
+
inactivityTimeout?: number;
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Voice settings for a specific context.
|
|
209
|
+
*/
|
|
210
|
+
interface ContextVoiceSettings {
|
|
211
|
+
/** Stability (0.0-1.0) */
|
|
212
|
+
stability?: number;
|
|
213
|
+
/** Similarity boost (0.0-1.0) */
|
|
214
|
+
similarityBoost?: number;
|
|
215
|
+
/** Style (0.0-1.0) */
|
|
216
|
+
style?: number;
|
|
217
|
+
/** Use speaker boost */
|
|
218
|
+
useSpeakerBoost?: boolean;
|
|
219
|
+
/** Speed multiplier */
|
|
220
|
+
speed?: number;
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Audio chunk from multi-context streaming.
|
|
170
224
|
*/
|
|
225
|
+
interface MultiContextAudioChunk extends AudioChunk {
|
|
226
|
+
/** Context ID this audio belongs to */
|
|
227
|
+
contextId: string;
|
|
228
|
+
}
|
|
229
|
+
/**
|
|
230
|
+
* Event callbacks for multi-context streaming.
|
|
231
|
+
*/
|
|
232
|
+
interface MultiContextCallbacks {
|
|
233
|
+
/** Called when session is started */
|
|
234
|
+
onSessionStarted?: (sessionId: string) => void;
|
|
235
|
+
/** Called when a context is created */
|
|
236
|
+
onContextCreated?: (contextId: string) => void;
|
|
237
|
+
/** Called when an audio chunk is received */
|
|
238
|
+
onChunk?: (chunk: MultiContextAudioChunk) => void;
|
|
239
|
+
/** Called when a context finishes generating */
|
|
240
|
+
onContextFinal?: (contextId: string) => void;
|
|
241
|
+
/** Called when a context is closed */
|
|
242
|
+
onContextClosed?: (contextId: string) => void;
|
|
243
|
+
/** Called when a context times out */
|
|
244
|
+
onContextTimeout?: (contextId: string) => void;
|
|
245
|
+
/** Called when session is closed */
|
|
246
|
+
onSessionClosed?: (stats: Record<string, unknown>) => void;
|
|
247
|
+
/** Called on error */
|
|
248
|
+
onError?: (error: Error, contextId?: string) => void;
|
|
249
|
+
}
|
|
171
250
|
|
|
172
251
|
/**
|
|
173
252
|
* Models resource for listing TTS models.
|
|
@@ -209,6 +288,28 @@ declare class TTSResource {
|
|
|
209
288
|
private pendingRequests;
|
|
210
289
|
private requestCounter;
|
|
211
290
|
constructor(client: KugelAudio);
|
|
291
|
+
/**
|
|
292
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
293
|
+
*
|
|
294
|
+
* Call this at application startup to eliminate cold start latency
|
|
295
|
+
* (~300-500ms) from your first TTS request.
|
|
296
|
+
*
|
|
297
|
+
* @example
|
|
298
|
+
* ```typescript
|
|
299
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
300
|
+
*
|
|
301
|
+
* // Pre-connect at startup
|
|
302
|
+
* await client.tts.connect();
|
|
303
|
+
*
|
|
304
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
305
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
306
|
+
* ```
|
|
307
|
+
*/
|
|
308
|
+
connect(): Promise<void>;
|
|
309
|
+
/**
|
|
310
|
+
* Check if WebSocket connection is established and open.
|
|
311
|
+
*/
|
|
312
|
+
isConnected(): boolean;
|
|
212
313
|
/**
|
|
213
314
|
* Generate audio from text with streaming via WebSocket.
|
|
214
315
|
* Returns complete audio after all chunks are received.
|
|
@@ -249,6 +350,98 @@ declare class TTSResource {
|
|
|
249
350
|
*/
|
|
250
351
|
close(): void;
|
|
251
352
|
private parseError;
|
|
353
|
+
/**
|
|
354
|
+
* Create a multi-context session for concurrent TTS streams.
|
|
355
|
+
*
|
|
356
|
+
* Allows managing up to 5 independent audio generation contexts
|
|
357
|
+
* over a single WebSocket connection. Each context has its own
|
|
358
|
+
* text buffer, voice settings, and generation queue.
|
|
359
|
+
*
|
|
360
|
+
* @example
|
|
361
|
+
* ```typescript
|
|
362
|
+
* const session = client.tts.createMultiContextSession({
|
|
363
|
+
* defaultVoiceId: 123,
|
|
364
|
+
* });
|
|
365
|
+
*
|
|
366
|
+
* session.connect({
|
|
367
|
+
* onChunk: (chunk) => {
|
|
368
|
+
* console.log(`Audio from ${chunk.contextId}`);
|
|
369
|
+
* playAudio(chunk.audio);
|
|
370
|
+
* },
|
|
371
|
+
* onContextFinal: (contextId) => {
|
|
372
|
+
* console.log(`${contextId} finished`);
|
|
373
|
+
* },
|
|
374
|
+
* });
|
|
375
|
+
*
|
|
376
|
+
* // Create contexts with different voices
|
|
377
|
+
* session.createContext('narrator', { voiceId: 123 });
|
|
378
|
+
* session.createContext('character', { voiceId: 456 });
|
|
379
|
+
*
|
|
380
|
+
* // Send text to different speakers
|
|
381
|
+
* session.send('narrator', 'The story begins.', true);
|
|
382
|
+
* session.send('character', 'Hello!', true);
|
|
383
|
+
*
|
|
384
|
+
* // Close when done
|
|
385
|
+
* session.close();
|
|
386
|
+
* ```
|
|
387
|
+
*/
|
|
388
|
+
createMultiContextSession(config?: MultiContextConfig): MultiContextSession;
|
|
389
|
+
}
|
|
390
|
+
/**
|
|
391
|
+
* Multi-context WebSocket session for concurrent TTS streams.
|
|
392
|
+
*/
|
|
393
|
+
declare class MultiContextSession {
|
|
394
|
+
private client;
|
|
395
|
+
private ws;
|
|
396
|
+
private config;
|
|
397
|
+
private callbacks;
|
|
398
|
+
private contexts;
|
|
399
|
+
private _sessionId;
|
|
400
|
+
private isStarted;
|
|
401
|
+
constructor(client: KugelAudio, config?: MultiContextConfig);
|
|
402
|
+
/**
|
|
403
|
+
* Get the current session ID, or null if not connected.
|
|
404
|
+
*/
|
|
405
|
+
get sessionId(): string | null;
|
|
406
|
+
/**
|
|
407
|
+
* Connect to the multi-context WebSocket endpoint.
|
|
408
|
+
*/
|
|
409
|
+
connect(callbacks: MultiContextCallbacks): void;
|
|
410
|
+
/**
|
|
411
|
+
* Create a new context with optional voice settings.
|
|
412
|
+
*/
|
|
413
|
+
createContext(contextId: string, options?: {
|
|
414
|
+
voiceId?: number;
|
|
415
|
+
voiceSettings?: ContextVoiceSettings;
|
|
416
|
+
}): void;
|
|
417
|
+
/**
|
|
418
|
+
* Send text to a specific context.
|
|
419
|
+
*/
|
|
420
|
+
send(contextId: string, text: string, flush?: boolean): void;
|
|
421
|
+
/**
|
|
422
|
+
* Flush a context's buffer.
|
|
423
|
+
*/
|
|
424
|
+
flush(contextId: string): void;
|
|
425
|
+
/**
|
|
426
|
+
* Close a specific context.
|
|
427
|
+
*/
|
|
428
|
+
closeContext(contextId: string): void;
|
|
429
|
+
/**
|
|
430
|
+
* Send keep-alive to reset a context's inactivity timeout.
|
|
431
|
+
*/
|
|
432
|
+
keepAlive(contextId: string): void;
|
|
433
|
+
/**
|
|
434
|
+
* Close the session and all contexts.
|
|
435
|
+
*/
|
|
436
|
+
close(): void;
|
|
437
|
+
/**
|
|
438
|
+
* Get active context IDs.
|
|
439
|
+
*/
|
|
440
|
+
get activeContexts(): string[];
|
|
441
|
+
/**
|
|
442
|
+
* Check if connected.
|
|
443
|
+
*/
|
|
444
|
+
get isConnected(): boolean;
|
|
252
445
|
}
|
|
253
446
|
/**
|
|
254
447
|
* KugelAudio API client.
|
|
@@ -280,6 +473,7 @@ declare class KugelAudio {
|
|
|
280
473
|
private _apiKey;
|
|
281
474
|
private _isMasterKey;
|
|
282
475
|
private _isToken;
|
|
476
|
+
private _orgId;
|
|
283
477
|
private _apiUrl;
|
|
284
478
|
private _ttsUrl;
|
|
285
479
|
private _timeout;
|
|
@@ -290,12 +484,31 @@ declare class KugelAudio {
|
|
|
290
484
|
/** TTS resource */
|
|
291
485
|
readonly tts: TTSResource;
|
|
292
486
|
constructor(options: KugelAudioOptions);
|
|
487
|
+
/**
|
|
488
|
+
* Create a pre-connected KugelAudio client.
|
|
489
|
+
*
|
|
490
|
+
* Use this factory method to get a client that's already connected
|
|
491
|
+
* and ready for fast TTS requests. This eliminates cold start latency
|
|
492
|
+
* (~300-500ms) from your first TTS request.
|
|
493
|
+
*
|
|
494
|
+
* @example
|
|
495
|
+
* ```typescript
|
|
496
|
+
* // Client is ready immediately - no cold start on first request
|
|
497
|
+
* const client = await KugelAudio.create({ apiKey: 'your_api_key' });
|
|
498
|
+
*
|
|
499
|
+
* // First request is fast (~100ms instead of ~500ms)
|
|
500
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
501
|
+
* ```
|
|
502
|
+
*/
|
|
503
|
+
static create(options: KugelAudioOptions): Promise<KugelAudio>;
|
|
293
504
|
/** Get API key */
|
|
294
505
|
get apiKey(): string;
|
|
295
506
|
/** Check if using master key authentication */
|
|
296
507
|
get isMasterKey(): boolean;
|
|
297
508
|
/** Check if using JWT token authentication */
|
|
298
509
|
get isToken(): boolean;
|
|
510
|
+
/** Get organisation ID for billing */
|
|
511
|
+
get orgId(): number | undefined;
|
|
299
512
|
/** Get TTS URL */
|
|
300
513
|
get ttsUrl(): string;
|
|
301
514
|
/**
|
|
@@ -303,6 +516,28 @@ declare class KugelAudio {
|
|
|
303
516
|
* This closes any pooled WebSocket connections.
|
|
304
517
|
*/
|
|
305
518
|
close(): void;
|
|
519
|
+
/**
|
|
520
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
521
|
+
*
|
|
522
|
+
* Call this at application startup to eliminate cold start latency
|
|
523
|
+
* (~300-500ms) from your first TTS request.
|
|
524
|
+
*
|
|
525
|
+
* @example
|
|
526
|
+
* ```typescript
|
|
527
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
528
|
+
*
|
|
529
|
+
* // Pre-connect at startup
|
|
530
|
+
* await client.connect();
|
|
531
|
+
*
|
|
532
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
533
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
534
|
+
* ```
|
|
535
|
+
*/
|
|
536
|
+
connect(): Promise<void>;
|
|
537
|
+
/**
|
|
538
|
+
* Check if WebSocket connection is established and open.
|
|
539
|
+
*/
|
|
540
|
+
isConnected(): boolean;
|
|
306
541
|
/**
|
|
307
542
|
* Make an HTTP request to the API.
|
|
308
543
|
* @internal
|
|
@@ -371,4 +606,4 @@ declare function createWavFile(audio: ArrayBuffer, sampleRate: number): ArrayBuf
|
|
|
371
606
|
*/
|
|
372
607
|
declare function createWavBlob(audio: ArrayBuffer, sampleRate: number): Blob;
|
|
373
608
|
|
|
374
|
-
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|
|
609
|
+
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type ContextVoiceSettings, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|
package/dist/index.d.ts
CHANGED
|
@@ -59,6 +59,25 @@ interface GenerateOptions {
|
|
|
59
59
|
sampleRate?: number;
|
|
60
60
|
/** Whether to add speaker prefix (default: true) */
|
|
61
61
|
speakerPrefix?: boolean;
|
|
62
|
+
/**
|
|
63
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
|
+
* When true, text will be normalized before TTS generation.
|
|
65
|
+
* Default: false
|
|
66
|
+
*
|
|
67
|
+
* ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
|
|
68
|
+
* latency for language auto-detection. For best performance, always specify
|
|
69
|
+
* the language parameter when using normalization.
|
|
70
|
+
*/
|
|
71
|
+
normalize?: boolean;
|
|
72
|
+
/**
|
|
73
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
+
* If not provided and normalize is true, language will be auto-detected
|
|
75
|
+
* (adds ~150ms latency).
|
|
76
|
+
*
|
|
77
|
+
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
78
|
+
* el, uk, bg, tr, vi, ar, hi, zh, ja, ko
|
|
79
|
+
*/
|
|
80
|
+
language?: string;
|
|
62
81
|
}
|
|
63
82
|
/**
|
|
64
83
|
* Streaming session configuration.
|
|
@@ -157,6 +176,8 @@ interface KugelAudioOptions {
|
|
|
157
176
|
isMasterKey?: boolean;
|
|
158
177
|
/** Whether apiKey is a JWT token (for user authentication). Takes precedence over isMasterKey. */
|
|
159
178
|
isToken?: boolean;
|
|
179
|
+
/** Organisation ID to bill usage against (required for token auth to enable usage recording). */
|
|
180
|
+
orgId?: number;
|
|
160
181
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
161
182
|
apiUrl?: string;
|
|
162
183
|
/** TTS server URL (default: https://eu.kugelaudio.com) */
|
|
@@ -164,10 +185,68 @@ interface KugelAudioOptions {
|
|
|
164
185
|
/** Request timeout in milliseconds (default: 60000) */
|
|
165
186
|
timeout?: number;
|
|
166
187
|
}
|
|
167
|
-
|
|
168
188
|
/**
|
|
169
|
-
*
|
|
189
|
+
* Multi-context session configuration.
|
|
190
|
+
*/
|
|
191
|
+
interface MultiContextConfig {
|
|
192
|
+
/** Default voice ID for new contexts */
|
|
193
|
+
defaultVoiceId?: number;
|
|
194
|
+
/** Output sample rate (default: 24000) */
|
|
195
|
+
sampleRate?: number;
|
|
196
|
+
/** CFG scale for generation (default: 2.0) */
|
|
197
|
+
cfgScale?: number;
|
|
198
|
+
/** Maximum tokens to generate (default: 2048) */
|
|
199
|
+
maxNewTokens?: number;
|
|
200
|
+
/** Enable text normalization (default: true) */
|
|
201
|
+
normalize?: boolean;
|
|
202
|
+
/** Add speaker prefix (default: true) */
|
|
203
|
+
speakerPrefix?: boolean;
|
|
204
|
+
/** Seconds before context auto-closes (default: 20.0) */
|
|
205
|
+
inactivityTimeout?: number;
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Voice settings for a specific context.
|
|
209
|
+
*/
|
|
210
|
+
interface ContextVoiceSettings {
|
|
211
|
+
/** Stability (0.0-1.0) */
|
|
212
|
+
stability?: number;
|
|
213
|
+
/** Similarity boost (0.0-1.0) */
|
|
214
|
+
similarityBoost?: number;
|
|
215
|
+
/** Style (0.0-1.0) */
|
|
216
|
+
style?: number;
|
|
217
|
+
/** Use speaker boost */
|
|
218
|
+
useSpeakerBoost?: boolean;
|
|
219
|
+
/** Speed multiplier */
|
|
220
|
+
speed?: number;
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Audio chunk from multi-context streaming.
|
|
170
224
|
*/
|
|
225
|
+
interface MultiContextAudioChunk extends AudioChunk {
|
|
226
|
+
/** Context ID this audio belongs to */
|
|
227
|
+
contextId: string;
|
|
228
|
+
}
|
|
229
|
+
/**
|
|
230
|
+
* Event callbacks for multi-context streaming.
|
|
231
|
+
*/
|
|
232
|
+
interface MultiContextCallbacks {
|
|
233
|
+
/** Called when session is started */
|
|
234
|
+
onSessionStarted?: (sessionId: string) => void;
|
|
235
|
+
/** Called when a context is created */
|
|
236
|
+
onContextCreated?: (contextId: string) => void;
|
|
237
|
+
/** Called when an audio chunk is received */
|
|
238
|
+
onChunk?: (chunk: MultiContextAudioChunk) => void;
|
|
239
|
+
/** Called when a context finishes generating */
|
|
240
|
+
onContextFinal?: (contextId: string) => void;
|
|
241
|
+
/** Called when a context is closed */
|
|
242
|
+
onContextClosed?: (contextId: string) => void;
|
|
243
|
+
/** Called when a context times out */
|
|
244
|
+
onContextTimeout?: (contextId: string) => void;
|
|
245
|
+
/** Called when session is closed */
|
|
246
|
+
onSessionClosed?: (stats: Record<string, unknown>) => void;
|
|
247
|
+
/** Called on error */
|
|
248
|
+
onError?: (error: Error, contextId?: string) => void;
|
|
249
|
+
}
|
|
171
250
|
|
|
172
251
|
/**
|
|
173
252
|
* Models resource for listing TTS models.
|
|
@@ -209,6 +288,28 @@ declare class TTSResource {
|
|
|
209
288
|
private pendingRequests;
|
|
210
289
|
private requestCounter;
|
|
211
290
|
constructor(client: KugelAudio);
|
|
291
|
+
/**
|
|
292
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
293
|
+
*
|
|
294
|
+
* Call this at application startup to eliminate cold start latency
|
|
295
|
+
* (~300-500ms) from your first TTS request.
|
|
296
|
+
*
|
|
297
|
+
* @example
|
|
298
|
+
* ```typescript
|
|
299
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
300
|
+
*
|
|
301
|
+
* // Pre-connect at startup
|
|
302
|
+
* await client.tts.connect();
|
|
303
|
+
*
|
|
304
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
305
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
306
|
+
* ```
|
|
307
|
+
*/
|
|
308
|
+
connect(): Promise<void>;
|
|
309
|
+
/**
|
|
310
|
+
* Check if WebSocket connection is established and open.
|
|
311
|
+
*/
|
|
312
|
+
isConnected(): boolean;
|
|
212
313
|
/**
|
|
213
314
|
* Generate audio from text with streaming via WebSocket.
|
|
214
315
|
* Returns complete audio after all chunks are received.
|
|
@@ -249,6 +350,98 @@ declare class TTSResource {
|
|
|
249
350
|
*/
|
|
250
351
|
close(): void;
|
|
251
352
|
private parseError;
|
|
353
|
+
/**
|
|
354
|
+
* Create a multi-context session for concurrent TTS streams.
|
|
355
|
+
*
|
|
356
|
+
* Allows managing up to 5 independent audio generation contexts
|
|
357
|
+
* over a single WebSocket connection. Each context has its own
|
|
358
|
+
* text buffer, voice settings, and generation queue.
|
|
359
|
+
*
|
|
360
|
+
* @example
|
|
361
|
+
* ```typescript
|
|
362
|
+
* const session = client.tts.createMultiContextSession({
|
|
363
|
+
* defaultVoiceId: 123,
|
|
364
|
+
* });
|
|
365
|
+
*
|
|
366
|
+
* session.connect({
|
|
367
|
+
* onChunk: (chunk) => {
|
|
368
|
+
* console.log(`Audio from ${chunk.contextId}`);
|
|
369
|
+
* playAudio(chunk.audio);
|
|
370
|
+
* },
|
|
371
|
+
* onContextFinal: (contextId) => {
|
|
372
|
+
* console.log(`${contextId} finished`);
|
|
373
|
+
* },
|
|
374
|
+
* });
|
|
375
|
+
*
|
|
376
|
+
* // Create contexts with different voices
|
|
377
|
+
* session.createContext('narrator', { voiceId: 123 });
|
|
378
|
+
* session.createContext('character', { voiceId: 456 });
|
|
379
|
+
*
|
|
380
|
+
* // Send text to different speakers
|
|
381
|
+
* session.send('narrator', 'The story begins.', true);
|
|
382
|
+
* session.send('character', 'Hello!', true);
|
|
383
|
+
*
|
|
384
|
+
* // Close when done
|
|
385
|
+
* session.close();
|
|
386
|
+
* ```
|
|
387
|
+
*/
|
|
388
|
+
createMultiContextSession(config?: MultiContextConfig): MultiContextSession;
|
|
389
|
+
}
|
|
390
|
+
/**
|
|
391
|
+
* Multi-context WebSocket session for concurrent TTS streams.
|
|
392
|
+
*/
|
|
393
|
+
declare class MultiContextSession {
|
|
394
|
+
private client;
|
|
395
|
+
private ws;
|
|
396
|
+
private config;
|
|
397
|
+
private callbacks;
|
|
398
|
+
private contexts;
|
|
399
|
+
private _sessionId;
|
|
400
|
+
private isStarted;
|
|
401
|
+
constructor(client: KugelAudio, config?: MultiContextConfig);
|
|
402
|
+
/**
|
|
403
|
+
* Get the current session ID, or null if not connected.
|
|
404
|
+
*/
|
|
405
|
+
get sessionId(): string | null;
|
|
406
|
+
/**
|
|
407
|
+
* Connect to the multi-context WebSocket endpoint.
|
|
408
|
+
*/
|
|
409
|
+
connect(callbacks: MultiContextCallbacks): void;
|
|
410
|
+
/**
|
|
411
|
+
* Create a new context with optional voice settings.
|
|
412
|
+
*/
|
|
413
|
+
createContext(contextId: string, options?: {
|
|
414
|
+
voiceId?: number;
|
|
415
|
+
voiceSettings?: ContextVoiceSettings;
|
|
416
|
+
}): void;
|
|
417
|
+
/**
|
|
418
|
+
* Send text to a specific context.
|
|
419
|
+
*/
|
|
420
|
+
send(contextId: string, text: string, flush?: boolean): void;
|
|
421
|
+
/**
|
|
422
|
+
* Flush a context's buffer.
|
|
423
|
+
*/
|
|
424
|
+
flush(contextId: string): void;
|
|
425
|
+
/**
|
|
426
|
+
* Close a specific context.
|
|
427
|
+
*/
|
|
428
|
+
closeContext(contextId: string): void;
|
|
429
|
+
/**
|
|
430
|
+
* Send keep-alive to reset a context's inactivity timeout.
|
|
431
|
+
*/
|
|
432
|
+
keepAlive(contextId: string): void;
|
|
433
|
+
/**
|
|
434
|
+
* Close the session and all contexts.
|
|
435
|
+
*/
|
|
436
|
+
close(): void;
|
|
437
|
+
/**
|
|
438
|
+
* Get active context IDs.
|
|
439
|
+
*/
|
|
440
|
+
get activeContexts(): string[];
|
|
441
|
+
/**
|
|
442
|
+
* Check if connected.
|
|
443
|
+
*/
|
|
444
|
+
get isConnected(): boolean;
|
|
252
445
|
}
|
|
253
446
|
/**
|
|
254
447
|
* KugelAudio API client.
|
|
@@ -280,6 +473,7 @@ declare class KugelAudio {
|
|
|
280
473
|
private _apiKey;
|
|
281
474
|
private _isMasterKey;
|
|
282
475
|
private _isToken;
|
|
476
|
+
private _orgId;
|
|
283
477
|
private _apiUrl;
|
|
284
478
|
private _ttsUrl;
|
|
285
479
|
private _timeout;
|
|
@@ -290,12 +484,31 @@ declare class KugelAudio {
|
|
|
290
484
|
/** TTS resource */
|
|
291
485
|
readonly tts: TTSResource;
|
|
292
486
|
constructor(options: KugelAudioOptions);
|
|
487
|
+
/**
|
|
488
|
+
* Create a pre-connected KugelAudio client.
|
|
489
|
+
*
|
|
490
|
+
* Use this factory method to get a client that's already connected
|
|
491
|
+
* and ready for fast TTS requests. This eliminates cold start latency
|
|
492
|
+
* (~300-500ms) from your first TTS request.
|
|
493
|
+
*
|
|
494
|
+
* @example
|
|
495
|
+
* ```typescript
|
|
496
|
+
* // Client is ready immediately - no cold start on first request
|
|
497
|
+
* const client = await KugelAudio.create({ apiKey: 'your_api_key' });
|
|
498
|
+
*
|
|
499
|
+
* // First request is fast (~100ms instead of ~500ms)
|
|
500
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
501
|
+
* ```
|
|
502
|
+
*/
|
|
503
|
+
static create(options: KugelAudioOptions): Promise<KugelAudio>;
|
|
293
504
|
/** Get API key */
|
|
294
505
|
get apiKey(): string;
|
|
295
506
|
/** Check if using master key authentication */
|
|
296
507
|
get isMasterKey(): boolean;
|
|
297
508
|
/** Check if using JWT token authentication */
|
|
298
509
|
get isToken(): boolean;
|
|
510
|
+
/** Get organisation ID for billing */
|
|
511
|
+
get orgId(): number | undefined;
|
|
299
512
|
/** Get TTS URL */
|
|
300
513
|
get ttsUrl(): string;
|
|
301
514
|
/**
|
|
@@ -303,6 +516,28 @@ declare class KugelAudio {
|
|
|
303
516
|
* This closes any pooled WebSocket connections.
|
|
304
517
|
*/
|
|
305
518
|
close(): void;
|
|
519
|
+
/**
|
|
520
|
+
* Pre-establish WebSocket connection for faster first request.
|
|
521
|
+
*
|
|
522
|
+
* Call this at application startup to eliminate cold start latency
|
|
523
|
+
* (~300-500ms) from your first TTS request.
|
|
524
|
+
*
|
|
525
|
+
* @example
|
|
526
|
+
* ```typescript
|
|
527
|
+
* const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
528
|
+
*
|
|
529
|
+
* // Pre-connect at startup
|
|
530
|
+
* await client.connect();
|
|
531
|
+
*
|
|
532
|
+
* // First request is now fast (~100ms instead of ~500ms)
|
|
533
|
+
* await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
|
|
534
|
+
* ```
|
|
535
|
+
*/
|
|
536
|
+
connect(): Promise<void>;
|
|
537
|
+
/**
|
|
538
|
+
* Check if WebSocket connection is established and open.
|
|
539
|
+
*/
|
|
540
|
+
isConnected(): boolean;
|
|
306
541
|
/**
|
|
307
542
|
* Make an HTTP request to the API.
|
|
308
543
|
* @internal
|
|
@@ -371,4 +606,4 @@ declare function createWavFile(audio: ArrayBuffer, sampleRate: number): ArrayBuf
|
|
|
371
606
|
*/
|
|
372
607
|
declare function createWavBlob(audio: ArrayBuffer, sampleRate: number): Blob;
|
|
373
608
|
|
|
374
|
-
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|
|
609
|
+
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type ContextVoiceSettings, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|